/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1373 - (hide annotations)
Thu Aug 22 08:16:28 2013 UTC (10 years, 8 months ago) by dpavlin
File size: 18411 byte(s)
better error messages

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 1066 to
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 536 rec1 rec2 rec
14 dpavlin 1015 frec frec_eq frec_ne
15 dpavlin 536 regex prefix suffix surround
16     first lookup join_with
17 dpavlin 707 save_into_lookup
18 dpavlin 562
19     split_rec_on
20 dpavlin 785
21     get set
22 dpavlin 791 count
23 dpavlin 980
24 dpavlin 1206 row
25     rec_array
26    
27 dpavlin 536 /;
28 dpavlin 10
29     use warnings;
30     use strict;
31 dpavlin 536
32     #use base qw/WebPAC::Common/;
33 dpavlin 550 use Data::Dump qw/dump/;
34 dpavlin 725 use Carp qw/confess/;
35 dpavlin 10
36 dpavlin 550 # debugging warn(s)
37     my $debug = 0;
38 dpavlin 1037 _debug( $debug );
39 dpavlin 550
40 dpavlin 1021 # FIXME
41 dpavlin 980 use WebPAC::Normalize::ISBN;
42     push @EXPORT, ( 'isbn_10', 'isbn_13' );
43 dpavlin 550
44 dpavlin 1021 use WebPAC::Normalize::MARC;
45 dpavlin 1036 push @EXPORT, ( qw/
46     marc marc_indicators marc_repeatable_subfield
47     marc_compose marc_leader marc_fixed
48     marc_duplicate marc_remove marc_count
49     marc_original_order
50     marc_template
51     /);
52 dpavlin 1021
53 dpavlin 1118 use Storable qw/dclone/;
54    
55 dpavlin 10 =head1 NAME
56    
57 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
58 dpavlin 10
59     =cut
60    
61 dpavlin 1066 our $VERSION = '0.36';
62 dpavlin 10
63     =head1 SYNOPSIS
64    
65 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
66     from input records using perl functions which are specialized for set
67     processing.
68 dpavlin 10
69 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
70     means that you check it's validity before running WebPAC using
71     C<perl -c normalize.pl>.
72 dpavlin 15
73 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
74 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
75 dpavlin 547 C<marc>.
76 dpavlin 15
77 dpavlin 10 =head1 FUNCTIONS
78    
79 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
80     All other functions are available for use within normalisation rules.
81    
82 dpavlin 536 =head2 data_structure
83 dpavlin 10
84 dpavlin 536 Return data structure
85 dpavlin 13
86 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
87 dpavlin 725 lookup => $lookup_hash,
88 dpavlin 536 row => $row,
89     rules => $normalize_pl_config,
90 dpavlin 541 marc_encoding => 'utf-8',
91 dpavlin 595 config => $config,
92 dpavlin 736 load_row_coderef => sub {
93 dpavlin 979 my ($database,$input,$mfn) = @_;
94 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
95 dpavlin 725 },
96 dpavlin 13 );
97    
98 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
99 dpavlin 540 other are optional.
100    
101 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
102 dpavlin 725 die if it's not defined.
103    
104 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
105 dpavlin 15
106 dpavlin 538 Since this function isn't exported you have to call it with
107     C<WebPAC::Normalize::data_structure>.
108    
109 dpavlin 10 =cut
110    
111 dpavlin 736 my $load_row_coderef;
112 dpavlin 725
113 dpavlin 536 sub data_structure {
114     my $arg = {@_};
115 dpavlin 13
116 dpavlin 536 die "need row argument" unless ($arg->{row});
117     die "need normalisation argument" unless ($arg->{rules});
118 dpavlin 31
119 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
120 dpavlin 983 _set_ds( $arg->{row} );
121 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
122 dpavlin 541 _clean_ds( %{ $arg } );
123 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
124 dpavlin 725
125 dpavlin 1011 no strict 'subs';
126 dpavlin 1364 no warnings 'all';
127 dpavlin 1011 eval "$arg->{rules};";
128 dpavlin 1373 die "error evaling [$@] using rules " . $arg->{rules} . "\n" if ($@);
129 dpavlin 540
130 dpavlin 538 return _get_ds();
131 dpavlin 10 }
132    
133 dpavlin 983 =head2 _set_ds
134 dpavlin 13
135 dpavlin 536 Set current record hash
136 dpavlin 433
137 dpavlin 983 _set_ds( $rec );
138 dpavlin 433
139     =cut
140    
141 dpavlin 536 my $rec;
142 dpavlin 433
143 dpavlin 983 sub _set_ds {
144 dpavlin 536 $rec = shift or die "no record hash";
145 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
146 dpavlin 433 }
147    
148 dpavlin 1367 =head2 _get_rec
149 dpavlin 1021
150     my $rec = _get_rec();
151    
152     =cut
153    
154     sub _get_rec { $rec };
155    
156 dpavlin 1367 =head2 _set_rec
157    
158     _set_rec( $rec );
159    
160     =cut
161    
162     sub _set_rec { $rec = $_[0] }
163    
164 dpavlin 595 =head2 _set_config
165    
166     Set current config hash
167    
168     _set_config( $config );
169    
170     Magic keys are:
171    
172     =over 4
173    
174     =item _
175    
176     Code of current database
177    
178     =item _mfn
179    
180     Current MFN
181    
182     =back
183    
184     =cut
185    
186     my $config;
187    
188     sub _set_config {
189     $config = shift;
190     }
191    
192 dpavlin 538 =head2 _get_ds
193    
194     Return hash formatted as data structure
195    
196     my $ds = _get_ds();
197    
198     =cut
199    
200 dpavlin 1036 my $out;
201 dpavlin 538
202     sub _get_ds {
203 dpavlin 982 #warn "## out = ",dump($out);
204 dpavlin 538 return $out;
205     }
206    
207     =head2 _clean_ds
208    
209     Clean data structure hash for next record
210    
211     _clean_ds();
212    
213     =cut
214    
215     sub _clean_ds {
216 dpavlin 541 my $a = {@_};
217 dpavlin 1036 $out = undef;
218     WebPAC::Normalize::MARC::_clean();
219 dpavlin 538 }
220    
221     =head2 _set_lookup
222    
223     Set current lookup hash
224    
225     _set_lookup( $lookup );
226    
227     =cut
228    
229     my $lookup;
230    
231     sub _set_lookup {
232     $lookup = shift;
233     }
234    
235 dpavlin 707 =head2 _get_lookup
236    
237     Get current lookup hash
238    
239     my $lookup = _get_lookup();
240    
241     =cut
242    
243     sub _get_lookup {
244     return $lookup;
245     }
246    
247 dpavlin 736 =head2 _set_load_row
248 dpavlin 725
249     Setup code reference which will return L<data_structure> from
250     L<WebPAC::Store>
251    
252 dpavlin 736 _set_load_row(sub {
253 dpavlin 725 my ($database,$input,$mfn) = @_;
254 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
255 dpavlin 725 });
256    
257     =cut
258    
259 dpavlin 736 sub _set_load_row {
260 dpavlin 725 my $coderef = shift;
261     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
262    
263 dpavlin 736 $load_row_coderef = $coderef;
264 dpavlin 725 }
265    
266 dpavlin 554 =head2 _debug
267    
268     Change level of debug warnings
269    
270     _debug( 2 );
271    
272     =cut
273    
274     sub _debug {
275     my $l = shift;
276     return $debug unless defined($l);
277 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
278 dpavlin 554 $debug = $l;
279 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
280 dpavlin 554 }
281    
282 dpavlin 540 =head1 Functions to create C<data_structure>
283    
284     Those functions generally have to first in your normalization file.
285    
286 dpavlin 1066 =head2 to
287    
288     Generic way to set values for some name
289    
290     to('field-name', 'name-value' => rec('200','a') );
291    
292     There are many helpers defined below which might be easier to use.
293    
294     =cut
295    
296     sub to {
297     my $type = shift or confess "need type -- BUG?";
298     my $name = shift or confess "needs name as first argument";
299     my @o = grep { defined($_) && $_ ne '' } @_;
300     return unless (@o);
301     $out->{$name}->{$type} = \@o;
302     }
303    
304 dpavlin 912 =head2 search_display
305 dpavlin 433
306 dpavlin 912 Define output for L<search> and L<display> at the same time
307 dpavlin 433
308 dpavlin 912 search_display('Title', rec('200','a') );
309 dpavlin 13
310     =cut
311    
312 dpavlin 912 sub search_display {
313     my $name = shift or die "search_display needs name as first argument";
314 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
315     return unless (@o);
316     $out->{$name}->{search} = \@o;
317     $out->{$name}->{display} = \@o;
318     }
319 dpavlin 13
320 dpavlin 915 =head2 tag
321    
322 dpavlin 1066 Old name for L<search_display>, it will probably be removed at one point.
323 dpavlin 915
324     =cut
325    
326     sub tag {
327     search_display( @_ );
328     }
329    
330 dpavlin 536 =head2 display
331 dpavlin 13
332 dpavlin 912 Define output just for I<display>
333 dpavlin 125
334 dpavlin 536 @v = display('Title', rec('200','a') );
335 dpavlin 125
336 dpavlin 536 =cut
337 dpavlin 125
338 dpavlin 1066 sub display { to( 'display', @_ ) }
339 dpavlin 13
340 dpavlin 536 =head2 search
341 dpavlin 13
342 dpavlin 536 Prepare values just for I<search>
343 dpavlin 13
344 dpavlin 536 @v = search('Title', rec('200','a') );
345 dpavlin 433
346 dpavlin 536 =cut
347 dpavlin 13
348 dpavlin 1066 sub search { to( 'search', @_ ) }
349 dpavlin 13
350 dpavlin 923 =head2 sorted
351    
352     Insert into lists which will be automatically sorted
353    
354     sorted('Title', rec('200','a') );
355    
356     =cut
357    
358 dpavlin 1066 sub sorted { to( 'sorted', @_ ) }
359 dpavlin 923
360 dpavlin 1205 =head2 row
361 dpavlin 923
362 dpavlin 1205 Insert new row of data into output module
363    
364     row( column => 'foo', column2 => 'bar' );
365    
366     =cut
367    
368     use Data::Dump qw/dump/;
369    
370     sub row {
371 dpavlin 1210 die "array doesn't have odd number of elements but $#_: ",dump( @_ ) if $#_ % 2 == 1;
372     my $table = shift @_;
373     push @{ $out->{'_rows'}->{$table} }, {@_};
374 dpavlin 1205 }
375    
376    
377 dpavlin 540 =head1 Functions to extract data from input
378    
379     This function should be used inside functions to create C<data_structure> described
380     above.
381    
382 dpavlin 641 =head2 _pack_subfields_hash
383    
384 dpavlin 669 @subfields = _pack_subfields_hash( $h );
385     $subfields = _pack_subfields_hash( $h, 1 );
386 dpavlin 641
387 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
388     with subfields (denoted by C<^>) and values.
389    
390 dpavlin 641 =cut
391    
392     sub _pack_subfields_hash {
393    
394 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
395 dpavlin 641
396 dpavlin 1118 my ($hash,$include_subfields) = @_;
397 dpavlin 641
398 dpavlin 831 # sanity and ease of use
399 dpavlin 1118 return $hash if (ref($hash) ne 'HASH');
400 dpavlin 831
401 dpavlin 1118 my $h = dclone( $hash );
402    
403 dpavlin 641 if ( defined($h->{subfields}) ) {
404     my $sfs = delete $h->{subfields} || die "no subfields?";
405     my @out;
406     while (@$sfs) {
407     my $sf = shift @$sfs;
408     push @out, '^' . $sf if ($include_subfields);
409     my $o = shift @$sfs;
410     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
411     # single element subfields are not arrays
412 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
413 dpavlin 667
414 dpavlin 641 push @out, $h->{$sf};
415     } else {
416 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
417 dpavlin 641 push @out, $h->{$sf}->[$o];
418     }
419     }
420 dpavlin 667 if ($include_subfields) {
421     return join('', @out);
422     } else {
423     return @out;
424     }
425 dpavlin 641 } else {
426 dpavlin 667 if ($include_subfields) {
427     my $out = '';
428 dpavlin 668 foreach my $sf (sort keys %$h) {
429 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
430     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
431     } else {
432     $out .= '^' . $sf . $h->{$sf};
433     }
434     }
435     return $out;
436     } else {
437     # FIXME this should probably be in alphabetical order instead of hash order
438     values %{$h};
439     }
440 dpavlin 641 }
441     }
442    
443 dpavlin 536 =head2 rec1
444 dpavlin 371
445 dpavlin 536 Return all values in some field
446 dpavlin 371
447 dpavlin 536 @v = rec1('200')
448 dpavlin 15
449 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
450 dpavlin 15
451 dpavlin 536 =cut
452 dpavlin 15
453 dpavlin 536 sub rec1 {
454     my $f = shift;
455 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
456 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
457 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
458 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
459 dpavlin 641 my @out;
460     foreach my $h ( @{ $rec->{$f} } ) {
461     if (ref($h) eq 'HASH') {
462     push @out, ( _pack_subfields_hash( $h ) );
463 dpavlin 31 } else {
464 dpavlin 641 push @out, $h;
465 dpavlin 31 }
466 dpavlin 641 }
467     return @out;
468 dpavlin 536 } elsif( defined($rec->{$f}) ) {
469     return $rec->{$f};
470 dpavlin 15 }
471     }
472    
473 dpavlin 536 =head2 rec2
474 dpavlin 15
475 dpavlin 536 Return all values in specific field and subfield
476 dpavlin 13
477 dpavlin 536 @v = rec2('200','a')
478 dpavlin 13
479     =cut
480    
481 dpavlin 536 sub rec2 {
482     my $f = shift;
483     return unless (defined($rec && $rec->{$f}));
484     my $sf = shift;
485 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
486 dpavlin 589 return map {
487     if (ref($_->{$sf}) eq 'ARRAY') {
488     @{ $_->{$sf} };
489     } else {
490     $_->{$sf};
491     }
492 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
493 dpavlin 536 }
494 dpavlin 13
495 dpavlin 536 =head2 rec
496 dpavlin 13
497 dpavlin 536 syntaxtic sugar for
498 dpavlin 13
499 dpavlin 536 @v = rec('200')
500     @v = rec('200','a')
501 dpavlin 13
502 dpavlin 750 If rec() returns just single value, it will
503     return scalar, not array.
504    
505 dpavlin 536 =cut
506 dpavlin 373
507 dpavlin 536 sub rec {
508 dpavlin 583 my @out;
509 dpavlin 536 if ($#_ == 0) {
510 dpavlin 583 @out = rec1(@_);
511 dpavlin 536 } elsif ($#_ == 1) {
512 dpavlin 583 @out = rec2(@_);
513 dpavlin 13 }
514 dpavlin 750 if ($#out == 0 && ! wantarray) {
515     return $out[0];
516     } elsif (@out) {
517 dpavlin 583 return @out;
518     } else {
519     return '';
520     }
521 dpavlin 13 }
522    
523 dpavlin 1012 =head2 frec
524    
525     Returns first value from field
526    
527     $v = frec('200');
528     $v = frec('200','a');
529    
530     =cut
531    
532     sub frec {
533     my @out = rec(@_);
534     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
535     return shift @out;
536     }
537    
538 dpavlin 1015 =head2 frec_eq
539    
540     =head2 frec_ne
541    
542     Check if first values from two fields are same or different
543    
544     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
545     # values are same
546     } else {
547     # values are different
548     }
549    
550     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
551     could write something like:
552    
553     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
554     # yada tada
555     }
556    
557     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
558     in order to parse text and create invalid function C<eqfrec>.
559    
560     =cut
561    
562     sub frec_eq {
563     my ( $f1,$sf1, $f2, $sf2 ) = @_;
564     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
565     }
566    
567     sub frec_ne {
568     return ! frec_eq( @_ );
569     }
570    
571 dpavlin 536 =head2 regex
572 dpavlin 15
573 dpavlin 536 Apply regex to some or all values
574 dpavlin 15
575 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
576 dpavlin 15
577     =cut
578    
579 dpavlin 536 sub regex {
580     my $r = shift;
581     my @out;
582 dpavlin 550 #warn "r: $r\n", dump(\@_);
583 dpavlin 536 foreach my $t (@_) {
584     next unless ($t);
585     eval "\$t =~ $r";
586     push @out, $t if ($t && $t ne '');
587 dpavlin 368 }
588 dpavlin 536 return @out;
589 dpavlin 15 }
590    
591 dpavlin 536 =head2 prefix
592 dpavlin 15
593 dpavlin 536 Prefix all values with a string
594 dpavlin 15
595 dpavlin 536 @v = prefix( 'my_', @v );
596 dpavlin 15
597     =cut
598    
599 dpavlin 536 sub prefix {
600 dpavlin 819 my $p = shift;
601     return @_ unless defined( $p );
602 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
603     }
604 dpavlin 15
605 dpavlin 536 =head2 suffix
606 dpavlin 15
607 dpavlin 536 suffix all values with a string
608 dpavlin 15
609 dpavlin 536 @v = suffix( '_my', @v );
610 dpavlin 15
611 dpavlin 536 =cut
612 dpavlin 15
613 dpavlin 536 sub suffix {
614 dpavlin 819 my $s = shift;
615     return @_ unless defined( $s );
616 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
617 dpavlin 15 }
618    
619 dpavlin 536 =head2 surround
620 dpavlin 13
621 dpavlin 536 surround all values with a two strings
622 dpavlin 13
623 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
624 dpavlin 13
625     =cut
626    
627 dpavlin 536 sub surround {
628 dpavlin 819 my $p = shift;
629     my $s = shift;
630     $p = '' unless defined( $p );
631     $s = '' unless defined( $s );
632 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
633 dpavlin 13 }
634    
635 dpavlin 536 =head2 first
636 dpavlin 13
637 dpavlin 536 Return first element
638 dpavlin 15
639 dpavlin 536 $v = first( @v );
640 dpavlin 13
641     =cut
642    
643 dpavlin 536 sub first {
644     my $r = shift;
645     return $r;
646 dpavlin 13 }
647    
648 dpavlin 536 =head2 lookup
649 dpavlin 13
650 dpavlin 536 Consult lookup hashes for some value
651 dpavlin 13
652 dpavlin 725 @v = lookup(
653     sub {
654     'ffkk/peri/mfn'.rec('000')
655     },
656     'ffkk','peri','200-a-200-e',
657     sub {
658     first(rec(200,'a')).' '.first(rec('200','e'))
659     }
660     );
661 dpavlin 13
662 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
663     normal lookup definition in C<conf/lookup/something.pl> which looks like:
664 dpavlin 707
665 dpavlin 725 lookup(
666     # which results to return from record recorded in lookup
667     sub { 'ffkk/peri/mfn' . rec('000') },
668     # from which database and input
669     'ffkk','peri',
670     # such that following values match
671     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
672     # if this part is missing, we will try to match same fields
673     # from lookup record and current one, or you can override
674     # which records to use from current record using
675     sub { rec('900','x') . ' ' . rec('900','y') },
676     )
677    
678     You can think about this lookup as SQL (if that helps):
679    
680     select
681     sub { what }
682     from
683     database, input
684     where
685     sub { filter from lookuped record }
686     having
687     sub { optional filter on current record }
688    
689     Easy as pie, right?
690    
691 dpavlin 13 =cut
692    
693 dpavlin 536 sub lookup {
694 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
695    
696 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
697 dpavlin 725
698 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
699 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
700    
701 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
702 dpavlin 725
703     my $mfns;
704     my @having = $having->();
705    
706 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
707 dpavlin 725
708     foreach my $h ( @having ) {
709     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
710 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
711 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
712     }
713 dpavlin 536 }
714 dpavlin 725
715     return unless ($mfns);
716    
717     my @mfns = sort keys %$mfns;
718    
719 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
720 dpavlin 725
721     my $old_rec = $rec;
722     my @out;
723    
724     foreach my $mfn (@mfns) {
725 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
726 dpavlin 725
727 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
728 dpavlin 725
729     my @vals = $what->();
730    
731     push @out, ( @vals );
732    
733 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
734 dpavlin 725 }
735    
736     # if (ref($lookup->{$k}) eq 'ARRAY') {
737     # return @{ $lookup->{$k} };
738     # } else {
739     # return $lookup->{$k};
740     # }
741    
742     $rec = $old_rec;
743    
744 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
745 dpavlin 725
746 dpavlin 740 if ($#out == 0) {
747     return $out[0];
748     } else {
749     return @out;
750     }
751 dpavlin 13 }
752    
753 dpavlin 707 =head2 save_into_lookup
754    
755 dpavlin 725 Save value into lookup. It associates current database, input
756     and specific keys with one or more values which will be
757     associated over MFN.
758 dpavlin 707
759 dpavlin 725 MFN will be extracted from first occurence current of field 000
760     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
761    
762     my $nr = save_into_lookup($database,$input,$key,sub {
763 dpavlin 707 # code which produce one or more values
764     });
765    
766 dpavlin 725 It returns number of items saved.
767 dpavlin 707
768 dpavlin 725 This function shouldn't be called directly, it's called from code created by
769     L<WebPAC::Parser>.
770    
771 dpavlin 707 =cut
772    
773     sub save_into_lookup {
774 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
775     die "save_into_lookup needs database" unless defined($database);
776     die "save_into_lookup needs input" unless defined($input);
777     die "save_into_lookup needs key" unless defined($key);
778 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
779 dpavlin 725
780 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
781 dpavlin 725
782     my $mfn =
783     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
784     defined($config->{_mfn}) ? $config->{_mfn} :
785     die "mfn not defined or zero";
786    
787     my $nr = 0;
788    
789 dpavlin 707 foreach my $v ( $coderef->() ) {
790 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
791 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
792 dpavlin 725 $nr++;
793 dpavlin 707 }
794 dpavlin 725
795     return $nr;
796 dpavlin 707 }
797    
798 dpavlin 595 =head2 config
799    
800     Consult config values stored in C<config.yml>
801    
802     # return database code (key under databases in yaml)
803     $database_code = config(); # use _ from hash
804     $database_name = config('name');
805     $database_input_name = config('input name');
806    
807     Up to three levels are supported.
808    
809     =cut
810    
811     sub config {
812     return unless ($config);
813    
814     my $p = shift;
815    
816     $p ||= '';
817    
818     my $v;
819    
820     warn "### getting config($p)\n" if ($debug > 1);
821    
822     my @p = split(/\s+/,$p);
823     if ($#p < 0) {
824     $v = $config->{ '_' }; # special, database code
825     } else {
826    
827     my $c = dclone( $config );
828    
829     foreach my $k (@p) {
830     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
831     if (ref($c) eq 'ARRAY') {
832     $c = shift @$c;
833     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
834     last;
835     }
836    
837     if (! defined($c->{$k}) ) {
838     $c = undef;
839     last;
840     } else {
841     $c = $c->{$k};
842     }
843     }
844     $v = $c if ($c);
845    
846     }
847    
848     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
849     warn "config( '$p' ) is empty\n" if (! $v);
850    
851     return $v;
852     }
853    
854     =head2 id
855    
856     Returns unique id of this record
857    
858     $id = id();
859    
860     Returns C<42/2> for 2nd occurence of MFN 42.
861    
862     =cut
863    
864     sub id {
865     my $mfn = $config->{_mfn} || die "no _mfn in config data";
866 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
867 dpavlin 595 }
868    
869 dpavlin 536 =head2 join_with
870 dpavlin 13
871 dpavlin 536 Joins walues with some delimiter
872 dpavlin 10
873 dpavlin 536 $v = join_with(", ", @v);
874 dpavlin 10
875 dpavlin 536 =cut
876 dpavlin 10
877 dpavlin 536 sub join_with {
878     my $d = shift;
879 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
880 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
881     return '' unless defined($v);
882     return $v;
883 dpavlin 536 }
884 dpavlin 10
885 dpavlin 562 =head2 split_rec_on
886    
887     Split record subfield on some regex and take one of parts out
888    
889     $a_before_semi_column =
890     split_rec_on('200','a', /\s*;\s*/, $part);
891    
892     C<$part> is optional number of element. First element is
893     B<1>, not 0!
894    
895     If there is no C<$part> parameter or C<$part> is 0, this function will
896     return all values produced by splitting.
897    
898     =cut
899    
900     sub split_rec_on {
901     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
902    
903     my ($fld, $sf, $regex, $part) = @_;
904 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
905 dpavlin 562
906     my @r = rec( $fld, $sf );
907     my $v = shift @r;
908 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
909 dpavlin 562
910 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
911 dpavlin 566
912 dpavlin 562 my @s = split( $regex, $v );
913 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
914 dpavlin 566 if ($part && $part > 0) {
915 dpavlin 562 return $s[ $part - 1 ];
916     } else {
917 dpavlin 571 return @s;
918 dpavlin 562 }
919     }
920    
921 dpavlin 785 my $hash;
922    
923     =head2 set
924    
925     set( key => 'value' );
926    
927     =cut
928    
929     sub set {
930     my ($k,$v) = @_;
931 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
932 dpavlin 785 $hash->{$k} = $v;
933     };
934    
935     =head2 get
936    
937     get( 'key' );
938    
939     =cut
940    
941     sub get {
942     my $k = shift || return;
943     my $v = $hash->{$k};
944 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
945 dpavlin 785 return $v;
946     }
947    
948 dpavlin 791 =head2 count
949 dpavlin 785
950 dpavlin 791 if ( count( @result ) == 1 ) {
951     # do something if only 1 result is there
952     }
953    
954     =cut
955    
956     sub count {
957 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
958 dpavlin 791 return @_ . '';
959     }
960    
961 dpavlin 1214 =head2 rec_array
962    
963     Always return field as array
964    
965 dpavlin 1216 foreach my $d ( rec_array('field') ) {
966 dpavlin 1214 warn $d;
967     }
968    
969     =cut
970    
971     sub rec_array {
972     my $d = $rec->{ $_[0] };
973     return @$d if ref($d) eq 'ARRAY';
974     return ($d);
975     }
976    
977 dpavlin 536 # END
978     1;

  ViewVC Help
Powered by ViewVC 1.1.26