/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1118 - (hide annotations)
Sun Oct 26 15:57:37 2008 UTC (15 years, 6 months ago) by dpavlin
File size: 17791 byte(s)
 r1747@llin:  dpavlin | 2008-10-26 16:55:31 +0100
 bug fix: don't destroy subfields values in data hash

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 1066 to
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 536 rec1 rec2 rec
14 dpavlin 1015 frec frec_eq frec_ne
15 dpavlin 536 regex prefix suffix surround
16     first lookup join_with
17 dpavlin 707 save_into_lookup
18 dpavlin 562
19     split_rec_on
20 dpavlin 785
21     get set
22 dpavlin 791 count
23 dpavlin 980
24 dpavlin 536 /;
25 dpavlin 10
26     use warnings;
27     use strict;
28 dpavlin 536
29     #use base qw/WebPAC::Common/;
30 dpavlin 550 use Data::Dump qw/dump/;
31 dpavlin 725 use Carp qw/confess/;
32 dpavlin 10
33 dpavlin 550 # debugging warn(s)
34     my $debug = 0;
35 dpavlin 1037 _debug( $debug );
36 dpavlin 550
37 dpavlin 1021 # FIXME
38 dpavlin 980 use WebPAC::Normalize::ISBN;
39     push @EXPORT, ( 'isbn_10', 'isbn_13' );
40 dpavlin 550
41 dpavlin 1021 use WebPAC::Normalize::MARC;
42 dpavlin 1036 push @EXPORT, ( qw/
43     marc marc_indicators marc_repeatable_subfield
44     marc_compose marc_leader marc_fixed
45     marc_duplicate marc_remove marc_count
46     marc_original_order
47     marc_template
48     /);
49 dpavlin 1021
50 dpavlin 1118 use Storable qw/dclone/;
51    
52 dpavlin 10 =head1 NAME
53    
54 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
55 dpavlin 10
56     =cut
57    
58 dpavlin 1066 our $VERSION = '0.36';
59 dpavlin 10
60     =head1 SYNOPSIS
61    
62 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
63     from input records using perl functions which are specialized for set
64     processing.
65 dpavlin 10
66 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
67     means that you check it's validity before running WebPAC using
68     C<perl -c normalize.pl>.
69 dpavlin 15
70 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
71 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
72 dpavlin 547 C<marc>.
73 dpavlin 15
74 dpavlin 10 =head1 FUNCTIONS
75    
76 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
77     All other functions are available for use within normalisation rules.
78    
79 dpavlin 536 =head2 data_structure
80 dpavlin 10
81 dpavlin 536 Return data structure
82 dpavlin 13
83 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
84 dpavlin 725 lookup => $lookup_hash,
85 dpavlin 536 row => $row,
86     rules => $normalize_pl_config,
87 dpavlin 541 marc_encoding => 'utf-8',
88 dpavlin 595 config => $config,
89 dpavlin 736 load_row_coderef => sub {
90 dpavlin 979 my ($database,$input,$mfn) = @_;
91 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
92 dpavlin 725 },
93 dpavlin 13 );
94    
95 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
96 dpavlin 540 other are optional.
97    
98 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
99 dpavlin 725 die if it's not defined.
100    
101 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
102 dpavlin 15
103 dpavlin 538 Since this function isn't exported you have to call it with
104     C<WebPAC::Normalize::data_structure>.
105    
106 dpavlin 10 =cut
107    
108 dpavlin 736 my $load_row_coderef;
109 dpavlin 725
110 dpavlin 536 sub data_structure {
111     my $arg = {@_};
112 dpavlin 13
113 dpavlin 536 die "need row argument" unless ($arg->{row});
114     die "need normalisation argument" unless ($arg->{rules});
115 dpavlin 31
116 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
117 dpavlin 983 _set_ds( $arg->{row} );
118 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
119 dpavlin 541 _clean_ds( %{ $arg } );
120 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
121 dpavlin 725
122 dpavlin 1011 no strict 'subs';
123     no warnings 'redefine';
124     eval "$arg->{rules};";
125 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
126 dpavlin 540
127 dpavlin 538 return _get_ds();
128 dpavlin 10 }
129    
130 dpavlin 983 =head2 _set_ds
131 dpavlin 13
132 dpavlin 536 Set current record hash
133 dpavlin 433
134 dpavlin 983 _set_ds( $rec );
135 dpavlin 433
136     =cut
137    
138 dpavlin 536 my $rec;
139 dpavlin 433
140 dpavlin 983 sub _set_ds {
141 dpavlin 536 $rec = shift or die "no record hash";
142 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
143 dpavlin 433 }
144    
145 dpavlin 1021 =head2
146    
147     my $rec = _get_rec();
148    
149     =cut
150    
151     sub _get_rec { $rec };
152    
153 dpavlin 595 =head2 _set_config
154    
155     Set current config hash
156    
157     _set_config( $config );
158    
159     Magic keys are:
160    
161     =over 4
162    
163     =item _
164    
165     Code of current database
166    
167     =item _mfn
168    
169     Current MFN
170    
171     =back
172    
173     =cut
174    
175     my $config;
176    
177     sub _set_config {
178     $config = shift;
179     }
180    
181 dpavlin 538 =head2 _get_ds
182    
183     Return hash formatted as data structure
184    
185     my $ds = _get_ds();
186    
187     =cut
188    
189 dpavlin 1036 my $out;
190 dpavlin 538
191     sub _get_ds {
192 dpavlin 982 #warn "## out = ",dump($out);
193 dpavlin 538 return $out;
194     }
195    
196     =head2 _clean_ds
197    
198     Clean data structure hash for next record
199    
200     _clean_ds();
201    
202     =cut
203    
204     sub _clean_ds {
205 dpavlin 541 my $a = {@_};
206 dpavlin 1036 $out = undef;
207     WebPAC::Normalize::MARC::_clean();
208 dpavlin 538 }
209    
210     =head2 _set_lookup
211    
212     Set current lookup hash
213    
214     _set_lookup( $lookup );
215    
216     =cut
217    
218     my $lookup;
219    
220     sub _set_lookup {
221     $lookup = shift;
222     }
223    
224 dpavlin 707 =head2 _get_lookup
225    
226     Get current lookup hash
227    
228     my $lookup = _get_lookup();
229    
230     =cut
231    
232     sub _get_lookup {
233     return $lookup;
234     }
235    
236 dpavlin 736 =head2 _set_load_row
237 dpavlin 725
238     Setup code reference which will return L<data_structure> from
239     L<WebPAC::Store>
240    
241 dpavlin 736 _set_load_row(sub {
242 dpavlin 725 my ($database,$input,$mfn) = @_;
243 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
244 dpavlin 725 });
245    
246     =cut
247    
248 dpavlin 736 sub _set_load_row {
249 dpavlin 725 my $coderef = shift;
250     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
251    
252 dpavlin 736 $load_row_coderef = $coderef;
253 dpavlin 725 }
254    
255 dpavlin 554 =head2 _debug
256    
257     Change level of debug warnings
258    
259     _debug( 2 );
260    
261     =cut
262    
263     sub _debug {
264     my $l = shift;
265     return $debug unless defined($l);
266 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
267 dpavlin 554 $debug = $l;
268 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
269 dpavlin 554 }
270    
271 dpavlin 540 =head1 Functions to create C<data_structure>
272    
273     Those functions generally have to first in your normalization file.
274    
275 dpavlin 1066 =head2 to
276    
277     Generic way to set values for some name
278    
279     to('field-name', 'name-value' => rec('200','a') );
280    
281     There are many helpers defined below which might be easier to use.
282    
283     =cut
284    
285     sub to {
286     my $type = shift or confess "need type -- BUG?";
287     my $name = shift or confess "needs name as first argument";
288     my @o = grep { defined($_) && $_ ne '' } @_;
289     return unless (@o);
290     $out->{$name}->{$type} = \@o;
291     }
292    
293 dpavlin 912 =head2 search_display
294 dpavlin 433
295 dpavlin 912 Define output for L<search> and L<display> at the same time
296 dpavlin 433
297 dpavlin 912 search_display('Title', rec('200','a') );
298 dpavlin 13
299     =cut
300    
301 dpavlin 912 sub search_display {
302     my $name = shift or die "search_display needs name as first argument";
303 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
304     return unless (@o);
305     $out->{$name}->{search} = \@o;
306     $out->{$name}->{display} = \@o;
307     }
308 dpavlin 13
309 dpavlin 915 =head2 tag
310    
311 dpavlin 1066 Old name for L<search_display>, it will probably be removed at one point.
312 dpavlin 915
313     =cut
314    
315     sub tag {
316     search_display( @_ );
317     }
318    
319 dpavlin 536 =head2 display
320 dpavlin 13
321 dpavlin 912 Define output just for I<display>
322 dpavlin 125
323 dpavlin 536 @v = display('Title', rec('200','a') );
324 dpavlin 125
325 dpavlin 536 =cut
326 dpavlin 125
327 dpavlin 1066 sub display { to( 'display', @_ ) }
328 dpavlin 13
329 dpavlin 536 =head2 search
330 dpavlin 13
331 dpavlin 536 Prepare values just for I<search>
332 dpavlin 13
333 dpavlin 536 @v = search('Title', rec('200','a') );
334 dpavlin 433
335 dpavlin 536 =cut
336 dpavlin 13
337 dpavlin 1066 sub search { to( 'search', @_ ) }
338 dpavlin 13
339 dpavlin 923 =head2 sorted
340    
341     Insert into lists which will be automatically sorted
342    
343     sorted('Title', rec('200','a') );
344    
345     =cut
346    
347 dpavlin 1066 sub sorted { to( 'sorted', @_ ) }
348 dpavlin 923
349    
350 dpavlin 540 =head1 Functions to extract data from input
351    
352     This function should be used inside functions to create C<data_structure> described
353     above.
354    
355 dpavlin 641 =head2 _pack_subfields_hash
356    
357 dpavlin 669 @subfields = _pack_subfields_hash( $h );
358     $subfields = _pack_subfields_hash( $h, 1 );
359 dpavlin 641
360 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
361     with subfields (denoted by C<^>) and values.
362    
363 dpavlin 641 =cut
364    
365     sub _pack_subfields_hash {
366    
367 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
368 dpavlin 641
369 dpavlin 1118 my ($hash,$include_subfields) = @_;
370 dpavlin 641
371 dpavlin 831 # sanity and ease of use
372 dpavlin 1118 return $hash if (ref($hash) ne 'HASH');
373 dpavlin 831
374 dpavlin 1118 my $h = dclone( $hash );
375    
376 dpavlin 641 if ( defined($h->{subfields}) ) {
377     my $sfs = delete $h->{subfields} || die "no subfields?";
378     my @out;
379     while (@$sfs) {
380     my $sf = shift @$sfs;
381     push @out, '^' . $sf if ($include_subfields);
382     my $o = shift @$sfs;
383     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
384     # single element subfields are not arrays
385 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
386 dpavlin 667
387 dpavlin 641 push @out, $h->{$sf};
388     } else {
389 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
390 dpavlin 641 push @out, $h->{$sf}->[$o];
391     }
392     }
393 dpavlin 667 if ($include_subfields) {
394     return join('', @out);
395     } else {
396     return @out;
397     }
398 dpavlin 641 } else {
399 dpavlin 667 if ($include_subfields) {
400     my $out = '';
401 dpavlin 668 foreach my $sf (sort keys %$h) {
402 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
403     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
404     } else {
405     $out .= '^' . $sf . $h->{$sf};
406     }
407     }
408     return $out;
409     } else {
410     # FIXME this should probably be in alphabetical order instead of hash order
411     values %{$h};
412     }
413 dpavlin 641 }
414     }
415    
416 dpavlin 536 =head2 rec1
417 dpavlin 371
418 dpavlin 536 Return all values in some field
419 dpavlin 371
420 dpavlin 536 @v = rec1('200')
421 dpavlin 15
422 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
423 dpavlin 15
424 dpavlin 536 =cut
425 dpavlin 15
426 dpavlin 536 sub rec1 {
427     my $f = shift;
428 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
429 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
430 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
431 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
432 dpavlin 641 my @out;
433     foreach my $h ( @{ $rec->{$f} } ) {
434     if (ref($h) eq 'HASH') {
435     push @out, ( _pack_subfields_hash( $h ) );
436 dpavlin 31 } else {
437 dpavlin 641 push @out, $h;
438 dpavlin 31 }
439 dpavlin 641 }
440     return @out;
441 dpavlin 536 } elsif( defined($rec->{$f}) ) {
442     return $rec->{$f};
443 dpavlin 15 }
444     }
445    
446 dpavlin 536 =head2 rec2
447 dpavlin 15
448 dpavlin 536 Return all values in specific field and subfield
449 dpavlin 13
450 dpavlin 536 @v = rec2('200','a')
451 dpavlin 13
452     =cut
453    
454 dpavlin 536 sub rec2 {
455     my $f = shift;
456     return unless (defined($rec && $rec->{$f}));
457     my $sf = shift;
458 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
459 dpavlin 589 return map {
460     if (ref($_->{$sf}) eq 'ARRAY') {
461     @{ $_->{$sf} };
462     } else {
463     $_->{$sf};
464     }
465 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
466 dpavlin 536 }
467 dpavlin 13
468 dpavlin 536 =head2 rec
469 dpavlin 13
470 dpavlin 536 syntaxtic sugar for
471 dpavlin 13
472 dpavlin 536 @v = rec('200')
473     @v = rec('200','a')
474 dpavlin 13
475 dpavlin 750 If rec() returns just single value, it will
476     return scalar, not array.
477    
478 dpavlin 536 =cut
479 dpavlin 373
480 dpavlin 536 sub rec {
481 dpavlin 583 my @out;
482 dpavlin 536 if ($#_ == 0) {
483 dpavlin 583 @out = rec1(@_);
484 dpavlin 536 } elsif ($#_ == 1) {
485 dpavlin 583 @out = rec2(@_);
486 dpavlin 13 }
487 dpavlin 750 if ($#out == 0 && ! wantarray) {
488     return $out[0];
489     } elsif (@out) {
490 dpavlin 583 return @out;
491     } else {
492     return '';
493     }
494 dpavlin 13 }
495    
496 dpavlin 1012 =head2 frec
497    
498     Returns first value from field
499    
500     $v = frec('200');
501     $v = frec('200','a');
502    
503     =cut
504    
505     sub frec {
506     my @out = rec(@_);
507     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
508     return shift @out;
509     }
510    
511 dpavlin 1015 =head2 frec_eq
512    
513     =head2 frec_ne
514    
515     Check if first values from two fields are same or different
516    
517     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
518     # values are same
519     } else {
520     # values are different
521     }
522    
523     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
524     could write something like:
525    
526     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
527     # yada tada
528     }
529    
530     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
531     in order to parse text and create invalid function C<eqfrec>.
532    
533     =cut
534    
535     sub frec_eq {
536     my ( $f1,$sf1, $f2, $sf2 ) = @_;
537     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
538     }
539    
540     sub frec_ne {
541     return ! frec_eq( @_ );
542     }
543    
544 dpavlin 536 =head2 regex
545 dpavlin 15
546 dpavlin 536 Apply regex to some or all values
547 dpavlin 15
548 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
549 dpavlin 15
550     =cut
551    
552 dpavlin 536 sub regex {
553     my $r = shift;
554     my @out;
555 dpavlin 550 #warn "r: $r\n", dump(\@_);
556 dpavlin 536 foreach my $t (@_) {
557     next unless ($t);
558     eval "\$t =~ $r";
559     push @out, $t if ($t && $t ne '');
560 dpavlin 368 }
561 dpavlin 536 return @out;
562 dpavlin 15 }
563    
564 dpavlin 536 =head2 prefix
565 dpavlin 15
566 dpavlin 536 Prefix all values with a string
567 dpavlin 15
568 dpavlin 536 @v = prefix( 'my_', @v );
569 dpavlin 15
570     =cut
571    
572 dpavlin 536 sub prefix {
573 dpavlin 819 my $p = shift;
574     return @_ unless defined( $p );
575 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
576     }
577 dpavlin 15
578 dpavlin 536 =head2 suffix
579 dpavlin 15
580 dpavlin 536 suffix all values with a string
581 dpavlin 15
582 dpavlin 536 @v = suffix( '_my', @v );
583 dpavlin 15
584 dpavlin 536 =cut
585 dpavlin 15
586 dpavlin 536 sub suffix {
587 dpavlin 819 my $s = shift;
588     return @_ unless defined( $s );
589 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
590 dpavlin 15 }
591    
592 dpavlin 536 =head2 surround
593 dpavlin 13
594 dpavlin 536 surround all values with a two strings
595 dpavlin 13
596 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
597 dpavlin 13
598     =cut
599    
600 dpavlin 536 sub surround {
601 dpavlin 819 my $p = shift;
602     my $s = shift;
603     $p = '' unless defined( $p );
604     $s = '' unless defined( $s );
605 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
606 dpavlin 13 }
607    
608 dpavlin 536 =head2 first
609 dpavlin 13
610 dpavlin 536 Return first element
611 dpavlin 15
612 dpavlin 536 $v = first( @v );
613 dpavlin 13
614     =cut
615    
616 dpavlin 536 sub first {
617     my $r = shift;
618     return $r;
619 dpavlin 13 }
620    
621 dpavlin 536 =head2 lookup
622 dpavlin 13
623 dpavlin 536 Consult lookup hashes for some value
624 dpavlin 13
625 dpavlin 725 @v = lookup(
626     sub {
627     'ffkk/peri/mfn'.rec('000')
628     },
629     'ffkk','peri','200-a-200-e',
630     sub {
631     first(rec(200,'a')).' '.first(rec('200','e'))
632     }
633     );
634 dpavlin 13
635 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
636     normal lookup definition in C<conf/lookup/something.pl> which looks like:
637 dpavlin 707
638 dpavlin 725 lookup(
639     # which results to return from record recorded in lookup
640     sub { 'ffkk/peri/mfn' . rec('000') },
641     # from which database and input
642     'ffkk','peri',
643     # such that following values match
644     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
645     # if this part is missing, we will try to match same fields
646     # from lookup record and current one, or you can override
647     # which records to use from current record using
648     sub { rec('900','x') . ' ' . rec('900','y') },
649     )
650    
651     You can think about this lookup as SQL (if that helps):
652    
653     select
654     sub { what }
655     from
656     database, input
657     where
658     sub { filter from lookuped record }
659     having
660     sub { optional filter on current record }
661    
662     Easy as pie, right?
663    
664 dpavlin 13 =cut
665    
666 dpavlin 536 sub lookup {
667 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
668    
669 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
670 dpavlin 725
671 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
672 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
673    
674 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
675 dpavlin 725
676     my $mfns;
677     my @having = $having->();
678    
679 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
680 dpavlin 725
681     foreach my $h ( @having ) {
682     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
683 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
684 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
685     }
686 dpavlin 536 }
687 dpavlin 725
688     return unless ($mfns);
689    
690     my @mfns = sort keys %$mfns;
691    
692 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
693 dpavlin 725
694     my $old_rec = $rec;
695     my @out;
696    
697     foreach my $mfn (@mfns) {
698 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
699 dpavlin 725
700 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
701 dpavlin 725
702     my @vals = $what->();
703    
704     push @out, ( @vals );
705    
706 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
707 dpavlin 725 }
708    
709     # if (ref($lookup->{$k}) eq 'ARRAY') {
710     # return @{ $lookup->{$k} };
711     # } else {
712     # return $lookup->{$k};
713     # }
714    
715     $rec = $old_rec;
716    
717 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
718 dpavlin 725
719 dpavlin 740 if ($#out == 0) {
720     return $out[0];
721     } else {
722     return @out;
723     }
724 dpavlin 13 }
725    
726 dpavlin 707 =head2 save_into_lookup
727    
728 dpavlin 725 Save value into lookup. It associates current database, input
729     and specific keys with one or more values which will be
730     associated over MFN.
731 dpavlin 707
732 dpavlin 725 MFN will be extracted from first occurence current of field 000
733     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
734    
735     my $nr = save_into_lookup($database,$input,$key,sub {
736 dpavlin 707 # code which produce one or more values
737     });
738    
739 dpavlin 725 It returns number of items saved.
740 dpavlin 707
741 dpavlin 725 This function shouldn't be called directly, it's called from code created by
742     L<WebPAC::Parser>.
743    
744 dpavlin 707 =cut
745    
746     sub save_into_lookup {
747 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
748     die "save_into_lookup needs database" unless defined($database);
749     die "save_into_lookup needs input" unless defined($input);
750     die "save_into_lookup needs key" unless defined($key);
751 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
752 dpavlin 725
753 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
754 dpavlin 725
755     my $mfn =
756     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
757     defined($config->{_mfn}) ? $config->{_mfn} :
758     die "mfn not defined or zero";
759    
760     my $nr = 0;
761    
762 dpavlin 707 foreach my $v ( $coderef->() ) {
763 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
764 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
765 dpavlin 725 $nr++;
766 dpavlin 707 }
767 dpavlin 725
768     return $nr;
769 dpavlin 707 }
770    
771 dpavlin 595 =head2 config
772    
773     Consult config values stored in C<config.yml>
774    
775     # return database code (key under databases in yaml)
776     $database_code = config(); # use _ from hash
777     $database_name = config('name');
778     $database_input_name = config('input name');
779    
780     Up to three levels are supported.
781    
782     =cut
783    
784     sub config {
785     return unless ($config);
786    
787     my $p = shift;
788    
789     $p ||= '';
790    
791     my $v;
792    
793     warn "### getting config($p)\n" if ($debug > 1);
794    
795     my @p = split(/\s+/,$p);
796     if ($#p < 0) {
797     $v = $config->{ '_' }; # special, database code
798     } else {
799    
800     my $c = dclone( $config );
801    
802     foreach my $k (@p) {
803     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
804     if (ref($c) eq 'ARRAY') {
805     $c = shift @$c;
806     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
807     last;
808     }
809    
810     if (! defined($c->{$k}) ) {
811     $c = undef;
812     last;
813     } else {
814     $c = $c->{$k};
815     }
816     }
817     $v = $c if ($c);
818    
819     }
820    
821     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
822     warn "config( '$p' ) is empty\n" if (! $v);
823    
824     return $v;
825     }
826    
827     =head2 id
828    
829     Returns unique id of this record
830    
831     $id = id();
832    
833     Returns C<42/2> for 2nd occurence of MFN 42.
834    
835     =cut
836    
837     sub id {
838     my $mfn = $config->{_mfn} || die "no _mfn in config data";
839 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
840 dpavlin 595 }
841    
842 dpavlin 536 =head2 join_with
843 dpavlin 13
844 dpavlin 536 Joins walues with some delimiter
845 dpavlin 10
846 dpavlin 536 $v = join_with(", ", @v);
847 dpavlin 10
848 dpavlin 536 =cut
849 dpavlin 10
850 dpavlin 536 sub join_with {
851     my $d = shift;
852 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
853 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
854     return '' unless defined($v);
855     return $v;
856 dpavlin 536 }
857 dpavlin 10
858 dpavlin 562 =head2 split_rec_on
859    
860     Split record subfield on some regex and take one of parts out
861    
862     $a_before_semi_column =
863     split_rec_on('200','a', /\s*;\s*/, $part);
864    
865     C<$part> is optional number of element. First element is
866     B<1>, not 0!
867    
868     If there is no C<$part> parameter or C<$part> is 0, this function will
869     return all values produced by splitting.
870    
871     =cut
872    
873     sub split_rec_on {
874     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
875    
876     my ($fld, $sf, $regex, $part) = @_;
877 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
878 dpavlin 562
879     my @r = rec( $fld, $sf );
880     my $v = shift @r;
881 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
882 dpavlin 562
883 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
884 dpavlin 566
885 dpavlin 562 my @s = split( $regex, $v );
886 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
887 dpavlin 566 if ($part && $part > 0) {
888 dpavlin 562 return $s[ $part - 1 ];
889     } else {
890 dpavlin 571 return @s;
891 dpavlin 562 }
892     }
893    
894 dpavlin 785 my $hash;
895    
896     =head2 set
897    
898     set( key => 'value' );
899    
900     =cut
901    
902     sub set {
903     my ($k,$v) = @_;
904 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
905 dpavlin 785 $hash->{$k} = $v;
906     };
907    
908     =head2 get
909    
910     get( 'key' );
911    
912     =cut
913    
914     sub get {
915     my $k = shift || return;
916     my $v = $hash->{$k};
917 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
918 dpavlin 785 return $v;
919     }
920    
921 dpavlin 791 =head2 count
922 dpavlin 785
923 dpavlin 791 if ( count( @result ) == 1 ) {
924     # do something if only 1 result is there
925     }
926    
927     =cut
928    
929     sub count {
930 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
931 dpavlin 791 return @_ . '';
932     }
933    
934 dpavlin 536 # END
935     1;

  ViewVC Help
Powered by ViewVC 1.1.26