/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1216 - (hide annotations)
Tue Jun 2 13:17:24 2009 UTC (14 years, 10 months ago) by dpavlin
File size: 18311 byte(s)
fix pod example for rec_array

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 1066 to
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 536 rec1 rec2 rec
14 dpavlin 1015 frec frec_eq frec_ne
15 dpavlin 536 regex prefix suffix surround
16     first lookup join_with
17 dpavlin 707 save_into_lookup
18 dpavlin 562
19     split_rec_on
20 dpavlin 785
21     get set
22 dpavlin 791 count
23 dpavlin 980
24 dpavlin 1206 row
25     rec_array
26    
27 dpavlin 536 /;
28 dpavlin 10
29     use warnings;
30     use strict;
31 dpavlin 536
32     #use base qw/WebPAC::Common/;
33 dpavlin 550 use Data::Dump qw/dump/;
34 dpavlin 725 use Carp qw/confess/;
35 dpavlin 10
36 dpavlin 550 # debugging warn(s)
37     my $debug = 0;
38 dpavlin 1037 _debug( $debug );
39 dpavlin 550
40 dpavlin 1021 # FIXME
41 dpavlin 980 use WebPAC::Normalize::ISBN;
42     push @EXPORT, ( 'isbn_10', 'isbn_13' );
43 dpavlin 550
44 dpavlin 1021 use WebPAC::Normalize::MARC;
45 dpavlin 1036 push @EXPORT, ( qw/
46     marc marc_indicators marc_repeatable_subfield
47     marc_compose marc_leader marc_fixed
48     marc_duplicate marc_remove marc_count
49     marc_original_order
50     marc_template
51     /);
52 dpavlin 1021
53 dpavlin 1118 use Storable qw/dclone/;
54    
55 dpavlin 10 =head1 NAME
56    
57 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
58 dpavlin 10
59     =cut
60    
61 dpavlin 1066 our $VERSION = '0.36';
62 dpavlin 10
63     =head1 SYNOPSIS
64    
65 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
66     from input records using perl functions which are specialized for set
67     processing.
68 dpavlin 10
69 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
70     means that you check it's validity before running WebPAC using
71     C<perl -c normalize.pl>.
72 dpavlin 15
73 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
74 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
75 dpavlin 547 C<marc>.
76 dpavlin 15
77 dpavlin 10 =head1 FUNCTIONS
78    
79 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
80     All other functions are available for use within normalisation rules.
81    
82 dpavlin 536 =head2 data_structure
83 dpavlin 10
84 dpavlin 536 Return data structure
85 dpavlin 13
86 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
87 dpavlin 725 lookup => $lookup_hash,
88 dpavlin 536 row => $row,
89     rules => $normalize_pl_config,
90 dpavlin 541 marc_encoding => 'utf-8',
91 dpavlin 595 config => $config,
92 dpavlin 736 load_row_coderef => sub {
93 dpavlin 979 my ($database,$input,$mfn) = @_;
94 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
95 dpavlin 725 },
96 dpavlin 13 );
97    
98 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
99 dpavlin 540 other are optional.
100    
101 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
102 dpavlin 725 die if it's not defined.
103    
104 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
105 dpavlin 15
106 dpavlin 538 Since this function isn't exported you have to call it with
107     C<WebPAC::Normalize::data_structure>.
108    
109 dpavlin 10 =cut
110    
111 dpavlin 736 my $load_row_coderef;
112 dpavlin 725
113 dpavlin 536 sub data_structure {
114     my $arg = {@_};
115 dpavlin 13
116 dpavlin 536 die "need row argument" unless ($arg->{row});
117     die "need normalisation argument" unless ($arg->{rules});
118 dpavlin 31
119 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
120 dpavlin 983 _set_ds( $arg->{row} );
121 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
122 dpavlin 541 _clean_ds( %{ $arg } );
123 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
124 dpavlin 725
125 dpavlin 1011 no strict 'subs';
126     no warnings 'redefine';
127     eval "$arg->{rules};";
128 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
129 dpavlin 540
130 dpavlin 538 return _get_ds();
131 dpavlin 10 }
132    
133 dpavlin 983 =head2 _set_ds
134 dpavlin 13
135 dpavlin 536 Set current record hash
136 dpavlin 433
137 dpavlin 983 _set_ds( $rec );
138 dpavlin 433
139     =cut
140    
141 dpavlin 536 my $rec;
142 dpavlin 433
143 dpavlin 983 sub _set_ds {
144 dpavlin 536 $rec = shift or die "no record hash";
145 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
146 dpavlin 433 }
147    
148 dpavlin 1021 =head2
149    
150     my $rec = _get_rec();
151    
152     =cut
153    
154     sub _get_rec { $rec };
155    
156 dpavlin 595 =head2 _set_config
157    
158     Set current config hash
159    
160     _set_config( $config );
161    
162     Magic keys are:
163    
164     =over 4
165    
166     =item _
167    
168     Code of current database
169    
170     =item _mfn
171    
172     Current MFN
173    
174     =back
175    
176     =cut
177    
178     my $config;
179    
180     sub _set_config {
181     $config = shift;
182     }
183    
184 dpavlin 538 =head2 _get_ds
185    
186     Return hash formatted as data structure
187    
188     my $ds = _get_ds();
189    
190     =cut
191    
192 dpavlin 1036 my $out;
193 dpavlin 538
194     sub _get_ds {
195 dpavlin 982 #warn "## out = ",dump($out);
196 dpavlin 538 return $out;
197     }
198    
199     =head2 _clean_ds
200    
201     Clean data structure hash for next record
202    
203     _clean_ds();
204    
205     =cut
206    
207     sub _clean_ds {
208 dpavlin 541 my $a = {@_};
209 dpavlin 1036 $out = undef;
210     WebPAC::Normalize::MARC::_clean();
211 dpavlin 538 }
212    
213     =head2 _set_lookup
214    
215     Set current lookup hash
216    
217     _set_lookup( $lookup );
218    
219     =cut
220    
221     my $lookup;
222    
223     sub _set_lookup {
224     $lookup = shift;
225     }
226    
227 dpavlin 707 =head2 _get_lookup
228    
229     Get current lookup hash
230    
231     my $lookup = _get_lookup();
232    
233     =cut
234    
235     sub _get_lookup {
236     return $lookup;
237     }
238    
239 dpavlin 736 =head2 _set_load_row
240 dpavlin 725
241     Setup code reference which will return L<data_structure> from
242     L<WebPAC::Store>
243    
244 dpavlin 736 _set_load_row(sub {
245 dpavlin 725 my ($database,$input,$mfn) = @_;
246 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
247 dpavlin 725 });
248    
249     =cut
250    
251 dpavlin 736 sub _set_load_row {
252 dpavlin 725 my $coderef = shift;
253     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
254    
255 dpavlin 736 $load_row_coderef = $coderef;
256 dpavlin 725 }
257    
258 dpavlin 554 =head2 _debug
259    
260     Change level of debug warnings
261    
262     _debug( 2 );
263    
264     =cut
265    
266     sub _debug {
267     my $l = shift;
268     return $debug unless defined($l);
269 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
270 dpavlin 554 $debug = $l;
271 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
272 dpavlin 554 }
273    
274 dpavlin 540 =head1 Functions to create C<data_structure>
275    
276     Those functions generally have to first in your normalization file.
277    
278 dpavlin 1066 =head2 to
279    
280     Generic way to set values for some name
281    
282     to('field-name', 'name-value' => rec('200','a') );
283    
284     There are many helpers defined below which might be easier to use.
285    
286     =cut
287    
288     sub to {
289     my $type = shift or confess "need type -- BUG?";
290     my $name = shift or confess "needs name as first argument";
291     my @o = grep { defined($_) && $_ ne '' } @_;
292     return unless (@o);
293     $out->{$name}->{$type} = \@o;
294     }
295    
296 dpavlin 912 =head2 search_display
297 dpavlin 433
298 dpavlin 912 Define output for L<search> and L<display> at the same time
299 dpavlin 433
300 dpavlin 912 search_display('Title', rec('200','a') );
301 dpavlin 13
302     =cut
303    
304 dpavlin 912 sub search_display {
305     my $name = shift or die "search_display needs name as first argument";
306 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
307     return unless (@o);
308     $out->{$name}->{search} = \@o;
309     $out->{$name}->{display} = \@o;
310     }
311 dpavlin 13
312 dpavlin 915 =head2 tag
313    
314 dpavlin 1066 Old name for L<search_display>, it will probably be removed at one point.
315 dpavlin 915
316     =cut
317    
318     sub tag {
319     search_display( @_ );
320     }
321    
322 dpavlin 536 =head2 display
323 dpavlin 13
324 dpavlin 912 Define output just for I<display>
325 dpavlin 125
326 dpavlin 536 @v = display('Title', rec('200','a') );
327 dpavlin 125
328 dpavlin 536 =cut
329 dpavlin 125
330 dpavlin 1066 sub display { to( 'display', @_ ) }
331 dpavlin 13
332 dpavlin 536 =head2 search
333 dpavlin 13
334 dpavlin 536 Prepare values just for I<search>
335 dpavlin 13
336 dpavlin 536 @v = search('Title', rec('200','a') );
337 dpavlin 433
338 dpavlin 536 =cut
339 dpavlin 13
340 dpavlin 1066 sub search { to( 'search', @_ ) }
341 dpavlin 13
342 dpavlin 923 =head2 sorted
343    
344     Insert into lists which will be automatically sorted
345    
346     sorted('Title', rec('200','a') );
347    
348     =cut
349    
350 dpavlin 1066 sub sorted { to( 'sorted', @_ ) }
351 dpavlin 923
352 dpavlin 1205 =head2 row
353 dpavlin 923
354 dpavlin 1205 Insert new row of data into output module
355    
356     row( column => 'foo', column2 => 'bar' );
357    
358     =cut
359    
360     use Data::Dump qw/dump/;
361    
362     sub row {
363 dpavlin 1210 die "array doesn't have odd number of elements but $#_: ",dump( @_ ) if $#_ % 2 == 1;
364     my $table = shift @_;
365     push @{ $out->{'_rows'}->{$table} }, {@_};
366 dpavlin 1205 }
367    
368    
369 dpavlin 540 =head1 Functions to extract data from input
370    
371     This function should be used inside functions to create C<data_structure> described
372     above.
373    
374 dpavlin 641 =head2 _pack_subfields_hash
375    
376 dpavlin 669 @subfields = _pack_subfields_hash( $h );
377     $subfields = _pack_subfields_hash( $h, 1 );
378 dpavlin 641
379 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
380     with subfields (denoted by C<^>) and values.
381    
382 dpavlin 641 =cut
383    
384     sub _pack_subfields_hash {
385    
386 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
387 dpavlin 641
388 dpavlin 1118 my ($hash,$include_subfields) = @_;
389 dpavlin 641
390 dpavlin 831 # sanity and ease of use
391 dpavlin 1118 return $hash if (ref($hash) ne 'HASH');
392 dpavlin 831
393 dpavlin 1118 my $h = dclone( $hash );
394    
395 dpavlin 641 if ( defined($h->{subfields}) ) {
396     my $sfs = delete $h->{subfields} || die "no subfields?";
397     my @out;
398     while (@$sfs) {
399     my $sf = shift @$sfs;
400     push @out, '^' . $sf if ($include_subfields);
401     my $o = shift @$sfs;
402     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
403     # single element subfields are not arrays
404 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
405 dpavlin 667
406 dpavlin 641 push @out, $h->{$sf};
407     } else {
408 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
409 dpavlin 641 push @out, $h->{$sf}->[$o];
410     }
411     }
412 dpavlin 667 if ($include_subfields) {
413     return join('', @out);
414     } else {
415     return @out;
416     }
417 dpavlin 641 } else {
418 dpavlin 667 if ($include_subfields) {
419     my $out = '';
420 dpavlin 668 foreach my $sf (sort keys %$h) {
421 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
422     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
423     } else {
424     $out .= '^' . $sf . $h->{$sf};
425     }
426     }
427     return $out;
428     } else {
429     # FIXME this should probably be in alphabetical order instead of hash order
430     values %{$h};
431     }
432 dpavlin 641 }
433     }
434    
435 dpavlin 536 =head2 rec1
436 dpavlin 371
437 dpavlin 536 Return all values in some field
438 dpavlin 371
439 dpavlin 536 @v = rec1('200')
440 dpavlin 15
441 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
442 dpavlin 15
443 dpavlin 536 =cut
444 dpavlin 15
445 dpavlin 536 sub rec1 {
446     my $f = shift;
447 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
448 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
449 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
450 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
451 dpavlin 641 my @out;
452     foreach my $h ( @{ $rec->{$f} } ) {
453     if (ref($h) eq 'HASH') {
454     push @out, ( _pack_subfields_hash( $h ) );
455 dpavlin 31 } else {
456 dpavlin 641 push @out, $h;
457 dpavlin 31 }
458 dpavlin 641 }
459     return @out;
460 dpavlin 536 } elsif( defined($rec->{$f}) ) {
461     return $rec->{$f};
462 dpavlin 15 }
463     }
464    
465 dpavlin 536 =head2 rec2
466 dpavlin 15
467 dpavlin 536 Return all values in specific field and subfield
468 dpavlin 13
469 dpavlin 536 @v = rec2('200','a')
470 dpavlin 13
471     =cut
472    
473 dpavlin 536 sub rec2 {
474     my $f = shift;
475     return unless (defined($rec && $rec->{$f}));
476     my $sf = shift;
477 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
478 dpavlin 589 return map {
479     if (ref($_->{$sf}) eq 'ARRAY') {
480     @{ $_->{$sf} };
481     } else {
482     $_->{$sf};
483     }
484 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
485 dpavlin 536 }
486 dpavlin 13
487 dpavlin 536 =head2 rec
488 dpavlin 13
489 dpavlin 536 syntaxtic sugar for
490 dpavlin 13
491 dpavlin 536 @v = rec('200')
492     @v = rec('200','a')
493 dpavlin 13
494 dpavlin 750 If rec() returns just single value, it will
495     return scalar, not array.
496    
497 dpavlin 536 =cut
498 dpavlin 373
499 dpavlin 536 sub rec {
500 dpavlin 583 my @out;
501 dpavlin 536 if ($#_ == 0) {
502 dpavlin 583 @out = rec1(@_);
503 dpavlin 536 } elsif ($#_ == 1) {
504 dpavlin 583 @out = rec2(@_);
505 dpavlin 13 }
506 dpavlin 750 if ($#out == 0 && ! wantarray) {
507     return $out[0];
508     } elsif (@out) {
509 dpavlin 583 return @out;
510     } else {
511     return '';
512     }
513 dpavlin 13 }
514    
515 dpavlin 1012 =head2 frec
516    
517     Returns first value from field
518    
519     $v = frec('200');
520     $v = frec('200','a');
521    
522     =cut
523    
524     sub frec {
525     my @out = rec(@_);
526     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
527     return shift @out;
528     }
529    
530 dpavlin 1015 =head2 frec_eq
531    
532     =head2 frec_ne
533    
534     Check if first values from two fields are same or different
535    
536     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
537     # values are same
538     } else {
539     # values are different
540     }
541    
542     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
543     could write something like:
544    
545     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
546     # yada tada
547     }
548    
549     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
550     in order to parse text and create invalid function C<eqfrec>.
551    
552     =cut
553    
554     sub frec_eq {
555     my ( $f1,$sf1, $f2, $sf2 ) = @_;
556     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
557     }
558    
559     sub frec_ne {
560     return ! frec_eq( @_ );
561     }
562    
563 dpavlin 536 =head2 regex
564 dpavlin 15
565 dpavlin 536 Apply regex to some or all values
566 dpavlin 15
567 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
568 dpavlin 15
569     =cut
570    
571 dpavlin 536 sub regex {
572     my $r = shift;
573     my @out;
574 dpavlin 550 #warn "r: $r\n", dump(\@_);
575 dpavlin 536 foreach my $t (@_) {
576     next unless ($t);
577     eval "\$t =~ $r";
578     push @out, $t if ($t && $t ne '');
579 dpavlin 368 }
580 dpavlin 536 return @out;
581 dpavlin 15 }
582    
583 dpavlin 536 =head2 prefix
584 dpavlin 15
585 dpavlin 536 Prefix all values with a string
586 dpavlin 15
587 dpavlin 536 @v = prefix( 'my_', @v );
588 dpavlin 15
589     =cut
590    
591 dpavlin 536 sub prefix {
592 dpavlin 819 my $p = shift;
593     return @_ unless defined( $p );
594 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
595     }
596 dpavlin 15
597 dpavlin 536 =head2 suffix
598 dpavlin 15
599 dpavlin 536 suffix all values with a string
600 dpavlin 15
601 dpavlin 536 @v = suffix( '_my', @v );
602 dpavlin 15
603 dpavlin 536 =cut
604 dpavlin 15
605 dpavlin 536 sub suffix {
606 dpavlin 819 my $s = shift;
607     return @_ unless defined( $s );
608 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
609 dpavlin 15 }
610    
611 dpavlin 536 =head2 surround
612 dpavlin 13
613 dpavlin 536 surround all values with a two strings
614 dpavlin 13
615 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
616 dpavlin 13
617     =cut
618    
619 dpavlin 536 sub surround {
620 dpavlin 819 my $p = shift;
621     my $s = shift;
622     $p = '' unless defined( $p );
623     $s = '' unless defined( $s );
624 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
625 dpavlin 13 }
626    
627 dpavlin 536 =head2 first
628 dpavlin 13
629 dpavlin 536 Return first element
630 dpavlin 15
631 dpavlin 536 $v = first( @v );
632 dpavlin 13
633     =cut
634    
635 dpavlin 536 sub first {
636     my $r = shift;
637     return $r;
638 dpavlin 13 }
639    
640 dpavlin 536 =head2 lookup
641 dpavlin 13
642 dpavlin 536 Consult lookup hashes for some value
643 dpavlin 13
644 dpavlin 725 @v = lookup(
645     sub {
646     'ffkk/peri/mfn'.rec('000')
647     },
648     'ffkk','peri','200-a-200-e',
649     sub {
650     first(rec(200,'a')).' '.first(rec('200','e'))
651     }
652     );
653 dpavlin 13
654 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
655     normal lookup definition in C<conf/lookup/something.pl> which looks like:
656 dpavlin 707
657 dpavlin 725 lookup(
658     # which results to return from record recorded in lookup
659     sub { 'ffkk/peri/mfn' . rec('000') },
660     # from which database and input
661     'ffkk','peri',
662     # such that following values match
663     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
664     # if this part is missing, we will try to match same fields
665     # from lookup record and current one, or you can override
666     # which records to use from current record using
667     sub { rec('900','x') . ' ' . rec('900','y') },
668     )
669    
670     You can think about this lookup as SQL (if that helps):
671    
672     select
673     sub { what }
674     from
675     database, input
676     where
677     sub { filter from lookuped record }
678     having
679     sub { optional filter on current record }
680    
681     Easy as pie, right?
682    
683 dpavlin 13 =cut
684    
685 dpavlin 536 sub lookup {
686 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
687    
688 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
689 dpavlin 725
690 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
691 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
692    
693 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
694 dpavlin 725
695     my $mfns;
696     my @having = $having->();
697    
698 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
699 dpavlin 725
700     foreach my $h ( @having ) {
701     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
702 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
703 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
704     }
705 dpavlin 536 }
706 dpavlin 725
707     return unless ($mfns);
708    
709     my @mfns = sort keys %$mfns;
710    
711 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
712 dpavlin 725
713     my $old_rec = $rec;
714     my @out;
715    
716     foreach my $mfn (@mfns) {
717 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
718 dpavlin 725
719 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
720 dpavlin 725
721     my @vals = $what->();
722    
723     push @out, ( @vals );
724    
725 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
726 dpavlin 725 }
727    
728     # if (ref($lookup->{$k}) eq 'ARRAY') {
729     # return @{ $lookup->{$k} };
730     # } else {
731     # return $lookup->{$k};
732     # }
733    
734     $rec = $old_rec;
735    
736 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
737 dpavlin 725
738 dpavlin 740 if ($#out == 0) {
739     return $out[0];
740     } else {
741     return @out;
742     }
743 dpavlin 13 }
744    
745 dpavlin 707 =head2 save_into_lookup
746    
747 dpavlin 725 Save value into lookup. It associates current database, input
748     and specific keys with one or more values which will be
749     associated over MFN.
750 dpavlin 707
751 dpavlin 725 MFN will be extracted from first occurence current of field 000
752     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
753    
754     my $nr = save_into_lookup($database,$input,$key,sub {
755 dpavlin 707 # code which produce one or more values
756     });
757    
758 dpavlin 725 It returns number of items saved.
759 dpavlin 707
760 dpavlin 725 This function shouldn't be called directly, it's called from code created by
761     L<WebPAC::Parser>.
762    
763 dpavlin 707 =cut
764    
765     sub save_into_lookup {
766 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
767     die "save_into_lookup needs database" unless defined($database);
768     die "save_into_lookup needs input" unless defined($input);
769     die "save_into_lookup needs key" unless defined($key);
770 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
771 dpavlin 725
772 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
773 dpavlin 725
774     my $mfn =
775     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
776     defined($config->{_mfn}) ? $config->{_mfn} :
777     die "mfn not defined or zero";
778    
779     my $nr = 0;
780    
781 dpavlin 707 foreach my $v ( $coderef->() ) {
782 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
783 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
784 dpavlin 725 $nr++;
785 dpavlin 707 }
786 dpavlin 725
787     return $nr;
788 dpavlin 707 }
789    
790 dpavlin 595 =head2 config
791    
792     Consult config values stored in C<config.yml>
793    
794     # return database code (key under databases in yaml)
795     $database_code = config(); # use _ from hash
796     $database_name = config('name');
797     $database_input_name = config('input name');
798    
799     Up to three levels are supported.
800    
801     =cut
802    
803     sub config {
804     return unless ($config);
805    
806     my $p = shift;
807    
808     $p ||= '';
809    
810     my $v;
811    
812     warn "### getting config($p)\n" if ($debug > 1);
813    
814     my @p = split(/\s+/,$p);
815     if ($#p < 0) {
816     $v = $config->{ '_' }; # special, database code
817     } else {
818    
819     my $c = dclone( $config );
820    
821     foreach my $k (@p) {
822     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
823     if (ref($c) eq 'ARRAY') {
824     $c = shift @$c;
825     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
826     last;
827     }
828    
829     if (! defined($c->{$k}) ) {
830     $c = undef;
831     last;
832     } else {
833     $c = $c->{$k};
834     }
835     }
836     $v = $c if ($c);
837    
838     }
839    
840     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
841     warn "config( '$p' ) is empty\n" if (! $v);
842    
843     return $v;
844     }
845    
846     =head2 id
847    
848     Returns unique id of this record
849    
850     $id = id();
851    
852     Returns C<42/2> for 2nd occurence of MFN 42.
853    
854     =cut
855    
856     sub id {
857     my $mfn = $config->{_mfn} || die "no _mfn in config data";
858 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
859 dpavlin 595 }
860    
861 dpavlin 536 =head2 join_with
862 dpavlin 13
863 dpavlin 536 Joins walues with some delimiter
864 dpavlin 10
865 dpavlin 536 $v = join_with(", ", @v);
866 dpavlin 10
867 dpavlin 536 =cut
868 dpavlin 10
869 dpavlin 536 sub join_with {
870     my $d = shift;
871 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
872 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
873     return '' unless defined($v);
874     return $v;
875 dpavlin 536 }
876 dpavlin 10
877 dpavlin 562 =head2 split_rec_on
878    
879     Split record subfield on some regex and take one of parts out
880    
881     $a_before_semi_column =
882     split_rec_on('200','a', /\s*;\s*/, $part);
883    
884     C<$part> is optional number of element. First element is
885     B<1>, not 0!
886    
887     If there is no C<$part> parameter or C<$part> is 0, this function will
888     return all values produced by splitting.
889    
890     =cut
891    
892     sub split_rec_on {
893     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
894    
895     my ($fld, $sf, $regex, $part) = @_;
896 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
897 dpavlin 562
898     my @r = rec( $fld, $sf );
899     my $v = shift @r;
900 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
901 dpavlin 562
902 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
903 dpavlin 566
904 dpavlin 562 my @s = split( $regex, $v );
905 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
906 dpavlin 566 if ($part && $part > 0) {
907 dpavlin 562 return $s[ $part - 1 ];
908     } else {
909 dpavlin 571 return @s;
910 dpavlin 562 }
911     }
912    
913 dpavlin 785 my $hash;
914    
915     =head2 set
916    
917     set( key => 'value' );
918    
919     =cut
920    
921     sub set {
922     my ($k,$v) = @_;
923 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
924 dpavlin 785 $hash->{$k} = $v;
925     };
926    
927     =head2 get
928    
929     get( 'key' );
930    
931     =cut
932    
933     sub get {
934     my $k = shift || return;
935     my $v = $hash->{$k};
936 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
937 dpavlin 785 return $v;
938     }
939    
940 dpavlin 791 =head2 count
941 dpavlin 785
942 dpavlin 791 if ( count( @result ) == 1 ) {
943     # do something if only 1 result is there
944     }
945    
946     =cut
947    
948     sub count {
949 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
950 dpavlin 791 return @_ . '';
951     }
952    
953 dpavlin 1214 =head2 rec_array
954    
955     Always return field as array
956    
957 dpavlin 1216 foreach my $d ( rec_array('field') ) {
958 dpavlin 1214 warn $d;
959     }
960    
961     =cut
962    
963     sub rec_array {
964     my $d = $rec->{ $_[0] };
965     return @$d if ref($d) eq 'ARRAY';
966     return ($d);
967     }
968    
969 dpavlin 536 # END
970     1;

  ViewVC Help
Powered by ViewVC 1.1.26