/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1205 - (hide annotations)
Fri May 29 20:32:13 2009 UTC (14 years, 11 months ago) by dpavlin
File size: 18182 byte(s)
 r1896@llin:  dpavlin | 2009-05-29 22:32:12 +0200
 added rec_array and row and small example how to create
 multiple rows from single record in input file

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 1066 to
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 536 rec1 rec2 rec
14 dpavlin 1015 frec frec_eq frec_ne
15 dpavlin 536 regex prefix suffix surround
16     first lookup join_with
17 dpavlin 707 save_into_lookup
18 dpavlin 562
19     split_rec_on
20 dpavlin 785
21     get set
22 dpavlin 791 count
23 dpavlin 980
24 dpavlin 536 /;
25 dpavlin 10
26     use warnings;
27     use strict;
28 dpavlin 536
29     #use base qw/WebPAC::Common/;
30 dpavlin 550 use Data::Dump qw/dump/;
31 dpavlin 725 use Carp qw/confess/;
32 dpavlin 10
33 dpavlin 550 # debugging warn(s)
34     my $debug = 0;
35 dpavlin 1037 _debug( $debug );
36 dpavlin 550
37 dpavlin 1021 # FIXME
38 dpavlin 980 use WebPAC::Normalize::ISBN;
39     push @EXPORT, ( 'isbn_10', 'isbn_13' );
40 dpavlin 550
41 dpavlin 1021 use WebPAC::Normalize::MARC;
42 dpavlin 1036 push @EXPORT, ( qw/
43     marc marc_indicators marc_repeatable_subfield
44     marc_compose marc_leader marc_fixed
45     marc_duplicate marc_remove marc_count
46     marc_original_order
47     marc_template
48     /);
49 dpavlin 1021
50 dpavlin 1118 use Storable qw/dclone/;
51    
52 dpavlin 10 =head1 NAME
53    
54 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
55 dpavlin 10
56     =cut
57    
58 dpavlin 1066 our $VERSION = '0.36';
59 dpavlin 10
60     =head1 SYNOPSIS
61    
62 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
63     from input records using perl functions which are specialized for set
64     processing.
65 dpavlin 10
66 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
67     means that you check it's validity before running WebPAC using
68     C<perl -c normalize.pl>.
69 dpavlin 15
70 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
71 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
72 dpavlin 547 C<marc>.
73 dpavlin 15
74 dpavlin 10 =head1 FUNCTIONS
75    
76 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
77     All other functions are available for use within normalisation rules.
78    
79 dpavlin 536 =head2 data_structure
80 dpavlin 10
81 dpavlin 536 Return data structure
82 dpavlin 13
83 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
84 dpavlin 725 lookup => $lookup_hash,
85 dpavlin 536 row => $row,
86     rules => $normalize_pl_config,
87 dpavlin 541 marc_encoding => 'utf-8',
88 dpavlin 595 config => $config,
89 dpavlin 736 load_row_coderef => sub {
90 dpavlin 979 my ($database,$input,$mfn) = @_;
91 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
92 dpavlin 725 },
93 dpavlin 13 );
94    
95 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
96 dpavlin 540 other are optional.
97    
98 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
99 dpavlin 725 die if it's not defined.
100    
101 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
102 dpavlin 15
103 dpavlin 538 Since this function isn't exported you have to call it with
104     C<WebPAC::Normalize::data_structure>.
105    
106 dpavlin 10 =cut
107    
108 dpavlin 736 my $load_row_coderef;
109 dpavlin 725
110 dpavlin 536 sub data_structure {
111     my $arg = {@_};
112 dpavlin 13
113 dpavlin 536 die "need row argument" unless ($arg->{row});
114     die "need normalisation argument" unless ($arg->{rules});
115 dpavlin 31
116 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
117 dpavlin 983 _set_ds( $arg->{row} );
118 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
119 dpavlin 541 _clean_ds( %{ $arg } );
120 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
121 dpavlin 725
122 dpavlin 1011 no strict 'subs';
123     no warnings 'redefine';
124     eval "$arg->{rules};";
125 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
126 dpavlin 540
127 dpavlin 538 return _get_ds();
128 dpavlin 10 }
129    
130 dpavlin 983 =head2 _set_ds
131 dpavlin 13
132 dpavlin 536 Set current record hash
133 dpavlin 433
134 dpavlin 983 _set_ds( $rec );
135 dpavlin 433
136     =cut
137    
138 dpavlin 536 my $rec;
139 dpavlin 433
140 dpavlin 983 sub _set_ds {
141 dpavlin 536 $rec = shift or die "no record hash";
142 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
143 dpavlin 433 }
144    
145 dpavlin 1021 =head2
146    
147     my $rec = _get_rec();
148    
149     =cut
150    
151     sub _get_rec { $rec };
152    
153 dpavlin 1205 sub rec_array {
154     my $d = $rec->{ $_[0] };
155     return @$d if ref($d) eq 'ARRAY';
156     die "field $_[0] not array: ",dump( $d );
157     }
158    
159 dpavlin 595 =head2 _set_config
160    
161     Set current config hash
162    
163     _set_config( $config );
164    
165     Magic keys are:
166    
167     =over 4
168    
169     =item _
170    
171     Code of current database
172    
173     =item _mfn
174    
175     Current MFN
176    
177     =back
178    
179     =cut
180    
181     my $config;
182    
183     sub _set_config {
184     $config = shift;
185     }
186    
187 dpavlin 538 =head2 _get_ds
188    
189     Return hash formatted as data structure
190    
191     my $ds = _get_ds();
192    
193     =cut
194    
195 dpavlin 1036 my $out;
196 dpavlin 538
197     sub _get_ds {
198 dpavlin 982 #warn "## out = ",dump($out);
199 dpavlin 538 return $out;
200     }
201    
202     =head2 _clean_ds
203    
204     Clean data structure hash for next record
205    
206     _clean_ds();
207    
208     =cut
209    
210     sub _clean_ds {
211 dpavlin 541 my $a = {@_};
212 dpavlin 1036 $out = undef;
213     WebPAC::Normalize::MARC::_clean();
214 dpavlin 538 }
215    
216     =head2 _set_lookup
217    
218     Set current lookup hash
219    
220     _set_lookup( $lookup );
221    
222     =cut
223    
224     my $lookup;
225    
226     sub _set_lookup {
227     $lookup = shift;
228     }
229    
230 dpavlin 707 =head2 _get_lookup
231    
232     Get current lookup hash
233    
234     my $lookup = _get_lookup();
235    
236     =cut
237    
238     sub _get_lookup {
239     return $lookup;
240     }
241    
242 dpavlin 736 =head2 _set_load_row
243 dpavlin 725
244     Setup code reference which will return L<data_structure> from
245     L<WebPAC::Store>
246    
247 dpavlin 736 _set_load_row(sub {
248 dpavlin 725 my ($database,$input,$mfn) = @_;
249 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
250 dpavlin 725 });
251    
252     =cut
253    
254 dpavlin 736 sub _set_load_row {
255 dpavlin 725 my $coderef = shift;
256     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
257    
258 dpavlin 736 $load_row_coderef = $coderef;
259 dpavlin 725 }
260    
261 dpavlin 554 =head2 _debug
262    
263     Change level of debug warnings
264    
265     _debug( 2 );
266    
267     =cut
268    
269     sub _debug {
270     my $l = shift;
271     return $debug unless defined($l);
272 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
273 dpavlin 554 $debug = $l;
274 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
275 dpavlin 554 }
276    
277 dpavlin 540 =head1 Functions to create C<data_structure>
278    
279     Those functions generally have to first in your normalization file.
280    
281 dpavlin 1066 =head2 to
282    
283     Generic way to set values for some name
284    
285     to('field-name', 'name-value' => rec('200','a') );
286    
287     There are many helpers defined below which might be easier to use.
288    
289     =cut
290    
291     sub to {
292     my $type = shift or confess "need type -- BUG?";
293     my $name = shift or confess "needs name as first argument";
294     my @o = grep { defined($_) && $_ ne '' } @_;
295     return unless (@o);
296     $out->{$name}->{$type} = \@o;
297     }
298    
299 dpavlin 912 =head2 search_display
300 dpavlin 433
301 dpavlin 912 Define output for L<search> and L<display> at the same time
302 dpavlin 433
303 dpavlin 912 search_display('Title', rec('200','a') );
304 dpavlin 13
305     =cut
306    
307 dpavlin 912 sub search_display {
308     my $name = shift or die "search_display needs name as first argument";
309 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
310     return unless (@o);
311     $out->{$name}->{search} = \@o;
312     $out->{$name}->{display} = \@o;
313     }
314 dpavlin 13
315 dpavlin 915 =head2 tag
316    
317 dpavlin 1066 Old name for L<search_display>, it will probably be removed at one point.
318 dpavlin 915
319     =cut
320    
321     sub tag {
322     search_display( @_ );
323     }
324    
325 dpavlin 536 =head2 display
326 dpavlin 13
327 dpavlin 912 Define output just for I<display>
328 dpavlin 125
329 dpavlin 536 @v = display('Title', rec('200','a') );
330 dpavlin 125
331 dpavlin 536 =cut
332 dpavlin 125
333 dpavlin 1066 sub display { to( 'display', @_ ) }
334 dpavlin 13
335 dpavlin 536 =head2 search
336 dpavlin 13
337 dpavlin 536 Prepare values just for I<search>
338 dpavlin 13
339 dpavlin 536 @v = search('Title', rec('200','a') );
340 dpavlin 433
341 dpavlin 536 =cut
342 dpavlin 13
343 dpavlin 1066 sub search { to( 'search', @_ ) }
344 dpavlin 13
345 dpavlin 923 =head2 sorted
346    
347     Insert into lists which will be automatically sorted
348    
349     sorted('Title', rec('200','a') );
350    
351     =cut
352    
353 dpavlin 1066 sub sorted { to( 'sorted', @_ ) }
354 dpavlin 923
355 dpavlin 1205 =head2 row
356 dpavlin 923
357 dpavlin 1205 Insert new row of data into output module
358    
359     row( column => 'foo', column2 => 'bar' );
360    
361     =cut
362    
363     use Data::Dump qw/dump/;
364    
365     sub row {
366     die "array doesn't have even number of elements but $#_: ",dump( @_ ) if $#_ % 2 != 1;
367    
368     push @{ $out->{'_rows'} }, {@_};
369     }
370    
371    
372 dpavlin 540 =head1 Functions to extract data from input
373    
374     This function should be used inside functions to create C<data_structure> described
375     above.
376    
377 dpavlin 641 =head2 _pack_subfields_hash
378    
379 dpavlin 669 @subfields = _pack_subfields_hash( $h );
380     $subfields = _pack_subfields_hash( $h, 1 );
381 dpavlin 641
382 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
383     with subfields (denoted by C<^>) and values.
384    
385 dpavlin 641 =cut
386    
387     sub _pack_subfields_hash {
388    
389 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
390 dpavlin 641
391 dpavlin 1118 my ($hash,$include_subfields) = @_;
392 dpavlin 641
393 dpavlin 831 # sanity and ease of use
394 dpavlin 1118 return $hash if (ref($hash) ne 'HASH');
395 dpavlin 831
396 dpavlin 1118 my $h = dclone( $hash );
397    
398 dpavlin 641 if ( defined($h->{subfields}) ) {
399     my $sfs = delete $h->{subfields} || die "no subfields?";
400     my @out;
401     while (@$sfs) {
402     my $sf = shift @$sfs;
403     push @out, '^' . $sf if ($include_subfields);
404     my $o = shift @$sfs;
405     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
406     # single element subfields are not arrays
407 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
408 dpavlin 667
409 dpavlin 641 push @out, $h->{$sf};
410     } else {
411 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
412 dpavlin 641 push @out, $h->{$sf}->[$o];
413     }
414     }
415 dpavlin 667 if ($include_subfields) {
416     return join('', @out);
417     } else {
418     return @out;
419     }
420 dpavlin 641 } else {
421 dpavlin 667 if ($include_subfields) {
422     my $out = '';
423 dpavlin 668 foreach my $sf (sort keys %$h) {
424 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
425     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
426     } else {
427     $out .= '^' . $sf . $h->{$sf};
428     }
429     }
430     return $out;
431     } else {
432     # FIXME this should probably be in alphabetical order instead of hash order
433     values %{$h};
434     }
435 dpavlin 641 }
436     }
437    
438 dpavlin 536 =head2 rec1
439 dpavlin 371
440 dpavlin 536 Return all values in some field
441 dpavlin 371
442 dpavlin 536 @v = rec1('200')
443 dpavlin 15
444 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
445 dpavlin 15
446 dpavlin 536 =cut
447 dpavlin 15
448 dpavlin 536 sub rec1 {
449     my $f = shift;
450 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
451 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
452 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
453 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
454 dpavlin 641 my @out;
455     foreach my $h ( @{ $rec->{$f} } ) {
456     if (ref($h) eq 'HASH') {
457     push @out, ( _pack_subfields_hash( $h ) );
458 dpavlin 31 } else {
459 dpavlin 641 push @out, $h;
460 dpavlin 31 }
461 dpavlin 641 }
462     return @out;
463 dpavlin 536 } elsif( defined($rec->{$f}) ) {
464     return $rec->{$f};
465 dpavlin 15 }
466     }
467    
468 dpavlin 536 =head2 rec2
469 dpavlin 15
470 dpavlin 536 Return all values in specific field and subfield
471 dpavlin 13
472 dpavlin 536 @v = rec2('200','a')
473 dpavlin 13
474     =cut
475    
476 dpavlin 536 sub rec2 {
477     my $f = shift;
478     return unless (defined($rec && $rec->{$f}));
479     my $sf = shift;
480 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
481 dpavlin 589 return map {
482     if (ref($_->{$sf}) eq 'ARRAY') {
483     @{ $_->{$sf} };
484     } else {
485     $_->{$sf};
486     }
487 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
488 dpavlin 536 }
489 dpavlin 13
490 dpavlin 536 =head2 rec
491 dpavlin 13
492 dpavlin 536 syntaxtic sugar for
493 dpavlin 13
494 dpavlin 536 @v = rec('200')
495     @v = rec('200','a')
496 dpavlin 13
497 dpavlin 750 If rec() returns just single value, it will
498     return scalar, not array.
499    
500 dpavlin 536 =cut
501 dpavlin 373
502 dpavlin 536 sub rec {
503 dpavlin 583 my @out;
504 dpavlin 536 if ($#_ == 0) {
505 dpavlin 583 @out = rec1(@_);
506 dpavlin 536 } elsif ($#_ == 1) {
507 dpavlin 583 @out = rec2(@_);
508 dpavlin 13 }
509 dpavlin 750 if ($#out == 0 && ! wantarray) {
510     return $out[0];
511     } elsif (@out) {
512 dpavlin 583 return @out;
513     } else {
514     return '';
515     }
516 dpavlin 13 }
517    
518 dpavlin 1012 =head2 frec
519    
520     Returns first value from field
521    
522     $v = frec('200');
523     $v = frec('200','a');
524    
525     =cut
526    
527     sub frec {
528     my @out = rec(@_);
529     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
530     return shift @out;
531     }
532    
533 dpavlin 1015 =head2 frec_eq
534    
535     =head2 frec_ne
536    
537     Check if first values from two fields are same or different
538    
539     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
540     # values are same
541     } else {
542     # values are different
543     }
544    
545     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
546     could write something like:
547    
548     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
549     # yada tada
550     }
551    
552     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
553     in order to parse text and create invalid function C<eqfrec>.
554    
555     =cut
556    
557     sub frec_eq {
558     my ( $f1,$sf1, $f2, $sf2 ) = @_;
559     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
560     }
561    
562     sub frec_ne {
563     return ! frec_eq( @_ );
564     }
565    
566 dpavlin 536 =head2 regex
567 dpavlin 15
568 dpavlin 536 Apply regex to some or all values
569 dpavlin 15
570 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
571 dpavlin 15
572     =cut
573    
574 dpavlin 536 sub regex {
575     my $r = shift;
576     my @out;
577 dpavlin 550 #warn "r: $r\n", dump(\@_);
578 dpavlin 536 foreach my $t (@_) {
579     next unless ($t);
580     eval "\$t =~ $r";
581     push @out, $t if ($t && $t ne '');
582 dpavlin 368 }
583 dpavlin 536 return @out;
584 dpavlin 15 }
585    
586 dpavlin 536 =head2 prefix
587 dpavlin 15
588 dpavlin 536 Prefix all values with a string
589 dpavlin 15
590 dpavlin 536 @v = prefix( 'my_', @v );
591 dpavlin 15
592     =cut
593    
594 dpavlin 536 sub prefix {
595 dpavlin 819 my $p = shift;
596     return @_ unless defined( $p );
597 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
598     }
599 dpavlin 15
600 dpavlin 536 =head2 suffix
601 dpavlin 15
602 dpavlin 536 suffix all values with a string
603 dpavlin 15
604 dpavlin 536 @v = suffix( '_my', @v );
605 dpavlin 15
606 dpavlin 536 =cut
607 dpavlin 15
608 dpavlin 536 sub suffix {
609 dpavlin 819 my $s = shift;
610     return @_ unless defined( $s );
611 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
612 dpavlin 15 }
613    
614 dpavlin 536 =head2 surround
615 dpavlin 13
616 dpavlin 536 surround all values with a two strings
617 dpavlin 13
618 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
619 dpavlin 13
620     =cut
621    
622 dpavlin 536 sub surround {
623 dpavlin 819 my $p = shift;
624     my $s = shift;
625     $p = '' unless defined( $p );
626     $s = '' unless defined( $s );
627 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
628 dpavlin 13 }
629    
630 dpavlin 536 =head2 first
631 dpavlin 13
632 dpavlin 536 Return first element
633 dpavlin 15
634 dpavlin 536 $v = first( @v );
635 dpavlin 13
636     =cut
637    
638 dpavlin 536 sub first {
639     my $r = shift;
640     return $r;
641 dpavlin 13 }
642    
643 dpavlin 536 =head2 lookup
644 dpavlin 13
645 dpavlin 536 Consult lookup hashes for some value
646 dpavlin 13
647 dpavlin 725 @v = lookup(
648     sub {
649     'ffkk/peri/mfn'.rec('000')
650     },
651     'ffkk','peri','200-a-200-e',
652     sub {
653     first(rec(200,'a')).' '.first(rec('200','e'))
654     }
655     );
656 dpavlin 13
657 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
658     normal lookup definition in C<conf/lookup/something.pl> which looks like:
659 dpavlin 707
660 dpavlin 725 lookup(
661     # which results to return from record recorded in lookup
662     sub { 'ffkk/peri/mfn' . rec('000') },
663     # from which database and input
664     'ffkk','peri',
665     # such that following values match
666     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
667     # if this part is missing, we will try to match same fields
668     # from lookup record and current one, or you can override
669     # which records to use from current record using
670     sub { rec('900','x') . ' ' . rec('900','y') },
671     )
672    
673     You can think about this lookup as SQL (if that helps):
674    
675     select
676     sub { what }
677     from
678     database, input
679     where
680     sub { filter from lookuped record }
681     having
682     sub { optional filter on current record }
683    
684     Easy as pie, right?
685    
686 dpavlin 13 =cut
687    
688 dpavlin 536 sub lookup {
689 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
690    
691 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
692 dpavlin 725
693 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
694 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
695    
696 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
697 dpavlin 725
698     my $mfns;
699     my @having = $having->();
700    
701 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
702 dpavlin 725
703     foreach my $h ( @having ) {
704     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
705 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
706 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
707     }
708 dpavlin 536 }
709 dpavlin 725
710     return unless ($mfns);
711    
712     my @mfns = sort keys %$mfns;
713    
714 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
715 dpavlin 725
716     my $old_rec = $rec;
717     my @out;
718    
719     foreach my $mfn (@mfns) {
720 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
721 dpavlin 725
722 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
723 dpavlin 725
724     my @vals = $what->();
725    
726     push @out, ( @vals );
727    
728 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
729 dpavlin 725 }
730    
731     # if (ref($lookup->{$k}) eq 'ARRAY') {
732     # return @{ $lookup->{$k} };
733     # } else {
734     # return $lookup->{$k};
735     # }
736    
737     $rec = $old_rec;
738    
739 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
740 dpavlin 725
741 dpavlin 740 if ($#out == 0) {
742     return $out[0];
743     } else {
744     return @out;
745     }
746 dpavlin 13 }
747    
748 dpavlin 707 =head2 save_into_lookup
749    
750 dpavlin 725 Save value into lookup. It associates current database, input
751     and specific keys with one or more values which will be
752     associated over MFN.
753 dpavlin 707
754 dpavlin 725 MFN will be extracted from first occurence current of field 000
755     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
756    
757     my $nr = save_into_lookup($database,$input,$key,sub {
758 dpavlin 707 # code which produce one or more values
759     });
760    
761 dpavlin 725 It returns number of items saved.
762 dpavlin 707
763 dpavlin 725 This function shouldn't be called directly, it's called from code created by
764     L<WebPAC::Parser>.
765    
766 dpavlin 707 =cut
767    
768     sub save_into_lookup {
769 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
770     die "save_into_lookup needs database" unless defined($database);
771     die "save_into_lookup needs input" unless defined($input);
772     die "save_into_lookup needs key" unless defined($key);
773 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
774 dpavlin 725
775 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
776 dpavlin 725
777     my $mfn =
778     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
779     defined($config->{_mfn}) ? $config->{_mfn} :
780     die "mfn not defined or zero";
781    
782     my $nr = 0;
783    
784 dpavlin 707 foreach my $v ( $coderef->() ) {
785 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
786 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
787 dpavlin 725 $nr++;
788 dpavlin 707 }
789 dpavlin 725
790     return $nr;
791 dpavlin 707 }
792    
793 dpavlin 595 =head2 config
794    
795     Consult config values stored in C<config.yml>
796    
797     # return database code (key under databases in yaml)
798     $database_code = config(); # use _ from hash
799     $database_name = config('name');
800     $database_input_name = config('input name');
801    
802     Up to three levels are supported.
803    
804     =cut
805    
806     sub config {
807     return unless ($config);
808    
809     my $p = shift;
810    
811     $p ||= '';
812    
813     my $v;
814    
815     warn "### getting config($p)\n" if ($debug > 1);
816    
817     my @p = split(/\s+/,$p);
818     if ($#p < 0) {
819     $v = $config->{ '_' }; # special, database code
820     } else {
821    
822     my $c = dclone( $config );
823    
824     foreach my $k (@p) {
825     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
826     if (ref($c) eq 'ARRAY') {
827     $c = shift @$c;
828     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
829     last;
830     }
831    
832     if (! defined($c->{$k}) ) {
833     $c = undef;
834     last;
835     } else {
836     $c = $c->{$k};
837     }
838     }
839     $v = $c if ($c);
840    
841     }
842    
843     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
844     warn "config( '$p' ) is empty\n" if (! $v);
845    
846     return $v;
847     }
848    
849     =head2 id
850    
851     Returns unique id of this record
852    
853     $id = id();
854    
855     Returns C<42/2> for 2nd occurence of MFN 42.
856    
857     =cut
858    
859     sub id {
860     my $mfn = $config->{_mfn} || die "no _mfn in config data";
861 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
862 dpavlin 595 }
863    
864 dpavlin 536 =head2 join_with
865 dpavlin 13
866 dpavlin 536 Joins walues with some delimiter
867 dpavlin 10
868 dpavlin 536 $v = join_with(", ", @v);
869 dpavlin 10
870 dpavlin 536 =cut
871 dpavlin 10
872 dpavlin 536 sub join_with {
873     my $d = shift;
874 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
875 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
876     return '' unless defined($v);
877     return $v;
878 dpavlin 536 }
879 dpavlin 10
880 dpavlin 562 =head2 split_rec_on
881    
882     Split record subfield on some regex and take one of parts out
883    
884     $a_before_semi_column =
885     split_rec_on('200','a', /\s*;\s*/, $part);
886    
887     C<$part> is optional number of element. First element is
888     B<1>, not 0!
889    
890     If there is no C<$part> parameter or C<$part> is 0, this function will
891     return all values produced by splitting.
892    
893     =cut
894    
895     sub split_rec_on {
896     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
897    
898     my ($fld, $sf, $regex, $part) = @_;
899 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
900 dpavlin 562
901     my @r = rec( $fld, $sf );
902     my $v = shift @r;
903 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
904 dpavlin 562
905 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
906 dpavlin 566
907 dpavlin 562 my @s = split( $regex, $v );
908 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
909 dpavlin 566 if ($part && $part > 0) {
910 dpavlin 562 return $s[ $part - 1 ];
911     } else {
912 dpavlin 571 return @s;
913 dpavlin 562 }
914     }
915    
916 dpavlin 785 my $hash;
917    
918     =head2 set
919    
920     set( key => 'value' );
921    
922     =cut
923    
924     sub set {
925     my ($k,$v) = @_;
926 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
927 dpavlin 785 $hash->{$k} = $v;
928     };
929    
930     =head2 get
931    
932     get( 'key' );
933    
934     =cut
935    
936     sub get {
937     my $k = shift || return;
938     my $v = $hash->{$k};
939 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
940 dpavlin 785 return $v;
941     }
942    
943 dpavlin 791 =head2 count
944 dpavlin 785
945 dpavlin 791 if ( count( @result ) == 1 ) {
946     # do something if only 1 result is there
947     }
948    
949     =cut
950    
951     sub count {
952 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
953 dpavlin 791 return @_ . '';
954     }
955    
956 dpavlin 536 # END
957     1;

  ViewVC Help
Powered by ViewVC 1.1.26