/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1037 - (hide annotations)
Mon Nov 12 11:17:19 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 17593 byte(s)
 r1612@llin:  dpavlin | 2007-11-12 12:17:17 +0100
 fixed debug levels: now tests run with -d will display
 own diag messages, while -d -d will be 1st level of
 debug for WebPAC, and so on...

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 536 rec1 rec2 rec
13 dpavlin 1015 frec frec_eq frec_ne
14 dpavlin 536 regex prefix suffix surround
15     first lookup join_with
16 dpavlin 707 save_into_lookup
17 dpavlin 562
18     split_rec_on
19 dpavlin 785
20     get set
21 dpavlin 791 count
22 dpavlin 980
23 dpavlin 536 /;
24 dpavlin 10
25     use warnings;
26     use strict;
27 dpavlin 536
28     #use base qw/WebPAC::Common/;
29 dpavlin 550 use Data::Dump qw/dump/;
30 dpavlin 725 use Carp qw/confess/;
31 dpavlin 10
32 dpavlin 550 # debugging warn(s)
33     my $debug = 0;
34 dpavlin 1037 _debug( $debug );
35 dpavlin 550
36 dpavlin 1021 # FIXME
37 dpavlin 980 use WebPAC::Normalize::ISBN;
38     push @EXPORT, ( 'isbn_10', 'isbn_13' );
39 dpavlin 550
40 dpavlin 1021 use WebPAC::Normalize::MARC;
41 dpavlin 1036 push @EXPORT, ( qw/
42     marc marc_indicators marc_repeatable_subfield
43     marc_compose marc_leader marc_fixed
44     marc_duplicate marc_remove marc_count
45     marc_original_order
46     marc_template
47     /);
48 dpavlin 1021
49 dpavlin 10 =head1 NAME
50    
51 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
52 dpavlin 10
53     =cut
54    
55 dpavlin 1021 our $VERSION = '0.35';
56 dpavlin 10
57     =head1 SYNOPSIS
58    
59 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
60     from input records using perl functions which are specialized for set
61     processing.
62 dpavlin 10
63 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
64     means that you check it's validity before running WebPAC using
65     C<perl -c normalize.pl>.
66 dpavlin 15
67 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
68 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
69 dpavlin 547 C<marc>.
70 dpavlin 15
71 dpavlin 10 =head1 FUNCTIONS
72    
73 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
74     All other functions are available for use within normalisation rules.
75    
76 dpavlin 536 =head2 data_structure
77 dpavlin 10
78 dpavlin 536 Return data structure
79 dpavlin 13
80 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
81 dpavlin 725 lookup => $lookup_hash,
82 dpavlin 536 row => $row,
83     rules => $normalize_pl_config,
84 dpavlin 541 marc_encoding => 'utf-8',
85 dpavlin 595 config => $config,
86 dpavlin 736 load_row_coderef => sub {
87 dpavlin 979 my ($database,$input,$mfn) = @_;
88 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
89 dpavlin 725 },
90 dpavlin 13 );
91    
92 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
93 dpavlin 540 other are optional.
94    
95 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
96 dpavlin 725 die if it's not defined.
97    
98 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
99 dpavlin 15
100 dpavlin 538 Since this function isn't exported you have to call it with
101     C<WebPAC::Normalize::data_structure>.
102    
103 dpavlin 10 =cut
104    
105 dpavlin 736 my $load_row_coderef;
106 dpavlin 725
107 dpavlin 536 sub data_structure {
108     my $arg = {@_};
109 dpavlin 13
110 dpavlin 536 die "need row argument" unless ($arg->{row});
111     die "need normalisation argument" unless ($arg->{rules});
112 dpavlin 31
113 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
114 dpavlin 983 _set_ds( $arg->{row} );
115 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
116 dpavlin 541 _clean_ds( %{ $arg } );
117 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
118 dpavlin 725
119 dpavlin 1011 no strict 'subs';
120     no warnings 'redefine';
121     eval "$arg->{rules};";
122 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
123 dpavlin 540
124 dpavlin 538 return _get_ds();
125 dpavlin 10 }
126    
127 dpavlin 983 =head2 _set_ds
128 dpavlin 13
129 dpavlin 536 Set current record hash
130 dpavlin 433
131 dpavlin 983 _set_ds( $rec );
132 dpavlin 433
133     =cut
134    
135 dpavlin 536 my $rec;
136 dpavlin 433
137 dpavlin 983 sub _set_ds {
138 dpavlin 536 $rec = shift or die "no record hash";
139 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
140 dpavlin 433 }
141    
142 dpavlin 1021 =head2
143    
144     my $rec = _get_rec();
145    
146     =cut
147    
148     sub _get_rec { $rec };
149    
150 dpavlin 595 =head2 _set_config
151    
152     Set current config hash
153    
154     _set_config( $config );
155    
156     Magic keys are:
157    
158     =over 4
159    
160     =item _
161    
162     Code of current database
163    
164     =item _mfn
165    
166     Current MFN
167    
168     =back
169    
170     =cut
171    
172     my $config;
173    
174     sub _set_config {
175     $config = shift;
176     }
177    
178 dpavlin 538 =head2 _get_ds
179    
180     Return hash formatted as data structure
181    
182     my $ds = _get_ds();
183    
184     =cut
185    
186 dpavlin 1036 my $out;
187 dpavlin 538
188     sub _get_ds {
189 dpavlin 982 #warn "## out = ",dump($out);
190 dpavlin 538 return $out;
191     }
192    
193     =head2 _clean_ds
194    
195     Clean data structure hash for next record
196    
197     _clean_ds();
198    
199     =cut
200    
201     sub _clean_ds {
202 dpavlin 541 my $a = {@_};
203 dpavlin 1036 $out = undef;
204     WebPAC::Normalize::MARC::_clean();
205 dpavlin 538 }
206    
207     =head2 _set_lookup
208    
209     Set current lookup hash
210    
211     _set_lookup( $lookup );
212    
213     =cut
214    
215     my $lookup;
216    
217     sub _set_lookup {
218     $lookup = shift;
219     }
220    
221 dpavlin 707 =head2 _get_lookup
222    
223     Get current lookup hash
224    
225     my $lookup = _get_lookup();
226    
227     =cut
228    
229     sub _get_lookup {
230     return $lookup;
231     }
232    
233 dpavlin 736 =head2 _set_load_row
234 dpavlin 725
235     Setup code reference which will return L<data_structure> from
236     L<WebPAC::Store>
237    
238 dpavlin 736 _set_load_row(sub {
239 dpavlin 725 my ($database,$input,$mfn) = @_;
240 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
241 dpavlin 725 });
242    
243     =cut
244    
245 dpavlin 736 sub _set_load_row {
246 dpavlin 725 my $coderef = shift;
247     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
248    
249 dpavlin 736 $load_row_coderef = $coderef;
250 dpavlin 725 }
251    
252 dpavlin 554 =head2 _debug
253    
254     Change level of debug warnings
255    
256     _debug( 2 );
257    
258     =cut
259    
260     sub _debug {
261     my $l = shift;
262     return $debug unless defined($l);
263 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
264 dpavlin 554 $debug = $l;
265 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
266 dpavlin 1037 warn "#### MARC::debug = ",dump($WebPAC::Normalize::MARC::debug);
267 dpavlin 554 }
268    
269 dpavlin 540 =head1 Functions to create C<data_structure>
270    
271     Those functions generally have to first in your normalization file.
272    
273 dpavlin 912 =head2 search_display
274 dpavlin 433
275 dpavlin 912 Define output for L<search> and L<display> at the same time
276 dpavlin 433
277 dpavlin 912 search_display('Title', rec('200','a') );
278 dpavlin 13
279    
280     =cut
281    
282 dpavlin 912 sub search_display {
283     my $name = shift or die "search_display needs name as first argument";
284 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
285     return unless (@o);
286     $out->{$name}->{search} = \@o;
287     $out->{$name}->{display} = \@o;
288     }
289 dpavlin 13
290 dpavlin 915 =head2 tag
291    
292     Old name for L<search_display>, but supported
293    
294     =cut
295    
296     sub tag {
297     search_display( @_ );
298     }
299    
300 dpavlin 536 =head2 display
301 dpavlin 13
302 dpavlin 912 Define output just for I<display>
303 dpavlin 125
304 dpavlin 536 @v = display('Title', rec('200','a') );
305 dpavlin 125
306 dpavlin 536 =cut
307 dpavlin 125
308 dpavlin 923 sub _field {
309     my $type = shift or confess "need type -- BUG?";
310     my $name = shift or confess "needs name as first argument";
311 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
312     return unless (@o);
313 dpavlin 923 $out->{$name}->{$type} = \@o;
314 dpavlin 536 }
315 dpavlin 13
316 dpavlin 923 sub display { _field( 'display', @_ ) }
317    
318 dpavlin 536 =head2 search
319 dpavlin 13
320 dpavlin 536 Prepare values just for I<search>
321 dpavlin 13
322 dpavlin 536 @v = search('Title', rec('200','a') );
323 dpavlin 433
324 dpavlin 536 =cut
325 dpavlin 13
326 dpavlin 923 sub search { _field( 'search', @_ ) }
327 dpavlin 13
328 dpavlin 923 =head2 sorted
329    
330     Insert into lists which will be automatically sorted
331    
332     sorted('Title', rec('200','a') );
333    
334     =cut
335    
336     sub sorted { _field( 'sorted', @_ ) }
337    
338    
339 dpavlin 564
340 dpavlin 540 =head1 Functions to extract data from input
341    
342     This function should be used inside functions to create C<data_structure> described
343     above.
344    
345 dpavlin 641 =head2 _pack_subfields_hash
346    
347 dpavlin 669 @subfields = _pack_subfields_hash( $h );
348     $subfields = _pack_subfields_hash( $h, 1 );
349 dpavlin 641
350 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
351     with subfields (denoted by C<^>) and values.
352    
353 dpavlin 641 =cut
354    
355     sub _pack_subfields_hash {
356    
357 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
358 dpavlin 641
359     my ($h,$include_subfields) = @_;
360    
361 dpavlin 831 # sanity and ease of use
362     return $h if (ref($h) ne 'HASH');
363    
364 dpavlin 641 if ( defined($h->{subfields}) ) {
365     my $sfs = delete $h->{subfields} || die "no subfields?";
366     my @out;
367     while (@$sfs) {
368     my $sf = shift @$sfs;
369     push @out, '^' . $sf if ($include_subfields);
370     my $o = shift @$sfs;
371     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
372     # single element subfields are not arrays
373 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
374 dpavlin 667
375 dpavlin 641 push @out, $h->{$sf};
376     } else {
377 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
378 dpavlin 641 push @out, $h->{$sf}->[$o];
379     }
380     }
381 dpavlin 667 if ($include_subfields) {
382     return join('', @out);
383     } else {
384     return @out;
385     }
386 dpavlin 641 } else {
387 dpavlin 667 if ($include_subfields) {
388     my $out = '';
389 dpavlin 668 foreach my $sf (sort keys %$h) {
390 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
391     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
392     } else {
393     $out .= '^' . $sf . $h->{$sf};
394     }
395     }
396     return $out;
397     } else {
398     # FIXME this should probably be in alphabetical order instead of hash order
399     values %{$h};
400     }
401 dpavlin 641 }
402     }
403    
404 dpavlin 536 =head2 rec1
405 dpavlin 371
406 dpavlin 536 Return all values in some field
407 dpavlin 371
408 dpavlin 536 @v = rec1('200')
409 dpavlin 15
410 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
411 dpavlin 15
412 dpavlin 536 =cut
413 dpavlin 15
414 dpavlin 536 sub rec1 {
415     my $f = shift;
416 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
417 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
418 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
419 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
420 dpavlin 641 my @out;
421     foreach my $h ( @{ $rec->{$f} } ) {
422     if (ref($h) eq 'HASH') {
423     push @out, ( _pack_subfields_hash( $h ) );
424 dpavlin 31 } else {
425 dpavlin 641 push @out, $h;
426 dpavlin 31 }
427 dpavlin 641 }
428     return @out;
429 dpavlin 536 } elsif( defined($rec->{$f}) ) {
430     return $rec->{$f};
431 dpavlin 15 }
432     }
433    
434 dpavlin 536 =head2 rec2
435 dpavlin 15
436 dpavlin 536 Return all values in specific field and subfield
437 dpavlin 13
438 dpavlin 536 @v = rec2('200','a')
439 dpavlin 13
440     =cut
441    
442 dpavlin 536 sub rec2 {
443     my $f = shift;
444     return unless (defined($rec && $rec->{$f}));
445     my $sf = shift;
446 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
447 dpavlin 589 return map {
448     if (ref($_->{$sf}) eq 'ARRAY') {
449     @{ $_->{$sf} };
450     } else {
451     $_->{$sf};
452     }
453     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
454 dpavlin 536 }
455 dpavlin 13
456 dpavlin 536 =head2 rec
457 dpavlin 13
458 dpavlin 536 syntaxtic sugar for
459 dpavlin 13
460 dpavlin 536 @v = rec('200')
461     @v = rec('200','a')
462 dpavlin 13
463 dpavlin 750 If rec() returns just single value, it will
464     return scalar, not array.
465    
466 dpavlin 536 =cut
467 dpavlin 373
468 dpavlin 536 sub rec {
469 dpavlin 583 my @out;
470 dpavlin 536 if ($#_ == 0) {
471 dpavlin 583 @out = rec1(@_);
472 dpavlin 536 } elsif ($#_ == 1) {
473 dpavlin 583 @out = rec2(@_);
474 dpavlin 13 }
475 dpavlin 750 if ($#out == 0 && ! wantarray) {
476     return $out[0];
477     } elsif (@out) {
478 dpavlin 583 return @out;
479     } else {
480     return '';
481     }
482 dpavlin 13 }
483    
484 dpavlin 1012 =head2 frec
485    
486     Returns first value from field
487    
488     $v = frec('200');
489     $v = frec('200','a');
490    
491     =cut
492    
493     sub frec {
494     my @out = rec(@_);
495     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
496     return shift @out;
497     }
498    
499 dpavlin 1015 =head2 frec_eq
500    
501     =head2 frec_ne
502    
503     Check if first values from two fields are same or different
504    
505     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
506     # values are same
507     } else {
508     # values are different
509     }
510    
511     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
512     could write something like:
513    
514     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
515     # yada tada
516     }
517    
518     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
519     in order to parse text and create invalid function C<eqfrec>.
520    
521     =cut
522    
523     sub frec_eq {
524     my ( $f1,$sf1, $f2, $sf2 ) = @_;
525     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
526     }
527    
528     sub frec_ne {
529     return ! frec_eq( @_ );
530     }
531    
532 dpavlin 536 =head2 regex
533 dpavlin 15
534 dpavlin 536 Apply regex to some or all values
535 dpavlin 15
536 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
537 dpavlin 15
538     =cut
539    
540 dpavlin 536 sub regex {
541     my $r = shift;
542     my @out;
543 dpavlin 550 #warn "r: $r\n", dump(\@_);
544 dpavlin 536 foreach my $t (@_) {
545     next unless ($t);
546     eval "\$t =~ $r";
547     push @out, $t if ($t && $t ne '');
548 dpavlin 368 }
549 dpavlin 536 return @out;
550 dpavlin 15 }
551    
552 dpavlin 536 =head2 prefix
553 dpavlin 15
554 dpavlin 536 Prefix all values with a string
555 dpavlin 15
556 dpavlin 536 @v = prefix( 'my_', @v );
557 dpavlin 15
558     =cut
559    
560 dpavlin 536 sub prefix {
561 dpavlin 819 my $p = shift;
562     return @_ unless defined( $p );
563 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
564     }
565 dpavlin 15
566 dpavlin 536 =head2 suffix
567 dpavlin 15
568 dpavlin 536 suffix all values with a string
569 dpavlin 15
570 dpavlin 536 @v = suffix( '_my', @v );
571 dpavlin 15
572 dpavlin 536 =cut
573 dpavlin 15
574 dpavlin 536 sub suffix {
575 dpavlin 819 my $s = shift;
576     return @_ unless defined( $s );
577 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
578 dpavlin 15 }
579    
580 dpavlin 536 =head2 surround
581 dpavlin 13
582 dpavlin 536 surround all values with a two strings
583 dpavlin 13
584 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
585 dpavlin 13
586     =cut
587    
588 dpavlin 536 sub surround {
589 dpavlin 819 my $p = shift;
590     my $s = shift;
591     $p = '' unless defined( $p );
592     $s = '' unless defined( $s );
593 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
594 dpavlin 13 }
595    
596 dpavlin 536 =head2 first
597 dpavlin 13
598 dpavlin 536 Return first element
599 dpavlin 15
600 dpavlin 536 $v = first( @v );
601 dpavlin 13
602     =cut
603    
604 dpavlin 536 sub first {
605     my $r = shift;
606     return $r;
607 dpavlin 13 }
608    
609 dpavlin 536 =head2 lookup
610 dpavlin 13
611 dpavlin 536 Consult lookup hashes for some value
612 dpavlin 13
613 dpavlin 725 @v = lookup(
614     sub {
615     'ffkk/peri/mfn'.rec('000')
616     },
617     'ffkk','peri','200-a-200-e',
618     sub {
619     first(rec(200,'a')).' '.first(rec('200','e'))
620     }
621     );
622 dpavlin 13
623 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
624     normal lookup definition in C<conf/lookup/something.pl> which looks like:
625 dpavlin 707
626 dpavlin 725 lookup(
627     # which results to return from record recorded in lookup
628     sub { 'ffkk/peri/mfn' . rec('000') },
629     # from which database and input
630     'ffkk','peri',
631     # such that following values match
632     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
633     # if this part is missing, we will try to match same fields
634     # from lookup record and current one, or you can override
635     # which records to use from current record using
636     sub { rec('900','x') . ' ' . rec('900','y') },
637     )
638    
639     You can think about this lookup as SQL (if that helps):
640    
641     select
642     sub { what }
643     from
644     database, input
645     where
646     sub { filter from lookuped record }
647     having
648     sub { optional filter on current record }
649    
650     Easy as pie, right?
651    
652 dpavlin 13 =cut
653    
654 dpavlin 536 sub lookup {
655 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
656    
657 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
658 dpavlin 725
659 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
660 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
661    
662 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
663 dpavlin 725
664     my $mfns;
665     my @having = $having->();
666    
667 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
668 dpavlin 725
669     foreach my $h ( @having ) {
670     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
671 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
672 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
673     }
674 dpavlin 536 }
675 dpavlin 725
676     return unless ($mfns);
677    
678     my @mfns = sort keys %$mfns;
679    
680 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
681 dpavlin 725
682     my $old_rec = $rec;
683     my @out;
684    
685     foreach my $mfn (@mfns) {
686 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
687 dpavlin 725
688 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
689 dpavlin 725
690     my @vals = $what->();
691    
692     push @out, ( @vals );
693    
694 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
695 dpavlin 725 }
696    
697     # if (ref($lookup->{$k}) eq 'ARRAY') {
698     # return @{ $lookup->{$k} };
699     # } else {
700     # return $lookup->{$k};
701     # }
702    
703     $rec = $old_rec;
704    
705 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
706 dpavlin 725
707 dpavlin 740 if ($#out == 0) {
708     return $out[0];
709     } else {
710     return @out;
711     }
712 dpavlin 13 }
713    
714 dpavlin 707 =head2 save_into_lookup
715    
716 dpavlin 725 Save value into lookup. It associates current database, input
717     and specific keys with one or more values which will be
718     associated over MFN.
719 dpavlin 707
720 dpavlin 725 MFN will be extracted from first occurence current of field 000
721     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
722    
723     my $nr = save_into_lookup($database,$input,$key,sub {
724 dpavlin 707 # code which produce one or more values
725     });
726    
727 dpavlin 725 It returns number of items saved.
728 dpavlin 707
729 dpavlin 725 This function shouldn't be called directly, it's called from code created by
730     L<WebPAC::Parser>.
731    
732 dpavlin 707 =cut
733    
734     sub save_into_lookup {
735 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
736     die "save_into_lookup needs database" unless defined($database);
737     die "save_into_lookup needs input" unless defined($input);
738     die "save_into_lookup needs key" unless defined($key);
739 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
740 dpavlin 725
741 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
742 dpavlin 725
743     my $mfn =
744     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
745     defined($config->{_mfn}) ? $config->{_mfn} :
746     die "mfn not defined or zero";
747    
748     my $nr = 0;
749    
750 dpavlin 707 foreach my $v ( $coderef->() ) {
751 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
752 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
753 dpavlin 725 $nr++;
754 dpavlin 707 }
755 dpavlin 725
756     return $nr;
757 dpavlin 707 }
758    
759 dpavlin 595 =head2 config
760    
761     Consult config values stored in C<config.yml>
762    
763     # return database code (key under databases in yaml)
764     $database_code = config(); # use _ from hash
765     $database_name = config('name');
766     $database_input_name = config('input name');
767    
768     Up to three levels are supported.
769    
770     =cut
771    
772     sub config {
773     return unless ($config);
774    
775     my $p = shift;
776    
777     $p ||= '';
778    
779     my $v;
780    
781     warn "### getting config($p)\n" if ($debug > 1);
782    
783     my @p = split(/\s+/,$p);
784     if ($#p < 0) {
785     $v = $config->{ '_' }; # special, database code
786     } else {
787    
788     my $c = dclone( $config );
789    
790     foreach my $k (@p) {
791     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
792     if (ref($c) eq 'ARRAY') {
793     $c = shift @$c;
794     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
795     last;
796     }
797    
798     if (! defined($c->{$k}) ) {
799     $c = undef;
800     last;
801     } else {
802     $c = $c->{$k};
803     }
804     }
805     $v = $c if ($c);
806    
807     }
808    
809     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
810     warn "config( '$p' ) is empty\n" if (! $v);
811    
812     return $v;
813     }
814    
815     =head2 id
816    
817     Returns unique id of this record
818    
819     $id = id();
820    
821     Returns C<42/2> for 2nd occurence of MFN 42.
822    
823     =cut
824    
825     sub id {
826     my $mfn = $config->{_mfn} || die "no _mfn in config data";
827 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
828 dpavlin 595 }
829    
830 dpavlin 536 =head2 join_with
831 dpavlin 13
832 dpavlin 536 Joins walues with some delimiter
833 dpavlin 10
834 dpavlin 536 $v = join_with(", ", @v);
835 dpavlin 10
836 dpavlin 536 =cut
837 dpavlin 10
838 dpavlin 536 sub join_with {
839     my $d = shift;
840 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
841 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
842     return '' unless defined($v);
843     return $v;
844 dpavlin 536 }
845 dpavlin 10
846 dpavlin 562 =head2 split_rec_on
847    
848     Split record subfield on some regex and take one of parts out
849    
850     $a_before_semi_column =
851     split_rec_on('200','a', /\s*;\s*/, $part);
852    
853     C<$part> is optional number of element. First element is
854     B<1>, not 0!
855    
856     If there is no C<$part> parameter or C<$part> is 0, this function will
857     return all values produced by splitting.
858    
859     =cut
860    
861     sub split_rec_on {
862     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
863    
864     my ($fld, $sf, $regex, $part) = @_;
865 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
866 dpavlin 562
867     my @r = rec( $fld, $sf );
868     my $v = shift @r;
869 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
870 dpavlin 562
871 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
872 dpavlin 566
873 dpavlin 562 my @s = split( $regex, $v );
874 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
875 dpavlin 566 if ($part && $part > 0) {
876 dpavlin 562 return $s[ $part - 1 ];
877     } else {
878 dpavlin 571 return @s;
879 dpavlin 562 }
880     }
881    
882 dpavlin 785 my $hash;
883    
884     =head2 set
885    
886     set( key => 'value' );
887    
888     =cut
889    
890     sub set {
891     my ($k,$v) = @_;
892 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
893 dpavlin 785 $hash->{$k} = $v;
894     };
895    
896     =head2 get
897    
898     get( 'key' );
899    
900     =cut
901    
902     sub get {
903     my $k = shift || return;
904     my $v = $hash->{$k};
905 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
906 dpavlin 785 return $v;
907     }
908    
909 dpavlin 791 =head2 count
910 dpavlin 785
911 dpavlin 791 if ( count( @result ) == 1 ) {
912     # do something if only 1 result is there
913     }
914    
915     =cut
916    
917     sub count {
918 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
919 dpavlin 791 return @_ . '';
920     }
921    
922 dpavlin 536 # END
923     1;

  ViewVC Help
Powered by ViewVC 1.1.26