/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1048 - (hide annotations)
Mon Nov 19 16:33:09 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 17535 byte(s)
 r1633@llin:  dpavlin | 2007-11-19 17:33:09 +0100
 fetch indicators with rec(900,'i1') and/or rec(900,'i2') as it should be
 (fixed bug which prevented this from working for some values like 0)

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 536 rec1 rec2 rec
13 dpavlin 1015 frec frec_eq frec_ne
14 dpavlin 536 regex prefix suffix surround
15     first lookup join_with
16 dpavlin 707 save_into_lookup
17 dpavlin 562
18     split_rec_on
19 dpavlin 785
20     get set
21 dpavlin 791 count
22 dpavlin 980
23 dpavlin 536 /;
24 dpavlin 10
25     use warnings;
26     use strict;
27 dpavlin 536
28     #use base qw/WebPAC::Common/;
29 dpavlin 550 use Data::Dump qw/dump/;
30 dpavlin 725 use Carp qw/confess/;
31 dpavlin 10
32 dpavlin 550 # debugging warn(s)
33     my $debug = 0;
34 dpavlin 1037 _debug( $debug );
35 dpavlin 550
36 dpavlin 1021 # FIXME
37 dpavlin 980 use WebPAC::Normalize::ISBN;
38     push @EXPORT, ( 'isbn_10', 'isbn_13' );
39 dpavlin 550
40 dpavlin 1021 use WebPAC::Normalize::MARC;
41 dpavlin 1036 push @EXPORT, ( qw/
42     marc marc_indicators marc_repeatable_subfield
43     marc_compose marc_leader marc_fixed
44     marc_duplicate marc_remove marc_count
45     marc_original_order
46     marc_template
47     /);
48 dpavlin 1021
49 dpavlin 10 =head1 NAME
50    
51 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
52 dpavlin 10
53     =cut
54    
55 dpavlin 1021 our $VERSION = '0.35';
56 dpavlin 10
57     =head1 SYNOPSIS
58    
59 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
60     from input records using perl functions which are specialized for set
61     processing.
62 dpavlin 10
63 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
64     means that you check it's validity before running WebPAC using
65     C<perl -c normalize.pl>.
66 dpavlin 15
67 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
68 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
69 dpavlin 547 C<marc>.
70 dpavlin 15
71 dpavlin 10 =head1 FUNCTIONS
72    
73 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
74     All other functions are available for use within normalisation rules.
75    
76 dpavlin 536 =head2 data_structure
77 dpavlin 10
78 dpavlin 536 Return data structure
79 dpavlin 13
80 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
81 dpavlin 725 lookup => $lookup_hash,
82 dpavlin 536 row => $row,
83     rules => $normalize_pl_config,
84 dpavlin 541 marc_encoding => 'utf-8',
85 dpavlin 595 config => $config,
86 dpavlin 736 load_row_coderef => sub {
87 dpavlin 979 my ($database,$input,$mfn) = @_;
88 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
89 dpavlin 725 },
90 dpavlin 13 );
91    
92 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
93 dpavlin 540 other are optional.
94    
95 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
96 dpavlin 725 die if it's not defined.
97    
98 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
99 dpavlin 15
100 dpavlin 538 Since this function isn't exported you have to call it with
101     C<WebPAC::Normalize::data_structure>.
102    
103 dpavlin 10 =cut
104    
105 dpavlin 736 my $load_row_coderef;
106 dpavlin 725
107 dpavlin 536 sub data_structure {
108     my $arg = {@_};
109 dpavlin 13
110 dpavlin 536 die "need row argument" unless ($arg->{row});
111     die "need normalisation argument" unless ($arg->{rules});
112 dpavlin 31
113 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
114 dpavlin 983 _set_ds( $arg->{row} );
115 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
116 dpavlin 541 _clean_ds( %{ $arg } );
117 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
118 dpavlin 725
119 dpavlin 1011 no strict 'subs';
120     no warnings 'redefine';
121     eval "$arg->{rules};";
122 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
123 dpavlin 540
124 dpavlin 538 return _get_ds();
125 dpavlin 10 }
126    
127 dpavlin 983 =head2 _set_ds
128 dpavlin 13
129 dpavlin 536 Set current record hash
130 dpavlin 433
131 dpavlin 983 _set_ds( $rec );
132 dpavlin 433
133     =cut
134    
135 dpavlin 536 my $rec;
136 dpavlin 433
137 dpavlin 983 sub _set_ds {
138 dpavlin 536 $rec = shift or die "no record hash";
139 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
140 dpavlin 433 }
141    
142 dpavlin 1021 =head2
143    
144     my $rec = _get_rec();
145    
146     =cut
147    
148     sub _get_rec { $rec };
149    
150 dpavlin 595 =head2 _set_config
151    
152     Set current config hash
153    
154     _set_config( $config );
155    
156     Magic keys are:
157    
158     =over 4
159    
160     =item _
161    
162     Code of current database
163    
164     =item _mfn
165    
166     Current MFN
167    
168     =back
169    
170     =cut
171    
172     my $config;
173    
174     sub _set_config {
175     $config = shift;
176     }
177    
178 dpavlin 538 =head2 _get_ds
179    
180     Return hash formatted as data structure
181    
182     my $ds = _get_ds();
183    
184     =cut
185    
186 dpavlin 1036 my $out;
187 dpavlin 538
188     sub _get_ds {
189 dpavlin 982 #warn "## out = ",dump($out);
190 dpavlin 538 return $out;
191     }
192    
193     =head2 _clean_ds
194    
195     Clean data structure hash for next record
196    
197     _clean_ds();
198    
199     =cut
200    
201     sub _clean_ds {
202 dpavlin 541 my $a = {@_};
203 dpavlin 1036 $out = undef;
204     WebPAC::Normalize::MARC::_clean();
205 dpavlin 538 }
206    
207     =head2 _set_lookup
208    
209     Set current lookup hash
210    
211     _set_lookup( $lookup );
212    
213     =cut
214    
215     my $lookup;
216    
217     sub _set_lookup {
218     $lookup = shift;
219     }
220    
221 dpavlin 707 =head2 _get_lookup
222    
223     Get current lookup hash
224    
225     my $lookup = _get_lookup();
226    
227     =cut
228    
229     sub _get_lookup {
230     return $lookup;
231     }
232    
233 dpavlin 736 =head2 _set_load_row
234 dpavlin 725
235     Setup code reference which will return L<data_structure> from
236     L<WebPAC::Store>
237    
238 dpavlin 736 _set_load_row(sub {
239 dpavlin 725 my ($database,$input,$mfn) = @_;
240 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
241 dpavlin 725 });
242    
243     =cut
244    
245 dpavlin 736 sub _set_load_row {
246 dpavlin 725 my $coderef = shift;
247     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
248    
249 dpavlin 736 $load_row_coderef = $coderef;
250 dpavlin 725 }
251    
252 dpavlin 554 =head2 _debug
253    
254     Change level of debug warnings
255    
256     _debug( 2 );
257    
258     =cut
259    
260     sub _debug {
261     my $l = shift;
262     return $debug unless defined($l);
263 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
264 dpavlin 554 $debug = $l;
265 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
266 dpavlin 554 }
267    
268 dpavlin 540 =head1 Functions to create C<data_structure>
269    
270     Those functions generally have to first in your normalization file.
271    
272 dpavlin 912 =head2 search_display
273 dpavlin 433
274 dpavlin 912 Define output for L<search> and L<display> at the same time
275 dpavlin 433
276 dpavlin 912 search_display('Title', rec('200','a') );
277 dpavlin 13
278    
279     =cut
280    
281 dpavlin 912 sub search_display {
282     my $name = shift or die "search_display needs name as first argument";
283 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
284     return unless (@o);
285     $out->{$name}->{search} = \@o;
286     $out->{$name}->{display} = \@o;
287     }
288 dpavlin 13
289 dpavlin 915 =head2 tag
290    
291     Old name for L<search_display>, but supported
292    
293     =cut
294    
295     sub tag {
296     search_display( @_ );
297     }
298    
299 dpavlin 536 =head2 display
300 dpavlin 13
301 dpavlin 912 Define output just for I<display>
302 dpavlin 125
303 dpavlin 536 @v = display('Title', rec('200','a') );
304 dpavlin 125
305 dpavlin 536 =cut
306 dpavlin 125
307 dpavlin 923 sub _field {
308     my $type = shift or confess "need type -- BUG?";
309     my $name = shift or confess "needs name as first argument";
310 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
311     return unless (@o);
312 dpavlin 923 $out->{$name}->{$type} = \@o;
313 dpavlin 536 }
314 dpavlin 13
315 dpavlin 923 sub display { _field( 'display', @_ ) }
316    
317 dpavlin 536 =head2 search
318 dpavlin 13
319 dpavlin 536 Prepare values just for I<search>
320 dpavlin 13
321 dpavlin 536 @v = search('Title', rec('200','a') );
322 dpavlin 433
323 dpavlin 536 =cut
324 dpavlin 13
325 dpavlin 923 sub search { _field( 'search', @_ ) }
326 dpavlin 13
327 dpavlin 923 =head2 sorted
328    
329     Insert into lists which will be automatically sorted
330    
331     sorted('Title', rec('200','a') );
332    
333     =cut
334    
335     sub sorted { _field( 'sorted', @_ ) }
336    
337    
338 dpavlin 564
339 dpavlin 540 =head1 Functions to extract data from input
340    
341     This function should be used inside functions to create C<data_structure> described
342     above.
343    
344 dpavlin 641 =head2 _pack_subfields_hash
345    
346 dpavlin 669 @subfields = _pack_subfields_hash( $h );
347     $subfields = _pack_subfields_hash( $h, 1 );
348 dpavlin 641
349 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
350     with subfields (denoted by C<^>) and values.
351    
352 dpavlin 641 =cut
353    
354     sub _pack_subfields_hash {
355    
356 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
357 dpavlin 641
358     my ($h,$include_subfields) = @_;
359    
360 dpavlin 831 # sanity and ease of use
361     return $h if (ref($h) ne 'HASH');
362    
363 dpavlin 641 if ( defined($h->{subfields}) ) {
364     my $sfs = delete $h->{subfields} || die "no subfields?";
365     my @out;
366     while (@$sfs) {
367     my $sf = shift @$sfs;
368     push @out, '^' . $sf if ($include_subfields);
369     my $o = shift @$sfs;
370     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
371     # single element subfields are not arrays
372 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
373 dpavlin 667
374 dpavlin 641 push @out, $h->{$sf};
375     } else {
376 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
377 dpavlin 641 push @out, $h->{$sf}->[$o];
378     }
379     }
380 dpavlin 667 if ($include_subfields) {
381     return join('', @out);
382     } else {
383     return @out;
384     }
385 dpavlin 641 } else {
386 dpavlin 667 if ($include_subfields) {
387     my $out = '';
388 dpavlin 668 foreach my $sf (sort keys %$h) {
389 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
390     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
391     } else {
392     $out .= '^' . $sf . $h->{$sf};
393     }
394     }
395     return $out;
396     } else {
397     # FIXME this should probably be in alphabetical order instead of hash order
398     values %{$h};
399     }
400 dpavlin 641 }
401     }
402    
403 dpavlin 536 =head2 rec1
404 dpavlin 371
405 dpavlin 536 Return all values in some field
406 dpavlin 371
407 dpavlin 536 @v = rec1('200')
408 dpavlin 15
409 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
410 dpavlin 15
411 dpavlin 536 =cut
412 dpavlin 15
413 dpavlin 536 sub rec1 {
414     my $f = shift;
415 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
416 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
417 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
418 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
419 dpavlin 641 my @out;
420     foreach my $h ( @{ $rec->{$f} } ) {
421     if (ref($h) eq 'HASH') {
422     push @out, ( _pack_subfields_hash( $h ) );
423 dpavlin 31 } else {
424 dpavlin 641 push @out, $h;
425 dpavlin 31 }
426 dpavlin 641 }
427     return @out;
428 dpavlin 536 } elsif( defined($rec->{$f}) ) {
429     return $rec->{$f};
430 dpavlin 15 }
431     }
432    
433 dpavlin 536 =head2 rec2
434 dpavlin 15
435 dpavlin 536 Return all values in specific field and subfield
436 dpavlin 13
437 dpavlin 536 @v = rec2('200','a')
438 dpavlin 13
439     =cut
440    
441 dpavlin 536 sub rec2 {
442     my $f = shift;
443     return unless (defined($rec && $rec->{$f}));
444     my $sf = shift;
445 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
446 dpavlin 589 return map {
447     if (ref($_->{$sf}) eq 'ARRAY') {
448     @{ $_->{$sf} };
449     } else {
450     $_->{$sf};
451     }
452 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
453 dpavlin 536 }
454 dpavlin 13
455 dpavlin 536 =head2 rec
456 dpavlin 13
457 dpavlin 536 syntaxtic sugar for
458 dpavlin 13
459 dpavlin 536 @v = rec('200')
460     @v = rec('200','a')
461 dpavlin 13
462 dpavlin 750 If rec() returns just single value, it will
463     return scalar, not array.
464    
465 dpavlin 536 =cut
466 dpavlin 373
467 dpavlin 536 sub rec {
468 dpavlin 583 my @out;
469 dpavlin 536 if ($#_ == 0) {
470 dpavlin 583 @out = rec1(@_);
471 dpavlin 536 } elsif ($#_ == 1) {
472 dpavlin 583 @out = rec2(@_);
473 dpavlin 13 }
474 dpavlin 750 if ($#out == 0 && ! wantarray) {
475     return $out[0];
476     } elsif (@out) {
477 dpavlin 583 return @out;
478     } else {
479     return '';
480     }
481 dpavlin 13 }
482    
483 dpavlin 1012 =head2 frec
484    
485     Returns first value from field
486    
487     $v = frec('200');
488     $v = frec('200','a');
489    
490     =cut
491    
492     sub frec {
493     my @out = rec(@_);
494     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
495     return shift @out;
496     }
497    
498 dpavlin 1015 =head2 frec_eq
499    
500     =head2 frec_ne
501    
502     Check if first values from two fields are same or different
503    
504     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
505     # values are same
506     } else {
507     # values are different
508     }
509    
510     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
511     could write something like:
512    
513     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
514     # yada tada
515     }
516    
517     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
518     in order to parse text and create invalid function C<eqfrec>.
519    
520     =cut
521    
522     sub frec_eq {
523     my ( $f1,$sf1, $f2, $sf2 ) = @_;
524     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
525     }
526    
527     sub frec_ne {
528     return ! frec_eq( @_ );
529     }
530    
531 dpavlin 536 =head2 regex
532 dpavlin 15
533 dpavlin 536 Apply regex to some or all values
534 dpavlin 15
535 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
536 dpavlin 15
537     =cut
538    
539 dpavlin 536 sub regex {
540     my $r = shift;
541     my @out;
542 dpavlin 550 #warn "r: $r\n", dump(\@_);
543 dpavlin 536 foreach my $t (@_) {
544     next unless ($t);
545     eval "\$t =~ $r";
546     push @out, $t if ($t && $t ne '');
547 dpavlin 368 }
548 dpavlin 536 return @out;
549 dpavlin 15 }
550    
551 dpavlin 536 =head2 prefix
552 dpavlin 15
553 dpavlin 536 Prefix all values with a string
554 dpavlin 15
555 dpavlin 536 @v = prefix( 'my_', @v );
556 dpavlin 15
557     =cut
558    
559 dpavlin 536 sub prefix {
560 dpavlin 819 my $p = shift;
561     return @_ unless defined( $p );
562 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
563     }
564 dpavlin 15
565 dpavlin 536 =head2 suffix
566 dpavlin 15
567 dpavlin 536 suffix all values with a string
568 dpavlin 15
569 dpavlin 536 @v = suffix( '_my', @v );
570 dpavlin 15
571 dpavlin 536 =cut
572 dpavlin 15
573 dpavlin 536 sub suffix {
574 dpavlin 819 my $s = shift;
575     return @_ unless defined( $s );
576 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
577 dpavlin 15 }
578    
579 dpavlin 536 =head2 surround
580 dpavlin 13
581 dpavlin 536 surround all values with a two strings
582 dpavlin 13
583 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
584 dpavlin 13
585     =cut
586    
587 dpavlin 536 sub surround {
588 dpavlin 819 my $p = shift;
589     my $s = shift;
590     $p = '' unless defined( $p );
591     $s = '' unless defined( $s );
592 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
593 dpavlin 13 }
594    
595 dpavlin 536 =head2 first
596 dpavlin 13
597 dpavlin 536 Return first element
598 dpavlin 15
599 dpavlin 536 $v = first( @v );
600 dpavlin 13
601     =cut
602    
603 dpavlin 536 sub first {
604     my $r = shift;
605     return $r;
606 dpavlin 13 }
607    
608 dpavlin 536 =head2 lookup
609 dpavlin 13
610 dpavlin 536 Consult lookup hashes for some value
611 dpavlin 13
612 dpavlin 725 @v = lookup(
613     sub {
614     'ffkk/peri/mfn'.rec('000')
615     },
616     'ffkk','peri','200-a-200-e',
617     sub {
618     first(rec(200,'a')).' '.first(rec('200','e'))
619     }
620     );
621 dpavlin 13
622 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
623     normal lookup definition in C<conf/lookup/something.pl> which looks like:
624 dpavlin 707
625 dpavlin 725 lookup(
626     # which results to return from record recorded in lookup
627     sub { 'ffkk/peri/mfn' . rec('000') },
628     # from which database and input
629     'ffkk','peri',
630     # such that following values match
631     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
632     # if this part is missing, we will try to match same fields
633     # from lookup record and current one, or you can override
634     # which records to use from current record using
635     sub { rec('900','x') . ' ' . rec('900','y') },
636     )
637    
638     You can think about this lookup as SQL (if that helps):
639    
640     select
641     sub { what }
642     from
643     database, input
644     where
645     sub { filter from lookuped record }
646     having
647     sub { optional filter on current record }
648    
649     Easy as pie, right?
650    
651 dpavlin 13 =cut
652    
653 dpavlin 536 sub lookup {
654 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
655    
656 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
657 dpavlin 725
658 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
659 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
660    
661 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
662 dpavlin 725
663     my $mfns;
664     my @having = $having->();
665    
666 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
667 dpavlin 725
668     foreach my $h ( @having ) {
669     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
670 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
671 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
672     }
673 dpavlin 536 }
674 dpavlin 725
675     return unless ($mfns);
676    
677     my @mfns = sort keys %$mfns;
678    
679 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
680 dpavlin 725
681     my $old_rec = $rec;
682     my @out;
683    
684     foreach my $mfn (@mfns) {
685 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
686 dpavlin 725
687 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
688 dpavlin 725
689     my @vals = $what->();
690    
691     push @out, ( @vals );
692    
693 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
694 dpavlin 725 }
695    
696     # if (ref($lookup->{$k}) eq 'ARRAY') {
697     # return @{ $lookup->{$k} };
698     # } else {
699     # return $lookup->{$k};
700     # }
701    
702     $rec = $old_rec;
703    
704 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
705 dpavlin 725
706 dpavlin 740 if ($#out == 0) {
707     return $out[0];
708     } else {
709     return @out;
710     }
711 dpavlin 13 }
712    
713 dpavlin 707 =head2 save_into_lookup
714    
715 dpavlin 725 Save value into lookup. It associates current database, input
716     and specific keys with one or more values which will be
717     associated over MFN.
718 dpavlin 707
719 dpavlin 725 MFN will be extracted from first occurence current of field 000
720     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
721    
722     my $nr = save_into_lookup($database,$input,$key,sub {
723 dpavlin 707 # code which produce one or more values
724     });
725    
726 dpavlin 725 It returns number of items saved.
727 dpavlin 707
728 dpavlin 725 This function shouldn't be called directly, it's called from code created by
729     L<WebPAC::Parser>.
730    
731 dpavlin 707 =cut
732    
733     sub save_into_lookup {
734 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
735     die "save_into_lookup needs database" unless defined($database);
736     die "save_into_lookup needs input" unless defined($input);
737     die "save_into_lookup needs key" unless defined($key);
738 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
739 dpavlin 725
740 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
741 dpavlin 725
742     my $mfn =
743     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
744     defined($config->{_mfn}) ? $config->{_mfn} :
745     die "mfn not defined or zero";
746    
747     my $nr = 0;
748    
749 dpavlin 707 foreach my $v ( $coderef->() ) {
750 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
751 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
752 dpavlin 725 $nr++;
753 dpavlin 707 }
754 dpavlin 725
755     return $nr;
756 dpavlin 707 }
757    
758 dpavlin 595 =head2 config
759    
760     Consult config values stored in C<config.yml>
761    
762     # return database code (key under databases in yaml)
763     $database_code = config(); # use _ from hash
764     $database_name = config('name');
765     $database_input_name = config('input name');
766    
767     Up to three levels are supported.
768    
769     =cut
770    
771     sub config {
772     return unless ($config);
773    
774     my $p = shift;
775    
776     $p ||= '';
777    
778     my $v;
779    
780     warn "### getting config($p)\n" if ($debug > 1);
781    
782     my @p = split(/\s+/,$p);
783     if ($#p < 0) {
784     $v = $config->{ '_' }; # special, database code
785     } else {
786    
787     my $c = dclone( $config );
788    
789     foreach my $k (@p) {
790     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
791     if (ref($c) eq 'ARRAY') {
792     $c = shift @$c;
793     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
794     last;
795     }
796    
797     if (! defined($c->{$k}) ) {
798     $c = undef;
799     last;
800     } else {
801     $c = $c->{$k};
802     }
803     }
804     $v = $c if ($c);
805    
806     }
807    
808     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
809     warn "config( '$p' ) is empty\n" if (! $v);
810    
811     return $v;
812     }
813    
814     =head2 id
815    
816     Returns unique id of this record
817    
818     $id = id();
819    
820     Returns C<42/2> for 2nd occurence of MFN 42.
821    
822     =cut
823    
824     sub id {
825     my $mfn = $config->{_mfn} || die "no _mfn in config data";
826 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
827 dpavlin 595 }
828    
829 dpavlin 536 =head2 join_with
830 dpavlin 13
831 dpavlin 536 Joins walues with some delimiter
832 dpavlin 10
833 dpavlin 536 $v = join_with(", ", @v);
834 dpavlin 10
835 dpavlin 536 =cut
836 dpavlin 10
837 dpavlin 536 sub join_with {
838     my $d = shift;
839 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
840 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
841     return '' unless defined($v);
842     return $v;
843 dpavlin 536 }
844 dpavlin 10
845 dpavlin 562 =head2 split_rec_on
846    
847     Split record subfield on some regex and take one of parts out
848    
849     $a_before_semi_column =
850     split_rec_on('200','a', /\s*;\s*/, $part);
851    
852     C<$part> is optional number of element. First element is
853     B<1>, not 0!
854    
855     If there is no C<$part> parameter or C<$part> is 0, this function will
856     return all values produced by splitting.
857    
858     =cut
859    
860     sub split_rec_on {
861     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
862    
863     my ($fld, $sf, $regex, $part) = @_;
864 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
865 dpavlin 562
866     my @r = rec( $fld, $sf );
867     my $v = shift @r;
868 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
869 dpavlin 562
870 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
871 dpavlin 566
872 dpavlin 562 my @s = split( $regex, $v );
873 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
874 dpavlin 566 if ($part && $part > 0) {
875 dpavlin 562 return $s[ $part - 1 ];
876     } else {
877 dpavlin 571 return @s;
878 dpavlin 562 }
879     }
880    
881 dpavlin 785 my $hash;
882    
883     =head2 set
884    
885     set( key => 'value' );
886    
887     =cut
888    
889     sub set {
890     my ($k,$v) = @_;
891 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
892 dpavlin 785 $hash->{$k} = $v;
893     };
894    
895     =head2 get
896    
897     get( 'key' );
898    
899     =cut
900    
901     sub get {
902     my $k = shift || return;
903     my $v = $hash->{$k};
904 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
905 dpavlin 785 return $v;
906     }
907    
908 dpavlin 791 =head2 count
909 dpavlin 785
910 dpavlin 791 if ( count( @result ) == 1 ) {
911     # do something if only 1 result is there
912     }
913    
914     =cut
915    
916     sub count {
917 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
918 dpavlin 791 return @_ . '';
919     }
920    
921 dpavlin 536 # END
922     1;

  ViewVC Help
Powered by ViewVC 1.1.26