/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1066 - (hide annotations)
Tue Nov 27 22:31:09 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 17729 byte(s)
 r1667@llin:  dpavlin | 2007-11-27 23:31:09 +0100
 export to as general way to set different output fields

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 1066 to
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 536 rec1 rec2 rec
14 dpavlin 1015 frec frec_eq frec_ne
15 dpavlin 536 regex prefix suffix surround
16     first lookup join_with
17 dpavlin 707 save_into_lookup
18 dpavlin 562
19     split_rec_on
20 dpavlin 785
21     get set
22 dpavlin 791 count
23 dpavlin 980
24 dpavlin 536 /;
25 dpavlin 10
26     use warnings;
27     use strict;
28 dpavlin 536
29     #use base qw/WebPAC::Common/;
30 dpavlin 550 use Data::Dump qw/dump/;
31 dpavlin 725 use Carp qw/confess/;
32 dpavlin 10
33 dpavlin 550 # debugging warn(s)
34     my $debug = 0;
35 dpavlin 1037 _debug( $debug );
36 dpavlin 550
37 dpavlin 1021 # FIXME
38 dpavlin 980 use WebPAC::Normalize::ISBN;
39     push @EXPORT, ( 'isbn_10', 'isbn_13' );
40 dpavlin 550
41 dpavlin 1021 use WebPAC::Normalize::MARC;
42 dpavlin 1036 push @EXPORT, ( qw/
43     marc marc_indicators marc_repeatable_subfield
44     marc_compose marc_leader marc_fixed
45     marc_duplicate marc_remove marc_count
46     marc_original_order
47     marc_template
48     /);
49 dpavlin 1021
50 dpavlin 10 =head1 NAME
51    
52 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
53 dpavlin 10
54     =cut
55    
56 dpavlin 1066 our $VERSION = '0.36';
57 dpavlin 10
58     =head1 SYNOPSIS
59    
60 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
61     from input records using perl functions which are specialized for set
62     processing.
63 dpavlin 10
64 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
65     means that you check it's validity before running WebPAC using
66     C<perl -c normalize.pl>.
67 dpavlin 15
68 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
69 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
70 dpavlin 547 C<marc>.
71 dpavlin 15
72 dpavlin 10 =head1 FUNCTIONS
73    
74 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
75     All other functions are available for use within normalisation rules.
76    
77 dpavlin 536 =head2 data_structure
78 dpavlin 10
79 dpavlin 536 Return data structure
80 dpavlin 13
81 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
82 dpavlin 725 lookup => $lookup_hash,
83 dpavlin 536 row => $row,
84     rules => $normalize_pl_config,
85 dpavlin 541 marc_encoding => 'utf-8',
86 dpavlin 595 config => $config,
87 dpavlin 736 load_row_coderef => sub {
88 dpavlin 979 my ($database,$input,$mfn) = @_;
89 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
90 dpavlin 725 },
91 dpavlin 13 );
92    
93 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
94 dpavlin 540 other are optional.
95    
96 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
97 dpavlin 725 die if it's not defined.
98    
99 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
100 dpavlin 15
101 dpavlin 538 Since this function isn't exported you have to call it with
102     C<WebPAC::Normalize::data_structure>.
103    
104 dpavlin 10 =cut
105    
106 dpavlin 736 my $load_row_coderef;
107 dpavlin 725
108 dpavlin 536 sub data_structure {
109     my $arg = {@_};
110 dpavlin 13
111 dpavlin 536 die "need row argument" unless ($arg->{row});
112     die "need normalisation argument" unless ($arg->{rules});
113 dpavlin 31
114 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
115 dpavlin 983 _set_ds( $arg->{row} );
116 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
117 dpavlin 541 _clean_ds( %{ $arg } );
118 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
119 dpavlin 725
120 dpavlin 1011 no strict 'subs';
121     no warnings 'redefine';
122     eval "$arg->{rules};";
123 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
124 dpavlin 540
125 dpavlin 538 return _get_ds();
126 dpavlin 10 }
127    
128 dpavlin 983 =head2 _set_ds
129 dpavlin 13
130 dpavlin 536 Set current record hash
131 dpavlin 433
132 dpavlin 983 _set_ds( $rec );
133 dpavlin 433
134     =cut
135    
136 dpavlin 536 my $rec;
137 dpavlin 433
138 dpavlin 983 sub _set_ds {
139 dpavlin 536 $rec = shift or die "no record hash";
140 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
141 dpavlin 433 }
142    
143 dpavlin 1021 =head2
144    
145     my $rec = _get_rec();
146    
147     =cut
148    
149     sub _get_rec { $rec };
150    
151 dpavlin 595 =head2 _set_config
152    
153     Set current config hash
154    
155     _set_config( $config );
156    
157     Magic keys are:
158    
159     =over 4
160    
161     =item _
162    
163     Code of current database
164    
165     =item _mfn
166    
167     Current MFN
168    
169     =back
170    
171     =cut
172    
173     my $config;
174    
175     sub _set_config {
176     $config = shift;
177     }
178    
179 dpavlin 538 =head2 _get_ds
180    
181     Return hash formatted as data structure
182    
183     my $ds = _get_ds();
184    
185     =cut
186    
187 dpavlin 1036 my $out;
188 dpavlin 538
189     sub _get_ds {
190 dpavlin 982 #warn "## out = ",dump($out);
191 dpavlin 538 return $out;
192     }
193    
194     =head2 _clean_ds
195    
196     Clean data structure hash for next record
197    
198     _clean_ds();
199    
200     =cut
201    
202     sub _clean_ds {
203 dpavlin 541 my $a = {@_};
204 dpavlin 1036 $out = undef;
205     WebPAC::Normalize::MARC::_clean();
206 dpavlin 538 }
207    
208     =head2 _set_lookup
209    
210     Set current lookup hash
211    
212     _set_lookup( $lookup );
213    
214     =cut
215    
216     my $lookup;
217    
218     sub _set_lookup {
219     $lookup = shift;
220     }
221    
222 dpavlin 707 =head2 _get_lookup
223    
224     Get current lookup hash
225    
226     my $lookup = _get_lookup();
227    
228     =cut
229    
230     sub _get_lookup {
231     return $lookup;
232     }
233    
234 dpavlin 736 =head2 _set_load_row
235 dpavlin 725
236     Setup code reference which will return L<data_structure> from
237     L<WebPAC::Store>
238    
239 dpavlin 736 _set_load_row(sub {
240 dpavlin 725 my ($database,$input,$mfn) = @_;
241 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
242 dpavlin 725 });
243    
244     =cut
245    
246 dpavlin 736 sub _set_load_row {
247 dpavlin 725 my $coderef = shift;
248     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
249    
250 dpavlin 736 $load_row_coderef = $coderef;
251 dpavlin 725 }
252    
253 dpavlin 554 =head2 _debug
254    
255     Change level of debug warnings
256    
257     _debug( 2 );
258    
259     =cut
260    
261     sub _debug {
262     my $l = shift;
263     return $debug unless defined($l);
264 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
265 dpavlin 554 $debug = $l;
266 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
267 dpavlin 554 }
268    
269 dpavlin 540 =head1 Functions to create C<data_structure>
270    
271     Those functions generally have to first in your normalization file.
272    
273 dpavlin 1066 =head2 to
274    
275     Generic way to set values for some name
276    
277     to('field-name', 'name-value' => rec('200','a') );
278    
279     There are many helpers defined below which might be easier to use.
280    
281     =cut
282    
283     sub to {
284     my $type = shift or confess "need type -- BUG?";
285     my $name = shift or confess "needs name as first argument";
286     my @o = grep { defined($_) && $_ ne '' } @_;
287     return unless (@o);
288     $out->{$name}->{$type} = \@o;
289     }
290    
291 dpavlin 912 =head2 search_display
292 dpavlin 433
293 dpavlin 912 Define output for L<search> and L<display> at the same time
294 dpavlin 433
295 dpavlin 912 search_display('Title', rec('200','a') );
296 dpavlin 13
297     =cut
298    
299 dpavlin 912 sub search_display {
300     my $name = shift or die "search_display needs name as first argument";
301 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
302     return unless (@o);
303     $out->{$name}->{search} = \@o;
304     $out->{$name}->{display} = \@o;
305     }
306 dpavlin 13
307 dpavlin 915 =head2 tag
308    
309 dpavlin 1066 Old name for L<search_display>, it will probably be removed at one point.
310 dpavlin 915
311     =cut
312    
313     sub tag {
314     search_display( @_ );
315     }
316    
317 dpavlin 536 =head2 display
318 dpavlin 13
319 dpavlin 912 Define output just for I<display>
320 dpavlin 125
321 dpavlin 536 @v = display('Title', rec('200','a') );
322 dpavlin 125
323 dpavlin 536 =cut
324 dpavlin 125
325 dpavlin 1066 sub display { to( 'display', @_ ) }
326 dpavlin 13
327 dpavlin 536 =head2 search
328 dpavlin 13
329 dpavlin 536 Prepare values just for I<search>
330 dpavlin 13
331 dpavlin 536 @v = search('Title', rec('200','a') );
332 dpavlin 433
333 dpavlin 536 =cut
334 dpavlin 13
335 dpavlin 1066 sub search { to( 'search', @_ ) }
336 dpavlin 13
337 dpavlin 923 =head2 sorted
338    
339     Insert into lists which will be automatically sorted
340    
341     sorted('Title', rec('200','a') );
342    
343     =cut
344    
345 dpavlin 1066 sub sorted { to( 'sorted', @_ ) }
346 dpavlin 923
347    
348 dpavlin 540 =head1 Functions to extract data from input
349    
350     This function should be used inside functions to create C<data_structure> described
351     above.
352    
353 dpavlin 641 =head2 _pack_subfields_hash
354    
355 dpavlin 669 @subfields = _pack_subfields_hash( $h );
356     $subfields = _pack_subfields_hash( $h, 1 );
357 dpavlin 641
358 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
359     with subfields (denoted by C<^>) and values.
360    
361 dpavlin 641 =cut
362    
363     sub _pack_subfields_hash {
364    
365 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
366 dpavlin 641
367     my ($h,$include_subfields) = @_;
368    
369 dpavlin 831 # sanity and ease of use
370     return $h if (ref($h) ne 'HASH');
371    
372 dpavlin 641 if ( defined($h->{subfields}) ) {
373     my $sfs = delete $h->{subfields} || die "no subfields?";
374     my @out;
375     while (@$sfs) {
376     my $sf = shift @$sfs;
377     push @out, '^' . $sf if ($include_subfields);
378     my $o = shift @$sfs;
379     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
380     # single element subfields are not arrays
381 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
382 dpavlin 667
383 dpavlin 641 push @out, $h->{$sf};
384     } else {
385 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
386 dpavlin 641 push @out, $h->{$sf}->[$o];
387     }
388     }
389 dpavlin 667 if ($include_subfields) {
390     return join('', @out);
391     } else {
392     return @out;
393     }
394 dpavlin 641 } else {
395 dpavlin 667 if ($include_subfields) {
396     my $out = '';
397 dpavlin 668 foreach my $sf (sort keys %$h) {
398 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
399     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
400     } else {
401     $out .= '^' . $sf . $h->{$sf};
402     }
403     }
404     return $out;
405     } else {
406     # FIXME this should probably be in alphabetical order instead of hash order
407     values %{$h};
408     }
409 dpavlin 641 }
410     }
411    
412 dpavlin 536 =head2 rec1
413 dpavlin 371
414 dpavlin 536 Return all values in some field
415 dpavlin 371
416 dpavlin 536 @v = rec1('200')
417 dpavlin 15
418 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
419 dpavlin 15
420 dpavlin 536 =cut
421 dpavlin 15
422 dpavlin 536 sub rec1 {
423     my $f = shift;
424 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
425 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
426 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
427 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
428 dpavlin 641 my @out;
429     foreach my $h ( @{ $rec->{$f} } ) {
430     if (ref($h) eq 'HASH') {
431     push @out, ( _pack_subfields_hash( $h ) );
432 dpavlin 31 } else {
433 dpavlin 641 push @out, $h;
434 dpavlin 31 }
435 dpavlin 641 }
436     return @out;
437 dpavlin 536 } elsif( defined($rec->{$f}) ) {
438     return $rec->{$f};
439 dpavlin 15 }
440     }
441    
442 dpavlin 536 =head2 rec2
443 dpavlin 15
444 dpavlin 536 Return all values in specific field and subfield
445 dpavlin 13
446 dpavlin 536 @v = rec2('200','a')
447 dpavlin 13
448     =cut
449    
450 dpavlin 536 sub rec2 {
451     my $f = shift;
452     return unless (defined($rec && $rec->{$f}));
453     my $sf = shift;
454 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
455 dpavlin 589 return map {
456     if (ref($_->{$sf}) eq 'ARRAY') {
457     @{ $_->{$sf} };
458     } else {
459     $_->{$sf};
460     }
461 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
462 dpavlin 536 }
463 dpavlin 13
464 dpavlin 536 =head2 rec
465 dpavlin 13
466 dpavlin 536 syntaxtic sugar for
467 dpavlin 13
468 dpavlin 536 @v = rec('200')
469     @v = rec('200','a')
470 dpavlin 13
471 dpavlin 750 If rec() returns just single value, it will
472     return scalar, not array.
473    
474 dpavlin 536 =cut
475 dpavlin 373
476 dpavlin 536 sub rec {
477 dpavlin 583 my @out;
478 dpavlin 536 if ($#_ == 0) {
479 dpavlin 583 @out = rec1(@_);
480 dpavlin 536 } elsif ($#_ == 1) {
481 dpavlin 583 @out = rec2(@_);
482 dpavlin 13 }
483 dpavlin 750 if ($#out == 0 && ! wantarray) {
484     return $out[0];
485     } elsif (@out) {
486 dpavlin 583 return @out;
487     } else {
488     return '';
489     }
490 dpavlin 13 }
491    
492 dpavlin 1012 =head2 frec
493    
494     Returns first value from field
495    
496     $v = frec('200');
497     $v = frec('200','a');
498    
499     =cut
500    
501     sub frec {
502     my @out = rec(@_);
503     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
504     return shift @out;
505     }
506    
507 dpavlin 1015 =head2 frec_eq
508    
509     =head2 frec_ne
510    
511     Check if first values from two fields are same or different
512    
513     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
514     # values are same
515     } else {
516     # values are different
517     }
518    
519     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
520     could write something like:
521    
522     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
523     # yada tada
524     }
525    
526     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
527     in order to parse text and create invalid function C<eqfrec>.
528    
529     =cut
530    
531     sub frec_eq {
532     my ( $f1,$sf1, $f2, $sf2 ) = @_;
533     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
534     }
535    
536     sub frec_ne {
537     return ! frec_eq( @_ );
538     }
539    
540 dpavlin 536 =head2 regex
541 dpavlin 15
542 dpavlin 536 Apply regex to some or all values
543 dpavlin 15
544 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
545 dpavlin 15
546     =cut
547    
548 dpavlin 536 sub regex {
549     my $r = shift;
550     my @out;
551 dpavlin 550 #warn "r: $r\n", dump(\@_);
552 dpavlin 536 foreach my $t (@_) {
553     next unless ($t);
554     eval "\$t =~ $r";
555     push @out, $t if ($t && $t ne '');
556 dpavlin 368 }
557 dpavlin 536 return @out;
558 dpavlin 15 }
559    
560 dpavlin 536 =head2 prefix
561 dpavlin 15
562 dpavlin 536 Prefix all values with a string
563 dpavlin 15
564 dpavlin 536 @v = prefix( 'my_', @v );
565 dpavlin 15
566     =cut
567    
568 dpavlin 536 sub prefix {
569 dpavlin 819 my $p = shift;
570     return @_ unless defined( $p );
571 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
572     }
573 dpavlin 15
574 dpavlin 536 =head2 suffix
575 dpavlin 15
576 dpavlin 536 suffix all values with a string
577 dpavlin 15
578 dpavlin 536 @v = suffix( '_my', @v );
579 dpavlin 15
580 dpavlin 536 =cut
581 dpavlin 15
582 dpavlin 536 sub suffix {
583 dpavlin 819 my $s = shift;
584     return @_ unless defined( $s );
585 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
586 dpavlin 15 }
587    
588 dpavlin 536 =head2 surround
589 dpavlin 13
590 dpavlin 536 surround all values with a two strings
591 dpavlin 13
592 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
593 dpavlin 13
594     =cut
595    
596 dpavlin 536 sub surround {
597 dpavlin 819 my $p = shift;
598     my $s = shift;
599     $p = '' unless defined( $p );
600     $s = '' unless defined( $s );
601 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
602 dpavlin 13 }
603    
604 dpavlin 536 =head2 first
605 dpavlin 13
606 dpavlin 536 Return first element
607 dpavlin 15
608 dpavlin 536 $v = first( @v );
609 dpavlin 13
610     =cut
611    
612 dpavlin 536 sub first {
613     my $r = shift;
614     return $r;
615 dpavlin 13 }
616    
617 dpavlin 536 =head2 lookup
618 dpavlin 13
619 dpavlin 536 Consult lookup hashes for some value
620 dpavlin 13
621 dpavlin 725 @v = lookup(
622     sub {
623     'ffkk/peri/mfn'.rec('000')
624     },
625     'ffkk','peri','200-a-200-e',
626     sub {
627     first(rec(200,'a')).' '.first(rec('200','e'))
628     }
629     );
630 dpavlin 13
631 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
632     normal lookup definition in C<conf/lookup/something.pl> which looks like:
633 dpavlin 707
634 dpavlin 725 lookup(
635     # which results to return from record recorded in lookup
636     sub { 'ffkk/peri/mfn' . rec('000') },
637     # from which database and input
638     'ffkk','peri',
639     # such that following values match
640     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
641     # if this part is missing, we will try to match same fields
642     # from lookup record and current one, or you can override
643     # which records to use from current record using
644     sub { rec('900','x') . ' ' . rec('900','y') },
645     )
646    
647     You can think about this lookup as SQL (if that helps):
648    
649     select
650     sub { what }
651     from
652     database, input
653     where
654     sub { filter from lookuped record }
655     having
656     sub { optional filter on current record }
657    
658     Easy as pie, right?
659    
660 dpavlin 13 =cut
661    
662 dpavlin 536 sub lookup {
663 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
664    
665 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
666 dpavlin 725
667 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
668 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
669    
670 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
671 dpavlin 725
672     my $mfns;
673     my @having = $having->();
674    
675 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
676 dpavlin 725
677     foreach my $h ( @having ) {
678     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
679 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
680 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
681     }
682 dpavlin 536 }
683 dpavlin 725
684     return unless ($mfns);
685    
686     my @mfns = sort keys %$mfns;
687    
688 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
689 dpavlin 725
690     my $old_rec = $rec;
691     my @out;
692    
693     foreach my $mfn (@mfns) {
694 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
695 dpavlin 725
696 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
697 dpavlin 725
698     my @vals = $what->();
699    
700     push @out, ( @vals );
701    
702 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
703 dpavlin 725 }
704    
705     # if (ref($lookup->{$k}) eq 'ARRAY') {
706     # return @{ $lookup->{$k} };
707     # } else {
708     # return $lookup->{$k};
709     # }
710    
711     $rec = $old_rec;
712    
713 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
714 dpavlin 725
715 dpavlin 740 if ($#out == 0) {
716     return $out[0];
717     } else {
718     return @out;
719     }
720 dpavlin 13 }
721    
722 dpavlin 707 =head2 save_into_lookup
723    
724 dpavlin 725 Save value into lookup. It associates current database, input
725     and specific keys with one or more values which will be
726     associated over MFN.
727 dpavlin 707
728 dpavlin 725 MFN will be extracted from first occurence current of field 000
729     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
730    
731     my $nr = save_into_lookup($database,$input,$key,sub {
732 dpavlin 707 # code which produce one or more values
733     });
734    
735 dpavlin 725 It returns number of items saved.
736 dpavlin 707
737 dpavlin 725 This function shouldn't be called directly, it's called from code created by
738     L<WebPAC::Parser>.
739    
740 dpavlin 707 =cut
741    
742     sub save_into_lookup {
743 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
744     die "save_into_lookup needs database" unless defined($database);
745     die "save_into_lookup needs input" unless defined($input);
746     die "save_into_lookup needs key" unless defined($key);
747 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
748 dpavlin 725
749 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
750 dpavlin 725
751     my $mfn =
752     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
753     defined($config->{_mfn}) ? $config->{_mfn} :
754     die "mfn not defined or zero";
755    
756     my $nr = 0;
757    
758 dpavlin 707 foreach my $v ( $coderef->() ) {
759 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
760 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
761 dpavlin 725 $nr++;
762 dpavlin 707 }
763 dpavlin 725
764     return $nr;
765 dpavlin 707 }
766    
767 dpavlin 595 =head2 config
768    
769     Consult config values stored in C<config.yml>
770    
771     # return database code (key under databases in yaml)
772     $database_code = config(); # use _ from hash
773     $database_name = config('name');
774     $database_input_name = config('input name');
775    
776     Up to three levels are supported.
777    
778     =cut
779    
780     sub config {
781     return unless ($config);
782    
783     my $p = shift;
784    
785     $p ||= '';
786    
787     my $v;
788    
789     warn "### getting config($p)\n" if ($debug > 1);
790    
791     my @p = split(/\s+/,$p);
792     if ($#p < 0) {
793     $v = $config->{ '_' }; # special, database code
794     } else {
795    
796     my $c = dclone( $config );
797    
798     foreach my $k (@p) {
799     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
800     if (ref($c) eq 'ARRAY') {
801     $c = shift @$c;
802     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
803     last;
804     }
805    
806     if (! defined($c->{$k}) ) {
807     $c = undef;
808     last;
809     } else {
810     $c = $c->{$k};
811     }
812     }
813     $v = $c if ($c);
814    
815     }
816    
817     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
818     warn "config( '$p' ) is empty\n" if (! $v);
819    
820     return $v;
821     }
822    
823     =head2 id
824    
825     Returns unique id of this record
826    
827     $id = id();
828    
829     Returns C<42/2> for 2nd occurence of MFN 42.
830    
831     =cut
832    
833     sub id {
834     my $mfn = $config->{_mfn} || die "no _mfn in config data";
835 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
836 dpavlin 595 }
837    
838 dpavlin 536 =head2 join_with
839 dpavlin 13
840 dpavlin 536 Joins walues with some delimiter
841 dpavlin 10
842 dpavlin 536 $v = join_with(", ", @v);
843 dpavlin 10
844 dpavlin 536 =cut
845 dpavlin 10
846 dpavlin 536 sub join_with {
847     my $d = shift;
848 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
849 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
850     return '' unless defined($v);
851     return $v;
852 dpavlin 536 }
853 dpavlin 10
854 dpavlin 562 =head2 split_rec_on
855    
856     Split record subfield on some regex and take one of parts out
857    
858     $a_before_semi_column =
859     split_rec_on('200','a', /\s*;\s*/, $part);
860    
861     C<$part> is optional number of element. First element is
862     B<1>, not 0!
863    
864     If there is no C<$part> parameter or C<$part> is 0, this function will
865     return all values produced by splitting.
866    
867     =cut
868    
869     sub split_rec_on {
870     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
871    
872     my ($fld, $sf, $regex, $part) = @_;
873 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
874 dpavlin 562
875     my @r = rec( $fld, $sf );
876     my $v = shift @r;
877 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
878 dpavlin 562
879 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
880 dpavlin 566
881 dpavlin 562 my @s = split( $regex, $v );
882 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
883 dpavlin 566 if ($part && $part > 0) {
884 dpavlin 562 return $s[ $part - 1 ];
885     } else {
886 dpavlin 571 return @s;
887 dpavlin 562 }
888     }
889    
890 dpavlin 785 my $hash;
891    
892     =head2 set
893    
894     set( key => 'value' );
895    
896     =cut
897    
898     sub set {
899     my ($k,$v) = @_;
900 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
901 dpavlin 785 $hash->{$k} = $v;
902     };
903    
904     =head2 get
905    
906     get( 'key' );
907    
908     =cut
909    
910     sub get {
911     my $k = shift || return;
912     my $v = $hash->{$k};
913 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
914 dpavlin 785 return $v;
915     }
916    
917 dpavlin 791 =head2 count
918 dpavlin 785
919 dpavlin 791 if ( count( @result ) == 1 ) {
920     # do something if only 1 result is there
921     }
922    
923     =cut
924    
925     sub count {
926 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
927 dpavlin 791 return @_ . '';
928     }
929    
930 dpavlin 536 # END
931     1;

  ViewVC Help
Powered by ViewVC 1.1.26