/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1210 - (hide annotations)
Sat May 30 15:26:25 2009 UTC (13 years, 8 months ago) by dpavlin
File size: 18230 byte(s)
 r1906@llin:  dpavlin | 2009-05-30 17:26:17 +0200
 added table name to row normalize command so we can create
 data in different tables directly from normalization file

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 1066 to
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 536 rec1 rec2 rec
14 dpavlin 1015 frec frec_eq frec_ne
15 dpavlin 536 regex prefix suffix surround
16     first lookup join_with
17 dpavlin 707 save_into_lookup
18 dpavlin 562
19     split_rec_on
20 dpavlin 785
21     get set
22 dpavlin 791 count
23 dpavlin 980
24 dpavlin 1206 row
25     rec_array
26    
27 dpavlin 536 /;
28 dpavlin 10
29     use warnings;
30     use strict;
31 dpavlin 536
32     #use base qw/WebPAC::Common/;
33 dpavlin 550 use Data::Dump qw/dump/;
34 dpavlin 725 use Carp qw/confess/;
35 dpavlin 10
36 dpavlin 550 # debugging warn(s)
37     my $debug = 0;
38 dpavlin 1037 _debug( $debug );
39 dpavlin 550
40 dpavlin 1021 # FIXME
41 dpavlin 980 use WebPAC::Normalize::ISBN;
42     push @EXPORT, ( 'isbn_10', 'isbn_13' );
43 dpavlin 550
44 dpavlin 1021 use WebPAC::Normalize::MARC;
45 dpavlin 1036 push @EXPORT, ( qw/
46     marc marc_indicators marc_repeatable_subfield
47     marc_compose marc_leader marc_fixed
48     marc_duplicate marc_remove marc_count
49     marc_original_order
50     marc_template
51     /);
52 dpavlin 1021
53 dpavlin 1118 use Storable qw/dclone/;
54    
55 dpavlin 10 =head1 NAME
56    
57 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
58 dpavlin 10
59     =cut
60    
61 dpavlin 1066 our $VERSION = '0.36';
62 dpavlin 10
63     =head1 SYNOPSIS
64    
65 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
66     from input records using perl functions which are specialized for set
67     processing.
68 dpavlin 10
69 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
70     means that you check it's validity before running WebPAC using
71     C<perl -c normalize.pl>.
72 dpavlin 15
73 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
74 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
75 dpavlin 547 C<marc>.
76 dpavlin 15
77 dpavlin 10 =head1 FUNCTIONS
78    
79 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
80     All other functions are available for use within normalisation rules.
81    
82 dpavlin 536 =head2 data_structure
83 dpavlin 10
84 dpavlin 536 Return data structure
85 dpavlin 13
86 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
87 dpavlin 725 lookup => $lookup_hash,
88 dpavlin 536 row => $row,
89     rules => $normalize_pl_config,
90 dpavlin 541 marc_encoding => 'utf-8',
91 dpavlin 595 config => $config,
92 dpavlin 736 load_row_coderef => sub {
93 dpavlin 979 my ($database,$input,$mfn) = @_;
94 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
95 dpavlin 725 },
96 dpavlin 13 );
97    
98 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
99 dpavlin 540 other are optional.
100    
101 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
102 dpavlin 725 die if it's not defined.
103    
104 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
105 dpavlin 15
106 dpavlin 538 Since this function isn't exported you have to call it with
107     C<WebPAC::Normalize::data_structure>.
108    
109 dpavlin 10 =cut
110    
111 dpavlin 736 my $load_row_coderef;
112 dpavlin 725
113 dpavlin 536 sub data_structure {
114     my $arg = {@_};
115 dpavlin 13
116 dpavlin 536 die "need row argument" unless ($arg->{row});
117     die "need normalisation argument" unless ($arg->{rules});
118 dpavlin 31
119 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
120 dpavlin 983 _set_ds( $arg->{row} );
121 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
122 dpavlin 541 _clean_ds( %{ $arg } );
123 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
124 dpavlin 725
125 dpavlin 1011 no strict 'subs';
126     no warnings 'redefine';
127     eval "$arg->{rules};";
128 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
129 dpavlin 540
130 dpavlin 538 return _get_ds();
131 dpavlin 10 }
132    
133 dpavlin 983 =head2 _set_ds
134 dpavlin 13
135 dpavlin 536 Set current record hash
136 dpavlin 433
137 dpavlin 983 _set_ds( $rec );
138 dpavlin 433
139     =cut
140    
141 dpavlin 536 my $rec;
142 dpavlin 433
143 dpavlin 983 sub _set_ds {
144 dpavlin 536 $rec = shift or die "no record hash";
145 dpavlin 1036 $WebPAC::Normalize::MARC::rec = $rec;
146 dpavlin 433 }
147    
148 dpavlin 1021 =head2
149    
150     my $rec = _get_rec();
151    
152     =cut
153    
154     sub _get_rec { $rec };
155    
156 dpavlin 1205 sub rec_array {
157     my $d = $rec->{ $_[0] };
158     return @$d if ref($d) eq 'ARRAY';
159     die "field $_[0] not array: ",dump( $d );
160     }
161    
162 dpavlin 595 =head2 _set_config
163    
164     Set current config hash
165    
166     _set_config( $config );
167    
168     Magic keys are:
169    
170     =over 4
171    
172     =item _
173    
174     Code of current database
175    
176     =item _mfn
177    
178     Current MFN
179    
180     =back
181    
182     =cut
183    
184     my $config;
185    
186     sub _set_config {
187     $config = shift;
188     }
189    
190 dpavlin 538 =head2 _get_ds
191    
192     Return hash formatted as data structure
193    
194     my $ds = _get_ds();
195    
196     =cut
197    
198 dpavlin 1036 my $out;
199 dpavlin 538
200     sub _get_ds {
201 dpavlin 982 #warn "## out = ",dump($out);
202 dpavlin 538 return $out;
203     }
204    
205     =head2 _clean_ds
206    
207     Clean data structure hash for next record
208    
209     _clean_ds();
210    
211     =cut
212    
213     sub _clean_ds {
214 dpavlin 541 my $a = {@_};
215 dpavlin 1036 $out = undef;
216     WebPAC::Normalize::MARC::_clean();
217 dpavlin 538 }
218    
219     =head2 _set_lookup
220    
221     Set current lookup hash
222    
223     _set_lookup( $lookup );
224    
225     =cut
226    
227     my $lookup;
228    
229     sub _set_lookup {
230     $lookup = shift;
231     }
232    
233 dpavlin 707 =head2 _get_lookup
234    
235     Get current lookup hash
236    
237     my $lookup = _get_lookup();
238    
239     =cut
240    
241     sub _get_lookup {
242     return $lookup;
243     }
244    
245 dpavlin 736 =head2 _set_load_row
246 dpavlin 725
247     Setup code reference which will return L<data_structure> from
248     L<WebPAC::Store>
249    
250 dpavlin 736 _set_load_row(sub {
251 dpavlin 725 my ($database,$input,$mfn) = @_;
252 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
253 dpavlin 725 });
254    
255     =cut
256    
257 dpavlin 736 sub _set_load_row {
258 dpavlin 725 my $coderef = shift;
259     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
260    
261 dpavlin 736 $load_row_coderef = $coderef;
262 dpavlin 725 }
263    
264 dpavlin 554 =head2 _debug
265    
266     Change level of debug warnings
267    
268     _debug( 2 );
269    
270     =cut
271    
272     sub _debug {
273     my $l = shift;
274     return $debug unless defined($l);
275 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
276 dpavlin 554 $debug = $l;
277 dpavlin 1036 $WebPAC::Normalize::MARC::debug = $debug;
278 dpavlin 554 }
279    
280 dpavlin 540 =head1 Functions to create C<data_structure>
281    
282     Those functions generally have to first in your normalization file.
283    
284 dpavlin 1066 =head2 to
285    
286     Generic way to set values for some name
287    
288     to('field-name', 'name-value' => rec('200','a') );
289    
290     There are many helpers defined below which might be easier to use.
291    
292     =cut
293    
294     sub to {
295     my $type = shift or confess "need type -- BUG?";
296     my $name = shift or confess "needs name as first argument";
297     my @o = grep { defined($_) && $_ ne '' } @_;
298     return unless (@o);
299     $out->{$name}->{$type} = \@o;
300     }
301    
302 dpavlin 912 =head2 search_display
303 dpavlin 433
304 dpavlin 912 Define output for L<search> and L<display> at the same time
305 dpavlin 433
306 dpavlin 912 search_display('Title', rec('200','a') );
307 dpavlin 13
308     =cut
309    
310 dpavlin 912 sub search_display {
311     my $name = shift or die "search_display needs name as first argument";
312 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
313     return unless (@o);
314     $out->{$name}->{search} = \@o;
315     $out->{$name}->{display} = \@o;
316     }
317 dpavlin 13
318 dpavlin 915 =head2 tag
319    
320 dpavlin 1066 Old name for L<search_display>, it will probably be removed at one point.
321 dpavlin 915
322     =cut
323    
324     sub tag {
325     search_display( @_ );
326     }
327    
328 dpavlin 536 =head2 display
329 dpavlin 13
330 dpavlin 912 Define output just for I<display>
331 dpavlin 125
332 dpavlin 536 @v = display('Title', rec('200','a') );
333 dpavlin 125
334 dpavlin 536 =cut
335 dpavlin 125
336 dpavlin 1066 sub display { to( 'display', @_ ) }
337 dpavlin 13
338 dpavlin 536 =head2 search
339 dpavlin 13
340 dpavlin 536 Prepare values just for I<search>
341 dpavlin 13
342 dpavlin 536 @v = search('Title', rec('200','a') );
343 dpavlin 433
344 dpavlin 536 =cut
345 dpavlin 13
346 dpavlin 1066 sub search { to( 'search', @_ ) }
347 dpavlin 13
348 dpavlin 923 =head2 sorted
349    
350     Insert into lists which will be automatically sorted
351    
352     sorted('Title', rec('200','a') );
353    
354     =cut
355    
356 dpavlin 1066 sub sorted { to( 'sorted', @_ ) }
357 dpavlin 923
358 dpavlin 1205 =head2 row
359 dpavlin 923
360 dpavlin 1205 Insert new row of data into output module
361    
362     row( column => 'foo', column2 => 'bar' );
363    
364     =cut
365    
366     use Data::Dump qw/dump/;
367    
368     sub row {
369 dpavlin 1210 die "array doesn't have odd number of elements but $#_: ",dump( @_ ) if $#_ % 2 == 1;
370     my $table = shift @_;
371     push @{ $out->{'_rows'}->{$table} }, {@_};
372 dpavlin 1205 }
373    
374    
375 dpavlin 540 =head1 Functions to extract data from input
376    
377     This function should be used inside functions to create C<data_structure> described
378     above.
379    
380 dpavlin 641 =head2 _pack_subfields_hash
381    
382 dpavlin 669 @subfields = _pack_subfields_hash( $h );
383     $subfields = _pack_subfields_hash( $h, 1 );
384 dpavlin 641
385 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
386     with subfields (denoted by C<^>) and values.
387    
388 dpavlin 641 =cut
389    
390     sub _pack_subfields_hash {
391    
392 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
393 dpavlin 641
394 dpavlin 1118 my ($hash,$include_subfields) = @_;
395 dpavlin 641
396 dpavlin 831 # sanity and ease of use
397 dpavlin 1118 return $hash if (ref($hash) ne 'HASH');
398 dpavlin 831
399 dpavlin 1118 my $h = dclone( $hash );
400    
401 dpavlin 641 if ( defined($h->{subfields}) ) {
402     my $sfs = delete $h->{subfields} || die "no subfields?";
403     my @out;
404     while (@$sfs) {
405     my $sf = shift @$sfs;
406     push @out, '^' . $sf if ($include_subfields);
407     my $o = shift @$sfs;
408     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
409     # single element subfields are not arrays
410 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
411 dpavlin 667
412 dpavlin 641 push @out, $h->{$sf};
413     } else {
414 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
415 dpavlin 641 push @out, $h->{$sf}->[$o];
416     }
417     }
418 dpavlin 667 if ($include_subfields) {
419     return join('', @out);
420     } else {
421     return @out;
422     }
423 dpavlin 641 } else {
424 dpavlin 667 if ($include_subfields) {
425     my $out = '';
426 dpavlin 668 foreach my $sf (sort keys %$h) {
427 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
428     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
429     } else {
430     $out .= '^' . $sf . $h->{$sf};
431     }
432     }
433     return $out;
434     } else {
435     # FIXME this should probably be in alphabetical order instead of hash order
436     values %{$h};
437     }
438 dpavlin 641 }
439     }
440    
441 dpavlin 536 =head2 rec1
442 dpavlin 371
443 dpavlin 536 Return all values in some field
444 dpavlin 371
445 dpavlin 536 @v = rec1('200')
446 dpavlin 15
447 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
448 dpavlin 15
449 dpavlin 536 =cut
450 dpavlin 15
451 dpavlin 536 sub rec1 {
452     my $f = shift;
453 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
454 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
455 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
456 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
457 dpavlin 641 my @out;
458     foreach my $h ( @{ $rec->{$f} } ) {
459     if (ref($h) eq 'HASH') {
460     push @out, ( _pack_subfields_hash( $h ) );
461 dpavlin 31 } else {
462 dpavlin 641 push @out, $h;
463 dpavlin 31 }
464 dpavlin 641 }
465     return @out;
466 dpavlin 536 } elsif( defined($rec->{$f}) ) {
467     return $rec->{$f};
468 dpavlin 15 }
469     }
470    
471 dpavlin 536 =head2 rec2
472 dpavlin 15
473 dpavlin 536 Return all values in specific field and subfield
474 dpavlin 13
475 dpavlin 536 @v = rec2('200','a')
476 dpavlin 13
477     =cut
478    
479 dpavlin 536 sub rec2 {
480     my $f = shift;
481     return unless (defined($rec && $rec->{$f}));
482     my $sf = shift;
483 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
484 dpavlin 589 return map {
485     if (ref($_->{$sf}) eq 'ARRAY') {
486     @{ $_->{$sf} };
487     } else {
488     $_->{$sf};
489     }
490 dpavlin 1048 } grep { ref($_) eq 'HASH' && defined $_->{$sf} } @{ $rec->{$f} };
491 dpavlin 536 }
492 dpavlin 13
493 dpavlin 536 =head2 rec
494 dpavlin 13
495 dpavlin 536 syntaxtic sugar for
496 dpavlin 13
497 dpavlin 536 @v = rec('200')
498     @v = rec('200','a')
499 dpavlin 13
500 dpavlin 750 If rec() returns just single value, it will
501     return scalar, not array.
502    
503 dpavlin 536 =cut
504 dpavlin 373
505 dpavlin 536 sub rec {
506 dpavlin 583 my @out;
507 dpavlin 536 if ($#_ == 0) {
508 dpavlin 583 @out = rec1(@_);
509 dpavlin 536 } elsif ($#_ == 1) {
510 dpavlin 583 @out = rec2(@_);
511 dpavlin 13 }
512 dpavlin 750 if ($#out == 0 && ! wantarray) {
513     return $out[0];
514     } elsif (@out) {
515 dpavlin 583 return @out;
516     } else {
517     return '';
518     }
519 dpavlin 13 }
520    
521 dpavlin 1012 =head2 frec
522    
523     Returns first value from field
524    
525     $v = frec('200');
526     $v = frec('200','a');
527    
528     =cut
529    
530     sub frec {
531     my @out = rec(@_);
532     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
533     return shift @out;
534     }
535    
536 dpavlin 1015 =head2 frec_eq
537    
538     =head2 frec_ne
539    
540     Check if first values from two fields are same or different
541    
542     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
543     # values are same
544     } else {
545     # values are different
546     }
547    
548     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
549     could write something like:
550    
551     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
552     # yada tada
553     }
554    
555     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
556     in order to parse text and create invalid function C<eqfrec>.
557    
558     =cut
559    
560     sub frec_eq {
561     my ( $f1,$sf1, $f2, $sf2 ) = @_;
562     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
563     }
564    
565     sub frec_ne {
566     return ! frec_eq( @_ );
567     }
568    
569 dpavlin 536 =head2 regex
570 dpavlin 15
571 dpavlin 536 Apply regex to some or all values
572 dpavlin 15
573 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
574 dpavlin 15
575     =cut
576    
577 dpavlin 536 sub regex {
578     my $r = shift;
579     my @out;
580 dpavlin 550 #warn "r: $r\n", dump(\@_);
581 dpavlin 536 foreach my $t (@_) {
582     next unless ($t);
583     eval "\$t =~ $r";
584     push @out, $t if ($t && $t ne '');
585 dpavlin 368 }
586 dpavlin 536 return @out;
587 dpavlin 15 }
588    
589 dpavlin 536 =head2 prefix
590 dpavlin 15
591 dpavlin 536 Prefix all values with a string
592 dpavlin 15
593 dpavlin 536 @v = prefix( 'my_', @v );
594 dpavlin 15
595     =cut
596    
597 dpavlin 536 sub prefix {
598 dpavlin 819 my $p = shift;
599     return @_ unless defined( $p );
600 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
601     }
602 dpavlin 15
603 dpavlin 536 =head2 suffix
604 dpavlin 15
605 dpavlin 536 suffix all values with a string
606 dpavlin 15
607 dpavlin 536 @v = suffix( '_my', @v );
608 dpavlin 15
609 dpavlin 536 =cut
610 dpavlin 15
611 dpavlin 536 sub suffix {
612 dpavlin 819 my $s = shift;
613     return @_ unless defined( $s );
614 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
615 dpavlin 15 }
616    
617 dpavlin 536 =head2 surround
618 dpavlin 13
619 dpavlin 536 surround all values with a two strings
620 dpavlin 13
621 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
622 dpavlin 13
623     =cut
624    
625 dpavlin 536 sub surround {
626 dpavlin 819 my $p = shift;
627     my $s = shift;
628     $p = '' unless defined( $p );
629     $s = '' unless defined( $s );
630 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
631 dpavlin 13 }
632    
633 dpavlin 536 =head2 first
634 dpavlin 13
635 dpavlin 536 Return first element
636 dpavlin 15
637 dpavlin 536 $v = first( @v );
638 dpavlin 13
639     =cut
640    
641 dpavlin 536 sub first {
642     my $r = shift;
643     return $r;
644 dpavlin 13 }
645    
646 dpavlin 536 =head2 lookup
647 dpavlin 13
648 dpavlin 536 Consult lookup hashes for some value
649 dpavlin 13
650 dpavlin 725 @v = lookup(
651     sub {
652     'ffkk/peri/mfn'.rec('000')
653     },
654     'ffkk','peri','200-a-200-e',
655     sub {
656     first(rec(200,'a')).' '.first(rec('200','e'))
657     }
658     );
659 dpavlin 13
660 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
661     normal lookup definition in C<conf/lookup/something.pl> which looks like:
662 dpavlin 707
663 dpavlin 725 lookup(
664     # which results to return from record recorded in lookup
665     sub { 'ffkk/peri/mfn' . rec('000') },
666     # from which database and input
667     'ffkk','peri',
668     # such that following values match
669     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
670     # if this part is missing, we will try to match same fields
671     # from lookup record and current one, or you can override
672     # which records to use from current record using
673     sub { rec('900','x') . ' ' . rec('900','y') },
674     )
675    
676     You can think about this lookup as SQL (if that helps):
677    
678     select
679     sub { what }
680     from
681     database, input
682     where
683     sub { filter from lookuped record }
684     having
685     sub { optional filter on current record }
686    
687     Easy as pie, right?
688    
689 dpavlin 13 =cut
690    
691 dpavlin 536 sub lookup {
692 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
693    
694 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
695 dpavlin 725
696 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
697 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
698    
699 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
700 dpavlin 725
701     my $mfns;
702     my @having = $having->();
703    
704 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
705 dpavlin 725
706     foreach my $h ( @having ) {
707     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
708 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
709 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
710     }
711 dpavlin 536 }
712 dpavlin 725
713     return unless ($mfns);
714    
715     my @mfns = sort keys %$mfns;
716    
717 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
718 dpavlin 725
719     my $old_rec = $rec;
720     my @out;
721    
722     foreach my $mfn (@mfns) {
723 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
724 dpavlin 725
725 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
726 dpavlin 725
727     my @vals = $what->();
728    
729     push @out, ( @vals );
730    
731 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
732 dpavlin 725 }
733    
734     # if (ref($lookup->{$k}) eq 'ARRAY') {
735     # return @{ $lookup->{$k} };
736     # } else {
737     # return $lookup->{$k};
738     # }
739    
740     $rec = $old_rec;
741    
742 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
743 dpavlin 725
744 dpavlin 740 if ($#out == 0) {
745     return $out[0];
746     } else {
747     return @out;
748     }
749 dpavlin 13 }
750    
751 dpavlin 707 =head2 save_into_lookup
752    
753 dpavlin 725 Save value into lookup. It associates current database, input
754     and specific keys with one or more values which will be
755     associated over MFN.
756 dpavlin 707
757 dpavlin 725 MFN will be extracted from first occurence current of field 000
758     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
759    
760     my $nr = save_into_lookup($database,$input,$key,sub {
761 dpavlin 707 # code which produce one or more values
762     });
763    
764 dpavlin 725 It returns number of items saved.
765 dpavlin 707
766 dpavlin 725 This function shouldn't be called directly, it's called from code created by
767     L<WebPAC::Parser>.
768    
769 dpavlin 707 =cut
770    
771     sub save_into_lookup {
772 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
773     die "save_into_lookup needs database" unless defined($database);
774     die "save_into_lookup needs input" unless defined($input);
775     die "save_into_lookup needs key" unless defined($key);
776 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
777 dpavlin 725
778 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
779 dpavlin 725
780     my $mfn =
781     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
782     defined($config->{_mfn}) ? $config->{_mfn} :
783     die "mfn not defined or zero";
784    
785     my $nr = 0;
786    
787 dpavlin 707 foreach my $v ( $coderef->() ) {
788 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
789 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
790 dpavlin 725 $nr++;
791 dpavlin 707 }
792 dpavlin 725
793     return $nr;
794 dpavlin 707 }
795    
796 dpavlin 595 =head2 config
797    
798     Consult config values stored in C<config.yml>
799    
800     # return database code (key under databases in yaml)
801     $database_code = config(); # use _ from hash
802     $database_name = config('name');
803     $database_input_name = config('input name');
804    
805     Up to three levels are supported.
806    
807     =cut
808    
809     sub config {
810     return unless ($config);
811    
812     my $p = shift;
813    
814     $p ||= '';
815    
816     my $v;
817    
818     warn "### getting config($p)\n" if ($debug > 1);
819    
820     my @p = split(/\s+/,$p);
821     if ($#p < 0) {
822     $v = $config->{ '_' }; # special, database code
823     } else {
824    
825     my $c = dclone( $config );
826    
827     foreach my $k (@p) {
828     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
829     if (ref($c) eq 'ARRAY') {
830     $c = shift @$c;
831     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
832     last;
833     }
834    
835     if (! defined($c->{$k}) ) {
836     $c = undef;
837     last;
838     } else {
839     $c = $c->{$k};
840     }
841     }
842     $v = $c if ($c);
843    
844     }
845    
846     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
847     warn "config( '$p' ) is empty\n" if (! $v);
848    
849     return $v;
850     }
851    
852     =head2 id
853    
854     Returns unique id of this record
855    
856     $id = id();
857    
858     Returns C<42/2> for 2nd occurence of MFN 42.
859    
860     =cut
861    
862     sub id {
863     my $mfn = $config->{_mfn} || die "no _mfn in config data";
864 dpavlin 1036 return $mfn . ( WebPAC::Normalize::MARC::_created_marc_records() || '' );
865 dpavlin 595 }
866    
867 dpavlin 536 =head2 join_with
868 dpavlin 13
869 dpavlin 536 Joins walues with some delimiter
870 dpavlin 10
871 dpavlin 536 $v = join_with(", ", @v);
872 dpavlin 10
873 dpavlin 536 =cut
874 dpavlin 10
875 dpavlin 536 sub join_with {
876     my $d = shift;
877 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
878 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
879     return '' unless defined($v);
880     return $v;
881 dpavlin 536 }
882 dpavlin 10
883 dpavlin 562 =head2 split_rec_on
884    
885     Split record subfield on some regex and take one of parts out
886    
887     $a_before_semi_column =
888     split_rec_on('200','a', /\s*;\s*/, $part);
889    
890     C<$part> is optional number of element. First element is
891     B<1>, not 0!
892    
893     If there is no C<$part> parameter or C<$part> is 0, this function will
894     return all values produced by splitting.
895    
896     =cut
897    
898     sub split_rec_on {
899     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
900    
901     my ($fld, $sf, $regex, $part) = @_;
902 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
903 dpavlin 562
904     my @r = rec( $fld, $sf );
905     my $v = shift @r;
906 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
907 dpavlin 562
908 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
909 dpavlin 566
910 dpavlin 562 my @s = split( $regex, $v );
911 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
912 dpavlin 566 if ($part && $part > 0) {
913 dpavlin 562 return $s[ $part - 1 ];
914     } else {
915 dpavlin 571 return @s;
916 dpavlin 562 }
917     }
918    
919 dpavlin 785 my $hash;
920    
921     =head2 set
922    
923     set( key => 'value' );
924    
925     =cut
926    
927     sub set {
928     my ($k,$v) = @_;
929 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
930 dpavlin 785 $hash->{$k} = $v;
931     };
932    
933     =head2 get
934    
935     get( 'key' );
936    
937     =cut
938    
939     sub get {
940     my $k = shift || return;
941     my $v = $hash->{$k};
942 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
943 dpavlin 785 return $v;
944     }
945    
946 dpavlin 791 =head2 count
947 dpavlin 785
948 dpavlin 791 if ( count( @result ) == 1 ) {
949     # do something if only 1 result is there
950     }
951    
952     =cut
953    
954     sub count {
955 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
956 dpavlin 791 return @_ . '';
957     }
958    
959 dpavlin 536 # END
960     1;

  ViewVC Help
Powered by ViewVC 1.1.26