/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1021 - (hide annotations)
Sat Nov 10 11:11:16 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 31988 byte(s)
 r1579@llin:  dpavlin | 2007-11-10 11:59:27 +0100
 Begin extraction of MARC functionality from WebPAC::Normalize to
 WebPAC::Normalize::MARC

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 547 marc marc_indicators marc_repeatable_subfield
13 dpavlin 815 marc_compose marc_leader marc_fixed
14 dpavlin 813 marc_duplicate marc_remove marc_count
15 dpavlin 604 marc_original_order
16 dpavlin 1013 marc_template
17 dpavlin 540
18 dpavlin 536 rec1 rec2 rec
19 dpavlin 1015 frec frec_eq frec_ne
20 dpavlin 536 regex prefix suffix surround
21     first lookup join_with
22 dpavlin 707 save_into_lookup
23 dpavlin 562
24     split_rec_on
25 dpavlin 785
26     get set
27 dpavlin 791 count
28 dpavlin 980
29 dpavlin 536 /;
30 dpavlin 10
31     use warnings;
32     use strict;
33 dpavlin 536
34     #use base qw/WebPAC::Common/;
35 dpavlin 550 use Data::Dump qw/dump/;
36 dpavlin 574 use Storable qw/dclone/;
37 dpavlin 725 use Carp qw/confess/;
38 dpavlin 10
39 dpavlin 550 # debugging warn(s)
40     my $debug = 0;
41    
42 dpavlin 1021 # FIXME
43 dpavlin 980 use WebPAC::Normalize::ISBN;
44     push @EXPORT, ( 'isbn_10', 'isbn_13' );
45 dpavlin 550
46 dpavlin 1021 use WebPAC::Normalize::MARC;
47     push @EXPORT, ( 'marc_template' );
48    
49 dpavlin 10 =head1 NAME
50    
51 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
52 dpavlin 10
53     =cut
54    
55 dpavlin 1021 our $VERSION = '0.35';
56 dpavlin 10
57     =head1 SYNOPSIS
58    
59 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
60     from input records using perl functions which are specialized for set
61     processing.
62 dpavlin 10
63 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
64     means that you check it's validity before running WebPAC using
65     C<perl -c normalize.pl>.
66 dpavlin 15
67 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
68 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
69 dpavlin 547 C<marc>.
70 dpavlin 15
71 dpavlin 10 =head1 FUNCTIONS
72    
73 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
74     All other functions are available for use within normalisation rules.
75    
76 dpavlin 536 =head2 data_structure
77 dpavlin 10
78 dpavlin 536 Return data structure
79 dpavlin 13
80 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
81 dpavlin 725 lookup => $lookup_hash,
82 dpavlin 536 row => $row,
83     rules => $normalize_pl_config,
84 dpavlin 541 marc_encoding => 'utf-8',
85 dpavlin 595 config => $config,
86 dpavlin 736 load_row_coderef => sub {
87 dpavlin 979 my ($database,$input,$mfn) = @_;
88 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
89 dpavlin 725 },
90 dpavlin 13 );
91    
92 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
93 dpavlin 540 other are optional.
94    
95 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
96 dpavlin 725 die if it's not defined.
97    
98 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
99 dpavlin 15
100 dpavlin 538 Since this function isn't exported you have to call it with
101     C<WebPAC::Normalize::data_structure>.
102    
103 dpavlin 10 =cut
104    
105 dpavlin 736 my $load_row_coderef;
106 dpavlin 725
107 dpavlin 536 sub data_structure {
108     my $arg = {@_};
109 dpavlin 13
110 dpavlin 536 die "need row argument" unless ($arg->{row});
111     die "need normalisation argument" unless ($arg->{rules});
112 dpavlin 31
113 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
114 dpavlin 983 _set_ds( $arg->{row} );
115 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
116 dpavlin 541 _clean_ds( %{ $arg } );
117 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
118 dpavlin 725
119 dpavlin 1011 no strict 'subs';
120     no warnings 'redefine';
121     eval "$arg->{rules};";
122 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
123 dpavlin 540
124 dpavlin 538 return _get_ds();
125 dpavlin 10 }
126    
127 dpavlin 983 =head2 _set_ds
128 dpavlin 13
129 dpavlin 536 Set current record hash
130 dpavlin 433
131 dpavlin 983 _set_ds( $rec );
132 dpavlin 433
133     =cut
134    
135 dpavlin 536 my $rec;
136 dpavlin 433
137 dpavlin 983 sub _set_ds {
138 dpavlin 536 $rec = shift or die "no record hash";
139 dpavlin 433 }
140    
141 dpavlin 1021 =head2
142    
143     my $rec = _get_rec();
144    
145     =cut
146    
147     sub _get_rec { $rec };
148    
149 dpavlin 595 =head2 _set_config
150    
151     Set current config hash
152    
153     _set_config( $config );
154    
155     Magic keys are:
156    
157     =over 4
158    
159     =item _
160    
161     Code of current database
162    
163     =item _mfn
164    
165     Current MFN
166    
167     =back
168    
169     =cut
170    
171     my $config;
172    
173     sub _set_config {
174     $config = shift;
175     }
176    
177 dpavlin 538 =head2 _get_ds
178    
179     Return hash formatted as data structure
180    
181     my $ds = _get_ds();
182    
183     =cut
184    
185 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
186 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
187 dpavlin 538
188     sub _get_ds {
189 dpavlin 982 #warn "## out = ",dump($out);
190 dpavlin 538 return $out;
191     }
192    
193     =head2 _clean_ds
194    
195     Clean data structure hash for next record
196    
197     _clean_ds();
198    
199     =cut
200    
201     sub _clean_ds {
202 dpavlin 541 my $a = {@_};
203 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
204 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
205 dpavlin 541 $marc_encoding = $a->{marc_encoding};
206 dpavlin 538 }
207    
208     =head2 _set_lookup
209    
210     Set current lookup hash
211    
212     _set_lookup( $lookup );
213    
214     =cut
215    
216     my $lookup;
217    
218     sub _set_lookup {
219     $lookup = shift;
220     }
221    
222 dpavlin 707 =head2 _get_lookup
223    
224     Get current lookup hash
225    
226     my $lookup = _get_lookup();
227    
228     =cut
229    
230     sub _get_lookup {
231     return $lookup;
232     }
233    
234 dpavlin 736 =head2 _set_load_row
235 dpavlin 725
236     Setup code reference which will return L<data_structure> from
237     L<WebPAC::Store>
238    
239 dpavlin 736 _set_load_row(sub {
240 dpavlin 725 my ($database,$input,$mfn) = @_;
241 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
242 dpavlin 725 });
243    
244     =cut
245    
246 dpavlin 736 sub _set_load_row {
247 dpavlin 725 my $coderef = shift;
248     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
249    
250 dpavlin 736 $load_row_coderef = $coderef;
251 dpavlin 725 }
252    
253 dpavlin 547 =head2 _get_marc_fields
254 dpavlin 540
255 dpavlin 547 Get all fields defined by calls to C<marc>
256 dpavlin 540
257 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
258 dpavlin 540
259 dpavlin 543 We are using I<magic> which detect repeatable fields only from
260     sequence of field/subfield data generated by normalization.
261    
262 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
263     if any of indicators are different.
264 dpavlin 543
265 dpavlin 554 This is sane for most cases. Something like:
266 dpavlin 543
267 dpavlin 554 900a-1 900b-1 900c-1
268     900a-2 900b-2
269     900a-3
270    
271     will be created from any combination of:
272    
273     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
274    
275     and following rules:
276    
277     marc('900','a', rec('200','a') );
278     marc('900','b', rec('200','b') );
279     marc('900','c', rec('200','c') );
280    
281     which might not be what you have in mind. If you need repeatable subfield,
282     define it using C<marc_repeatable_subfield> like this:
283    
284 dpavlin 574 marc_repeatable_subfield('900','a');
285     marc('900','a', rec('200','a') );
286     marc('900','b', rec('200','b') );
287     marc('900','c', rec('200','c') );
288 dpavlin 554
289 dpavlin 574 will create:
290    
291     900a-1 900a-2 900a-3 900b-1 900c-1
292     900b-2
293    
294     There is also support for returning next or specific using:
295    
296     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
297     # do something with $mf
298     }
299    
300     will always return fields from next MARC record or
301    
302     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
303    
304     will return 42th copy record (if it exists).
305    
306 dpavlin 540 =cut
307    
308 dpavlin 813 my $fetch_pos;
309    
310 dpavlin 547 sub _get_marc_fields {
311 dpavlin 550
312 dpavlin 574 my $arg = {@_};
313     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
314 dpavlin 813 $fetch_pos = $marc_fetch_offset;
315 dpavlin 574 if ($arg->{offset}) {
316 dpavlin 813 $fetch_pos = $arg->{offset};
317 dpavlin 574 } elsif($arg->{fetch_next}) {
318     $marc_fetch_offset++;
319     }
320 dpavlin 550
321 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
322    
323     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
324    
325 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
326 dpavlin 574
327 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
328 dpavlin 574
329     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
330    
331 dpavlin 550 # first, sort all existing fields
332     # XXX might not be needed, but modern perl might randomize elements in hash
333     my @sorted_marc_record = sort {
334 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
335 dpavlin 574 } @{ $marc_rec };
336 dpavlin 550
337 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
338 dpavlin 562
339 dpavlin 550 # output marc fields
340 dpavlin 542 my @m;
341 dpavlin 550
342     # count unique field-subfields (used for offset when walking to next subfield)
343     my $u;
344 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
345 dpavlin 550
346     if ($debug) {
347 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
348 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
349 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
350     warn "## subfield count = ", dump( $u ), $/;
351 dpavlin 550 }
352    
353     my $len = $#sorted_marc_record;
354     my $visited;
355     my $i = 0;
356     my $field;
357    
358     foreach ( 0 .. $len ) {
359    
360     # find next element which isn't visited
361     while ($visited->{$i}) {
362     $i = ($i + 1) % ($len + 1);
363 dpavlin 542 }
364    
365 dpavlin 550 # mark it visited
366     $visited->{$i}++;
367    
368 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
369 dpavlin 550
370     # field and subfield which is key for
371     # marc_repeatable_subfield and u
372 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
373 dpavlin 550
374     if ($debug > 1) {
375    
376     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
377     print "### this [$i]: ", dump( $row ),$/;
378     print "### sf: ", $row->[3], " vs ", $field->[3],
379     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
380     if ($#$field >= 0);
381    
382     }
383    
384     # if field exists
385     if ( $#$field >= 0 ) {
386     if (
387     $row->[0] ne $field->[0] || # field
388     $row->[1] ne $field->[1] || # i1
389     $row->[2] ne $field->[2] # i2
390     ) {
391     push @m, $field;
392     warn "## saved/1 ", dump( $field ),$/ if ($debug);
393     $field = $row;
394    
395     } elsif (
396     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
397     ||
398     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
399     ! $marc_repeatable_subfield->{ $fsf }
400     )
401     ) {
402     push @m, $field;
403     warn "## saved/2 ", dump( $field ),$/ if ($debug);
404     $field = $row;
405    
406     } else {
407     # append new subfields to existing field
408     push @$field, ( $row->[3], $row->[4] );
409     }
410     } else {
411     # insert first field
412     $field = $row;
413     }
414    
415     if (! $marc_repeatable_subfield->{ $fsf }) {
416     # make step to next subfield
417     $i = ($i + $u->{ $fsf } ) % ($len + 1);
418     }
419 dpavlin 542 }
420    
421 dpavlin 550 if ($#$field >= 0) {
422     push @m, $field;
423     warn "## saved/3 ", dump( $field ),$/ if ($debug);
424     }
425 dpavlin 542
426 dpavlin 579 return \@m;
427 dpavlin 540 }
428    
429 dpavlin 813 =head2 _get_marc_leader
430    
431     Return leader from currently fetched record by L</_get_marc_fields>
432    
433     print WebPAC::Normalize::_get_marc_leader();
434    
435     =cut
436    
437     sub _get_marc_leader {
438     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
439     return $marc_leader->[ $fetch_pos ];
440     }
441    
442 dpavlin 554 =head2 _debug
443    
444     Change level of debug warnings
445    
446     _debug( 2 );
447    
448     =cut
449    
450     sub _debug {
451     my $l = shift;
452     return $debug unless defined($l);
453 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
454 dpavlin 554 $debug = $l;
455     }
456    
457 dpavlin 540 =head1 Functions to create C<data_structure>
458    
459     Those functions generally have to first in your normalization file.
460    
461 dpavlin 912 =head2 search_display
462 dpavlin 433
463 dpavlin 912 Define output for L<search> and L<display> at the same time
464 dpavlin 433
465 dpavlin 912 search_display('Title', rec('200','a') );
466 dpavlin 13
467    
468     =cut
469    
470 dpavlin 912 sub search_display {
471     my $name = shift or die "search_display needs name as first argument";
472 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
473     return unless (@o);
474     $out->{$name}->{search} = \@o;
475     $out->{$name}->{display} = \@o;
476     }
477 dpavlin 13
478 dpavlin 915 =head2 tag
479    
480     Old name for L<search_display>, but supported
481    
482     =cut
483    
484     sub tag {
485     search_display( @_ );
486     }
487    
488 dpavlin 536 =head2 display
489 dpavlin 13
490 dpavlin 912 Define output just for I<display>
491 dpavlin 125
492 dpavlin 536 @v = display('Title', rec('200','a') );
493 dpavlin 125
494 dpavlin 536 =cut
495 dpavlin 125
496 dpavlin 923 sub _field {
497     my $type = shift or confess "need type -- BUG?";
498     my $name = shift or confess "needs name as first argument";
499 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
500     return unless (@o);
501 dpavlin 923 $out->{$name}->{$type} = \@o;
502 dpavlin 536 }
503 dpavlin 13
504 dpavlin 923 sub display { _field( 'display', @_ ) }
505    
506 dpavlin 536 =head2 search
507 dpavlin 13
508 dpavlin 536 Prepare values just for I<search>
509 dpavlin 13
510 dpavlin 536 @v = search('Title', rec('200','a') );
511 dpavlin 433
512 dpavlin 536 =cut
513 dpavlin 13
514 dpavlin 923 sub search { _field( 'search', @_ ) }
515 dpavlin 13
516 dpavlin 923 =head2 sorted
517    
518     Insert into lists which will be automatically sorted
519    
520     sorted('Title', rec('200','a') );
521    
522     =cut
523    
524     sub sorted { _field( 'sorted', @_ ) }
525    
526    
527 dpavlin 564 =head2 marc_leader
528    
529     Setup fields within MARC leader or get leader
530    
531     marc_leader('05','c');
532     my $leader = marc_leader();
533    
534     =cut
535    
536     sub marc_leader {
537     my ($offset,$value) = @_;
538    
539     if ($offset) {
540 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
541 dpavlin 564 } else {
542 dpavlin 813
543     if (defined($marc_leader)) {
544     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
545     return $marc_leader->[ $marc_record_offset ];
546     } else {
547     return;
548     }
549 dpavlin 564 }
550     }
551    
552 dpavlin 815 =head2 marc_fixed
553    
554     Create control/indentifier fields with values in fixed positions
555    
556     marc_fixed('008', 00, '070402');
557     marc_fixed('008', 39, '|');
558    
559     Positions not specified will be filled with spaces (C<0x20>).
560    
561     There will be no effort to extend last specified value to full length of
562     field in standard.
563    
564     =cut
565    
566     sub marc_fixed {
567     my ($f, $pos, $val) = @_;
568     die "need marc(field, position, value)" unless defined($f) && defined($pos);
569    
570 dpavlin 889 confess "need val" unless defined $val;
571    
572 dpavlin 815 my $update = 0;
573    
574     map {
575     if ($_->[0] eq $f) {
576     my $old = $_->[1];
577 dpavlin 889 if (length($old) <= $pos) {
578 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
579 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
580 dpavlin 815 } else {
581     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
582 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
583 dpavlin 815 }
584     $update++;
585     }
586     } @{ $marc_record->[ $marc_record_offset ] };
587    
588     if (! $update) {
589     my $v = ' ' x $pos . $val;
590     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
591 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
592 dpavlin 815 }
593     }
594    
595 dpavlin 547 =head2 marc
596 dpavlin 540
597     Save value for MARC field
598    
599 dpavlin 547 marc('900','a', rec('200','a') );
600 dpavlin 571 marc('001', rec('000') );
601 dpavlin 540
602     =cut
603    
604 dpavlin 547 sub marc {
605     my $f = shift or die "marc needs field";
606     die "marc field must be numer" unless ($f =~ /^\d+$/);
607 dpavlin 540
608 dpavlin 571 my $sf;
609     if ($f >= 10) {
610     $sf = shift or die "marc needs subfield";
611     }
612 dpavlin 540
613 dpavlin 541 foreach (@_) {
614     my $v = $_; # make var read-write for Encode
615 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
616 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
617 dpavlin 571 if (defined $sf) {
618 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
619 dpavlin 571 } else {
620 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
621 dpavlin 571 }
622 dpavlin 540 }
623     }
624    
625 dpavlin 547 =head2 marc_repeatable_subfield
626    
627     Save values for MARC repetable subfield
628    
629     marc_repeatable_subfield('910', 'z', rec('909') );
630    
631     =cut
632    
633     sub marc_repeatable_subfield {
634 dpavlin 550 my ($f,$sf) = @_;
635     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
636     $marc_repeatable_subfield->{ $f . $sf }++;
637 dpavlin 547 marc(@_);
638     }
639    
640     =head2 marc_indicators
641    
642     Set both indicators for MARC field
643    
644     marc_indicators('900', ' ', 1);
645    
646     Any indicator value other than C<0-9> will be treated as undefined.
647    
648     =cut
649    
650     sub marc_indicators {
651     my $f = shift || die "marc_indicators need field!\n";
652     my ($i1,$i2) = @_;
653     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
654     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
655    
656     $i1 = ' ' if ($i1 !~ /^\d$/);
657     $i2 = ' ' if ($i2 !~ /^\d$/);
658 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
659 dpavlin 547 }
660    
661 dpavlin 562 =head2 marc_compose
662 dpavlin 547
663 dpavlin 562 Save values for each MARC subfield explicitly
664    
665     marc_compose('900',
666     'a', rec('200','a')
667     'b', rec('201','a')
668     'a', rec('200','b')
669     'c', rec('200','c')
670     );
671    
672 dpavlin 603 If you specify C<+> for subfield, value will be appended
673     to previous defined subfield.
674    
675 dpavlin 562 =cut
676    
677     sub marc_compose {
678     my $f = shift or die "marc_compose needs field";
679     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
680    
681     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
682     my $m = [ $f, $i1, $i2 ];
683    
684 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
685    
686 dpavlin 619 if ($#_ % 2 != 1) {
687     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
688     }
689    
690 dpavlin 562 while (@_) {
691 dpavlin 619 my $sf = shift;
692 dpavlin 565 my $v = shift;
693 dpavlin 562
694     next unless (defined($v) && $v !~ /^\s*$/);
695 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
696 dpavlin 603 if ($sf ne '+') {
697     push @$m, ( $sf, $v );
698     } else {
699     $m->[ $#$m ] .= $v;
700     }
701 dpavlin 562 }
702    
703 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
704 dpavlin 562
705 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
706 dpavlin 562 }
707    
708 dpavlin 574 =head2 marc_duplicate
709 dpavlin 562
710 dpavlin 574 Generate copy of current MARC record and continue working on copy
711    
712     marc_duplicate();
713    
714     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
715     C<< _get_marc_fields( offset => 42 ) >>.
716    
717     =cut
718    
719     sub marc_duplicate {
720     my $m = $marc_record->[ -1 ];
721     die "can't duplicate record which isn't defined" unless ($m);
722     push @{ $marc_record }, dclone( $m );
723 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
724     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
725 dpavlin 574 $marc_record_offset = $#{ $marc_record };
726     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
727 dpavlin 813
728 dpavlin 574 }
729    
730     =head2 marc_remove
731    
732     Remove some field or subfield from MARC record.
733    
734     marc_remove('200');
735     marc_remove('200','a');
736    
737     This will erase field C<200> or C<200^a> from current MARC record.
738    
739 dpavlin 786 marc_remove('*');
740    
741     Will remove all fields in current MARC record.
742    
743 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
744     should probably just remove that subfield definition if you are not
745     using C<marc_duplicate>).
746    
747     FIXME: support fields < 10.
748    
749     =cut
750    
751     sub marc_remove {
752     my ($f, $sf) = @_;
753    
754     die "marc_remove needs record number" unless defined($f);
755    
756     my $marc = $marc_record->[ $marc_record_offset ];
757    
758     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
759    
760 dpavlin 786 if ($f eq '*') {
761    
762     delete( $marc_record->[ $marc_record_offset ] );
763 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
764 dpavlin 786
765     } else {
766    
767     my $i = 0;
768     foreach ( 0 .. $#{ $marc } ) {
769     last unless (defined $marc->[$i]);
770     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
771     if ($marc->[$i]->[0] eq $f) {
772     if (! defined $sf) {
773     # remove whole field
774     splice @$marc, $i, 1;
775     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
776     $i--;
777     } else {
778     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
779     my $o = ($j * 2) + 3;
780     if ($marc->[$i]->[$o] eq $sf) {
781     # remove subfield
782     splice @{$marc->[$i]}, $o, 2;
783     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
784     # is record now empty?
785     if ($#{ $marc->[$i] } == 2) {
786     splice @$marc, $i, 1;
787     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
788     $i--;
789     };
790     }
791 dpavlin 574 }
792     }
793     }
794 dpavlin 786 $i++;
795 dpavlin 574 }
796 dpavlin 786
797     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
798    
799     $marc_record->[ $marc_record_offset ] = $marc;
800 dpavlin 574 }
801    
802     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
803     }
804    
805 dpavlin 604 =head2 marc_original_order
806    
807     Copy all subfields preserving original order to marc field.
808    
809 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
810 dpavlin 604
811 dpavlin 616 Please note that field numbers are consistent with other commands (marc
812     field number first), but somewhat counter-intuitive (destination and then
813     source).
814    
815 dpavlin 604 You might want to use this command if you are just renaming subfields or
816     using pre-processing modify_record in C<config.yml> and don't need any
817     post-processing or want to preserve order of original subfields.
818    
819 dpavlin 616
820 dpavlin 604 =cut
821    
822     sub marc_original_order {
823    
824 dpavlin 616 my ($to, $from) = @_;
825 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
826    
827 dpavlin 616 return unless defined($rec->{$from});
828    
829     my $r = $rec->{$from};
830 dpavlin 1017 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
831 dpavlin 604
832     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
833 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
834 dpavlin 604
835     foreach my $d (@$r) {
836    
837 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
838 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
839 dpavlin 605 next;
840     }
841    
842 dpavlin 604 my @sfs = @{ $d->{subfields} };
843    
844     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
845    
846 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
847 dpavlin 604
848     my $m = [ $to, $i1, $i2 ];
849    
850     while (my $sf = shift @sfs) {
851 dpavlin 618
852     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
853 dpavlin 604 my $offset = shift @sfs;
854     die "corrupted sufields specification for field $from\n" unless defined($offset);
855    
856     my $v;
857     if (ref($d->{$sf}) eq 'ARRAY') {
858     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
859     } elsif ($offset == 0) {
860     $v = $d->{$sf};
861     } else {
862     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
863     }
864     push @$m, ( $sf, $v ) if (defined($v));
865     }
866    
867     if ($#{$m} > 2) {
868     push @{ $marc_record->[ $marc_record_offset ] }, $m;
869     }
870     }
871    
872     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
873     }
874    
875 dpavlin 1013
876 dpavlin 813 =head2 marc_count
877 dpavlin 604
878 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
879    
880     print "created ", marc_count(), " records";
881    
882     =cut
883    
884     sub marc_count {
885     return $#{ $marc_record };
886     }
887    
888 dpavlin 1021 =head2 _marc_push
889 dpavlin 813
890 dpavlin 1021 _marc_push( $marc );
891    
892     =cut
893    
894     sub _marc_push {
895     my $marc = shift || die "no marc?";
896     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
897     }
898    
899    
900 dpavlin 540 =head1 Functions to extract data from input
901    
902     This function should be used inside functions to create C<data_structure> described
903     above.
904    
905 dpavlin 641 =head2 _pack_subfields_hash
906    
907 dpavlin 669 @subfields = _pack_subfields_hash( $h );
908     $subfields = _pack_subfields_hash( $h, 1 );
909 dpavlin 641
910 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
911     with subfields (denoted by C<^>) and values.
912    
913 dpavlin 641 =cut
914    
915     sub _pack_subfields_hash {
916    
917 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
918 dpavlin 641
919     my ($h,$include_subfields) = @_;
920    
921 dpavlin 831 # sanity and ease of use
922     return $h if (ref($h) ne 'HASH');
923    
924 dpavlin 641 if ( defined($h->{subfields}) ) {
925     my $sfs = delete $h->{subfields} || die "no subfields?";
926     my @out;
927     while (@$sfs) {
928     my $sf = shift @$sfs;
929     push @out, '^' . $sf if ($include_subfields);
930     my $o = shift @$sfs;
931     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
932     # single element subfields are not arrays
933 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
934 dpavlin 667
935 dpavlin 641 push @out, $h->{$sf};
936     } else {
937 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
938 dpavlin 641 push @out, $h->{$sf}->[$o];
939     }
940     }
941 dpavlin 667 if ($include_subfields) {
942     return join('', @out);
943     } else {
944     return @out;
945     }
946 dpavlin 641 } else {
947 dpavlin 667 if ($include_subfields) {
948     my $out = '';
949 dpavlin 668 foreach my $sf (sort keys %$h) {
950 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
951     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
952     } else {
953     $out .= '^' . $sf . $h->{$sf};
954     }
955     }
956     return $out;
957     } else {
958     # FIXME this should probably be in alphabetical order instead of hash order
959     values %{$h};
960     }
961 dpavlin 641 }
962     }
963    
964 dpavlin 536 =head2 rec1
965 dpavlin 371
966 dpavlin 536 Return all values in some field
967 dpavlin 371
968 dpavlin 536 @v = rec1('200')
969 dpavlin 15
970 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
971 dpavlin 15
972 dpavlin 536 =cut
973 dpavlin 15
974 dpavlin 536 sub rec1 {
975     my $f = shift;
976 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
977 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
978 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
979 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
980 dpavlin 641 my @out;
981     foreach my $h ( @{ $rec->{$f} } ) {
982     if (ref($h) eq 'HASH') {
983     push @out, ( _pack_subfields_hash( $h ) );
984 dpavlin 31 } else {
985 dpavlin 641 push @out, $h;
986 dpavlin 31 }
987 dpavlin 641 }
988     return @out;
989 dpavlin 536 } elsif( defined($rec->{$f}) ) {
990     return $rec->{$f};
991 dpavlin 15 }
992     }
993    
994 dpavlin 536 =head2 rec2
995 dpavlin 15
996 dpavlin 536 Return all values in specific field and subfield
997 dpavlin 13
998 dpavlin 536 @v = rec2('200','a')
999 dpavlin 13
1000     =cut
1001    
1002 dpavlin 536 sub rec2 {
1003     my $f = shift;
1004     return unless (defined($rec && $rec->{$f}));
1005     my $sf = shift;
1006 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
1007 dpavlin 589 return map {
1008     if (ref($_->{$sf}) eq 'ARRAY') {
1009     @{ $_->{$sf} };
1010     } else {
1011     $_->{$sf};
1012     }
1013     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
1014 dpavlin 536 }
1015 dpavlin 13
1016 dpavlin 536 =head2 rec
1017 dpavlin 13
1018 dpavlin 536 syntaxtic sugar for
1019 dpavlin 13
1020 dpavlin 536 @v = rec('200')
1021     @v = rec('200','a')
1022 dpavlin 13
1023 dpavlin 750 If rec() returns just single value, it will
1024     return scalar, not array.
1025    
1026 dpavlin 536 =cut
1027 dpavlin 373
1028 dpavlin 536 sub rec {
1029 dpavlin 583 my @out;
1030 dpavlin 536 if ($#_ == 0) {
1031 dpavlin 583 @out = rec1(@_);
1032 dpavlin 536 } elsif ($#_ == 1) {
1033 dpavlin 583 @out = rec2(@_);
1034 dpavlin 13 }
1035 dpavlin 750 if ($#out == 0 && ! wantarray) {
1036     return $out[0];
1037     } elsif (@out) {
1038 dpavlin 583 return @out;
1039     } else {
1040     return '';
1041     }
1042 dpavlin 13 }
1043    
1044 dpavlin 1012 =head2 frec
1045    
1046     Returns first value from field
1047    
1048     $v = frec('200');
1049     $v = frec('200','a');
1050    
1051     =cut
1052    
1053     sub frec {
1054     my @out = rec(@_);
1055     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1056     return shift @out;
1057     }
1058    
1059 dpavlin 1015 =head2 frec_eq
1060    
1061     =head2 frec_ne
1062    
1063     Check if first values from two fields are same or different
1064    
1065     if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
1066     # values are same
1067     } else {
1068     # values are different
1069     }
1070    
1071     Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
1072     could write something like:
1073    
1074     if ( frec( '900','a' ) eq frec( '910','c' ) ) {
1075     # yada tada
1076     }
1077    
1078     but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
1079     in order to parse text and create invalid function C<eqfrec>.
1080    
1081     =cut
1082    
1083     sub frec_eq {
1084     my ( $f1,$sf1, $f2, $sf2 ) = @_;
1085     return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
1086     }
1087    
1088     sub frec_ne {
1089     return ! frec_eq( @_ );
1090     }
1091    
1092 dpavlin 536 =head2 regex
1093 dpavlin 15
1094 dpavlin 536 Apply regex to some or all values
1095 dpavlin 15
1096 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1097 dpavlin 15
1098     =cut
1099    
1100 dpavlin 536 sub regex {
1101     my $r = shift;
1102     my @out;
1103 dpavlin 550 #warn "r: $r\n", dump(\@_);
1104 dpavlin 536 foreach my $t (@_) {
1105     next unless ($t);
1106     eval "\$t =~ $r";
1107     push @out, $t if ($t && $t ne '');
1108 dpavlin 368 }
1109 dpavlin 536 return @out;
1110 dpavlin 15 }
1111    
1112 dpavlin 536 =head2 prefix
1113 dpavlin 15
1114 dpavlin 536 Prefix all values with a string
1115 dpavlin 15
1116 dpavlin 536 @v = prefix( 'my_', @v );
1117 dpavlin 15
1118     =cut
1119    
1120 dpavlin 536 sub prefix {
1121 dpavlin 819 my $p = shift;
1122     return @_ unless defined( $p );
1123 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1124     }
1125 dpavlin 15
1126 dpavlin 536 =head2 suffix
1127 dpavlin 15
1128 dpavlin 536 suffix all values with a string
1129 dpavlin 15
1130 dpavlin 536 @v = suffix( '_my', @v );
1131 dpavlin 15
1132 dpavlin 536 =cut
1133 dpavlin 15
1134 dpavlin 536 sub suffix {
1135 dpavlin 819 my $s = shift;
1136     return @_ unless defined( $s );
1137 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1138 dpavlin 15 }
1139    
1140 dpavlin 536 =head2 surround
1141 dpavlin 13
1142 dpavlin 536 surround all values with a two strings
1143 dpavlin 13
1144 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1145 dpavlin 13
1146     =cut
1147    
1148 dpavlin 536 sub surround {
1149 dpavlin 819 my $p = shift;
1150     my $s = shift;
1151     $p = '' unless defined( $p );
1152     $s = '' unless defined( $s );
1153 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1154 dpavlin 13 }
1155    
1156 dpavlin 536 =head2 first
1157 dpavlin 13
1158 dpavlin 536 Return first element
1159 dpavlin 15
1160 dpavlin 536 $v = first( @v );
1161 dpavlin 13
1162     =cut
1163    
1164 dpavlin 536 sub first {
1165     my $r = shift;
1166     return $r;
1167 dpavlin 13 }
1168    
1169 dpavlin 536 =head2 lookup
1170 dpavlin 13
1171 dpavlin 536 Consult lookup hashes for some value
1172 dpavlin 13
1173 dpavlin 725 @v = lookup(
1174     sub {
1175     'ffkk/peri/mfn'.rec('000')
1176     },
1177     'ffkk','peri','200-a-200-e',
1178     sub {
1179     first(rec(200,'a')).' '.first(rec('200','e'))
1180     }
1181     );
1182 dpavlin 13
1183 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1184     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1185 dpavlin 707
1186 dpavlin 725 lookup(
1187     # which results to return from record recorded in lookup
1188     sub { 'ffkk/peri/mfn' . rec('000') },
1189     # from which database and input
1190     'ffkk','peri',
1191     # such that following values match
1192     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1193     # if this part is missing, we will try to match same fields
1194     # from lookup record and current one, or you can override
1195     # which records to use from current record using
1196     sub { rec('900','x') . ' ' . rec('900','y') },
1197     )
1198    
1199     You can think about this lookup as SQL (if that helps):
1200    
1201     select
1202     sub { what }
1203     from
1204     database, input
1205     where
1206     sub { filter from lookuped record }
1207     having
1208     sub { optional filter on current record }
1209    
1210     Easy as pie, right?
1211    
1212 dpavlin 13 =cut
1213    
1214 dpavlin 536 sub lookup {
1215 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1216    
1217 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1218 dpavlin 725
1219 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1220 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1221    
1222 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1223 dpavlin 725
1224     my $mfns;
1225     my @having = $having->();
1226    
1227 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1228 dpavlin 725
1229     foreach my $h ( @having ) {
1230     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1231 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1232 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1233     }
1234 dpavlin 536 }
1235 dpavlin 725
1236     return unless ($mfns);
1237    
1238     my @mfns = sort keys %$mfns;
1239    
1240 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1241 dpavlin 725
1242     my $old_rec = $rec;
1243     my @out;
1244    
1245     foreach my $mfn (@mfns) {
1246 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1247 dpavlin 725
1248 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1249 dpavlin 725
1250     my @vals = $what->();
1251    
1252     push @out, ( @vals );
1253    
1254 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1255 dpavlin 725 }
1256    
1257     # if (ref($lookup->{$k}) eq 'ARRAY') {
1258     # return @{ $lookup->{$k} };
1259     # } else {
1260     # return $lookup->{$k};
1261     # }
1262    
1263     $rec = $old_rec;
1264    
1265 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1266 dpavlin 725
1267 dpavlin 740 if ($#out == 0) {
1268     return $out[0];
1269     } else {
1270     return @out;
1271     }
1272 dpavlin 13 }
1273    
1274 dpavlin 707 =head2 save_into_lookup
1275    
1276 dpavlin 725 Save value into lookup. It associates current database, input
1277     and specific keys with one or more values which will be
1278     associated over MFN.
1279 dpavlin 707
1280 dpavlin 725 MFN will be extracted from first occurence current of field 000
1281     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1282    
1283     my $nr = save_into_lookup($database,$input,$key,sub {
1284 dpavlin 707 # code which produce one or more values
1285     });
1286    
1287 dpavlin 725 It returns number of items saved.
1288 dpavlin 707
1289 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1290     L<WebPAC::Parser>.
1291    
1292 dpavlin 707 =cut
1293    
1294     sub save_into_lookup {
1295 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1296     die "save_into_lookup needs database" unless defined($database);
1297     die "save_into_lookup needs input" unless defined($input);
1298     die "save_into_lookup needs key" unless defined($key);
1299 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1300 dpavlin 725
1301 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1302 dpavlin 725
1303     my $mfn =
1304     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1305     defined($config->{_mfn}) ? $config->{_mfn} :
1306     die "mfn not defined or zero";
1307    
1308     my $nr = 0;
1309    
1310 dpavlin 707 foreach my $v ( $coderef->() ) {
1311 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1312 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1313 dpavlin 725 $nr++;
1314 dpavlin 707 }
1315 dpavlin 725
1316     return $nr;
1317 dpavlin 707 }
1318    
1319 dpavlin 595 =head2 config
1320    
1321     Consult config values stored in C<config.yml>
1322    
1323     # return database code (key under databases in yaml)
1324     $database_code = config(); # use _ from hash
1325     $database_name = config('name');
1326     $database_input_name = config('input name');
1327    
1328     Up to three levels are supported.
1329    
1330     =cut
1331    
1332     sub config {
1333     return unless ($config);
1334    
1335     my $p = shift;
1336    
1337     $p ||= '';
1338    
1339     my $v;
1340    
1341     warn "### getting config($p)\n" if ($debug > 1);
1342    
1343     my @p = split(/\s+/,$p);
1344     if ($#p < 0) {
1345     $v = $config->{ '_' }; # special, database code
1346     } else {
1347    
1348     my $c = dclone( $config );
1349    
1350     foreach my $k (@p) {
1351     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1352     if (ref($c) eq 'ARRAY') {
1353     $c = shift @$c;
1354     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1355     last;
1356     }
1357    
1358     if (! defined($c->{$k}) ) {
1359     $c = undef;
1360     last;
1361     } else {
1362     $c = $c->{$k};
1363     }
1364     }
1365     $v = $c if ($c);
1366    
1367     }
1368    
1369     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1370     warn "config( '$p' ) is empty\n" if (! $v);
1371    
1372     return $v;
1373     }
1374    
1375     =head2 id
1376    
1377     Returns unique id of this record
1378    
1379     $id = id();
1380    
1381     Returns C<42/2> for 2nd occurence of MFN 42.
1382    
1383     =cut
1384    
1385     sub id {
1386     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1387     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1388     }
1389    
1390 dpavlin 536 =head2 join_with
1391 dpavlin 13
1392 dpavlin 536 Joins walues with some delimiter
1393 dpavlin 10
1394 dpavlin 536 $v = join_with(", ", @v);
1395 dpavlin 10
1396 dpavlin 536 =cut
1397 dpavlin 10
1398 dpavlin 536 sub join_with {
1399     my $d = shift;
1400 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1401 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1402     return '' unless defined($v);
1403     return $v;
1404 dpavlin 536 }
1405 dpavlin 10
1406 dpavlin 562 =head2 split_rec_on
1407    
1408     Split record subfield on some regex and take one of parts out
1409    
1410     $a_before_semi_column =
1411     split_rec_on('200','a', /\s*;\s*/, $part);
1412    
1413     C<$part> is optional number of element. First element is
1414     B<1>, not 0!
1415    
1416     If there is no C<$part> parameter or C<$part> is 0, this function will
1417     return all values produced by splitting.
1418    
1419     =cut
1420    
1421     sub split_rec_on {
1422     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1423    
1424     my ($fld, $sf, $regex, $part) = @_;
1425 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1426 dpavlin 562
1427     my @r = rec( $fld, $sf );
1428     my $v = shift @r;
1429 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1430 dpavlin 562
1431 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1432 dpavlin 566
1433 dpavlin 562 my @s = split( $regex, $v );
1434 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1435 dpavlin 566 if ($part && $part > 0) {
1436 dpavlin 562 return $s[ $part - 1 ];
1437     } else {
1438 dpavlin 571 return @s;
1439 dpavlin 562 }
1440     }
1441    
1442 dpavlin 785 my $hash;
1443    
1444     =head2 set
1445    
1446     set( key => 'value' );
1447    
1448     =cut
1449    
1450     sub set {
1451     my ($k,$v) = @_;
1452 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1453 dpavlin 785 $hash->{$k} = $v;
1454     };
1455    
1456     =head2 get
1457    
1458     get( 'key' );
1459    
1460     =cut
1461    
1462     sub get {
1463     my $k = shift || return;
1464     my $v = $hash->{$k};
1465 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1466 dpavlin 785 return $v;
1467     }
1468    
1469 dpavlin 791 =head2 count
1470 dpavlin 785
1471 dpavlin 791 if ( count( @result ) == 1 ) {
1472     # do something if only 1 result is there
1473     }
1474    
1475     =cut
1476    
1477     sub count {
1478 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1479 dpavlin 791 return @_ . '';
1480     }
1481    
1482 dpavlin 536 # END
1483     1;

  ViewVC Help
Powered by ViewVC 1.1.26