/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 669 - (show annotations)
Mon Sep 11 14:29:01 2006 UTC (17 years, 6 months ago) by dpavlin
File size: 22988 byte(s)
 r937@llin:  dpavlin | 2006-09-11 16:26:07 +0200
 changed _pack_subfields_hash usage and document it

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6 _debug
7 _pack_subfields_hash
8
9 tag search display
10 marc marc_indicators marc_repeatable_subfield
11 marc_compose marc_leader
12 marc_duplicate marc_remove
13 marc_original_order
14
15 rec1 rec2 rec
16 regex prefix suffix surround
17 first lookup join_with
18
19 split_rec_on
20 /;
21
22 use warnings;
23 use strict;
24
25 #use base qw/WebPAC::Common/;
26 use Data::Dump qw/dump/;
27 use Storable qw/dclone/;
28
29 # debugging warn(s)
30 my $debug = 0;
31
32
33 =head1 NAME
34
35 WebPAC::Normalize - describe normalisaton rules using sets
36
37 =head1 VERSION
38
39 Version 0.19
40
41 =cut
42
43 our $VERSION = '0.19';
44
45 =head1 SYNOPSIS
46
47 This module uses C<conf/normalize/*.pl> files to perform normalisation
48 from input records using perl functions which are specialized for set
49 processing.
50
51 Sets are implemented as arrays, and normalisation file is valid perl, which
52 means that you check it's validity before running WebPAC using
53 C<perl -c normalize.pl>.
54
55 Normalisation can generate multiple output normalized data. For now, supported output
56 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
57 C<marc>.
58
59 =head1 FUNCTIONS
60
61 Functions which start with C<_> are private and used by WebPAC internally.
62 All other functions are available for use within normalisation rules.
63
64 =head2 data_structure
65
66 Return data structure
67
68 my $ds = WebPAC::Normalize::data_structure(
69 lookup => $lookup->lookup_hash,
70 row => $row,
71 rules => $normalize_pl_config,
72 marc_encoding => 'utf-8',
73 config => $config,
74 );
75
76 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
77 other are optional.
78
79 This function will B<die> if normalizastion can't be evaled.
80
81 Since this function isn't exported you have to call it with
82 C<WebPAC::Normalize::data_structure>.
83
84 =cut
85
86 sub data_structure {
87 my $arg = {@_};
88
89 die "need row argument" unless ($arg->{row});
90 die "need normalisation argument" unless ($arg->{rules});
91
92 no strict 'subs';
93 _set_lookup( $arg->{lookup} );
94 _set_rec( $arg->{row} );
95 _set_config( $arg->{config} );
96 _clean_ds( %{ $arg } );
97 eval "$arg->{rules}";
98 die "error evaling $arg->{rules}: $@\n" if ($@);
99
100 return _get_ds();
101 }
102
103 =head2 _set_rec
104
105 Set current record hash
106
107 _set_rec( $rec );
108
109 =cut
110
111 my $rec;
112
113 sub _set_rec {
114 $rec = shift or die "no record hash";
115 }
116
117 =head2 _set_config
118
119 Set current config hash
120
121 _set_config( $config );
122
123 Magic keys are:
124
125 =over 4
126
127 =item _
128
129 Code of current database
130
131 =item _mfn
132
133 Current MFN
134
135 =back
136
137 =cut
138
139 my $config;
140
141 sub _set_config {
142 $config = shift;
143 }
144
145 =head2 _get_ds
146
147 Return hash formatted as data structure
148
149 my $ds = _get_ds();
150
151 =cut
152
153 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
154 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
155
156 sub _get_ds {
157 return $out;
158 }
159
160 =head2 _clean_ds
161
162 Clean data structure hash for next record
163
164 _clean_ds();
165
166 =cut
167
168 sub _clean_ds {
169 my $a = {@_};
170 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
171 ($marc_record_offset, $marc_fetch_offset) = (0,0);
172 $marc_encoding = $a->{marc_encoding};
173 }
174
175 =head2 _set_lookup
176
177 Set current lookup hash
178
179 _set_lookup( $lookup );
180
181 =cut
182
183 my $lookup;
184
185 sub _set_lookup {
186 $lookup = shift;
187 }
188
189 =head2 _get_marc_fields
190
191 Get all fields defined by calls to C<marc>
192
193 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
194
195 We are using I<magic> which detect repeatable fields only from
196 sequence of field/subfield data generated by normalization.
197
198 Repeatable field is created when there is second occurence of same subfield or
199 if any of indicators are different.
200
201 This is sane for most cases. Something like:
202
203 900a-1 900b-1 900c-1
204 900a-2 900b-2
205 900a-3
206
207 will be created from any combination of:
208
209 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
210
211 and following rules:
212
213 marc('900','a', rec('200','a') );
214 marc('900','b', rec('200','b') );
215 marc('900','c', rec('200','c') );
216
217 which might not be what you have in mind. If you need repeatable subfield,
218 define it using C<marc_repeatable_subfield> like this:
219
220 marc_repeatable_subfield('900','a');
221 marc('900','a', rec('200','a') );
222 marc('900','b', rec('200','b') );
223 marc('900','c', rec('200','c') );
224
225 will create:
226
227 900a-1 900a-2 900a-3 900b-1 900c-1
228 900b-2
229
230 There is also support for returning next or specific using:
231
232 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
233 # do something with $mf
234 }
235
236 will always return fields from next MARC record or
237
238 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
239
240 will return 42th copy record (if it exists).
241
242 =cut
243
244 sub _get_marc_fields {
245
246 my $arg = {@_};
247 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
248 my $offset = $marc_fetch_offset;
249 if ($arg->{offset}) {
250 $offset = $arg->{offset};
251 } elsif($arg->{fetch_next}) {
252 $marc_fetch_offset++;
253 }
254
255 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
256
257 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
258
259 my $marc_rec = $marc_record->[ $offset ];
260
261 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
262
263 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
264
265 # first, sort all existing fields
266 # XXX might not be needed, but modern perl might randomize elements in hash
267 my @sorted_marc_record = sort {
268 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
269 } @{ $marc_rec };
270
271 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
272
273 # output marc fields
274 my @m;
275
276 # count unique field-subfields (used for offset when walking to next subfield)
277 my $u;
278 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
279
280 if ($debug) {
281 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
282 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
283 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
284 warn "## subfield count = ", dump( $u ), $/;
285 }
286
287 my $len = $#sorted_marc_record;
288 my $visited;
289 my $i = 0;
290 my $field;
291
292 foreach ( 0 .. $len ) {
293
294 # find next element which isn't visited
295 while ($visited->{$i}) {
296 $i = ($i + 1) % ($len + 1);
297 }
298
299 # mark it visited
300 $visited->{$i}++;
301
302 my $row = dclone( $sorted_marc_record[$i] );
303
304 # field and subfield which is key for
305 # marc_repeatable_subfield and u
306 my $fsf = $row->[0] . ( $row->[3] || '' );
307
308 if ($debug > 1) {
309
310 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
311 print "### this [$i]: ", dump( $row ),$/;
312 print "### sf: ", $row->[3], " vs ", $field->[3],
313 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
314 if ($#$field >= 0);
315
316 }
317
318 # if field exists
319 if ( $#$field >= 0 ) {
320 if (
321 $row->[0] ne $field->[0] || # field
322 $row->[1] ne $field->[1] || # i1
323 $row->[2] ne $field->[2] # i2
324 ) {
325 push @m, $field;
326 warn "## saved/1 ", dump( $field ),$/ if ($debug);
327 $field = $row;
328
329 } elsif (
330 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
331 ||
332 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
333 ! $marc_repeatable_subfield->{ $fsf }
334 )
335 ) {
336 push @m, $field;
337 warn "## saved/2 ", dump( $field ),$/ if ($debug);
338 $field = $row;
339
340 } else {
341 # append new subfields to existing field
342 push @$field, ( $row->[3], $row->[4] );
343 }
344 } else {
345 # insert first field
346 $field = $row;
347 }
348
349 if (! $marc_repeatable_subfield->{ $fsf }) {
350 # make step to next subfield
351 $i = ($i + $u->{ $fsf } ) % ($len + 1);
352 }
353 }
354
355 if ($#$field >= 0) {
356 push @m, $field;
357 warn "## saved/3 ", dump( $field ),$/ if ($debug);
358 }
359
360 return \@m;
361 }
362
363 =head2 _debug
364
365 Change level of debug warnings
366
367 _debug( 2 );
368
369 =cut
370
371 sub _debug {
372 my $l = shift;
373 return $debug unless defined($l);
374 warn "debug level $l",$/ if ($l > 0);
375 $debug = $l;
376 }
377
378 =head1 Functions to create C<data_structure>
379
380 Those functions generally have to first in your normalization file.
381
382 =head2 tag
383
384 Define new tag for I<search> and I<display>.
385
386 tag('Title', rec('200','a') );
387
388
389 =cut
390
391 sub tag {
392 my $name = shift or die "tag needs name as first argument";
393 my @o = grep { defined($_) && $_ ne '' } @_;
394 return unless (@o);
395 $out->{$name}->{tag} = $name;
396 $out->{$name}->{search} = \@o;
397 $out->{$name}->{display} = \@o;
398 }
399
400 =head2 display
401
402 Define tag just for I<display>
403
404 @v = display('Title', rec('200','a') );
405
406 =cut
407
408 sub display {
409 my $name = shift or die "display needs name as first argument";
410 my @o = grep { defined($_) && $_ ne '' } @_;
411 return unless (@o);
412 $out->{$name}->{tag} = $name;
413 $out->{$name}->{display} = \@o;
414 }
415
416 =head2 search
417
418 Prepare values just for I<search>
419
420 @v = search('Title', rec('200','a') );
421
422 =cut
423
424 sub search {
425 my $name = shift or die "search needs name as first argument";
426 my @o = grep { defined($_) && $_ ne '' } @_;
427 return unless (@o);
428 $out->{$name}->{tag} = $name;
429 $out->{$name}->{search} = \@o;
430 }
431
432 =head2 marc_leader
433
434 Setup fields within MARC leader or get leader
435
436 marc_leader('05','c');
437 my $leader = marc_leader();
438
439 =cut
440
441 sub marc_leader {
442 my ($offset,$value) = @_;
443
444 if ($offset) {
445 $out->{' leader'}->{ $offset } = $value;
446 } else {
447 return $out->{' leader'};
448 }
449 }
450
451 =head2 marc
452
453 Save value for MARC field
454
455 marc('900','a', rec('200','a') );
456 marc('001', rec('000') );
457
458 =cut
459
460 sub marc {
461 my $f = shift or die "marc needs field";
462 die "marc field must be numer" unless ($f =~ /^\d+$/);
463
464 my $sf;
465 if ($f >= 10) {
466 $sf = shift or die "marc needs subfield";
467 }
468
469 foreach (@_) {
470 my $v = $_; # make var read-write for Encode
471 next unless (defined($v) && $v !~ /^\s*$/);
472 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
473 if (defined $sf) {
474 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
475 } else {
476 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
477 }
478 }
479 }
480
481 =head2 marc_repeatable_subfield
482
483 Save values for MARC repetable subfield
484
485 marc_repeatable_subfield('910', 'z', rec('909') );
486
487 =cut
488
489 sub marc_repeatable_subfield {
490 my ($f,$sf) = @_;
491 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
492 $marc_repeatable_subfield->{ $f . $sf }++;
493 marc(@_);
494 }
495
496 =head2 marc_indicators
497
498 Set both indicators for MARC field
499
500 marc_indicators('900', ' ', 1);
501
502 Any indicator value other than C<0-9> will be treated as undefined.
503
504 =cut
505
506 sub marc_indicators {
507 my $f = shift || die "marc_indicators need field!\n";
508 my ($i1,$i2) = @_;
509 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
510 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
511
512 $i1 = ' ' if ($i1 !~ /^\d$/);
513 $i2 = ' ' if ($i2 !~ /^\d$/);
514 @{ $marc_indicators->{$f} } = ($i1,$i2);
515 }
516
517 =head2 marc_compose
518
519 Save values for each MARC subfield explicitly
520
521 marc_compose('900',
522 'a', rec('200','a')
523 'b', rec('201','a')
524 'a', rec('200','b')
525 'c', rec('200','c')
526 );
527
528 If you specify C<+> for subfield, value will be appended
529 to previous defined subfield.
530
531 =cut
532
533 sub marc_compose {
534 my $f = shift or die "marc_compose needs field";
535 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
536
537 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
538 my $m = [ $f, $i1, $i2 ];
539
540 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
541
542 if ($#_ % 2 != 1) {
543 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
544 }
545
546 while (@_) {
547 my $sf = shift;
548 my $v = shift;
549
550 next unless (defined($v) && $v !~ /^\s*$/);
551 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
552 if ($sf ne '+') {
553 push @$m, ( $sf, $v );
554 } else {
555 $m->[ $#$m ] .= $v;
556 }
557 }
558
559 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
560
561 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
562 }
563
564 =head2 marc_duplicate
565
566 Generate copy of current MARC record and continue working on copy
567
568 marc_duplicate();
569
570 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
571 C<< _get_marc_fields( offset => 42 ) >>.
572
573 =cut
574
575 sub marc_duplicate {
576 my $m = $marc_record->[ -1 ];
577 die "can't duplicate record which isn't defined" unless ($m);
578 push @{ $marc_record }, dclone( $m );
579 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
580 $marc_record_offset = $#{ $marc_record };
581 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
582 }
583
584 =head2 marc_remove
585
586 Remove some field or subfield from MARC record.
587
588 marc_remove('200');
589 marc_remove('200','a');
590
591 This will erase field C<200> or C<200^a> from current MARC record.
592
593 This is useful after calling C<marc_duplicate> or on it's own (but, you
594 should probably just remove that subfield definition if you are not
595 using C<marc_duplicate>).
596
597 FIXME: support fields < 10.
598
599 =cut
600
601 sub marc_remove {
602 my ($f, $sf) = @_;
603
604 die "marc_remove needs record number" unless defined($f);
605
606 my $marc = $marc_record->[ $marc_record_offset ];
607
608 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
609
610 my $i = 0;
611 foreach ( 0 .. $#{ $marc } ) {
612 last unless (defined $marc->[$i]);
613 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
614 if ($marc->[$i]->[0] eq $f) {
615 if (! defined $sf) {
616 # remove whole field
617 splice @$marc, $i, 1;
618 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
619 $i--;
620 } else {
621 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
622 my $o = ($j * 2) + 3;
623 if ($marc->[$i]->[$o] eq $sf) {
624 # remove subfield
625 splice @{$marc->[$i]}, $o, 2;
626 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
627 # is record now empty?
628 if ($#{ $marc->[$i] } == 2) {
629 splice @$marc, $i, 1;
630 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
631 $i--;
632 };
633 }
634 }
635 }
636 }
637 $i++;
638 }
639
640 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
641
642 $marc_record->[ $marc_record_offset ] = $marc;
643
644 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
645 }
646
647 =head2 marc_original_order
648
649 Copy all subfields preserving original order to marc field.
650
651 marc_original_order( marc_field_number, original_input_field_number );
652
653 Please note that field numbers are consistent with other commands (marc
654 field number first), but somewhat counter-intuitive (destination and then
655 source).
656
657 You might want to use this command if you are just renaming subfields or
658 using pre-processing modify_record in C<config.yml> and don't need any
659 post-processing or want to preserve order of original subfields.
660
661
662 =cut
663
664 sub marc_original_order {
665
666 my ($to, $from) = @_;
667 die "marc_original_order needs from and to fields\n" unless ($from && $to);
668
669 return unless defined($rec->{$from});
670
671 my $r = $rec->{$from};
672 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
673
674 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
675 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
676
677 foreach my $d (@$r) {
678
679 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
680 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
681 next;
682 }
683
684 my @sfs = @{ $d->{subfields} };
685
686 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
687
688 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
689
690 my $m = [ $to, $i1, $i2 ];
691
692 while (my $sf = shift @sfs) {
693
694 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
695 my $offset = shift @sfs;
696 die "corrupted sufields specification for field $from\n" unless defined($offset);
697
698 my $v;
699 if (ref($d->{$sf}) eq 'ARRAY') {
700 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
701 } elsif ($offset == 0) {
702 $v = $d->{$sf};
703 } else {
704 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
705 }
706 push @$m, ( $sf, $v ) if (defined($v));
707 }
708
709 if ($#{$m} > 2) {
710 push @{ $marc_record->[ $marc_record_offset ] }, $m;
711 }
712 }
713
714 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
715 }
716
717
718 =head1 Functions to extract data from input
719
720 This function should be used inside functions to create C<data_structure> described
721 above.
722
723 =head2 _pack_subfields_hash
724
725 @subfields = _pack_subfields_hash( $h );
726 $subfields = _pack_subfields_hash( $h, 1 );
727
728 Return each subfield value in array or pack them all together and return scalar
729 with subfields (denoted by C<^>) and values.
730
731 =cut
732
733 sub _pack_subfields_hash {
734
735 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
736
737 my ($h,$include_subfields) = @_;
738
739 if ( defined($h->{subfields}) ) {
740 my $sfs = delete $h->{subfields} || die "no subfields?";
741 my @out;
742 while (@$sfs) {
743 my $sf = shift @$sfs;
744 push @out, '^' . $sf if ($include_subfields);
745 my $o = shift @$sfs;
746 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
747 # single element subfields are not arrays
748 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
749
750 push @out, $h->{$sf};
751 } else {
752 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
753 push @out, $h->{$sf}->[$o];
754 }
755 }
756 if ($include_subfields) {
757 return join('', @out);
758 } else {
759 return @out;
760 }
761 } else {
762 if ($include_subfields) {
763 my $out = '';
764 foreach my $sf (sort keys %$h) {
765 if (ref($h->{$sf}) eq 'ARRAY') {
766 $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
767 } else {
768 $out .= '^' . $sf . $h->{$sf};
769 }
770 }
771 return $out;
772 } else {
773 # FIXME this should probably be in alphabetical order instead of hash order
774 values %{$h};
775 }
776 }
777 }
778
779 =head2 rec1
780
781 Return all values in some field
782
783 @v = rec1('200')
784
785 TODO: order of values is probably same as in source data, need to investigate that
786
787 =cut
788
789 sub rec1 {
790 my $f = shift;
791 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
792 return unless (defined($rec) && defined($rec->{$f}));
793 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
794 if (ref($rec->{$f}) eq 'ARRAY') {
795 my @out;
796 foreach my $h ( @{ $rec->{$f} } ) {
797 if (ref($h) eq 'HASH') {
798 push @out, ( _pack_subfields_hash( $h ) );
799 } else {
800 push @out, $h;
801 }
802 }
803 return @out;
804 } elsif( defined($rec->{$f}) ) {
805 return $rec->{$f};
806 }
807 }
808
809 =head2 rec2
810
811 Return all values in specific field and subfield
812
813 @v = rec2('200','a')
814
815 =cut
816
817 sub rec2 {
818 my $f = shift;
819 return unless (defined($rec && $rec->{$f}));
820 my $sf = shift;
821 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
822 return map {
823 if (ref($_->{$sf}) eq 'ARRAY') {
824 @{ $_->{$sf} };
825 } else {
826 $_->{$sf};
827 }
828 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
829 }
830
831 =head2 rec
832
833 syntaxtic sugar for
834
835 @v = rec('200')
836 @v = rec('200','a')
837
838 =cut
839
840 sub rec {
841 my @out;
842 if ($#_ == 0) {
843 @out = rec1(@_);
844 } elsif ($#_ == 1) {
845 @out = rec2(@_);
846 }
847 if (@out) {
848 return @out;
849 } else {
850 return '';
851 }
852 }
853
854 =head2 regex
855
856 Apply regex to some or all values
857
858 @v = regex( 's/foo/bar/g', @v );
859
860 =cut
861
862 sub regex {
863 my $r = shift;
864 my @out;
865 #warn "r: $r\n", dump(\@_);
866 foreach my $t (@_) {
867 next unless ($t);
868 eval "\$t =~ $r";
869 push @out, $t if ($t && $t ne '');
870 }
871 return @out;
872 }
873
874 =head2 prefix
875
876 Prefix all values with a string
877
878 @v = prefix( 'my_', @v );
879
880 =cut
881
882 sub prefix {
883 my $p = shift or return;
884 return map { $p . $_ } grep { defined($_) } @_;
885 }
886
887 =head2 suffix
888
889 suffix all values with a string
890
891 @v = suffix( '_my', @v );
892
893 =cut
894
895 sub suffix {
896 my $s = shift or die "suffix needs string as first argument";
897 return map { $_ . $s } grep { defined($_) } @_;
898 }
899
900 =head2 surround
901
902 surround all values with a two strings
903
904 @v = surround( 'prefix_', '_suffix', @v );
905
906 =cut
907
908 sub surround {
909 my $p = shift or die "surround need prefix as first argument";
910 my $s = shift or die "surround needs suffix as second argument";
911 return map { $p . $_ . $s } grep { defined($_) } @_;
912 }
913
914 =head2 first
915
916 Return first element
917
918 $v = first( @v );
919
920 =cut
921
922 sub first {
923 my $r = shift;
924 return $r;
925 }
926
927 =head2 lookup
928
929 Consult lookup hashes for some value
930
931 @v = lookup( $v );
932 @v = lookup( @v );
933
934 =cut
935
936 sub lookup {
937 my $k = shift or return;
938 return unless (defined($lookup->{$k}));
939 if (ref($lookup->{$k}) eq 'ARRAY') {
940 return @{ $lookup->{$k} };
941 } else {
942 return $lookup->{$k};
943 }
944 }
945
946 =head2 config
947
948 Consult config values stored in C<config.yml>
949
950 # return database code (key under databases in yaml)
951 $database_code = config(); # use _ from hash
952 $database_name = config('name');
953 $database_input_name = config('input name');
954 $tag = config('input normalize tag');
955
956 Up to three levels are supported.
957
958 =cut
959
960 sub config {
961 return unless ($config);
962
963 my $p = shift;
964
965 $p ||= '';
966
967 my $v;
968
969 warn "### getting config($p)\n" if ($debug > 1);
970
971 my @p = split(/\s+/,$p);
972 if ($#p < 0) {
973 $v = $config->{ '_' }; # special, database code
974 } else {
975
976 my $c = dclone( $config );
977
978 foreach my $k (@p) {
979 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
980 if (ref($c) eq 'ARRAY') {
981 $c = shift @$c;
982 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
983 last;
984 }
985
986 if (! defined($c->{$k}) ) {
987 $c = undef;
988 last;
989 } else {
990 $c = $c->{$k};
991 }
992 }
993 $v = $c if ($c);
994
995 }
996
997 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
998 warn "config( '$p' ) is empty\n" if (! $v);
999
1000 return $v;
1001 }
1002
1003 =head2 id
1004
1005 Returns unique id of this record
1006
1007 $id = id();
1008
1009 Returns C<42/2> for 2nd occurence of MFN 42.
1010
1011 =cut
1012
1013 sub id {
1014 my $mfn = $config->{_mfn} || die "no _mfn in config data";
1015 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1016 }
1017
1018 =head2 join_with
1019
1020 Joins walues with some delimiter
1021
1022 $v = join_with(", ", @v);
1023
1024 =cut
1025
1026 sub join_with {
1027 my $d = shift;
1028 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1029 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1030 return '' unless defined($v);
1031 return $v;
1032 }
1033
1034 =head2 split_rec_on
1035
1036 Split record subfield on some regex and take one of parts out
1037
1038 $a_before_semi_column =
1039 split_rec_on('200','a', /\s*;\s*/, $part);
1040
1041 C<$part> is optional number of element. First element is
1042 B<1>, not 0!
1043
1044 If there is no C<$part> parameter or C<$part> is 0, this function will
1045 return all values produced by splitting.
1046
1047 =cut
1048
1049 sub split_rec_on {
1050 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1051
1052 my ($fld, $sf, $regex, $part) = @_;
1053 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1054
1055 my @r = rec( $fld, $sf );
1056 my $v = shift @r;
1057 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1058
1059 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1060
1061 my @s = split( $regex, $v );
1062 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1063 if ($part && $part > 0) {
1064 return $s[ $part - 1 ];
1065 } else {
1066 return @s;
1067 }
1068 }
1069
1070 # END
1071 1;

  ViewVC Help
Powered by ViewVC 1.1.26