/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 641 - (show annotations)
Wed Sep 6 20:54:47 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 22593 byte(s)
refactored _pack_subfields_hash in separate function

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6 _debug
7 _pack_subfields_hash
8
9 tag search display
10 marc marc_indicators marc_repeatable_subfield
11 marc_compose marc_leader
12 marc_duplicate marc_remove
13 marc_original_order
14
15 rec1 rec2 rec
16 regex prefix suffix surround
17 first lookup join_with
18
19 split_rec_on
20 /;
21
22 use warnings;
23 use strict;
24
25 #use base qw/WebPAC::Common/;
26 use Data::Dump qw/dump/;
27 use Encode qw/from_to/;
28 use Storable qw/dclone/;
29
30 # debugging warn(s)
31 my $debug = 0;
32
33
34 =head1 NAME
35
36 WebPAC::Normalize - describe normalisaton rules using sets
37
38 =head1 VERSION
39
40 Version 0.17
41
42 =cut
43
44 our $VERSION = '0.17';
45
46 =head1 SYNOPSIS
47
48 This module uses C<conf/normalize/*.pl> files to perform normalisation
49 from input records using perl functions which are specialized for set
50 processing.
51
52 Sets are implemented as arrays, and normalisation file is valid perl, which
53 means that you check it's validity before running WebPAC using
54 C<perl -c normalize.pl>.
55
56 Normalisation can generate multiple output normalized data. For now, supported output
57 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
58 C<marc>.
59
60 =head1 FUNCTIONS
61
62 Functions which start with C<_> are private and used by WebPAC internally.
63 All other functions are available for use within normalisation rules.
64
65 =head2 data_structure
66
67 Return data structure
68
69 my $ds = WebPAC::Normalize::data_structure(
70 lookup => $lookup->lookup_hash,
71 row => $row,
72 rules => $normalize_pl_config,
73 marc_encoding => 'utf-8',
74 config => $config,
75 );
76
77 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
78 other are optional.
79
80 This function will B<die> if normalizastion can't be evaled.
81
82 Since this function isn't exported you have to call it with
83 C<WebPAC::Normalize::data_structure>.
84
85 =cut
86
87 sub data_structure {
88 my $arg = {@_};
89
90 die "need row argument" unless ($arg->{row});
91 die "need normalisation argument" unless ($arg->{rules});
92
93 no strict 'subs';
94 _set_lookup( $arg->{lookup} );
95 _set_rec( $arg->{row} );
96 _set_config( $arg->{config} );
97 _clean_ds( %{ $arg } );
98 eval "$arg->{rules}";
99 die "error evaling $arg->{rules}: $@\n" if ($@);
100
101 return _get_ds();
102 }
103
104 =head2 _set_rec
105
106 Set current record hash
107
108 _set_rec( $rec );
109
110 =cut
111
112 my $rec;
113
114 sub _set_rec {
115 $rec = shift or die "no record hash";
116 }
117
118 =head2 _set_config
119
120 Set current config hash
121
122 _set_config( $config );
123
124 Magic keys are:
125
126 =over 4
127
128 =item _
129
130 Code of current database
131
132 =item _mfn
133
134 Current MFN
135
136 =back
137
138 =cut
139
140 my $config;
141
142 sub _set_config {
143 $config = shift;
144 }
145
146 =head2 _get_ds
147
148 Return hash formatted as data structure
149
150 my $ds = _get_ds();
151
152 =cut
153
154 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
155 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
156
157 sub _get_ds {
158 return $out;
159 }
160
161 =head2 _clean_ds
162
163 Clean data structure hash for next record
164
165 _clean_ds();
166
167 =cut
168
169 sub _clean_ds {
170 my $a = {@_};
171 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
172 ($marc_record_offset, $marc_fetch_offset) = (0,0);
173 $marc_encoding = $a->{marc_encoding};
174 }
175
176 =head2 _set_lookup
177
178 Set current lookup hash
179
180 _set_lookup( $lookup );
181
182 =cut
183
184 my $lookup;
185
186 sub _set_lookup {
187 $lookup = shift;
188 }
189
190 =head2 _get_marc_fields
191
192 Get all fields defined by calls to C<marc>
193
194 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
195
196 We are using I<magic> which detect repeatable fields only from
197 sequence of field/subfield data generated by normalization.
198
199 Repeatable field is created when there is second occurence of same subfield or
200 if any of indicators are different.
201
202 This is sane for most cases. Something like:
203
204 900a-1 900b-1 900c-1
205 900a-2 900b-2
206 900a-3
207
208 will be created from any combination of:
209
210 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
211
212 and following rules:
213
214 marc('900','a', rec('200','a') );
215 marc('900','b', rec('200','b') );
216 marc('900','c', rec('200','c') );
217
218 which might not be what you have in mind. If you need repeatable subfield,
219 define it using C<marc_repeatable_subfield> like this:
220
221 marc_repeatable_subfield('900','a');
222 marc('900','a', rec('200','a') );
223 marc('900','b', rec('200','b') );
224 marc('900','c', rec('200','c') );
225
226 will create:
227
228 900a-1 900a-2 900a-3 900b-1 900c-1
229 900b-2
230
231 There is also support for returning next or specific using:
232
233 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
234 # do something with $mf
235 }
236
237 will always return fields from next MARC record or
238
239 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
240
241 will return 42th copy record (if it exists).
242
243 =cut
244
245 sub _get_marc_fields {
246
247 my $arg = {@_};
248 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
249 my $offset = $marc_fetch_offset;
250 if ($arg->{offset}) {
251 $offset = $arg->{offset};
252 } elsif($arg->{fetch_next}) {
253 $marc_fetch_offset++;
254 }
255
256 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
257
258 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
259
260 my $marc_rec = $marc_record->[ $offset ];
261
262 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
263
264 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
265
266 # first, sort all existing fields
267 # XXX might not be needed, but modern perl might randomize elements in hash
268 my @sorted_marc_record = sort {
269 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
270 } @{ $marc_rec };
271
272 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
273
274 # output marc fields
275 my @m;
276
277 # count unique field-subfields (used for offset when walking to next subfield)
278 my $u;
279 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
280
281 if ($debug) {
282 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
283 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
284 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
285 warn "## subfield count = ", dump( $u ), $/;
286 }
287
288 my $len = $#sorted_marc_record;
289 my $visited;
290 my $i = 0;
291 my $field;
292
293 foreach ( 0 .. $len ) {
294
295 # find next element which isn't visited
296 while ($visited->{$i}) {
297 $i = ($i + 1) % ($len + 1);
298 }
299
300 # mark it visited
301 $visited->{$i}++;
302
303 my $row = dclone( $sorted_marc_record[$i] );
304
305 # field and subfield which is key for
306 # marc_repeatable_subfield and u
307 my $fsf = $row->[0] . ( $row->[3] || '' );
308
309 if ($debug > 1) {
310
311 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
312 print "### this [$i]: ", dump( $row ),$/;
313 print "### sf: ", $row->[3], " vs ", $field->[3],
314 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
315 if ($#$field >= 0);
316
317 }
318
319 # if field exists
320 if ( $#$field >= 0 ) {
321 if (
322 $row->[0] ne $field->[0] || # field
323 $row->[1] ne $field->[1] || # i1
324 $row->[2] ne $field->[2] # i2
325 ) {
326 push @m, $field;
327 warn "## saved/1 ", dump( $field ),$/ if ($debug);
328 $field = $row;
329
330 } elsif (
331 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
332 ||
333 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
334 ! $marc_repeatable_subfield->{ $fsf }
335 )
336 ) {
337 push @m, $field;
338 warn "## saved/2 ", dump( $field ),$/ if ($debug);
339 $field = $row;
340
341 } else {
342 # append new subfields to existing field
343 push @$field, ( $row->[3], $row->[4] );
344 }
345 } else {
346 # insert first field
347 $field = $row;
348 }
349
350 if (! $marc_repeatable_subfield->{ $fsf }) {
351 # make step to next subfield
352 $i = ($i + $u->{ $fsf } ) % ($len + 1);
353 }
354 }
355
356 if ($#$field >= 0) {
357 push @m, $field;
358 warn "## saved/3 ", dump( $field ),$/ if ($debug);
359 }
360
361 return \@m;
362 }
363
364 =head2 _debug
365
366 Change level of debug warnings
367
368 _debug( 2 );
369
370 =cut
371
372 sub _debug {
373 my $l = shift;
374 return $debug unless defined($l);
375 warn "debug level $l",$/ if ($l > 0);
376 $debug = $l;
377 }
378
379 =head1 Functions to create C<data_structure>
380
381 Those functions generally have to first in your normalization file.
382
383 =head2 tag
384
385 Define new tag for I<search> and I<display>.
386
387 tag('Title', rec('200','a') );
388
389
390 =cut
391
392 sub tag {
393 my $name = shift or die "tag needs name as first argument";
394 my @o = grep { defined($_) && $_ ne '' } @_;
395 return unless (@o);
396 $out->{$name}->{tag} = $name;
397 $out->{$name}->{search} = \@o;
398 $out->{$name}->{display} = \@o;
399 }
400
401 =head2 display
402
403 Define tag just for I<display>
404
405 @v = display('Title', rec('200','a') );
406
407 =cut
408
409 sub display {
410 my $name = shift or die "display needs name as first argument";
411 my @o = grep { defined($_) && $_ ne '' } @_;
412 return unless (@o);
413 $out->{$name}->{tag} = $name;
414 $out->{$name}->{display} = \@o;
415 }
416
417 =head2 search
418
419 Prepare values just for I<search>
420
421 @v = search('Title', rec('200','a') );
422
423 =cut
424
425 sub search {
426 my $name = shift or die "search needs name as first argument";
427 my @o = grep { defined($_) && $_ ne '' } @_;
428 return unless (@o);
429 $out->{$name}->{tag} = $name;
430 $out->{$name}->{search} = \@o;
431 }
432
433 =head2 marc_leader
434
435 Setup fields within MARC leader or get leader
436
437 marc_leader('05','c');
438 my $leader = marc_leader();
439
440 =cut
441
442 sub marc_leader {
443 my ($offset,$value) = @_;
444
445 if ($offset) {
446 $out->{' leader'}->{ $offset } = $value;
447 } else {
448 return $out->{' leader'};
449 }
450 }
451
452 =head2 marc
453
454 Save value for MARC field
455
456 marc('900','a', rec('200','a') );
457 marc('001', rec('000') );
458
459 =cut
460
461 sub marc {
462 my $f = shift or die "marc needs field";
463 die "marc field must be numer" unless ($f =~ /^\d+$/);
464
465 my $sf;
466 if ($f >= 10) {
467 $sf = shift or die "marc needs subfield";
468 }
469
470 foreach (@_) {
471 my $v = $_; # make var read-write for Encode
472 next unless (defined($v) && $v !~ /^\s*$/);
473 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
474 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
475 if (defined $sf) {
476 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
477 } else {
478 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
479 }
480 }
481 }
482
483 =head2 marc_repeatable_subfield
484
485 Save values for MARC repetable subfield
486
487 marc_repeatable_subfield('910', 'z', rec('909') );
488
489 =cut
490
491 sub marc_repeatable_subfield {
492 my ($f,$sf) = @_;
493 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
494 $marc_repeatable_subfield->{ $f . $sf }++;
495 marc(@_);
496 }
497
498 =head2 marc_indicators
499
500 Set both indicators for MARC field
501
502 marc_indicators('900', ' ', 1);
503
504 Any indicator value other than C<0-9> will be treated as undefined.
505
506 =cut
507
508 sub marc_indicators {
509 my $f = shift || die "marc_indicators need field!\n";
510 my ($i1,$i2) = @_;
511 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
512 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
513
514 $i1 = ' ' if ($i1 !~ /^\d$/);
515 $i2 = ' ' if ($i2 !~ /^\d$/);
516 @{ $marc_indicators->{$f} } = ($i1,$i2);
517 }
518
519 =head2 marc_compose
520
521 Save values for each MARC subfield explicitly
522
523 marc_compose('900',
524 'a', rec('200','a')
525 'b', rec('201','a')
526 'a', rec('200','b')
527 'c', rec('200','c')
528 );
529
530 If you specify C<+> for subfield, value will be appended
531 to previous defined subfield.
532
533 =cut
534
535 sub marc_compose {
536 my $f = shift or die "marc_compose needs field";
537 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
538
539 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
540 my $m = [ $f, $i1, $i2 ];
541
542 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
543
544 if ($#_ % 2 != 1) {
545 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
546 }
547
548 while (@_) {
549 my $sf = shift;
550 my $v = shift;
551
552 next unless (defined($v) && $v !~ /^\s*$/);
553 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
554 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
555 if ($sf ne '+') {
556 push @$m, ( $sf, $v );
557 } else {
558 $m->[ $#$m ] .= $v;
559 }
560 }
561
562 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
563
564 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
565 }
566
567 =head2 marc_duplicate
568
569 Generate copy of current MARC record and continue working on copy
570
571 marc_duplicate();
572
573 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
574 C<< _get_marc_fields( offset => 42 ) >>.
575
576 =cut
577
578 sub marc_duplicate {
579 my $m = $marc_record->[ -1 ];
580 die "can't duplicate record which isn't defined" unless ($m);
581 push @{ $marc_record }, dclone( $m );
582 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
583 $marc_record_offset = $#{ $marc_record };
584 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
585 }
586
587 =head2 marc_remove
588
589 Remove some field or subfield from MARC record.
590
591 marc_remove('200');
592 marc_remove('200','a');
593
594 This will erase field C<200> or C<200^a> from current MARC record.
595
596 This is useful after calling C<marc_duplicate> or on it's own (but, you
597 should probably just remove that subfield definition if you are not
598 using C<marc_duplicate>).
599
600 FIXME: support fields < 10.
601
602 =cut
603
604 sub marc_remove {
605 my ($f, $sf) = @_;
606
607 die "marc_remove needs record number" unless defined($f);
608
609 my $marc = $marc_record->[ $marc_record_offset ];
610
611 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
612
613 my $i = 0;
614 foreach ( 0 .. $#{ $marc } ) {
615 last unless (defined $marc->[$i]);
616 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
617 if ($marc->[$i]->[0] eq $f) {
618 if (! defined $sf) {
619 # remove whole field
620 splice @$marc, $i, 1;
621 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
622 $i--;
623 } else {
624 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
625 my $o = ($j * 2) + 3;
626 if ($marc->[$i]->[$o] eq $sf) {
627 # remove subfield
628 splice @{$marc->[$i]}, $o, 2;
629 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
630 # is record now empty?
631 if ($#{ $marc->[$i] } == 2) {
632 splice @$marc, $i, 1;
633 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
634 $i--;
635 };
636 }
637 }
638 }
639 }
640 $i++;
641 }
642
643 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
644
645 $marc_record->[ $marc_record_offset ] = $marc;
646
647 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
648 }
649
650 =head2 marc_original_order
651
652 Copy all subfields preserving original order to marc field.
653
654 marc_original_order( marc_field_number, original_input_field_number );
655
656 Please note that field numbers are consistent with other commands (marc
657 field number first), but somewhat counter-intuitive (destination and then
658 source).
659
660 You might want to use this command if you are just renaming subfields or
661 using pre-processing modify_record in C<config.yml> and don't need any
662 post-processing or want to preserve order of original subfields.
663
664
665 =cut
666
667 sub marc_original_order {
668
669 my ($to, $from) = @_;
670 die "marc_original_order needs from and to fields\n" unless ($from && $to);
671
672 return unless defined($rec->{$from});
673
674 my $r = $rec->{$from};
675 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
676
677 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
678 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
679
680 foreach my $d (@$r) {
681
682 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
683 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
684 next;
685 }
686
687 my @sfs = @{ $d->{subfields} };
688
689 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
690
691 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
692
693 my $m = [ $to, $i1, $i2 ];
694
695 while (my $sf = shift @sfs) {
696
697 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
698 my $offset = shift @sfs;
699 die "corrupted sufields specification for field $from\n" unless defined($offset);
700
701 my $v;
702 if (ref($d->{$sf}) eq 'ARRAY') {
703 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
704 } elsif ($offset == 0) {
705 $v = $d->{$sf};
706 } else {
707 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
708 }
709 push @$m, ( $sf, $v ) if (defined($v));
710 }
711
712 if ($#{$m} > 2) {
713 push @{ $marc_record->[ $marc_record_offset ] }, $m;
714 }
715 }
716
717 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
718 }
719
720
721 =head1 Functions to extract data from input
722
723 This function should be used inside functions to create C<data_structure> described
724 above.
725
726 =head2 _pack_subfields_hash
727
728 @values = _pack_subfields_hash( $h, $include_subfields )
729
730 =cut
731
732 sub _pack_subfields_hash {
733
734 warn "## _pack_subfields_hash( ",dump(@_), " )\n";
735
736 my ($h,$include_subfields) = @_;
737
738
739 if ( defined($h->{subfields}) ) {
740 my $sfs = delete $h->{subfields} || die "no subfields?";
741 my @out;
742 while (@$sfs) {
743 my $sf = shift @$sfs;
744 push @out, '^' . $sf if ($include_subfields);
745 my $o = shift @$sfs;
746 if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
747 # single element subfields are not arrays
748 push @out, $h->{$sf};
749 } else {
750 #warn "====> $f $sf $o $#$sfs ", dump( $sfs ), "\n";
751 push @out, $h->{$sf}->[$o];
752 }
753 }
754 return @out;
755 } else {
756 # FIXME this should probably be in alphabetical order instead of hash order
757 values %{$h};
758 }
759 }
760
761 =head2 rec1
762
763 Return all values in some field
764
765 @v = rec1('200')
766
767 TODO: order of values is probably same as in source data, need to investigate that
768
769 =cut
770
771 sub rec1 {
772 my $f = shift;
773 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
774 return unless (defined($rec) && defined($rec->{$f}));
775 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
776 if (ref($rec->{$f}) eq 'ARRAY') {
777 my @out;
778 foreach my $h ( @{ $rec->{$f} } ) {
779 if (ref($h) eq 'HASH') {
780 warn "rec1 hash: ",dump($h),"\n";
781 push @out, ( _pack_subfields_hash( $h ) );
782 } else {
783 push @out, $h;
784 }
785 }
786 return @out;
787 } elsif( defined($rec->{$f}) ) {
788 return $rec->{$f};
789 }
790 }
791
792 =head2 rec2
793
794 Return all values in specific field and subfield
795
796 @v = rec2('200','a')
797
798 =cut
799
800 sub rec2 {
801 my $f = shift;
802 return unless (defined($rec && $rec->{$f}));
803 my $sf = shift;
804 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
805 return map {
806 if (ref($_->{$sf}) eq 'ARRAY') {
807 @{ $_->{$sf} };
808 } else {
809 $_->{$sf};
810 }
811 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
812 }
813
814 =head2 rec
815
816 syntaxtic sugar for
817
818 @v = rec('200')
819 @v = rec('200','a')
820
821 =cut
822
823 sub rec {
824 my @out;
825 if ($#_ == 0) {
826 @out = rec1(@_);
827 } elsif ($#_ == 1) {
828 @out = rec2(@_);
829 }
830 if (@out) {
831 return @out;
832 } else {
833 return '';
834 }
835 }
836
837 =head2 regex
838
839 Apply regex to some or all values
840
841 @v = regex( 's/foo/bar/g', @v );
842
843 =cut
844
845 sub regex {
846 my $r = shift;
847 my @out;
848 #warn "r: $r\n", dump(\@_);
849 foreach my $t (@_) {
850 next unless ($t);
851 eval "\$t =~ $r";
852 push @out, $t if ($t && $t ne '');
853 }
854 return @out;
855 }
856
857 =head2 prefix
858
859 Prefix all values with a string
860
861 @v = prefix( 'my_', @v );
862
863 =cut
864
865 sub prefix {
866 my $p = shift or return;
867 return map { $p . $_ } grep { defined($_) } @_;
868 }
869
870 =head2 suffix
871
872 suffix all values with a string
873
874 @v = suffix( '_my', @v );
875
876 =cut
877
878 sub suffix {
879 my $s = shift or die "suffix needs string as first argument";
880 return map { $_ . $s } grep { defined($_) } @_;
881 }
882
883 =head2 surround
884
885 surround all values with a two strings
886
887 @v = surround( 'prefix_', '_suffix', @v );
888
889 =cut
890
891 sub surround {
892 my $p = shift or die "surround need prefix as first argument";
893 my $s = shift or die "surround needs suffix as second argument";
894 return map { $p . $_ . $s } grep { defined($_) } @_;
895 }
896
897 =head2 first
898
899 Return first element
900
901 $v = first( @v );
902
903 =cut
904
905 sub first {
906 my $r = shift;
907 return $r;
908 }
909
910 =head2 lookup
911
912 Consult lookup hashes for some value
913
914 @v = lookup( $v );
915 @v = lookup( @v );
916
917 =cut
918
919 sub lookup {
920 my $k = shift or return;
921 return unless (defined($lookup->{$k}));
922 if (ref($lookup->{$k}) eq 'ARRAY') {
923 return @{ $lookup->{$k} };
924 } else {
925 return $lookup->{$k};
926 }
927 }
928
929 =head2 config
930
931 Consult config values stored in C<config.yml>
932
933 # return database code (key under databases in yaml)
934 $database_code = config(); # use _ from hash
935 $database_name = config('name');
936 $database_input_name = config('input name');
937 $tag = config('input normalize tag');
938
939 Up to three levels are supported.
940
941 =cut
942
943 sub config {
944 return unless ($config);
945
946 my $p = shift;
947
948 $p ||= '';
949
950 my $v;
951
952 warn "### getting config($p)\n" if ($debug > 1);
953
954 my @p = split(/\s+/,$p);
955 if ($#p < 0) {
956 $v = $config->{ '_' }; # special, database code
957 } else {
958
959 my $c = dclone( $config );
960
961 foreach my $k (@p) {
962 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
963 if (ref($c) eq 'ARRAY') {
964 $c = shift @$c;
965 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
966 last;
967 }
968
969 if (! defined($c->{$k}) ) {
970 $c = undef;
971 last;
972 } else {
973 $c = $c->{$k};
974 }
975 }
976 $v = $c if ($c);
977
978 }
979
980 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
981 warn "config( '$p' ) is empty\n" if (! $v);
982
983 return $v;
984 }
985
986 =head2 id
987
988 Returns unique id of this record
989
990 $id = id();
991
992 Returns C<42/2> for 2nd occurence of MFN 42.
993
994 =cut
995
996 sub id {
997 my $mfn = $config->{_mfn} || die "no _mfn in config data";
998 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
999 }
1000
1001 =head2 join_with
1002
1003 Joins walues with some delimiter
1004
1005 $v = join_with(", ", @v);
1006
1007 =cut
1008
1009 sub join_with {
1010 my $d = shift;
1011 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1012 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1013 return '' unless defined($v);
1014 return $v;
1015 }
1016
1017 =head2 split_rec_on
1018
1019 Split record subfield on some regex and take one of parts out
1020
1021 $a_before_semi_column =
1022 split_rec_on('200','a', /\s*;\s*/, $part);
1023
1024 C<$part> is optional number of element. First element is
1025 B<1>, not 0!
1026
1027 If there is no C<$part> parameter or C<$part> is 0, this function will
1028 return all values produced by splitting.
1029
1030 =cut
1031
1032 sub split_rec_on {
1033 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1034
1035 my ($fld, $sf, $regex, $part) = @_;
1036 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1037
1038 my @r = rec( $fld, $sf );
1039 my $v = shift @r;
1040 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1041
1042 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1043
1044 my @s = split( $regex, $v );
1045 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1046 if ($part && $part > 0) {
1047 return $s[ $part - 1 ];
1048 } else {
1049 return @s;
1050 }
1051 }
1052
1053 # END
1054 1;

  ViewVC Help
Powered by ViewVC 1.1.26