/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 595 - (show annotations)
Mon Jul 10 10:16:11 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 19231 byte(s)
 r827@llin:  dpavlin | 2006-07-10 12:17:16 +0200
 add config() and id() to WebPAC::Normalize

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6 _debug
7
8 tag search display
9 marc marc_indicators marc_repeatable_subfield
10 marc_compose marc_leader
11 marc_duplicate marc_remove
12
13 rec1 rec2 rec
14 regex prefix suffix surround
15 first lookup join_with
16
17 split_rec_on
18 /;
19
20 use warnings;
21 use strict;
22
23 #use base qw/WebPAC::Common/;
24 use Data::Dump qw/dump/;
25 use Encode qw/from_to/;
26 use Storable qw/dclone/;
27
28 # debugging warn(s)
29 my $debug = 0;
30
31
32 =head1 NAME
33
34 WebPAC::Normalize - describe normalisaton rules using sets
35
36 =head1 VERSION
37
38 Version 0.14
39
40 =cut
41
42 our $VERSION = '0.14';
43
44 =head1 SYNOPSIS
45
46 This module uses C<conf/normalize/*.pl> files to perform normalisation
47 from input records using perl functions which are specialized for set
48 processing.
49
50 Sets are implemented as arrays, and normalisation file is valid perl, which
51 means that you check it's validity before running WebPAC using
52 C<perl -c normalize.pl>.
53
54 Normalisation can generate multiple output normalized data. For now, supported output
55 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
56 C<marc>.
57
58 =head1 FUNCTIONS
59
60 Functions which start with C<_> are private and used by WebPAC internally.
61 All other functions are available for use within normalisation rules.
62
63 =head2 data_structure
64
65 Return data structure
66
67 my $ds = WebPAC::Normalize::data_structure(
68 lookup => $lookup->lookup_hash,
69 row => $row,
70 rules => $normalize_pl_config,
71 marc_encoding => 'utf-8',
72 config => $config,
73 );
74
75 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
76 other are optional.
77
78 This function will B<die> if normalizastion can't be evaled.
79
80 Since this function isn't exported you have to call it with
81 C<WebPAC::Normalize::data_structure>.
82
83 =cut
84
85 sub data_structure {
86 my $arg = {@_};
87
88 die "need row argument" unless ($arg->{row});
89 die "need normalisation argument" unless ($arg->{rules});
90
91 no strict 'subs';
92 _set_lookup( $arg->{lookup} );
93 _set_rec( $arg->{row} );
94 _set_config( $arg->{config} );
95 _clean_ds( %{ $arg } );
96 eval "$arg->{rules}";
97 die "error evaling $arg->{rules}: $@\n" if ($@);
98
99 return _get_ds();
100 }
101
102 =head2 _set_rec
103
104 Set current record hash
105
106 _set_rec( $rec );
107
108 =cut
109
110 my $rec;
111
112 sub _set_rec {
113 $rec = shift or die "no record hash";
114 }
115
116 =head2 _set_config
117
118 Set current config hash
119
120 _set_config( $config );
121
122 Magic keys are:
123
124 =over 4
125
126 =item _
127
128 Code of current database
129
130 =item _mfn
131
132 Current MFN
133
134 =back
135
136 =cut
137
138 my $config;
139
140 sub _set_config {
141 $config = shift;
142 }
143
144 =head2 _get_ds
145
146 Return hash formatted as data structure
147
148 my $ds = _get_ds();
149
150 =cut
151
152 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
153 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
154
155 sub _get_ds {
156 return $out;
157 }
158
159 =head2 _clean_ds
160
161 Clean data structure hash for next record
162
163 _clean_ds();
164
165 =cut
166
167 sub _clean_ds {
168 my $a = {@_};
169 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
170 ($marc_record_offset, $marc_fetch_offset) = (0,0);
171 $marc_encoding = $a->{marc_encoding};
172 }
173
174 =head2 _set_lookup
175
176 Set current lookup hash
177
178 _set_lookup( $lookup );
179
180 =cut
181
182 my $lookup;
183
184 sub _set_lookup {
185 $lookup = shift;
186 }
187
188 =head2 _get_marc_fields
189
190 Get all fields defined by calls to C<marc>
191
192 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
193
194 We are using I<magic> which detect repeatable fields only from
195 sequence of field/subfield data generated by normalization.
196
197 Repeatable field is created when there is second occurence of same subfield or
198 if any of indicators are different.
199
200 This is sane for most cases. Something like:
201
202 900a-1 900b-1 900c-1
203 900a-2 900b-2
204 900a-3
205
206 will be created from any combination of:
207
208 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
209
210 and following rules:
211
212 marc('900','a', rec('200','a') );
213 marc('900','b', rec('200','b') );
214 marc('900','c', rec('200','c') );
215
216 which might not be what you have in mind. If you need repeatable subfield,
217 define it using C<marc_repeatable_subfield> like this:
218
219 marc_repeatable_subfield('900','a');
220 marc('900','a', rec('200','a') );
221 marc('900','b', rec('200','b') );
222 marc('900','c', rec('200','c') );
223
224 will create:
225
226 900a-1 900a-2 900a-3 900b-1 900c-1
227 900b-2
228
229 There is also support for returning next or specific using:
230
231 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
232 # do something with $mf
233 }
234
235 will always return fields from next MARC record or
236
237 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
238
239 will return 42th copy record (if it exists).
240
241 =cut
242
243 sub _get_marc_fields {
244
245 my $arg = {@_};
246 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
247 my $offset = $marc_fetch_offset;
248 if ($arg->{offset}) {
249 $offset = $arg->{offset};
250 } elsif($arg->{fetch_next}) {
251 $marc_fetch_offset++;
252 }
253
254 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
255
256 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
257
258 my $marc_rec = $marc_record->[ $offset ];
259
260 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
261
262 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
263
264 # first, sort all existing fields
265 # XXX might not be needed, but modern perl might randomize elements in hash
266 my @sorted_marc_record = sort {
267 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
268 } @{ $marc_rec };
269
270 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
271
272 # output marc fields
273 my @m;
274
275 # count unique field-subfields (used for offset when walking to next subfield)
276 my $u;
277 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
278
279 if ($debug) {
280 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
281 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
282 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
283 warn "## subfield count = ", dump( $u ), $/;
284 }
285
286 my $len = $#sorted_marc_record;
287 my $visited;
288 my $i = 0;
289 my $field;
290
291 foreach ( 0 .. $len ) {
292
293 # find next element which isn't visited
294 while ($visited->{$i}) {
295 $i = ($i + 1) % ($len + 1);
296 }
297
298 # mark it visited
299 $visited->{$i}++;
300
301 my $row = dclone( $sorted_marc_record[$i] );
302
303 # field and subfield which is key for
304 # marc_repeatable_subfield and u
305 my $fsf = $row->[0] . ( $row->[3] || '' );
306
307 if ($debug > 1) {
308
309 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
310 print "### this [$i]: ", dump( $row ),$/;
311 print "### sf: ", $row->[3], " vs ", $field->[3],
312 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
313 if ($#$field >= 0);
314
315 }
316
317 # if field exists
318 if ( $#$field >= 0 ) {
319 if (
320 $row->[0] ne $field->[0] || # field
321 $row->[1] ne $field->[1] || # i1
322 $row->[2] ne $field->[2] # i2
323 ) {
324 push @m, $field;
325 warn "## saved/1 ", dump( $field ),$/ if ($debug);
326 $field = $row;
327
328 } elsif (
329 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
330 ||
331 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
332 ! $marc_repeatable_subfield->{ $fsf }
333 )
334 ) {
335 push @m, $field;
336 warn "## saved/2 ", dump( $field ),$/ if ($debug);
337 $field = $row;
338
339 } else {
340 # append new subfields to existing field
341 push @$field, ( $row->[3], $row->[4] );
342 }
343 } else {
344 # insert first field
345 $field = $row;
346 }
347
348 if (! $marc_repeatable_subfield->{ $fsf }) {
349 # make step to next subfield
350 $i = ($i + $u->{ $fsf } ) % ($len + 1);
351 }
352 }
353
354 if ($#$field >= 0) {
355 push @m, $field;
356 warn "## saved/3 ", dump( $field ),$/ if ($debug);
357 }
358
359 return \@m;
360 }
361
362 =head2 _debug
363
364 Change level of debug warnings
365
366 _debug( 2 );
367
368 =cut
369
370 sub _debug {
371 my $l = shift;
372 return $debug unless defined($l);
373 warn "debug level $l",$/ if ($l > 0);
374 $debug = $l;
375 }
376
377 =head1 Functions to create C<data_structure>
378
379 Those functions generally have to first in your normalization file.
380
381 =head2 tag
382
383 Define new tag for I<search> and I<display>.
384
385 tag('Title', rec('200','a') );
386
387
388 =cut
389
390 sub tag {
391 my $name = shift or die "tag needs name as first argument";
392 my @o = grep { defined($_) && $_ ne '' } @_;
393 return unless (@o);
394 $out->{$name}->{tag} = $name;
395 $out->{$name}->{search} = \@o;
396 $out->{$name}->{display} = \@o;
397 }
398
399 =head2 display
400
401 Define tag just for I<display>
402
403 @v = display('Title', rec('200','a') );
404
405 =cut
406
407 sub display {
408 my $name = shift or die "display needs name as first argument";
409 my @o = grep { defined($_) && $_ ne '' } @_;
410 return unless (@o);
411 $out->{$name}->{tag} = $name;
412 $out->{$name}->{display} = \@o;
413 }
414
415 =head2 search
416
417 Prepare values just for I<search>
418
419 @v = search('Title', rec('200','a') );
420
421 =cut
422
423 sub search {
424 my $name = shift or die "search needs name as first argument";
425 my @o = grep { defined($_) && $_ ne '' } @_;
426 return unless (@o);
427 $out->{$name}->{tag} = $name;
428 $out->{$name}->{search} = \@o;
429 }
430
431 =head2 marc_leader
432
433 Setup fields within MARC leader or get leader
434
435 marc_leader('05','c');
436 my $leader = marc_leader();
437
438 =cut
439
440 sub marc_leader {
441 my ($offset,$value) = @_;
442
443 if ($offset) {
444 $out->{' leader'}->{ $offset } = $value;
445 } else {
446 return $out->{' leader'};
447 }
448 }
449
450 =head2 marc
451
452 Save value for MARC field
453
454 marc('900','a', rec('200','a') );
455 marc('001', rec('000') );
456
457 =cut
458
459 sub marc {
460 my $f = shift or die "marc needs field";
461 die "marc field must be numer" unless ($f =~ /^\d+$/);
462
463 my $sf;
464 if ($f >= 10) {
465 $sf = shift or die "marc needs subfield";
466 }
467
468 foreach (@_) {
469 my $v = $_; # make var read-write for Encode
470 next unless (defined($v) && $v !~ /^\s*$/);
471 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
472 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
473 if (defined $sf) {
474 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
475 } else {
476 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
477 }
478 }
479 }
480
481 =head2 marc_repeatable_subfield
482
483 Save values for MARC repetable subfield
484
485 marc_repeatable_subfield('910', 'z', rec('909') );
486
487 =cut
488
489 sub marc_repeatable_subfield {
490 my ($f,$sf) = @_;
491 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
492 $marc_repeatable_subfield->{ $f . $sf }++;
493 marc(@_);
494 }
495
496 =head2 marc_indicators
497
498 Set both indicators for MARC field
499
500 marc_indicators('900', ' ', 1);
501
502 Any indicator value other than C<0-9> will be treated as undefined.
503
504 =cut
505
506 sub marc_indicators {
507 my $f = shift || die "marc_indicators need field!\n";
508 my ($i1,$i2) = @_;
509 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
510 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
511
512 $i1 = ' ' if ($i1 !~ /^\d$/);
513 $i2 = ' ' if ($i2 !~ /^\d$/);
514 @{ $marc_indicators->{$f} } = ($i1,$i2);
515 }
516
517 =head2 marc_compose
518
519 Save values for each MARC subfield explicitly
520
521 marc_compose('900',
522 'a', rec('200','a')
523 'b', rec('201','a')
524 'a', rec('200','b')
525 'c', rec('200','c')
526 );
527
528 =cut
529
530 sub marc_compose {
531 my $f = shift or die "marc_compose needs field";
532 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
533
534 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
535 my $m = [ $f, $i1, $i2 ];
536
537 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
538
539 while (@_) {
540 my $sf = shift or die "marc_compose $f needs subfield";
541 my $v = shift;
542
543 next unless (defined($v) && $v !~ /^\s*$/);
544 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
545 push @$m, ( $sf, $v );
546 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
547 }
548
549 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
550
551 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
552 }
553
554 =head2 marc_duplicate
555
556 Generate copy of current MARC record and continue working on copy
557
558 marc_duplicate();
559
560 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
561 C<< _get_marc_fields( offset => 42 ) >>.
562
563 =cut
564
565 sub marc_duplicate {
566 my $m = $marc_record->[ -1 ];
567 die "can't duplicate record which isn't defined" unless ($m);
568 push @{ $marc_record }, dclone( $m );
569 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
570 $marc_record_offset = $#{ $marc_record };
571 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
572 }
573
574 =head2 marc_remove
575
576 Remove some field or subfield from MARC record.
577
578 marc_remove('200');
579 marc_remove('200','a');
580
581 This will erase field C<200> or C<200^a> from current MARC record.
582
583 This is useful after calling C<marc_duplicate> or on it's own (but, you
584 should probably just remove that subfield definition if you are not
585 using C<marc_duplicate>).
586
587 FIXME: support fields < 10.
588
589 =cut
590
591 sub marc_remove {
592 my ($f, $sf) = @_;
593
594 die "marc_remove needs record number" unless defined($f);
595
596 my $marc = $marc_record->[ $marc_record_offset ];
597
598 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
599
600 my $i = 0;
601 foreach ( 0 .. $#{ $marc } ) {
602 last unless (defined $marc->[$i]);
603 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
604 if ($marc->[$i]->[0] eq $f) {
605 if (! defined $sf) {
606 # remove whole field
607 splice @$marc, $i, 1;
608 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
609 $i--;
610 } else {
611 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
612 my $o = ($j * 2) + 3;
613 if ($marc->[$i]->[$o] eq $sf) {
614 # remove subfield
615 splice @{$marc->[$i]}, $o, 2;
616 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
617 # is record now empty?
618 if ($#{ $marc->[$i] } == 2) {
619 splice @$marc, $i, 1;
620 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
621 $i--;
622 };
623 }
624 }
625 }
626 }
627 $i++;
628 }
629
630 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
631
632 $marc_record->[ $marc_record_offset ] = $marc;
633
634 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
635 }
636
637 =head1 Functions to extract data from input
638
639 This function should be used inside functions to create C<data_structure> described
640 above.
641
642 =head2 rec1
643
644 Return all values in some field
645
646 @v = rec1('200')
647
648 TODO: order of values is probably same as in source data, need to investigate that
649
650 =cut
651
652 sub rec1 {
653 my $f = shift;
654 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
655 return unless (defined($rec) && defined($rec->{$f}));
656 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
657 if (ref($rec->{$f}) eq 'ARRAY') {
658 return map {
659 if (ref($_) eq 'HASH') {
660 values %{$_};
661 } else {
662 $_;
663 }
664 } @{ $rec->{$f} };
665 } elsif( defined($rec->{$f}) ) {
666 return $rec->{$f};
667 }
668 }
669
670 =head2 rec2
671
672 Return all values in specific field and subfield
673
674 @v = rec2('200','a')
675
676 =cut
677
678 sub rec2 {
679 my $f = shift;
680 return unless (defined($rec && $rec->{$f}));
681 my $sf = shift;
682 return map {
683 if (ref($_->{$sf}) eq 'ARRAY') {
684 @{ $_->{$sf} };
685 } else {
686 $_->{$sf};
687 }
688 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
689 }
690
691 =head2 rec
692
693 syntaxtic sugar for
694
695 @v = rec('200')
696 @v = rec('200','a')
697
698 =cut
699
700 sub rec {
701 my @out;
702 if ($#_ == 0) {
703 @out = rec1(@_);
704 } elsif ($#_ == 1) {
705 @out = rec2(@_);
706 }
707 if (@out) {
708 return @out;
709 } else {
710 return '';
711 }
712 }
713
714 =head2 regex
715
716 Apply regex to some or all values
717
718 @v = regex( 's/foo/bar/g', @v );
719
720 =cut
721
722 sub regex {
723 my $r = shift;
724 my @out;
725 #warn "r: $r\n", dump(\@_);
726 foreach my $t (@_) {
727 next unless ($t);
728 eval "\$t =~ $r";
729 push @out, $t if ($t && $t ne '');
730 }
731 return @out;
732 }
733
734 =head2 prefix
735
736 Prefix all values with a string
737
738 @v = prefix( 'my_', @v );
739
740 =cut
741
742 sub prefix {
743 my $p = shift or return;
744 return map { $p . $_ } grep { defined($_) } @_;
745 }
746
747 =head2 suffix
748
749 suffix all values with a string
750
751 @v = suffix( '_my', @v );
752
753 =cut
754
755 sub suffix {
756 my $s = shift or die "suffix needs string as first argument";
757 return map { $_ . $s } grep { defined($_) } @_;
758 }
759
760 =head2 surround
761
762 surround all values with a two strings
763
764 @v = surround( 'prefix_', '_suffix', @v );
765
766 =cut
767
768 sub surround {
769 my $p = shift or die "surround need prefix as first argument";
770 my $s = shift or die "surround needs suffix as second argument";
771 return map { $p . $_ . $s } grep { defined($_) } @_;
772 }
773
774 =head2 first
775
776 Return first element
777
778 $v = first( @v );
779
780 =cut
781
782 sub first {
783 my $r = shift;
784 return $r;
785 }
786
787 =head2 lookup
788
789 Consult lookup hashes for some value
790
791 @v = lookup( $v );
792 @v = lookup( @v );
793
794 =cut
795
796 sub lookup {
797 my $k = shift or return;
798 return unless (defined($lookup->{$k}));
799 if (ref($lookup->{$k}) eq 'ARRAY') {
800 return @{ $lookup->{$k} };
801 } else {
802 return $lookup->{$k};
803 }
804 }
805
806 =head2 config
807
808 Consult config values stored in C<config.yml>
809
810 # return database code (key under databases in yaml)
811 $database_code = config(); # use _ from hash
812 $database_name = config('name');
813 $database_input_name = config('input name');
814 $tag = config('input normalize tag');
815
816 Up to three levels are supported.
817
818 =cut
819
820 sub config {
821 return unless ($config);
822
823 my $p = shift;
824
825 $p ||= '';
826
827 my $v;
828
829 warn "### getting config($p)\n" if ($debug > 1);
830
831 my @p = split(/\s+/,$p);
832 if ($#p < 0) {
833 $v = $config->{ '_' }; # special, database code
834 } else {
835
836 my $c = dclone( $config );
837
838 foreach my $k (@p) {
839 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
840 if (ref($c) eq 'ARRAY') {
841 $c = shift @$c;
842 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
843 last;
844 }
845
846 if (! defined($c->{$k}) ) {
847 $c = undef;
848 last;
849 } else {
850 $c = $c->{$k};
851 }
852 }
853 $v = $c if ($c);
854
855 }
856
857 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
858 warn "config( '$p' ) is empty\n" if (! $v);
859
860 return $v;
861 }
862
863 =head2 id
864
865 Returns unique id of this record
866
867 $id = id();
868
869 Returns C<42/2> for 2nd occurence of MFN 42.
870
871 =cut
872
873 sub id {
874 my $mfn = $config->{_mfn} || die "no _mfn in config data";
875 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
876 }
877
878 =head2 join_with
879
880 Joins walues with some delimiter
881
882 $v = join_with(", ", @v);
883
884 =cut
885
886 sub join_with {
887 my $d = shift;
888 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
889 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
890 return '' unless defined($v);
891 return $v;
892 }
893
894 =head2 split_rec_on
895
896 Split record subfield on some regex and take one of parts out
897
898 $a_before_semi_column =
899 split_rec_on('200','a', /\s*;\s*/, $part);
900
901 C<$part> is optional number of element. First element is
902 B<1>, not 0!
903
904 If there is no C<$part> parameter or C<$part> is 0, this function will
905 return all values produced by splitting.
906
907 =cut
908
909 sub split_rec_on {
910 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
911
912 my ($fld, $sf, $regex, $part) = @_;
913 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
914
915 my @r = rec( $fld, $sf );
916 my $v = shift @r;
917 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
918
919 return '' if( ! defined($v) || $v =~ /^\s*$/);
920
921 my @s = split( $regex, $v );
922 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
923 if ($part && $part > 0) {
924 return $s[ $part - 1 ];
925 } else {
926 return @s;
927 }
928 }
929
930 # END
931 1;

  ViewVC Help
Powered by ViewVC 1.1.26