/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 601 - (show annotations)
Sun Jul 23 17:33:11 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 19296 byte(s)
added _debug(2) output to rec2(...)

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6 _debug
7
8 tag search display
9 marc marc_indicators marc_repeatable_subfield
10 marc_compose marc_leader
11 marc_duplicate marc_remove
12
13 rec1 rec2 rec
14 regex prefix suffix surround
15 first lookup join_with
16
17 split_rec_on
18 /;
19
20 use warnings;
21 use strict;
22
23 #use base qw/WebPAC::Common/;
24 use Data::Dump qw/dump/;
25 use Encode qw/from_to/;
26 use Storable qw/dclone/;
27
28 # debugging warn(s)
29 my $debug = 0;
30
31
32 =head1 NAME
33
34 WebPAC::Normalize - describe normalisaton rules using sets
35
36 =head1 VERSION
37
38 Version 0.14
39
40 =cut
41
42 our $VERSION = '0.14';
43
44 =head1 SYNOPSIS
45
46 This module uses C<conf/normalize/*.pl> files to perform normalisation
47 from input records using perl functions which are specialized for set
48 processing.
49
50 Sets are implemented as arrays, and normalisation file is valid perl, which
51 means that you check it's validity before running WebPAC using
52 C<perl -c normalize.pl>.
53
54 Normalisation can generate multiple output normalized data. For now, supported output
55 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
56 C<marc>.
57
58 =head1 FUNCTIONS
59
60 Functions which start with C<_> are private and used by WebPAC internally.
61 All other functions are available for use within normalisation rules.
62
63 =head2 data_structure
64
65 Return data structure
66
67 my $ds = WebPAC::Normalize::data_structure(
68 lookup => $lookup->lookup_hash,
69 row => $row,
70 rules => $normalize_pl_config,
71 marc_encoding => 'utf-8',
72 config => $config,
73 );
74
75 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
76 other are optional.
77
78 This function will B<die> if normalizastion can't be evaled.
79
80 Since this function isn't exported you have to call it with
81 C<WebPAC::Normalize::data_structure>.
82
83 =cut
84
85 sub data_structure {
86 my $arg = {@_};
87
88 die "need row argument" unless ($arg->{row});
89 die "need normalisation argument" unless ($arg->{rules});
90
91 no strict 'subs';
92 _set_lookup( $arg->{lookup} );
93 _set_rec( $arg->{row} );
94 _set_config( $arg->{config} );
95 _clean_ds( %{ $arg } );
96 eval "$arg->{rules}";
97 die "error evaling $arg->{rules}: $@\n" if ($@);
98
99 return _get_ds();
100 }
101
102 =head2 _set_rec
103
104 Set current record hash
105
106 _set_rec( $rec );
107
108 =cut
109
110 my $rec;
111
112 sub _set_rec {
113 $rec = shift or die "no record hash";
114 }
115
116 =head2 _set_config
117
118 Set current config hash
119
120 _set_config( $config );
121
122 Magic keys are:
123
124 =over 4
125
126 =item _
127
128 Code of current database
129
130 =item _mfn
131
132 Current MFN
133
134 =back
135
136 =cut
137
138 my $config;
139
140 sub _set_config {
141 $config = shift;
142 }
143
144 =head2 _get_ds
145
146 Return hash formatted as data structure
147
148 my $ds = _get_ds();
149
150 =cut
151
152 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
153 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
154
155 sub _get_ds {
156 return $out;
157 }
158
159 =head2 _clean_ds
160
161 Clean data structure hash for next record
162
163 _clean_ds();
164
165 =cut
166
167 sub _clean_ds {
168 my $a = {@_};
169 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
170 ($marc_record_offset, $marc_fetch_offset) = (0,0);
171 $marc_encoding = $a->{marc_encoding};
172 }
173
174 =head2 _set_lookup
175
176 Set current lookup hash
177
178 _set_lookup( $lookup );
179
180 =cut
181
182 my $lookup;
183
184 sub _set_lookup {
185 $lookup = shift;
186 }
187
188 =head2 _get_marc_fields
189
190 Get all fields defined by calls to C<marc>
191
192 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
193
194 We are using I<magic> which detect repeatable fields only from
195 sequence of field/subfield data generated by normalization.
196
197 Repeatable field is created when there is second occurence of same subfield or
198 if any of indicators are different.
199
200 This is sane for most cases. Something like:
201
202 900a-1 900b-1 900c-1
203 900a-2 900b-2
204 900a-3
205
206 will be created from any combination of:
207
208 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
209
210 and following rules:
211
212 marc('900','a', rec('200','a') );
213 marc('900','b', rec('200','b') );
214 marc('900','c', rec('200','c') );
215
216 which might not be what you have in mind. If you need repeatable subfield,
217 define it using C<marc_repeatable_subfield> like this:
218
219 marc_repeatable_subfield('900','a');
220 marc('900','a', rec('200','a') );
221 marc('900','b', rec('200','b') );
222 marc('900','c', rec('200','c') );
223
224 will create:
225
226 900a-1 900a-2 900a-3 900b-1 900c-1
227 900b-2
228
229 There is also support for returning next or specific using:
230
231 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
232 # do something with $mf
233 }
234
235 will always return fields from next MARC record or
236
237 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
238
239 will return 42th copy record (if it exists).
240
241 =cut
242
243 sub _get_marc_fields {
244
245 my $arg = {@_};
246 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
247 my $offset = $marc_fetch_offset;
248 if ($arg->{offset}) {
249 $offset = $arg->{offset};
250 } elsif($arg->{fetch_next}) {
251 $marc_fetch_offset++;
252 }
253
254 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
255
256 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
257
258 my $marc_rec = $marc_record->[ $offset ];
259
260 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
261
262 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
263
264 # first, sort all existing fields
265 # XXX might not be needed, but modern perl might randomize elements in hash
266 my @sorted_marc_record = sort {
267 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
268 } @{ $marc_rec };
269
270 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
271
272 # output marc fields
273 my @m;
274
275 # count unique field-subfields (used for offset when walking to next subfield)
276 my $u;
277 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
278
279 if ($debug) {
280 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
281 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
282 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
283 warn "## subfield count = ", dump( $u ), $/;
284 }
285
286 my $len = $#sorted_marc_record;
287 my $visited;
288 my $i = 0;
289 my $field;
290
291 foreach ( 0 .. $len ) {
292
293 # find next element which isn't visited
294 while ($visited->{$i}) {
295 $i = ($i + 1) % ($len + 1);
296 }
297
298 # mark it visited
299 $visited->{$i}++;
300
301 my $row = dclone( $sorted_marc_record[$i] );
302
303 # field and subfield which is key for
304 # marc_repeatable_subfield and u
305 my $fsf = $row->[0] . ( $row->[3] || '' );
306
307 if ($debug > 1) {
308
309 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
310 print "### this [$i]: ", dump( $row ),$/;
311 print "### sf: ", $row->[3], " vs ", $field->[3],
312 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
313 if ($#$field >= 0);
314
315 }
316
317 # if field exists
318 if ( $#$field >= 0 ) {
319 if (
320 $row->[0] ne $field->[0] || # field
321 $row->[1] ne $field->[1] || # i1
322 $row->[2] ne $field->[2] # i2
323 ) {
324 push @m, $field;
325 warn "## saved/1 ", dump( $field ),$/ if ($debug);
326 $field = $row;
327
328 } elsif (
329 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
330 ||
331 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
332 ! $marc_repeatable_subfield->{ $fsf }
333 )
334 ) {
335 push @m, $field;
336 warn "## saved/2 ", dump( $field ),$/ if ($debug);
337 $field = $row;
338
339 } else {
340 # append new subfields to existing field
341 push @$field, ( $row->[3], $row->[4] );
342 }
343 } else {
344 # insert first field
345 $field = $row;
346 }
347
348 if (! $marc_repeatable_subfield->{ $fsf }) {
349 # make step to next subfield
350 $i = ($i + $u->{ $fsf } ) % ($len + 1);
351 }
352 }
353
354 if ($#$field >= 0) {
355 push @m, $field;
356 warn "## saved/3 ", dump( $field ),$/ if ($debug);
357 }
358
359 return \@m;
360 }
361
362 =head2 _debug
363
364 Change level of debug warnings
365
366 _debug( 2 );
367
368 =cut
369
370 sub _debug {
371 my $l = shift;
372 return $debug unless defined($l);
373 warn "debug level $l",$/ if ($l > 0);
374 $debug = $l;
375 }
376
377 =head1 Functions to create C<data_structure>
378
379 Those functions generally have to first in your normalization file.
380
381 =head2 tag
382
383 Define new tag for I<search> and I<display>.
384
385 tag('Title', rec('200','a') );
386
387
388 =cut
389
390 sub tag {
391 my $name = shift or die "tag needs name as first argument";
392 my @o = grep { defined($_) && $_ ne '' } @_;
393 return unless (@o);
394 $out->{$name}->{tag} = $name;
395 $out->{$name}->{search} = \@o;
396 $out->{$name}->{display} = \@o;
397 }
398
399 =head2 display
400
401 Define tag just for I<display>
402
403 @v = display('Title', rec('200','a') );
404
405 =cut
406
407 sub display {
408 my $name = shift or die "display needs name as first argument";
409 my @o = grep { defined($_) && $_ ne '' } @_;
410 return unless (@o);
411 $out->{$name}->{tag} = $name;
412 $out->{$name}->{display} = \@o;
413 }
414
415 =head2 search
416
417 Prepare values just for I<search>
418
419 @v = search('Title', rec('200','a') );
420
421 =cut
422
423 sub search {
424 my $name = shift or die "search needs name as first argument";
425 my @o = grep { defined($_) && $_ ne '' } @_;
426 return unless (@o);
427 $out->{$name}->{tag} = $name;
428 $out->{$name}->{search} = \@o;
429 }
430
431 =head2 marc_leader
432
433 Setup fields within MARC leader or get leader
434
435 marc_leader('05','c');
436 my $leader = marc_leader();
437
438 =cut
439
440 sub marc_leader {
441 my ($offset,$value) = @_;
442
443 if ($offset) {
444 $out->{' leader'}->{ $offset } = $value;
445 } else {
446 return $out->{' leader'};
447 }
448 }
449
450 =head2 marc
451
452 Save value for MARC field
453
454 marc('900','a', rec('200','a') );
455 marc('001', rec('000') );
456
457 =cut
458
459 sub marc {
460 my $f = shift or die "marc needs field";
461 die "marc field must be numer" unless ($f =~ /^\d+$/);
462
463 my $sf;
464 if ($f >= 10) {
465 $sf = shift or die "marc needs subfield";
466 }
467
468 foreach (@_) {
469 my $v = $_; # make var read-write for Encode
470 next unless (defined($v) && $v !~ /^\s*$/);
471 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
472 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
473 if (defined $sf) {
474 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
475 } else {
476 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
477 }
478 }
479 }
480
481 =head2 marc_repeatable_subfield
482
483 Save values for MARC repetable subfield
484
485 marc_repeatable_subfield('910', 'z', rec('909') );
486
487 =cut
488
489 sub marc_repeatable_subfield {
490 my ($f,$sf) = @_;
491 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
492 $marc_repeatable_subfield->{ $f . $sf }++;
493 marc(@_);
494 }
495
496 =head2 marc_indicators
497
498 Set both indicators for MARC field
499
500 marc_indicators('900', ' ', 1);
501
502 Any indicator value other than C<0-9> will be treated as undefined.
503
504 =cut
505
506 sub marc_indicators {
507 my $f = shift || die "marc_indicators need field!\n";
508 my ($i1,$i2) = @_;
509 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
510 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
511
512 $i1 = ' ' if ($i1 !~ /^\d$/);
513 $i2 = ' ' if ($i2 !~ /^\d$/);
514 @{ $marc_indicators->{$f} } = ($i1,$i2);
515 }
516
517 =head2 marc_compose
518
519 Save values for each MARC subfield explicitly
520
521 marc_compose('900',
522 'a', rec('200','a')
523 'b', rec('201','a')
524 'a', rec('200','b')
525 'c', rec('200','c')
526 );
527
528 =cut
529
530 sub marc_compose {
531 my $f = shift or die "marc_compose needs field";
532 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
533
534 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
535 my $m = [ $f, $i1, $i2 ];
536
537 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
538
539 while (@_) {
540 my $sf = shift or die "marc_compose $f needs subfield";
541 my $v = shift;
542
543 next unless (defined($v) && $v !~ /^\s*$/);
544 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
545 push @$m, ( $sf, $v );
546 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
547 }
548
549 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
550
551 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
552 }
553
554 =head2 marc_duplicate
555
556 Generate copy of current MARC record and continue working on copy
557
558 marc_duplicate();
559
560 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
561 C<< _get_marc_fields( offset => 42 ) >>.
562
563 =cut
564
565 sub marc_duplicate {
566 my $m = $marc_record->[ -1 ];
567 die "can't duplicate record which isn't defined" unless ($m);
568 push @{ $marc_record }, dclone( $m );
569 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
570 $marc_record_offset = $#{ $marc_record };
571 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
572 }
573
574 =head2 marc_remove
575
576 Remove some field or subfield from MARC record.
577
578 marc_remove('200');
579 marc_remove('200','a');
580
581 This will erase field C<200> or C<200^a> from current MARC record.
582
583 This is useful after calling C<marc_duplicate> or on it's own (but, you
584 should probably just remove that subfield definition if you are not
585 using C<marc_duplicate>).
586
587 FIXME: support fields < 10.
588
589 =cut
590
591 sub marc_remove {
592 my ($f, $sf) = @_;
593
594 die "marc_remove needs record number" unless defined($f);
595
596 my $marc = $marc_record->[ $marc_record_offset ];
597
598 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
599
600 my $i = 0;
601 foreach ( 0 .. $#{ $marc } ) {
602 last unless (defined $marc->[$i]);
603 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
604 if ($marc->[$i]->[0] eq $f) {
605 if (! defined $sf) {
606 # remove whole field
607 splice @$marc, $i, 1;
608 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
609 $i--;
610 } else {
611 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
612 my $o = ($j * 2) + 3;
613 if ($marc->[$i]->[$o] eq $sf) {
614 # remove subfield
615 splice @{$marc->[$i]}, $o, 2;
616 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
617 # is record now empty?
618 if ($#{ $marc->[$i] } == 2) {
619 splice @$marc, $i, 1;
620 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
621 $i--;
622 };
623 }
624 }
625 }
626 }
627 $i++;
628 }
629
630 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
631
632 $marc_record->[ $marc_record_offset ] = $marc;
633
634 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
635 }
636
637 =head1 Functions to extract data from input
638
639 This function should be used inside functions to create C<data_structure> described
640 above.
641
642 =head2 rec1
643
644 Return all values in some field
645
646 @v = rec1('200')
647
648 TODO: order of values is probably same as in source data, need to investigate that
649
650 =cut
651
652 sub rec1 {
653 my $f = shift;
654 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
655 return unless (defined($rec) && defined($rec->{$f}));
656 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
657 if (ref($rec->{$f}) eq 'ARRAY') {
658 return map {
659 if (ref($_) eq 'HASH') {
660 values %{$_};
661 } else {
662 $_;
663 }
664 } @{ $rec->{$f} };
665 } elsif( defined($rec->{$f}) ) {
666 return $rec->{$f};
667 }
668 }
669
670 =head2 rec2
671
672 Return all values in specific field and subfield
673
674 @v = rec2('200','a')
675
676 =cut
677
678 sub rec2 {
679 my $f = shift;
680 return unless (defined($rec && $rec->{$f}));
681 my $sf = shift;
682 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
683 return map {
684 if (ref($_->{$sf}) eq 'ARRAY') {
685 @{ $_->{$sf} };
686 } else {
687 $_->{$sf};
688 }
689 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
690 }
691
692 =head2 rec
693
694 syntaxtic sugar for
695
696 @v = rec('200')
697 @v = rec('200','a')
698
699 =cut
700
701 sub rec {
702 my @out;
703 if ($#_ == 0) {
704 @out = rec1(@_);
705 } elsif ($#_ == 1) {
706 @out = rec2(@_);
707 }
708 if (@out) {
709 return @out;
710 } else {
711 return '';
712 }
713 }
714
715 =head2 regex
716
717 Apply regex to some or all values
718
719 @v = regex( 's/foo/bar/g', @v );
720
721 =cut
722
723 sub regex {
724 my $r = shift;
725 my @out;
726 #warn "r: $r\n", dump(\@_);
727 foreach my $t (@_) {
728 next unless ($t);
729 eval "\$t =~ $r";
730 push @out, $t if ($t && $t ne '');
731 }
732 return @out;
733 }
734
735 =head2 prefix
736
737 Prefix all values with a string
738
739 @v = prefix( 'my_', @v );
740
741 =cut
742
743 sub prefix {
744 my $p = shift or return;
745 return map { $p . $_ } grep { defined($_) } @_;
746 }
747
748 =head2 suffix
749
750 suffix all values with a string
751
752 @v = suffix( '_my', @v );
753
754 =cut
755
756 sub suffix {
757 my $s = shift or die "suffix needs string as first argument";
758 return map { $_ . $s } grep { defined($_) } @_;
759 }
760
761 =head2 surround
762
763 surround all values with a two strings
764
765 @v = surround( 'prefix_', '_suffix', @v );
766
767 =cut
768
769 sub surround {
770 my $p = shift or die "surround need prefix as first argument";
771 my $s = shift or die "surround needs suffix as second argument";
772 return map { $p . $_ . $s } grep { defined($_) } @_;
773 }
774
775 =head2 first
776
777 Return first element
778
779 $v = first( @v );
780
781 =cut
782
783 sub first {
784 my $r = shift;
785 return $r;
786 }
787
788 =head2 lookup
789
790 Consult lookup hashes for some value
791
792 @v = lookup( $v );
793 @v = lookup( @v );
794
795 =cut
796
797 sub lookup {
798 my $k = shift or return;
799 return unless (defined($lookup->{$k}));
800 if (ref($lookup->{$k}) eq 'ARRAY') {
801 return @{ $lookup->{$k} };
802 } else {
803 return $lookup->{$k};
804 }
805 }
806
807 =head2 config
808
809 Consult config values stored in C<config.yml>
810
811 # return database code (key under databases in yaml)
812 $database_code = config(); # use _ from hash
813 $database_name = config('name');
814 $database_input_name = config('input name');
815 $tag = config('input normalize tag');
816
817 Up to three levels are supported.
818
819 =cut
820
821 sub config {
822 return unless ($config);
823
824 my $p = shift;
825
826 $p ||= '';
827
828 my $v;
829
830 warn "### getting config($p)\n" if ($debug > 1);
831
832 my @p = split(/\s+/,$p);
833 if ($#p < 0) {
834 $v = $config->{ '_' }; # special, database code
835 } else {
836
837 my $c = dclone( $config );
838
839 foreach my $k (@p) {
840 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
841 if (ref($c) eq 'ARRAY') {
842 $c = shift @$c;
843 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
844 last;
845 }
846
847 if (! defined($c->{$k}) ) {
848 $c = undef;
849 last;
850 } else {
851 $c = $c->{$k};
852 }
853 }
854 $v = $c if ($c);
855
856 }
857
858 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
859 warn "config( '$p' ) is empty\n" if (! $v);
860
861 return $v;
862 }
863
864 =head2 id
865
866 Returns unique id of this record
867
868 $id = id();
869
870 Returns C<42/2> for 2nd occurence of MFN 42.
871
872 =cut
873
874 sub id {
875 my $mfn = $config->{_mfn} || die "no _mfn in config data";
876 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
877 }
878
879 =head2 join_with
880
881 Joins walues with some delimiter
882
883 $v = join_with(", ", @v);
884
885 =cut
886
887 sub join_with {
888 my $d = shift;
889 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
890 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
891 return '' unless defined($v);
892 return $v;
893 }
894
895 =head2 split_rec_on
896
897 Split record subfield on some regex and take one of parts out
898
899 $a_before_semi_column =
900 split_rec_on('200','a', /\s*;\s*/, $part);
901
902 C<$part> is optional number of element. First element is
903 B<1>, not 0!
904
905 If there is no C<$part> parameter or C<$part> is 0, this function will
906 return all values produced by splitting.
907
908 =cut
909
910 sub split_rec_on {
911 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
912
913 my ($fld, $sf, $regex, $part) = @_;
914 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
915
916 my @r = rec( $fld, $sf );
917 my $v = shift @r;
918 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
919
920 return '' if( ! defined($v) || $v =~ /^\s*$/);
921
922 my @s = split( $regex, $v );
923 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
924 if ($part && $part > 0) {
925 return $s[ $part - 1 ];
926 } else {
927 return @s;
928 }
929 }
930
931 # END
932 1;

  ViewVC Help
Powered by ViewVC 1.1.26