/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 603 - (show annotations)
Sun Jul 23 20:19:56 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 19443 byte(s)
 r842@llin:  dpavlin | 2006-07-23 22:23:52 +0200
 append subfields

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6 _debug
7
8 tag search display
9 marc marc_indicators marc_repeatable_subfield
10 marc_compose marc_leader
11 marc_duplicate marc_remove
12
13 rec1 rec2 rec
14 regex prefix suffix surround
15 first lookup join_with
16
17 split_rec_on
18 /;
19
20 use warnings;
21 use strict;
22
23 #use base qw/WebPAC::Common/;
24 use Data::Dump qw/dump/;
25 use Encode qw/from_to/;
26 use Storable qw/dclone/;
27
28 # debugging warn(s)
29 my $debug = 0;
30
31
32 =head1 NAME
33
34 WebPAC::Normalize - describe normalisaton rules using sets
35
36 =head1 VERSION
37
38 Version 0.15
39
40 =cut
41
42 our $VERSION = '0.15';
43
44 =head1 SYNOPSIS
45
46 This module uses C<conf/normalize/*.pl> files to perform normalisation
47 from input records using perl functions which are specialized for set
48 processing.
49
50 Sets are implemented as arrays, and normalisation file is valid perl, which
51 means that you check it's validity before running WebPAC using
52 C<perl -c normalize.pl>.
53
54 Normalisation can generate multiple output normalized data. For now, supported output
55 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
56 C<marc>.
57
58 =head1 FUNCTIONS
59
60 Functions which start with C<_> are private and used by WebPAC internally.
61 All other functions are available for use within normalisation rules.
62
63 =head2 data_structure
64
65 Return data structure
66
67 my $ds = WebPAC::Normalize::data_structure(
68 lookup => $lookup->lookup_hash,
69 row => $row,
70 rules => $normalize_pl_config,
71 marc_encoding => 'utf-8',
72 config => $config,
73 );
74
75 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
76 other are optional.
77
78 This function will B<die> if normalizastion can't be evaled.
79
80 Since this function isn't exported you have to call it with
81 C<WebPAC::Normalize::data_structure>.
82
83 =cut
84
85 sub data_structure {
86 my $arg = {@_};
87
88 die "need row argument" unless ($arg->{row});
89 die "need normalisation argument" unless ($arg->{rules});
90
91 no strict 'subs';
92 _set_lookup( $arg->{lookup} );
93 _set_rec( $arg->{row} );
94 _set_config( $arg->{config} );
95 _clean_ds( %{ $arg } );
96 eval "$arg->{rules}";
97 die "error evaling $arg->{rules}: $@\n" if ($@);
98
99 return _get_ds();
100 }
101
102 =head2 _set_rec
103
104 Set current record hash
105
106 _set_rec( $rec );
107
108 =cut
109
110 my $rec;
111
112 sub _set_rec {
113 $rec = shift or die "no record hash";
114 }
115
116 =head2 _set_config
117
118 Set current config hash
119
120 _set_config( $config );
121
122 Magic keys are:
123
124 =over 4
125
126 =item _
127
128 Code of current database
129
130 =item _mfn
131
132 Current MFN
133
134 =back
135
136 =cut
137
138 my $config;
139
140 sub _set_config {
141 $config = shift;
142 }
143
144 =head2 _get_ds
145
146 Return hash formatted as data structure
147
148 my $ds = _get_ds();
149
150 =cut
151
152 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
153 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
154
155 sub _get_ds {
156 return $out;
157 }
158
159 =head2 _clean_ds
160
161 Clean data structure hash for next record
162
163 _clean_ds();
164
165 =cut
166
167 sub _clean_ds {
168 my $a = {@_};
169 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
170 ($marc_record_offset, $marc_fetch_offset) = (0,0);
171 $marc_encoding = $a->{marc_encoding};
172 }
173
174 =head2 _set_lookup
175
176 Set current lookup hash
177
178 _set_lookup( $lookup );
179
180 =cut
181
182 my $lookup;
183
184 sub _set_lookup {
185 $lookup = shift;
186 }
187
188 =head2 _get_marc_fields
189
190 Get all fields defined by calls to C<marc>
191
192 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
193
194 We are using I<magic> which detect repeatable fields only from
195 sequence of field/subfield data generated by normalization.
196
197 Repeatable field is created when there is second occurence of same subfield or
198 if any of indicators are different.
199
200 This is sane for most cases. Something like:
201
202 900a-1 900b-1 900c-1
203 900a-2 900b-2
204 900a-3
205
206 will be created from any combination of:
207
208 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
209
210 and following rules:
211
212 marc('900','a', rec('200','a') );
213 marc('900','b', rec('200','b') );
214 marc('900','c', rec('200','c') );
215
216 which might not be what you have in mind. If you need repeatable subfield,
217 define it using C<marc_repeatable_subfield> like this:
218
219 marc_repeatable_subfield('900','a');
220 marc('900','a', rec('200','a') );
221 marc('900','b', rec('200','b') );
222 marc('900','c', rec('200','c') );
223
224 will create:
225
226 900a-1 900a-2 900a-3 900b-1 900c-1
227 900b-2
228
229 There is also support for returning next or specific using:
230
231 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
232 # do something with $mf
233 }
234
235 will always return fields from next MARC record or
236
237 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
238
239 will return 42th copy record (if it exists).
240
241 =cut
242
243 sub _get_marc_fields {
244
245 my $arg = {@_};
246 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
247 my $offset = $marc_fetch_offset;
248 if ($arg->{offset}) {
249 $offset = $arg->{offset};
250 } elsif($arg->{fetch_next}) {
251 $marc_fetch_offset++;
252 }
253
254 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
255
256 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
257
258 my $marc_rec = $marc_record->[ $offset ];
259
260 warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
261
262 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
263
264 # first, sort all existing fields
265 # XXX might not be needed, but modern perl might randomize elements in hash
266 my @sorted_marc_record = sort {
267 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
268 } @{ $marc_rec };
269
270 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
271
272 # output marc fields
273 my @m;
274
275 # count unique field-subfields (used for offset when walking to next subfield)
276 my $u;
277 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
278
279 if ($debug) {
280 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
281 warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
282 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
283 warn "## subfield count = ", dump( $u ), $/;
284 }
285
286 my $len = $#sorted_marc_record;
287 my $visited;
288 my $i = 0;
289 my $field;
290
291 foreach ( 0 .. $len ) {
292
293 # find next element which isn't visited
294 while ($visited->{$i}) {
295 $i = ($i + 1) % ($len + 1);
296 }
297
298 # mark it visited
299 $visited->{$i}++;
300
301 my $row = dclone( $sorted_marc_record[$i] );
302
303 # field and subfield which is key for
304 # marc_repeatable_subfield and u
305 my $fsf = $row->[0] . ( $row->[3] || '' );
306
307 if ($debug > 1) {
308
309 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
310 print "### this [$i]: ", dump( $row ),$/;
311 print "### sf: ", $row->[3], " vs ", $field->[3],
312 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
313 if ($#$field >= 0);
314
315 }
316
317 # if field exists
318 if ( $#$field >= 0 ) {
319 if (
320 $row->[0] ne $field->[0] || # field
321 $row->[1] ne $field->[1] || # i1
322 $row->[2] ne $field->[2] # i2
323 ) {
324 push @m, $field;
325 warn "## saved/1 ", dump( $field ),$/ if ($debug);
326 $field = $row;
327
328 } elsif (
329 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
330 ||
331 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
332 ! $marc_repeatable_subfield->{ $fsf }
333 )
334 ) {
335 push @m, $field;
336 warn "## saved/2 ", dump( $field ),$/ if ($debug);
337 $field = $row;
338
339 } else {
340 # append new subfields to existing field
341 push @$field, ( $row->[3], $row->[4] );
342 }
343 } else {
344 # insert first field
345 $field = $row;
346 }
347
348 if (! $marc_repeatable_subfield->{ $fsf }) {
349 # make step to next subfield
350 $i = ($i + $u->{ $fsf } ) % ($len + 1);
351 }
352 }
353
354 if ($#$field >= 0) {
355 push @m, $field;
356 warn "## saved/3 ", dump( $field ),$/ if ($debug);
357 }
358
359 return \@m;
360 }
361
362 =head2 _debug
363
364 Change level of debug warnings
365
366 _debug( 2 );
367
368 =cut
369
370 sub _debug {
371 my $l = shift;
372 return $debug unless defined($l);
373 warn "debug level $l",$/ if ($l > 0);
374 $debug = $l;
375 }
376
377 =head1 Functions to create C<data_structure>
378
379 Those functions generally have to first in your normalization file.
380
381 =head2 tag
382
383 Define new tag for I<search> and I<display>.
384
385 tag('Title', rec('200','a') );
386
387
388 =cut
389
390 sub tag {
391 my $name = shift or die "tag needs name as first argument";
392 my @o = grep { defined($_) && $_ ne '' } @_;
393 return unless (@o);
394 $out->{$name}->{tag} = $name;
395 $out->{$name}->{search} = \@o;
396 $out->{$name}->{display} = \@o;
397 }
398
399 =head2 display
400
401 Define tag just for I<display>
402
403 @v = display('Title', rec('200','a') );
404
405 =cut
406
407 sub display {
408 my $name = shift or die "display needs name as first argument";
409 my @o = grep { defined($_) && $_ ne '' } @_;
410 return unless (@o);
411 $out->{$name}->{tag} = $name;
412 $out->{$name}->{display} = \@o;
413 }
414
415 =head2 search
416
417 Prepare values just for I<search>
418
419 @v = search('Title', rec('200','a') );
420
421 =cut
422
423 sub search {
424 my $name = shift or die "search needs name as first argument";
425 my @o = grep { defined($_) && $_ ne '' } @_;
426 return unless (@o);
427 $out->{$name}->{tag} = $name;
428 $out->{$name}->{search} = \@o;
429 }
430
431 =head2 marc_leader
432
433 Setup fields within MARC leader or get leader
434
435 marc_leader('05','c');
436 my $leader = marc_leader();
437
438 =cut
439
440 sub marc_leader {
441 my ($offset,$value) = @_;
442
443 if ($offset) {
444 $out->{' leader'}->{ $offset } = $value;
445 } else {
446 return $out->{' leader'};
447 }
448 }
449
450 =head2 marc
451
452 Save value for MARC field
453
454 marc('900','a', rec('200','a') );
455 marc('001', rec('000') );
456
457 =cut
458
459 sub marc {
460 my $f = shift or die "marc needs field";
461 die "marc field must be numer" unless ($f =~ /^\d+$/);
462
463 my $sf;
464 if ($f >= 10) {
465 $sf = shift or die "marc needs subfield";
466 }
467
468 foreach (@_) {
469 my $v = $_; # make var read-write for Encode
470 next unless (defined($v) && $v !~ /^\s*$/);
471 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
472 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
473 if (defined $sf) {
474 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
475 } else {
476 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
477 }
478 }
479 }
480
481 =head2 marc_repeatable_subfield
482
483 Save values for MARC repetable subfield
484
485 marc_repeatable_subfield('910', 'z', rec('909') );
486
487 =cut
488
489 sub marc_repeatable_subfield {
490 my ($f,$sf) = @_;
491 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
492 $marc_repeatable_subfield->{ $f . $sf }++;
493 marc(@_);
494 }
495
496 =head2 marc_indicators
497
498 Set both indicators for MARC field
499
500 marc_indicators('900', ' ', 1);
501
502 Any indicator value other than C<0-9> will be treated as undefined.
503
504 =cut
505
506 sub marc_indicators {
507 my $f = shift || die "marc_indicators need field!\n";
508 my ($i1,$i2) = @_;
509 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
510 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
511
512 $i1 = ' ' if ($i1 !~ /^\d$/);
513 $i2 = ' ' if ($i2 !~ /^\d$/);
514 @{ $marc_indicators->{$f} } = ($i1,$i2);
515 }
516
517 =head2 marc_compose
518
519 Save values for each MARC subfield explicitly
520
521 marc_compose('900',
522 'a', rec('200','a')
523 'b', rec('201','a')
524 'a', rec('200','b')
525 'c', rec('200','c')
526 );
527
528 If you specify C<+> for subfield, value will be appended
529 to previous defined subfield.
530
531 =cut
532
533 sub marc_compose {
534 my $f = shift or die "marc_compose needs field";
535 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
536
537 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
538 my $m = [ $f, $i1, $i2 ];
539
540 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
541
542 while (@_) {
543 my $sf = shift or die "marc_compose $f needs subfield";
544 my $v = shift;
545
546 next unless (defined($v) && $v !~ /^\s*$/);
547 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
548 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
549 if ($sf ne '+') {
550 push @$m, ( $sf, $v );
551 } else {
552 $m->[ $#$m ] .= $v;
553 }
554 }
555
556 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
557
558 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
559 }
560
561 =head2 marc_duplicate
562
563 Generate copy of current MARC record and continue working on copy
564
565 marc_duplicate();
566
567 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
568 C<< _get_marc_fields( offset => 42 ) >>.
569
570 =cut
571
572 sub marc_duplicate {
573 my $m = $marc_record->[ -1 ];
574 die "can't duplicate record which isn't defined" unless ($m);
575 push @{ $marc_record }, dclone( $m );
576 warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
577 $marc_record_offset = $#{ $marc_record };
578 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
579 }
580
581 =head2 marc_remove
582
583 Remove some field or subfield from MARC record.
584
585 marc_remove('200');
586 marc_remove('200','a');
587
588 This will erase field C<200> or C<200^a> from current MARC record.
589
590 This is useful after calling C<marc_duplicate> or on it's own (but, you
591 should probably just remove that subfield definition if you are not
592 using C<marc_duplicate>).
593
594 FIXME: support fields < 10.
595
596 =cut
597
598 sub marc_remove {
599 my ($f, $sf) = @_;
600
601 die "marc_remove needs record number" unless defined($f);
602
603 my $marc = $marc_record->[ $marc_record_offset ];
604
605 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
606
607 my $i = 0;
608 foreach ( 0 .. $#{ $marc } ) {
609 last unless (defined $marc->[$i]);
610 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
611 if ($marc->[$i]->[0] eq $f) {
612 if (! defined $sf) {
613 # remove whole field
614 splice @$marc, $i, 1;
615 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
616 $i--;
617 } else {
618 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
619 my $o = ($j * 2) + 3;
620 if ($marc->[$i]->[$o] eq $sf) {
621 # remove subfield
622 splice @{$marc->[$i]}, $o, 2;
623 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
624 # is record now empty?
625 if ($#{ $marc->[$i] } == 2) {
626 splice @$marc, $i, 1;
627 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
628 $i--;
629 };
630 }
631 }
632 }
633 }
634 $i++;
635 }
636
637 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
638
639 $marc_record->[ $marc_record_offset ] = $marc;
640
641 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
642 }
643
644 =head1 Functions to extract data from input
645
646 This function should be used inside functions to create C<data_structure> described
647 above.
648
649 =head2 rec1
650
651 Return all values in some field
652
653 @v = rec1('200')
654
655 TODO: order of values is probably same as in source data, need to investigate that
656
657 =cut
658
659 sub rec1 {
660 my $f = shift;
661 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
662 return unless (defined($rec) && defined($rec->{$f}));
663 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
664 if (ref($rec->{$f}) eq 'ARRAY') {
665 return map {
666 if (ref($_) eq 'HASH') {
667 values %{$_};
668 } else {
669 $_;
670 }
671 } @{ $rec->{$f} };
672 } elsif( defined($rec->{$f}) ) {
673 return $rec->{$f};
674 }
675 }
676
677 =head2 rec2
678
679 Return all values in specific field and subfield
680
681 @v = rec2('200','a')
682
683 =cut
684
685 sub rec2 {
686 my $f = shift;
687 return unless (defined($rec && $rec->{$f}));
688 my $sf = shift;
689 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
690 return map {
691 if (ref($_->{$sf}) eq 'ARRAY') {
692 @{ $_->{$sf} };
693 } else {
694 $_->{$sf};
695 }
696 } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
697 }
698
699 =head2 rec
700
701 syntaxtic sugar for
702
703 @v = rec('200')
704 @v = rec('200','a')
705
706 =cut
707
708 sub rec {
709 my @out;
710 if ($#_ == 0) {
711 @out = rec1(@_);
712 } elsif ($#_ == 1) {
713 @out = rec2(@_);
714 }
715 if (@out) {
716 return @out;
717 } else {
718 return '';
719 }
720 }
721
722 =head2 regex
723
724 Apply regex to some or all values
725
726 @v = regex( 's/foo/bar/g', @v );
727
728 =cut
729
730 sub regex {
731 my $r = shift;
732 my @out;
733 #warn "r: $r\n", dump(\@_);
734 foreach my $t (@_) {
735 next unless ($t);
736 eval "\$t =~ $r";
737 push @out, $t if ($t && $t ne '');
738 }
739 return @out;
740 }
741
742 =head2 prefix
743
744 Prefix all values with a string
745
746 @v = prefix( 'my_', @v );
747
748 =cut
749
750 sub prefix {
751 my $p = shift or return;
752 return map { $p . $_ } grep { defined($_) } @_;
753 }
754
755 =head2 suffix
756
757 suffix all values with a string
758
759 @v = suffix( '_my', @v );
760
761 =cut
762
763 sub suffix {
764 my $s = shift or die "suffix needs string as first argument";
765 return map { $_ . $s } grep { defined($_) } @_;
766 }
767
768 =head2 surround
769
770 surround all values with a two strings
771
772 @v = surround( 'prefix_', '_suffix', @v );
773
774 =cut
775
776 sub surround {
777 my $p = shift or die "surround need prefix as first argument";
778 my $s = shift or die "surround needs suffix as second argument";
779 return map { $p . $_ . $s } grep { defined($_) } @_;
780 }
781
782 =head2 first
783
784 Return first element
785
786 $v = first( @v );
787
788 =cut
789
790 sub first {
791 my $r = shift;
792 return $r;
793 }
794
795 =head2 lookup
796
797 Consult lookup hashes for some value
798
799 @v = lookup( $v );
800 @v = lookup( @v );
801
802 =cut
803
804 sub lookup {
805 my $k = shift or return;
806 return unless (defined($lookup->{$k}));
807 if (ref($lookup->{$k}) eq 'ARRAY') {
808 return @{ $lookup->{$k} };
809 } else {
810 return $lookup->{$k};
811 }
812 }
813
814 =head2 config
815
816 Consult config values stored in C<config.yml>
817
818 # return database code (key under databases in yaml)
819 $database_code = config(); # use _ from hash
820 $database_name = config('name');
821 $database_input_name = config('input name');
822 $tag = config('input normalize tag');
823
824 Up to three levels are supported.
825
826 =cut
827
828 sub config {
829 return unless ($config);
830
831 my $p = shift;
832
833 $p ||= '';
834
835 my $v;
836
837 warn "### getting config($p)\n" if ($debug > 1);
838
839 my @p = split(/\s+/,$p);
840 if ($#p < 0) {
841 $v = $config->{ '_' }; # special, database code
842 } else {
843
844 my $c = dclone( $config );
845
846 foreach my $k (@p) {
847 warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
848 if (ref($c) eq 'ARRAY') {
849 $c = shift @$c;
850 warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
851 last;
852 }
853
854 if (! defined($c->{$k}) ) {
855 $c = undef;
856 last;
857 } else {
858 $c = $c->{$k};
859 }
860 }
861 $v = $c if ($c);
862
863 }
864
865 warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
866 warn "config( '$p' ) is empty\n" if (! $v);
867
868 return $v;
869 }
870
871 =head2 id
872
873 Returns unique id of this record
874
875 $id = id();
876
877 Returns C<42/2> for 2nd occurence of MFN 42.
878
879 =cut
880
881 sub id {
882 my $mfn = $config->{_mfn} || die "no _mfn in config data";
883 return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
884 }
885
886 =head2 join_with
887
888 Joins walues with some delimiter
889
890 $v = join_with(", ", @v);
891
892 =cut
893
894 sub join_with {
895 my $d = shift;
896 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
897 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
898 return '' unless defined($v);
899 return $v;
900 }
901
902 =head2 split_rec_on
903
904 Split record subfield on some regex and take one of parts out
905
906 $a_before_semi_column =
907 split_rec_on('200','a', /\s*;\s*/, $part);
908
909 C<$part> is optional number of element. First element is
910 B<1>, not 0!
911
912 If there is no C<$part> parameter or C<$part> is 0, this function will
913 return all values produced by splitting.
914
915 =cut
916
917 sub split_rec_on {
918 die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
919
920 my ($fld, $sf, $regex, $part) = @_;
921 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
922
923 my @r = rec( $fld, $sf );
924 my $v = shift @r;
925 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
926
927 return '' if( ! defined($v) || $v =~ /^\s*$/);
928
929 my @s = split( $regex, $v );
930 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
931 if ($part && $part > 0) {
932 return $s[ $part - 1 ];
933 } else {
934 return @s;
935 }
936 }
937
938 # END
939 1;

  ViewVC Help
Powered by ViewVC 1.1.26