/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 589 - (hide annotations)
Fri Jul 7 21:48:09 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 17684 byte(s)
 r817@llin:  dpavlin | 2006-07-07 23:48:50 +0200
 support repeatable subfields from Biblio::Isis 0.20

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3     @EXPORT = qw/
4 dpavlin 538 _set_rec _set_lookup
5     _get_ds _clean_ds
6 dpavlin 554 _debug
7 dpavlin 538
8 dpavlin 536 tag search display
9 dpavlin 547 marc marc_indicators marc_repeatable_subfield
10 dpavlin 564 marc_compose marc_leader
11 dpavlin 574 marc_duplicate marc_remove
12 dpavlin 540
13 dpavlin 536 rec1 rec2 rec
14     regex prefix suffix surround
15     first lookup join_with
16 dpavlin 562
17     split_rec_on
18 dpavlin 536 /;
19 dpavlin 10
20     use warnings;
21     use strict;
22 dpavlin 536
23     #use base qw/WebPAC::Common/;
24 dpavlin 550 use Data::Dump qw/dump/;
25 dpavlin 541 use Encode qw/from_to/;
26 dpavlin 574 use Storable qw/dclone/;
27 dpavlin 10
28 dpavlin 550 # debugging warn(s)
29     my $debug = 0;
30    
31    
32 dpavlin 10 =head1 NAME
33    
34 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
35 dpavlin 10
36     =head1 VERSION
37    
38 dpavlin 589 Version 0.12
39 dpavlin 10
40     =cut
41    
42 dpavlin 589 our $VERSION = '0.12';
43 dpavlin 10
44     =head1 SYNOPSIS
45    
46 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
47     from input records using perl functions which are specialized for set
48     processing.
49 dpavlin 10
50 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
51     means that you check it's validity before running WebPAC using
52     C<perl -c normalize.pl>.
53 dpavlin 15
54 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
55 dpavlin 540 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
56 dpavlin 547 C<marc>.
57 dpavlin 15
58 dpavlin 10 =head1 FUNCTIONS
59    
60 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
61     All other functions are available for use within normalisation rules.
62    
63 dpavlin 536 =head2 data_structure
64 dpavlin 10
65 dpavlin 536 Return data structure
66 dpavlin 13
67 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
68 dpavlin 536 lookup => $lookup->lookup_hash,
69     row => $row,
70     rules => $normalize_pl_config,
71 dpavlin 541 marc_encoding => 'utf-8',
72 dpavlin 13 );
73    
74 dpavlin 540 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
75     other are optional.
76    
77 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
78 dpavlin 15
79 dpavlin 538 Since this function isn't exported you have to call it with
80     C<WebPAC::Normalize::data_structure>.
81    
82 dpavlin 10 =cut
83    
84 dpavlin 536 sub data_structure {
85     my $arg = {@_};
86 dpavlin 13
87 dpavlin 536 die "need row argument" unless ($arg->{row});
88     die "need normalisation argument" unless ($arg->{rules});
89 dpavlin 31
90 dpavlin 536 no strict 'subs';
91 dpavlin 538 _set_lookup( $arg->{lookup} );
92     _set_rec( $arg->{row} );
93 dpavlin 541 _clean_ds( %{ $arg } );
94 dpavlin 536 eval "$arg->{rules}";
95     die "error evaling $arg->{rules}: $@\n" if ($@);
96 dpavlin 540
97 dpavlin 538 return _get_ds();
98 dpavlin 10 }
99    
100 dpavlin 538 =head2 _set_rec
101 dpavlin 13
102 dpavlin 536 Set current record hash
103 dpavlin 433
104 dpavlin 538 _set_rec( $rec );
105 dpavlin 433
106     =cut
107    
108 dpavlin 536 my $rec;
109 dpavlin 433
110 dpavlin 538 sub _set_rec {
111 dpavlin 536 $rec = shift or die "no record hash";
112 dpavlin 433 }
113    
114 dpavlin 538 =head2 _get_ds
115    
116     Return hash formatted as data structure
117    
118     my $ds = _get_ds();
119    
120     =cut
121    
122 dpavlin 574 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
123     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
124 dpavlin 538
125     sub _get_ds {
126     return $out;
127     }
128    
129     =head2 _clean_ds
130    
131     Clean data structure hash for next record
132    
133     _clean_ds();
134    
135     =cut
136    
137     sub _clean_ds {
138 dpavlin 541 my $a = {@_};
139 dpavlin 550 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
140 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
141 dpavlin 541 $marc_encoding = $a->{marc_encoding};
142 dpavlin 538 }
143    
144     =head2 _set_lookup
145    
146     Set current lookup hash
147    
148     _set_lookup( $lookup );
149    
150     =cut
151    
152     my $lookup;
153    
154     sub _set_lookup {
155     $lookup = shift;
156     }
157    
158 dpavlin 547 =head2 _get_marc_fields
159 dpavlin 540
160 dpavlin 547 Get all fields defined by calls to C<marc>
161 dpavlin 540
162 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
163 dpavlin 540
164 dpavlin 543 We are using I<magic> which detect repeatable fields only from
165     sequence of field/subfield data generated by normalization.
166    
167 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
168     if any of indicators are different.
169 dpavlin 543
170 dpavlin 554 This is sane for most cases. Something like:
171 dpavlin 543
172 dpavlin 554 900a-1 900b-1 900c-1
173     900a-2 900b-2
174     900a-3
175    
176     will be created from any combination of:
177    
178     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
179    
180     and following rules:
181    
182     marc('900','a', rec('200','a') );
183     marc('900','b', rec('200','b') );
184     marc('900','c', rec('200','c') );
185    
186     which might not be what you have in mind. If you need repeatable subfield,
187     define it using C<marc_repeatable_subfield> like this:
188    
189 dpavlin 574 marc_repeatable_subfield('900','a');
190     marc('900','a', rec('200','a') );
191     marc('900','b', rec('200','b') );
192     marc('900','c', rec('200','c') );
193 dpavlin 554
194 dpavlin 574 will create:
195    
196     900a-1 900a-2 900a-3 900b-1 900c-1
197     900b-2
198    
199     There is also support for returning next or specific using:
200    
201     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
202     # do something with $mf
203     }
204    
205     will always return fields from next MARC record or
206    
207     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
208    
209     will return 42th copy record (if it exists).
210    
211 dpavlin 540 =cut
212    
213 dpavlin 547 sub _get_marc_fields {
214 dpavlin 550
215 dpavlin 574 my $arg = {@_};
216     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
217     my $offset = $marc_fetch_offset;
218     if ($arg->{offset}) {
219     $offset = $arg->{offset};
220     } elsif($arg->{fetch_next}) {
221     $marc_fetch_offset++;
222     }
223 dpavlin 550
224 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
225    
226     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
227    
228     my $marc_rec = $marc_record->[ $offset ];
229    
230     warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
231    
232     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
233    
234 dpavlin 550 # first, sort all existing fields
235     # XXX might not be needed, but modern perl might randomize elements in hash
236     my @sorted_marc_record = sort {
237 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
238 dpavlin 574 } @{ $marc_rec };
239 dpavlin 550
240 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
241 dpavlin 562
242 dpavlin 550 # output marc fields
243 dpavlin 542 my @m;
244 dpavlin 550
245     # count unique field-subfields (used for offset when walking to next subfield)
246     my $u;
247 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
248 dpavlin 550
249     if ($debug) {
250 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
251     warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;
252     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
253     warn "## subfield count = ", dump( $u ), $/;
254 dpavlin 550 }
255    
256     my $len = $#sorted_marc_record;
257     my $visited;
258     my $i = 0;
259     my $field;
260    
261     foreach ( 0 .. $len ) {
262    
263     # find next element which isn't visited
264     while ($visited->{$i}) {
265     $i = ($i + 1) % ($len + 1);
266 dpavlin 542 }
267    
268 dpavlin 550 # mark it visited
269     $visited->{$i}++;
270    
271 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
272 dpavlin 550
273     # field and subfield which is key for
274     # marc_repeatable_subfield and u
275 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
276 dpavlin 550
277     if ($debug > 1) {
278    
279     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
280     print "### this [$i]: ", dump( $row ),$/;
281     print "### sf: ", $row->[3], " vs ", $field->[3],
282     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
283     if ($#$field >= 0);
284    
285     }
286    
287     # if field exists
288     if ( $#$field >= 0 ) {
289     if (
290     $row->[0] ne $field->[0] || # field
291     $row->[1] ne $field->[1] || # i1
292     $row->[2] ne $field->[2] # i2
293     ) {
294     push @m, $field;
295     warn "## saved/1 ", dump( $field ),$/ if ($debug);
296     $field = $row;
297    
298     } elsif (
299     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
300     ||
301     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
302     ! $marc_repeatable_subfield->{ $fsf }
303     )
304     ) {
305     push @m, $field;
306     warn "## saved/2 ", dump( $field ),$/ if ($debug);
307     $field = $row;
308    
309     } else {
310     # append new subfields to existing field
311     push @$field, ( $row->[3], $row->[4] );
312     }
313     } else {
314     # insert first field
315     $field = $row;
316     }
317    
318     if (! $marc_repeatable_subfield->{ $fsf }) {
319     # make step to next subfield
320     $i = ($i + $u->{ $fsf } ) % ($len + 1);
321     }
322 dpavlin 542 }
323    
324 dpavlin 550 if ($#$field >= 0) {
325     push @m, $field;
326     warn "## saved/3 ", dump( $field ),$/ if ($debug);
327     }
328 dpavlin 542
329 dpavlin 579 return \@m;
330 dpavlin 540 }
331    
332 dpavlin 554 =head2 _debug
333    
334     Change level of debug warnings
335    
336     _debug( 2 );
337    
338     =cut
339    
340     sub _debug {
341     my $l = shift;
342     return $debug unless defined($l);
343 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
344 dpavlin 554 $debug = $l;
345     }
346    
347 dpavlin 540 =head1 Functions to create C<data_structure>
348    
349     Those functions generally have to first in your normalization file.
350    
351 dpavlin 536 =head2 tag
352 dpavlin 433
353 dpavlin 536 Define new tag for I<search> and I<display>.
354 dpavlin 433
355 dpavlin 536 tag('Title', rec('200','a') );
356 dpavlin 13
357    
358     =cut
359    
360 dpavlin 536 sub tag {
361     my $name = shift or die "tag needs name as first argument";
362     my @o = grep { defined($_) && $_ ne '' } @_;
363     return unless (@o);
364     $out->{$name}->{tag} = $name;
365     $out->{$name}->{search} = \@o;
366     $out->{$name}->{display} = \@o;
367     }
368 dpavlin 13
369 dpavlin 536 =head2 display
370 dpavlin 13
371 dpavlin 536 Define tag just for I<display>
372 dpavlin 125
373 dpavlin 536 @v = display('Title', rec('200','a') );
374 dpavlin 125
375 dpavlin 536 =cut
376 dpavlin 125
377 dpavlin 536 sub display {
378     my $name = shift or die "display needs name as first argument";
379     my @o = grep { defined($_) && $_ ne '' } @_;
380     return unless (@o);
381     $out->{$name}->{tag} = $name;
382     $out->{$name}->{display} = \@o;
383     }
384 dpavlin 13
385 dpavlin 536 =head2 search
386 dpavlin 13
387 dpavlin 536 Prepare values just for I<search>
388 dpavlin 13
389 dpavlin 536 @v = search('Title', rec('200','a') );
390 dpavlin 433
391 dpavlin 536 =cut
392 dpavlin 13
393 dpavlin 536 sub search {
394     my $name = shift or die "search needs name as first argument";
395     my @o = grep { defined($_) && $_ ne '' } @_;
396     return unless (@o);
397     $out->{$name}->{tag} = $name;
398     $out->{$name}->{search} = \@o;
399 dpavlin 13 }
400    
401 dpavlin 564 =head2 marc_leader
402    
403     Setup fields within MARC leader or get leader
404    
405     marc_leader('05','c');
406     my $leader = marc_leader();
407    
408     =cut
409    
410     sub marc_leader {
411     my ($offset,$value) = @_;
412    
413     if ($offset) {
414     $out->{' leader'}->{ $offset } = $value;
415     } else {
416     return $out->{' leader'};
417     }
418     }
419    
420 dpavlin 547 =head2 marc
421 dpavlin 540
422     Save value for MARC field
423    
424 dpavlin 547 marc('900','a', rec('200','a') );
425 dpavlin 571 marc('001', rec('000') );
426 dpavlin 540
427     =cut
428    
429 dpavlin 547 sub marc {
430     my $f = shift or die "marc needs field";
431     die "marc field must be numer" unless ($f =~ /^\d+$/);
432 dpavlin 540
433 dpavlin 571 my $sf;
434     if ($f >= 10) {
435     $sf = shift or die "marc needs subfield";
436     }
437 dpavlin 540
438 dpavlin 541 foreach (@_) {
439     my $v = $_; # make var read-write for Encode
440 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
441 dpavlin 541 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
442 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
443 dpavlin 571 if (defined $sf) {
444 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
445 dpavlin 571 } else {
446 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
447 dpavlin 571 }
448 dpavlin 540 }
449     }
450    
451 dpavlin 547 =head2 marc_repeatable_subfield
452    
453     Save values for MARC repetable subfield
454    
455     marc_repeatable_subfield('910', 'z', rec('909') );
456    
457     =cut
458    
459     sub marc_repeatable_subfield {
460 dpavlin 550 my ($f,$sf) = @_;
461     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
462     $marc_repeatable_subfield->{ $f . $sf }++;
463 dpavlin 547 marc(@_);
464     }
465    
466     =head2 marc_indicators
467    
468     Set both indicators for MARC field
469    
470     marc_indicators('900', ' ', 1);
471    
472     Any indicator value other than C<0-9> will be treated as undefined.
473    
474     =cut
475    
476     sub marc_indicators {
477     my $f = shift || die "marc_indicators need field!\n";
478     my ($i1,$i2) = @_;
479     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
480     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
481    
482     $i1 = ' ' if ($i1 !~ /^\d$/);
483     $i2 = ' ' if ($i2 !~ /^\d$/);
484 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
485 dpavlin 547 }
486    
487 dpavlin 562 =head2 marc_compose
488 dpavlin 547
489 dpavlin 562 Save values for each MARC subfield explicitly
490    
491     marc_compose('900',
492     'a', rec('200','a')
493     'b', rec('201','a')
494     'a', rec('200','b')
495     'c', rec('200','c')
496     );
497    
498     =cut
499    
500     sub marc_compose {
501     my $f = shift or die "marc_compose needs field";
502     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
503    
504     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
505     my $m = [ $f, $i1, $i2 ];
506    
507 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
508    
509 dpavlin 562 while (@_) {
510     my $sf = shift or die "marc_compose $f needs subfield";
511 dpavlin 565 my $v = shift;
512 dpavlin 562
513     next unless (defined($v) && $v !~ /^\s*$/);
514     from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
515     push @$m, ( $sf, $v );
516 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
517 dpavlin 562 }
518    
519 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
520 dpavlin 562
521 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
522 dpavlin 562 }
523    
524 dpavlin 574 =head2 marc_duplicate
525 dpavlin 562
526 dpavlin 574 Generate copy of current MARC record and continue working on copy
527    
528     marc_duplicate();
529    
530     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
531     C<< _get_marc_fields( offset => 42 ) >>.
532    
533     =cut
534    
535     sub marc_duplicate {
536     my $m = $marc_record->[ -1 ];
537     die "can't duplicate record which isn't defined" unless ($m);
538     push @{ $marc_record }, dclone( $m );
539     warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);
540     $marc_record_offset = $#{ $marc_record };
541     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
542     }
543    
544     =head2 marc_remove
545    
546     Remove some field or subfield from MARC record.
547    
548     marc_remove('200');
549     marc_remove('200','a');
550    
551     This will erase field C<200> or C<200^a> from current MARC record.
552    
553     This is useful after calling C<marc_duplicate> or on it's own (but, you
554     should probably just remove that subfield definition if you are not
555     using C<marc_duplicate>).
556    
557     FIXME: support fields < 10.
558    
559     =cut
560    
561     sub marc_remove {
562     my ($f, $sf) = @_;
563    
564     die "marc_remove needs record number" unless defined($f);
565    
566     my $marc = $marc_record->[ $marc_record_offset ];
567    
568     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
569    
570 dpavlin 579 my $i = 0;
571     foreach ( 0 .. $#{ $marc } ) {
572 dpavlin 574 last unless (defined $marc->[$i]);
573     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
574     if ($marc->[$i]->[0] eq $f) {
575     if (! defined $sf) {
576     # remove whole field
577     splice @$marc, $i, 1;
578     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
579     $i--;
580     } else {
581     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
582     my $o = ($j * 2) + 3;
583     if ($marc->[$i]->[$o] eq $sf) {
584     # remove subfield
585     splice @{$marc->[$i]}, $o, 2;
586     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
587     # is record now empty?
588     if ($#{ $marc->[$i] } == 2) {
589     splice @$marc, $i, 1;
590     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
591     $i--;
592     };
593     }
594     }
595     }
596     }
597 dpavlin 579 $i++;
598 dpavlin 574 }
599    
600     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
601    
602     $marc_record->[ $marc_record_offset ] = $marc;
603    
604     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
605     }
606    
607 dpavlin 540 =head1 Functions to extract data from input
608    
609     This function should be used inside functions to create C<data_structure> described
610     above.
611    
612 dpavlin 536 =head2 rec1
613 dpavlin 371
614 dpavlin 536 Return all values in some field
615 dpavlin 371
616 dpavlin 536 @v = rec1('200')
617 dpavlin 15
618 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
619 dpavlin 15
620 dpavlin 536 =cut
621 dpavlin 15
622 dpavlin 536 sub rec1 {
623     my $f = shift;
624 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
625 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
626 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
627 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
628     return map {
629     if (ref($_) eq 'HASH') {
630     values %{$_};
631 dpavlin 31 } else {
632 dpavlin 536 $_;
633 dpavlin 31 }
634 dpavlin 536 } @{ $rec->{$f} };
635     } elsif( defined($rec->{$f}) ) {
636     return $rec->{$f};
637 dpavlin 15 }
638     }
639    
640 dpavlin 536 =head2 rec2
641 dpavlin 15
642 dpavlin 536 Return all values in specific field and subfield
643 dpavlin 13
644 dpavlin 536 @v = rec2('200','a')
645 dpavlin 13
646     =cut
647    
648 dpavlin 536 sub rec2 {
649     my $f = shift;
650     return unless (defined($rec && $rec->{$f}));
651     my $sf = shift;
652 dpavlin 589 return map {
653     if (ref($_->{$sf}) eq 'ARRAY') {
654     @{ $_->{$sf} };
655     } else {
656     $_->{$sf};
657     }
658     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
659 dpavlin 536 }
660 dpavlin 13
661 dpavlin 536 =head2 rec
662 dpavlin 13
663 dpavlin 536 syntaxtic sugar for
664 dpavlin 13
665 dpavlin 536 @v = rec('200')
666     @v = rec('200','a')
667 dpavlin 13
668 dpavlin 536 =cut
669 dpavlin 373
670 dpavlin 536 sub rec {
671 dpavlin 583 my @out;
672 dpavlin 536 if ($#_ == 0) {
673 dpavlin 583 @out = rec1(@_);
674 dpavlin 536 } elsif ($#_ == 1) {
675 dpavlin 583 @out = rec2(@_);
676 dpavlin 13 }
677 dpavlin 583 if (@out) {
678     return @out;
679     } else {
680     return '';
681     }
682 dpavlin 13 }
683    
684 dpavlin 536 =head2 regex
685 dpavlin 15
686 dpavlin 536 Apply regex to some or all values
687 dpavlin 15
688 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
689 dpavlin 15
690     =cut
691    
692 dpavlin 536 sub regex {
693     my $r = shift;
694     my @out;
695 dpavlin 550 #warn "r: $r\n", dump(\@_);
696 dpavlin 536 foreach my $t (@_) {
697     next unless ($t);
698     eval "\$t =~ $r";
699     push @out, $t if ($t && $t ne '');
700 dpavlin 368 }
701 dpavlin 536 return @out;
702 dpavlin 15 }
703    
704 dpavlin 536 =head2 prefix
705 dpavlin 15
706 dpavlin 536 Prefix all values with a string
707 dpavlin 15
708 dpavlin 536 @v = prefix( 'my_', @v );
709 dpavlin 15
710     =cut
711    
712 dpavlin 536 sub prefix {
713     my $p = shift or die "prefix needs string as first argument";
714     return map { $p . $_ } grep { defined($_) } @_;
715     }
716 dpavlin 15
717 dpavlin 536 =head2 suffix
718 dpavlin 15
719 dpavlin 536 suffix all values with a string
720 dpavlin 15
721 dpavlin 536 @v = suffix( '_my', @v );
722 dpavlin 15
723 dpavlin 536 =cut
724 dpavlin 15
725 dpavlin 536 sub suffix {
726     my $s = shift or die "suffix needs string as first argument";
727     return map { $_ . $s } grep { defined($_) } @_;
728 dpavlin 15 }
729    
730 dpavlin 536 =head2 surround
731 dpavlin 13
732 dpavlin 536 surround all values with a two strings
733 dpavlin 13
734 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
735 dpavlin 13
736     =cut
737    
738 dpavlin 536 sub surround {
739     my $p = shift or die "surround need prefix as first argument";
740     my $s = shift or die "surround needs suffix as second argument";
741     return map { $p . $_ . $s } grep { defined($_) } @_;
742 dpavlin 13 }
743    
744 dpavlin 536 =head2 first
745 dpavlin 13
746 dpavlin 536 Return first element
747 dpavlin 15
748 dpavlin 536 $v = first( @v );
749 dpavlin 13
750     =cut
751    
752 dpavlin 536 sub first {
753     my $r = shift;
754     return $r;
755 dpavlin 13 }
756    
757 dpavlin 536 =head2 lookup
758 dpavlin 13
759 dpavlin 536 Consult lookup hashes for some value
760 dpavlin 13
761 dpavlin 536 @v = lookup( $v );
762     @v = lookup( @v );
763 dpavlin 13
764     =cut
765    
766 dpavlin 536 sub lookup {
767     my $k = shift or return;
768     return unless (defined($lookup->{$k}));
769     if (ref($lookup->{$k}) eq 'ARRAY') {
770     return @{ $lookup->{$k} };
771     } else {
772     return $lookup->{$k};
773     }
774 dpavlin 13 }
775    
776 dpavlin 536 =head2 join_with
777 dpavlin 13
778 dpavlin 536 Joins walues with some delimiter
779 dpavlin 10
780 dpavlin 536 $v = join_with(", ", @v);
781 dpavlin 10
782 dpavlin 536 =cut
783 dpavlin 10
784 dpavlin 536 sub join_with {
785     my $d = shift;
786 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
787 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
788     return '' unless defined($v);
789     return $v;
790 dpavlin 536 }
791 dpavlin 10
792 dpavlin 562 =head2 split_rec_on
793    
794     Split record subfield on some regex and take one of parts out
795    
796     $a_before_semi_column =
797     split_rec_on('200','a', /\s*;\s*/, $part);
798    
799     C<$part> is optional number of element. First element is
800     B<1>, not 0!
801    
802     If there is no C<$part> parameter or C<$part> is 0, this function will
803     return all values produced by splitting.
804    
805     =cut
806    
807     sub split_rec_on {
808     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
809    
810     my ($fld, $sf, $regex, $part) = @_;
811 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
812 dpavlin 562
813     my @r = rec( $fld, $sf );
814     my $v = shift @r;
815 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
816 dpavlin 562
817 dpavlin 566 return '' if( ! defined($v) || $v =~ /^\s*$/);
818    
819 dpavlin 562 my @s = split( $regex, $v );
820 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
821 dpavlin 566 if ($part && $part > 0) {
822 dpavlin 562 return $s[ $part - 1 ];
823     } else {
824 dpavlin 571 return @s;
825 dpavlin 562 }
826     }
827    
828 dpavlin 536 # END
829     1;

  ViewVC Help
Powered by ViewVC 1.1.26