/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1011 - (hide annotations)
Tue Nov 6 20:26:31 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 30884 byte(s)
added no warnings 'redefine'; so we can define subs in normalization perl
without annoying warnings.

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 547 marc marc_indicators marc_repeatable_subfield
13 dpavlin 815 marc_compose marc_leader marc_fixed
14 dpavlin 813 marc_duplicate marc_remove marc_count
15 dpavlin 604 marc_original_order
16 dpavlin 540
17 dpavlin 536 rec1 rec2 rec
18 dpavlin 979 frec
19 dpavlin 536 regex prefix suffix surround
20     first lookup join_with
21 dpavlin 707 save_into_lookup
22 dpavlin 562
23     split_rec_on
24 dpavlin 785
25     get set
26 dpavlin 791 count
27 dpavlin 980
28 dpavlin 536 /;
29 dpavlin 10
30     use warnings;
31     use strict;
32 dpavlin 536
33     #use base qw/WebPAC::Common/;
34 dpavlin 550 use Data::Dump qw/dump/;
35 dpavlin 574 use Storable qw/dclone/;
36 dpavlin 725 use Carp qw/confess/;
37 dpavlin 10
38 dpavlin 550 # debugging warn(s)
39     my $debug = 0;
40    
41 dpavlin 980 use WebPAC::Normalize::ISBN;
42     push @EXPORT, ( 'isbn_10', 'isbn_13' );
43 dpavlin 550
44 dpavlin 10 =head1 NAME
45    
46 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
47 dpavlin 10
48     =cut
49    
50 dpavlin 983 our $VERSION = '0.32';
51 dpavlin 10
52     =head1 SYNOPSIS
53    
54 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
55     from input records using perl functions which are specialized for set
56     processing.
57 dpavlin 10
58 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
59     means that you check it's validity before running WebPAC using
60     C<perl -c normalize.pl>.
61 dpavlin 15
62 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
63 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
64 dpavlin 547 C<marc>.
65 dpavlin 15
66 dpavlin 10 =head1 FUNCTIONS
67    
68 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
69     All other functions are available for use within normalisation rules.
70    
71 dpavlin 536 =head2 data_structure
72 dpavlin 10
73 dpavlin 536 Return data structure
74 dpavlin 13
75 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
76 dpavlin 725 lookup => $lookup_hash,
77 dpavlin 536 row => $row,
78     rules => $normalize_pl_config,
79 dpavlin 541 marc_encoding => 'utf-8',
80 dpavlin 595 config => $config,
81 dpavlin 736 load_row_coderef => sub {
82 dpavlin 979 my ($database,$input,$mfn) = @_;
83 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
84 dpavlin 725 },
85 dpavlin 13 );
86    
87 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
88 dpavlin 540 other are optional.
89    
90 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
91 dpavlin 725 die if it's not defined.
92    
93 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
94 dpavlin 15
95 dpavlin 538 Since this function isn't exported you have to call it with
96     C<WebPAC::Normalize::data_structure>.
97    
98 dpavlin 10 =cut
99    
100 dpavlin 736 my $load_row_coderef;
101 dpavlin 725
102 dpavlin 536 sub data_structure {
103     my $arg = {@_};
104 dpavlin 13
105 dpavlin 536 die "need row argument" unless ($arg->{row});
106     die "need normalisation argument" unless ($arg->{rules});
107 dpavlin 31
108 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
109 dpavlin 983 _set_ds( $arg->{row} );
110 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
111 dpavlin 541 _clean_ds( %{ $arg } );
112 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
113 dpavlin 725
114 dpavlin 1011 no strict 'subs';
115     no warnings 'redefine';
116     eval "$arg->{rules};";
117 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
118 dpavlin 540
119 dpavlin 538 return _get_ds();
120 dpavlin 10 }
121    
122 dpavlin 983 =head2 _set_ds
123 dpavlin 13
124 dpavlin 536 Set current record hash
125 dpavlin 433
126 dpavlin 983 _set_ds( $rec );
127 dpavlin 433
128     =cut
129    
130 dpavlin 536 my $rec;
131 dpavlin 433
132 dpavlin 983 sub _set_ds {
133 dpavlin 536 $rec = shift or die "no record hash";
134 dpavlin 433 }
135    
136 dpavlin 595 =head2 _set_config
137    
138     Set current config hash
139    
140     _set_config( $config );
141    
142     Magic keys are:
143    
144     =over 4
145    
146     =item _
147    
148     Code of current database
149    
150     =item _mfn
151    
152     Current MFN
153    
154     =back
155    
156     =cut
157    
158     my $config;
159    
160     sub _set_config {
161     $config = shift;
162     }
163    
164 dpavlin 538 =head2 _get_ds
165    
166     Return hash formatted as data structure
167    
168     my $ds = _get_ds();
169    
170     =cut
171    
172 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
173 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
174 dpavlin 538
175     sub _get_ds {
176 dpavlin 982 #warn "## out = ",dump($out);
177 dpavlin 538 return $out;
178     }
179    
180     =head2 _clean_ds
181    
182     Clean data structure hash for next record
183    
184     _clean_ds();
185    
186     =cut
187    
188     sub _clean_ds {
189 dpavlin 541 my $a = {@_};
190 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
191 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
192 dpavlin 541 $marc_encoding = $a->{marc_encoding};
193 dpavlin 538 }
194    
195     =head2 _set_lookup
196    
197     Set current lookup hash
198    
199     _set_lookup( $lookup );
200    
201     =cut
202    
203     my $lookup;
204    
205     sub _set_lookup {
206     $lookup = shift;
207     }
208    
209 dpavlin 707 =head2 _get_lookup
210    
211     Get current lookup hash
212    
213     my $lookup = _get_lookup();
214    
215     =cut
216    
217     sub _get_lookup {
218     return $lookup;
219     }
220    
221 dpavlin 736 =head2 _set_load_row
222 dpavlin 725
223     Setup code reference which will return L<data_structure> from
224     L<WebPAC::Store>
225    
226 dpavlin 736 _set_load_row(sub {
227 dpavlin 725 my ($database,$input,$mfn) = @_;
228 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
229 dpavlin 725 });
230    
231     =cut
232    
233 dpavlin 736 sub _set_load_row {
234 dpavlin 725 my $coderef = shift;
235     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
236    
237 dpavlin 736 $load_row_coderef = $coderef;
238 dpavlin 725 }
239    
240 dpavlin 547 =head2 _get_marc_fields
241 dpavlin 540
242 dpavlin 547 Get all fields defined by calls to C<marc>
243 dpavlin 540
244 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
245 dpavlin 540
246 dpavlin 543 We are using I<magic> which detect repeatable fields only from
247     sequence of field/subfield data generated by normalization.
248    
249 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
250     if any of indicators are different.
251 dpavlin 543
252 dpavlin 554 This is sane for most cases. Something like:
253 dpavlin 543
254 dpavlin 554 900a-1 900b-1 900c-1
255     900a-2 900b-2
256     900a-3
257    
258     will be created from any combination of:
259    
260     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
261    
262     and following rules:
263    
264     marc('900','a', rec('200','a') );
265     marc('900','b', rec('200','b') );
266     marc('900','c', rec('200','c') );
267    
268     which might not be what you have in mind. If you need repeatable subfield,
269     define it using C<marc_repeatable_subfield> like this:
270    
271 dpavlin 574 marc_repeatable_subfield('900','a');
272     marc('900','a', rec('200','a') );
273     marc('900','b', rec('200','b') );
274     marc('900','c', rec('200','c') );
275 dpavlin 554
276 dpavlin 574 will create:
277    
278     900a-1 900a-2 900a-3 900b-1 900c-1
279     900b-2
280    
281     There is also support for returning next or specific using:
282    
283     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
284     # do something with $mf
285     }
286    
287     will always return fields from next MARC record or
288    
289     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
290    
291     will return 42th copy record (if it exists).
292    
293 dpavlin 540 =cut
294    
295 dpavlin 813 my $fetch_pos;
296    
297 dpavlin 547 sub _get_marc_fields {
298 dpavlin 550
299 dpavlin 574 my $arg = {@_};
300     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
301 dpavlin 813 $fetch_pos = $marc_fetch_offset;
302 dpavlin 574 if ($arg->{offset}) {
303 dpavlin 813 $fetch_pos = $arg->{offset};
304 dpavlin 574 } elsif($arg->{fetch_next}) {
305     $marc_fetch_offset++;
306     }
307 dpavlin 550
308 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
309    
310     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
311    
312 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
313 dpavlin 574
314 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
315 dpavlin 574
316     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
317    
318 dpavlin 550 # first, sort all existing fields
319     # XXX might not be needed, but modern perl might randomize elements in hash
320     my @sorted_marc_record = sort {
321 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
322 dpavlin 574 } @{ $marc_rec };
323 dpavlin 550
324 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
325 dpavlin 562
326 dpavlin 550 # output marc fields
327 dpavlin 542 my @m;
328 dpavlin 550
329     # count unique field-subfields (used for offset when walking to next subfield)
330     my $u;
331 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
332 dpavlin 550
333     if ($debug) {
334 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
335 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
336 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
337     warn "## subfield count = ", dump( $u ), $/;
338 dpavlin 550 }
339    
340     my $len = $#sorted_marc_record;
341     my $visited;
342     my $i = 0;
343     my $field;
344    
345     foreach ( 0 .. $len ) {
346    
347     # find next element which isn't visited
348     while ($visited->{$i}) {
349     $i = ($i + 1) % ($len + 1);
350 dpavlin 542 }
351    
352 dpavlin 550 # mark it visited
353     $visited->{$i}++;
354    
355 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
356 dpavlin 550
357     # field and subfield which is key for
358     # marc_repeatable_subfield and u
359 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
360 dpavlin 550
361     if ($debug > 1) {
362    
363     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
364     print "### this [$i]: ", dump( $row ),$/;
365     print "### sf: ", $row->[3], " vs ", $field->[3],
366     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
367     if ($#$field >= 0);
368    
369     }
370    
371     # if field exists
372     if ( $#$field >= 0 ) {
373     if (
374     $row->[0] ne $field->[0] || # field
375     $row->[1] ne $field->[1] || # i1
376     $row->[2] ne $field->[2] # i2
377     ) {
378     push @m, $field;
379     warn "## saved/1 ", dump( $field ),$/ if ($debug);
380     $field = $row;
381    
382     } elsif (
383     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
384     ||
385     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
386     ! $marc_repeatable_subfield->{ $fsf }
387     )
388     ) {
389     push @m, $field;
390     warn "## saved/2 ", dump( $field ),$/ if ($debug);
391     $field = $row;
392    
393     } else {
394     # append new subfields to existing field
395     push @$field, ( $row->[3], $row->[4] );
396     }
397     } else {
398     # insert first field
399     $field = $row;
400     }
401    
402     if (! $marc_repeatable_subfield->{ $fsf }) {
403     # make step to next subfield
404     $i = ($i + $u->{ $fsf } ) % ($len + 1);
405     }
406 dpavlin 542 }
407    
408 dpavlin 550 if ($#$field >= 0) {
409     push @m, $field;
410     warn "## saved/3 ", dump( $field ),$/ if ($debug);
411     }
412 dpavlin 542
413 dpavlin 579 return \@m;
414 dpavlin 540 }
415    
416 dpavlin 813 =head2 _get_marc_leader
417    
418     Return leader from currently fetched record by L</_get_marc_fields>
419    
420     print WebPAC::Normalize::_get_marc_leader();
421    
422     =cut
423    
424     sub _get_marc_leader {
425     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
426     return $marc_leader->[ $fetch_pos ];
427     }
428    
429 dpavlin 554 =head2 _debug
430    
431     Change level of debug warnings
432    
433     _debug( 2 );
434    
435     =cut
436    
437     sub _debug {
438     my $l = shift;
439     return $debug unless defined($l);
440 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
441 dpavlin 554 $debug = $l;
442     }
443    
444 dpavlin 540 =head1 Functions to create C<data_structure>
445    
446     Those functions generally have to first in your normalization file.
447    
448 dpavlin 912 =head2 search_display
449 dpavlin 433
450 dpavlin 912 Define output for L<search> and L<display> at the same time
451 dpavlin 433
452 dpavlin 912 search_display('Title', rec('200','a') );
453 dpavlin 13
454    
455     =cut
456    
457 dpavlin 912 sub search_display {
458     my $name = shift or die "search_display needs name as first argument";
459 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
460     return unless (@o);
461     $out->{$name}->{search} = \@o;
462     $out->{$name}->{display} = \@o;
463     }
464 dpavlin 13
465 dpavlin 915 =head2 tag
466    
467     Old name for L<search_display>, but supported
468    
469     =cut
470    
471     sub tag {
472     search_display( @_ );
473     }
474    
475 dpavlin 536 =head2 display
476 dpavlin 13
477 dpavlin 912 Define output just for I<display>
478 dpavlin 125
479 dpavlin 536 @v = display('Title', rec('200','a') );
480 dpavlin 125
481 dpavlin 536 =cut
482 dpavlin 125
483 dpavlin 923 sub _field {
484     my $type = shift or confess "need type -- BUG?";
485     my $name = shift or confess "needs name as first argument";
486 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
487     return unless (@o);
488 dpavlin 923 $out->{$name}->{$type} = \@o;
489 dpavlin 536 }
490 dpavlin 13
491 dpavlin 923 sub display { _field( 'display', @_ ) }
492    
493 dpavlin 536 =head2 search
494 dpavlin 13
495 dpavlin 536 Prepare values just for I<search>
496 dpavlin 13
497 dpavlin 536 @v = search('Title', rec('200','a') );
498 dpavlin 433
499 dpavlin 536 =cut
500 dpavlin 13
501 dpavlin 923 sub search { _field( 'search', @_ ) }
502 dpavlin 13
503 dpavlin 923 =head2 sorted
504    
505     Insert into lists which will be automatically sorted
506    
507     sorted('Title', rec('200','a') );
508    
509     =cut
510    
511     sub sorted { _field( 'sorted', @_ ) }
512    
513    
514 dpavlin 564 =head2 marc_leader
515    
516     Setup fields within MARC leader or get leader
517    
518     marc_leader('05','c');
519     my $leader = marc_leader();
520    
521     =cut
522    
523     sub marc_leader {
524     my ($offset,$value) = @_;
525    
526     if ($offset) {
527 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
528 dpavlin 564 } else {
529 dpavlin 813
530     if (defined($marc_leader)) {
531     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
532     return $marc_leader->[ $marc_record_offset ];
533     } else {
534     return;
535     }
536 dpavlin 564 }
537     }
538    
539 dpavlin 815 =head2 marc_fixed
540    
541     Create control/indentifier fields with values in fixed positions
542    
543     marc_fixed('008', 00, '070402');
544     marc_fixed('008', 39, '|');
545    
546     Positions not specified will be filled with spaces (C<0x20>).
547    
548     There will be no effort to extend last specified value to full length of
549     field in standard.
550    
551     =cut
552    
553     sub marc_fixed {
554     my ($f, $pos, $val) = @_;
555     die "need marc(field, position, value)" unless defined($f) && defined($pos);
556    
557 dpavlin 889 confess "need val" unless defined $val;
558    
559 dpavlin 815 my $update = 0;
560    
561     map {
562     if ($_->[0] eq $f) {
563     my $old = $_->[1];
564 dpavlin 889 if (length($old) <= $pos) {
565 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
566 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
567 dpavlin 815 } else {
568     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
569 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
570 dpavlin 815 }
571     $update++;
572     }
573     } @{ $marc_record->[ $marc_record_offset ] };
574    
575     if (! $update) {
576     my $v = ' ' x $pos . $val;
577     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
578 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
579 dpavlin 815 }
580     }
581    
582 dpavlin 547 =head2 marc
583 dpavlin 540
584     Save value for MARC field
585    
586 dpavlin 547 marc('900','a', rec('200','a') );
587 dpavlin 571 marc('001', rec('000') );
588 dpavlin 540
589     =cut
590    
591 dpavlin 547 sub marc {
592     my $f = shift or die "marc needs field";
593     die "marc field must be numer" unless ($f =~ /^\d+$/);
594 dpavlin 540
595 dpavlin 571 my $sf;
596     if ($f >= 10) {
597     $sf = shift or die "marc needs subfield";
598     }
599 dpavlin 540
600 dpavlin 541 foreach (@_) {
601     my $v = $_; # make var read-write for Encode
602 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
603 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
604 dpavlin 571 if (defined $sf) {
605 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
606 dpavlin 571 } else {
607 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
608 dpavlin 571 }
609 dpavlin 540 }
610     }
611    
612 dpavlin 547 =head2 marc_repeatable_subfield
613    
614     Save values for MARC repetable subfield
615    
616     marc_repeatable_subfield('910', 'z', rec('909') );
617    
618     =cut
619    
620     sub marc_repeatable_subfield {
621 dpavlin 550 my ($f,$sf) = @_;
622     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
623     $marc_repeatable_subfield->{ $f . $sf }++;
624 dpavlin 547 marc(@_);
625     }
626    
627     =head2 marc_indicators
628    
629     Set both indicators for MARC field
630    
631     marc_indicators('900', ' ', 1);
632    
633     Any indicator value other than C<0-9> will be treated as undefined.
634    
635     =cut
636    
637     sub marc_indicators {
638     my $f = shift || die "marc_indicators need field!\n";
639     my ($i1,$i2) = @_;
640     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
641     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
642    
643     $i1 = ' ' if ($i1 !~ /^\d$/);
644     $i2 = ' ' if ($i2 !~ /^\d$/);
645 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
646 dpavlin 547 }
647    
648 dpavlin 562 =head2 marc_compose
649 dpavlin 547
650 dpavlin 562 Save values for each MARC subfield explicitly
651    
652     marc_compose('900',
653     'a', rec('200','a')
654     'b', rec('201','a')
655     'a', rec('200','b')
656     'c', rec('200','c')
657     );
658    
659 dpavlin 603 If you specify C<+> for subfield, value will be appended
660     to previous defined subfield.
661    
662 dpavlin 562 =cut
663    
664     sub marc_compose {
665     my $f = shift or die "marc_compose needs field";
666     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
667    
668     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
669     my $m = [ $f, $i1, $i2 ];
670    
671 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
672    
673 dpavlin 619 if ($#_ % 2 != 1) {
674     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
675     }
676    
677 dpavlin 562 while (@_) {
678 dpavlin 619 my $sf = shift;
679 dpavlin 565 my $v = shift;
680 dpavlin 562
681     next unless (defined($v) && $v !~ /^\s*$/);
682 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
683 dpavlin 603 if ($sf ne '+') {
684     push @$m, ( $sf, $v );
685     } else {
686     $m->[ $#$m ] .= $v;
687     }
688 dpavlin 562 }
689    
690 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
691 dpavlin 562
692 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
693 dpavlin 562 }
694    
695 dpavlin 574 =head2 marc_duplicate
696 dpavlin 562
697 dpavlin 574 Generate copy of current MARC record and continue working on copy
698    
699     marc_duplicate();
700    
701     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
702     C<< _get_marc_fields( offset => 42 ) >>.
703    
704     =cut
705    
706     sub marc_duplicate {
707     my $m = $marc_record->[ -1 ];
708     die "can't duplicate record which isn't defined" unless ($m);
709     push @{ $marc_record }, dclone( $m );
710 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
711     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
712 dpavlin 574 $marc_record_offset = $#{ $marc_record };
713     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
714 dpavlin 813
715 dpavlin 574 }
716    
717     =head2 marc_remove
718    
719     Remove some field or subfield from MARC record.
720    
721     marc_remove('200');
722     marc_remove('200','a');
723    
724     This will erase field C<200> or C<200^a> from current MARC record.
725    
726 dpavlin 786 marc_remove('*');
727    
728     Will remove all fields in current MARC record.
729    
730 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
731     should probably just remove that subfield definition if you are not
732     using C<marc_duplicate>).
733    
734     FIXME: support fields < 10.
735    
736     =cut
737    
738     sub marc_remove {
739     my ($f, $sf) = @_;
740    
741     die "marc_remove needs record number" unless defined($f);
742    
743     my $marc = $marc_record->[ $marc_record_offset ];
744    
745     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
746    
747 dpavlin 786 if ($f eq '*') {
748    
749     delete( $marc_record->[ $marc_record_offset ] );
750 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
751 dpavlin 786
752     } else {
753    
754     my $i = 0;
755     foreach ( 0 .. $#{ $marc } ) {
756     last unless (defined $marc->[$i]);
757     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
758     if ($marc->[$i]->[0] eq $f) {
759     if (! defined $sf) {
760     # remove whole field
761     splice @$marc, $i, 1;
762     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
763     $i--;
764     } else {
765     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
766     my $o = ($j * 2) + 3;
767     if ($marc->[$i]->[$o] eq $sf) {
768     # remove subfield
769     splice @{$marc->[$i]}, $o, 2;
770     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
771     # is record now empty?
772     if ($#{ $marc->[$i] } == 2) {
773     splice @$marc, $i, 1;
774     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
775     $i--;
776     };
777     }
778 dpavlin 574 }
779     }
780     }
781 dpavlin 786 $i++;
782 dpavlin 574 }
783 dpavlin 786
784     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
785    
786     $marc_record->[ $marc_record_offset ] = $marc;
787 dpavlin 574 }
788    
789     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
790     }
791    
792 dpavlin 604 =head2 marc_original_order
793    
794     Copy all subfields preserving original order to marc field.
795    
796 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
797 dpavlin 604
798 dpavlin 616 Please note that field numbers are consistent with other commands (marc
799     field number first), but somewhat counter-intuitive (destination and then
800     source).
801    
802 dpavlin 604 You might want to use this command if you are just renaming subfields or
803     using pre-processing modify_record in C<config.yml> and don't need any
804     post-processing or want to preserve order of original subfields.
805    
806 dpavlin 616
807 dpavlin 604 =cut
808    
809     sub marc_original_order {
810    
811 dpavlin 616 my ($to, $from) = @_;
812 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
813    
814 dpavlin 616 return unless defined($rec->{$from});
815    
816     my $r = $rec->{$from};
817 dpavlin 604 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
818    
819     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
820 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
821 dpavlin 604
822     foreach my $d (@$r) {
823    
824 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
825 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
826 dpavlin 605 next;
827     }
828    
829 dpavlin 604 my @sfs = @{ $d->{subfields} };
830    
831     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
832    
833 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
834 dpavlin 604
835     my $m = [ $to, $i1, $i2 ];
836    
837     while (my $sf = shift @sfs) {
838 dpavlin 618
839     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
840 dpavlin 604 my $offset = shift @sfs;
841     die "corrupted sufields specification for field $from\n" unless defined($offset);
842    
843     my $v;
844     if (ref($d->{$sf}) eq 'ARRAY') {
845     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
846     } elsif ($offset == 0) {
847     $v = $d->{$sf};
848     } else {
849     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
850     }
851     push @$m, ( $sf, $v ) if (defined($v));
852     }
853    
854     if ($#{$m} > 2) {
855     push @{ $marc_record->[ $marc_record_offset ] }, $m;
856     }
857     }
858    
859     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
860     }
861    
862 dpavlin 813 =head2 marc_count
863 dpavlin 604
864 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
865    
866     print "created ", marc_count(), " records";
867    
868     =cut
869    
870     sub marc_count {
871     return $#{ $marc_record };
872     }
873    
874    
875 dpavlin 540 =head1 Functions to extract data from input
876    
877     This function should be used inside functions to create C<data_structure> described
878     above.
879    
880 dpavlin 641 =head2 _pack_subfields_hash
881    
882 dpavlin 669 @subfields = _pack_subfields_hash( $h );
883     $subfields = _pack_subfields_hash( $h, 1 );
884 dpavlin 641
885 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
886     with subfields (denoted by C<^>) and values.
887    
888 dpavlin 641 =cut
889    
890     sub _pack_subfields_hash {
891    
892 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
893 dpavlin 641
894     my ($h,$include_subfields) = @_;
895    
896 dpavlin 831 # sanity and ease of use
897     return $h if (ref($h) ne 'HASH');
898    
899 dpavlin 641 if ( defined($h->{subfields}) ) {
900     my $sfs = delete $h->{subfields} || die "no subfields?";
901     my @out;
902     while (@$sfs) {
903     my $sf = shift @$sfs;
904     push @out, '^' . $sf if ($include_subfields);
905     my $o = shift @$sfs;
906     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
907     # single element subfields are not arrays
908 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
909 dpavlin 667
910 dpavlin 641 push @out, $h->{$sf};
911     } else {
912 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
913 dpavlin 641 push @out, $h->{$sf}->[$o];
914     }
915     }
916 dpavlin 667 if ($include_subfields) {
917     return join('', @out);
918     } else {
919     return @out;
920     }
921 dpavlin 641 } else {
922 dpavlin 667 if ($include_subfields) {
923     my $out = '';
924 dpavlin 668 foreach my $sf (sort keys %$h) {
925 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
926     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
927     } else {
928     $out .= '^' . $sf . $h->{$sf};
929     }
930     }
931     return $out;
932     } else {
933     # FIXME this should probably be in alphabetical order instead of hash order
934     values %{$h};
935     }
936 dpavlin 641 }
937     }
938    
939 dpavlin 536 =head2 rec1
940 dpavlin 371
941 dpavlin 536 Return all values in some field
942 dpavlin 371
943 dpavlin 536 @v = rec1('200')
944 dpavlin 15
945 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
946 dpavlin 15
947 dpavlin 536 =cut
948 dpavlin 15
949 dpavlin 536 sub rec1 {
950     my $f = shift;
951 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
952 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
953 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
954 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
955 dpavlin 641 my @out;
956     foreach my $h ( @{ $rec->{$f} } ) {
957     if (ref($h) eq 'HASH') {
958     push @out, ( _pack_subfields_hash( $h ) );
959 dpavlin 31 } else {
960 dpavlin 641 push @out, $h;
961 dpavlin 31 }
962 dpavlin 641 }
963     return @out;
964 dpavlin 536 } elsif( defined($rec->{$f}) ) {
965     return $rec->{$f};
966 dpavlin 15 }
967     }
968    
969 dpavlin 536 =head2 rec2
970 dpavlin 15
971 dpavlin 536 Return all values in specific field and subfield
972 dpavlin 13
973 dpavlin 536 @v = rec2('200','a')
974 dpavlin 13
975     =cut
976    
977 dpavlin 536 sub rec2 {
978     my $f = shift;
979     return unless (defined($rec && $rec->{$f}));
980     my $sf = shift;
981 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
982 dpavlin 589 return map {
983     if (ref($_->{$sf}) eq 'ARRAY') {
984     @{ $_->{$sf} };
985     } else {
986     $_->{$sf};
987     }
988     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
989 dpavlin 536 }
990 dpavlin 13
991 dpavlin 536 =head2 rec
992 dpavlin 13
993 dpavlin 536 syntaxtic sugar for
994 dpavlin 13
995 dpavlin 536 @v = rec('200')
996     @v = rec('200','a')
997 dpavlin 13
998 dpavlin 750 If rec() returns just single value, it will
999     return scalar, not array.
1000    
1001 dpavlin 536 =cut
1002 dpavlin 373
1003 dpavlin 979 sub frec {
1004     my @out = rec(@_);
1005     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1006     return shift @out;
1007     }
1008    
1009 dpavlin 536 sub rec {
1010 dpavlin 583 my @out;
1011 dpavlin 536 if ($#_ == 0) {
1012 dpavlin 583 @out = rec1(@_);
1013 dpavlin 536 } elsif ($#_ == 1) {
1014 dpavlin 583 @out = rec2(@_);
1015 dpavlin 13 }
1016 dpavlin 750 if ($#out == 0 && ! wantarray) {
1017     return $out[0];
1018     } elsif (@out) {
1019 dpavlin 583 return @out;
1020     } else {
1021     return '';
1022     }
1023 dpavlin 13 }
1024    
1025 dpavlin 536 =head2 regex
1026 dpavlin 15
1027 dpavlin 536 Apply regex to some or all values
1028 dpavlin 15
1029 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1030 dpavlin 15
1031     =cut
1032    
1033 dpavlin 536 sub regex {
1034     my $r = shift;
1035     my @out;
1036 dpavlin 550 #warn "r: $r\n", dump(\@_);
1037 dpavlin 536 foreach my $t (@_) {
1038     next unless ($t);
1039     eval "\$t =~ $r";
1040     push @out, $t if ($t && $t ne '');
1041 dpavlin 368 }
1042 dpavlin 536 return @out;
1043 dpavlin 15 }
1044    
1045 dpavlin 536 =head2 prefix
1046 dpavlin 15
1047 dpavlin 536 Prefix all values with a string
1048 dpavlin 15
1049 dpavlin 536 @v = prefix( 'my_', @v );
1050 dpavlin 15
1051     =cut
1052    
1053 dpavlin 536 sub prefix {
1054 dpavlin 819 my $p = shift;
1055     return @_ unless defined( $p );
1056 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1057     }
1058 dpavlin 15
1059 dpavlin 536 =head2 suffix
1060 dpavlin 15
1061 dpavlin 536 suffix all values with a string
1062 dpavlin 15
1063 dpavlin 536 @v = suffix( '_my', @v );
1064 dpavlin 15
1065 dpavlin 536 =cut
1066 dpavlin 15
1067 dpavlin 536 sub suffix {
1068 dpavlin 819 my $s = shift;
1069     return @_ unless defined( $s );
1070 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1071 dpavlin 15 }
1072    
1073 dpavlin 536 =head2 surround
1074 dpavlin 13
1075 dpavlin 536 surround all values with a two strings
1076 dpavlin 13
1077 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1078 dpavlin 13
1079     =cut
1080    
1081 dpavlin 536 sub surround {
1082 dpavlin 819 my $p = shift;
1083     my $s = shift;
1084     $p = '' unless defined( $p );
1085     $s = '' unless defined( $s );
1086 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1087 dpavlin 13 }
1088    
1089 dpavlin 536 =head2 first
1090 dpavlin 13
1091 dpavlin 536 Return first element
1092 dpavlin 15
1093 dpavlin 536 $v = first( @v );
1094 dpavlin 13
1095     =cut
1096    
1097 dpavlin 536 sub first {
1098     my $r = shift;
1099     return $r;
1100 dpavlin 13 }
1101    
1102 dpavlin 536 =head2 lookup
1103 dpavlin 13
1104 dpavlin 536 Consult lookup hashes for some value
1105 dpavlin 13
1106 dpavlin 725 @v = lookup(
1107     sub {
1108     'ffkk/peri/mfn'.rec('000')
1109     },
1110     'ffkk','peri','200-a-200-e',
1111     sub {
1112     first(rec(200,'a')).' '.first(rec('200','e'))
1113     }
1114     );
1115 dpavlin 13
1116 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1117     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1118 dpavlin 707
1119 dpavlin 725 lookup(
1120     # which results to return from record recorded in lookup
1121     sub { 'ffkk/peri/mfn' . rec('000') },
1122     # from which database and input
1123     'ffkk','peri',
1124     # such that following values match
1125     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1126     # if this part is missing, we will try to match same fields
1127     # from lookup record and current one, or you can override
1128     # which records to use from current record using
1129     sub { rec('900','x') . ' ' . rec('900','y') },
1130     )
1131    
1132     You can think about this lookup as SQL (if that helps):
1133    
1134     select
1135     sub { what }
1136     from
1137     database, input
1138     where
1139     sub { filter from lookuped record }
1140     having
1141     sub { optional filter on current record }
1142    
1143     Easy as pie, right?
1144    
1145 dpavlin 13 =cut
1146    
1147 dpavlin 536 sub lookup {
1148 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1149    
1150 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1151 dpavlin 725
1152 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1153 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1154    
1155 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1156 dpavlin 725
1157     my $mfns;
1158     my @having = $having->();
1159    
1160 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1161 dpavlin 725
1162     foreach my $h ( @having ) {
1163     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1164 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1165 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1166     }
1167 dpavlin 536 }
1168 dpavlin 725
1169     return unless ($mfns);
1170    
1171     my @mfns = sort keys %$mfns;
1172    
1173 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1174 dpavlin 725
1175     my $old_rec = $rec;
1176     my @out;
1177    
1178     foreach my $mfn (@mfns) {
1179 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1180 dpavlin 725
1181 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1182 dpavlin 725
1183     my @vals = $what->();
1184    
1185     push @out, ( @vals );
1186    
1187 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1188 dpavlin 725 }
1189    
1190     # if (ref($lookup->{$k}) eq 'ARRAY') {
1191     # return @{ $lookup->{$k} };
1192     # } else {
1193     # return $lookup->{$k};
1194     # }
1195    
1196     $rec = $old_rec;
1197    
1198 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1199 dpavlin 725
1200 dpavlin 740 if ($#out == 0) {
1201     return $out[0];
1202     } else {
1203     return @out;
1204     }
1205 dpavlin 13 }
1206    
1207 dpavlin 707 =head2 save_into_lookup
1208    
1209 dpavlin 725 Save value into lookup. It associates current database, input
1210     and specific keys with one or more values which will be
1211     associated over MFN.
1212 dpavlin 707
1213 dpavlin 725 MFN will be extracted from first occurence current of field 000
1214     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1215    
1216     my $nr = save_into_lookup($database,$input,$key,sub {
1217 dpavlin 707 # code which produce one or more values
1218     });
1219    
1220 dpavlin 725 It returns number of items saved.
1221 dpavlin 707
1222 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1223     L<WebPAC::Parser>.
1224    
1225 dpavlin 707 =cut
1226    
1227     sub save_into_lookup {
1228 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1229     die "save_into_lookup needs database" unless defined($database);
1230     die "save_into_lookup needs input" unless defined($input);
1231     die "save_into_lookup needs key" unless defined($key);
1232 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1233 dpavlin 725
1234 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1235 dpavlin 725
1236     my $mfn =
1237     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1238     defined($config->{_mfn}) ? $config->{_mfn} :
1239     die "mfn not defined or zero";
1240    
1241     my $nr = 0;
1242    
1243 dpavlin 707 foreach my $v ( $coderef->() ) {
1244 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1245 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1246 dpavlin 725 $nr++;
1247 dpavlin 707 }
1248 dpavlin 725
1249     return $nr;
1250 dpavlin 707 }
1251    
1252 dpavlin 595 =head2 config
1253    
1254     Consult config values stored in C<config.yml>
1255    
1256     # return database code (key under databases in yaml)
1257     $database_code = config(); # use _ from hash
1258     $database_name = config('name');
1259     $database_input_name = config('input name');
1260    
1261     Up to three levels are supported.
1262    
1263     =cut
1264    
1265     sub config {
1266     return unless ($config);
1267    
1268     my $p = shift;
1269    
1270     $p ||= '';
1271    
1272     my $v;
1273    
1274     warn "### getting config($p)\n" if ($debug > 1);
1275    
1276     my @p = split(/\s+/,$p);
1277     if ($#p < 0) {
1278     $v = $config->{ '_' }; # special, database code
1279     } else {
1280    
1281     my $c = dclone( $config );
1282    
1283     foreach my $k (@p) {
1284     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1285     if (ref($c) eq 'ARRAY') {
1286     $c = shift @$c;
1287     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1288     last;
1289     }
1290    
1291     if (! defined($c->{$k}) ) {
1292     $c = undef;
1293     last;
1294     } else {
1295     $c = $c->{$k};
1296     }
1297     }
1298     $v = $c if ($c);
1299    
1300     }
1301    
1302     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1303     warn "config( '$p' ) is empty\n" if (! $v);
1304    
1305     return $v;
1306     }
1307    
1308     =head2 id
1309    
1310     Returns unique id of this record
1311    
1312     $id = id();
1313    
1314     Returns C<42/2> for 2nd occurence of MFN 42.
1315    
1316     =cut
1317    
1318     sub id {
1319     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1320     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1321     }
1322    
1323 dpavlin 536 =head2 join_with
1324 dpavlin 13
1325 dpavlin 536 Joins walues with some delimiter
1326 dpavlin 10
1327 dpavlin 536 $v = join_with(", ", @v);
1328 dpavlin 10
1329 dpavlin 536 =cut
1330 dpavlin 10
1331 dpavlin 536 sub join_with {
1332     my $d = shift;
1333 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1334 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1335     return '' unless defined($v);
1336     return $v;
1337 dpavlin 536 }
1338 dpavlin 10
1339 dpavlin 562 =head2 split_rec_on
1340    
1341     Split record subfield on some regex and take one of parts out
1342    
1343     $a_before_semi_column =
1344     split_rec_on('200','a', /\s*;\s*/, $part);
1345    
1346     C<$part> is optional number of element. First element is
1347     B<1>, not 0!
1348    
1349     If there is no C<$part> parameter or C<$part> is 0, this function will
1350     return all values produced by splitting.
1351    
1352     =cut
1353    
1354     sub split_rec_on {
1355     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1356    
1357     my ($fld, $sf, $regex, $part) = @_;
1358 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1359 dpavlin 562
1360     my @r = rec( $fld, $sf );
1361     my $v = shift @r;
1362 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1363 dpavlin 562
1364 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1365 dpavlin 566
1366 dpavlin 562 my @s = split( $regex, $v );
1367 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1368 dpavlin 566 if ($part && $part > 0) {
1369 dpavlin 562 return $s[ $part - 1 ];
1370     } else {
1371 dpavlin 571 return @s;
1372 dpavlin 562 }
1373     }
1374    
1375 dpavlin 785 my $hash;
1376    
1377     =head2 set
1378    
1379     set( key => 'value' );
1380    
1381     =cut
1382    
1383     sub set {
1384     my ($k,$v) = @_;
1385 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1386 dpavlin 785 $hash->{$k} = $v;
1387     };
1388    
1389     =head2 get
1390    
1391     get( 'key' );
1392    
1393     =cut
1394    
1395     sub get {
1396     my $k = shift || return;
1397     my $v = $hash->{$k};
1398 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1399 dpavlin 785 return $v;
1400     }
1401    
1402 dpavlin 791 =head2 count
1403 dpavlin 785
1404 dpavlin 791 if ( count( @result ) == 1 ) {
1405     # do something if only 1 result is there
1406     }
1407    
1408     =cut
1409    
1410     sub count {
1411 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1412 dpavlin 791 return @_ . '';
1413     }
1414    
1415 dpavlin 536 # END
1416     1;

  ViewVC Help
Powered by ViewVC 1.1.26