/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 973 - (hide annotations)
Fri Nov 2 14:59:12 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 30683 byte(s)
 r1489@llin:  dpavlin | 2007-11-02 15:59:07 +0100
 load WebPAC::Normalize::ISBN at right place

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3     @EXPORT = qw/
4 dpavlin 538 _set_rec _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 547 marc marc_indicators marc_repeatable_subfield
13 dpavlin 815 marc_compose marc_leader marc_fixed
14 dpavlin 813 marc_duplicate marc_remove marc_count
15 dpavlin 604 marc_original_order
16 dpavlin 540
17 dpavlin 536 rec1 rec2 rec
18     regex prefix suffix surround
19     first lookup join_with
20 dpavlin 707 save_into_lookup
21 dpavlin 562
22     split_rec_on
23 dpavlin 785
24     get set
25 dpavlin 791 count
26 dpavlin 536 /;
27 dpavlin 10
28     use warnings;
29     use strict;
30 dpavlin 536
31     #use base qw/WebPAC::Common/;
32 dpavlin 550 use Data::Dump qw/dump/;
33 dpavlin 574 use Storable qw/dclone/;
34 dpavlin 725 use Carp qw/confess/;
35 dpavlin 10
36 dpavlin 550 # debugging warn(s)
37     my $debug = 0;
38    
39    
40 dpavlin 10 =head1 NAME
41    
42 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
43 dpavlin 10
44     =cut
45    
46 dpavlin 923 our $VERSION = '0.31';
47 dpavlin 10
48     =head1 SYNOPSIS
49    
50 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
51     from input records using perl functions which are specialized for set
52     processing.
53 dpavlin 10
54 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
55     means that you check it's validity before running WebPAC using
56     C<perl -c normalize.pl>.
57 dpavlin 15
58 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
59 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
60 dpavlin 547 C<marc>.
61 dpavlin 15
62 dpavlin 10 =head1 FUNCTIONS
63    
64 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
65     All other functions are available for use within normalisation rules.
66    
67 dpavlin 536 =head2 data_structure
68 dpavlin 10
69 dpavlin 536 Return data structure
70 dpavlin 13
71 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
72 dpavlin 725 lookup => $lookup_hash,
73 dpavlin 536 row => $row,
74     rules => $normalize_pl_config,
75 dpavlin 541 marc_encoding => 'utf-8',
76 dpavlin 595 config => $config,
77 dpavlin 736 load_row_coderef => sub {
78 dpavlin 725 my ($database,$input,$mfn) = shift;
79 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
80 dpavlin 725 },
81 dpavlin 13 );
82    
83 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
84 dpavlin 540 other are optional.
85    
86 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
87 dpavlin 725 die if it's not defined.
88    
89 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
90 dpavlin 15
91 dpavlin 538 Since this function isn't exported you have to call it with
92     C<WebPAC::Normalize::data_structure>.
93    
94 dpavlin 10 =cut
95    
96 dpavlin 736 my $load_row_coderef;
97 dpavlin 725
98 dpavlin 536 sub data_structure {
99     my $arg = {@_};
100 dpavlin 13
101 dpavlin 536 die "need row argument" unless ($arg->{row});
102     die "need normalisation argument" unless ($arg->{rules});
103 dpavlin 31
104 dpavlin 536 no strict 'subs';
105 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
106 dpavlin 538 _set_rec( $arg->{row} );
107 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
108 dpavlin 541 _clean_ds( %{ $arg } );
109 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
110 dpavlin 725
111 dpavlin 973 # FIXME load this conditionally
112     use WebPAC::Normalize::ISBN;
113    
114 dpavlin 536 eval "$arg->{rules}";
115     die "error evaling $arg->{rules}: $@\n" if ($@);
116 dpavlin 540
117 dpavlin 538 return _get_ds();
118 dpavlin 10 }
119    
120 dpavlin 538 =head2 _set_rec
121 dpavlin 13
122 dpavlin 536 Set current record hash
123 dpavlin 433
124 dpavlin 538 _set_rec( $rec );
125 dpavlin 433
126     =cut
127    
128 dpavlin 536 my $rec;
129 dpavlin 433
130 dpavlin 538 sub _set_rec {
131 dpavlin 536 $rec = shift or die "no record hash";
132 dpavlin 433 }
133    
134 dpavlin 595 =head2 _set_config
135    
136     Set current config hash
137    
138     _set_config( $config );
139    
140     Magic keys are:
141    
142     =over 4
143    
144     =item _
145    
146     Code of current database
147    
148     =item _mfn
149    
150     Current MFN
151    
152     =back
153    
154     =cut
155    
156     my $config;
157    
158     sub _set_config {
159     $config = shift;
160     }
161    
162 dpavlin 538 =head2 _get_ds
163    
164     Return hash formatted as data structure
165    
166     my $ds = _get_ds();
167    
168     =cut
169    
170 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
171 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
172 dpavlin 538
173     sub _get_ds {
174     return $out;
175     }
176    
177     =head2 _clean_ds
178    
179     Clean data structure hash for next record
180    
181     _clean_ds();
182    
183     =cut
184    
185     sub _clean_ds {
186 dpavlin 541 my $a = {@_};
187 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
188 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
189 dpavlin 541 $marc_encoding = $a->{marc_encoding};
190 dpavlin 538 }
191    
192     =head2 _set_lookup
193    
194     Set current lookup hash
195    
196     _set_lookup( $lookup );
197    
198     =cut
199    
200     my $lookup;
201    
202     sub _set_lookup {
203     $lookup = shift;
204     }
205    
206 dpavlin 707 =head2 _get_lookup
207    
208     Get current lookup hash
209    
210     my $lookup = _get_lookup();
211    
212     =cut
213    
214     sub _get_lookup {
215     return $lookup;
216     }
217    
218 dpavlin 736 =head2 _set_load_row
219 dpavlin 725
220     Setup code reference which will return L<data_structure> from
221     L<WebPAC::Store>
222    
223 dpavlin 736 _set_load_row(sub {
224 dpavlin 725 my ($database,$input,$mfn) = @_;
225 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
226 dpavlin 725 });
227    
228     =cut
229    
230 dpavlin 736 sub _set_load_row {
231 dpavlin 725 my $coderef = shift;
232     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
233    
234 dpavlin 736 $load_row_coderef = $coderef;
235 dpavlin 725 }
236    
237 dpavlin 547 =head2 _get_marc_fields
238 dpavlin 540
239 dpavlin 547 Get all fields defined by calls to C<marc>
240 dpavlin 540
241 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
242 dpavlin 540
243 dpavlin 543 We are using I<magic> which detect repeatable fields only from
244     sequence of field/subfield data generated by normalization.
245    
246 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
247     if any of indicators are different.
248 dpavlin 543
249 dpavlin 554 This is sane for most cases. Something like:
250 dpavlin 543
251 dpavlin 554 900a-1 900b-1 900c-1
252     900a-2 900b-2
253     900a-3
254    
255     will be created from any combination of:
256    
257     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
258    
259     and following rules:
260    
261     marc('900','a', rec('200','a') );
262     marc('900','b', rec('200','b') );
263     marc('900','c', rec('200','c') );
264    
265     which might not be what you have in mind. If you need repeatable subfield,
266     define it using C<marc_repeatable_subfield> like this:
267    
268 dpavlin 574 marc_repeatable_subfield('900','a');
269     marc('900','a', rec('200','a') );
270     marc('900','b', rec('200','b') );
271     marc('900','c', rec('200','c') );
272 dpavlin 554
273 dpavlin 574 will create:
274    
275     900a-1 900a-2 900a-3 900b-1 900c-1
276     900b-2
277    
278     There is also support for returning next or specific using:
279    
280     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
281     # do something with $mf
282     }
283    
284     will always return fields from next MARC record or
285    
286     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
287    
288     will return 42th copy record (if it exists).
289    
290 dpavlin 540 =cut
291    
292 dpavlin 813 my $fetch_pos;
293    
294 dpavlin 547 sub _get_marc_fields {
295 dpavlin 550
296 dpavlin 574 my $arg = {@_};
297     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
298 dpavlin 813 $fetch_pos = $marc_fetch_offset;
299 dpavlin 574 if ($arg->{offset}) {
300 dpavlin 813 $fetch_pos = $arg->{offset};
301 dpavlin 574 } elsif($arg->{fetch_next}) {
302     $marc_fetch_offset++;
303     }
304 dpavlin 550
305 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
306    
307     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
308    
309 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
310 dpavlin 574
311 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
312 dpavlin 574
313     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
314    
315 dpavlin 550 # first, sort all existing fields
316     # XXX might not be needed, but modern perl might randomize elements in hash
317     my @sorted_marc_record = sort {
318 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
319 dpavlin 574 } @{ $marc_rec };
320 dpavlin 550
321 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
322 dpavlin 562
323 dpavlin 550 # output marc fields
324 dpavlin 542 my @m;
325 dpavlin 550
326     # count unique field-subfields (used for offset when walking to next subfield)
327     my $u;
328 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
329 dpavlin 550
330     if ($debug) {
331 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
332 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
333 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
334     warn "## subfield count = ", dump( $u ), $/;
335 dpavlin 550 }
336    
337     my $len = $#sorted_marc_record;
338     my $visited;
339     my $i = 0;
340     my $field;
341    
342     foreach ( 0 .. $len ) {
343    
344     # find next element which isn't visited
345     while ($visited->{$i}) {
346     $i = ($i + 1) % ($len + 1);
347 dpavlin 542 }
348    
349 dpavlin 550 # mark it visited
350     $visited->{$i}++;
351    
352 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
353 dpavlin 550
354     # field and subfield which is key for
355     # marc_repeatable_subfield and u
356 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
357 dpavlin 550
358     if ($debug > 1) {
359    
360     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
361     print "### this [$i]: ", dump( $row ),$/;
362     print "### sf: ", $row->[3], " vs ", $field->[3],
363     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
364     if ($#$field >= 0);
365    
366     }
367    
368     # if field exists
369     if ( $#$field >= 0 ) {
370     if (
371     $row->[0] ne $field->[0] || # field
372     $row->[1] ne $field->[1] || # i1
373     $row->[2] ne $field->[2] # i2
374     ) {
375     push @m, $field;
376     warn "## saved/1 ", dump( $field ),$/ if ($debug);
377     $field = $row;
378    
379     } elsif (
380     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
381     ||
382     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
383     ! $marc_repeatable_subfield->{ $fsf }
384     )
385     ) {
386     push @m, $field;
387     warn "## saved/2 ", dump( $field ),$/ if ($debug);
388     $field = $row;
389    
390     } else {
391     # append new subfields to existing field
392     push @$field, ( $row->[3], $row->[4] );
393     }
394     } else {
395     # insert first field
396     $field = $row;
397     }
398    
399     if (! $marc_repeatable_subfield->{ $fsf }) {
400     # make step to next subfield
401     $i = ($i + $u->{ $fsf } ) % ($len + 1);
402     }
403 dpavlin 542 }
404    
405 dpavlin 550 if ($#$field >= 0) {
406     push @m, $field;
407     warn "## saved/3 ", dump( $field ),$/ if ($debug);
408     }
409 dpavlin 542
410 dpavlin 579 return \@m;
411 dpavlin 540 }
412    
413 dpavlin 813 =head2 _get_marc_leader
414    
415     Return leader from currently fetched record by L</_get_marc_fields>
416    
417     print WebPAC::Normalize::_get_marc_leader();
418    
419     =cut
420    
421     sub _get_marc_leader {
422     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
423     return $marc_leader->[ $fetch_pos ];
424     }
425    
426 dpavlin 554 =head2 _debug
427    
428     Change level of debug warnings
429    
430     _debug( 2 );
431    
432     =cut
433    
434     sub _debug {
435     my $l = shift;
436     return $debug unless defined($l);
437 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
438 dpavlin 554 $debug = $l;
439     }
440    
441 dpavlin 540 =head1 Functions to create C<data_structure>
442    
443     Those functions generally have to first in your normalization file.
444    
445 dpavlin 912 =head2 search_display
446 dpavlin 433
447 dpavlin 912 Define output for L<search> and L<display> at the same time
448 dpavlin 433
449 dpavlin 912 search_display('Title', rec('200','a') );
450 dpavlin 13
451    
452     =cut
453    
454 dpavlin 912 sub search_display {
455     my $name = shift or die "search_display needs name as first argument";
456 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
457     return unless (@o);
458     $out->{$name}->{search} = \@o;
459     $out->{$name}->{display} = \@o;
460     }
461 dpavlin 13
462 dpavlin 915 =head2 tag
463    
464     Old name for L<search_display>, but supported
465    
466     =cut
467    
468     sub tag {
469     search_display( @_ );
470     }
471    
472 dpavlin 536 =head2 display
473 dpavlin 13
474 dpavlin 912 Define output just for I<display>
475 dpavlin 125
476 dpavlin 536 @v = display('Title', rec('200','a') );
477 dpavlin 125
478 dpavlin 536 =cut
479 dpavlin 125
480 dpavlin 923 sub _field {
481     my $type = shift or confess "need type -- BUG?";
482     my $name = shift or confess "needs name as first argument";
483 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
484     return unless (@o);
485 dpavlin 923 $out->{$name}->{$type} = \@o;
486 dpavlin 536 }
487 dpavlin 13
488 dpavlin 923 sub display { _field( 'display', @_ ) }
489    
490 dpavlin 536 =head2 search
491 dpavlin 13
492 dpavlin 536 Prepare values just for I<search>
493 dpavlin 13
494 dpavlin 536 @v = search('Title', rec('200','a') );
495 dpavlin 433
496 dpavlin 536 =cut
497 dpavlin 13
498 dpavlin 923 sub search { _field( 'search', @_ ) }
499 dpavlin 13
500 dpavlin 923 =head2 sorted
501    
502     Insert into lists which will be automatically sorted
503    
504     sorted('Title', rec('200','a') );
505    
506     =cut
507    
508     sub sorted { _field( 'sorted', @_ ) }
509    
510    
511 dpavlin 564 =head2 marc_leader
512    
513     Setup fields within MARC leader or get leader
514    
515     marc_leader('05','c');
516     my $leader = marc_leader();
517    
518     =cut
519    
520     sub marc_leader {
521     my ($offset,$value) = @_;
522    
523     if ($offset) {
524 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
525 dpavlin 564 } else {
526 dpavlin 813
527     if (defined($marc_leader)) {
528     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
529     return $marc_leader->[ $marc_record_offset ];
530     } else {
531     return;
532     }
533 dpavlin 564 }
534     }
535    
536 dpavlin 815 =head2 marc_fixed
537    
538     Create control/indentifier fields with values in fixed positions
539    
540     marc_fixed('008', 00, '070402');
541     marc_fixed('008', 39, '|');
542    
543     Positions not specified will be filled with spaces (C<0x20>).
544    
545     There will be no effort to extend last specified value to full length of
546     field in standard.
547    
548     =cut
549    
550     sub marc_fixed {
551     my ($f, $pos, $val) = @_;
552     die "need marc(field, position, value)" unless defined($f) && defined($pos);
553    
554 dpavlin 889 confess "need val" unless defined $val;
555    
556 dpavlin 815 my $update = 0;
557    
558     map {
559     if ($_->[0] eq $f) {
560     my $old = $_->[1];
561 dpavlin 889 if (length($old) <= $pos) {
562 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
563 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
564 dpavlin 815 } else {
565     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
566 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
567 dpavlin 815 }
568     $update++;
569     }
570     } @{ $marc_record->[ $marc_record_offset ] };
571    
572     if (! $update) {
573     my $v = ' ' x $pos . $val;
574     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
575 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
576 dpavlin 815 }
577     }
578    
579 dpavlin 547 =head2 marc
580 dpavlin 540
581     Save value for MARC field
582    
583 dpavlin 547 marc('900','a', rec('200','a') );
584 dpavlin 571 marc('001', rec('000') );
585 dpavlin 540
586     =cut
587    
588 dpavlin 547 sub marc {
589     my $f = shift or die "marc needs field";
590     die "marc field must be numer" unless ($f =~ /^\d+$/);
591 dpavlin 540
592 dpavlin 571 my $sf;
593     if ($f >= 10) {
594     $sf = shift or die "marc needs subfield";
595     }
596 dpavlin 540
597 dpavlin 541 foreach (@_) {
598     my $v = $_; # make var read-write for Encode
599 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
600 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
601 dpavlin 571 if (defined $sf) {
602 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
603 dpavlin 571 } else {
604 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
605 dpavlin 571 }
606 dpavlin 540 }
607     }
608    
609 dpavlin 547 =head2 marc_repeatable_subfield
610    
611     Save values for MARC repetable subfield
612    
613     marc_repeatable_subfield('910', 'z', rec('909') );
614    
615     =cut
616    
617     sub marc_repeatable_subfield {
618 dpavlin 550 my ($f,$sf) = @_;
619     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
620     $marc_repeatable_subfield->{ $f . $sf }++;
621 dpavlin 547 marc(@_);
622     }
623    
624     =head2 marc_indicators
625    
626     Set both indicators for MARC field
627    
628     marc_indicators('900', ' ', 1);
629    
630     Any indicator value other than C<0-9> will be treated as undefined.
631    
632     =cut
633    
634     sub marc_indicators {
635     my $f = shift || die "marc_indicators need field!\n";
636     my ($i1,$i2) = @_;
637     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
638     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
639    
640     $i1 = ' ' if ($i1 !~ /^\d$/);
641     $i2 = ' ' if ($i2 !~ /^\d$/);
642 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
643 dpavlin 547 }
644    
645 dpavlin 562 =head2 marc_compose
646 dpavlin 547
647 dpavlin 562 Save values for each MARC subfield explicitly
648    
649     marc_compose('900',
650     'a', rec('200','a')
651     'b', rec('201','a')
652     'a', rec('200','b')
653     'c', rec('200','c')
654     );
655    
656 dpavlin 603 If you specify C<+> for subfield, value will be appended
657     to previous defined subfield.
658    
659 dpavlin 562 =cut
660    
661     sub marc_compose {
662     my $f = shift or die "marc_compose needs field";
663     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
664    
665     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
666     my $m = [ $f, $i1, $i2 ];
667    
668 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
669    
670 dpavlin 619 if ($#_ % 2 != 1) {
671     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
672     }
673    
674 dpavlin 562 while (@_) {
675 dpavlin 619 my $sf = shift;
676 dpavlin 565 my $v = shift;
677 dpavlin 562
678     next unless (defined($v) && $v !~ /^\s*$/);
679 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
680 dpavlin 603 if ($sf ne '+') {
681     push @$m, ( $sf, $v );
682     } else {
683     $m->[ $#$m ] .= $v;
684     }
685 dpavlin 562 }
686    
687 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
688 dpavlin 562
689 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
690 dpavlin 562 }
691    
692 dpavlin 574 =head2 marc_duplicate
693 dpavlin 562
694 dpavlin 574 Generate copy of current MARC record and continue working on copy
695    
696     marc_duplicate();
697    
698     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
699     C<< _get_marc_fields( offset => 42 ) >>.
700    
701     =cut
702    
703     sub marc_duplicate {
704     my $m = $marc_record->[ -1 ];
705     die "can't duplicate record which isn't defined" unless ($m);
706     push @{ $marc_record }, dclone( $m );
707 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
708     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
709 dpavlin 574 $marc_record_offset = $#{ $marc_record };
710     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
711 dpavlin 813
712 dpavlin 574 }
713    
714     =head2 marc_remove
715    
716     Remove some field or subfield from MARC record.
717    
718     marc_remove('200');
719     marc_remove('200','a');
720    
721     This will erase field C<200> or C<200^a> from current MARC record.
722    
723 dpavlin 786 marc_remove('*');
724    
725     Will remove all fields in current MARC record.
726    
727 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
728     should probably just remove that subfield definition if you are not
729     using C<marc_duplicate>).
730    
731     FIXME: support fields < 10.
732    
733     =cut
734    
735     sub marc_remove {
736     my ($f, $sf) = @_;
737    
738     die "marc_remove needs record number" unless defined($f);
739    
740     my $marc = $marc_record->[ $marc_record_offset ];
741    
742     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
743    
744 dpavlin 786 if ($f eq '*') {
745    
746     delete( $marc_record->[ $marc_record_offset ] );
747 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
748 dpavlin 786
749     } else {
750    
751     my $i = 0;
752     foreach ( 0 .. $#{ $marc } ) {
753     last unless (defined $marc->[$i]);
754     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
755     if ($marc->[$i]->[0] eq $f) {
756     if (! defined $sf) {
757     # remove whole field
758     splice @$marc, $i, 1;
759     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
760     $i--;
761     } else {
762     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
763     my $o = ($j * 2) + 3;
764     if ($marc->[$i]->[$o] eq $sf) {
765     # remove subfield
766     splice @{$marc->[$i]}, $o, 2;
767     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
768     # is record now empty?
769     if ($#{ $marc->[$i] } == 2) {
770     splice @$marc, $i, 1;
771     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
772     $i--;
773     };
774     }
775 dpavlin 574 }
776     }
777     }
778 dpavlin 786 $i++;
779 dpavlin 574 }
780 dpavlin 786
781     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
782    
783     $marc_record->[ $marc_record_offset ] = $marc;
784 dpavlin 574 }
785    
786     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
787     }
788    
789 dpavlin 604 =head2 marc_original_order
790    
791     Copy all subfields preserving original order to marc field.
792    
793 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
794 dpavlin 604
795 dpavlin 616 Please note that field numbers are consistent with other commands (marc
796     field number first), but somewhat counter-intuitive (destination and then
797     source).
798    
799 dpavlin 604 You might want to use this command if you are just renaming subfields or
800     using pre-processing modify_record in C<config.yml> and don't need any
801     post-processing or want to preserve order of original subfields.
802    
803 dpavlin 616
804 dpavlin 604 =cut
805    
806     sub marc_original_order {
807    
808 dpavlin 616 my ($to, $from) = @_;
809 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
810    
811 dpavlin 616 return unless defined($rec->{$from});
812    
813     my $r = $rec->{$from};
814 dpavlin 604 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
815    
816     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
817 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
818 dpavlin 604
819     foreach my $d (@$r) {
820    
821 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
822 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
823 dpavlin 605 next;
824     }
825    
826 dpavlin 604 my @sfs = @{ $d->{subfields} };
827    
828     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
829    
830 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
831 dpavlin 604
832     my $m = [ $to, $i1, $i2 ];
833    
834     while (my $sf = shift @sfs) {
835 dpavlin 618
836     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
837 dpavlin 604 my $offset = shift @sfs;
838     die "corrupted sufields specification for field $from\n" unless defined($offset);
839    
840     my $v;
841     if (ref($d->{$sf}) eq 'ARRAY') {
842     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
843     } elsif ($offset == 0) {
844     $v = $d->{$sf};
845     } else {
846     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
847     }
848     push @$m, ( $sf, $v ) if (defined($v));
849     }
850    
851     if ($#{$m} > 2) {
852     push @{ $marc_record->[ $marc_record_offset ] }, $m;
853     }
854     }
855    
856     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
857     }
858    
859 dpavlin 813 =head2 marc_count
860 dpavlin 604
861 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
862    
863     print "created ", marc_count(), " records";
864    
865     =cut
866    
867     sub marc_count {
868     return $#{ $marc_record };
869     }
870    
871    
872 dpavlin 540 =head1 Functions to extract data from input
873    
874     This function should be used inside functions to create C<data_structure> described
875     above.
876    
877 dpavlin 641 =head2 _pack_subfields_hash
878    
879 dpavlin 669 @subfields = _pack_subfields_hash( $h );
880     $subfields = _pack_subfields_hash( $h, 1 );
881 dpavlin 641
882 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
883     with subfields (denoted by C<^>) and values.
884    
885 dpavlin 641 =cut
886    
887     sub _pack_subfields_hash {
888    
889 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
890 dpavlin 641
891     my ($h,$include_subfields) = @_;
892    
893 dpavlin 831 # sanity and ease of use
894     return $h if (ref($h) ne 'HASH');
895    
896 dpavlin 641 if ( defined($h->{subfields}) ) {
897     my $sfs = delete $h->{subfields} || die "no subfields?";
898     my @out;
899     while (@$sfs) {
900     my $sf = shift @$sfs;
901     push @out, '^' . $sf if ($include_subfields);
902     my $o = shift @$sfs;
903     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
904     # single element subfields are not arrays
905 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
906 dpavlin 667
907 dpavlin 641 push @out, $h->{$sf};
908     } else {
909 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
910 dpavlin 641 push @out, $h->{$sf}->[$o];
911     }
912     }
913 dpavlin 667 if ($include_subfields) {
914     return join('', @out);
915     } else {
916     return @out;
917     }
918 dpavlin 641 } else {
919 dpavlin 667 if ($include_subfields) {
920     my $out = '';
921 dpavlin 668 foreach my $sf (sort keys %$h) {
922 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
923     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
924     } else {
925     $out .= '^' . $sf . $h->{$sf};
926     }
927     }
928     return $out;
929     } else {
930     # FIXME this should probably be in alphabetical order instead of hash order
931     values %{$h};
932     }
933 dpavlin 641 }
934     }
935    
936 dpavlin 536 =head2 rec1
937 dpavlin 371
938 dpavlin 536 Return all values in some field
939 dpavlin 371
940 dpavlin 536 @v = rec1('200')
941 dpavlin 15
942 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
943 dpavlin 15
944 dpavlin 536 =cut
945 dpavlin 15
946 dpavlin 536 sub rec1 {
947     my $f = shift;
948 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
949 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
950 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
951 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
952 dpavlin 641 my @out;
953     foreach my $h ( @{ $rec->{$f} } ) {
954     if (ref($h) eq 'HASH') {
955     push @out, ( _pack_subfields_hash( $h ) );
956 dpavlin 31 } else {
957 dpavlin 641 push @out, $h;
958 dpavlin 31 }
959 dpavlin 641 }
960     return @out;
961 dpavlin 536 } elsif( defined($rec->{$f}) ) {
962     return $rec->{$f};
963 dpavlin 15 }
964     }
965    
966 dpavlin 536 =head2 rec2
967 dpavlin 15
968 dpavlin 536 Return all values in specific field and subfield
969 dpavlin 13
970 dpavlin 536 @v = rec2('200','a')
971 dpavlin 13
972     =cut
973    
974 dpavlin 536 sub rec2 {
975     my $f = shift;
976     return unless (defined($rec && $rec->{$f}));
977     my $sf = shift;
978 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
979 dpavlin 589 return map {
980     if (ref($_->{$sf}) eq 'ARRAY') {
981     @{ $_->{$sf} };
982     } else {
983     $_->{$sf};
984     }
985     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
986 dpavlin 536 }
987 dpavlin 13
988 dpavlin 536 =head2 rec
989 dpavlin 13
990 dpavlin 536 syntaxtic sugar for
991 dpavlin 13
992 dpavlin 536 @v = rec('200')
993     @v = rec('200','a')
994 dpavlin 13
995 dpavlin 750 If rec() returns just single value, it will
996     return scalar, not array.
997    
998 dpavlin 536 =cut
999 dpavlin 373
1000 dpavlin 536 sub rec {
1001 dpavlin 583 my @out;
1002 dpavlin 536 if ($#_ == 0) {
1003 dpavlin 583 @out = rec1(@_);
1004 dpavlin 536 } elsif ($#_ == 1) {
1005 dpavlin 583 @out = rec2(@_);
1006 dpavlin 13 }
1007 dpavlin 750 if ($#out == 0 && ! wantarray) {
1008     return $out[0];
1009     } elsif (@out) {
1010 dpavlin 583 return @out;
1011     } else {
1012     return '';
1013     }
1014 dpavlin 13 }
1015    
1016 dpavlin 536 =head2 regex
1017 dpavlin 15
1018 dpavlin 536 Apply regex to some or all values
1019 dpavlin 15
1020 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1021 dpavlin 15
1022     =cut
1023    
1024 dpavlin 536 sub regex {
1025     my $r = shift;
1026     my @out;
1027 dpavlin 550 #warn "r: $r\n", dump(\@_);
1028 dpavlin 536 foreach my $t (@_) {
1029     next unless ($t);
1030     eval "\$t =~ $r";
1031     push @out, $t if ($t && $t ne '');
1032 dpavlin 368 }
1033 dpavlin 536 return @out;
1034 dpavlin 15 }
1035    
1036 dpavlin 536 =head2 prefix
1037 dpavlin 15
1038 dpavlin 536 Prefix all values with a string
1039 dpavlin 15
1040 dpavlin 536 @v = prefix( 'my_', @v );
1041 dpavlin 15
1042     =cut
1043    
1044 dpavlin 536 sub prefix {
1045 dpavlin 819 my $p = shift;
1046     return @_ unless defined( $p );
1047 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1048     }
1049 dpavlin 15
1050 dpavlin 536 =head2 suffix
1051 dpavlin 15
1052 dpavlin 536 suffix all values with a string
1053 dpavlin 15
1054 dpavlin 536 @v = suffix( '_my', @v );
1055 dpavlin 15
1056 dpavlin 536 =cut
1057 dpavlin 15
1058 dpavlin 536 sub suffix {
1059 dpavlin 819 my $s = shift;
1060     return @_ unless defined( $s );
1061 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1062 dpavlin 15 }
1063    
1064 dpavlin 536 =head2 surround
1065 dpavlin 13
1066 dpavlin 536 surround all values with a two strings
1067 dpavlin 13
1068 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1069 dpavlin 13
1070     =cut
1071    
1072 dpavlin 536 sub surround {
1073 dpavlin 819 my $p = shift;
1074     my $s = shift;
1075     $p = '' unless defined( $p );
1076     $s = '' unless defined( $s );
1077 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1078 dpavlin 13 }
1079    
1080 dpavlin 536 =head2 first
1081 dpavlin 13
1082 dpavlin 536 Return first element
1083 dpavlin 15
1084 dpavlin 536 $v = first( @v );
1085 dpavlin 13
1086     =cut
1087    
1088 dpavlin 536 sub first {
1089     my $r = shift;
1090     return $r;
1091 dpavlin 13 }
1092    
1093 dpavlin 536 =head2 lookup
1094 dpavlin 13
1095 dpavlin 536 Consult lookup hashes for some value
1096 dpavlin 13
1097 dpavlin 725 @v = lookup(
1098     sub {
1099     'ffkk/peri/mfn'.rec('000')
1100     },
1101     'ffkk','peri','200-a-200-e',
1102     sub {
1103     first(rec(200,'a')).' '.first(rec('200','e'))
1104     }
1105     );
1106 dpavlin 13
1107 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1108     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1109 dpavlin 707
1110 dpavlin 725 lookup(
1111     # which results to return from record recorded in lookup
1112     sub { 'ffkk/peri/mfn' . rec('000') },
1113     # from which database and input
1114     'ffkk','peri',
1115     # such that following values match
1116     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1117     # if this part is missing, we will try to match same fields
1118     # from lookup record and current one, or you can override
1119     # which records to use from current record using
1120     sub { rec('900','x') . ' ' . rec('900','y') },
1121     )
1122    
1123     You can think about this lookup as SQL (if that helps):
1124    
1125     select
1126     sub { what }
1127     from
1128     database, input
1129     where
1130     sub { filter from lookuped record }
1131     having
1132     sub { optional filter on current record }
1133    
1134     Easy as pie, right?
1135    
1136 dpavlin 13 =cut
1137    
1138 dpavlin 536 sub lookup {
1139 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1140    
1141 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1142 dpavlin 725
1143 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1144 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1145    
1146 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1147 dpavlin 725
1148     my $mfns;
1149     my @having = $having->();
1150    
1151 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1152 dpavlin 725
1153     foreach my $h ( @having ) {
1154     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1155 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1156 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1157     }
1158 dpavlin 536 }
1159 dpavlin 725
1160     return unless ($mfns);
1161    
1162     my @mfns = sort keys %$mfns;
1163    
1164 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1165 dpavlin 725
1166     my $old_rec = $rec;
1167     my @out;
1168    
1169     foreach my $mfn (@mfns) {
1170 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1171 dpavlin 725
1172 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1173 dpavlin 725
1174     my @vals = $what->();
1175    
1176     push @out, ( @vals );
1177    
1178 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1179 dpavlin 725 }
1180    
1181     # if (ref($lookup->{$k}) eq 'ARRAY') {
1182     # return @{ $lookup->{$k} };
1183     # } else {
1184     # return $lookup->{$k};
1185     # }
1186    
1187     $rec = $old_rec;
1188    
1189 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1190 dpavlin 725
1191 dpavlin 740 if ($#out == 0) {
1192     return $out[0];
1193     } else {
1194     return @out;
1195     }
1196 dpavlin 13 }
1197    
1198 dpavlin 707 =head2 save_into_lookup
1199    
1200 dpavlin 725 Save value into lookup. It associates current database, input
1201     and specific keys with one or more values which will be
1202     associated over MFN.
1203 dpavlin 707
1204 dpavlin 725 MFN will be extracted from first occurence current of field 000
1205     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1206    
1207     my $nr = save_into_lookup($database,$input,$key,sub {
1208 dpavlin 707 # code which produce one or more values
1209     });
1210    
1211 dpavlin 725 It returns number of items saved.
1212 dpavlin 707
1213 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1214     L<WebPAC::Parser>.
1215    
1216 dpavlin 707 =cut
1217    
1218     sub save_into_lookup {
1219 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1220     die "save_into_lookup needs database" unless defined($database);
1221     die "save_into_lookup needs input" unless defined($input);
1222     die "save_into_lookup needs key" unless defined($key);
1223 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1224 dpavlin 725
1225 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1226 dpavlin 725
1227     my $mfn =
1228     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1229     defined($config->{_mfn}) ? $config->{_mfn} :
1230     die "mfn not defined or zero";
1231    
1232     my $nr = 0;
1233    
1234 dpavlin 707 foreach my $v ( $coderef->() ) {
1235 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1236 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1237 dpavlin 725 $nr++;
1238 dpavlin 707 }
1239 dpavlin 725
1240     return $nr;
1241 dpavlin 707 }
1242    
1243 dpavlin 595 =head2 config
1244    
1245     Consult config values stored in C<config.yml>
1246    
1247     # return database code (key under databases in yaml)
1248     $database_code = config(); # use _ from hash
1249     $database_name = config('name');
1250     $database_input_name = config('input name');
1251    
1252     Up to three levels are supported.
1253    
1254     =cut
1255    
1256     sub config {
1257     return unless ($config);
1258    
1259     my $p = shift;
1260    
1261     $p ||= '';
1262    
1263     my $v;
1264    
1265     warn "### getting config($p)\n" if ($debug > 1);
1266    
1267     my @p = split(/\s+/,$p);
1268     if ($#p < 0) {
1269     $v = $config->{ '_' }; # special, database code
1270     } else {
1271    
1272     my $c = dclone( $config );
1273    
1274     foreach my $k (@p) {
1275     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1276     if (ref($c) eq 'ARRAY') {
1277     $c = shift @$c;
1278     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1279     last;
1280     }
1281    
1282     if (! defined($c->{$k}) ) {
1283     $c = undef;
1284     last;
1285     } else {
1286     $c = $c->{$k};
1287     }
1288     }
1289     $v = $c if ($c);
1290    
1291     }
1292    
1293     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1294     warn "config( '$p' ) is empty\n" if (! $v);
1295    
1296     return $v;
1297     }
1298    
1299     =head2 id
1300    
1301     Returns unique id of this record
1302    
1303     $id = id();
1304    
1305     Returns C<42/2> for 2nd occurence of MFN 42.
1306    
1307     =cut
1308    
1309     sub id {
1310     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1311     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1312     }
1313    
1314 dpavlin 536 =head2 join_with
1315 dpavlin 13
1316 dpavlin 536 Joins walues with some delimiter
1317 dpavlin 10
1318 dpavlin 536 $v = join_with(", ", @v);
1319 dpavlin 10
1320 dpavlin 536 =cut
1321 dpavlin 10
1322 dpavlin 536 sub join_with {
1323     my $d = shift;
1324 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1325 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1326     return '' unless defined($v);
1327     return $v;
1328 dpavlin 536 }
1329 dpavlin 10
1330 dpavlin 562 =head2 split_rec_on
1331    
1332     Split record subfield on some regex and take one of parts out
1333    
1334     $a_before_semi_column =
1335     split_rec_on('200','a', /\s*;\s*/, $part);
1336    
1337     C<$part> is optional number of element. First element is
1338     B<1>, not 0!
1339    
1340     If there is no C<$part> parameter or C<$part> is 0, this function will
1341     return all values produced by splitting.
1342    
1343     =cut
1344    
1345     sub split_rec_on {
1346     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1347    
1348     my ($fld, $sf, $regex, $part) = @_;
1349 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1350 dpavlin 562
1351     my @r = rec( $fld, $sf );
1352     my $v = shift @r;
1353 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1354 dpavlin 562
1355 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1356 dpavlin 566
1357 dpavlin 562 my @s = split( $regex, $v );
1358 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1359 dpavlin 566 if ($part && $part > 0) {
1360 dpavlin 562 return $s[ $part - 1 ];
1361     } else {
1362 dpavlin 571 return @s;
1363 dpavlin 562 }
1364     }
1365    
1366 dpavlin 785 my $hash;
1367    
1368     =head2 set
1369    
1370     set( key => 'value' );
1371    
1372     =cut
1373    
1374     sub set {
1375     my ($k,$v) = @_;
1376 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1377 dpavlin 785 $hash->{$k} = $v;
1378     };
1379    
1380     =head2 get
1381    
1382     get( 'key' );
1383    
1384     =cut
1385    
1386     sub get {
1387     my $k = shift || return;
1388     my $v = $hash->{$k};
1389 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1390 dpavlin 785 return $v;
1391     }
1392    
1393 dpavlin 791 =head2 count
1394 dpavlin 785
1395 dpavlin 791 if ( count( @result ) == 1 ) {
1396     # do something if only 1 result is there
1397     }
1398    
1399     =cut
1400    
1401     sub count {
1402 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1403 dpavlin 791 return @_ . '';
1404     }
1405    
1406 dpavlin 536 # END
1407     1;

  ViewVC Help
Powered by ViewVC 1.1.26