/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1012 - (hide annotations)
Wed Nov 7 09:19:29 2007 UTC (15 years, 2 months ago) by dpavlin
File size: 30980 byte(s)
document frec

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 547 marc marc_indicators marc_repeatable_subfield
13 dpavlin 815 marc_compose marc_leader marc_fixed
14 dpavlin 813 marc_duplicate marc_remove marc_count
15 dpavlin 604 marc_original_order
16 dpavlin 540
17 dpavlin 536 rec1 rec2 rec
18 dpavlin 979 frec
19 dpavlin 536 regex prefix suffix surround
20     first lookup join_with
21 dpavlin 707 save_into_lookup
22 dpavlin 562
23     split_rec_on
24 dpavlin 785
25     get set
26 dpavlin 791 count
27 dpavlin 980
28 dpavlin 536 /;
29 dpavlin 10
30     use warnings;
31     use strict;
32 dpavlin 536
33     #use base qw/WebPAC::Common/;
34 dpavlin 550 use Data::Dump qw/dump/;
35 dpavlin 574 use Storable qw/dclone/;
36 dpavlin 725 use Carp qw/confess/;
37 dpavlin 10
38 dpavlin 550 # debugging warn(s)
39     my $debug = 0;
40    
41 dpavlin 980 use WebPAC::Normalize::ISBN;
42     push @EXPORT, ( 'isbn_10', 'isbn_13' );
43 dpavlin 550
44 dpavlin 10 =head1 NAME
45    
46 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
47 dpavlin 10
48     =cut
49    
50 dpavlin 983 our $VERSION = '0.32';
51 dpavlin 10
52     =head1 SYNOPSIS
53    
54 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
55     from input records using perl functions which are specialized for set
56     processing.
57 dpavlin 10
58 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
59     means that you check it's validity before running WebPAC using
60     C<perl -c normalize.pl>.
61 dpavlin 15
62 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
63 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
64 dpavlin 547 C<marc>.
65 dpavlin 15
66 dpavlin 10 =head1 FUNCTIONS
67    
68 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
69     All other functions are available for use within normalisation rules.
70    
71 dpavlin 536 =head2 data_structure
72 dpavlin 10
73 dpavlin 536 Return data structure
74 dpavlin 13
75 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
76 dpavlin 725 lookup => $lookup_hash,
77 dpavlin 536 row => $row,
78     rules => $normalize_pl_config,
79 dpavlin 541 marc_encoding => 'utf-8',
80 dpavlin 595 config => $config,
81 dpavlin 736 load_row_coderef => sub {
82 dpavlin 979 my ($database,$input,$mfn) = @_;
83 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
84 dpavlin 725 },
85 dpavlin 13 );
86    
87 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
88 dpavlin 540 other are optional.
89    
90 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
91 dpavlin 725 die if it's not defined.
92    
93 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
94 dpavlin 15
95 dpavlin 538 Since this function isn't exported you have to call it with
96     C<WebPAC::Normalize::data_structure>.
97    
98 dpavlin 10 =cut
99    
100 dpavlin 736 my $load_row_coderef;
101 dpavlin 725
102 dpavlin 536 sub data_structure {
103     my $arg = {@_};
104 dpavlin 13
105 dpavlin 536 die "need row argument" unless ($arg->{row});
106     die "need normalisation argument" unless ($arg->{rules});
107 dpavlin 31
108 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
109 dpavlin 983 _set_ds( $arg->{row} );
110 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
111 dpavlin 541 _clean_ds( %{ $arg } );
112 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
113 dpavlin 725
114 dpavlin 1011 no strict 'subs';
115     no warnings 'redefine';
116     eval "$arg->{rules};";
117 dpavlin 536 die "error evaling $arg->{rules}: $@\n" if ($@);
118 dpavlin 540
119 dpavlin 538 return _get_ds();
120 dpavlin 10 }
121    
122 dpavlin 983 =head2 _set_ds
123 dpavlin 13
124 dpavlin 536 Set current record hash
125 dpavlin 433
126 dpavlin 983 _set_ds( $rec );
127 dpavlin 433
128     =cut
129    
130 dpavlin 536 my $rec;
131 dpavlin 433
132 dpavlin 983 sub _set_ds {
133 dpavlin 536 $rec = shift or die "no record hash";
134 dpavlin 433 }
135    
136 dpavlin 595 =head2 _set_config
137    
138     Set current config hash
139    
140     _set_config( $config );
141    
142     Magic keys are:
143    
144     =over 4
145    
146     =item _
147    
148     Code of current database
149    
150     =item _mfn
151    
152     Current MFN
153    
154     =back
155    
156     =cut
157    
158     my $config;
159    
160     sub _set_config {
161     $config = shift;
162     }
163    
164 dpavlin 538 =head2 _get_ds
165    
166     Return hash formatted as data structure
167    
168     my $ds = _get_ds();
169    
170     =cut
171    
172 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
173 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
174 dpavlin 538
175     sub _get_ds {
176 dpavlin 982 #warn "## out = ",dump($out);
177 dpavlin 538 return $out;
178     }
179    
180     =head2 _clean_ds
181    
182     Clean data structure hash for next record
183    
184     _clean_ds();
185    
186     =cut
187    
188     sub _clean_ds {
189 dpavlin 541 my $a = {@_};
190 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
191 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
192 dpavlin 541 $marc_encoding = $a->{marc_encoding};
193 dpavlin 538 }
194    
195     =head2 _set_lookup
196    
197     Set current lookup hash
198    
199     _set_lookup( $lookup );
200    
201     =cut
202    
203     my $lookup;
204    
205     sub _set_lookup {
206     $lookup = shift;
207     }
208    
209 dpavlin 707 =head2 _get_lookup
210    
211     Get current lookup hash
212    
213     my $lookup = _get_lookup();
214    
215     =cut
216    
217     sub _get_lookup {
218     return $lookup;
219     }
220    
221 dpavlin 736 =head2 _set_load_row
222 dpavlin 725
223     Setup code reference which will return L<data_structure> from
224     L<WebPAC::Store>
225    
226 dpavlin 736 _set_load_row(sub {
227 dpavlin 725 my ($database,$input,$mfn) = @_;
228 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
229 dpavlin 725 });
230    
231     =cut
232    
233 dpavlin 736 sub _set_load_row {
234 dpavlin 725 my $coderef = shift;
235     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
236    
237 dpavlin 736 $load_row_coderef = $coderef;
238 dpavlin 725 }
239    
240 dpavlin 547 =head2 _get_marc_fields
241 dpavlin 540
242 dpavlin 547 Get all fields defined by calls to C<marc>
243 dpavlin 540
244 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
245 dpavlin 540
246 dpavlin 543 We are using I<magic> which detect repeatable fields only from
247     sequence of field/subfield data generated by normalization.
248    
249 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
250     if any of indicators are different.
251 dpavlin 543
252 dpavlin 554 This is sane for most cases. Something like:
253 dpavlin 543
254 dpavlin 554 900a-1 900b-1 900c-1
255     900a-2 900b-2
256     900a-3
257    
258     will be created from any combination of:
259    
260     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
261    
262     and following rules:
263    
264     marc('900','a', rec('200','a') );
265     marc('900','b', rec('200','b') );
266     marc('900','c', rec('200','c') );
267    
268     which might not be what you have in mind. If you need repeatable subfield,
269     define it using C<marc_repeatable_subfield> like this:
270    
271 dpavlin 574 marc_repeatable_subfield('900','a');
272     marc('900','a', rec('200','a') );
273     marc('900','b', rec('200','b') );
274     marc('900','c', rec('200','c') );
275 dpavlin 554
276 dpavlin 574 will create:
277    
278     900a-1 900a-2 900a-3 900b-1 900c-1
279     900b-2
280    
281     There is also support for returning next or specific using:
282    
283     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
284     # do something with $mf
285     }
286    
287     will always return fields from next MARC record or
288    
289     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
290    
291     will return 42th copy record (if it exists).
292    
293 dpavlin 540 =cut
294    
295 dpavlin 813 my $fetch_pos;
296    
297 dpavlin 547 sub _get_marc_fields {
298 dpavlin 550
299 dpavlin 574 my $arg = {@_};
300     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
301 dpavlin 813 $fetch_pos = $marc_fetch_offset;
302 dpavlin 574 if ($arg->{offset}) {
303 dpavlin 813 $fetch_pos = $arg->{offset};
304 dpavlin 574 } elsif($arg->{fetch_next}) {
305     $marc_fetch_offset++;
306     }
307 dpavlin 550
308 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
309    
310     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
311    
312 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
313 dpavlin 574
314 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
315 dpavlin 574
316     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
317    
318 dpavlin 550 # first, sort all existing fields
319     # XXX might not be needed, but modern perl might randomize elements in hash
320     my @sorted_marc_record = sort {
321 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
322 dpavlin 574 } @{ $marc_rec };
323 dpavlin 550
324 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
325 dpavlin 562
326 dpavlin 550 # output marc fields
327 dpavlin 542 my @m;
328 dpavlin 550
329     # count unique field-subfields (used for offset when walking to next subfield)
330     my $u;
331 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
332 dpavlin 550
333     if ($debug) {
334 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
335 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
336 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
337     warn "## subfield count = ", dump( $u ), $/;
338 dpavlin 550 }
339    
340     my $len = $#sorted_marc_record;
341     my $visited;
342     my $i = 0;
343     my $field;
344    
345     foreach ( 0 .. $len ) {
346    
347     # find next element which isn't visited
348     while ($visited->{$i}) {
349     $i = ($i + 1) % ($len + 1);
350 dpavlin 542 }
351    
352 dpavlin 550 # mark it visited
353     $visited->{$i}++;
354    
355 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
356 dpavlin 550
357     # field and subfield which is key for
358     # marc_repeatable_subfield and u
359 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
360 dpavlin 550
361     if ($debug > 1) {
362    
363     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
364     print "### this [$i]: ", dump( $row ),$/;
365     print "### sf: ", $row->[3], " vs ", $field->[3],
366     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
367     if ($#$field >= 0);
368    
369     }
370    
371     # if field exists
372     if ( $#$field >= 0 ) {
373     if (
374     $row->[0] ne $field->[0] || # field
375     $row->[1] ne $field->[1] || # i1
376     $row->[2] ne $field->[2] # i2
377     ) {
378     push @m, $field;
379     warn "## saved/1 ", dump( $field ),$/ if ($debug);
380     $field = $row;
381    
382     } elsif (
383     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
384     ||
385     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
386     ! $marc_repeatable_subfield->{ $fsf }
387     )
388     ) {
389     push @m, $field;
390     warn "## saved/2 ", dump( $field ),$/ if ($debug);
391     $field = $row;
392    
393     } else {
394     # append new subfields to existing field
395     push @$field, ( $row->[3], $row->[4] );
396     }
397     } else {
398     # insert first field
399     $field = $row;
400     }
401    
402     if (! $marc_repeatable_subfield->{ $fsf }) {
403     # make step to next subfield
404     $i = ($i + $u->{ $fsf } ) % ($len + 1);
405     }
406 dpavlin 542 }
407    
408 dpavlin 550 if ($#$field >= 0) {
409     push @m, $field;
410     warn "## saved/3 ", dump( $field ),$/ if ($debug);
411     }
412 dpavlin 542
413 dpavlin 579 return \@m;
414 dpavlin 540 }
415    
416 dpavlin 813 =head2 _get_marc_leader
417    
418     Return leader from currently fetched record by L</_get_marc_fields>
419    
420     print WebPAC::Normalize::_get_marc_leader();
421    
422     =cut
423    
424     sub _get_marc_leader {
425     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
426     return $marc_leader->[ $fetch_pos ];
427     }
428    
429 dpavlin 554 =head2 _debug
430    
431     Change level of debug warnings
432    
433     _debug( 2 );
434    
435     =cut
436    
437     sub _debug {
438     my $l = shift;
439     return $debug unless defined($l);
440 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
441 dpavlin 554 $debug = $l;
442     }
443    
444 dpavlin 540 =head1 Functions to create C<data_structure>
445    
446     Those functions generally have to first in your normalization file.
447    
448 dpavlin 912 =head2 search_display
449 dpavlin 433
450 dpavlin 912 Define output for L<search> and L<display> at the same time
451 dpavlin 433
452 dpavlin 912 search_display('Title', rec('200','a') );
453 dpavlin 13
454    
455     =cut
456    
457 dpavlin 912 sub search_display {
458     my $name = shift or die "search_display needs name as first argument";
459 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
460     return unless (@o);
461     $out->{$name}->{search} = \@o;
462     $out->{$name}->{display} = \@o;
463     }
464 dpavlin 13
465 dpavlin 915 =head2 tag
466    
467     Old name for L<search_display>, but supported
468    
469     =cut
470    
471     sub tag {
472     search_display( @_ );
473     }
474    
475 dpavlin 536 =head2 display
476 dpavlin 13
477 dpavlin 912 Define output just for I<display>
478 dpavlin 125
479 dpavlin 536 @v = display('Title', rec('200','a') );
480 dpavlin 125
481 dpavlin 536 =cut
482 dpavlin 125
483 dpavlin 923 sub _field {
484     my $type = shift or confess "need type -- BUG?";
485     my $name = shift or confess "needs name as first argument";
486 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
487     return unless (@o);
488 dpavlin 923 $out->{$name}->{$type} = \@o;
489 dpavlin 536 }
490 dpavlin 13
491 dpavlin 923 sub display { _field( 'display', @_ ) }
492    
493 dpavlin 536 =head2 search
494 dpavlin 13
495 dpavlin 536 Prepare values just for I<search>
496 dpavlin 13
497 dpavlin 536 @v = search('Title', rec('200','a') );
498 dpavlin 433
499 dpavlin 536 =cut
500 dpavlin 13
501 dpavlin 923 sub search { _field( 'search', @_ ) }
502 dpavlin 13
503 dpavlin 923 =head2 sorted
504    
505     Insert into lists which will be automatically sorted
506    
507     sorted('Title', rec('200','a') );
508    
509     =cut
510    
511     sub sorted { _field( 'sorted', @_ ) }
512    
513    
514 dpavlin 564 =head2 marc_leader
515    
516     Setup fields within MARC leader or get leader
517    
518     marc_leader('05','c');
519     my $leader = marc_leader();
520    
521     =cut
522    
523     sub marc_leader {
524     my ($offset,$value) = @_;
525    
526     if ($offset) {
527 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
528 dpavlin 564 } else {
529 dpavlin 813
530     if (defined($marc_leader)) {
531     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
532     return $marc_leader->[ $marc_record_offset ];
533     } else {
534     return;
535     }
536 dpavlin 564 }
537     }
538    
539 dpavlin 815 =head2 marc_fixed
540    
541     Create control/indentifier fields with values in fixed positions
542    
543     marc_fixed('008', 00, '070402');
544     marc_fixed('008', 39, '|');
545    
546     Positions not specified will be filled with spaces (C<0x20>).
547    
548     There will be no effort to extend last specified value to full length of
549     field in standard.
550    
551     =cut
552    
553     sub marc_fixed {
554     my ($f, $pos, $val) = @_;
555     die "need marc(field, position, value)" unless defined($f) && defined($pos);
556    
557 dpavlin 889 confess "need val" unless defined $val;
558    
559 dpavlin 815 my $update = 0;
560    
561     map {
562     if ($_->[0] eq $f) {
563     my $old = $_->[1];
564 dpavlin 889 if (length($old) <= $pos) {
565 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
566 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
567 dpavlin 815 } else {
568     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
569 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
570 dpavlin 815 }
571     $update++;
572     }
573     } @{ $marc_record->[ $marc_record_offset ] };
574    
575     if (! $update) {
576     my $v = ' ' x $pos . $val;
577     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
578 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
579 dpavlin 815 }
580     }
581    
582 dpavlin 547 =head2 marc
583 dpavlin 540
584     Save value for MARC field
585    
586 dpavlin 547 marc('900','a', rec('200','a') );
587 dpavlin 571 marc('001', rec('000') );
588 dpavlin 540
589     =cut
590    
591 dpavlin 547 sub marc {
592     my $f = shift or die "marc needs field";
593     die "marc field must be numer" unless ($f =~ /^\d+$/);
594 dpavlin 540
595 dpavlin 571 my $sf;
596     if ($f >= 10) {
597     $sf = shift or die "marc needs subfield";
598     }
599 dpavlin 540
600 dpavlin 541 foreach (@_) {
601     my $v = $_; # make var read-write for Encode
602 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
603 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
604 dpavlin 571 if (defined $sf) {
605 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
606 dpavlin 571 } else {
607 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
608 dpavlin 571 }
609 dpavlin 540 }
610     }
611    
612 dpavlin 547 =head2 marc_repeatable_subfield
613    
614     Save values for MARC repetable subfield
615    
616     marc_repeatable_subfield('910', 'z', rec('909') );
617    
618     =cut
619    
620     sub marc_repeatable_subfield {
621 dpavlin 550 my ($f,$sf) = @_;
622     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
623     $marc_repeatable_subfield->{ $f . $sf }++;
624 dpavlin 547 marc(@_);
625     }
626    
627     =head2 marc_indicators
628    
629     Set both indicators for MARC field
630    
631     marc_indicators('900', ' ', 1);
632    
633     Any indicator value other than C<0-9> will be treated as undefined.
634    
635     =cut
636    
637     sub marc_indicators {
638     my $f = shift || die "marc_indicators need field!\n";
639     my ($i1,$i2) = @_;
640     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
641     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
642    
643     $i1 = ' ' if ($i1 !~ /^\d$/);
644     $i2 = ' ' if ($i2 !~ /^\d$/);
645 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
646 dpavlin 547 }
647    
648 dpavlin 562 =head2 marc_compose
649 dpavlin 547
650 dpavlin 562 Save values for each MARC subfield explicitly
651    
652     marc_compose('900',
653     'a', rec('200','a')
654     'b', rec('201','a')
655     'a', rec('200','b')
656     'c', rec('200','c')
657     );
658    
659 dpavlin 603 If you specify C<+> for subfield, value will be appended
660     to previous defined subfield.
661    
662 dpavlin 562 =cut
663    
664     sub marc_compose {
665     my $f = shift or die "marc_compose needs field";
666     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
667    
668     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
669     my $m = [ $f, $i1, $i2 ];
670    
671 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
672    
673 dpavlin 619 if ($#_ % 2 != 1) {
674     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
675     }
676    
677 dpavlin 562 while (@_) {
678 dpavlin 619 my $sf = shift;
679 dpavlin 565 my $v = shift;
680 dpavlin 562
681     next unless (defined($v) && $v !~ /^\s*$/);
682 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
683 dpavlin 603 if ($sf ne '+') {
684     push @$m, ( $sf, $v );
685     } else {
686     $m->[ $#$m ] .= $v;
687     }
688 dpavlin 562 }
689    
690 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
691 dpavlin 562
692 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
693 dpavlin 562 }
694    
695 dpavlin 574 =head2 marc_duplicate
696 dpavlin 562
697 dpavlin 574 Generate copy of current MARC record and continue working on copy
698    
699     marc_duplicate();
700    
701     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
702     C<< _get_marc_fields( offset => 42 ) >>.
703    
704     =cut
705    
706     sub marc_duplicate {
707     my $m = $marc_record->[ -1 ];
708     die "can't duplicate record which isn't defined" unless ($m);
709     push @{ $marc_record }, dclone( $m );
710 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
711     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
712 dpavlin 574 $marc_record_offset = $#{ $marc_record };
713     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
714 dpavlin 813
715 dpavlin 574 }
716    
717     =head2 marc_remove
718    
719     Remove some field or subfield from MARC record.
720    
721     marc_remove('200');
722     marc_remove('200','a');
723    
724     This will erase field C<200> or C<200^a> from current MARC record.
725    
726 dpavlin 786 marc_remove('*');
727    
728     Will remove all fields in current MARC record.
729    
730 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
731     should probably just remove that subfield definition if you are not
732     using C<marc_duplicate>).
733    
734     FIXME: support fields < 10.
735    
736     =cut
737    
738     sub marc_remove {
739     my ($f, $sf) = @_;
740    
741     die "marc_remove needs record number" unless defined($f);
742    
743     my $marc = $marc_record->[ $marc_record_offset ];
744    
745     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
746    
747 dpavlin 786 if ($f eq '*') {
748    
749     delete( $marc_record->[ $marc_record_offset ] );
750 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
751 dpavlin 786
752     } else {
753    
754     my $i = 0;
755     foreach ( 0 .. $#{ $marc } ) {
756     last unless (defined $marc->[$i]);
757     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
758     if ($marc->[$i]->[0] eq $f) {
759     if (! defined $sf) {
760     # remove whole field
761     splice @$marc, $i, 1;
762     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
763     $i--;
764     } else {
765     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
766     my $o = ($j * 2) + 3;
767     if ($marc->[$i]->[$o] eq $sf) {
768     # remove subfield
769     splice @{$marc->[$i]}, $o, 2;
770     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
771     # is record now empty?
772     if ($#{ $marc->[$i] } == 2) {
773     splice @$marc, $i, 1;
774     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
775     $i--;
776     };
777     }
778 dpavlin 574 }
779     }
780     }
781 dpavlin 786 $i++;
782 dpavlin 574 }
783 dpavlin 786
784     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
785    
786     $marc_record->[ $marc_record_offset ] = $marc;
787 dpavlin 574 }
788    
789     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
790     }
791    
792 dpavlin 604 =head2 marc_original_order
793    
794     Copy all subfields preserving original order to marc field.
795    
796 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
797 dpavlin 604
798 dpavlin 616 Please note that field numbers are consistent with other commands (marc
799     field number first), but somewhat counter-intuitive (destination and then
800     source).
801    
802 dpavlin 604 You might want to use this command if you are just renaming subfields or
803     using pre-processing modify_record in C<config.yml> and don't need any
804     post-processing or want to preserve order of original subfields.
805    
806 dpavlin 616
807 dpavlin 604 =cut
808    
809     sub marc_original_order {
810    
811 dpavlin 616 my ($to, $from) = @_;
812 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
813    
814 dpavlin 616 return unless defined($rec->{$from});
815    
816     my $r = $rec->{$from};
817 dpavlin 604 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
818    
819     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
820 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
821 dpavlin 604
822     foreach my $d (@$r) {
823    
824 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
825 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
826 dpavlin 605 next;
827     }
828    
829 dpavlin 604 my @sfs = @{ $d->{subfields} };
830    
831     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
832    
833 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
834 dpavlin 604
835     my $m = [ $to, $i1, $i2 ];
836    
837     while (my $sf = shift @sfs) {
838 dpavlin 618
839     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
840 dpavlin 604 my $offset = shift @sfs;
841     die "corrupted sufields specification for field $from\n" unless defined($offset);
842    
843     my $v;
844     if (ref($d->{$sf}) eq 'ARRAY') {
845     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
846     } elsif ($offset == 0) {
847     $v = $d->{$sf};
848     } else {
849     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
850     }
851     push @$m, ( $sf, $v ) if (defined($v));
852     }
853    
854     if ($#{$m} > 2) {
855     push @{ $marc_record->[ $marc_record_offset ] }, $m;
856     }
857     }
858    
859     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
860     }
861    
862 dpavlin 813 =head2 marc_count
863 dpavlin 604
864 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
865    
866     print "created ", marc_count(), " records";
867    
868     =cut
869    
870     sub marc_count {
871     return $#{ $marc_record };
872     }
873    
874    
875 dpavlin 540 =head1 Functions to extract data from input
876    
877     This function should be used inside functions to create C<data_structure> described
878     above.
879    
880 dpavlin 641 =head2 _pack_subfields_hash
881    
882 dpavlin 669 @subfields = _pack_subfields_hash( $h );
883     $subfields = _pack_subfields_hash( $h, 1 );
884 dpavlin 641
885 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
886     with subfields (denoted by C<^>) and values.
887    
888 dpavlin 641 =cut
889    
890     sub _pack_subfields_hash {
891    
892 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
893 dpavlin 641
894     my ($h,$include_subfields) = @_;
895    
896 dpavlin 831 # sanity and ease of use
897     return $h if (ref($h) ne 'HASH');
898    
899 dpavlin 641 if ( defined($h->{subfields}) ) {
900     my $sfs = delete $h->{subfields} || die "no subfields?";
901     my @out;
902     while (@$sfs) {
903     my $sf = shift @$sfs;
904     push @out, '^' . $sf if ($include_subfields);
905     my $o = shift @$sfs;
906     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
907     # single element subfields are not arrays
908 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
909 dpavlin 667
910 dpavlin 641 push @out, $h->{$sf};
911     } else {
912 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
913 dpavlin 641 push @out, $h->{$sf}->[$o];
914     }
915     }
916 dpavlin 667 if ($include_subfields) {
917     return join('', @out);
918     } else {
919     return @out;
920     }
921 dpavlin 641 } else {
922 dpavlin 667 if ($include_subfields) {
923     my $out = '';
924 dpavlin 668 foreach my $sf (sort keys %$h) {
925 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
926     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
927     } else {
928     $out .= '^' . $sf . $h->{$sf};
929     }
930     }
931     return $out;
932     } else {
933     # FIXME this should probably be in alphabetical order instead of hash order
934     values %{$h};
935     }
936 dpavlin 641 }
937     }
938    
939 dpavlin 536 =head2 rec1
940 dpavlin 371
941 dpavlin 536 Return all values in some field
942 dpavlin 371
943 dpavlin 536 @v = rec1('200')
944 dpavlin 15
945 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
946 dpavlin 15
947 dpavlin 536 =cut
948 dpavlin 15
949 dpavlin 536 sub rec1 {
950     my $f = shift;
951 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
952 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
953 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
954 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
955 dpavlin 641 my @out;
956     foreach my $h ( @{ $rec->{$f} } ) {
957     if (ref($h) eq 'HASH') {
958     push @out, ( _pack_subfields_hash( $h ) );
959 dpavlin 31 } else {
960 dpavlin 641 push @out, $h;
961 dpavlin 31 }
962 dpavlin 641 }
963     return @out;
964 dpavlin 536 } elsif( defined($rec->{$f}) ) {
965     return $rec->{$f};
966 dpavlin 15 }
967     }
968    
969 dpavlin 536 =head2 rec2
970 dpavlin 15
971 dpavlin 536 Return all values in specific field and subfield
972 dpavlin 13
973 dpavlin 536 @v = rec2('200','a')
974 dpavlin 13
975     =cut
976    
977 dpavlin 536 sub rec2 {
978     my $f = shift;
979     return unless (defined($rec && $rec->{$f}));
980     my $sf = shift;
981 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
982 dpavlin 589 return map {
983     if (ref($_->{$sf}) eq 'ARRAY') {
984     @{ $_->{$sf} };
985     } else {
986     $_->{$sf};
987     }
988     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
989 dpavlin 536 }
990 dpavlin 13
991 dpavlin 536 =head2 rec
992 dpavlin 13
993 dpavlin 536 syntaxtic sugar for
994 dpavlin 13
995 dpavlin 536 @v = rec('200')
996     @v = rec('200','a')
997 dpavlin 13
998 dpavlin 750 If rec() returns just single value, it will
999     return scalar, not array.
1000    
1001 dpavlin 536 =cut
1002 dpavlin 373
1003 dpavlin 536 sub rec {
1004 dpavlin 583 my @out;
1005 dpavlin 536 if ($#_ == 0) {
1006 dpavlin 583 @out = rec1(@_);
1007 dpavlin 536 } elsif ($#_ == 1) {
1008 dpavlin 583 @out = rec2(@_);
1009 dpavlin 13 }
1010 dpavlin 750 if ($#out == 0 && ! wantarray) {
1011     return $out[0];
1012     } elsif (@out) {
1013 dpavlin 583 return @out;
1014     } else {
1015     return '';
1016     }
1017 dpavlin 13 }
1018    
1019 dpavlin 1012 =head2 frec
1020    
1021     Returns first value from field
1022    
1023     $v = frec('200');
1024     $v = frec('200','a');
1025    
1026     =cut
1027    
1028     sub frec {
1029     my @out = rec(@_);
1030     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1031     return shift @out;
1032     }
1033    
1034 dpavlin 536 =head2 regex
1035 dpavlin 15
1036 dpavlin 536 Apply regex to some or all values
1037 dpavlin 15
1038 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1039 dpavlin 15
1040     =cut
1041    
1042 dpavlin 536 sub regex {
1043     my $r = shift;
1044     my @out;
1045 dpavlin 550 #warn "r: $r\n", dump(\@_);
1046 dpavlin 536 foreach my $t (@_) {
1047     next unless ($t);
1048     eval "\$t =~ $r";
1049     push @out, $t if ($t && $t ne '');
1050 dpavlin 368 }
1051 dpavlin 536 return @out;
1052 dpavlin 15 }
1053    
1054 dpavlin 536 =head2 prefix
1055 dpavlin 15
1056 dpavlin 536 Prefix all values with a string
1057 dpavlin 15
1058 dpavlin 536 @v = prefix( 'my_', @v );
1059 dpavlin 15
1060     =cut
1061    
1062 dpavlin 536 sub prefix {
1063 dpavlin 819 my $p = shift;
1064     return @_ unless defined( $p );
1065 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1066     }
1067 dpavlin 15
1068 dpavlin 536 =head2 suffix
1069 dpavlin 15
1070 dpavlin 536 suffix all values with a string
1071 dpavlin 15
1072 dpavlin 536 @v = suffix( '_my', @v );
1073 dpavlin 15
1074 dpavlin 536 =cut
1075 dpavlin 15
1076 dpavlin 536 sub suffix {
1077 dpavlin 819 my $s = shift;
1078     return @_ unless defined( $s );
1079 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1080 dpavlin 15 }
1081    
1082 dpavlin 536 =head2 surround
1083 dpavlin 13
1084 dpavlin 536 surround all values with a two strings
1085 dpavlin 13
1086 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1087 dpavlin 13
1088     =cut
1089    
1090 dpavlin 536 sub surround {
1091 dpavlin 819 my $p = shift;
1092     my $s = shift;
1093     $p = '' unless defined( $p );
1094     $s = '' unless defined( $s );
1095 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1096 dpavlin 13 }
1097    
1098 dpavlin 536 =head2 first
1099 dpavlin 13
1100 dpavlin 536 Return first element
1101 dpavlin 15
1102 dpavlin 536 $v = first( @v );
1103 dpavlin 13
1104     =cut
1105    
1106 dpavlin 536 sub first {
1107     my $r = shift;
1108     return $r;
1109 dpavlin 13 }
1110    
1111 dpavlin 536 =head2 lookup
1112 dpavlin 13
1113 dpavlin 536 Consult lookup hashes for some value
1114 dpavlin 13
1115 dpavlin 725 @v = lookup(
1116     sub {
1117     'ffkk/peri/mfn'.rec('000')
1118     },
1119     'ffkk','peri','200-a-200-e',
1120     sub {
1121     first(rec(200,'a')).' '.first(rec('200','e'))
1122     }
1123     );
1124 dpavlin 13
1125 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1126     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1127 dpavlin 707
1128 dpavlin 725 lookup(
1129     # which results to return from record recorded in lookup
1130     sub { 'ffkk/peri/mfn' . rec('000') },
1131     # from which database and input
1132     'ffkk','peri',
1133     # such that following values match
1134     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1135     # if this part is missing, we will try to match same fields
1136     # from lookup record and current one, or you can override
1137     # which records to use from current record using
1138     sub { rec('900','x') . ' ' . rec('900','y') },
1139     )
1140    
1141     You can think about this lookup as SQL (if that helps):
1142    
1143     select
1144     sub { what }
1145     from
1146     database, input
1147     where
1148     sub { filter from lookuped record }
1149     having
1150     sub { optional filter on current record }
1151    
1152     Easy as pie, right?
1153    
1154 dpavlin 13 =cut
1155    
1156 dpavlin 536 sub lookup {
1157 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1158    
1159 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1160 dpavlin 725
1161 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1162 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1163    
1164 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1165 dpavlin 725
1166     my $mfns;
1167     my @having = $having->();
1168    
1169 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1170 dpavlin 725
1171     foreach my $h ( @having ) {
1172     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1173 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1174 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1175     }
1176 dpavlin 536 }
1177 dpavlin 725
1178     return unless ($mfns);
1179    
1180     my @mfns = sort keys %$mfns;
1181    
1182 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1183 dpavlin 725
1184     my $old_rec = $rec;
1185     my @out;
1186    
1187     foreach my $mfn (@mfns) {
1188 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1189 dpavlin 725
1190 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1191 dpavlin 725
1192     my @vals = $what->();
1193    
1194     push @out, ( @vals );
1195    
1196 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1197 dpavlin 725 }
1198    
1199     # if (ref($lookup->{$k}) eq 'ARRAY') {
1200     # return @{ $lookup->{$k} };
1201     # } else {
1202     # return $lookup->{$k};
1203     # }
1204    
1205     $rec = $old_rec;
1206    
1207 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1208 dpavlin 725
1209 dpavlin 740 if ($#out == 0) {
1210     return $out[0];
1211     } else {
1212     return @out;
1213     }
1214 dpavlin 13 }
1215    
1216 dpavlin 707 =head2 save_into_lookup
1217    
1218 dpavlin 725 Save value into lookup. It associates current database, input
1219     and specific keys with one or more values which will be
1220     associated over MFN.
1221 dpavlin 707
1222 dpavlin 725 MFN will be extracted from first occurence current of field 000
1223     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1224    
1225     my $nr = save_into_lookup($database,$input,$key,sub {
1226 dpavlin 707 # code which produce one or more values
1227     });
1228    
1229 dpavlin 725 It returns number of items saved.
1230 dpavlin 707
1231 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1232     L<WebPAC::Parser>.
1233    
1234 dpavlin 707 =cut
1235    
1236     sub save_into_lookup {
1237 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1238     die "save_into_lookup needs database" unless defined($database);
1239     die "save_into_lookup needs input" unless defined($input);
1240     die "save_into_lookup needs key" unless defined($key);
1241 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1242 dpavlin 725
1243 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1244 dpavlin 725
1245     my $mfn =
1246     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1247     defined($config->{_mfn}) ? $config->{_mfn} :
1248     die "mfn not defined or zero";
1249    
1250     my $nr = 0;
1251    
1252 dpavlin 707 foreach my $v ( $coderef->() ) {
1253 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1254 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1255 dpavlin 725 $nr++;
1256 dpavlin 707 }
1257 dpavlin 725
1258     return $nr;
1259 dpavlin 707 }
1260    
1261 dpavlin 595 =head2 config
1262    
1263     Consult config values stored in C<config.yml>
1264    
1265     # return database code (key under databases in yaml)
1266     $database_code = config(); # use _ from hash
1267     $database_name = config('name');
1268     $database_input_name = config('input name');
1269    
1270     Up to three levels are supported.
1271    
1272     =cut
1273    
1274     sub config {
1275     return unless ($config);
1276    
1277     my $p = shift;
1278    
1279     $p ||= '';
1280    
1281     my $v;
1282    
1283     warn "### getting config($p)\n" if ($debug > 1);
1284    
1285     my @p = split(/\s+/,$p);
1286     if ($#p < 0) {
1287     $v = $config->{ '_' }; # special, database code
1288     } else {
1289    
1290     my $c = dclone( $config );
1291    
1292     foreach my $k (@p) {
1293     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1294     if (ref($c) eq 'ARRAY') {
1295     $c = shift @$c;
1296     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1297     last;
1298     }
1299    
1300     if (! defined($c->{$k}) ) {
1301     $c = undef;
1302     last;
1303     } else {
1304     $c = $c->{$k};
1305     }
1306     }
1307     $v = $c if ($c);
1308    
1309     }
1310    
1311     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1312     warn "config( '$p' ) is empty\n" if (! $v);
1313    
1314     return $v;
1315     }
1316    
1317     =head2 id
1318    
1319     Returns unique id of this record
1320    
1321     $id = id();
1322    
1323     Returns C<42/2> for 2nd occurence of MFN 42.
1324    
1325     =cut
1326    
1327     sub id {
1328     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1329     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1330     }
1331    
1332 dpavlin 536 =head2 join_with
1333 dpavlin 13
1334 dpavlin 536 Joins walues with some delimiter
1335 dpavlin 10
1336 dpavlin 536 $v = join_with(", ", @v);
1337 dpavlin 10
1338 dpavlin 536 =cut
1339 dpavlin 10
1340 dpavlin 536 sub join_with {
1341     my $d = shift;
1342 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1343 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1344     return '' unless defined($v);
1345     return $v;
1346 dpavlin 536 }
1347 dpavlin 10
1348 dpavlin 562 =head2 split_rec_on
1349    
1350     Split record subfield on some regex and take one of parts out
1351    
1352     $a_before_semi_column =
1353     split_rec_on('200','a', /\s*;\s*/, $part);
1354    
1355     C<$part> is optional number of element. First element is
1356     B<1>, not 0!
1357    
1358     If there is no C<$part> parameter or C<$part> is 0, this function will
1359     return all values produced by splitting.
1360    
1361     =cut
1362    
1363     sub split_rec_on {
1364     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1365    
1366     my ($fld, $sf, $regex, $part) = @_;
1367 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1368 dpavlin 562
1369     my @r = rec( $fld, $sf );
1370     my $v = shift @r;
1371 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1372 dpavlin 562
1373 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1374 dpavlin 566
1375 dpavlin 562 my @s = split( $regex, $v );
1376 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1377 dpavlin 566 if ($part && $part > 0) {
1378 dpavlin 562 return $s[ $part - 1 ];
1379     } else {
1380 dpavlin 571 return @s;
1381 dpavlin 562 }
1382     }
1383    
1384 dpavlin 785 my $hash;
1385    
1386     =head2 set
1387    
1388     set( key => 'value' );
1389    
1390     =cut
1391    
1392     sub set {
1393     my ($k,$v) = @_;
1394 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1395 dpavlin 785 $hash->{$k} = $v;
1396     };
1397    
1398     =head2 get
1399    
1400     get( 'key' );
1401    
1402     =cut
1403    
1404     sub get {
1405     my $k = shift || return;
1406     my $v = $hash->{$k};
1407 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1408 dpavlin 785 return $v;
1409     }
1410    
1411 dpavlin 791 =head2 count
1412 dpavlin 785
1413 dpavlin 791 if ( count( @result ) == 1 ) {
1414     # do something if only 1 result is there
1415     }
1416    
1417     =cut
1418    
1419     sub count {
1420 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1421 dpavlin 791 return @_ . '';
1422     }
1423    
1424 dpavlin 536 # END
1425     1;

  ViewVC Help
Powered by ViewVC 1.1.26