/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 982 - (hide annotations)
Sat Nov 3 13:35:03 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 30863 byte(s)
remove debug

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 538 _set_rec _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 547 marc marc_indicators marc_repeatable_subfield
13 dpavlin 815 marc_compose marc_leader marc_fixed
14 dpavlin 813 marc_duplicate marc_remove marc_count
15 dpavlin 604 marc_original_order
16 dpavlin 540
17 dpavlin 536 rec1 rec2 rec
18 dpavlin 979 frec
19 dpavlin 536 regex prefix suffix surround
20     first lookup join_with
21 dpavlin 707 save_into_lookup
22 dpavlin 562
23     split_rec_on
24 dpavlin 785
25     get set
26 dpavlin 791 count
27 dpavlin 980
28 dpavlin 536 /;
29 dpavlin 10
30     use warnings;
31     use strict;
32 dpavlin 536
33     #use base qw/WebPAC::Common/;
34 dpavlin 550 use Data::Dump qw/dump/;
35 dpavlin 574 use Storable qw/dclone/;
36 dpavlin 725 use Carp qw/confess/;
37 dpavlin 10
38 dpavlin 550 # debugging warn(s)
39     my $debug = 0;
40    
41 dpavlin 980 use WebPAC::Normalize::ISBN;
42     push @EXPORT, ( 'isbn_10', 'isbn_13' );
43 dpavlin 550
44 dpavlin 10 =head1 NAME
45    
46 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
47 dpavlin 10
48     =cut
49    
50 dpavlin 923 our $VERSION = '0.31';
51 dpavlin 10
52     =head1 SYNOPSIS
53    
54 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
55     from input records using perl functions which are specialized for set
56     processing.
57 dpavlin 10
58 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
59     means that you check it's validity before running WebPAC using
60     C<perl -c normalize.pl>.
61 dpavlin 15
62 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
63 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
64 dpavlin 547 C<marc>.
65 dpavlin 15
66 dpavlin 10 =head1 FUNCTIONS
67    
68 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
69     All other functions are available for use within normalisation rules.
70    
71 dpavlin 536 =head2 data_structure
72 dpavlin 10
73 dpavlin 536 Return data structure
74 dpavlin 13
75 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
76 dpavlin 725 lookup => $lookup_hash,
77 dpavlin 536 row => $row,
78     rules => $normalize_pl_config,
79 dpavlin 541 marc_encoding => 'utf-8',
80 dpavlin 595 config => $config,
81 dpavlin 736 load_row_coderef => sub {
82 dpavlin 979 my ($database,$input,$mfn) = @_;
83 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
84 dpavlin 725 },
85 dpavlin 13 );
86    
87 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
88 dpavlin 540 other are optional.
89    
90 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
91 dpavlin 725 die if it's not defined.
92    
93 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
94 dpavlin 15
95 dpavlin 538 Since this function isn't exported you have to call it with
96     C<WebPAC::Normalize::data_structure>.
97    
98 dpavlin 10 =cut
99    
100 dpavlin 736 my $load_row_coderef;
101 dpavlin 725
102 dpavlin 536 sub data_structure {
103     my $arg = {@_};
104 dpavlin 13
105 dpavlin 536 die "need row argument" unless ($arg->{row});
106     die "need normalisation argument" unless ($arg->{rules});
107 dpavlin 31
108 dpavlin 536 no strict 'subs';
109 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
110 dpavlin 538 _set_rec( $arg->{row} );
111 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
112 dpavlin 541 _clean_ds( %{ $arg } );
113 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
114 dpavlin 725
115 dpavlin 536 eval "$arg->{rules}";
116     die "error evaling $arg->{rules}: $@\n" if ($@);
117 dpavlin 540
118 dpavlin 538 return _get_ds();
119 dpavlin 10 }
120    
121 dpavlin 538 =head2 _set_rec
122 dpavlin 13
123 dpavlin 536 Set current record hash
124 dpavlin 433
125 dpavlin 538 _set_rec( $rec );
126 dpavlin 433
127     =cut
128    
129 dpavlin 536 my $rec;
130 dpavlin 433
131 dpavlin 538 sub _set_rec {
132 dpavlin 536 $rec = shift or die "no record hash";
133 dpavlin 433 }
134    
135 dpavlin 595 =head2 _set_config
136    
137     Set current config hash
138    
139     _set_config( $config );
140    
141     Magic keys are:
142    
143     =over 4
144    
145     =item _
146    
147     Code of current database
148    
149     =item _mfn
150    
151     Current MFN
152    
153     =back
154    
155     =cut
156    
157     my $config;
158    
159     sub _set_config {
160     $config = shift;
161     }
162    
163 dpavlin 538 =head2 _get_ds
164    
165     Return hash formatted as data structure
166    
167     my $ds = _get_ds();
168    
169     =cut
170    
171 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
172 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
173 dpavlin 538
174     sub _get_ds {
175 dpavlin 982 #warn "## out = ",dump($out);
176 dpavlin 538 return $out;
177     }
178    
179     =head2 _clean_ds
180    
181     Clean data structure hash for next record
182    
183     _clean_ds();
184    
185     =cut
186    
187     sub _clean_ds {
188 dpavlin 541 my $a = {@_};
189 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
190 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
191 dpavlin 541 $marc_encoding = $a->{marc_encoding};
192 dpavlin 538 }
193    
194     =head2 _set_lookup
195    
196     Set current lookup hash
197    
198     _set_lookup( $lookup );
199    
200     =cut
201    
202     my $lookup;
203    
204     sub _set_lookup {
205     $lookup = shift;
206     }
207    
208 dpavlin 707 =head2 _get_lookup
209    
210     Get current lookup hash
211    
212     my $lookup = _get_lookup();
213    
214     =cut
215    
216     sub _get_lookup {
217     return $lookup;
218     }
219    
220 dpavlin 736 =head2 _set_load_row
221 dpavlin 725
222     Setup code reference which will return L<data_structure> from
223     L<WebPAC::Store>
224    
225 dpavlin 736 _set_load_row(sub {
226 dpavlin 725 my ($database,$input,$mfn) = @_;
227 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
228 dpavlin 725 });
229    
230     =cut
231    
232 dpavlin 736 sub _set_load_row {
233 dpavlin 725 my $coderef = shift;
234     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
235    
236 dpavlin 736 $load_row_coderef = $coderef;
237 dpavlin 725 }
238    
239 dpavlin 547 =head2 _get_marc_fields
240 dpavlin 540
241 dpavlin 547 Get all fields defined by calls to C<marc>
242 dpavlin 540
243 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
244 dpavlin 540
245 dpavlin 543 We are using I<magic> which detect repeatable fields only from
246     sequence of field/subfield data generated by normalization.
247    
248 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
249     if any of indicators are different.
250 dpavlin 543
251 dpavlin 554 This is sane for most cases. Something like:
252 dpavlin 543
253 dpavlin 554 900a-1 900b-1 900c-1
254     900a-2 900b-2
255     900a-3
256    
257     will be created from any combination of:
258    
259     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
260    
261     and following rules:
262    
263     marc('900','a', rec('200','a') );
264     marc('900','b', rec('200','b') );
265     marc('900','c', rec('200','c') );
266    
267     which might not be what you have in mind. If you need repeatable subfield,
268     define it using C<marc_repeatable_subfield> like this:
269    
270 dpavlin 574 marc_repeatable_subfield('900','a');
271     marc('900','a', rec('200','a') );
272     marc('900','b', rec('200','b') );
273     marc('900','c', rec('200','c') );
274 dpavlin 554
275 dpavlin 574 will create:
276    
277     900a-1 900a-2 900a-3 900b-1 900c-1
278     900b-2
279    
280     There is also support for returning next or specific using:
281    
282     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
283     # do something with $mf
284     }
285    
286     will always return fields from next MARC record or
287    
288     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
289    
290     will return 42th copy record (if it exists).
291    
292 dpavlin 540 =cut
293    
294 dpavlin 813 my $fetch_pos;
295    
296 dpavlin 547 sub _get_marc_fields {
297 dpavlin 550
298 dpavlin 574 my $arg = {@_};
299     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
300 dpavlin 813 $fetch_pos = $marc_fetch_offset;
301 dpavlin 574 if ($arg->{offset}) {
302 dpavlin 813 $fetch_pos = $arg->{offset};
303 dpavlin 574 } elsif($arg->{fetch_next}) {
304     $marc_fetch_offset++;
305     }
306 dpavlin 550
307 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
308    
309     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
310    
311 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
312 dpavlin 574
313 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
314 dpavlin 574
315     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
316    
317 dpavlin 550 # first, sort all existing fields
318     # XXX might not be needed, but modern perl might randomize elements in hash
319     my @sorted_marc_record = sort {
320 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
321 dpavlin 574 } @{ $marc_rec };
322 dpavlin 550
323 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
324 dpavlin 562
325 dpavlin 550 # output marc fields
326 dpavlin 542 my @m;
327 dpavlin 550
328     # count unique field-subfields (used for offset when walking to next subfield)
329     my $u;
330 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
331 dpavlin 550
332     if ($debug) {
333 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
334 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
335 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
336     warn "## subfield count = ", dump( $u ), $/;
337 dpavlin 550 }
338    
339     my $len = $#sorted_marc_record;
340     my $visited;
341     my $i = 0;
342     my $field;
343    
344     foreach ( 0 .. $len ) {
345    
346     # find next element which isn't visited
347     while ($visited->{$i}) {
348     $i = ($i + 1) % ($len + 1);
349 dpavlin 542 }
350    
351 dpavlin 550 # mark it visited
352     $visited->{$i}++;
353    
354 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
355 dpavlin 550
356     # field and subfield which is key for
357     # marc_repeatable_subfield and u
358 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
359 dpavlin 550
360     if ($debug > 1) {
361    
362     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
363     print "### this [$i]: ", dump( $row ),$/;
364     print "### sf: ", $row->[3], " vs ", $field->[3],
365     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
366     if ($#$field >= 0);
367    
368     }
369    
370     # if field exists
371     if ( $#$field >= 0 ) {
372     if (
373     $row->[0] ne $field->[0] || # field
374     $row->[1] ne $field->[1] || # i1
375     $row->[2] ne $field->[2] # i2
376     ) {
377     push @m, $field;
378     warn "## saved/1 ", dump( $field ),$/ if ($debug);
379     $field = $row;
380    
381     } elsif (
382     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
383     ||
384     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
385     ! $marc_repeatable_subfield->{ $fsf }
386     )
387     ) {
388     push @m, $field;
389     warn "## saved/2 ", dump( $field ),$/ if ($debug);
390     $field = $row;
391    
392     } else {
393     # append new subfields to existing field
394     push @$field, ( $row->[3], $row->[4] );
395     }
396     } else {
397     # insert first field
398     $field = $row;
399     }
400    
401     if (! $marc_repeatable_subfield->{ $fsf }) {
402     # make step to next subfield
403     $i = ($i + $u->{ $fsf } ) % ($len + 1);
404     }
405 dpavlin 542 }
406    
407 dpavlin 550 if ($#$field >= 0) {
408     push @m, $field;
409     warn "## saved/3 ", dump( $field ),$/ if ($debug);
410     }
411 dpavlin 542
412 dpavlin 579 return \@m;
413 dpavlin 540 }
414    
415 dpavlin 813 =head2 _get_marc_leader
416    
417     Return leader from currently fetched record by L</_get_marc_fields>
418    
419     print WebPAC::Normalize::_get_marc_leader();
420    
421     =cut
422    
423     sub _get_marc_leader {
424     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
425     return $marc_leader->[ $fetch_pos ];
426     }
427    
428 dpavlin 554 =head2 _debug
429    
430     Change level of debug warnings
431    
432     _debug( 2 );
433    
434     =cut
435    
436     sub _debug {
437     my $l = shift;
438     return $debug unless defined($l);
439 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
440 dpavlin 554 $debug = $l;
441     }
442    
443 dpavlin 540 =head1 Functions to create C<data_structure>
444    
445     Those functions generally have to first in your normalization file.
446    
447 dpavlin 912 =head2 search_display
448 dpavlin 433
449 dpavlin 912 Define output for L<search> and L<display> at the same time
450 dpavlin 433
451 dpavlin 912 search_display('Title', rec('200','a') );
452 dpavlin 13
453    
454     =cut
455    
456 dpavlin 912 sub search_display {
457     my $name = shift or die "search_display needs name as first argument";
458 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
459     return unless (@o);
460     $out->{$name}->{search} = \@o;
461     $out->{$name}->{display} = \@o;
462     }
463 dpavlin 13
464 dpavlin 915 =head2 tag
465    
466     Old name for L<search_display>, but supported
467    
468     =cut
469    
470     sub tag {
471     search_display( @_ );
472     }
473    
474 dpavlin 536 =head2 display
475 dpavlin 13
476 dpavlin 912 Define output just for I<display>
477 dpavlin 125
478 dpavlin 536 @v = display('Title', rec('200','a') );
479 dpavlin 125
480 dpavlin 536 =cut
481 dpavlin 125
482 dpavlin 923 sub _field {
483     my $type = shift or confess "need type -- BUG?";
484     my $name = shift or confess "needs name as first argument";
485 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
486     return unless (@o);
487 dpavlin 923 $out->{$name}->{$type} = \@o;
488 dpavlin 536 }
489 dpavlin 13
490 dpavlin 923 sub display { _field( 'display', @_ ) }
491    
492 dpavlin 536 =head2 search
493 dpavlin 13
494 dpavlin 536 Prepare values just for I<search>
495 dpavlin 13
496 dpavlin 536 @v = search('Title', rec('200','a') );
497 dpavlin 433
498 dpavlin 536 =cut
499 dpavlin 13
500 dpavlin 923 sub search { _field( 'search', @_ ) }
501 dpavlin 13
502 dpavlin 923 =head2 sorted
503    
504     Insert into lists which will be automatically sorted
505    
506     sorted('Title', rec('200','a') );
507    
508     =cut
509    
510     sub sorted { _field( 'sorted', @_ ) }
511    
512    
513 dpavlin 564 =head2 marc_leader
514    
515     Setup fields within MARC leader or get leader
516    
517     marc_leader('05','c');
518     my $leader = marc_leader();
519    
520     =cut
521    
522     sub marc_leader {
523     my ($offset,$value) = @_;
524    
525     if ($offset) {
526 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
527 dpavlin 564 } else {
528 dpavlin 813
529     if (defined($marc_leader)) {
530     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
531     return $marc_leader->[ $marc_record_offset ];
532     } else {
533     return;
534     }
535 dpavlin 564 }
536     }
537    
538 dpavlin 815 =head2 marc_fixed
539    
540     Create control/indentifier fields with values in fixed positions
541    
542     marc_fixed('008', 00, '070402');
543     marc_fixed('008', 39, '|');
544    
545     Positions not specified will be filled with spaces (C<0x20>).
546    
547     There will be no effort to extend last specified value to full length of
548     field in standard.
549    
550     =cut
551    
552     sub marc_fixed {
553     my ($f, $pos, $val) = @_;
554     die "need marc(field, position, value)" unless defined($f) && defined($pos);
555    
556 dpavlin 889 confess "need val" unless defined $val;
557    
558 dpavlin 815 my $update = 0;
559    
560     map {
561     if ($_->[0] eq $f) {
562     my $old = $_->[1];
563 dpavlin 889 if (length($old) <= $pos) {
564 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
565 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
566 dpavlin 815 } else {
567     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
568 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
569 dpavlin 815 }
570     $update++;
571     }
572     } @{ $marc_record->[ $marc_record_offset ] };
573    
574     if (! $update) {
575     my $v = ' ' x $pos . $val;
576     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
577 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
578 dpavlin 815 }
579     }
580    
581 dpavlin 547 =head2 marc
582 dpavlin 540
583     Save value for MARC field
584    
585 dpavlin 547 marc('900','a', rec('200','a') );
586 dpavlin 571 marc('001', rec('000') );
587 dpavlin 540
588     =cut
589    
590 dpavlin 547 sub marc {
591     my $f = shift or die "marc needs field";
592     die "marc field must be numer" unless ($f =~ /^\d+$/);
593 dpavlin 540
594 dpavlin 571 my $sf;
595     if ($f >= 10) {
596     $sf = shift or die "marc needs subfield";
597     }
598 dpavlin 540
599 dpavlin 541 foreach (@_) {
600     my $v = $_; # make var read-write for Encode
601 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
602 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
603 dpavlin 571 if (defined $sf) {
604 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
605 dpavlin 571 } else {
606 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
607 dpavlin 571 }
608 dpavlin 540 }
609     }
610    
611 dpavlin 547 =head2 marc_repeatable_subfield
612    
613     Save values for MARC repetable subfield
614    
615     marc_repeatable_subfield('910', 'z', rec('909') );
616    
617     =cut
618    
619     sub marc_repeatable_subfield {
620 dpavlin 550 my ($f,$sf) = @_;
621     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
622     $marc_repeatable_subfield->{ $f . $sf }++;
623 dpavlin 547 marc(@_);
624     }
625    
626     =head2 marc_indicators
627    
628     Set both indicators for MARC field
629    
630     marc_indicators('900', ' ', 1);
631    
632     Any indicator value other than C<0-9> will be treated as undefined.
633    
634     =cut
635    
636     sub marc_indicators {
637     my $f = shift || die "marc_indicators need field!\n";
638     my ($i1,$i2) = @_;
639     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
640     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
641    
642     $i1 = ' ' if ($i1 !~ /^\d$/);
643     $i2 = ' ' if ($i2 !~ /^\d$/);
644 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
645 dpavlin 547 }
646    
647 dpavlin 562 =head2 marc_compose
648 dpavlin 547
649 dpavlin 562 Save values for each MARC subfield explicitly
650    
651     marc_compose('900',
652     'a', rec('200','a')
653     'b', rec('201','a')
654     'a', rec('200','b')
655     'c', rec('200','c')
656     );
657    
658 dpavlin 603 If you specify C<+> for subfield, value will be appended
659     to previous defined subfield.
660    
661 dpavlin 562 =cut
662    
663     sub marc_compose {
664     my $f = shift or die "marc_compose needs field";
665     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
666    
667     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
668     my $m = [ $f, $i1, $i2 ];
669    
670 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
671    
672 dpavlin 619 if ($#_ % 2 != 1) {
673     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
674     }
675    
676 dpavlin 562 while (@_) {
677 dpavlin 619 my $sf = shift;
678 dpavlin 565 my $v = shift;
679 dpavlin 562
680     next unless (defined($v) && $v !~ /^\s*$/);
681 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
682 dpavlin 603 if ($sf ne '+') {
683     push @$m, ( $sf, $v );
684     } else {
685     $m->[ $#$m ] .= $v;
686     }
687 dpavlin 562 }
688    
689 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
690 dpavlin 562
691 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
692 dpavlin 562 }
693    
694 dpavlin 574 =head2 marc_duplicate
695 dpavlin 562
696 dpavlin 574 Generate copy of current MARC record and continue working on copy
697    
698     marc_duplicate();
699    
700     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
701     C<< _get_marc_fields( offset => 42 ) >>.
702    
703     =cut
704    
705     sub marc_duplicate {
706     my $m = $marc_record->[ -1 ];
707     die "can't duplicate record which isn't defined" unless ($m);
708     push @{ $marc_record }, dclone( $m );
709 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
710     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
711 dpavlin 574 $marc_record_offset = $#{ $marc_record };
712     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
713 dpavlin 813
714 dpavlin 574 }
715    
716     =head2 marc_remove
717    
718     Remove some field or subfield from MARC record.
719    
720     marc_remove('200');
721     marc_remove('200','a');
722    
723     This will erase field C<200> or C<200^a> from current MARC record.
724    
725 dpavlin 786 marc_remove('*');
726    
727     Will remove all fields in current MARC record.
728    
729 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
730     should probably just remove that subfield definition if you are not
731     using C<marc_duplicate>).
732    
733     FIXME: support fields < 10.
734    
735     =cut
736    
737     sub marc_remove {
738     my ($f, $sf) = @_;
739    
740     die "marc_remove needs record number" unless defined($f);
741    
742     my $marc = $marc_record->[ $marc_record_offset ];
743    
744     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
745    
746 dpavlin 786 if ($f eq '*') {
747    
748     delete( $marc_record->[ $marc_record_offset ] );
749 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
750 dpavlin 786
751     } else {
752    
753     my $i = 0;
754     foreach ( 0 .. $#{ $marc } ) {
755     last unless (defined $marc->[$i]);
756     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
757     if ($marc->[$i]->[0] eq $f) {
758     if (! defined $sf) {
759     # remove whole field
760     splice @$marc, $i, 1;
761     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
762     $i--;
763     } else {
764     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
765     my $o = ($j * 2) + 3;
766     if ($marc->[$i]->[$o] eq $sf) {
767     # remove subfield
768     splice @{$marc->[$i]}, $o, 2;
769     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
770     # is record now empty?
771     if ($#{ $marc->[$i] } == 2) {
772     splice @$marc, $i, 1;
773     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
774     $i--;
775     };
776     }
777 dpavlin 574 }
778     }
779     }
780 dpavlin 786 $i++;
781 dpavlin 574 }
782 dpavlin 786
783     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
784    
785     $marc_record->[ $marc_record_offset ] = $marc;
786 dpavlin 574 }
787    
788     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
789     }
790    
791 dpavlin 604 =head2 marc_original_order
792    
793     Copy all subfields preserving original order to marc field.
794    
795 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
796 dpavlin 604
797 dpavlin 616 Please note that field numbers are consistent with other commands (marc
798     field number first), but somewhat counter-intuitive (destination and then
799     source).
800    
801 dpavlin 604 You might want to use this command if you are just renaming subfields or
802     using pre-processing modify_record in C<config.yml> and don't need any
803     post-processing or want to preserve order of original subfields.
804    
805 dpavlin 616
806 dpavlin 604 =cut
807    
808     sub marc_original_order {
809    
810 dpavlin 616 my ($to, $from) = @_;
811 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
812    
813 dpavlin 616 return unless defined($rec->{$from});
814    
815     my $r = $rec->{$from};
816 dpavlin 604 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
817    
818     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
819 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
820 dpavlin 604
821     foreach my $d (@$r) {
822    
823 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
824 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
825 dpavlin 605 next;
826     }
827    
828 dpavlin 604 my @sfs = @{ $d->{subfields} };
829    
830     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
831    
832 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
833 dpavlin 604
834     my $m = [ $to, $i1, $i2 ];
835    
836     while (my $sf = shift @sfs) {
837 dpavlin 618
838     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
839 dpavlin 604 my $offset = shift @sfs;
840     die "corrupted sufields specification for field $from\n" unless defined($offset);
841    
842     my $v;
843     if (ref($d->{$sf}) eq 'ARRAY') {
844     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
845     } elsif ($offset == 0) {
846     $v = $d->{$sf};
847     } else {
848     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
849     }
850     push @$m, ( $sf, $v ) if (defined($v));
851     }
852    
853     if ($#{$m} > 2) {
854     push @{ $marc_record->[ $marc_record_offset ] }, $m;
855     }
856     }
857    
858     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
859     }
860    
861 dpavlin 813 =head2 marc_count
862 dpavlin 604
863 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
864    
865     print "created ", marc_count(), " records";
866    
867     =cut
868    
869     sub marc_count {
870     return $#{ $marc_record };
871     }
872    
873    
874 dpavlin 540 =head1 Functions to extract data from input
875    
876     This function should be used inside functions to create C<data_structure> described
877     above.
878    
879 dpavlin 641 =head2 _pack_subfields_hash
880    
881 dpavlin 669 @subfields = _pack_subfields_hash( $h );
882     $subfields = _pack_subfields_hash( $h, 1 );
883 dpavlin 641
884 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
885     with subfields (denoted by C<^>) and values.
886    
887 dpavlin 641 =cut
888    
889     sub _pack_subfields_hash {
890    
891 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
892 dpavlin 641
893     my ($h,$include_subfields) = @_;
894    
895 dpavlin 831 # sanity and ease of use
896     return $h if (ref($h) ne 'HASH');
897    
898 dpavlin 641 if ( defined($h->{subfields}) ) {
899     my $sfs = delete $h->{subfields} || die "no subfields?";
900     my @out;
901     while (@$sfs) {
902     my $sf = shift @$sfs;
903     push @out, '^' . $sf if ($include_subfields);
904     my $o = shift @$sfs;
905     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
906     # single element subfields are not arrays
907 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
908 dpavlin 667
909 dpavlin 641 push @out, $h->{$sf};
910     } else {
911 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
912 dpavlin 641 push @out, $h->{$sf}->[$o];
913     }
914     }
915 dpavlin 667 if ($include_subfields) {
916     return join('', @out);
917     } else {
918     return @out;
919     }
920 dpavlin 641 } else {
921 dpavlin 667 if ($include_subfields) {
922     my $out = '';
923 dpavlin 668 foreach my $sf (sort keys %$h) {
924 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
925     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
926     } else {
927     $out .= '^' . $sf . $h->{$sf};
928     }
929     }
930     return $out;
931     } else {
932     # FIXME this should probably be in alphabetical order instead of hash order
933     values %{$h};
934     }
935 dpavlin 641 }
936     }
937    
938 dpavlin 536 =head2 rec1
939 dpavlin 371
940 dpavlin 536 Return all values in some field
941 dpavlin 371
942 dpavlin 536 @v = rec1('200')
943 dpavlin 15
944 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
945 dpavlin 15
946 dpavlin 536 =cut
947 dpavlin 15
948 dpavlin 536 sub rec1 {
949     my $f = shift;
950 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
951 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
952 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
953 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
954 dpavlin 641 my @out;
955     foreach my $h ( @{ $rec->{$f} } ) {
956     if (ref($h) eq 'HASH') {
957     push @out, ( _pack_subfields_hash( $h ) );
958 dpavlin 31 } else {
959 dpavlin 641 push @out, $h;
960 dpavlin 31 }
961 dpavlin 641 }
962     return @out;
963 dpavlin 536 } elsif( defined($rec->{$f}) ) {
964     return $rec->{$f};
965 dpavlin 15 }
966     }
967    
968 dpavlin 536 =head2 rec2
969 dpavlin 15
970 dpavlin 536 Return all values in specific field and subfield
971 dpavlin 13
972 dpavlin 536 @v = rec2('200','a')
973 dpavlin 13
974     =cut
975    
976 dpavlin 536 sub rec2 {
977     my $f = shift;
978     return unless (defined($rec && $rec->{$f}));
979     my $sf = shift;
980 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
981 dpavlin 589 return map {
982     if (ref($_->{$sf}) eq 'ARRAY') {
983     @{ $_->{$sf} };
984     } else {
985     $_->{$sf};
986     }
987     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
988 dpavlin 536 }
989 dpavlin 13
990 dpavlin 536 =head2 rec
991 dpavlin 13
992 dpavlin 536 syntaxtic sugar for
993 dpavlin 13
994 dpavlin 536 @v = rec('200')
995     @v = rec('200','a')
996 dpavlin 13
997 dpavlin 750 If rec() returns just single value, it will
998     return scalar, not array.
999    
1000 dpavlin 536 =cut
1001 dpavlin 373
1002 dpavlin 979 sub frec {
1003     my @out = rec(@_);
1004     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1005     return shift @out;
1006     }
1007    
1008 dpavlin 536 sub rec {
1009 dpavlin 583 my @out;
1010 dpavlin 536 if ($#_ == 0) {
1011 dpavlin 583 @out = rec1(@_);
1012 dpavlin 536 } elsif ($#_ == 1) {
1013 dpavlin 583 @out = rec2(@_);
1014 dpavlin 13 }
1015 dpavlin 750 if ($#out == 0 && ! wantarray) {
1016     return $out[0];
1017     } elsif (@out) {
1018 dpavlin 583 return @out;
1019     } else {
1020     return '';
1021     }
1022 dpavlin 13 }
1023    
1024 dpavlin 536 =head2 regex
1025 dpavlin 15
1026 dpavlin 536 Apply regex to some or all values
1027 dpavlin 15
1028 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1029 dpavlin 15
1030     =cut
1031    
1032 dpavlin 536 sub regex {
1033     my $r = shift;
1034     my @out;
1035 dpavlin 550 #warn "r: $r\n", dump(\@_);
1036 dpavlin 536 foreach my $t (@_) {
1037     next unless ($t);
1038     eval "\$t =~ $r";
1039     push @out, $t if ($t && $t ne '');
1040 dpavlin 368 }
1041 dpavlin 536 return @out;
1042 dpavlin 15 }
1043    
1044 dpavlin 536 =head2 prefix
1045 dpavlin 15
1046 dpavlin 536 Prefix all values with a string
1047 dpavlin 15
1048 dpavlin 536 @v = prefix( 'my_', @v );
1049 dpavlin 15
1050     =cut
1051    
1052 dpavlin 536 sub prefix {
1053 dpavlin 819 my $p = shift;
1054     return @_ unless defined( $p );
1055 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1056     }
1057 dpavlin 15
1058 dpavlin 536 =head2 suffix
1059 dpavlin 15
1060 dpavlin 536 suffix all values with a string
1061 dpavlin 15
1062 dpavlin 536 @v = suffix( '_my', @v );
1063 dpavlin 15
1064 dpavlin 536 =cut
1065 dpavlin 15
1066 dpavlin 536 sub suffix {
1067 dpavlin 819 my $s = shift;
1068     return @_ unless defined( $s );
1069 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1070 dpavlin 15 }
1071    
1072 dpavlin 536 =head2 surround
1073 dpavlin 13
1074 dpavlin 536 surround all values with a two strings
1075 dpavlin 13
1076 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1077 dpavlin 13
1078     =cut
1079    
1080 dpavlin 536 sub surround {
1081 dpavlin 819 my $p = shift;
1082     my $s = shift;
1083     $p = '' unless defined( $p );
1084     $s = '' unless defined( $s );
1085 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1086 dpavlin 13 }
1087    
1088 dpavlin 536 =head2 first
1089 dpavlin 13
1090 dpavlin 536 Return first element
1091 dpavlin 15
1092 dpavlin 536 $v = first( @v );
1093 dpavlin 13
1094     =cut
1095    
1096 dpavlin 536 sub first {
1097     my $r = shift;
1098     return $r;
1099 dpavlin 13 }
1100    
1101 dpavlin 536 =head2 lookup
1102 dpavlin 13
1103 dpavlin 536 Consult lookup hashes for some value
1104 dpavlin 13
1105 dpavlin 725 @v = lookup(
1106     sub {
1107     'ffkk/peri/mfn'.rec('000')
1108     },
1109     'ffkk','peri','200-a-200-e',
1110     sub {
1111     first(rec(200,'a')).' '.first(rec('200','e'))
1112     }
1113     );
1114 dpavlin 13
1115 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1116     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1117 dpavlin 707
1118 dpavlin 725 lookup(
1119     # which results to return from record recorded in lookup
1120     sub { 'ffkk/peri/mfn' . rec('000') },
1121     # from which database and input
1122     'ffkk','peri',
1123     # such that following values match
1124     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1125     # if this part is missing, we will try to match same fields
1126     # from lookup record and current one, or you can override
1127     # which records to use from current record using
1128     sub { rec('900','x') . ' ' . rec('900','y') },
1129     )
1130    
1131     You can think about this lookup as SQL (if that helps):
1132    
1133     select
1134     sub { what }
1135     from
1136     database, input
1137     where
1138     sub { filter from lookuped record }
1139     having
1140     sub { optional filter on current record }
1141    
1142     Easy as pie, right?
1143    
1144 dpavlin 13 =cut
1145    
1146 dpavlin 536 sub lookup {
1147 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1148    
1149 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1150 dpavlin 725
1151 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1152 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1153    
1154 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1155 dpavlin 725
1156     my $mfns;
1157     my @having = $having->();
1158    
1159 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1160 dpavlin 725
1161     foreach my $h ( @having ) {
1162     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1163 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1164 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1165     }
1166 dpavlin 536 }
1167 dpavlin 725
1168     return unless ($mfns);
1169    
1170     my @mfns = sort keys %$mfns;
1171    
1172 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1173 dpavlin 725
1174     my $old_rec = $rec;
1175     my @out;
1176    
1177     foreach my $mfn (@mfns) {
1178 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1179 dpavlin 725
1180 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1181 dpavlin 725
1182     my @vals = $what->();
1183    
1184     push @out, ( @vals );
1185    
1186 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1187 dpavlin 725 }
1188    
1189     # if (ref($lookup->{$k}) eq 'ARRAY') {
1190     # return @{ $lookup->{$k} };
1191     # } else {
1192     # return $lookup->{$k};
1193     # }
1194    
1195     $rec = $old_rec;
1196    
1197 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1198 dpavlin 725
1199 dpavlin 740 if ($#out == 0) {
1200     return $out[0];
1201     } else {
1202     return @out;
1203     }
1204 dpavlin 13 }
1205    
1206 dpavlin 707 =head2 save_into_lookup
1207    
1208 dpavlin 725 Save value into lookup. It associates current database, input
1209     and specific keys with one or more values which will be
1210     associated over MFN.
1211 dpavlin 707
1212 dpavlin 725 MFN will be extracted from first occurence current of field 000
1213     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1214    
1215     my $nr = save_into_lookup($database,$input,$key,sub {
1216 dpavlin 707 # code which produce one or more values
1217     });
1218    
1219 dpavlin 725 It returns number of items saved.
1220 dpavlin 707
1221 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1222     L<WebPAC::Parser>.
1223    
1224 dpavlin 707 =cut
1225    
1226     sub save_into_lookup {
1227 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1228     die "save_into_lookup needs database" unless defined($database);
1229     die "save_into_lookup needs input" unless defined($input);
1230     die "save_into_lookup needs key" unless defined($key);
1231 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1232 dpavlin 725
1233 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1234 dpavlin 725
1235     my $mfn =
1236     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1237     defined($config->{_mfn}) ? $config->{_mfn} :
1238     die "mfn not defined or zero";
1239    
1240     my $nr = 0;
1241    
1242 dpavlin 707 foreach my $v ( $coderef->() ) {
1243 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1244 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1245 dpavlin 725 $nr++;
1246 dpavlin 707 }
1247 dpavlin 725
1248     return $nr;
1249 dpavlin 707 }
1250    
1251 dpavlin 595 =head2 config
1252    
1253     Consult config values stored in C<config.yml>
1254    
1255     # return database code (key under databases in yaml)
1256     $database_code = config(); # use _ from hash
1257     $database_name = config('name');
1258     $database_input_name = config('input name');
1259    
1260     Up to three levels are supported.
1261    
1262     =cut
1263    
1264     sub config {
1265     return unless ($config);
1266    
1267     my $p = shift;
1268    
1269     $p ||= '';
1270    
1271     my $v;
1272    
1273     warn "### getting config($p)\n" if ($debug > 1);
1274    
1275     my @p = split(/\s+/,$p);
1276     if ($#p < 0) {
1277     $v = $config->{ '_' }; # special, database code
1278     } else {
1279    
1280     my $c = dclone( $config );
1281    
1282     foreach my $k (@p) {
1283     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1284     if (ref($c) eq 'ARRAY') {
1285     $c = shift @$c;
1286     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1287     last;
1288     }
1289    
1290     if (! defined($c->{$k}) ) {
1291     $c = undef;
1292     last;
1293     } else {
1294     $c = $c->{$k};
1295     }
1296     }
1297     $v = $c if ($c);
1298    
1299     }
1300    
1301     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1302     warn "config( '$p' ) is empty\n" if (! $v);
1303    
1304     return $v;
1305     }
1306    
1307     =head2 id
1308    
1309     Returns unique id of this record
1310    
1311     $id = id();
1312    
1313     Returns C<42/2> for 2nd occurence of MFN 42.
1314    
1315     =cut
1316    
1317     sub id {
1318     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1319     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1320     }
1321    
1322 dpavlin 536 =head2 join_with
1323 dpavlin 13
1324 dpavlin 536 Joins walues with some delimiter
1325 dpavlin 10
1326 dpavlin 536 $v = join_with(", ", @v);
1327 dpavlin 10
1328 dpavlin 536 =cut
1329 dpavlin 10
1330 dpavlin 536 sub join_with {
1331     my $d = shift;
1332 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1333 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1334     return '' unless defined($v);
1335     return $v;
1336 dpavlin 536 }
1337 dpavlin 10
1338 dpavlin 562 =head2 split_rec_on
1339    
1340     Split record subfield on some regex and take one of parts out
1341    
1342     $a_before_semi_column =
1343     split_rec_on('200','a', /\s*;\s*/, $part);
1344    
1345     C<$part> is optional number of element. First element is
1346     B<1>, not 0!
1347    
1348     If there is no C<$part> parameter or C<$part> is 0, this function will
1349     return all values produced by splitting.
1350    
1351     =cut
1352    
1353     sub split_rec_on {
1354     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1355    
1356     my ($fld, $sf, $regex, $part) = @_;
1357 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1358 dpavlin 562
1359     my @r = rec( $fld, $sf );
1360     my $v = shift @r;
1361 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1362 dpavlin 562
1363 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1364 dpavlin 566
1365 dpavlin 562 my @s = split( $regex, $v );
1366 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1367 dpavlin 566 if ($part && $part > 0) {
1368 dpavlin 562 return $s[ $part - 1 ];
1369     } else {
1370 dpavlin 571 return @s;
1371 dpavlin 562 }
1372     }
1373    
1374 dpavlin 785 my $hash;
1375    
1376     =head2 set
1377    
1378     set( key => 'value' );
1379    
1380     =cut
1381    
1382     sub set {
1383     my ($k,$v) = @_;
1384 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1385 dpavlin 785 $hash->{$k} = $v;
1386     };
1387    
1388     =head2 get
1389    
1390     get( 'key' );
1391    
1392     =cut
1393    
1394     sub get {
1395     my $k = shift || return;
1396     my $v = $hash->{$k};
1397 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1398 dpavlin 785 return $v;
1399     }
1400    
1401 dpavlin 791 =head2 count
1402 dpavlin 785
1403 dpavlin 791 if ( count( @result ) == 1 ) {
1404     # do something if only 1 result is there
1405     }
1406    
1407     =cut
1408    
1409     sub count {
1410 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1411 dpavlin 791 return @_ . '';
1412     }
1413    
1414 dpavlin 536 # END
1415     1;

  ViewVC Help
Powered by ViewVC 1.1.26