/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 983 - (hide annotations)
Sun Nov 4 11:12:38 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 30983 byte(s)
 r1505@llin:  dpavlin | 2007-11-04 12:12:20 +0100
 renamed _set_rec to _set_ds (because it's a data_structure actually)
 and added symetric public get_ds to get whole data_structure as
 hash to manually traverse in normalization

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3 dpavlin 980 our @EXPORT = qw/
4 dpavlin 983 _set_ds _set_lookup
5     get_ds
6 dpavlin 736 _set_load_row
7 dpavlin 538 _get_ds _clean_ds
8 dpavlin 554 _debug
9 dpavlin 641 _pack_subfields_hash
10 dpavlin 538
11 dpavlin 923 search_display search display sorted
12 dpavlin 912
13 dpavlin 547 marc marc_indicators marc_repeatable_subfield
14 dpavlin 815 marc_compose marc_leader marc_fixed
15 dpavlin 813 marc_duplicate marc_remove marc_count
16 dpavlin 604 marc_original_order
17 dpavlin 540
18 dpavlin 536 rec1 rec2 rec
19 dpavlin 979 frec
20 dpavlin 536 regex prefix suffix surround
21     first lookup join_with
22 dpavlin 707 save_into_lookup
23 dpavlin 562
24     split_rec_on
25 dpavlin 785
26     get set
27 dpavlin 791 count
28 dpavlin 980
29 dpavlin 536 /;
30 dpavlin 10
31     use warnings;
32     use strict;
33 dpavlin 536
34     #use base qw/WebPAC::Common/;
35 dpavlin 550 use Data::Dump qw/dump/;
36 dpavlin 574 use Storable qw/dclone/;
37 dpavlin 725 use Carp qw/confess/;
38 dpavlin 10
39 dpavlin 550 # debugging warn(s)
40     my $debug = 0;
41    
42 dpavlin 980 use WebPAC::Normalize::ISBN;
43     push @EXPORT, ( 'isbn_10', 'isbn_13' );
44 dpavlin 550
45 dpavlin 10 =head1 NAME
46    
47 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
48 dpavlin 10
49     =cut
50    
51 dpavlin 983 our $VERSION = '0.32';
52 dpavlin 10
53     =head1 SYNOPSIS
54    
55 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
56     from input records using perl functions which are specialized for set
57     processing.
58 dpavlin 10
59 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
60     means that you check it's validity before running WebPAC using
61     C<perl -c normalize.pl>.
62 dpavlin 15
63 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
64 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
65 dpavlin 547 C<marc>.
66 dpavlin 15
67 dpavlin 10 =head1 FUNCTIONS
68    
69 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
70     All other functions are available for use within normalisation rules.
71    
72 dpavlin 536 =head2 data_structure
73 dpavlin 10
74 dpavlin 536 Return data structure
75 dpavlin 13
76 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
77 dpavlin 725 lookup => $lookup_hash,
78 dpavlin 536 row => $row,
79     rules => $normalize_pl_config,
80 dpavlin 541 marc_encoding => 'utf-8',
81 dpavlin 595 config => $config,
82 dpavlin 736 load_row_coderef => sub {
83 dpavlin 979 my ($database,$input,$mfn) = @_;
84 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
85 dpavlin 725 },
86 dpavlin 13 );
87    
88 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
89 dpavlin 540 other are optional.
90    
91 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
92 dpavlin 725 die if it's not defined.
93    
94 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
95 dpavlin 15
96 dpavlin 538 Since this function isn't exported you have to call it with
97     C<WebPAC::Normalize::data_structure>.
98    
99 dpavlin 10 =cut
100    
101 dpavlin 736 my $load_row_coderef;
102 dpavlin 725
103 dpavlin 536 sub data_structure {
104     my $arg = {@_};
105 dpavlin 13
106 dpavlin 536 die "need row argument" unless ($arg->{row});
107     die "need normalisation argument" unless ($arg->{rules});
108 dpavlin 31
109 dpavlin 536 no strict 'subs';
110 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
111 dpavlin 983 _set_ds( $arg->{row} );
112 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
113 dpavlin 541 _clean_ds( %{ $arg } );
114 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
115 dpavlin 725
116 dpavlin 536 eval "$arg->{rules}";
117     die "error evaling $arg->{rules}: $@\n" if ($@);
118 dpavlin 540
119 dpavlin 538 return _get_ds();
120 dpavlin 10 }
121    
122 dpavlin 983 =head2 _set_ds
123 dpavlin 13
124 dpavlin 536 Set current record hash
125 dpavlin 433
126 dpavlin 983 _set_ds( $rec );
127 dpavlin 433
128     =cut
129    
130 dpavlin 536 my $rec;
131 dpavlin 433
132 dpavlin 983 sub _set_ds {
133 dpavlin 536 $rec = shift or die "no record hash";
134 dpavlin 433 }
135    
136 dpavlin 983 =head2 get_ds
137    
138     Access to original record from input module
139    
140     my $ds = get_rec;
141    
142     =cut
143    
144     sub get_ds {
145     return $rec;
146     }
147    
148 dpavlin 595 =head2 _set_config
149    
150     Set current config hash
151    
152     _set_config( $config );
153    
154     Magic keys are:
155    
156     =over 4
157    
158     =item _
159    
160     Code of current database
161    
162     =item _mfn
163    
164     Current MFN
165    
166     =back
167    
168     =cut
169    
170     my $config;
171    
172     sub _set_config {
173     $config = shift;
174     }
175    
176 dpavlin 538 =head2 _get_ds
177    
178     Return hash formatted as data structure
179    
180     my $ds = _get_ds();
181    
182     =cut
183    
184 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
185 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
186 dpavlin 538
187     sub _get_ds {
188 dpavlin 982 #warn "## out = ",dump($out);
189 dpavlin 538 return $out;
190     }
191    
192     =head2 _clean_ds
193    
194     Clean data structure hash for next record
195    
196     _clean_ds();
197    
198     =cut
199    
200     sub _clean_ds {
201 dpavlin 541 my $a = {@_};
202 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
203 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
204 dpavlin 541 $marc_encoding = $a->{marc_encoding};
205 dpavlin 538 }
206    
207     =head2 _set_lookup
208    
209     Set current lookup hash
210    
211     _set_lookup( $lookup );
212    
213     =cut
214    
215     my $lookup;
216    
217     sub _set_lookup {
218     $lookup = shift;
219     }
220    
221 dpavlin 707 =head2 _get_lookup
222    
223     Get current lookup hash
224    
225     my $lookup = _get_lookup();
226    
227     =cut
228    
229     sub _get_lookup {
230     return $lookup;
231     }
232    
233 dpavlin 736 =head2 _set_load_row
234 dpavlin 725
235     Setup code reference which will return L<data_structure> from
236     L<WebPAC::Store>
237    
238 dpavlin 736 _set_load_row(sub {
239 dpavlin 725 my ($database,$input,$mfn) = @_;
240 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
241 dpavlin 725 });
242    
243     =cut
244    
245 dpavlin 736 sub _set_load_row {
246 dpavlin 725 my $coderef = shift;
247     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
248    
249 dpavlin 736 $load_row_coderef = $coderef;
250 dpavlin 725 }
251    
252 dpavlin 547 =head2 _get_marc_fields
253 dpavlin 540
254 dpavlin 547 Get all fields defined by calls to C<marc>
255 dpavlin 540
256 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
257 dpavlin 540
258 dpavlin 543 We are using I<magic> which detect repeatable fields only from
259     sequence of field/subfield data generated by normalization.
260    
261 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
262     if any of indicators are different.
263 dpavlin 543
264 dpavlin 554 This is sane for most cases. Something like:
265 dpavlin 543
266 dpavlin 554 900a-1 900b-1 900c-1
267     900a-2 900b-2
268     900a-3
269    
270     will be created from any combination of:
271    
272     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
273    
274     and following rules:
275    
276     marc('900','a', rec('200','a') );
277     marc('900','b', rec('200','b') );
278     marc('900','c', rec('200','c') );
279    
280     which might not be what you have in mind. If you need repeatable subfield,
281     define it using C<marc_repeatable_subfield> like this:
282    
283 dpavlin 574 marc_repeatable_subfield('900','a');
284     marc('900','a', rec('200','a') );
285     marc('900','b', rec('200','b') );
286     marc('900','c', rec('200','c') );
287 dpavlin 554
288 dpavlin 574 will create:
289    
290     900a-1 900a-2 900a-3 900b-1 900c-1
291     900b-2
292    
293     There is also support for returning next or specific using:
294    
295     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
296     # do something with $mf
297     }
298    
299     will always return fields from next MARC record or
300    
301     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
302    
303     will return 42th copy record (if it exists).
304    
305 dpavlin 540 =cut
306    
307 dpavlin 813 my $fetch_pos;
308    
309 dpavlin 547 sub _get_marc_fields {
310 dpavlin 550
311 dpavlin 574 my $arg = {@_};
312     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
313 dpavlin 813 $fetch_pos = $marc_fetch_offset;
314 dpavlin 574 if ($arg->{offset}) {
315 dpavlin 813 $fetch_pos = $arg->{offset};
316 dpavlin 574 } elsif($arg->{fetch_next}) {
317     $marc_fetch_offset++;
318     }
319 dpavlin 550
320 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
321    
322     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
323    
324 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
325 dpavlin 574
326 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
327 dpavlin 574
328     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
329    
330 dpavlin 550 # first, sort all existing fields
331     # XXX might not be needed, but modern perl might randomize elements in hash
332     my @sorted_marc_record = sort {
333 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
334 dpavlin 574 } @{ $marc_rec };
335 dpavlin 550
336 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
337 dpavlin 562
338 dpavlin 550 # output marc fields
339 dpavlin 542 my @m;
340 dpavlin 550
341     # count unique field-subfields (used for offset when walking to next subfield)
342     my $u;
343 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
344 dpavlin 550
345     if ($debug) {
346 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
347 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
348 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
349     warn "## subfield count = ", dump( $u ), $/;
350 dpavlin 550 }
351    
352     my $len = $#sorted_marc_record;
353     my $visited;
354     my $i = 0;
355     my $field;
356    
357     foreach ( 0 .. $len ) {
358    
359     # find next element which isn't visited
360     while ($visited->{$i}) {
361     $i = ($i + 1) % ($len + 1);
362 dpavlin 542 }
363    
364 dpavlin 550 # mark it visited
365     $visited->{$i}++;
366    
367 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
368 dpavlin 550
369     # field and subfield which is key for
370     # marc_repeatable_subfield and u
371 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
372 dpavlin 550
373     if ($debug > 1) {
374    
375     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
376     print "### this [$i]: ", dump( $row ),$/;
377     print "### sf: ", $row->[3], " vs ", $field->[3],
378     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
379     if ($#$field >= 0);
380    
381     }
382    
383     # if field exists
384     if ( $#$field >= 0 ) {
385     if (
386     $row->[0] ne $field->[0] || # field
387     $row->[1] ne $field->[1] || # i1
388     $row->[2] ne $field->[2] # i2
389     ) {
390     push @m, $field;
391     warn "## saved/1 ", dump( $field ),$/ if ($debug);
392     $field = $row;
393    
394     } elsif (
395     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
396     ||
397     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
398     ! $marc_repeatable_subfield->{ $fsf }
399     )
400     ) {
401     push @m, $field;
402     warn "## saved/2 ", dump( $field ),$/ if ($debug);
403     $field = $row;
404    
405     } else {
406     # append new subfields to existing field
407     push @$field, ( $row->[3], $row->[4] );
408     }
409     } else {
410     # insert first field
411     $field = $row;
412     }
413    
414     if (! $marc_repeatable_subfield->{ $fsf }) {
415     # make step to next subfield
416     $i = ($i + $u->{ $fsf } ) % ($len + 1);
417     }
418 dpavlin 542 }
419    
420 dpavlin 550 if ($#$field >= 0) {
421     push @m, $field;
422     warn "## saved/3 ", dump( $field ),$/ if ($debug);
423     }
424 dpavlin 542
425 dpavlin 579 return \@m;
426 dpavlin 540 }
427    
428 dpavlin 813 =head2 _get_marc_leader
429    
430     Return leader from currently fetched record by L</_get_marc_fields>
431    
432     print WebPAC::Normalize::_get_marc_leader();
433    
434     =cut
435    
436     sub _get_marc_leader {
437     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
438     return $marc_leader->[ $fetch_pos ];
439     }
440    
441 dpavlin 554 =head2 _debug
442    
443     Change level of debug warnings
444    
445     _debug( 2 );
446    
447     =cut
448    
449     sub _debug {
450     my $l = shift;
451     return $debug unless defined($l);
452 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
453 dpavlin 554 $debug = $l;
454     }
455    
456 dpavlin 540 =head1 Functions to create C<data_structure>
457    
458     Those functions generally have to first in your normalization file.
459    
460 dpavlin 912 =head2 search_display
461 dpavlin 433
462 dpavlin 912 Define output for L<search> and L<display> at the same time
463 dpavlin 433
464 dpavlin 912 search_display('Title', rec('200','a') );
465 dpavlin 13
466    
467     =cut
468    
469 dpavlin 912 sub search_display {
470     my $name = shift or die "search_display needs name as first argument";
471 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
472     return unless (@o);
473     $out->{$name}->{search} = \@o;
474     $out->{$name}->{display} = \@o;
475     }
476 dpavlin 13
477 dpavlin 915 =head2 tag
478    
479     Old name for L<search_display>, but supported
480    
481     =cut
482    
483     sub tag {
484     search_display( @_ );
485     }
486    
487 dpavlin 536 =head2 display
488 dpavlin 13
489 dpavlin 912 Define output just for I<display>
490 dpavlin 125
491 dpavlin 536 @v = display('Title', rec('200','a') );
492 dpavlin 125
493 dpavlin 536 =cut
494 dpavlin 125
495 dpavlin 923 sub _field {
496     my $type = shift or confess "need type -- BUG?";
497     my $name = shift or confess "needs name as first argument";
498 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
499     return unless (@o);
500 dpavlin 923 $out->{$name}->{$type} = \@o;
501 dpavlin 536 }
502 dpavlin 13
503 dpavlin 923 sub display { _field( 'display', @_ ) }
504    
505 dpavlin 536 =head2 search
506 dpavlin 13
507 dpavlin 536 Prepare values just for I<search>
508 dpavlin 13
509 dpavlin 536 @v = search('Title', rec('200','a') );
510 dpavlin 433
511 dpavlin 536 =cut
512 dpavlin 13
513 dpavlin 923 sub search { _field( 'search', @_ ) }
514 dpavlin 13
515 dpavlin 923 =head2 sorted
516    
517     Insert into lists which will be automatically sorted
518    
519     sorted('Title', rec('200','a') );
520    
521     =cut
522    
523     sub sorted { _field( 'sorted', @_ ) }
524    
525    
526 dpavlin 564 =head2 marc_leader
527    
528     Setup fields within MARC leader or get leader
529    
530     marc_leader('05','c');
531     my $leader = marc_leader();
532    
533     =cut
534    
535     sub marc_leader {
536     my ($offset,$value) = @_;
537    
538     if ($offset) {
539 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
540 dpavlin 564 } else {
541 dpavlin 813
542     if (defined($marc_leader)) {
543     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
544     return $marc_leader->[ $marc_record_offset ];
545     } else {
546     return;
547     }
548 dpavlin 564 }
549     }
550    
551 dpavlin 815 =head2 marc_fixed
552    
553     Create control/indentifier fields with values in fixed positions
554    
555     marc_fixed('008', 00, '070402');
556     marc_fixed('008', 39, '|');
557    
558     Positions not specified will be filled with spaces (C<0x20>).
559    
560     There will be no effort to extend last specified value to full length of
561     field in standard.
562    
563     =cut
564    
565     sub marc_fixed {
566     my ($f, $pos, $val) = @_;
567     die "need marc(field, position, value)" unless defined($f) && defined($pos);
568    
569 dpavlin 889 confess "need val" unless defined $val;
570    
571 dpavlin 815 my $update = 0;
572    
573     map {
574     if ($_->[0] eq $f) {
575     my $old = $_->[1];
576 dpavlin 889 if (length($old) <= $pos) {
577 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
578 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
579 dpavlin 815 } else {
580     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
581 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
582 dpavlin 815 }
583     $update++;
584     }
585     } @{ $marc_record->[ $marc_record_offset ] };
586    
587     if (! $update) {
588     my $v = ' ' x $pos . $val;
589     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
590 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
591 dpavlin 815 }
592     }
593    
594 dpavlin 547 =head2 marc
595 dpavlin 540
596     Save value for MARC field
597    
598 dpavlin 547 marc('900','a', rec('200','a') );
599 dpavlin 571 marc('001', rec('000') );
600 dpavlin 540
601     =cut
602    
603 dpavlin 547 sub marc {
604     my $f = shift or die "marc needs field";
605     die "marc field must be numer" unless ($f =~ /^\d+$/);
606 dpavlin 540
607 dpavlin 571 my $sf;
608     if ($f >= 10) {
609     $sf = shift or die "marc needs subfield";
610     }
611 dpavlin 540
612 dpavlin 541 foreach (@_) {
613     my $v = $_; # make var read-write for Encode
614 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
615 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
616 dpavlin 571 if (defined $sf) {
617 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
618 dpavlin 571 } else {
619 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
620 dpavlin 571 }
621 dpavlin 540 }
622     }
623    
624 dpavlin 547 =head2 marc_repeatable_subfield
625    
626     Save values for MARC repetable subfield
627    
628     marc_repeatable_subfield('910', 'z', rec('909') );
629    
630     =cut
631    
632     sub marc_repeatable_subfield {
633 dpavlin 550 my ($f,$sf) = @_;
634     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
635     $marc_repeatable_subfield->{ $f . $sf }++;
636 dpavlin 547 marc(@_);
637     }
638    
639     =head2 marc_indicators
640    
641     Set both indicators for MARC field
642    
643     marc_indicators('900', ' ', 1);
644    
645     Any indicator value other than C<0-9> will be treated as undefined.
646    
647     =cut
648    
649     sub marc_indicators {
650     my $f = shift || die "marc_indicators need field!\n";
651     my ($i1,$i2) = @_;
652     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
653     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
654    
655     $i1 = ' ' if ($i1 !~ /^\d$/);
656     $i2 = ' ' if ($i2 !~ /^\d$/);
657 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
658 dpavlin 547 }
659    
660 dpavlin 562 =head2 marc_compose
661 dpavlin 547
662 dpavlin 562 Save values for each MARC subfield explicitly
663    
664     marc_compose('900',
665     'a', rec('200','a')
666     'b', rec('201','a')
667     'a', rec('200','b')
668     'c', rec('200','c')
669     );
670    
671 dpavlin 603 If you specify C<+> for subfield, value will be appended
672     to previous defined subfield.
673    
674 dpavlin 562 =cut
675    
676     sub marc_compose {
677     my $f = shift or die "marc_compose needs field";
678     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
679    
680     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
681     my $m = [ $f, $i1, $i2 ];
682    
683 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
684    
685 dpavlin 619 if ($#_ % 2 != 1) {
686     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
687     }
688    
689 dpavlin 562 while (@_) {
690 dpavlin 619 my $sf = shift;
691 dpavlin 565 my $v = shift;
692 dpavlin 562
693     next unless (defined($v) && $v !~ /^\s*$/);
694 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
695 dpavlin 603 if ($sf ne '+') {
696     push @$m, ( $sf, $v );
697     } else {
698     $m->[ $#$m ] .= $v;
699     }
700 dpavlin 562 }
701    
702 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
703 dpavlin 562
704 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
705 dpavlin 562 }
706    
707 dpavlin 574 =head2 marc_duplicate
708 dpavlin 562
709 dpavlin 574 Generate copy of current MARC record and continue working on copy
710    
711     marc_duplicate();
712    
713     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
714     C<< _get_marc_fields( offset => 42 ) >>.
715    
716     =cut
717    
718     sub marc_duplicate {
719     my $m = $marc_record->[ -1 ];
720     die "can't duplicate record which isn't defined" unless ($m);
721     push @{ $marc_record }, dclone( $m );
722 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
723     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
724 dpavlin 574 $marc_record_offset = $#{ $marc_record };
725     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
726 dpavlin 813
727 dpavlin 574 }
728    
729     =head2 marc_remove
730    
731     Remove some field or subfield from MARC record.
732    
733     marc_remove('200');
734     marc_remove('200','a');
735    
736     This will erase field C<200> or C<200^a> from current MARC record.
737    
738 dpavlin 786 marc_remove('*');
739    
740     Will remove all fields in current MARC record.
741    
742 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
743     should probably just remove that subfield definition if you are not
744     using C<marc_duplicate>).
745    
746     FIXME: support fields < 10.
747    
748     =cut
749    
750     sub marc_remove {
751     my ($f, $sf) = @_;
752    
753     die "marc_remove needs record number" unless defined($f);
754    
755     my $marc = $marc_record->[ $marc_record_offset ];
756    
757     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
758    
759 dpavlin 786 if ($f eq '*') {
760    
761     delete( $marc_record->[ $marc_record_offset ] );
762 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
763 dpavlin 786
764     } else {
765    
766     my $i = 0;
767     foreach ( 0 .. $#{ $marc } ) {
768     last unless (defined $marc->[$i]);
769     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
770     if ($marc->[$i]->[0] eq $f) {
771     if (! defined $sf) {
772     # remove whole field
773     splice @$marc, $i, 1;
774     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
775     $i--;
776     } else {
777     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
778     my $o = ($j * 2) + 3;
779     if ($marc->[$i]->[$o] eq $sf) {
780     # remove subfield
781     splice @{$marc->[$i]}, $o, 2;
782     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
783     # is record now empty?
784     if ($#{ $marc->[$i] } == 2) {
785     splice @$marc, $i, 1;
786     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
787     $i--;
788     };
789     }
790 dpavlin 574 }
791     }
792     }
793 dpavlin 786 $i++;
794 dpavlin 574 }
795 dpavlin 786
796     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
797    
798     $marc_record->[ $marc_record_offset ] = $marc;
799 dpavlin 574 }
800    
801     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
802     }
803    
804 dpavlin 604 =head2 marc_original_order
805    
806     Copy all subfields preserving original order to marc field.
807    
808 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
809 dpavlin 604
810 dpavlin 616 Please note that field numbers are consistent with other commands (marc
811     field number first), but somewhat counter-intuitive (destination and then
812     source).
813    
814 dpavlin 604 You might want to use this command if you are just renaming subfields or
815     using pre-processing modify_record in C<config.yml> and don't need any
816     post-processing or want to preserve order of original subfields.
817    
818 dpavlin 616
819 dpavlin 604 =cut
820    
821     sub marc_original_order {
822    
823 dpavlin 616 my ($to, $from) = @_;
824 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
825    
826 dpavlin 616 return unless defined($rec->{$from});
827    
828     my $r = $rec->{$from};
829 dpavlin 604 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
830    
831     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
832 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
833 dpavlin 604
834     foreach my $d (@$r) {
835    
836 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
837 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
838 dpavlin 605 next;
839     }
840    
841 dpavlin 604 my @sfs = @{ $d->{subfields} };
842    
843     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
844    
845 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
846 dpavlin 604
847     my $m = [ $to, $i1, $i2 ];
848    
849     while (my $sf = shift @sfs) {
850 dpavlin 618
851     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
852 dpavlin 604 my $offset = shift @sfs;
853     die "corrupted sufields specification for field $from\n" unless defined($offset);
854    
855     my $v;
856     if (ref($d->{$sf}) eq 'ARRAY') {
857     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
858     } elsif ($offset == 0) {
859     $v = $d->{$sf};
860     } else {
861     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
862     }
863     push @$m, ( $sf, $v ) if (defined($v));
864     }
865    
866     if ($#{$m} > 2) {
867     push @{ $marc_record->[ $marc_record_offset ] }, $m;
868     }
869     }
870    
871     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
872     }
873    
874 dpavlin 813 =head2 marc_count
875 dpavlin 604
876 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
877    
878     print "created ", marc_count(), " records";
879    
880     =cut
881    
882     sub marc_count {
883     return $#{ $marc_record };
884     }
885    
886    
887 dpavlin 540 =head1 Functions to extract data from input
888    
889     This function should be used inside functions to create C<data_structure> described
890     above.
891    
892 dpavlin 641 =head2 _pack_subfields_hash
893    
894 dpavlin 669 @subfields = _pack_subfields_hash( $h );
895     $subfields = _pack_subfields_hash( $h, 1 );
896 dpavlin 641
897 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
898     with subfields (denoted by C<^>) and values.
899    
900 dpavlin 641 =cut
901    
902     sub _pack_subfields_hash {
903    
904 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
905 dpavlin 641
906     my ($h,$include_subfields) = @_;
907    
908 dpavlin 831 # sanity and ease of use
909     return $h if (ref($h) ne 'HASH');
910    
911 dpavlin 641 if ( defined($h->{subfields}) ) {
912     my $sfs = delete $h->{subfields} || die "no subfields?";
913     my @out;
914     while (@$sfs) {
915     my $sf = shift @$sfs;
916     push @out, '^' . $sf if ($include_subfields);
917     my $o = shift @$sfs;
918     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
919     # single element subfields are not arrays
920 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
921 dpavlin 667
922 dpavlin 641 push @out, $h->{$sf};
923     } else {
924 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
925 dpavlin 641 push @out, $h->{$sf}->[$o];
926     }
927     }
928 dpavlin 667 if ($include_subfields) {
929     return join('', @out);
930     } else {
931     return @out;
932     }
933 dpavlin 641 } else {
934 dpavlin 667 if ($include_subfields) {
935     my $out = '';
936 dpavlin 668 foreach my $sf (sort keys %$h) {
937 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
938     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
939     } else {
940     $out .= '^' . $sf . $h->{$sf};
941     }
942     }
943     return $out;
944     } else {
945     # FIXME this should probably be in alphabetical order instead of hash order
946     values %{$h};
947     }
948 dpavlin 641 }
949     }
950    
951 dpavlin 536 =head2 rec1
952 dpavlin 371
953 dpavlin 536 Return all values in some field
954 dpavlin 371
955 dpavlin 536 @v = rec1('200')
956 dpavlin 15
957 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
958 dpavlin 15
959 dpavlin 536 =cut
960 dpavlin 15
961 dpavlin 536 sub rec1 {
962     my $f = shift;
963 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
964 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
965 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
966 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
967 dpavlin 641 my @out;
968     foreach my $h ( @{ $rec->{$f} } ) {
969     if (ref($h) eq 'HASH') {
970     push @out, ( _pack_subfields_hash( $h ) );
971 dpavlin 31 } else {
972 dpavlin 641 push @out, $h;
973 dpavlin 31 }
974 dpavlin 641 }
975     return @out;
976 dpavlin 536 } elsif( defined($rec->{$f}) ) {
977     return $rec->{$f};
978 dpavlin 15 }
979     }
980    
981 dpavlin 536 =head2 rec2
982 dpavlin 15
983 dpavlin 536 Return all values in specific field and subfield
984 dpavlin 13
985 dpavlin 536 @v = rec2('200','a')
986 dpavlin 13
987     =cut
988    
989 dpavlin 536 sub rec2 {
990     my $f = shift;
991     return unless (defined($rec && $rec->{$f}));
992     my $sf = shift;
993 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
994 dpavlin 589 return map {
995     if (ref($_->{$sf}) eq 'ARRAY') {
996     @{ $_->{$sf} };
997     } else {
998     $_->{$sf};
999     }
1000     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
1001 dpavlin 536 }
1002 dpavlin 13
1003 dpavlin 536 =head2 rec
1004 dpavlin 13
1005 dpavlin 536 syntaxtic sugar for
1006 dpavlin 13
1007 dpavlin 536 @v = rec('200')
1008     @v = rec('200','a')
1009 dpavlin 13
1010 dpavlin 750 If rec() returns just single value, it will
1011     return scalar, not array.
1012    
1013 dpavlin 536 =cut
1014 dpavlin 373
1015 dpavlin 979 sub frec {
1016     my @out = rec(@_);
1017     warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1018     return shift @out;
1019     }
1020    
1021 dpavlin 536 sub rec {
1022 dpavlin 583 my @out;
1023 dpavlin 536 if ($#_ == 0) {
1024 dpavlin 583 @out = rec1(@_);
1025 dpavlin 536 } elsif ($#_ == 1) {
1026 dpavlin 583 @out = rec2(@_);
1027 dpavlin 13 }
1028 dpavlin 750 if ($#out == 0 && ! wantarray) {
1029     return $out[0];
1030     } elsif (@out) {
1031 dpavlin 583 return @out;
1032     } else {
1033     return '';
1034     }
1035 dpavlin 13 }
1036    
1037 dpavlin 536 =head2 regex
1038 dpavlin 15
1039 dpavlin 536 Apply regex to some or all values
1040 dpavlin 15
1041 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1042 dpavlin 15
1043     =cut
1044    
1045 dpavlin 536 sub regex {
1046     my $r = shift;
1047     my @out;
1048 dpavlin 550 #warn "r: $r\n", dump(\@_);
1049 dpavlin 536 foreach my $t (@_) {
1050     next unless ($t);
1051     eval "\$t =~ $r";
1052     push @out, $t if ($t && $t ne '');
1053 dpavlin 368 }
1054 dpavlin 536 return @out;
1055 dpavlin 15 }
1056    
1057 dpavlin 536 =head2 prefix
1058 dpavlin 15
1059 dpavlin 536 Prefix all values with a string
1060 dpavlin 15
1061 dpavlin 536 @v = prefix( 'my_', @v );
1062 dpavlin 15
1063     =cut
1064    
1065 dpavlin 536 sub prefix {
1066 dpavlin 819 my $p = shift;
1067     return @_ unless defined( $p );
1068 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1069     }
1070 dpavlin 15
1071 dpavlin 536 =head2 suffix
1072 dpavlin 15
1073 dpavlin 536 suffix all values with a string
1074 dpavlin 15
1075 dpavlin 536 @v = suffix( '_my', @v );
1076 dpavlin 15
1077 dpavlin 536 =cut
1078 dpavlin 15
1079 dpavlin 536 sub suffix {
1080 dpavlin 819 my $s = shift;
1081     return @_ unless defined( $s );
1082 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1083 dpavlin 15 }
1084    
1085 dpavlin 536 =head2 surround
1086 dpavlin 13
1087 dpavlin 536 surround all values with a two strings
1088 dpavlin 13
1089 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1090 dpavlin 13
1091     =cut
1092    
1093 dpavlin 536 sub surround {
1094 dpavlin 819 my $p = shift;
1095     my $s = shift;
1096     $p = '' unless defined( $p );
1097     $s = '' unless defined( $s );
1098 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1099 dpavlin 13 }
1100    
1101 dpavlin 536 =head2 first
1102 dpavlin 13
1103 dpavlin 536 Return first element
1104 dpavlin 15
1105 dpavlin 536 $v = first( @v );
1106 dpavlin 13
1107     =cut
1108    
1109 dpavlin 536 sub first {
1110     my $r = shift;
1111     return $r;
1112 dpavlin 13 }
1113    
1114 dpavlin 536 =head2 lookup
1115 dpavlin 13
1116 dpavlin 536 Consult lookup hashes for some value
1117 dpavlin 13
1118 dpavlin 725 @v = lookup(
1119     sub {
1120     'ffkk/peri/mfn'.rec('000')
1121     },
1122     'ffkk','peri','200-a-200-e',
1123     sub {
1124     first(rec(200,'a')).' '.first(rec('200','e'))
1125     }
1126     );
1127 dpavlin 13
1128 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1129     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1130 dpavlin 707
1131 dpavlin 725 lookup(
1132     # which results to return from record recorded in lookup
1133     sub { 'ffkk/peri/mfn' . rec('000') },
1134     # from which database and input
1135     'ffkk','peri',
1136     # such that following values match
1137     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1138     # if this part is missing, we will try to match same fields
1139     # from lookup record and current one, or you can override
1140     # which records to use from current record using
1141     sub { rec('900','x') . ' ' . rec('900','y') },
1142     )
1143    
1144     You can think about this lookup as SQL (if that helps):
1145    
1146     select
1147     sub { what }
1148     from
1149     database, input
1150     where
1151     sub { filter from lookuped record }
1152     having
1153     sub { optional filter on current record }
1154    
1155     Easy as pie, right?
1156    
1157 dpavlin 13 =cut
1158    
1159 dpavlin 536 sub lookup {
1160 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1161    
1162 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1163 dpavlin 725
1164 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1165 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1166    
1167 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1168 dpavlin 725
1169     my $mfns;
1170     my @having = $having->();
1171    
1172 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1173 dpavlin 725
1174     foreach my $h ( @having ) {
1175     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1176 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1177 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1178     }
1179 dpavlin 536 }
1180 dpavlin 725
1181     return unless ($mfns);
1182    
1183     my @mfns = sort keys %$mfns;
1184    
1185 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1186 dpavlin 725
1187     my $old_rec = $rec;
1188     my @out;
1189    
1190     foreach my $mfn (@mfns) {
1191 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1192 dpavlin 725
1193 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1194 dpavlin 725
1195     my @vals = $what->();
1196    
1197     push @out, ( @vals );
1198    
1199 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1200 dpavlin 725 }
1201    
1202     # if (ref($lookup->{$k}) eq 'ARRAY') {
1203     # return @{ $lookup->{$k} };
1204     # } else {
1205     # return $lookup->{$k};
1206     # }
1207    
1208     $rec = $old_rec;
1209    
1210 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1211 dpavlin 725
1212 dpavlin 740 if ($#out == 0) {
1213     return $out[0];
1214     } else {
1215     return @out;
1216     }
1217 dpavlin 13 }
1218    
1219 dpavlin 707 =head2 save_into_lookup
1220    
1221 dpavlin 725 Save value into lookup. It associates current database, input
1222     and specific keys with one or more values which will be
1223     associated over MFN.
1224 dpavlin 707
1225 dpavlin 725 MFN will be extracted from first occurence current of field 000
1226     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1227    
1228     my $nr = save_into_lookup($database,$input,$key,sub {
1229 dpavlin 707 # code which produce one or more values
1230     });
1231    
1232 dpavlin 725 It returns number of items saved.
1233 dpavlin 707
1234 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1235     L<WebPAC::Parser>.
1236    
1237 dpavlin 707 =cut
1238    
1239     sub save_into_lookup {
1240 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1241     die "save_into_lookup needs database" unless defined($database);
1242     die "save_into_lookup needs input" unless defined($input);
1243     die "save_into_lookup needs key" unless defined($key);
1244 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1245 dpavlin 725
1246 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1247 dpavlin 725
1248     my $mfn =
1249     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1250     defined($config->{_mfn}) ? $config->{_mfn} :
1251     die "mfn not defined or zero";
1252    
1253     my $nr = 0;
1254    
1255 dpavlin 707 foreach my $v ( $coderef->() ) {
1256 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1257 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1258 dpavlin 725 $nr++;
1259 dpavlin 707 }
1260 dpavlin 725
1261     return $nr;
1262 dpavlin 707 }
1263    
1264 dpavlin 595 =head2 config
1265    
1266     Consult config values stored in C<config.yml>
1267    
1268     # return database code (key under databases in yaml)
1269     $database_code = config(); # use _ from hash
1270     $database_name = config('name');
1271     $database_input_name = config('input name');
1272    
1273     Up to three levels are supported.
1274    
1275     =cut
1276    
1277     sub config {
1278     return unless ($config);
1279    
1280     my $p = shift;
1281    
1282     $p ||= '';
1283    
1284     my $v;
1285    
1286     warn "### getting config($p)\n" if ($debug > 1);
1287    
1288     my @p = split(/\s+/,$p);
1289     if ($#p < 0) {
1290     $v = $config->{ '_' }; # special, database code
1291     } else {
1292    
1293     my $c = dclone( $config );
1294    
1295     foreach my $k (@p) {
1296     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1297     if (ref($c) eq 'ARRAY') {
1298     $c = shift @$c;
1299     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1300     last;
1301     }
1302    
1303     if (! defined($c->{$k}) ) {
1304     $c = undef;
1305     last;
1306     } else {
1307     $c = $c->{$k};
1308     }
1309     }
1310     $v = $c if ($c);
1311    
1312     }
1313    
1314     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1315     warn "config( '$p' ) is empty\n" if (! $v);
1316    
1317     return $v;
1318     }
1319    
1320     =head2 id
1321    
1322     Returns unique id of this record
1323    
1324     $id = id();
1325    
1326     Returns C<42/2> for 2nd occurence of MFN 42.
1327    
1328     =cut
1329    
1330     sub id {
1331     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1332     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1333     }
1334    
1335 dpavlin 536 =head2 join_with
1336 dpavlin 13
1337 dpavlin 536 Joins walues with some delimiter
1338 dpavlin 10
1339 dpavlin 536 $v = join_with(", ", @v);
1340 dpavlin 10
1341 dpavlin 536 =cut
1342 dpavlin 10
1343 dpavlin 536 sub join_with {
1344     my $d = shift;
1345 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1346 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1347     return '' unless defined($v);
1348     return $v;
1349 dpavlin 536 }
1350 dpavlin 10
1351 dpavlin 562 =head2 split_rec_on
1352    
1353     Split record subfield on some regex and take one of parts out
1354    
1355     $a_before_semi_column =
1356     split_rec_on('200','a', /\s*;\s*/, $part);
1357    
1358     C<$part> is optional number of element. First element is
1359     B<1>, not 0!
1360    
1361     If there is no C<$part> parameter or C<$part> is 0, this function will
1362     return all values produced by splitting.
1363    
1364     =cut
1365    
1366     sub split_rec_on {
1367     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1368    
1369     my ($fld, $sf, $regex, $part) = @_;
1370 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1371 dpavlin 562
1372     my @r = rec( $fld, $sf );
1373     my $v = shift @r;
1374 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1375 dpavlin 562
1376 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1377 dpavlin 566
1378 dpavlin 562 my @s = split( $regex, $v );
1379 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1380 dpavlin 566 if ($part && $part > 0) {
1381 dpavlin 562 return $s[ $part - 1 ];
1382     } else {
1383 dpavlin 571 return @s;
1384 dpavlin 562 }
1385     }
1386    
1387 dpavlin 785 my $hash;
1388    
1389     =head2 set
1390    
1391     set( key => 'value' );
1392    
1393     =cut
1394    
1395     sub set {
1396     my ($k,$v) = @_;
1397 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1398 dpavlin 785 $hash->{$k} = $v;
1399     };
1400    
1401     =head2 get
1402    
1403     get( 'key' );
1404    
1405     =cut
1406    
1407     sub get {
1408     my $k = shift || return;
1409     my $v = $hash->{$k};
1410 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1411 dpavlin 785 return $v;
1412     }
1413    
1414 dpavlin 791 =head2 count
1415 dpavlin 785
1416 dpavlin 791 if ( count( @result ) == 1 ) {
1417     # do something if only 1 result is there
1418     }
1419    
1420     =cut
1421    
1422     sub count {
1423 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1424 dpavlin 791 return @_ . '';
1425     }
1426    
1427 dpavlin 536 # END
1428     1;

  ViewVC Help
Powered by ViewVC 1.1.26