/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 923 - (hide annotations)
Wed Oct 31 00:26:43 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 30619 byte(s)
 r1393@llin:  dpavlin | 2007-10-31 00:52:52 +0100
 added sorted to WebPAC::Normalize to define values which should go
 into sorted lists

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3     @EXPORT = qw/
4 dpavlin 538 _set_rec _set_lookup
5 dpavlin 736 _set_load_row
6 dpavlin 538 _get_ds _clean_ds
7 dpavlin 554 _debug
8 dpavlin 641 _pack_subfields_hash
9 dpavlin 538
10 dpavlin 923 search_display search display sorted
11 dpavlin 912
12 dpavlin 547 marc marc_indicators marc_repeatable_subfield
13 dpavlin 815 marc_compose marc_leader marc_fixed
14 dpavlin 813 marc_duplicate marc_remove marc_count
15 dpavlin 604 marc_original_order
16 dpavlin 540
17 dpavlin 536 rec1 rec2 rec
18     regex prefix suffix surround
19     first lookup join_with
20 dpavlin 707 save_into_lookup
21 dpavlin 562
22     split_rec_on
23 dpavlin 785
24     get set
25 dpavlin 791 count
26 dpavlin 536 /;
27 dpavlin 10
28     use warnings;
29     use strict;
30 dpavlin 536
31     #use base qw/WebPAC::Common/;
32 dpavlin 550 use Data::Dump qw/dump/;
33 dpavlin 574 use Storable qw/dclone/;
34 dpavlin 725 use Carp qw/confess/;
35 dpavlin 10
36 dpavlin 550 # debugging warn(s)
37     my $debug = 0;
38    
39    
40 dpavlin 10 =head1 NAME
41    
42 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
43 dpavlin 10
44     =cut
45    
46 dpavlin 923 our $VERSION = '0.31';
47 dpavlin 10
48     =head1 SYNOPSIS
49    
50 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
51     from input records using perl functions which are specialized for set
52     processing.
53 dpavlin 10
54 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
55     means that you check it's validity before running WebPAC using
56     C<perl -c normalize.pl>.
57 dpavlin 15
58 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
59 dpavlin 912 types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
60 dpavlin 547 C<marc>.
61 dpavlin 15
62 dpavlin 10 =head1 FUNCTIONS
63    
64 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
65     All other functions are available for use within normalisation rules.
66    
67 dpavlin 536 =head2 data_structure
68 dpavlin 10
69 dpavlin 536 Return data structure
70 dpavlin 13
71 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
72 dpavlin 725 lookup => $lookup_hash,
73 dpavlin 536 row => $row,
74     rules => $normalize_pl_config,
75 dpavlin 541 marc_encoding => 'utf-8',
76 dpavlin 595 config => $config,
77 dpavlin 736 load_row_coderef => sub {
78 dpavlin 725 my ($database,$input,$mfn) = shift;
79 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
80 dpavlin 725 },
81 dpavlin 13 );
82    
83 dpavlin 707 Options C<row>, C<rules> and C<log> are mandatory while all
84 dpavlin 540 other are optional.
85    
86 dpavlin 736 C<load_row_coderef> is closure only used when executing lookups, so they will
87 dpavlin 725 die if it's not defined.
88    
89 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
90 dpavlin 15
91 dpavlin 538 Since this function isn't exported you have to call it with
92     C<WebPAC::Normalize::data_structure>.
93    
94 dpavlin 10 =cut
95    
96 dpavlin 736 my $load_row_coderef;
97 dpavlin 725
98 dpavlin 536 sub data_structure {
99     my $arg = {@_};
100 dpavlin 13
101 dpavlin 536 die "need row argument" unless ($arg->{row});
102     die "need normalisation argument" unless ($arg->{rules});
103 dpavlin 31
104 dpavlin 536 no strict 'subs';
105 dpavlin 730 _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
106 dpavlin 538 _set_rec( $arg->{row} );
107 dpavlin 730 _set_config( $arg->{config} ) if defined($arg->{config});
108 dpavlin 541 _clean_ds( %{ $arg } );
109 dpavlin 736 $load_row_coderef = $arg->{load_row_coderef};
110 dpavlin 725
111 dpavlin 536 eval "$arg->{rules}";
112     die "error evaling $arg->{rules}: $@\n" if ($@);
113 dpavlin 540
114 dpavlin 538 return _get_ds();
115 dpavlin 10 }
116    
117 dpavlin 538 =head2 _set_rec
118 dpavlin 13
119 dpavlin 536 Set current record hash
120 dpavlin 433
121 dpavlin 538 _set_rec( $rec );
122 dpavlin 433
123     =cut
124    
125 dpavlin 536 my $rec;
126 dpavlin 433
127 dpavlin 538 sub _set_rec {
128 dpavlin 536 $rec = shift or die "no record hash";
129 dpavlin 433 }
130    
131 dpavlin 595 =head2 _set_config
132    
133     Set current config hash
134    
135     _set_config( $config );
136    
137     Magic keys are:
138    
139     =over 4
140    
141     =item _
142    
143     Code of current database
144    
145     =item _mfn
146    
147     Current MFN
148    
149     =back
150    
151     =cut
152    
153     my $config;
154    
155     sub _set_config {
156     $config = shift;
157     }
158    
159 dpavlin 538 =head2 _get_ds
160    
161     Return hash formatted as data structure
162    
163     my $ds = _get_ds();
164    
165     =cut
166    
167 dpavlin 812 my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
168 dpavlin 574 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
169 dpavlin 538
170     sub _get_ds {
171     return $out;
172     }
173    
174     =head2 _clean_ds
175    
176     Clean data structure hash for next record
177    
178     _clean_ds();
179    
180     =cut
181    
182     sub _clean_ds {
183 dpavlin 541 my $a = {@_};
184 dpavlin 812 ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
185 dpavlin 574 ($marc_record_offset, $marc_fetch_offset) = (0,0);
186 dpavlin 541 $marc_encoding = $a->{marc_encoding};
187 dpavlin 538 }
188    
189     =head2 _set_lookup
190    
191     Set current lookup hash
192    
193     _set_lookup( $lookup );
194    
195     =cut
196    
197     my $lookup;
198    
199     sub _set_lookup {
200     $lookup = shift;
201     }
202    
203 dpavlin 707 =head2 _get_lookup
204    
205     Get current lookup hash
206    
207     my $lookup = _get_lookup();
208    
209     =cut
210    
211     sub _get_lookup {
212     return $lookup;
213     }
214    
215 dpavlin 736 =head2 _set_load_row
216 dpavlin 725
217     Setup code reference which will return L<data_structure> from
218     L<WebPAC::Store>
219    
220 dpavlin 736 _set_load_row(sub {
221 dpavlin 725 my ($database,$input,$mfn) = @_;
222 dpavlin 736 $store->load_row( database => $database, input => $input, id => $mfn );
223 dpavlin 725 });
224    
225     =cut
226    
227 dpavlin 736 sub _set_load_row {
228 dpavlin 725 my $coderef = shift;
229     confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
230    
231 dpavlin 736 $load_row_coderef = $coderef;
232 dpavlin 725 }
233    
234 dpavlin 547 =head2 _get_marc_fields
235 dpavlin 540
236 dpavlin 547 Get all fields defined by calls to C<marc>
237 dpavlin 540
238 dpavlin 547 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
239 dpavlin 540
240 dpavlin 543 We are using I<magic> which detect repeatable fields only from
241     sequence of field/subfield data generated by normalization.
242    
243 dpavlin 554 Repeatable field is created when there is second occurence of same subfield or
244     if any of indicators are different.
245 dpavlin 543
246 dpavlin 554 This is sane for most cases. Something like:
247 dpavlin 543
248 dpavlin 554 900a-1 900b-1 900c-1
249     900a-2 900b-2
250     900a-3
251    
252     will be created from any combination of:
253    
254     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
255    
256     and following rules:
257    
258     marc('900','a', rec('200','a') );
259     marc('900','b', rec('200','b') );
260     marc('900','c', rec('200','c') );
261    
262     which might not be what you have in mind. If you need repeatable subfield,
263     define it using C<marc_repeatable_subfield> like this:
264    
265 dpavlin 574 marc_repeatable_subfield('900','a');
266     marc('900','a', rec('200','a') );
267     marc('900','b', rec('200','b') );
268     marc('900','c', rec('200','c') );
269 dpavlin 554
270 dpavlin 574 will create:
271    
272     900a-1 900a-2 900a-3 900b-1 900c-1
273     900b-2
274    
275     There is also support for returning next or specific using:
276    
277     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
278     # do something with $mf
279     }
280    
281     will always return fields from next MARC record or
282    
283     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
284    
285     will return 42th copy record (if it exists).
286    
287 dpavlin 540 =cut
288    
289 dpavlin 813 my $fetch_pos;
290    
291 dpavlin 547 sub _get_marc_fields {
292 dpavlin 550
293 dpavlin 574 my $arg = {@_};
294     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
295 dpavlin 813 $fetch_pos = $marc_fetch_offset;
296 dpavlin 574 if ($arg->{offset}) {
297 dpavlin 813 $fetch_pos = $arg->{offset};
298 dpavlin 574 } elsif($arg->{fetch_next}) {
299     $marc_fetch_offset++;
300     }
301 dpavlin 550
302 dpavlin 574 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
303    
304     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
305    
306 dpavlin 813 my $marc_rec = $marc_record->[ $fetch_pos ];
307 dpavlin 574
308 dpavlin 813 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
309 dpavlin 574
310     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
311    
312 dpavlin 550 # first, sort all existing fields
313     # XXX might not be needed, but modern perl might randomize elements in hash
314     my @sorted_marc_record = sort {
315 dpavlin 572 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
316 dpavlin 574 } @{ $marc_rec };
317 dpavlin 550
318 dpavlin 574 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
319 dpavlin 562
320 dpavlin 550 # output marc fields
321 dpavlin 542 my @m;
322 dpavlin 550
323     # count unique field-subfields (used for offset when walking to next subfield)
324     my $u;
325 dpavlin 572 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
326 dpavlin 550
327     if ($debug) {
328 dpavlin 574 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
329 dpavlin 813 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
330 dpavlin 574 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
331     warn "## subfield count = ", dump( $u ), $/;
332 dpavlin 550 }
333    
334     my $len = $#sorted_marc_record;
335     my $visited;
336     my $i = 0;
337     my $field;
338    
339     foreach ( 0 .. $len ) {
340    
341     # find next element which isn't visited
342     while ($visited->{$i}) {
343     $i = ($i + 1) % ($len + 1);
344 dpavlin 542 }
345    
346 dpavlin 550 # mark it visited
347     $visited->{$i}++;
348    
349 dpavlin 574 my $row = dclone( $sorted_marc_record[$i] );
350 dpavlin 550
351     # field and subfield which is key for
352     # marc_repeatable_subfield and u
353 dpavlin 572 my $fsf = $row->[0] . ( $row->[3] || '' );
354 dpavlin 550
355     if ($debug > 1) {
356    
357     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
358     print "### this [$i]: ", dump( $row ),$/;
359     print "### sf: ", $row->[3], " vs ", $field->[3],
360     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
361     if ($#$field >= 0);
362    
363     }
364    
365     # if field exists
366     if ( $#$field >= 0 ) {
367     if (
368     $row->[0] ne $field->[0] || # field
369     $row->[1] ne $field->[1] || # i1
370     $row->[2] ne $field->[2] # i2
371     ) {
372     push @m, $field;
373     warn "## saved/1 ", dump( $field ),$/ if ($debug);
374     $field = $row;
375    
376     } elsif (
377     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
378     ||
379     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
380     ! $marc_repeatable_subfield->{ $fsf }
381     )
382     ) {
383     push @m, $field;
384     warn "## saved/2 ", dump( $field ),$/ if ($debug);
385     $field = $row;
386    
387     } else {
388     # append new subfields to existing field
389     push @$field, ( $row->[3], $row->[4] );
390     }
391     } else {
392     # insert first field
393     $field = $row;
394     }
395    
396     if (! $marc_repeatable_subfield->{ $fsf }) {
397     # make step to next subfield
398     $i = ($i + $u->{ $fsf } ) % ($len + 1);
399     }
400 dpavlin 542 }
401    
402 dpavlin 550 if ($#$field >= 0) {
403     push @m, $field;
404     warn "## saved/3 ", dump( $field ),$/ if ($debug);
405     }
406 dpavlin 542
407 dpavlin 579 return \@m;
408 dpavlin 540 }
409    
410 dpavlin 813 =head2 _get_marc_leader
411    
412     Return leader from currently fetched record by L</_get_marc_fields>
413    
414     print WebPAC::Normalize::_get_marc_leader();
415    
416     =cut
417    
418     sub _get_marc_leader {
419     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
420     return $marc_leader->[ $fetch_pos ];
421     }
422    
423 dpavlin 554 =head2 _debug
424    
425     Change level of debug warnings
426    
427     _debug( 2 );
428    
429     =cut
430    
431     sub _debug {
432     my $l = shift;
433     return $debug unless defined($l);
434 dpavlin 568 warn "debug level $l",$/ if ($l > 0);
435 dpavlin 554 $debug = $l;
436     }
437    
438 dpavlin 540 =head1 Functions to create C<data_structure>
439    
440     Those functions generally have to first in your normalization file.
441    
442 dpavlin 912 =head2 search_display
443 dpavlin 433
444 dpavlin 912 Define output for L<search> and L<display> at the same time
445 dpavlin 433
446 dpavlin 912 search_display('Title', rec('200','a') );
447 dpavlin 13
448    
449     =cut
450    
451 dpavlin 912 sub search_display {
452     my $name = shift or die "search_display needs name as first argument";
453 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
454     return unless (@o);
455     $out->{$name}->{search} = \@o;
456     $out->{$name}->{display} = \@o;
457     }
458 dpavlin 13
459 dpavlin 915 =head2 tag
460    
461     Old name for L<search_display>, but supported
462    
463     =cut
464    
465     sub tag {
466     search_display( @_ );
467     }
468    
469 dpavlin 536 =head2 display
470 dpavlin 13
471 dpavlin 912 Define output just for I<display>
472 dpavlin 125
473 dpavlin 536 @v = display('Title', rec('200','a') );
474 dpavlin 125
475 dpavlin 536 =cut
476 dpavlin 125
477 dpavlin 923 sub _field {
478     my $type = shift or confess "need type -- BUG?";
479     my $name = shift or confess "needs name as first argument";
480 dpavlin 536 my @o = grep { defined($_) && $_ ne '' } @_;
481     return unless (@o);
482 dpavlin 923 $out->{$name}->{$type} = \@o;
483 dpavlin 536 }
484 dpavlin 13
485 dpavlin 923 sub display { _field( 'display', @_ ) }
486    
487 dpavlin 536 =head2 search
488 dpavlin 13
489 dpavlin 536 Prepare values just for I<search>
490 dpavlin 13
491 dpavlin 536 @v = search('Title', rec('200','a') );
492 dpavlin 433
493 dpavlin 536 =cut
494 dpavlin 13
495 dpavlin 923 sub search { _field( 'search', @_ ) }
496 dpavlin 13
497 dpavlin 923 =head2 sorted
498    
499     Insert into lists which will be automatically sorted
500    
501     sorted('Title', rec('200','a') );
502    
503     =cut
504    
505     sub sorted { _field( 'sorted', @_ ) }
506    
507    
508 dpavlin 564 =head2 marc_leader
509    
510     Setup fields within MARC leader or get leader
511    
512     marc_leader('05','c');
513     my $leader = marc_leader();
514    
515     =cut
516    
517     sub marc_leader {
518     my ($offset,$value) = @_;
519    
520     if ($offset) {
521 dpavlin 813 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
522 dpavlin 564 } else {
523 dpavlin 813
524     if (defined($marc_leader)) {
525     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
526     return $marc_leader->[ $marc_record_offset ];
527     } else {
528     return;
529     }
530 dpavlin 564 }
531     }
532    
533 dpavlin 815 =head2 marc_fixed
534    
535     Create control/indentifier fields with values in fixed positions
536    
537     marc_fixed('008', 00, '070402');
538     marc_fixed('008', 39, '|');
539    
540     Positions not specified will be filled with spaces (C<0x20>).
541    
542     There will be no effort to extend last specified value to full length of
543     field in standard.
544    
545     =cut
546    
547     sub marc_fixed {
548     my ($f, $pos, $val) = @_;
549     die "need marc(field, position, value)" unless defined($f) && defined($pos);
550    
551 dpavlin 889 confess "need val" unless defined $val;
552    
553 dpavlin 815 my $update = 0;
554    
555     map {
556     if ($_->[0] eq $f) {
557     my $old = $_->[1];
558 dpavlin 889 if (length($old) <= $pos) {
559 dpavlin 815 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
560 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
561 dpavlin 815 } else {
562     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
563 dpavlin 817 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
564 dpavlin 815 }
565     $update++;
566     }
567     } @{ $marc_record->[ $marc_record_offset ] };
568    
569     if (! $update) {
570     my $v = ' ' x $pos . $val;
571     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
572 dpavlin 817 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
573 dpavlin 815 }
574     }
575    
576 dpavlin 547 =head2 marc
577 dpavlin 540
578     Save value for MARC field
579    
580 dpavlin 547 marc('900','a', rec('200','a') );
581 dpavlin 571 marc('001', rec('000') );
582 dpavlin 540
583     =cut
584    
585 dpavlin 547 sub marc {
586     my $f = shift or die "marc needs field";
587     die "marc field must be numer" unless ($f =~ /^\d+$/);
588 dpavlin 540
589 dpavlin 571 my $sf;
590     if ($f >= 10) {
591     $sf = shift or die "marc needs subfield";
592     }
593 dpavlin 540
594 dpavlin 541 foreach (@_) {
595     my $v = $_; # make var read-write for Encode
596 dpavlin 543 next unless (defined($v) && $v !~ /^\s*$/);
597 dpavlin 548 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
598 dpavlin 571 if (defined $sf) {
599 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
600 dpavlin 571 } else {
601 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
602 dpavlin 571 }
603 dpavlin 540 }
604     }
605    
606 dpavlin 547 =head2 marc_repeatable_subfield
607    
608     Save values for MARC repetable subfield
609    
610     marc_repeatable_subfield('910', 'z', rec('909') );
611    
612     =cut
613    
614     sub marc_repeatable_subfield {
615 dpavlin 550 my ($f,$sf) = @_;
616     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
617     $marc_repeatable_subfield->{ $f . $sf }++;
618 dpavlin 547 marc(@_);
619     }
620    
621     =head2 marc_indicators
622    
623     Set both indicators for MARC field
624    
625     marc_indicators('900', ' ', 1);
626    
627     Any indicator value other than C<0-9> will be treated as undefined.
628    
629     =cut
630    
631     sub marc_indicators {
632     my $f = shift || die "marc_indicators need field!\n";
633     my ($i1,$i2) = @_;
634     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
635     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
636    
637     $i1 = ' ' if ($i1 !~ /^\d$/);
638     $i2 = ' ' if ($i2 !~ /^\d$/);
639 dpavlin 548 @{ $marc_indicators->{$f} } = ($i1,$i2);
640 dpavlin 547 }
641    
642 dpavlin 562 =head2 marc_compose
643 dpavlin 547
644 dpavlin 562 Save values for each MARC subfield explicitly
645    
646     marc_compose('900',
647     'a', rec('200','a')
648     'b', rec('201','a')
649     'a', rec('200','b')
650     'c', rec('200','c')
651     );
652    
653 dpavlin 603 If you specify C<+> for subfield, value will be appended
654     to previous defined subfield.
655    
656 dpavlin 562 =cut
657    
658     sub marc_compose {
659     my $f = shift or die "marc_compose needs field";
660     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
661    
662     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
663     my $m = [ $f, $i1, $i2 ];
664    
665 dpavlin 583 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
666    
667 dpavlin 619 if ($#_ % 2 != 1) {
668     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
669     }
670    
671 dpavlin 562 while (@_) {
672 dpavlin 619 my $sf = shift;
673 dpavlin 565 my $v = shift;
674 dpavlin 562
675     next unless (defined($v) && $v !~ /^\s*$/);
676 dpavlin 568 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
677 dpavlin 603 if ($sf ne '+') {
678     push @$m, ( $sf, $v );
679     } else {
680     $m->[ $#$m ] .= $v;
681     }
682 dpavlin 562 }
683    
684 dpavlin 586 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
685 dpavlin 562
686 dpavlin 574 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
687 dpavlin 562 }
688    
689 dpavlin 574 =head2 marc_duplicate
690 dpavlin 562
691 dpavlin 574 Generate copy of current MARC record and continue working on copy
692    
693     marc_duplicate();
694    
695     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
696     C<< _get_marc_fields( offset => 42 ) >>.
697    
698     =cut
699    
700     sub marc_duplicate {
701     my $m = $marc_record->[ -1 ];
702     die "can't duplicate record which isn't defined" unless ($m);
703     push @{ $marc_record }, dclone( $m );
704 dpavlin 813 push @{ $marc_leader }, dclone( marc_leader() );
705     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
706 dpavlin 574 $marc_record_offset = $#{ $marc_record };
707     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
708 dpavlin 813
709 dpavlin 574 }
710    
711     =head2 marc_remove
712    
713     Remove some field or subfield from MARC record.
714    
715     marc_remove('200');
716     marc_remove('200','a');
717    
718     This will erase field C<200> or C<200^a> from current MARC record.
719    
720 dpavlin 786 marc_remove('*');
721    
722     Will remove all fields in current MARC record.
723    
724 dpavlin 574 This is useful after calling C<marc_duplicate> or on it's own (but, you
725     should probably just remove that subfield definition if you are not
726     using C<marc_duplicate>).
727    
728     FIXME: support fields < 10.
729    
730     =cut
731    
732     sub marc_remove {
733     my ($f, $sf) = @_;
734    
735     die "marc_remove needs record number" unless defined($f);
736    
737     my $marc = $marc_record->[ $marc_record_offset ];
738    
739     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
740    
741 dpavlin 786 if ($f eq '*') {
742    
743     delete( $marc_record->[ $marc_record_offset ] );
744 dpavlin 788 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
745 dpavlin 786
746     } else {
747    
748     my $i = 0;
749     foreach ( 0 .. $#{ $marc } ) {
750     last unless (defined $marc->[$i]);
751     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
752     if ($marc->[$i]->[0] eq $f) {
753     if (! defined $sf) {
754     # remove whole field
755     splice @$marc, $i, 1;
756     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
757     $i--;
758     } else {
759     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
760     my $o = ($j * 2) + 3;
761     if ($marc->[$i]->[$o] eq $sf) {
762     # remove subfield
763     splice @{$marc->[$i]}, $o, 2;
764     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
765     # is record now empty?
766     if ($#{ $marc->[$i] } == 2) {
767     splice @$marc, $i, 1;
768     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
769     $i--;
770     };
771     }
772 dpavlin 574 }
773     }
774     }
775 dpavlin 786 $i++;
776 dpavlin 574 }
777 dpavlin 786
778     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
779    
780     $marc_record->[ $marc_record_offset ] = $marc;
781 dpavlin 574 }
782    
783     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
784     }
785    
786 dpavlin 604 =head2 marc_original_order
787    
788     Copy all subfields preserving original order to marc field.
789    
790 dpavlin 616 marc_original_order( marc_field_number, original_input_field_number );
791 dpavlin 604
792 dpavlin 616 Please note that field numbers are consistent with other commands (marc
793     field number first), but somewhat counter-intuitive (destination and then
794     source).
795    
796 dpavlin 604 You might want to use this command if you are just renaming subfields or
797     using pre-processing modify_record in C<config.yml> and don't need any
798     post-processing or want to preserve order of original subfields.
799    
800 dpavlin 616
801 dpavlin 604 =cut
802    
803     sub marc_original_order {
804    
805 dpavlin 616 my ($to, $from) = @_;
806 dpavlin 604 die "marc_original_order needs from and to fields\n" unless ($from && $to);
807    
808 dpavlin 616 return unless defined($rec->{$from});
809    
810     my $r = $rec->{$from};
811 dpavlin 604 die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
812    
813     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
814 dpavlin 616 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
815 dpavlin 604
816     foreach my $d (@$r) {
817    
818 dpavlin 605 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
819 dpavlin 616 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
820 dpavlin 605 next;
821     }
822    
823 dpavlin 604 my @sfs = @{ $d->{subfields} };
824    
825     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
826    
827 dpavlin 618 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
828 dpavlin 604
829     my $m = [ $to, $i1, $i2 ];
830    
831     while (my $sf = shift @sfs) {
832 dpavlin 618
833     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
834 dpavlin 604 my $offset = shift @sfs;
835     die "corrupted sufields specification for field $from\n" unless defined($offset);
836    
837     my $v;
838     if (ref($d->{$sf}) eq 'ARRAY') {
839     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
840     } elsif ($offset == 0) {
841     $v = $d->{$sf};
842     } else {
843     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
844     }
845     push @$m, ( $sf, $v ) if (defined($v));
846     }
847    
848     if ($#{$m} > 2) {
849     push @{ $marc_record->[ $marc_record_offset ] }, $m;
850     }
851     }
852    
853     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
854     }
855    
856 dpavlin 813 =head2 marc_count
857 dpavlin 604
858 dpavlin 813 Return number of MARC records created using L</marc_duplicate>.
859    
860     print "created ", marc_count(), " records";
861    
862     =cut
863    
864     sub marc_count {
865     return $#{ $marc_record };
866     }
867    
868    
869 dpavlin 540 =head1 Functions to extract data from input
870    
871     This function should be used inside functions to create C<data_structure> described
872     above.
873    
874 dpavlin 641 =head2 _pack_subfields_hash
875    
876 dpavlin 669 @subfields = _pack_subfields_hash( $h );
877     $subfields = _pack_subfields_hash( $h, 1 );
878 dpavlin 641
879 dpavlin 669 Return each subfield value in array or pack them all together and return scalar
880     with subfields (denoted by C<^>) and values.
881    
882 dpavlin 641 =cut
883    
884     sub _pack_subfields_hash {
885    
886 dpavlin 642 warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
887 dpavlin 641
888     my ($h,$include_subfields) = @_;
889    
890 dpavlin 831 # sanity and ease of use
891     return $h if (ref($h) ne 'HASH');
892    
893 dpavlin 641 if ( defined($h->{subfields}) ) {
894     my $sfs = delete $h->{subfields} || die "no subfields?";
895     my @out;
896     while (@$sfs) {
897     my $sf = shift @$sfs;
898     push @out, '^' . $sf if ($include_subfields);
899     my $o = shift @$sfs;
900     if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
901     # single element subfields are not arrays
902 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
903 dpavlin 667
904 dpavlin 641 push @out, $h->{$sf};
905     } else {
906 dpavlin 669 #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
907 dpavlin 641 push @out, $h->{$sf}->[$o];
908     }
909     }
910 dpavlin 667 if ($include_subfields) {
911     return join('', @out);
912     } else {
913     return @out;
914     }
915 dpavlin 641 } else {
916 dpavlin 667 if ($include_subfields) {
917     my $out = '';
918 dpavlin 668 foreach my $sf (sort keys %$h) {
919 dpavlin 667 if (ref($h->{$sf}) eq 'ARRAY') {
920     $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
921     } else {
922     $out .= '^' . $sf . $h->{$sf};
923     }
924     }
925     return $out;
926     } else {
927     # FIXME this should probably be in alphabetical order instead of hash order
928     values %{$h};
929     }
930 dpavlin 641 }
931     }
932    
933 dpavlin 536 =head2 rec1
934 dpavlin 371
935 dpavlin 536 Return all values in some field
936 dpavlin 371
937 dpavlin 536 @v = rec1('200')
938 dpavlin 15
939 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
940 dpavlin 15
941 dpavlin 536 =cut
942 dpavlin 15
943 dpavlin 536 sub rec1 {
944     my $f = shift;
945 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
946 dpavlin 536 return unless (defined($rec) && defined($rec->{$f}));
947 dpavlin 571 warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
948 dpavlin 536 if (ref($rec->{$f}) eq 'ARRAY') {
949 dpavlin 641 my @out;
950     foreach my $h ( @{ $rec->{$f} } ) {
951     if (ref($h) eq 'HASH') {
952     push @out, ( _pack_subfields_hash( $h ) );
953 dpavlin 31 } else {
954 dpavlin 641 push @out, $h;
955 dpavlin 31 }
956 dpavlin 641 }
957     return @out;
958 dpavlin 536 } elsif( defined($rec->{$f}) ) {
959     return $rec->{$f};
960 dpavlin 15 }
961     }
962    
963 dpavlin 536 =head2 rec2
964 dpavlin 15
965 dpavlin 536 Return all values in specific field and subfield
966 dpavlin 13
967 dpavlin 536 @v = rec2('200','a')
968 dpavlin 13
969     =cut
970    
971 dpavlin 536 sub rec2 {
972     my $f = shift;
973     return unless (defined($rec && $rec->{$f}));
974     my $sf = shift;
975 dpavlin 601 warn "rec2($f,$sf) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
976 dpavlin 589 return map {
977     if (ref($_->{$sf}) eq 'ARRAY') {
978     @{ $_->{$sf} };
979     } else {
980     $_->{$sf};
981     }
982     } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
983 dpavlin 536 }
984 dpavlin 13
985 dpavlin 536 =head2 rec
986 dpavlin 13
987 dpavlin 536 syntaxtic sugar for
988 dpavlin 13
989 dpavlin 536 @v = rec('200')
990     @v = rec('200','a')
991 dpavlin 13
992 dpavlin 750 If rec() returns just single value, it will
993     return scalar, not array.
994    
995 dpavlin 536 =cut
996 dpavlin 373
997 dpavlin 536 sub rec {
998 dpavlin 583 my @out;
999 dpavlin 536 if ($#_ == 0) {
1000 dpavlin 583 @out = rec1(@_);
1001 dpavlin 536 } elsif ($#_ == 1) {
1002 dpavlin 583 @out = rec2(@_);
1003 dpavlin 13 }
1004 dpavlin 750 if ($#out == 0 && ! wantarray) {
1005     return $out[0];
1006     } elsif (@out) {
1007 dpavlin 583 return @out;
1008     } else {
1009     return '';
1010     }
1011 dpavlin 13 }
1012    
1013 dpavlin 536 =head2 regex
1014 dpavlin 15
1015 dpavlin 536 Apply regex to some or all values
1016 dpavlin 15
1017 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
1018 dpavlin 15
1019     =cut
1020    
1021 dpavlin 536 sub regex {
1022     my $r = shift;
1023     my @out;
1024 dpavlin 550 #warn "r: $r\n", dump(\@_);
1025 dpavlin 536 foreach my $t (@_) {
1026     next unless ($t);
1027     eval "\$t =~ $r";
1028     push @out, $t if ($t && $t ne '');
1029 dpavlin 368 }
1030 dpavlin 536 return @out;
1031 dpavlin 15 }
1032    
1033 dpavlin 536 =head2 prefix
1034 dpavlin 15
1035 dpavlin 536 Prefix all values with a string
1036 dpavlin 15
1037 dpavlin 536 @v = prefix( 'my_', @v );
1038 dpavlin 15
1039     =cut
1040    
1041 dpavlin 536 sub prefix {
1042 dpavlin 819 my $p = shift;
1043     return @_ unless defined( $p );
1044 dpavlin 536 return map { $p . $_ } grep { defined($_) } @_;
1045     }
1046 dpavlin 15
1047 dpavlin 536 =head2 suffix
1048 dpavlin 15
1049 dpavlin 536 suffix all values with a string
1050 dpavlin 15
1051 dpavlin 536 @v = suffix( '_my', @v );
1052 dpavlin 15
1053 dpavlin 536 =cut
1054 dpavlin 15
1055 dpavlin 536 sub suffix {
1056 dpavlin 819 my $s = shift;
1057     return @_ unless defined( $s );
1058 dpavlin 536 return map { $_ . $s } grep { defined($_) } @_;
1059 dpavlin 15 }
1060    
1061 dpavlin 536 =head2 surround
1062 dpavlin 13
1063 dpavlin 536 surround all values with a two strings
1064 dpavlin 13
1065 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
1066 dpavlin 13
1067     =cut
1068    
1069 dpavlin 536 sub surround {
1070 dpavlin 819 my $p = shift;
1071     my $s = shift;
1072     $p = '' unless defined( $p );
1073     $s = '' unless defined( $s );
1074 dpavlin 536 return map { $p . $_ . $s } grep { defined($_) } @_;
1075 dpavlin 13 }
1076    
1077 dpavlin 536 =head2 first
1078 dpavlin 13
1079 dpavlin 536 Return first element
1080 dpavlin 15
1081 dpavlin 536 $v = first( @v );
1082 dpavlin 13
1083     =cut
1084    
1085 dpavlin 536 sub first {
1086     my $r = shift;
1087     return $r;
1088 dpavlin 13 }
1089    
1090 dpavlin 536 =head2 lookup
1091 dpavlin 13
1092 dpavlin 536 Consult lookup hashes for some value
1093 dpavlin 13
1094 dpavlin 725 @v = lookup(
1095     sub {
1096     'ffkk/peri/mfn'.rec('000')
1097     },
1098     'ffkk','peri','200-a-200-e',
1099     sub {
1100     first(rec(200,'a')).' '.first(rec('200','e'))
1101     }
1102     );
1103 dpavlin 13
1104 dpavlin 725 Code like above will be B<automatically generated> using L<WebPAC::Parse> from
1105     normal lookup definition in C<conf/lookup/something.pl> which looks like:
1106 dpavlin 707
1107 dpavlin 725 lookup(
1108     # which results to return from record recorded in lookup
1109     sub { 'ffkk/peri/mfn' . rec('000') },
1110     # from which database and input
1111     'ffkk','peri',
1112     # such that following values match
1113     sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1114     # if this part is missing, we will try to match same fields
1115     # from lookup record and current one, or you can override
1116     # which records to use from current record using
1117     sub { rec('900','x') . ' ' . rec('900','y') },
1118     )
1119    
1120     You can think about this lookup as SQL (if that helps):
1121    
1122     select
1123     sub { what }
1124     from
1125     database, input
1126     where
1127     sub { filter from lookuped record }
1128     having
1129     sub { optional filter on current record }
1130    
1131     Easy as pie, right?
1132    
1133 dpavlin 13 =cut
1134    
1135 dpavlin 536 sub lookup {
1136 dpavlin 725 my ($what, $database, $input, $key, $having) = @_;
1137    
1138 dpavlin 766 confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1139 dpavlin 725
1140 dpavlin 729 warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1141 dpavlin 725 return unless (defined($lookup->{$database}->{$input}->{$key}));
1142    
1143 dpavlin 736 confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1144 dpavlin 725
1145     my $mfns;
1146     my @having = $having->();
1147    
1148 dpavlin 729 warn "## having = ", dump( @having ) if ($debug > 2);
1149 dpavlin 725
1150     foreach my $h ( @having ) {
1151     if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1152 dpavlin 752 warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1153 dpavlin 725 $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1154     }
1155 dpavlin 536 }
1156 dpavlin 725
1157     return unless ($mfns);
1158    
1159     my @mfns = sort keys %$mfns;
1160    
1161 dpavlin 750 warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1162 dpavlin 725
1163     my $old_rec = $rec;
1164     my @out;
1165    
1166     foreach my $mfn (@mfns) {
1167 dpavlin 736 $rec = $load_row_coderef->( $database, $input, $mfn );
1168 dpavlin 725
1169 dpavlin 752 warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1170 dpavlin 725
1171     my @vals = $what->();
1172    
1173     push @out, ( @vals );
1174    
1175 dpavlin 752 warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1176 dpavlin 725 }
1177    
1178     # if (ref($lookup->{$k}) eq 'ARRAY') {
1179     # return @{ $lookup->{$k} };
1180     # } else {
1181     # return $lookup->{$k};
1182     # }
1183    
1184     $rec = $old_rec;
1185    
1186 dpavlin 750 warn "## lookup returns = ", dump(@out), $/ if ($debug);
1187 dpavlin 725
1188 dpavlin 740 if ($#out == 0) {
1189     return $out[0];
1190     } else {
1191     return @out;
1192     }
1193 dpavlin 13 }
1194    
1195 dpavlin 707 =head2 save_into_lookup
1196    
1197 dpavlin 725 Save value into lookup. It associates current database, input
1198     and specific keys with one or more values which will be
1199     associated over MFN.
1200 dpavlin 707
1201 dpavlin 725 MFN will be extracted from first occurence current of field 000
1202     in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1203    
1204     my $nr = save_into_lookup($database,$input,$key,sub {
1205 dpavlin 707 # code which produce one or more values
1206     });
1207    
1208 dpavlin 725 It returns number of items saved.
1209 dpavlin 707
1210 dpavlin 725 This function shouldn't be called directly, it's called from code created by
1211     L<WebPAC::Parser>.
1212    
1213 dpavlin 707 =cut
1214    
1215     sub save_into_lookup {
1216 dpavlin 712 my ($database,$input,$key,$coderef) = @_;
1217     die "save_into_lookup needs database" unless defined($database);
1218     die "save_into_lookup needs input" unless defined($input);
1219     die "save_into_lookup needs key" unless defined($key);
1220 dpavlin 707 die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1221 dpavlin 725
1222 dpavlin 729 warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1223 dpavlin 725
1224     my $mfn =
1225     defined($rec->{'000'}->[0]) ? $rec->{'000'}->[0] :
1226     defined($config->{_mfn}) ? $config->{_mfn} :
1227     die "mfn not defined or zero";
1228    
1229     my $nr = 0;
1230    
1231 dpavlin 707 foreach my $v ( $coderef->() ) {
1232 dpavlin 712 $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1233 dpavlin 721 warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1234 dpavlin 725 $nr++;
1235 dpavlin 707 }
1236 dpavlin 725
1237     return $nr;
1238 dpavlin 707 }
1239    
1240 dpavlin 595 =head2 config
1241    
1242     Consult config values stored in C<config.yml>
1243    
1244     # return database code (key under databases in yaml)
1245     $database_code = config(); # use _ from hash
1246     $database_name = config('name');
1247     $database_input_name = config('input name');
1248    
1249     Up to three levels are supported.
1250    
1251     =cut
1252    
1253     sub config {
1254     return unless ($config);
1255    
1256     my $p = shift;
1257    
1258     $p ||= '';
1259    
1260     my $v;
1261    
1262     warn "### getting config($p)\n" if ($debug > 1);
1263    
1264     my @p = split(/\s+/,$p);
1265     if ($#p < 0) {
1266     $v = $config->{ '_' }; # special, database code
1267     } else {
1268    
1269     my $c = dclone( $config );
1270    
1271     foreach my $k (@p) {
1272     warn "### k: $k c = ",dump($c),$/ if ($debug > 1);
1273     if (ref($c) eq 'ARRAY') {
1274     $c = shift @$c;
1275     warn "config($p) taking first occurence of '$k', probably not what you wanted!\n";
1276     last;
1277     }
1278    
1279     if (! defined($c->{$k}) ) {
1280     $c = undef;
1281     last;
1282     } else {
1283     $c = $c->{$k};
1284     }
1285     }
1286     $v = $c if ($c);
1287    
1288     }
1289    
1290     warn "## config( '$p' ) = ",dump( $v ),$/ if ($v && $debug);
1291     warn "config( '$p' ) is empty\n" if (! $v);
1292    
1293     return $v;
1294     }
1295    
1296     =head2 id
1297    
1298     Returns unique id of this record
1299    
1300     $id = id();
1301    
1302     Returns C<42/2> for 2nd occurence of MFN 42.
1303    
1304     =cut
1305    
1306     sub id {
1307     my $mfn = $config->{_mfn} || die "no _mfn in config data";
1308     return $mfn . $#{$marc_record} ? $#{$marc_record} + 1 : '';
1309     }
1310    
1311 dpavlin 536 =head2 join_with
1312 dpavlin 13
1313 dpavlin 536 Joins walues with some delimiter
1314 dpavlin 10
1315 dpavlin 536 $v = join_with(", ", @v);
1316 dpavlin 10
1317 dpavlin 536 =cut
1318 dpavlin 10
1319 dpavlin 536 sub join_with {
1320     my $d = shift;
1321 dpavlin 586 warn "### join_with('$d',",dump(@_),")\n" if ($debug > 2);
1322 dpavlin 583 my $v = join($d, grep { defined($_) && $_ ne '' } @_);
1323     return '' unless defined($v);
1324     return $v;
1325 dpavlin 536 }
1326 dpavlin 10
1327 dpavlin 562 =head2 split_rec_on
1328    
1329     Split record subfield on some regex and take one of parts out
1330    
1331     $a_before_semi_column =
1332     split_rec_on('200','a', /\s*;\s*/, $part);
1333    
1334     C<$part> is optional number of element. First element is
1335     B<1>, not 0!
1336    
1337     If there is no C<$part> parameter or C<$part> is 0, this function will
1338     return all values produced by splitting.
1339    
1340     =cut
1341    
1342     sub split_rec_on {
1343     die "split_rec_on need (fld,sf,regex[,part]" if ($#_ < 2);
1344    
1345     my ($fld, $sf, $regex, $part) = @_;
1346 dpavlin 568 warn "### regex ", ref($regex), $regex, $/ if ($debug > 2);
1347 dpavlin 562
1348     my @r = rec( $fld, $sf );
1349     my $v = shift @r;
1350 dpavlin 568 warn "### first rec($fld,$sf) = ",dump($v),$/ if ($debug > 2);
1351 dpavlin 562
1352 dpavlin 604 return '' if ( ! defined($v) || $v =~ /^\s*$/);
1353 dpavlin 566
1354 dpavlin 562 my @s = split( $regex, $v );
1355 dpavlin 568 warn "## split_rec_on($fld,$sf,$regex,$part) = ",dump(@s),$/ if ($debug > 1);
1356 dpavlin 566 if ($part && $part > 0) {
1357 dpavlin 562 return $s[ $part - 1 ];
1358     } else {
1359 dpavlin 571 return @s;
1360 dpavlin 562 }
1361     }
1362    
1363 dpavlin 785 my $hash;
1364    
1365     =head2 set
1366    
1367     set( key => 'value' );
1368    
1369     =cut
1370    
1371     sub set {
1372     my ($k,$v) = @_;
1373 dpavlin 810 warn "## set ( $k => ", dump($v), " )", $/ if ( $debug );
1374 dpavlin 785 $hash->{$k} = $v;
1375     };
1376    
1377     =head2 get
1378    
1379     get( 'key' );
1380    
1381     =cut
1382    
1383     sub get {
1384     my $k = shift || return;
1385     my $v = $hash->{$k};
1386 dpavlin 810 warn "## get $k = ", dump( $v ), $/ if ( $debug );
1387 dpavlin 785 return $v;
1388     }
1389    
1390 dpavlin 791 =head2 count
1391 dpavlin 785
1392 dpavlin 791 if ( count( @result ) == 1 ) {
1393     # do something if only 1 result is there
1394     }
1395    
1396     =cut
1397    
1398     sub count {
1399 dpavlin 810 warn "## count ",dump(@_),$/ if ( $debug );
1400 dpavlin 791 return @_ . '';
1401     }
1402    
1403 dpavlin 536 # END
1404     1;

  ViewVC Help
Powered by ViewVC 1.1.26