/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1040 - (hide annotations)
Mon Nov 12 12:18:55 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 22724 byte(s)
 r1618@llin:  dpavlin | 2007-11-12 13:18:54 +0100
 cleanup and fixes

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9     /;
10    
11     use strict;
12     use warnings;
13    
14 dpavlin 1036 use Storable qw/dclone/;
15 dpavlin 1021 use Data::Dump qw/dump/;
16     use Carp qw/confess/;
17    
18     use WebPAC::Normalize;
19    
20 dpavlin 1038 our $debug = 0;
21 dpavlin 1028
22 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24    
25     our $rec;
26    
27 dpavlin 1021 =head1 NAME
28    
29     WebPAC::Normalize::MARC - create MARC/ISO2709 records
30    
31     =cut
32    
33     =head1 FUNCTIONS
34    
35     =head2 marc_template
36    
37     marc_template(
38     from => 225, to => 440,
39     subfields_rename => [
40     'a' => 'a',
41     'x' => 'x',
42     'v' => 'v',
43     'h' => 'n',
44     'i' => 'p',
45     'w' => 'v',
46     ],
47     isis_template => [
48     'a ; |v. |i',
49     'a. |i ; |w',
50     ],
51     marc_template => [
52     'a, |x ; |v. |n, |p ; |v',
53     'a ; |v. |p ; |v',
54     ],
55     );
56    
57 dpavlin 1026 Returns number of records produced.
58    
59 dpavlin 1021 =cut
60    
61 dpavlin 1039 my $created_with_marc_template;
62    
63 dpavlin 1021 sub marc_template {
64     my $args = {@_};
65 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
66 dpavlin 1021
67     foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70     }
71    
72     die "marc_template needs isis_template or marc_template"
73     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74    
75     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76     my $r = $rec->{ $args->{from} } || return;
77     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78    
79     my @subfields_rename = @{ $args->{subfields_rename} };
80 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81 dpavlin 1021
82     confess "need mapping in pairs for subfields_rename"
83     if $#subfields_rename % 2 != 1;
84    
85 dpavlin 1029 my ( $subfields_rename, $from_subfields );
86     our $to_subfields = {};
87 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88     my ( $f, $t ) = (
89     $from_subfields->{ $from }++,
90     $to_subfields->{ $to }++
91     );
92     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93     }
94 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97 dpavlin 1021
98 dpavlin 1025 our $_template;
99 dpavlin 1021
100 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102 dpavlin 1025
103 dpavlin 1021 my @marc_out;
104    
105 dpavlin 1023 sub _parse_template {
106 dpavlin 1024 my ( $name, $templates ) = @_;
107 dpavlin 1021
108 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109 dpavlin 1025
110 dpavlin 1023 foreach my $template ( @{ $templates } ) {
111     our $count = {};
112 dpavlin 1025 our @order = ();
113 dpavlin 1023 sub my_count {
114     my $sf = shift;
115     my $nr = $count->{$sf}++;
116 dpavlin 1025 push @order, [ $sf, $nr ];
117 dpavlin 1023 return $sf . $nr;
118     }
119     my $pos_template = $template;
120     $pos_template =~ s/($fields_re)/my_count($1)/ge;
121     my $count_key = dump( $count );
122 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 dpavlin 1021 }
126 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 dpavlin 1021 }
128    
129 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
130     _parse_template( 'isis', $args->{isis_template} );
131 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
132 dpavlin 1023
133 dpavlin 1021 my $m;
134    
135     foreach my $r ( @{ $rec->{ $args->{from} } } ) {
136    
137     my $i1 = $r->{i1} || ' ';
138     my $i2 = $r->{i2} || ' ';
139 dpavlin 1039 my $to = $args->{to};
140     $m = [ $to, $i1, $i2 ];
141 dpavlin 1021
142 dpavlin 1039 $created_with_marc_template->{ $to }++;
143    
144 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
145 dpavlin 1021
146 dpavlin 1026 my ( $from_mapping, $to_mapping, $from_count, $to_count );
147 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
148 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
149 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
150     my $from_nr = $from_count->{$from_sf}++;
151     my $rename_to = $subfields_rename->{ $from_sf } ||
152     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
153     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
154     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
155 dpavlin 1021
156 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
157     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
158    
159     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
160 dpavlin 1021 }
161    
162 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
163 dpavlin 1021
164 dpavlin 1025 my $count_key = {
165     from => dump( $from_count ),
166     to => dump( $to_count),
167     };
168 dpavlin 1021
169 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
170 dpavlin 1021
171 dpavlin 1025 my $processed_templates = 0;
172 dpavlin 1021
173 dpavlin 1025 # this defines order of traversal
174     foreach ( qw/isis:from marc:to/ ) {
175     my ($name,$count_name) = split(/:/);
176 dpavlin 1021
177 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
178 dpavlin 1021
179 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
180     $processed_templates++;
181 dpavlin 1021
182 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
183 dpavlin 1025
184     our $fill_in = {};
185    
186     my @templates = split(/\|/, $template );
187 dpavlin 1031 @templates = ( $template ) unless @templates;
188 dpavlin 1025
189 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
190 dpavlin 1031
191 dpavlin 1025 foreach my $sf ( @templates ) {
192     sub fill_in {
193 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
194     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
195 dpavlin 1028 my ( $from_sf, $from_nr );
196 dpavlin 1025 if ( $name eq 'marc' ) {
197 dpavlin 1029 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), " form record ",dump( $r ) unless defined $to_mapping->{$sf}->[$nr];
198 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
199 dpavlin 1025 } else {
200     ( $from_sf, $from_nr ) = ( $sf, $nr );
201     }
202     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
203     if ( ref( $v ) eq 'ARRAY' ) {
204 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
205 dpavlin 1025 } elsif ( $from_nr == 0 ) {
206 dpavlin 1031 $v = $pre . $v . $post;
207 dpavlin 1025 } else {
208     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
209     }
210 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
211     $fill_in->{$sf}->[$nr] = $v;
212     return $v;
213 dpavlin 1021 }
214 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
215 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
216     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
217 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
218 dpavlin 1021 }
219    
220 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
221 dpavlin 1021
222 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
223     my ( $sf, $nr ) = @$sf;
224     my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
225 dpavlin 1026 if ( $name eq 'isis') {
226     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
227     }
228 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
229 dpavlin 1025 push @$m, ( $sf, $v );
230     }
231 dpavlin 1021
232 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
233 dpavlin 1025
234     push @marc_out, $m;
235 dpavlin 1029
236     last;
237 dpavlin 1025 }
238    
239     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
240 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
241 dpavlin 1021 }
242    
243    
244 dpavlin 1026 my $recs = 0;
245 dpavlin 1025
246 dpavlin 1021 foreach my $marc ( @marc_out ) {
247 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
248 dpavlin 1036 _marc_push( $marc );
249 dpavlin 1026 $recs++;
250 dpavlin 1021 }
251 dpavlin 1026
252 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
253    
254     return $recs;
255 dpavlin 1021 }
256    
257 dpavlin 1036 =head2 marc_leader
258    
259     Setup fields within MARC leader or get leader
260    
261     marc_leader('05','c');
262     my $leader = marc_leader();
263    
264     =cut
265    
266     sub marc_leader {
267     my ($offset,$value) = @_;
268    
269     if ($offset) {
270     $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
271     } else {
272    
273     if (defined($marc_leader)) {
274     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
275     return $marc_leader->[ $marc_record_offset ];
276     } else {
277     return;
278     }
279     }
280     }
281    
282     =head2 marc_fixed
283    
284     Create control/indentifier fields with values in fixed positions
285    
286     marc_fixed('008', 00, '070402');
287     marc_fixed('008', 39, '|');
288    
289     Positions not specified will be filled with spaces (C<0x20>).
290    
291     There will be no effort to extend last specified value to full length of
292     field in standard.
293    
294     =cut
295    
296     sub marc_fixed {
297     my ($f, $pos, $val) = @_;
298     die "need marc(field, position, value)" unless defined($f) && defined($pos);
299    
300     confess "need val" unless defined $val;
301    
302     my $update = 0;
303    
304     map {
305     if ($_->[0] eq $f) {
306     my $old = $_->[1];
307     if (length($old) <= $pos) {
308     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
309     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
310     } else {
311     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
312     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
313     }
314     $update++;
315     }
316     } @{ $marc_record->[ $marc_record_offset ] };
317    
318     if (! $update) {
319     my $v = ' ' x $pos . $val;
320     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
321     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
322     }
323     }
324    
325     =head2 marc
326    
327     Save value for MARC field
328    
329     marc('900','a', rec('200','a') );
330     marc('001', rec('000') );
331    
332     =cut
333    
334     sub marc {
335     my $f = shift or die "marc needs field";
336     die "marc field must be numer" unless ($f =~ /^\d+$/);
337    
338     my $sf;
339     if ($f >= 10) {
340     $sf = shift or die "marc needs subfield";
341     }
342    
343     foreach (@_) {
344     my $v = $_; # make var read-write for Encode
345     next unless (defined($v) && $v !~ /^\s*$/);
346     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
347     if (defined $sf) {
348     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
349     } else {
350     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
351     }
352     }
353     }
354    
355     =head2 marc_repeatable_subfield
356    
357     Save values for MARC repetable subfield
358    
359     marc_repeatable_subfield('910', 'z', rec('909') );
360    
361     =cut
362    
363     sub marc_repeatable_subfield {
364     my ($f,$sf) = @_;
365     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
366     $marc_repeatable_subfield->{ $f . $sf }++;
367     marc(@_);
368     }
369    
370     =head2 marc_indicators
371    
372     Set both indicators for MARC field
373    
374     marc_indicators('900', ' ', 1);
375    
376     Any indicator value other than C<0-9> will be treated as undefined.
377    
378     =cut
379    
380     sub marc_indicators {
381     my $f = shift || die "marc_indicators need field!\n";
382     my ($i1,$i2) = @_;
383     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
384     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
385    
386     $i1 = ' ' if ($i1 !~ /^\d$/);
387     $i2 = ' ' if ($i2 !~ /^\d$/);
388     @{ $marc_indicators->{$f} } = ($i1,$i2);
389     }
390    
391     =head2 marc_compose
392    
393     Save values for each MARC subfield explicitly
394    
395     marc_compose('900',
396     'a', rec('200','a')
397     'b', rec('201','a')
398     'a', rec('200','b')
399     'c', rec('200','c')
400     );
401    
402     If you specify C<+> for subfield, value will be appended
403     to previous defined subfield.
404    
405     =cut
406    
407     sub marc_compose {
408     my $f = shift or die "marc_compose needs field";
409     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
410    
411     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
412     my $m = [ $f, $i1, $i2 ];
413    
414     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
415    
416     if ($#_ % 2 != 1) {
417     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
418     }
419    
420     while (@_) {
421     my $sf = shift;
422     my $v = shift;
423    
424     next unless (defined($v) && $v !~ /^\s*$/);
425     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
426     if ($sf ne '+') {
427     push @$m, ( $sf, $v );
428     } else {
429     $m->[ $#$m ] .= $v;
430     }
431     }
432    
433     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
434    
435     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
436     }
437    
438     =head2 marc_duplicate
439    
440     Generate copy of current MARC record and continue working on copy
441    
442     marc_duplicate();
443    
444     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
445     C<< _get_marc_fields( offset => 42 ) >>.
446    
447     =cut
448    
449     sub marc_duplicate {
450     my $m = $marc_record->[ -1 ];
451     die "can't duplicate record which isn't defined" unless ($m);
452     push @{ $marc_record }, dclone( $m );
453     push @{ $marc_leader }, dclone( marc_leader() );
454     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
455     $marc_record_offset = $#{ $marc_record };
456     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
457    
458     }
459    
460     =head2 marc_remove
461    
462     Remove some field or subfield from MARC record.
463    
464     marc_remove('200');
465     marc_remove('200','a');
466    
467     This will erase field C<200> or C<200^a> from current MARC record.
468    
469     marc_remove('*');
470    
471     Will remove all fields in current MARC record.
472    
473     This is useful after calling C<marc_duplicate> or on it's own (but, you
474     should probably just remove that subfield definition if you are not
475     using C<marc_duplicate>).
476    
477     FIXME: support fields < 10.
478    
479     =cut
480    
481     sub marc_remove {
482     my ($f, $sf) = @_;
483    
484     die "marc_remove needs record number" unless defined($f);
485    
486     my $marc = $marc_record->[ $marc_record_offset ];
487    
488     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
489    
490     if ($f eq '*') {
491    
492     delete( $marc_record->[ $marc_record_offset ] );
493     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
494    
495     } else {
496    
497     my $i = 0;
498     foreach ( 0 .. $#{ $marc } ) {
499     last unless (defined $marc->[$i]);
500     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
501     if ($marc->[$i]->[0] eq $f) {
502     if (! defined $sf) {
503     # remove whole field
504     splice @$marc, $i, 1;
505     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
506     $i--;
507     } else {
508     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
509     my $o = ($j * 2) + 3;
510     if ($marc->[$i]->[$o] eq $sf) {
511     # remove subfield
512     splice @{$marc->[$i]}, $o, 2;
513     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
514     # is record now empty?
515     if ($#{ $marc->[$i] } == 2) {
516     splice @$marc, $i, 1;
517     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
518     $i--;
519     };
520     }
521     }
522     }
523     }
524     $i++;
525     }
526    
527     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
528    
529     $marc_record->[ $marc_record_offset ] = $marc;
530     }
531    
532     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
533     }
534    
535     =head2 marc_original_order
536    
537     Copy all subfields preserving original order to marc field.
538    
539     marc_original_order( marc_field_number, original_input_field_number );
540    
541     Please note that field numbers are consistent with other commands (marc
542     field number first), but somewhat counter-intuitive (destination and then
543     source).
544    
545     You might want to use this command if you are just renaming subfields or
546     using pre-processing modify_record in C<config.yml> and don't need any
547     post-processing or want to preserve order of original subfields.
548    
549    
550     =cut
551    
552     sub marc_original_order {
553    
554     my ($to, $from) = @_;
555     die "marc_original_order needs from and to fields\n" unless ($from && $to);
556    
557     return unless defined($rec->{$from});
558    
559     my $r = $rec->{$from};
560     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
561    
562     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
563     warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
564    
565     foreach my $d (@$r) {
566    
567     if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
568     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
569     next;
570     }
571    
572     my @sfs = @{ $d->{subfields} };
573    
574     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
575    
576     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
577    
578     my $m = [ $to, $i1, $i2 ];
579    
580     while (my $sf = shift @sfs) {
581    
582     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
583     my $offset = shift @sfs;
584     die "corrupted sufields specification for field $from\n" unless defined($offset);
585    
586     my $v;
587     if (ref($d->{$sf}) eq 'ARRAY') {
588     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
589     } elsif ($offset == 0) {
590     $v = $d->{$sf};
591     } else {
592     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
593     }
594     push @$m, ( $sf, $v ) if (defined($v));
595     }
596    
597     if ($#{$m} > 2) {
598     push @{ $marc_record->[ $marc_record_offset ] }, $m;
599     }
600     }
601    
602     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
603     }
604    
605    
606     =head2 marc_count
607    
608     Return number of MARC records created using L</marc_duplicate>.
609    
610     print "created ", marc_count(), " records";
611    
612     =cut
613    
614     sub marc_count {
615     return $#{ $marc_record };
616     }
617    
618 dpavlin 1039 =head1 PRIVATE FUNCTIONS
619    
620 dpavlin 1036 =head2 _marc_push
621    
622     _marc_push( $marc );
623    
624     =cut
625    
626     sub _marc_push {
627     my $marc = shift || die "no marc?";
628     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
629     }
630    
631     =head2 _clean
632    
633     Clean internal structures
634    
635     =cut
636    
637     sub _clean {
638 dpavlin 1039 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
639 dpavlin 1036 ($marc_record_offset, $marc_fetch_offset) = (0,0);
640     }
641    
642    
643     =head2 _get_marc_fields
644    
645     Get all fields defined by calls to C<marc>
646    
647     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
648    
649     We are using I<magic> which detect repeatable fields only from
650     sequence of field/subfield data generated by normalization.
651    
652 dpavlin 1039 This magic is disabled for all records created with C<marc_template>.
653    
654 dpavlin 1036 Repeatable field is created when there is second occurence of same subfield or
655     if any of indicators are different.
656    
657     This is sane for most cases. Something like:
658    
659     900a-1 900b-1 900c-1
660     900a-2 900b-2
661     900a-3
662    
663     will be created from any combination of:
664    
665     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
666    
667     and following rules:
668    
669     marc('900','a', rec('200','a') );
670     marc('900','b', rec('200','b') );
671     marc('900','c', rec('200','c') );
672    
673     which might not be what you have in mind. If you need repeatable subfield,
674     define it using C<marc_repeatable_subfield> like this:
675    
676     marc_repeatable_subfield('900','a');
677     marc('900','a', rec('200','a') );
678     marc('900','b', rec('200','b') );
679     marc('900','c', rec('200','c') );
680    
681     will create:
682    
683     900a-1 900a-2 900a-3 900b-1 900c-1
684     900b-2
685    
686     There is also support for returning next or specific using:
687    
688     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
689     # do something with $mf
690     }
691    
692     will always return fields from next MARC record or
693    
694     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
695    
696     will return 42th copy record (if it exists).
697    
698     =cut
699    
700     my $fetch_pos;
701    
702     sub _get_marc_fields {
703    
704     my $arg = {@_};
705     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
706     $fetch_pos = $marc_fetch_offset;
707     if ($arg->{offset}) {
708     $fetch_pos = $arg->{offset};
709     } elsif($arg->{fetch_next}) {
710     $marc_fetch_offset++;
711     }
712    
713     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
714    
715     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
716    
717     my $marc_rec = $marc_record->[ $fetch_pos ];
718    
719     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
720    
721     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
722    
723     # first, sort all existing fields
724     # XXX might not be needed, but modern perl might randomize elements in hash
725 dpavlin 1039 # my @sorted_marc_record = sort {
726     # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
727     # } @{ $marc_rec };
728 dpavlin 1036
729 dpavlin 1040 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
730 dpavlin 1036
731     # output marc fields
732     my @m;
733    
734     # count unique field-subfields (used for offset when walking to next subfield)
735     my $u;
736     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
737    
738     if ($debug) {
739     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
740     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
741     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
742     warn "## subfield count = ", dump( $u ), $/;
743     }
744    
745     my $len = $#sorted_marc_record;
746     my $visited;
747     my $i = 0;
748     my $field;
749    
750 dpavlin 1040 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
751    
752 dpavlin 1036 foreach ( 0 .. $len ) {
753    
754     # find next element which isn't visited
755     while ($visited->{$i}) {
756     $i = ($i + 1) % ($len + 1);
757     }
758    
759     # mark it visited
760     $visited->{$i}++;
761    
762     my $row = dclone( $sorted_marc_record[$i] );
763    
764 dpavlin 1039 if ( $created_with_marc_template->{ $row->[0] } ) {
765     push @m, $row;
766     warn "## copied marc_template created ", dump( $row ),$/ if $debug;
767     next;
768     }
769 dpavlin 1040
770 dpavlin 1036 # field and subfield which is key for
771     # marc_repeatable_subfield and u
772     my $fsf = $row->[0] . ( $row->[3] || '' );
773    
774     if ($debug > 1) {
775    
776     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
777     print "### this [$i]: ", dump( $row ),$/;
778     print "### sf: ", $row->[3], " vs ", $field->[3],
779     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
780     if ($#$field >= 0);
781    
782     }
783    
784     # if field exists
785     if ( $#$field >= 0 ) {
786     if (
787     $row->[0] ne $field->[0] || # field
788     $row->[1] ne $field->[1] || # i1
789     $row->[2] ne $field->[2] # i2
790     ) {
791     push @m, $field;
792     warn "## saved/1 ", dump( $field ),$/ if ($debug);
793     $field = $row;
794    
795     } elsif (
796     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
797     ||
798     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
799     ! $marc_repeatable_subfield->{ $fsf }
800     )
801     ) {
802     push @m, $field;
803     warn "## saved/2 ", dump( $field ),$/ if ($debug);
804     $field = $row;
805    
806     } else {
807     # append new subfields to existing field
808     push @$field, ( $row->[3], $row->[4] );
809     }
810     } else {
811     # insert first field
812     $field = $row;
813     }
814    
815     if (! $marc_repeatable_subfield->{ $fsf }) {
816     # make step to next subfield
817     $i = ($i + $u->{ $fsf } ) % ($len + 1);
818     }
819     }
820    
821     if ($#$field >= 0) {
822     push @m, $field;
823     warn "## saved/3 ", dump( $field ),$/ if ($debug);
824     }
825    
826     return \@m;
827     }
828    
829     =head2 _get_marc_leader
830    
831     Return leader from currently fetched record by L</_get_marc_fields>
832    
833     print WebPAC::Normalize::MARC::_get_marc_leader();
834    
835     =cut
836    
837     sub _get_marc_leader {
838     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
839     return $marc_leader->[ $fetch_pos ];
840     }
841    
842     =head2 _created_marc_records
843    
844     my $nr_records = _created_marc_records;
845    
846     =cut
847    
848     sub _created_marc_records {
849     return $#{ $marc_record } + 1 if $marc_record;
850     }
851    
852 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26