/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1044 - (hide annotations)
Mon Nov 12 14:18:49 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 22804 byte(s)
fix variable scoping

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9     /;
10    
11     use strict;
12     use warnings;
13    
14 dpavlin 1036 use Storable qw/dclone/;
15 dpavlin 1021 use Data::Dump qw/dump/;
16     use Carp qw/confess/;
17    
18     use WebPAC::Normalize;
19    
20 dpavlin 1038 our $debug = 0;
21 dpavlin 1028
22 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24    
25     our $rec;
26    
27 dpavlin 1021 =head1 NAME
28    
29     WebPAC::Normalize::MARC - create MARC/ISO2709 records
30    
31     =cut
32    
33     =head1 FUNCTIONS
34    
35     =head2 marc_template
36    
37     marc_template(
38     from => 225, to => 440,
39     subfields_rename => [
40     'a' => 'a',
41     'x' => 'x',
42     'v' => 'v',
43     'h' => 'n',
44     'i' => 'p',
45     'w' => 'v',
46     ],
47     isis_template => [
48     'a ; |v. |i',
49     'a. |i ; |w',
50     ],
51     marc_template => [
52     'a, |x ; |v. |n, |p ; |v',
53     'a ; |v. |p ; |v',
54     ],
55     );
56    
57 dpavlin 1026 Returns number of records produced.
58    
59 dpavlin 1021 =cut
60    
61 dpavlin 1039 my $created_with_marc_template;
62    
63 dpavlin 1021 sub marc_template {
64     my $args = {@_};
65 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
66 dpavlin 1021
67     foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70     }
71    
72     die "marc_template needs isis_template or marc_template"
73     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74    
75     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76     my $r = $rec->{ $args->{from} } || return;
77     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78    
79     my @subfields_rename = @{ $args->{subfields_rename} };
80 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81 dpavlin 1021
82     confess "need mapping in pairs for subfields_rename"
83     if $#subfields_rename % 2 != 1;
84    
85 dpavlin 1029 my ( $subfields_rename, $from_subfields );
86     our $to_subfields = {};
87 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88     my ( $f, $t ) = (
89     $from_subfields->{ $from }++,
90     $to_subfields->{ $to }++
91     );
92     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93     }
94 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97 dpavlin 1021
98 dpavlin 1025 our $_template;
99 dpavlin 1021
100 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102 dpavlin 1025
103 dpavlin 1021 my @marc_out;
104    
105 dpavlin 1023 sub _parse_template {
106 dpavlin 1024 my ( $name, $templates ) = @_;
107 dpavlin 1021
108 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109 dpavlin 1025
110 dpavlin 1023 foreach my $template ( @{ $templates } ) {
111     our $count = {};
112 dpavlin 1025 our @order = ();
113 dpavlin 1023 sub my_count {
114     my $sf = shift;
115     my $nr = $count->{$sf}++;
116 dpavlin 1025 push @order, [ $sf, $nr ];
117 dpavlin 1023 return $sf . $nr;
118     }
119     my $pos_template = $template;
120     $pos_template =~ s/($fields_re)/my_count($1)/ge;
121     my $count_key = dump( $count );
122 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 dpavlin 1021 }
126 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 dpavlin 1021 }
128    
129 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
130     _parse_template( 'isis', $args->{isis_template} );
131 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
132 dpavlin 1023
133 dpavlin 1021 my $m;
134    
135 dpavlin 1044 our $from_rec = $rec->{ $args->{from} };
136 dpavlin 1021
137 dpavlin 1044 foreach my $r ( @$from_rec ) {
138    
139 dpavlin 1021 my $i1 = $r->{i1} || ' ';
140     my $i2 = $r->{i2} || ' ';
141 dpavlin 1039 my $to = $args->{to};
142     $m = [ $to, $i1, $i2 ];
143 dpavlin 1021
144 dpavlin 1039 $created_with_marc_template->{ $to }++;
145    
146 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
147 dpavlin 1021
148 dpavlin 1044 my ( $from_mapping, $from_count, $to_count );
149     our $to_mapping;
150 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
151 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
152 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
153     my $from_nr = $from_count->{$from_sf}++;
154     my $rename_to = $subfields_rename->{ $from_sf } ||
155     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
156     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
157     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
158 dpavlin 1021
159 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
160     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
161    
162     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
163 dpavlin 1021 }
164    
165 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
166 dpavlin 1021
167 dpavlin 1025 my $count_key = {
168     from => dump( $from_count ),
169     to => dump( $to_count),
170     };
171 dpavlin 1021
172 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
173 dpavlin 1021
174 dpavlin 1025 my $processed_templates = 0;
175 dpavlin 1021
176 dpavlin 1025 # this defines order of traversal
177     foreach ( qw/isis:from marc:to/ ) {
178     my ($name,$count_name) = split(/:/);
179 dpavlin 1021
180 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
181 dpavlin 1021
182 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
183     $processed_templates++;
184 dpavlin 1021
185 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
186 dpavlin 1025
187     our $fill_in = {};
188    
189     my @templates = split(/\|/, $template );
190 dpavlin 1031 @templates = ( $template ) unless @templates;
191 dpavlin 1025
192 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
193 dpavlin 1031
194 dpavlin 1025 foreach my $sf ( @templates ) {
195     sub fill_in {
196 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
197     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
198 dpavlin 1028 my ( $from_sf, $from_nr );
199 dpavlin 1025 if ( $name eq 'marc' ) {
200 dpavlin 1044 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
201 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
202 dpavlin 1025 } else {
203     ( $from_sf, $from_nr ) = ( $sf, $nr );
204     }
205     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
206     if ( ref( $v ) eq 'ARRAY' ) {
207 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
208 dpavlin 1025 } elsif ( $from_nr == 0 ) {
209 dpavlin 1031 $v = $pre . $v . $post;
210 dpavlin 1025 } else {
211     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
212     }
213 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
214     $fill_in->{$sf}->[$nr] = $v;
215     return $v;
216 dpavlin 1021 }
217 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
218 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
219     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
220 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
221 dpavlin 1021 }
222    
223 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
224 dpavlin 1021
225 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
226     my ( $sf, $nr ) = @$sf;
227     my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
228 dpavlin 1026 if ( $name eq 'isis') {
229     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
230     }
231 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
232 dpavlin 1025 push @$m, ( $sf, $v );
233     }
234 dpavlin 1021
235 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
236 dpavlin 1025
237     push @marc_out, $m;
238 dpavlin 1029
239     last;
240 dpavlin 1025 }
241    
242     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
243 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
244 dpavlin 1021 }
245    
246    
247 dpavlin 1026 my $recs = 0;
248 dpavlin 1025
249 dpavlin 1021 foreach my $marc ( @marc_out ) {
250 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
251 dpavlin 1036 _marc_push( $marc );
252 dpavlin 1026 $recs++;
253 dpavlin 1021 }
254 dpavlin 1026
255 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
256    
257     return $recs;
258 dpavlin 1021 }
259    
260 dpavlin 1036 =head2 marc_leader
261    
262     Setup fields within MARC leader or get leader
263    
264     marc_leader('05','c');
265     my $leader = marc_leader();
266    
267     =cut
268    
269     sub marc_leader {
270     my ($offset,$value) = @_;
271    
272     if ($offset) {
273     $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
274     } else {
275    
276     if (defined($marc_leader)) {
277     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
278     return $marc_leader->[ $marc_record_offset ];
279     } else {
280     return;
281     }
282     }
283     }
284    
285     =head2 marc_fixed
286    
287     Create control/indentifier fields with values in fixed positions
288    
289     marc_fixed('008', 00, '070402');
290     marc_fixed('008', 39, '|');
291    
292     Positions not specified will be filled with spaces (C<0x20>).
293    
294     There will be no effort to extend last specified value to full length of
295     field in standard.
296    
297     =cut
298    
299     sub marc_fixed {
300     my ($f, $pos, $val) = @_;
301     die "need marc(field, position, value)" unless defined($f) && defined($pos);
302    
303     confess "need val" unless defined $val;
304    
305     my $update = 0;
306    
307     map {
308     if ($_->[0] eq $f) {
309     my $old = $_->[1];
310     if (length($old) <= $pos) {
311     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
312     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
313     } else {
314     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
315     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
316     }
317     $update++;
318     }
319     } @{ $marc_record->[ $marc_record_offset ] };
320    
321     if (! $update) {
322     my $v = ' ' x $pos . $val;
323     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
324     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
325     }
326     }
327    
328     =head2 marc
329    
330     Save value for MARC field
331    
332     marc('900','a', rec('200','a') );
333     marc('001', rec('000') );
334    
335     =cut
336    
337     sub marc {
338     my $f = shift or die "marc needs field";
339     die "marc field must be numer" unless ($f =~ /^\d+$/);
340    
341     my $sf;
342     if ($f >= 10) {
343     $sf = shift or die "marc needs subfield";
344     }
345    
346     foreach (@_) {
347     my $v = $_; # make var read-write for Encode
348     next unless (defined($v) && $v !~ /^\s*$/);
349     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
350     if (defined $sf) {
351     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
352     } else {
353     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
354     }
355     }
356     }
357    
358     =head2 marc_repeatable_subfield
359    
360     Save values for MARC repetable subfield
361    
362     marc_repeatable_subfield('910', 'z', rec('909') );
363    
364     =cut
365    
366     sub marc_repeatable_subfield {
367     my ($f,$sf) = @_;
368     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
369     $marc_repeatable_subfield->{ $f . $sf }++;
370     marc(@_);
371     }
372    
373     =head2 marc_indicators
374    
375     Set both indicators for MARC field
376    
377     marc_indicators('900', ' ', 1);
378    
379     Any indicator value other than C<0-9> will be treated as undefined.
380    
381     =cut
382    
383     sub marc_indicators {
384     my $f = shift || die "marc_indicators need field!\n";
385     my ($i1,$i2) = @_;
386     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
387     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
388    
389     $i1 = ' ' if ($i1 !~ /^\d$/);
390     $i2 = ' ' if ($i2 !~ /^\d$/);
391     @{ $marc_indicators->{$f} } = ($i1,$i2);
392     }
393    
394     =head2 marc_compose
395    
396     Save values for each MARC subfield explicitly
397    
398     marc_compose('900',
399     'a', rec('200','a')
400     'b', rec('201','a')
401     'a', rec('200','b')
402     'c', rec('200','c')
403     );
404    
405     If you specify C<+> for subfield, value will be appended
406     to previous defined subfield.
407    
408     =cut
409    
410     sub marc_compose {
411     my $f = shift or die "marc_compose needs field";
412     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
413    
414     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
415     my $m = [ $f, $i1, $i2 ];
416    
417     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
418    
419     if ($#_ % 2 != 1) {
420     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
421     }
422    
423     while (@_) {
424     my $sf = shift;
425     my $v = shift;
426    
427     next unless (defined($v) && $v !~ /^\s*$/);
428     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
429     if ($sf ne '+') {
430     push @$m, ( $sf, $v );
431     } else {
432     $m->[ $#$m ] .= $v;
433     }
434     }
435    
436     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
437    
438     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
439     }
440    
441     =head2 marc_duplicate
442    
443     Generate copy of current MARC record and continue working on copy
444    
445     marc_duplicate();
446    
447     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
448     C<< _get_marc_fields( offset => 42 ) >>.
449    
450     =cut
451    
452     sub marc_duplicate {
453     my $m = $marc_record->[ -1 ];
454     die "can't duplicate record which isn't defined" unless ($m);
455     push @{ $marc_record }, dclone( $m );
456     push @{ $marc_leader }, dclone( marc_leader() );
457     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
458     $marc_record_offset = $#{ $marc_record };
459     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
460    
461     }
462    
463     =head2 marc_remove
464    
465     Remove some field or subfield from MARC record.
466    
467     marc_remove('200');
468     marc_remove('200','a');
469    
470     This will erase field C<200> or C<200^a> from current MARC record.
471    
472     marc_remove('*');
473    
474     Will remove all fields in current MARC record.
475    
476     This is useful after calling C<marc_duplicate> or on it's own (but, you
477     should probably just remove that subfield definition if you are not
478     using C<marc_duplicate>).
479    
480     FIXME: support fields < 10.
481    
482     =cut
483    
484     sub marc_remove {
485     my ($f, $sf) = @_;
486    
487     die "marc_remove needs record number" unless defined($f);
488    
489     my $marc = $marc_record->[ $marc_record_offset ];
490    
491     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
492    
493     if ($f eq '*') {
494    
495     delete( $marc_record->[ $marc_record_offset ] );
496     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
497    
498     } else {
499    
500     my $i = 0;
501     foreach ( 0 .. $#{ $marc } ) {
502     last unless (defined $marc->[$i]);
503     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
504     if ($marc->[$i]->[0] eq $f) {
505     if (! defined $sf) {
506     # remove whole field
507     splice @$marc, $i, 1;
508     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
509     $i--;
510     } else {
511     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
512     my $o = ($j * 2) + 3;
513     if ($marc->[$i]->[$o] eq $sf) {
514     # remove subfield
515     splice @{$marc->[$i]}, $o, 2;
516     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
517     # is record now empty?
518     if ($#{ $marc->[$i] } == 2) {
519     splice @$marc, $i, 1;
520     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
521     $i--;
522     };
523     }
524     }
525     }
526     }
527     $i++;
528     }
529    
530     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
531    
532     $marc_record->[ $marc_record_offset ] = $marc;
533     }
534    
535     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
536     }
537    
538     =head2 marc_original_order
539    
540     Copy all subfields preserving original order to marc field.
541    
542     marc_original_order( marc_field_number, original_input_field_number );
543    
544     Please note that field numbers are consistent with other commands (marc
545     field number first), but somewhat counter-intuitive (destination and then
546     source).
547    
548     You might want to use this command if you are just renaming subfields or
549     using pre-processing modify_record in C<config.yml> and don't need any
550     post-processing or want to preserve order of original subfields.
551    
552    
553     =cut
554    
555     sub marc_original_order {
556    
557     my ($to, $from) = @_;
558     die "marc_original_order needs from and to fields\n" unless ($from && $to);
559    
560     return unless defined($rec->{$from});
561    
562     my $r = $rec->{$from};
563     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
564    
565     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
566     warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
567    
568     foreach my $d (@$r) {
569    
570     if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
571     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
572     next;
573     }
574    
575     my @sfs = @{ $d->{subfields} };
576    
577     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
578    
579     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
580    
581     my $m = [ $to, $i1, $i2 ];
582    
583     while (my $sf = shift @sfs) {
584    
585     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
586     my $offset = shift @sfs;
587     die "corrupted sufields specification for field $from\n" unless defined($offset);
588    
589     my $v;
590     if (ref($d->{$sf}) eq 'ARRAY') {
591     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
592     } elsif ($offset == 0) {
593     $v = $d->{$sf};
594     } else {
595     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
596     }
597     push @$m, ( $sf, $v ) if (defined($v));
598     }
599    
600     if ($#{$m} > 2) {
601     push @{ $marc_record->[ $marc_record_offset ] }, $m;
602     }
603     }
604    
605     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
606     }
607    
608    
609     =head2 marc_count
610    
611     Return number of MARC records created using L</marc_duplicate>.
612    
613     print "created ", marc_count(), " records";
614    
615     =cut
616    
617     sub marc_count {
618     return $#{ $marc_record };
619     }
620    
621 dpavlin 1039 =head1 PRIVATE FUNCTIONS
622    
623 dpavlin 1036 =head2 _marc_push
624    
625     _marc_push( $marc );
626    
627     =cut
628    
629     sub _marc_push {
630     my $marc = shift || die "no marc?";
631     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
632     }
633    
634     =head2 _clean
635    
636     Clean internal structures
637    
638     =cut
639    
640     sub _clean {
641 dpavlin 1039 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
642 dpavlin 1036 ($marc_record_offset, $marc_fetch_offset) = (0,0);
643     }
644    
645    
646     =head2 _get_marc_fields
647    
648     Get all fields defined by calls to C<marc>
649    
650     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
651    
652     We are using I<magic> which detect repeatable fields only from
653     sequence of field/subfield data generated by normalization.
654    
655 dpavlin 1039 This magic is disabled for all records created with C<marc_template>.
656    
657 dpavlin 1036 Repeatable field is created when there is second occurence of same subfield or
658     if any of indicators are different.
659    
660     This is sane for most cases. Something like:
661    
662     900a-1 900b-1 900c-1
663     900a-2 900b-2
664     900a-3
665    
666     will be created from any combination of:
667    
668     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
669    
670     and following rules:
671    
672     marc('900','a', rec('200','a') );
673     marc('900','b', rec('200','b') );
674     marc('900','c', rec('200','c') );
675    
676     which might not be what you have in mind. If you need repeatable subfield,
677     define it using C<marc_repeatable_subfield> like this:
678    
679     marc_repeatable_subfield('900','a');
680     marc('900','a', rec('200','a') );
681     marc('900','b', rec('200','b') );
682     marc('900','c', rec('200','c') );
683    
684     will create:
685    
686     900a-1 900a-2 900a-3 900b-1 900c-1
687     900b-2
688    
689     There is also support for returning next or specific using:
690    
691     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
692     # do something with $mf
693     }
694    
695     will always return fields from next MARC record or
696    
697     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
698    
699     will return 42th copy record (if it exists).
700    
701     =cut
702    
703     my $fetch_pos;
704    
705     sub _get_marc_fields {
706    
707     my $arg = {@_};
708     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
709     $fetch_pos = $marc_fetch_offset;
710     if ($arg->{offset}) {
711     $fetch_pos = $arg->{offset};
712     } elsif($arg->{fetch_next}) {
713     $marc_fetch_offset++;
714     }
715    
716     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
717    
718     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
719    
720     my $marc_rec = $marc_record->[ $fetch_pos ];
721    
722     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
723    
724     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
725    
726     # first, sort all existing fields
727     # XXX might not be needed, but modern perl might randomize elements in hash
728 dpavlin 1039 # my @sorted_marc_record = sort {
729     # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
730     # } @{ $marc_rec };
731 dpavlin 1036
732 dpavlin 1040 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
733 dpavlin 1036
734     # output marc fields
735     my @m;
736    
737     # count unique field-subfields (used for offset when walking to next subfield)
738     my $u;
739     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
740    
741     if ($debug) {
742     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
743     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
744     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
745     warn "## subfield count = ", dump( $u ), $/;
746     }
747    
748     my $len = $#sorted_marc_record;
749     my $visited;
750     my $i = 0;
751     my $field;
752    
753 dpavlin 1040 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
754    
755 dpavlin 1036 foreach ( 0 .. $len ) {
756    
757     # find next element which isn't visited
758     while ($visited->{$i}) {
759     $i = ($i + 1) % ($len + 1);
760     }
761    
762     # mark it visited
763     $visited->{$i}++;
764    
765     my $row = dclone( $sorted_marc_record[$i] );
766    
767 dpavlin 1039 if ( $created_with_marc_template->{ $row->[0] } ) {
768     push @m, $row;
769     warn "## copied marc_template created ", dump( $row ),$/ if $debug;
770     next;
771     }
772 dpavlin 1040
773 dpavlin 1036 # field and subfield which is key for
774     # marc_repeatable_subfield and u
775     my $fsf = $row->[0] . ( $row->[3] || '' );
776    
777     if ($debug > 1) {
778    
779     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
780     print "### this [$i]: ", dump( $row ),$/;
781     print "### sf: ", $row->[3], " vs ", $field->[3],
782     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
783     if ($#$field >= 0);
784    
785     }
786    
787     # if field exists
788     if ( $#$field >= 0 ) {
789     if (
790     $row->[0] ne $field->[0] || # field
791     $row->[1] ne $field->[1] || # i1
792     $row->[2] ne $field->[2] # i2
793     ) {
794     push @m, $field;
795     warn "## saved/1 ", dump( $field ),$/ if ($debug);
796     $field = $row;
797    
798     } elsif (
799     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
800     ||
801     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
802     ! $marc_repeatable_subfield->{ $fsf }
803     )
804     ) {
805     push @m, $field;
806     warn "## saved/2 ", dump( $field ),$/ if ($debug);
807     $field = $row;
808    
809     } else {
810     # append new subfields to existing field
811     push @$field, ( $row->[3], $row->[4] );
812     }
813     } else {
814     # insert first field
815     $field = $row;
816     }
817    
818     if (! $marc_repeatable_subfield->{ $fsf }) {
819     # make step to next subfield
820     $i = ($i + $u->{ $fsf } ) % ($len + 1);
821     }
822     }
823    
824     if ($#$field >= 0) {
825     push @m, $field;
826     warn "## saved/3 ", dump( $field ),$/ if ($debug);
827     }
828    
829     return \@m;
830     }
831    
832     =head2 _get_marc_leader
833    
834     Return leader from currently fetched record by L</_get_marc_fields>
835    
836     print WebPAC::Normalize::MARC::_get_marc_leader();
837    
838     =cut
839    
840     sub _get_marc_leader {
841     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
842     return $marc_leader->[ $fetch_pos ];
843     }
844    
845     =head2 _created_marc_records
846    
847     my $nr_records = _created_marc_records;
848    
849     =cut
850    
851     sub _created_marc_records {
852     return $#{ $marc_record } + 1 if $marc_record;
853     }
854    
855 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26