/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1047 - (hide annotations)
Mon Nov 19 15:56:05 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 22808 byte(s)
 r1631@llin:  dpavlin | 2007-11-19 16:56:04 +0100
 marc_template now respect marc_indicators, code cleanup

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9     /;
10    
11     use strict;
12     use warnings;
13    
14 dpavlin 1036 use Storable qw/dclone/;
15 dpavlin 1021 use Data::Dump qw/dump/;
16     use Carp qw/confess/;
17    
18     use WebPAC::Normalize;
19    
20 dpavlin 1038 our $debug = 0;
21 dpavlin 1028
22 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24    
25     our $rec;
26    
27 dpavlin 1021 =head1 NAME
28    
29     WebPAC::Normalize::MARC - create MARC/ISO2709 records
30    
31     =cut
32    
33     =head1 FUNCTIONS
34    
35     =head2 marc_template
36    
37     marc_template(
38     from => 225, to => 440,
39     subfields_rename => [
40     'a' => 'a',
41     'x' => 'x',
42     'v' => 'v',
43     'h' => 'n',
44     'i' => 'p',
45     'w' => 'v',
46     ],
47     isis_template => [
48     'a ; |v. |i',
49     'a. |i ; |w',
50     ],
51     marc_template => [
52     'a, |x ; |v. |n, |p ; |v',
53     'a ; |v. |p ; |v',
54     ],
55     );
56    
57 dpavlin 1026 Returns number of records produced.
58    
59 dpavlin 1021 =cut
60    
61 dpavlin 1039 my $created_with_marc_template;
62    
63 dpavlin 1021 sub marc_template {
64     my $args = {@_};
65 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
66 dpavlin 1021
67     foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70     }
71    
72     die "marc_template needs isis_template or marc_template"
73     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74    
75     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76     my $r = $rec->{ $args->{from} } || return;
77     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78    
79     my @subfields_rename = @{ $args->{subfields_rename} };
80 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81 dpavlin 1021
82     confess "need mapping in pairs for subfields_rename"
83     if $#subfields_rename % 2 != 1;
84    
85 dpavlin 1029 my ( $subfields_rename, $from_subfields );
86     our $to_subfields = {};
87 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88     my ( $f, $t ) = (
89     $from_subfields->{ $from }++,
90     $to_subfields->{ $to }++
91     );
92     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93     }
94 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97 dpavlin 1021
98 dpavlin 1025 our $_template;
99 dpavlin 1021
100 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102 dpavlin 1025
103 dpavlin 1021 my @marc_out;
104    
105 dpavlin 1023 sub _parse_template {
106 dpavlin 1024 my ( $name, $templates ) = @_;
107 dpavlin 1021
108 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109 dpavlin 1025
110 dpavlin 1023 foreach my $template ( @{ $templates } ) {
111     our $count = {};
112 dpavlin 1025 our @order = ();
113 dpavlin 1023 sub my_count {
114     my $sf = shift;
115     my $nr = $count->{$sf}++;
116 dpavlin 1025 push @order, [ $sf, $nr ];
117 dpavlin 1023 return $sf . $nr;
118     }
119     my $pos_template = $template;
120     $pos_template =~ s/($fields_re)/my_count($1)/ge;
121     my $count_key = dump( $count );
122 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 dpavlin 1021 }
126 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 dpavlin 1021 }
128    
129 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
130     _parse_template( 'isis', $args->{isis_template} );
131 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
132 dpavlin 1023
133 dpavlin 1021 my $m;
134    
135 dpavlin 1044 our $from_rec = $rec->{ $args->{from} };
136 dpavlin 1021
137 dpavlin 1044 foreach my $r ( @$from_rec ) {
138    
139 dpavlin 1039 my $to = $args->{to};
140 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
141 dpavlin 1039 $m = [ $to, $i1, $i2 ];
142 dpavlin 1021
143 dpavlin 1039 $created_with_marc_template->{ $to }++;
144    
145 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
146 dpavlin 1021
147 dpavlin 1044 my ( $from_mapping, $from_count, $to_count );
148     our $to_mapping;
149 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
150 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
151 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
152     my $from_nr = $from_count->{$from_sf}++;
153     my $rename_to = $subfields_rename->{ $from_sf } ||
154     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
155     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
156     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
157 dpavlin 1021
158 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
159     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
160    
161     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
162 dpavlin 1021 }
163    
164 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
165 dpavlin 1021
166 dpavlin 1025 my $count_key = {
167     from => dump( $from_count ),
168     to => dump( $to_count),
169     };
170 dpavlin 1021
171 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
172 dpavlin 1021
173 dpavlin 1025 my $processed_templates = 0;
174 dpavlin 1021
175 dpavlin 1025 # this defines order of traversal
176     foreach ( qw/isis:from marc:to/ ) {
177     my ($name,$count_name) = split(/:/);
178 dpavlin 1021
179 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
180 dpavlin 1021
181 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
182     $processed_templates++;
183 dpavlin 1021
184 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
185 dpavlin 1025
186     our $fill_in = {};
187    
188     my @templates = split(/\|/, $template );
189 dpavlin 1031 @templates = ( $template ) unless @templates;
190 dpavlin 1025
191 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
192 dpavlin 1031
193 dpavlin 1025 foreach my $sf ( @templates ) {
194     sub fill_in {
195 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
196     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
197 dpavlin 1028 my ( $from_sf, $from_nr );
198 dpavlin 1025 if ( $name eq 'marc' ) {
199 dpavlin 1044 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
200 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
201 dpavlin 1025 } else {
202     ( $from_sf, $from_nr ) = ( $sf, $nr );
203     }
204     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
205     if ( ref( $v ) eq 'ARRAY' ) {
206 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
207 dpavlin 1025 } elsif ( $from_nr == 0 ) {
208 dpavlin 1031 $v = $pre . $v . $post;
209 dpavlin 1025 } else {
210     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
211     }
212 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
213     $fill_in->{$sf}->[$nr] = $v;
214     return $v;
215 dpavlin 1021 }
216 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
217 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
218     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
219 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
220 dpavlin 1021 }
221    
222 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
223 dpavlin 1021
224 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
225     my ( $sf, $nr ) = @$sf;
226     my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
227 dpavlin 1026 if ( $name eq 'isis') {
228     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
229     }
230 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
231 dpavlin 1025 push @$m, ( $sf, $v );
232     }
233 dpavlin 1021
234 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
235 dpavlin 1025
236     push @marc_out, $m;
237 dpavlin 1029
238     last;
239 dpavlin 1025 }
240    
241     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
242 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
243 dpavlin 1021 }
244    
245    
246 dpavlin 1026 my $recs = 0;
247 dpavlin 1025
248 dpavlin 1021 foreach my $marc ( @marc_out ) {
249 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
250 dpavlin 1036 _marc_push( $marc );
251 dpavlin 1026 $recs++;
252 dpavlin 1021 }
253 dpavlin 1026
254 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
255    
256     return $recs;
257 dpavlin 1021 }
258    
259 dpavlin 1036 =head2 marc_leader
260    
261     Setup fields within MARC leader or get leader
262    
263     marc_leader('05','c');
264     my $leader = marc_leader();
265    
266     =cut
267    
268     sub marc_leader {
269     my ($offset,$value) = @_;
270    
271     if ($offset) {
272     $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
273     } else {
274    
275     if (defined($marc_leader)) {
276     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
277     return $marc_leader->[ $marc_record_offset ];
278     } else {
279     return;
280     }
281     }
282     }
283    
284     =head2 marc_fixed
285    
286     Create control/indentifier fields with values in fixed positions
287    
288     marc_fixed('008', 00, '070402');
289     marc_fixed('008', 39, '|');
290    
291     Positions not specified will be filled with spaces (C<0x20>).
292    
293     There will be no effort to extend last specified value to full length of
294     field in standard.
295    
296     =cut
297    
298     sub marc_fixed {
299     my ($f, $pos, $val) = @_;
300     die "need marc(field, position, value)" unless defined($f) && defined($pos);
301    
302     confess "need val" unless defined $val;
303    
304     my $update = 0;
305    
306     map {
307     if ($_->[0] eq $f) {
308     my $old = $_->[1];
309     if (length($old) <= $pos) {
310     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
311     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
312     } else {
313     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
314     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
315     }
316     $update++;
317     }
318     } @{ $marc_record->[ $marc_record_offset ] };
319    
320     if (! $update) {
321     my $v = ' ' x $pos . $val;
322     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
323     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
324     }
325     }
326    
327     =head2 marc
328    
329     Save value for MARC field
330    
331     marc('900','a', rec('200','a') );
332     marc('001', rec('000') );
333    
334     =cut
335    
336     sub marc {
337     my $f = shift or die "marc needs field";
338     die "marc field must be numer" unless ($f =~ /^\d+$/);
339    
340     my $sf;
341     if ($f >= 10) {
342     $sf = shift or die "marc needs subfield";
343     }
344    
345     foreach (@_) {
346     my $v = $_; # make var read-write for Encode
347     next unless (defined($v) && $v !~ /^\s*$/);
348 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
349 dpavlin 1036 if (defined $sf) {
350     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
351     } else {
352     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
353     }
354     }
355     }
356    
357     =head2 marc_repeatable_subfield
358    
359     Save values for MARC repetable subfield
360    
361     marc_repeatable_subfield('910', 'z', rec('909') );
362    
363     =cut
364    
365     sub marc_repeatable_subfield {
366     my ($f,$sf) = @_;
367     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
368     $marc_repeatable_subfield->{ $f . $sf }++;
369     marc(@_);
370     }
371    
372     =head2 marc_indicators
373    
374     Set both indicators for MARC field
375    
376     marc_indicators('900', ' ', 1);
377    
378     Any indicator value other than C<0-9> will be treated as undefined.
379    
380     =cut
381    
382     sub marc_indicators {
383     my $f = shift || die "marc_indicators need field!\n";
384     my ($i1,$i2) = @_;
385     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
386     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
387    
388     $i1 = ' ' if ($i1 !~ /^\d$/);
389     $i2 = ' ' if ($i2 !~ /^\d$/);
390     @{ $marc_indicators->{$f} } = ($i1,$i2);
391     }
392    
393 dpavlin 1047 sub _get_marc_indicators {
394     my $f = shift || confess "need field!\n";
395     return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
396     }
397    
398 dpavlin 1036 =head2 marc_compose
399    
400     Save values for each MARC subfield explicitly
401    
402     marc_compose('900',
403     'a', rec('200','a')
404     'b', rec('201','a')
405     'a', rec('200','b')
406     'c', rec('200','c')
407     );
408    
409     If you specify C<+> for subfield, value will be appended
410     to previous defined subfield.
411    
412     =cut
413    
414     sub marc_compose {
415     my $f = shift or die "marc_compose needs field";
416     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
417    
418 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
419 dpavlin 1036 my $m = [ $f, $i1, $i2 ];
420    
421     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
422    
423     if ($#_ % 2 != 1) {
424     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
425     }
426    
427     while (@_) {
428     my $sf = shift;
429     my $v = shift;
430    
431     next unless (defined($v) && $v !~ /^\s*$/);
432     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
433     if ($sf ne '+') {
434     push @$m, ( $sf, $v );
435     } else {
436     $m->[ $#$m ] .= $v;
437     }
438     }
439    
440     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
441    
442     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
443     }
444    
445     =head2 marc_duplicate
446    
447     Generate copy of current MARC record and continue working on copy
448    
449     marc_duplicate();
450    
451     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
452     C<< _get_marc_fields( offset => 42 ) >>.
453    
454     =cut
455    
456     sub marc_duplicate {
457     my $m = $marc_record->[ -1 ];
458     die "can't duplicate record which isn't defined" unless ($m);
459     push @{ $marc_record }, dclone( $m );
460     push @{ $marc_leader }, dclone( marc_leader() );
461     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
462     $marc_record_offset = $#{ $marc_record };
463     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
464    
465     }
466    
467     =head2 marc_remove
468    
469     Remove some field or subfield from MARC record.
470    
471     marc_remove('200');
472     marc_remove('200','a');
473    
474     This will erase field C<200> or C<200^a> from current MARC record.
475    
476     marc_remove('*');
477    
478     Will remove all fields in current MARC record.
479    
480     This is useful after calling C<marc_duplicate> or on it's own (but, you
481     should probably just remove that subfield definition if you are not
482     using C<marc_duplicate>).
483    
484     FIXME: support fields < 10.
485    
486     =cut
487    
488     sub marc_remove {
489     my ($f, $sf) = @_;
490    
491     die "marc_remove needs record number" unless defined($f);
492    
493     my $marc = $marc_record->[ $marc_record_offset ];
494    
495     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
496    
497     if ($f eq '*') {
498    
499     delete( $marc_record->[ $marc_record_offset ] );
500     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
501    
502     } else {
503    
504     my $i = 0;
505     foreach ( 0 .. $#{ $marc } ) {
506     last unless (defined $marc->[$i]);
507     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
508     if ($marc->[$i]->[0] eq $f) {
509     if (! defined $sf) {
510     # remove whole field
511     splice @$marc, $i, 1;
512     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
513     $i--;
514     } else {
515     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
516     my $o = ($j * 2) + 3;
517     if ($marc->[$i]->[$o] eq $sf) {
518     # remove subfield
519     splice @{$marc->[$i]}, $o, 2;
520     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
521     # is record now empty?
522     if ($#{ $marc->[$i] } == 2) {
523     splice @$marc, $i, 1;
524     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
525     $i--;
526     };
527     }
528     }
529     }
530     }
531     $i++;
532     }
533    
534     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
535    
536     $marc_record->[ $marc_record_offset ] = $marc;
537     }
538    
539     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
540     }
541    
542     =head2 marc_original_order
543    
544     Copy all subfields preserving original order to marc field.
545    
546     marc_original_order( marc_field_number, original_input_field_number );
547    
548     Please note that field numbers are consistent with other commands (marc
549     field number first), but somewhat counter-intuitive (destination and then
550     source).
551    
552     You might want to use this command if you are just renaming subfields or
553     using pre-processing modify_record in C<config.yml> and don't need any
554     post-processing or want to preserve order of original subfields.
555    
556    
557     =cut
558    
559     sub marc_original_order {
560    
561     my ($to, $from) = @_;
562     die "marc_original_order needs from and to fields\n" unless ($from && $to);
563    
564     return unless defined($rec->{$from});
565    
566     my $r = $rec->{$from};
567     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
568    
569 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
570 dpavlin 1036 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
571    
572     foreach my $d (@$r) {
573    
574     if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
575     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
576     next;
577     }
578    
579     my @sfs = @{ $d->{subfields} };
580    
581     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
582    
583     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
584    
585     my $m = [ $to, $i1, $i2 ];
586    
587     while (my $sf = shift @sfs) {
588    
589     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
590     my $offset = shift @sfs;
591     die "corrupted sufields specification for field $from\n" unless defined($offset);
592    
593     my $v;
594     if (ref($d->{$sf}) eq 'ARRAY') {
595     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
596     } elsif ($offset == 0) {
597     $v = $d->{$sf};
598     } else {
599     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
600     }
601     push @$m, ( $sf, $v ) if (defined($v));
602     }
603    
604     if ($#{$m} > 2) {
605     push @{ $marc_record->[ $marc_record_offset ] }, $m;
606     }
607     }
608    
609     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
610     }
611    
612    
613     =head2 marc_count
614    
615     Return number of MARC records created using L</marc_duplicate>.
616    
617     print "created ", marc_count(), " records";
618    
619     =cut
620    
621     sub marc_count {
622     return $#{ $marc_record };
623     }
624    
625 dpavlin 1039 =head1 PRIVATE FUNCTIONS
626    
627 dpavlin 1036 =head2 _marc_push
628    
629     _marc_push( $marc );
630    
631     =cut
632    
633     sub _marc_push {
634     my $marc = shift || die "no marc?";
635     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
636     }
637    
638     =head2 _clean
639    
640     Clean internal structures
641    
642     =cut
643    
644     sub _clean {
645 dpavlin 1039 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
646 dpavlin 1036 ($marc_record_offset, $marc_fetch_offset) = (0,0);
647     }
648    
649    
650     =head2 _get_marc_fields
651    
652     Get all fields defined by calls to C<marc>
653    
654     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
655    
656     We are using I<magic> which detect repeatable fields only from
657     sequence of field/subfield data generated by normalization.
658    
659 dpavlin 1039 This magic is disabled for all records created with C<marc_template>.
660    
661 dpavlin 1036 Repeatable field is created when there is second occurence of same subfield or
662     if any of indicators are different.
663    
664     This is sane for most cases. Something like:
665    
666     900a-1 900b-1 900c-1
667     900a-2 900b-2
668     900a-3
669    
670     will be created from any combination of:
671    
672     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
673    
674     and following rules:
675    
676     marc('900','a', rec('200','a') );
677     marc('900','b', rec('200','b') );
678     marc('900','c', rec('200','c') );
679    
680     which might not be what you have in mind. If you need repeatable subfield,
681     define it using C<marc_repeatable_subfield> like this:
682    
683     marc_repeatable_subfield('900','a');
684     marc('900','a', rec('200','a') );
685     marc('900','b', rec('200','b') );
686     marc('900','c', rec('200','c') );
687    
688     will create:
689    
690     900a-1 900a-2 900a-3 900b-1 900c-1
691     900b-2
692    
693     There is also support for returning next or specific using:
694    
695     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
696     # do something with $mf
697     }
698    
699     will always return fields from next MARC record or
700    
701     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
702    
703     will return 42th copy record (if it exists).
704    
705     =cut
706    
707     my $fetch_pos;
708    
709     sub _get_marc_fields {
710    
711     my $arg = {@_};
712     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
713     $fetch_pos = $marc_fetch_offset;
714     if ($arg->{offset}) {
715     $fetch_pos = $arg->{offset};
716     } elsif($arg->{fetch_next}) {
717     $marc_fetch_offset++;
718     }
719    
720     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
721    
722     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
723    
724     my $marc_rec = $marc_record->[ $fetch_pos ];
725    
726     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
727    
728     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
729    
730     # first, sort all existing fields
731     # XXX might not be needed, but modern perl might randomize elements in hash
732 dpavlin 1039 # my @sorted_marc_record = sort {
733     # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
734     # } @{ $marc_rec };
735 dpavlin 1036
736 dpavlin 1040 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
737 dpavlin 1036
738     # output marc fields
739     my @m;
740    
741     # count unique field-subfields (used for offset when walking to next subfield)
742     my $u;
743     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
744    
745     if ($debug) {
746     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
747     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
748     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
749     warn "## subfield count = ", dump( $u ), $/;
750     }
751    
752     my $len = $#sorted_marc_record;
753     my $visited;
754     my $i = 0;
755     my $field;
756    
757 dpavlin 1040 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
758    
759 dpavlin 1036 foreach ( 0 .. $len ) {
760    
761     # find next element which isn't visited
762     while ($visited->{$i}) {
763     $i = ($i + 1) % ($len + 1);
764     }
765    
766     # mark it visited
767     $visited->{$i}++;
768    
769     my $row = dclone( $sorted_marc_record[$i] );
770    
771 dpavlin 1039 if ( $created_with_marc_template->{ $row->[0] } ) {
772     push @m, $row;
773     warn "## copied marc_template created ", dump( $row ),$/ if $debug;
774     next;
775     }
776 dpavlin 1040
777 dpavlin 1036 # field and subfield which is key for
778     # marc_repeatable_subfield and u
779     my $fsf = $row->[0] . ( $row->[3] || '' );
780    
781     if ($debug > 1) {
782    
783     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
784     print "### this [$i]: ", dump( $row ),$/;
785     print "### sf: ", $row->[3], " vs ", $field->[3],
786     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
787     if ($#$field >= 0);
788    
789     }
790    
791     # if field exists
792     if ( $#$field >= 0 ) {
793     if (
794     $row->[0] ne $field->[0] || # field
795     $row->[1] ne $field->[1] || # i1
796     $row->[2] ne $field->[2] # i2
797     ) {
798     push @m, $field;
799     warn "## saved/1 ", dump( $field ),$/ if ($debug);
800     $field = $row;
801    
802     } elsif (
803     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
804     ||
805     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
806     ! $marc_repeatable_subfield->{ $fsf }
807     )
808     ) {
809     push @m, $field;
810     warn "## saved/2 ", dump( $field ),$/ if ($debug);
811     $field = $row;
812    
813     } else {
814     # append new subfields to existing field
815     push @$field, ( $row->[3], $row->[4] );
816     }
817     } else {
818     # insert first field
819     $field = $row;
820     }
821    
822     if (! $marc_repeatable_subfield->{ $fsf }) {
823     # make step to next subfield
824     $i = ($i + $u->{ $fsf } ) % ($len + 1);
825     }
826     }
827    
828     if ($#$field >= 0) {
829     push @m, $field;
830     warn "## saved/3 ", dump( $field ),$/ if ($debug);
831     }
832    
833     return \@m;
834     }
835    
836     =head2 _get_marc_leader
837    
838     Return leader from currently fetched record by L</_get_marc_fields>
839    
840     print WebPAC::Normalize::MARC::_get_marc_leader();
841    
842     =cut
843    
844     sub _get_marc_leader {
845     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
846     return $marc_leader->[ $fetch_pos ];
847     }
848    
849     =head2 _created_marc_records
850    
851     my $nr_records = _created_marc_records;
852    
853     =cut
854    
855     sub _created_marc_records {
856     return $#{ $marc_record } + 1 if $marc_record;
857     }
858    
859 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26