/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1109 - (hide annotations)
Sat Sep 6 09:54:08 2008 UTC (15 years, 8 months ago) by dpavlin
File size: 23096 byte(s)
 r1735@llin:  dpavlin | 2008-09-06 11:54:01 +0200
 cleanup test

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9 dpavlin 1108 marc_clone
10 dpavlin 1021 /;
11    
12     use strict;
13     use warnings;
14    
15 dpavlin 1036 use Storable qw/dclone/;
16 dpavlin 1021 use Data::Dump qw/dump/;
17     use Carp qw/confess/;
18    
19     use WebPAC::Normalize;
20    
21 dpavlin 1038 our $debug = 0;
22 dpavlin 1028
23 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
24     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
25    
26     our $rec;
27    
28 dpavlin 1021 =head1 NAME
29    
30     WebPAC::Normalize::MARC - create MARC/ISO2709 records
31    
32     =cut
33    
34     =head1 FUNCTIONS
35    
36     =head2 marc_template
37    
38     marc_template(
39     from => 225, to => 440,
40     subfields_rename => [
41     'a' => 'a',
42     'x' => 'x',
43     'v' => 'v',
44     'h' => 'n',
45     'i' => 'p',
46     'w' => 'v',
47     ],
48     isis_template => [
49     'a ; |v. |i',
50     'a. |i ; |w',
51     ],
52     marc_template => [
53     'a, |x ; |v. |n, |p ; |v',
54     'a ; |v. |p ; |v',
55     ],
56     );
57    
58 dpavlin 1026 Returns number of records produced.
59    
60 dpavlin 1021 =cut
61    
62 dpavlin 1039 my $created_with_marc_template;
63    
64 dpavlin 1021 sub marc_template {
65     my $args = {@_};
66 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
67 dpavlin 1021
68     foreach ( qw/subfields_rename isis_template marc_template/ ) {
69 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
70 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
71     }
72    
73     die "marc_template needs isis_template or marc_template"
74     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
75    
76     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
77     my $r = $rec->{ $args->{from} } || return;
78     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
79    
80     my @subfields_rename = @{ $args->{subfields_rename} };
81 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
82 dpavlin 1021
83     confess "need mapping in pairs for subfields_rename"
84     if $#subfields_rename % 2 != 1;
85    
86 dpavlin 1029 my ( $subfields_rename, $from_subfields );
87     our $to_subfields = {};
88 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
89     my ( $f, $t ) = (
90     $from_subfields->{ $from }++,
91     $to_subfields->{ $to }++
92     );
93     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
94     }
95 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
96     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
97     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
98 dpavlin 1021
99 dpavlin 1025 our $_template;
100 dpavlin 1021
101 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
102     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
103 dpavlin 1025
104 dpavlin 1021 my @marc_out;
105    
106 dpavlin 1023 sub _parse_template {
107 dpavlin 1024 my ( $name, $templates ) = @_;
108 dpavlin 1021
109 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
110 dpavlin 1025
111 dpavlin 1023 foreach my $template ( @{ $templates } ) {
112     our $count = {};
113 dpavlin 1025 our @order = ();
114 dpavlin 1023 sub my_count {
115     my $sf = shift;
116     my $nr = $count->{$sf}++;
117 dpavlin 1025 push @order, [ $sf, $nr ];
118 dpavlin 1023 return $sf . $nr;
119     }
120     my $pos_template = $template;
121     $pos_template =~ s/($fields_re)/my_count($1)/ge;
122     my $count_key = dump( $count );
123 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
124 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
125 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
126 dpavlin 1021 }
127 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
128 dpavlin 1021 }
129    
130 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
131     _parse_template( 'isis', $args->{isis_template} );
132 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
133 dpavlin 1023
134 dpavlin 1021 my $m;
135    
136 dpavlin 1044 our $from_rec = $rec->{ $args->{from} };
137 dpavlin 1021
138 dpavlin 1044 foreach my $r ( @$from_rec ) {
139    
140 dpavlin 1039 my $to = $args->{to};
141 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
142 dpavlin 1039 $m = [ $to, $i1, $i2 ];
143 dpavlin 1021
144 dpavlin 1039 $created_with_marc_template->{ $to }++;
145    
146 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
147 dpavlin 1021
148 dpavlin 1044 my ( $from_mapping, $from_count, $to_count );
149     our $to_mapping;
150 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
151 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
152 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
153     my $from_nr = $from_count->{$from_sf}++;
154     my $rename_to = $subfields_rename->{ $from_sf } ||
155     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
156     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
157     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
158 dpavlin 1021
159 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
160     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
161    
162     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
163 dpavlin 1021 }
164    
165 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
166 dpavlin 1021
167 dpavlin 1025 my $count_key = {
168     from => dump( $from_count ),
169     to => dump( $to_count),
170     };
171 dpavlin 1021
172 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
173 dpavlin 1021
174 dpavlin 1025 my $processed_templates = 0;
175 dpavlin 1021
176 dpavlin 1025 # this defines order of traversal
177     foreach ( qw/isis:from marc:to/ ) {
178     my ($name,$count_name) = split(/:/);
179 dpavlin 1021
180 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
181 dpavlin 1021
182 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
183     $processed_templates++;
184 dpavlin 1021
185 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
186 dpavlin 1025
187     our $fill_in = {};
188    
189     my @templates = split(/\|/, $template );
190 dpavlin 1031 @templates = ( $template ) unless @templates;
191 dpavlin 1025
192 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
193 dpavlin 1031
194 dpavlin 1025 foreach my $sf ( @templates ) {
195     sub fill_in {
196 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
197     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
198 dpavlin 1028 my ( $from_sf, $from_nr );
199 dpavlin 1025 if ( $name eq 'marc' ) {
200 dpavlin 1044 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
201 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
202 dpavlin 1025 } else {
203     ( $from_sf, $from_nr ) = ( $sf, $nr );
204     }
205     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
206     if ( ref( $v ) eq 'ARRAY' ) {
207 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
208 dpavlin 1025 } elsif ( $from_nr == 0 ) {
209 dpavlin 1031 $v = $pre . $v . $post;
210 dpavlin 1025 } else {
211     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
212     }
213 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
214     $fill_in->{$sf}->[$nr] = $v;
215     return $v;
216 dpavlin 1021 }
217 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
218 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
219     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
220 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
221 dpavlin 1021 }
222    
223 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
224 dpavlin 1021
225 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
226     my ( $sf, $nr ) = @$sf;
227 dpavlin 1062 my $v = $fill_in->{$sf}->[$nr];
228     die "can't find fill_in $sf/$nr" unless defined $v;
229 dpavlin 1026 if ( $name eq 'isis') {
230     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
231     }
232 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
233 dpavlin 1025 push @$m, ( $sf, $v );
234     }
235 dpavlin 1021
236 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
237 dpavlin 1025
238     push @marc_out, $m;
239 dpavlin 1029
240     last;
241 dpavlin 1025 }
242    
243     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
244 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
245 dpavlin 1021 }
246    
247    
248 dpavlin 1026 my $recs = 0;
249 dpavlin 1025
250 dpavlin 1021 foreach my $marc ( @marc_out ) {
251 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
252 dpavlin 1036 _marc_push( $marc );
253 dpavlin 1026 $recs++;
254 dpavlin 1021 }
255 dpavlin 1026
256 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
257    
258     return $recs;
259 dpavlin 1021 }
260    
261 dpavlin 1036 =head2 marc_leader
262    
263     Setup fields within MARC leader or get leader
264    
265     marc_leader('05','c');
266     my $leader = marc_leader();
267    
268     =cut
269    
270     sub marc_leader {
271     my ($offset,$value) = @_;
272    
273     if ($offset) {
274     $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
275     } else {
276    
277     if (defined($marc_leader)) {
278     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
279     return $marc_leader->[ $marc_record_offset ];
280     } else {
281     return;
282     }
283     }
284     }
285    
286     =head2 marc_fixed
287    
288     Create control/indentifier fields with values in fixed positions
289    
290     marc_fixed('008', 00, '070402');
291     marc_fixed('008', 39, '|');
292    
293     Positions not specified will be filled with spaces (C<0x20>).
294    
295     There will be no effort to extend last specified value to full length of
296     field in standard.
297    
298     =cut
299    
300     sub marc_fixed {
301     my ($f, $pos, $val) = @_;
302     die "need marc(field, position, value)" unless defined($f) && defined($pos);
303    
304     confess "need val" unless defined $val;
305    
306     my $update = 0;
307    
308     map {
309     if ($_->[0] eq $f) {
310     my $old = $_->[1];
311     if (length($old) <= $pos) {
312     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
313     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
314     } else {
315     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
316     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
317     }
318     $update++;
319     }
320     } @{ $marc_record->[ $marc_record_offset ] };
321    
322     if (! $update) {
323     my $v = ' ' x $pos . $val;
324     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
325     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
326     }
327     }
328    
329     =head2 marc
330    
331     Save value for MARC field
332    
333     marc('900','a', rec('200','a') );
334     marc('001', rec('000') );
335    
336     =cut
337    
338     sub marc {
339     my $f = shift or die "marc needs field";
340     die "marc field must be numer" unless ($f =~ /^\d+$/);
341    
342     my $sf;
343     if ($f >= 10) {
344     $sf = shift or die "marc needs subfield";
345     }
346    
347     foreach (@_) {
348     my $v = $_; # make var read-write for Encode
349     next unless (defined($v) && $v !~ /^\s*$/);
350 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
351 dpavlin 1036 if (defined $sf) {
352     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
353     } else {
354     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
355     }
356     }
357     }
358    
359     =head2 marc_repeatable_subfield
360    
361     Save values for MARC repetable subfield
362    
363     marc_repeatable_subfield('910', 'z', rec('909') );
364    
365     =cut
366    
367     sub marc_repeatable_subfield {
368     my ($f,$sf) = @_;
369     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
370     $marc_repeatable_subfield->{ $f . $sf }++;
371     marc(@_);
372     }
373    
374     =head2 marc_indicators
375    
376     Set both indicators for MARC field
377    
378     marc_indicators('900', ' ', 1);
379    
380     Any indicator value other than C<0-9> will be treated as undefined.
381    
382     =cut
383    
384     sub marc_indicators {
385     my $f = shift || die "marc_indicators need field!\n";
386     my ($i1,$i2) = @_;
387     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
388     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
389    
390     $i1 = ' ' if ($i1 !~ /^\d$/);
391     $i2 = ' ' if ($i2 !~ /^\d$/);
392     @{ $marc_indicators->{$f} } = ($i1,$i2);
393     }
394    
395 dpavlin 1047 sub _get_marc_indicators {
396     my $f = shift || confess "need field!\n";
397     return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
398     }
399    
400 dpavlin 1036 =head2 marc_compose
401    
402     Save values for each MARC subfield explicitly
403    
404     marc_compose('900',
405     'a', rec('200','a')
406     'b', rec('201','a')
407     'a', rec('200','b')
408     'c', rec('200','c')
409     );
410    
411     If you specify C<+> for subfield, value will be appended
412     to previous defined subfield.
413    
414     =cut
415    
416     sub marc_compose {
417     my $f = shift or die "marc_compose needs field";
418     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
419    
420 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
421 dpavlin 1036 my $m = [ $f, $i1, $i2 ];
422    
423     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
424    
425     if ($#_ % 2 != 1) {
426     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
427     }
428    
429     while (@_) {
430     my $sf = shift;
431     my $v = shift;
432    
433     next unless (defined($v) && $v !~ /^\s*$/);
434     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
435     if ($sf ne '+') {
436     push @$m, ( $sf, $v );
437     } else {
438     $m->[ $#$m ] .= $v;
439     }
440     }
441    
442     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
443    
444     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
445     }
446    
447     =head2 marc_duplicate
448    
449     Generate copy of current MARC record and continue working on copy
450    
451     marc_duplicate();
452    
453     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
454     C<< _get_marc_fields( offset => 42 ) >>.
455    
456     =cut
457    
458     sub marc_duplicate {
459     my $m = $marc_record->[ -1 ];
460     die "can't duplicate record which isn't defined" unless ($m);
461     push @{ $marc_record }, dclone( $m );
462     push @{ $marc_leader }, dclone( marc_leader() );
463     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
464     $marc_record_offset = $#{ $marc_record };
465     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
466    
467     }
468    
469     =head2 marc_remove
470    
471     Remove some field or subfield from MARC record.
472    
473     marc_remove('200');
474     marc_remove('200','a');
475    
476     This will erase field C<200> or C<200^a> from current MARC record.
477    
478     marc_remove('*');
479    
480     Will remove all fields in current MARC record.
481    
482     This is useful after calling C<marc_duplicate> or on it's own (but, you
483     should probably just remove that subfield definition if you are not
484     using C<marc_duplicate>).
485    
486     FIXME: support fields < 10.
487    
488     =cut
489    
490     sub marc_remove {
491     my ($f, $sf) = @_;
492    
493     die "marc_remove needs record number" unless defined($f);
494    
495     my $marc = $marc_record->[ $marc_record_offset ];
496    
497     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
498    
499     if ($f eq '*') {
500    
501     delete( $marc_record->[ $marc_record_offset ] );
502     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
503    
504     } else {
505    
506     my $i = 0;
507     foreach ( 0 .. $#{ $marc } ) {
508     last unless (defined $marc->[$i]);
509     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
510     if ($marc->[$i]->[0] eq $f) {
511     if (! defined $sf) {
512     # remove whole field
513     splice @$marc, $i, 1;
514     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
515     $i--;
516     } else {
517     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
518     my $o = ($j * 2) + 3;
519     if ($marc->[$i]->[$o] eq $sf) {
520     # remove subfield
521     splice @{$marc->[$i]}, $o, 2;
522     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
523     # is record now empty?
524     if ($#{ $marc->[$i] } == 2) {
525     splice @$marc, $i, 1;
526     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
527     $i--;
528     };
529     }
530     }
531     }
532     }
533     $i++;
534     }
535    
536     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
537    
538     $marc_record->[ $marc_record_offset ] = $marc;
539     }
540    
541     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
542     }
543    
544     =head2 marc_original_order
545    
546     Copy all subfields preserving original order to marc field.
547    
548     marc_original_order( marc_field_number, original_input_field_number );
549    
550     Please note that field numbers are consistent with other commands (marc
551     field number first), but somewhat counter-intuitive (destination and then
552     source).
553    
554     You might want to use this command if you are just renaming subfields or
555     using pre-processing modify_record in C<config.yml> and don't need any
556     post-processing or want to preserve order of original subfields.
557    
558    
559     =cut
560    
561     sub marc_original_order {
562    
563     my ($to, $from) = @_;
564     die "marc_original_order needs from and to fields\n" unless ($from && $to);
565    
566     return unless defined($rec->{$from});
567    
568     my $r = $rec->{$from};
569     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
570    
571 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
572 dpavlin 1036 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
573    
574     foreach my $d (@$r) {
575    
576     if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
577     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
578     next;
579     }
580    
581     my @sfs = @{ $d->{subfields} };
582    
583     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
584    
585     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
586    
587     my $m = [ $to, $i1, $i2 ];
588    
589     while (my $sf = shift @sfs) {
590    
591     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
592     my $offset = shift @sfs;
593     die "corrupted sufields specification for field $from\n" unless defined($offset);
594    
595     my $v;
596     if (ref($d->{$sf}) eq 'ARRAY') {
597     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
598     } elsif ($offset == 0) {
599     $v = $d->{$sf};
600     } else {
601     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
602     }
603     push @$m, ( $sf, $v ) if (defined($v));
604     }
605    
606     if ($#{$m} > 2) {
607     push @{ $marc_record->[ $marc_record_offset ] }, $m;
608     }
609     }
610    
611     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
612     }
613    
614    
615     =head2 marc_count
616    
617     Return number of MARC records created using L</marc_duplicate>.
618    
619     print "created ", marc_count(), " records";
620    
621     =cut
622    
623     sub marc_count {
624     return $#{ $marc_record };
625     }
626    
627 dpavlin 1108 =head2 marc_clone
628    
629     Clone marc records from input file, whole or just some fields/indicators
630    
631     marc_clone; # whole record
632    
633     =cut
634    
635     sub marc_clone {
636     foreach my $f ( keys %$rec ) {
637 dpavlin 1109 warn "## marc_clone $f\n" if $debug;
638 dpavlin 1108 marc_original_order( $f, $f );
639     }
640     }
641    
642 dpavlin 1039 =head1 PRIVATE FUNCTIONS
643    
644 dpavlin 1036 =head2 _marc_push
645    
646     _marc_push( $marc );
647    
648     =cut
649    
650     sub _marc_push {
651     my $marc = shift || die "no marc?";
652     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
653     }
654    
655     =head2 _clean
656    
657     Clean internal structures
658    
659     =cut
660    
661     sub _clean {
662 dpavlin 1039 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
663 dpavlin 1036 ($marc_record_offset, $marc_fetch_offset) = (0,0);
664     }
665    
666    
667     =head2 _get_marc_fields
668    
669     Get all fields defined by calls to C<marc>
670    
671     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
672    
673     We are using I<magic> which detect repeatable fields only from
674     sequence of field/subfield data generated by normalization.
675    
676 dpavlin 1039 This magic is disabled for all records created with C<marc_template>.
677    
678 dpavlin 1036 Repeatable field is created when there is second occurence of same subfield or
679     if any of indicators are different.
680    
681     This is sane for most cases. Something like:
682    
683     900a-1 900b-1 900c-1
684     900a-2 900b-2
685     900a-3
686    
687     will be created from any combination of:
688    
689     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
690    
691     and following rules:
692    
693     marc('900','a', rec('200','a') );
694     marc('900','b', rec('200','b') );
695     marc('900','c', rec('200','c') );
696    
697     which might not be what you have in mind. If you need repeatable subfield,
698     define it using C<marc_repeatable_subfield> like this:
699    
700     marc_repeatable_subfield('900','a');
701     marc('900','a', rec('200','a') );
702     marc('900','b', rec('200','b') );
703     marc('900','c', rec('200','c') );
704    
705     will create:
706    
707     900a-1 900a-2 900a-3 900b-1 900c-1
708     900b-2
709    
710     There is also support for returning next or specific using:
711    
712     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
713     # do something with $mf
714     }
715    
716     will always return fields from next MARC record or
717    
718     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
719    
720     will return 42th copy record (if it exists).
721    
722     =cut
723    
724     my $fetch_pos;
725    
726     sub _get_marc_fields {
727    
728     my $arg = {@_};
729     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
730     $fetch_pos = $marc_fetch_offset;
731     if ($arg->{offset}) {
732     $fetch_pos = $arg->{offset};
733     } elsif($arg->{fetch_next}) {
734     $marc_fetch_offset++;
735     }
736    
737     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
738    
739     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
740    
741     my $marc_rec = $marc_record->[ $fetch_pos ];
742    
743     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
744    
745     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
746    
747     # first, sort all existing fields
748     # XXX might not be needed, but modern perl might randomize elements in hash
749 dpavlin 1039 # my @sorted_marc_record = sort {
750     # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
751     # } @{ $marc_rec };
752 dpavlin 1036
753 dpavlin 1040 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
754 dpavlin 1036
755     # output marc fields
756     my @m;
757    
758     # count unique field-subfields (used for offset when walking to next subfield)
759     my $u;
760     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
761    
762     if ($debug) {
763     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
764     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
765     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
766     warn "## subfield count = ", dump( $u ), $/;
767     }
768    
769     my $len = $#sorted_marc_record;
770     my $visited;
771     my $i = 0;
772     my $field;
773    
774 dpavlin 1040 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
775    
776 dpavlin 1036 foreach ( 0 .. $len ) {
777    
778     # find next element which isn't visited
779     while ($visited->{$i}) {
780     $i = ($i + 1) % ($len + 1);
781     }
782    
783     # mark it visited
784     $visited->{$i}++;
785    
786     my $row = dclone( $sorted_marc_record[$i] );
787    
788 dpavlin 1039 if ( $created_with_marc_template->{ $row->[0] } ) {
789     push @m, $row;
790     warn "## copied marc_template created ", dump( $row ),$/ if $debug;
791     next;
792     }
793 dpavlin 1040
794 dpavlin 1036 # field and subfield which is key for
795     # marc_repeatable_subfield and u
796     my $fsf = $row->[0] . ( $row->[3] || '' );
797    
798     if ($debug > 1) {
799    
800     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
801     print "### this [$i]: ", dump( $row ),$/;
802     print "### sf: ", $row->[3], " vs ", $field->[3],
803     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
804     if ($#$field >= 0);
805    
806     }
807    
808     # if field exists
809     if ( $#$field >= 0 ) {
810     if (
811     $row->[0] ne $field->[0] || # field
812     $row->[1] ne $field->[1] || # i1
813     $row->[2] ne $field->[2] # i2
814     ) {
815     push @m, $field;
816     warn "## saved/1 ", dump( $field ),$/ if ($debug);
817     $field = $row;
818    
819     } elsif (
820     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
821     ||
822     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
823     ! $marc_repeatable_subfield->{ $fsf }
824     )
825     ) {
826     push @m, $field;
827     warn "## saved/2 ", dump( $field ),$/ if ($debug);
828     $field = $row;
829    
830     } else {
831     # append new subfields to existing field
832     push @$field, ( $row->[3], $row->[4] );
833     }
834     } else {
835     # insert first field
836     $field = $row;
837     }
838    
839     if (! $marc_repeatable_subfield->{ $fsf }) {
840     # make step to next subfield
841     $i = ($i + $u->{ $fsf } ) % ($len + 1);
842     }
843     }
844    
845     if ($#$field >= 0) {
846     push @m, $field;
847     warn "## saved/3 ", dump( $field ),$/ if ($debug);
848     }
849    
850     return \@m;
851     }
852    
853     =head2 _get_marc_leader
854    
855     Return leader from currently fetched record by L</_get_marc_fields>
856    
857     print WebPAC::Normalize::MARC::_get_marc_leader();
858    
859     =cut
860    
861     sub _get_marc_leader {
862     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
863     return $marc_leader->[ $fetch_pos ];
864     }
865    
866     =head2 _created_marc_records
867    
868     my $nr_records = _created_marc_records;
869    
870     =cut
871    
872     sub _created_marc_records {
873     return $#{ $marc_record } + 1 if $marc_record;
874     }
875    
876 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26