/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1368 - (hide annotations)
Mon Mar 12 19:00:00 2012 UTC (12 years, 2 months ago) by dpavlin
File size: 23577 byte(s)
marc_original_order support for leader and fixed fields

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9 dpavlin 1108 marc_clone
10 dpavlin 1021 /;
11    
12     use strict;
13     use warnings;
14    
15 dpavlin 1036 use Storable qw/dclone/;
16 dpavlin 1021 use Data::Dump qw/dump/;
17     use Carp qw/confess/;
18    
19     use WebPAC::Normalize;
20    
21 dpavlin 1038 our $debug = 0;
22 dpavlin 1028
23 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
24     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
25    
26     our $rec;
27    
28 dpavlin 1021 =head1 NAME
29    
30     WebPAC::Normalize::MARC - create MARC/ISO2709 records
31    
32     =cut
33    
34     =head1 FUNCTIONS
35    
36     =head2 marc_template
37    
38     marc_template(
39     from => 225, to => 440,
40     subfields_rename => [
41     'a' => 'a',
42     'x' => 'x',
43     'v' => 'v',
44     'h' => 'n',
45     'i' => 'p',
46     'w' => 'v',
47     ],
48     isis_template => [
49     'a ; |v. |i',
50     'a. |i ; |w',
51     ],
52     marc_template => [
53     'a, |x ; |v. |n, |p ; |v',
54     'a ; |v. |p ; |v',
55     ],
56     );
57    
58 dpavlin 1026 Returns number of records produced.
59    
60 dpavlin 1021 =cut
61    
62 dpavlin 1039 my $created_with_marc_template;
63    
64 dpavlin 1021 sub marc_template {
65     my $args = {@_};
66 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
67 dpavlin 1021
68     foreach ( qw/subfields_rename isis_template marc_template/ ) {
69 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
70 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
71     }
72    
73     die "marc_template needs isis_template or marc_template"
74     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
75    
76     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
77     my $r = $rec->{ $args->{from} } || return;
78     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
79    
80     my @subfields_rename = @{ $args->{subfields_rename} };
81 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
82 dpavlin 1021
83     confess "need mapping in pairs for subfields_rename"
84     if $#subfields_rename % 2 != 1;
85    
86 dpavlin 1029 my ( $subfields_rename, $from_subfields );
87     our $to_subfields = {};
88 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
89     my ( $f, $t ) = (
90     $from_subfields->{ $from }++,
91     $to_subfields->{ $to }++
92     );
93     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
94     }
95 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
96     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
97     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
98 dpavlin 1021
99 dpavlin 1025 our $_template;
100 dpavlin 1021
101 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
102     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
103 dpavlin 1025
104 dpavlin 1021 my @marc_out;
105    
106 dpavlin 1023 sub _parse_template {
107 dpavlin 1024 my ( $name, $templates ) = @_;
108 dpavlin 1021
109 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
110 dpavlin 1025
111 dpavlin 1023 foreach my $template ( @{ $templates } ) {
112     our $count = {};
113 dpavlin 1025 our @order = ();
114 dpavlin 1023 sub my_count {
115     my $sf = shift;
116     my $nr = $count->{$sf}++;
117 dpavlin 1025 push @order, [ $sf, $nr ];
118 dpavlin 1023 return $sf . $nr;
119     }
120     my $pos_template = $template;
121     $pos_template =~ s/($fields_re)/my_count($1)/ge;
122     my $count_key = dump( $count );
123 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
124 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
125 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
126 dpavlin 1021 }
127 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
128 dpavlin 1021 }
129    
130 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
131     _parse_template( 'isis', $args->{isis_template} );
132 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
133 dpavlin 1023
134 dpavlin 1021 my $m;
135    
136 dpavlin 1044 our $from_rec = $rec->{ $args->{from} };
137 dpavlin 1021
138 dpavlin 1044 foreach my $r ( @$from_rec ) {
139    
140 dpavlin 1039 my $to = $args->{to};
141 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
142 dpavlin 1039 $m = [ $to, $i1, $i2 ];
143 dpavlin 1021
144 dpavlin 1039 $created_with_marc_template->{ $to }++;
145    
146 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
147 dpavlin 1021
148 dpavlin 1044 my ( $from_mapping, $from_count, $to_count );
149     our $to_mapping;
150 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
151 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
152 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
153     my $from_nr = $from_count->{$from_sf}++;
154     my $rename_to = $subfields_rename->{ $from_sf } ||
155     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
156     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
157     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
158 dpavlin 1021
159 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
160     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
161    
162     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
163 dpavlin 1021 }
164    
165 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
166 dpavlin 1021
167 dpavlin 1025 my $count_key = {
168     from => dump( $from_count ),
169     to => dump( $to_count),
170     };
171 dpavlin 1021
172 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
173 dpavlin 1021
174 dpavlin 1025 my $processed_templates = 0;
175 dpavlin 1021
176 dpavlin 1025 # this defines order of traversal
177     foreach ( qw/isis:from marc:to/ ) {
178     my ($name,$count_name) = split(/:/);
179 dpavlin 1021
180 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
181 dpavlin 1021
182 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
183     $processed_templates++;
184 dpavlin 1021
185 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
186 dpavlin 1025
187     our $fill_in = {};
188    
189     my @templates = split(/\|/, $template );
190 dpavlin 1031 @templates = ( $template ) unless @templates;
191 dpavlin 1025
192 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
193 dpavlin 1031
194 dpavlin 1025 foreach my $sf ( @templates ) {
195     sub fill_in {
196 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
197     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
198 dpavlin 1028 my ( $from_sf, $from_nr );
199 dpavlin 1025 if ( $name eq 'marc' ) {
200 dpavlin 1044 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
201 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
202 dpavlin 1025 } else {
203     ( $from_sf, $from_nr ) = ( $sf, $nr );
204     }
205     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
206     if ( ref( $v ) eq 'ARRAY' ) {
207 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
208 dpavlin 1025 } elsif ( $from_nr == 0 ) {
209 dpavlin 1031 $v = $pre . $v . $post;
210 dpavlin 1025 } else {
211     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
212     }
213 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
214     $fill_in->{$sf}->[$nr] = $v;
215     return $v;
216 dpavlin 1021 }
217 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
218 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
219     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
220 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
221 dpavlin 1021 }
222    
223 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
224 dpavlin 1021
225 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
226     my ( $sf, $nr ) = @$sf;
227 dpavlin 1062 my $v = $fill_in->{$sf}->[$nr];
228     die "can't find fill_in $sf/$nr" unless defined $v;
229 dpavlin 1026 if ( $name eq 'isis') {
230     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
231     }
232 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
233 dpavlin 1025 push @$m, ( $sf, $v );
234     }
235 dpavlin 1021
236 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
237 dpavlin 1025
238     push @marc_out, $m;
239 dpavlin 1029
240     last;
241 dpavlin 1025 }
242    
243     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
244 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
245 dpavlin 1021 }
246    
247    
248 dpavlin 1026 my $recs = 0;
249 dpavlin 1025
250 dpavlin 1021 foreach my $marc ( @marc_out ) {
251 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
252 dpavlin 1036 _marc_push( $marc );
253 dpavlin 1026 $recs++;
254 dpavlin 1021 }
255 dpavlin 1026
256 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
257    
258     return $recs;
259 dpavlin 1021 }
260    
261 dpavlin 1036 =head2 marc_leader
262    
263     Setup fields within MARC leader or get leader
264    
265     marc_leader('05','c');
266     my $leader = marc_leader();
267    
268     =cut
269    
270     sub marc_leader {
271     my ($offset,$value) = @_;
272    
273 dpavlin 1368 if ( defined $offset && defined $value ) {
274 dpavlin 1036 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
275     } else {
276    
277     if (defined($marc_leader)) {
278     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
279     return $marc_leader->[ $marc_record_offset ];
280     } else {
281     return;
282     }
283     }
284     }
285    
286     =head2 marc_fixed
287    
288     Create control/indentifier fields with values in fixed positions
289    
290     marc_fixed('008', 00, '070402');
291     marc_fixed('008', 39, '|');
292    
293     Positions not specified will be filled with spaces (C<0x20>).
294    
295     There will be no effort to extend last specified value to full length of
296     field in standard.
297    
298     =cut
299    
300     sub marc_fixed {
301     my ($f, $pos, $val) = @_;
302     die "need marc(field, position, value)" unless defined($f) && defined($pos);
303    
304     confess "need val" unless defined $val;
305    
306     my $update = 0;
307    
308     map {
309     if ($_->[0] eq $f) {
310     my $old = $_->[1];
311     if (length($old) <= $pos) {
312     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
313     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
314 dpavlin 1362 } elsif ( defined $old ) {
315 dpavlin 1036 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
316     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
317     }
318     $update++;
319     }
320     } @{ $marc_record->[ $marc_record_offset ] };
321    
322     if (! $update) {
323     my $v = ' ' x $pos . $val;
324     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
325     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
326     }
327     }
328    
329     =head2 marc
330    
331     Save value for MARC field
332    
333     marc('900','a', rec('200','a') );
334     marc('001', rec('000') );
335    
336     =cut
337    
338     sub marc {
339     my $f = shift or die "marc needs field";
340     die "marc field must be numer" unless ($f =~ /^\d+$/);
341    
342     my $sf;
343     if ($f >= 10) {
344     $sf = shift or die "marc needs subfield";
345     }
346    
347     foreach (@_) {
348     my $v = $_; # make var read-write for Encode
349 dpavlin 1362 #Encode::_utf8_on($v); # FIXME we probably need this
350 dpavlin 1036 next unless (defined($v) && $v !~ /^\s*$/);
351 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
352 dpavlin 1036 if (defined $sf) {
353     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
354     } else {
355     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
356     }
357     }
358     }
359    
360     =head2 marc_repeatable_subfield
361    
362     Save values for MARC repetable subfield
363    
364     marc_repeatable_subfield('910', 'z', rec('909') );
365    
366     =cut
367    
368     sub marc_repeatable_subfield {
369     my ($f,$sf) = @_;
370     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
371     $marc_repeatable_subfield->{ $f . $sf }++;
372     marc(@_);
373     }
374    
375     =head2 marc_indicators
376    
377     Set both indicators for MARC field
378    
379     marc_indicators('900', ' ', 1);
380    
381     Any indicator value other than C<0-9> will be treated as undefined.
382    
383     =cut
384    
385     sub marc_indicators {
386     my $f = shift || die "marc_indicators need field!\n";
387     my ($i1,$i2) = @_;
388     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
389     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
390    
391     $i1 = ' ' if ($i1 !~ /^\d$/);
392     $i2 = ' ' if ($i2 !~ /^\d$/);
393     @{ $marc_indicators->{$f} } = ($i1,$i2);
394     }
395    
396 dpavlin 1047 sub _get_marc_indicators {
397     my $f = shift || confess "need field!\n";
398     return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
399     }
400    
401 dpavlin 1036 =head2 marc_compose
402    
403     Save values for each MARC subfield explicitly
404    
405     marc_compose('900',
406     'a', rec('200','a')
407     'b', rec('201','a')
408     'a', rec('200','b')
409     'c', rec('200','c')
410     );
411    
412     If you specify C<+> for subfield, value will be appended
413     to previous defined subfield.
414    
415     =cut
416    
417     sub marc_compose {
418     my $f = shift or die "marc_compose needs field";
419     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
420    
421 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
422 dpavlin 1036 my $m = [ $f, $i1, $i2 ];
423    
424     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
425    
426     if ($#_ % 2 != 1) {
427     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
428     }
429    
430     while (@_) {
431     my $sf = shift;
432     my $v = shift;
433    
434     next unless (defined($v) && $v !~ /^\s*$/);
435     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
436     if ($sf ne '+') {
437     push @$m, ( $sf, $v );
438     } else {
439     $m->[ $#$m ] .= $v;
440     }
441     }
442    
443     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
444    
445     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
446     }
447    
448     =head2 marc_duplicate
449    
450     Generate copy of current MARC record and continue working on copy
451    
452     marc_duplicate();
453    
454     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
455     C<< _get_marc_fields( offset => 42 ) >>.
456    
457     =cut
458    
459     sub marc_duplicate {
460     my $m = $marc_record->[ -1 ];
461     die "can't duplicate record which isn't defined" unless ($m);
462     push @{ $marc_record }, dclone( $m );
463     push @{ $marc_leader }, dclone( marc_leader() );
464     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
465     $marc_record_offset = $#{ $marc_record };
466     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
467    
468     }
469    
470     =head2 marc_remove
471    
472     Remove some field or subfield from MARC record.
473    
474     marc_remove('200');
475     marc_remove('200','a');
476    
477     This will erase field C<200> or C<200^a> from current MARC record.
478    
479     marc_remove('*');
480    
481     Will remove all fields in current MARC record.
482    
483     This is useful after calling C<marc_duplicate> or on it's own (but, you
484     should probably just remove that subfield definition if you are not
485     using C<marc_duplicate>).
486    
487     FIXME: support fields < 10.
488    
489     =cut
490    
491     sub marc_remove {
492     my ($f, $sf) = @_;
493    
494     die "marc_remove needs record number" unless defined($f);
495    
496     my $marc = $marc_record->[ $marc_record_offset ];
497    
498     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
499    
500     if ($f eq '*') {
501    
502     delete( $marc_record->[ $marc_record_offset ] );
503     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
504    
505     } else {
506    
507     my $i = 0;
508     foreach ( 0 .. $#{ $marc } ) {
509     last unless (defined $marc->[$i]);
510     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
511     if ($marc->[$i]->[0] eq $f) {
512     if (! defined $sf) {
513     # remove whole field
514     splice @$marc, $i, 1;
515     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
516     $i--;
517     } else {
518     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
519     my $o = ($j * 2) + 3;
520     if ($marc->[$i]->[$o] eq $sf) {
521     # remove subfield
522     splice @{$marc->[$i]}, $o, 2;
523     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
524     # is record now empty?
525     if ($#{ $marc->[$i] } == 2) {
526     splice @$marc, $i, 1;
527     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
528     $i--;
529     };
530     }
531     }
532     }
533     }
534     $i++;
535     }
536    
537     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
538    
539     $marc_record->[ $marc_record_offset ] = $marc;
540     }
541    
542     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
543     }
544    
545     =head2 marc_original_order
546    
547     Copy all subfields preserving original order to marc field.
548    
549     marc_original_order( marc_field_number, original_input_field_number );
550    
551     Please note that field numbers are consistent with other commands (marc
552     field number first), but somewhat counter-intuitive (destination and then
553     source).
554    
555     You might want to use this command if you are just renaming subfields or
556     using pre-processing modify_record in C<config.yml> and don't need any
557     post-processing or want to preserve order of original subfields.
558    
559    
560     =cut
561    
562     sub marc_original_order {
563    
564     my ($to, $from) = @_;
565     die "marc_original_order needs from and to fields\n" unless ($from && $to);
566    
567     return unless defined($rec->{$from});
568    
569     my $r = $rec->{$from};
570     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
571    
572     warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
573    
574     foreach my $d (@$r) {
575    
576 dpavlin 1111 if ( ! ref($d) ) {
577     # scalar
578 dpavlin 1368 if ( $to eq 'leader' ) {
579     marc_leader( 0, $d );
580     } elsif ( $to < 100 ) {
581     marc_fixed( $to, 0, $d );
582     } else {
583     warn "## marc_original_order($to,$from) skipped: ",dump( $d );
584     }
585 dpavlin 1111 next;
586     }
587    
588 dpavlin 1036 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
589     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
590     next;
591     }
592    
593     my @sfs = @{ $d->{subfields} };
594    
595     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
596    
597     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
598    
599 dpavlin 1368 my ($i1,$i2) = _get_marc_indicators( $from );
600     $i1 = $d->{i1} if exists $d->{i1};
601     $i2 = $d->{i2} if exists $d->{i2};
602    
603 dpavlin 1036 my $m = [ $to, $i1, $i2 ];
604    
605     while (my $sf = shift @sfs) {
606    
607     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
608     my $offset = shift @sfs;
609     die "corrupted sufields specification for field $from\n" unless defined($offset);
610    
611     my $v;
612     if (ref($d->{$sf}) eq 'ARRAY') {
613     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
614     } elsif ($offset == 0) {
615     $v = $d->{$sf};
616     } else {
617 dpavlin 1111 die "field $from subfield '$sf' need occurence $offset which doesn't exist in ", dump($d);
618 dpavlin 1036 }
619     push @$m, ( $sf, $v ) if (defined($v));
620     }
621    
622     if ($#{$m} > 2) {
623     push @{ $marc_record->[ $marc_record_offset ] }, $m;
624     }
625     }
626    
627     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
628     }
629    
630    
631     =head2 marc_count
632    
633     Return number of MARC records created using L</marc_duplicate>.
634    
635     print "created ", marc_count(), " records";
636    
637     =cut
638    
639     sub marc_count {
640     return $#{ $marc_record };
641     }
642    
643 dpavlin 1108 =head2 marc_clone
644    
645     Clone marc records from input file, whole or just some fields/indicators
646    
647     marc_clone; # whole record
648    
649     =cut
650    
651     sub marc_clone {
652 dpavlin 1111 warn "### marc_clone rec: ",dump( $rec ) if $debug > 2;
653 dpavlin 1368 foreach my $f ( sort keys %$rec ) {
654 dpavlin 1109 warn "## marc_clone $f\n" if $debug;
655 dpavlin 1108 marc_original_order( $f, $f );
656     }
657     }
658    
659 dpavlin 1039 =head1 PRIVATE FUNCTIONS
660    
661 dpavlin 1036 =head2 _marc_push
662    
663     _marc_push( $marc );
664    
665     =cut
666    
667     sub _marc_push {
668     my $marc = shift || die "no marc?";
669     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
670     }
671    
672     =head2 _clean
673    
674     Clean internal structures
675    
676     =cut
677    
678     sub _clean {
679 dpavlin 1039 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
680 dpavlin 1036 ($marc_record_offset, $marc_fetch_offset) = (0,0);
681     }
682    
683    
684     =head2 _get_marc_fields
685    
686     Get all fields defined by calls to C<marc>
687    
688     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
689    
690     We are using I<magic> which detect repeatable fields only from
691     sequence of field/subfield data generated by normalization.
692    
693 dpavlin 1039 This magic is disabled for all records created with C<marc_template>.
694    
695 dpavlin 1036 Repeatable field is created when there is second occurence of same subfield or
696     if any of indicators are different.
697    
698     This is sane for most cases. Something like:
699    
700     900a-1 900b-1 900c-1
701     900a-2 900b-2
702     900a-3
703    
704     will be created from any combination of:
705    
706     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
707    
708     and following rules:
709    
710     marc('900','a', rec('200','a') );
711     marc('900','b', rec('200','b') );
712     marc('900','c', rec('200','c') );
713    
714     which might not be what you have in mind. If you need repeatable subfield,
715     define it using C<marc_repeatable_subfield> like this:
716    
717     marc_repeatable_subfield('900','a');
718     marc('900','a', rec('200','a') );
719     marc('900','b', rec('200','b') );
720     marc('900','c', rec('200','c') );
721    
722     will create:
723    
724     900a-1 900a-2 900a-3 900b-1 900c-1
725     900b-2
726    
727     There is also support for returning next or specific using:
728    
729     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
730     # do something with $mf
731     }
732    
733     will always return fields from next MARC record or
734    
735     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
736    
737     will return 42th copy record (if it exists).
738    
739     =cut
740    
741     my $fetch_pos;
742    
743     sub _get_marc_fields {
744    
745     my $arg = {@_};
746     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
747     $fetch_pos = $marc_fetch_offset;
748     if ($arg->{offset}) {
749     $fetch_pos = $arg->{offset};
750     } elsif($arg->{fetch_next}) {
751     $marc_fetch_offset++;
752     }
753    
754     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
755    
756     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
757    
758     my $marc_rec = $marc_record->[ $fetch_pos ];
759    
760     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
761    
762     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
763    
764     # first, sort all existing fields
765     # XXX might not be needed, but modern perl might randomize elements in hash
766 dpavlin 1039 # my @sorted_marc_record = sort {
767     # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
768     # } @{ $marc_rec };
769 dpavlin 1036
770 dpavlin 1040 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
771 dpavlin 1036
772     # output marc fields
773     my @m;
774    
775     # count unique field-subfields (used for offset when walking to next subfield)
776     my $u;
777     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
778    
779     if ($debug) {
780     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
781     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
782     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
783     warn "## subfield count = ", dump( $u ), $/;
784     }
785    
786     my $len = $#sorted_marc_record;
787     my $visited;
788     my $i = 0;
789     my $field;
790    
791 dpavlin 1040 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
792    
793 dpavlin 1036 foreach ( 0 .. $len ) {
794    
795     # find next element which isn't visited
796     while ($visited->{$i}) {
797     $i = ($i + 1) % ($len + 1);
798     }
799    
800     # mark it visited
801     $visited->{$i}++;
802    
803     my $row = dclone( $sorted_marc_record[$i] );
804    
805 dpavlin 1039 if ( $created_with_marc_template->{ $row->[0] } ) {
806     push @m, $row;
807     warn "## copied marc_template created ", dump( $row ),$/ if $debug;
808     next;
809     }
810 dpavlin 1040
811 dpavlin 1036 # field and subfield which is key for
812     # marc_repeatable_subfield and u
813     my $fsf = $row->[0] . ( $row->[3] || '' );
814    
815     if ($debug > 1) {
816    
817     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
818     print "### this [$i]: ", dump( $row ),$/;
819     print "### sf: ", $row->[3], " vs ", $field->[3],
820     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
821     if ($#$field >= 0);
822    
823     }
824    
825     # if field exists
826     if ( $#$field >= 0 ) {
827     if (
828     $row->[0] ne $field->[0] || # field
829     $row->[1] ne $field->[1] || # i1
830     $row->[2] ne $field->[2] # i2
831     ) {
832     push @m, $field;
833     warn "## saved/1 ", dump( $field ),$/ if ($debug);
834     $field = $row;
835    
836     } elsif (
837     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
838     ||
839     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
840     ! $marc_repeatable_subfield->{ $fsf }
841     )
842     ) {
843     push @m, $field;
844     warn "## saved/2 ", dump( $field ),$/ if ($debug);
845     $field = $row;
846    
847     } else {
848     # append new subfields to existing field
849     push @$field, ( $row->[3], $row->[4] );
850     }
851     } else {
852     # insert first field
853     $field = $row;
854     }
855    
856     if (! $marc_repeatable_subfield->{ $fsf }) {
857     # make step to next subfield
858     $i = ($i + $u->{ $fsf } ) % ($len + 1);
859     }
860     }
861    
862     if ($#$field >= 0) {
863     push @m, $field;
864     warn "## saved/3 ", dump( $field ),$/ if ($debug);
865     }
866    
867     return \@m;
868     }
869    
870     =head2 _get_marc_leader
871    
872     Return leader from currently fetched record by L</_get_marc_fields>
873    
874     print WebPAC::Normalize::MARC::_get_marc_leader();
875    
876     =cut
877    
878     sub _get_marc_leader {
879     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
880     return $marc_leader->[ $fetch_pos ];
881     }
882    
883     =head2 _created_marc_records
884    
885     my $nr_records = _created_marc_records;
886    
887     =cut
888    
889     sub _created_marc_records {
890     return $#{ $marc_record } + 1 if $marc_record;
891     }
892    
893 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26