/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1062 - (hide annotations)
Wed Nov 21 10:09:55 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 22828 byte(s)
 r1661@llin:  dpavlin | 2007-11-21 11:09:52 +0100
 fix marc_template hadling of field value 0

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9     /;
10    
11     use strict;
12     use warnings;
13    
14 dpavlin 1036 use Storable qw/dclone/;
15 dpavlin 1021 use Data::Dump qw/dump/;
16     use Carp qw/confess/;
17    
18     use WebPAC::Normalize;
19    
20 dpavlin 1038 our $debug = 0;
21 dpavlin 1028
22 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24    
25     our $rec;
26    
27 dpavlin 1021 =head1 NAME
28    
29     WebPAC::Normalize::MARC - create MARC/ISO2709 records
30    
31     =cut
32    
33     =head1 FUNCTIONS
34    
35     =head2 marc_template
36    
37     marc_template(
38     from => 225, to => 440,
39     subfields_rename => [
40     'a' => 'a',
41     'x' => 'x',
42     'v' => 'v',
43     'h' => 'n',
44     'i' => 'p',
45     'w' => 'v',
46     ],
47     isis_template => [
48     'a ; |v. |i',
49     'a. |i ; |w',
50     ],
51     marc_template => [
52     'a, |x ; |v. |n, |p ; |v',
53     'a ; |v. |p ; |v',
54     ],
55     );
56    
57 dpavlin 1026 Returns number of records produced.
58    
59 dpavlin 1021 =cut
60    
61 dpavlin 1039 my $created_with_marc_template;
62    
63 dpavlin 1021 sub marc_template {
64     my $args = {@_};
65 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
66 dpavlin 1021
67     foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70     }
71    
72     die "marc_template needs isis_template or marc_template"
73     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74    
75     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76     my $r = $rec->{ $args->{from} } || return;
77     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78    
79     my @subfields_rename = @{ $args->{subfields_rename} };
80 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81 dpavlin 1021
82     confess "need mapping in pairs for subfields_rename"
83     if $#subfields_rename % 2 != 1;
84    
85 dpavlin 1029 my ( $subfields_rename, $from_subfields );
86     our $to_subfields = {};
87 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88     my ( $f, $t ) = (
89     $from_subfields->{ $from }++,
90     $to_subfields->{ $to }++
91     );
92     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93     }
94 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97 dpavlin 1021
98 dpavlin 1025 our $_template;
99 dpavlin 1021
100 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102 dpavlin 1025
103 dpavlin 1021 my @marc_out;
104    
105 dpavlin 1023 sub _parse_template {
106 dpavlin 1024 my ( $name, $templates ) = @_;
107 dpavlin 1021
108 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109 dpavlin 1025
110 dpavlin 1023 foreach my $template ( @{ $templates } ) {
111     our $count = {};
112 dpavlin 1025 our @order = ();
113 dpavlin 1023 sub my_count {
114     my $sf = shift;
115     my $nr = $count->{$sf}++;
116 dpavlin 1025 push @order, [ $sf, $nr ];
117 dpavlin 1023 return $sf . $nr;
118     }
119     my $pos_template = $template;
120     $pos_template =~ s/($fields_re)/my_count($1)/ge;
121     my $count_key = dump( $count );
122 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 dpavlin 1021 }
126 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 dpavlin 1021 }
128    
129 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
130     _parse_template( 'isis', $args->{isis_template} );
131 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
132 dpavlin 1023
133 dpavlin 1021 my $m;
134    
135 dpavlin 1044 our $from_rec = $rec->{ $args->{from} };
136 dpavlin 1021
137 dpavlin 1044 foreach my $r ( @$from_rec ) {
138    
139 dpavlin 1039 my $to = $args->{to};
140 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
141 dpavlin 1039 $m = [ $to, $i1, $i2 ];
142 dpavlin 1021
143 dpavlin 1039 $created_with_marc_template->{ $to }++;
144    
145 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
146 dpavlin 1021
147 dpavlin 1044 my ( $from_mapping, $from_count, $to_count );
148     our $to_mapping;
149 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
150 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
151 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
152     my $from_nr = $from_count->{$from_sf}++;
153     my $rename_to = $subfields_rename->{ $from_sf } ||
154     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
155     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
156     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
157 dpavlin 1021
158 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
159     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
160    
161     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
162 dpavlin 1021 }
163    
164 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
165 dpavlin 1021
166 dpavlin 1025 my $count_key = {
167     from => dump( $from_count ),
168     to => dump( $to_count),
169     };
170 dpavlin 1021
171 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
172 dpavlin 1021
173 dpavlin 1025 my $processed_templates = 0;
174 dpavlin 1021
175 dpavlin 1025 # this defines order of traversal
176     foreach ( qw/isis:from marc:to/ ) {
177     my ($name,$count_name) = split(/:/);
178 dpavlin 1021
179 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
180 dpavlin 1021
181 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
182     $processed_templates++;
183 dpavlin 1021
184 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
185 dpavlin 1025
186     our $fill_in = {};
187    
188     my @templates = split(/\|/, $template );
189 dpavlin 1031 @templates = ( $template ) unless @templates;
190 dpavlin 1025
191 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
192 dpavlin 1031
193 dpavlin 1025 foreach my $sf ( @templates ) {
194     sub fill_in {
195 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
196     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
197 dpavlin 1028 my ( $from_sf, $from_nr );
198 dpavlin 1025 if ( $name eq 'marc' ) {
199 dpavlin 1044 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
200 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
201 dpavlin 1025 } else {
202     ( $from_sf, $from_nr ) = ( $sf, $nr );
203     }
204     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
205     if ( ref( $v ) eq 'ARRAY' ) {
206 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
207 dpavlin 1025 } elsif ( $from_nr == 0 ) {
208 dpavlin 1031 $v = $pre . $v . $post;
209 dpavlin 1025 } else {
210     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
211     }
212 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
213     $fill_in->{$sf}->[$nr] = $v;
214     return $v;
215 dpavlin 1021 }
216 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
217 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
218     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
219 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
220 dpavlin 1021 }
221    
222 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
223 dpavlin 1021
224 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
225     my ( $sf, $nr ) = @$sf;
226 dpavlin 1062 my $v = $fill_in->{$sf}->[$nr];
227     die "can't find fill_in $sf/$nr" unless defined $v;
228 dpavlin 1026 if ( $name eq 'isis') {
229     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
230     }
231 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
232 dpavlin 1025 push @$m, ( $sf, $v );
233     }
234 dpavlin 1021
235 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
236 dpavlin 1025
237     push @marc_out, $m;
238 dpavlin 1029
239     last;
240 dpavlin 1025 }
241    
242     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
243 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
244 dpavlin 1021 }
245    
246    
247 dpavlin 1026 my $recs = 0;
248 dpavlin 1025
249 dpavlin 1021 foreach my $marc ( @marc_out ) {
250 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
251 dpavlin 1036 _marc_push( $marc );
252 dpavlin 1026 $recs++;
253 dpavlin 1021 }
254 dpavlin 1026
255 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
256    
257     return $recs;
258 dpavlin 1021 }
259    
260 dpavlin 1036 =head2 marc_leader
261    
262     Setup fields within MARC leader or get leader
263    
264     marc_leader('05','c');
265     my $leader = marc_leader();
266    
267     =cut
268    
269     sub marc_leader {
270     my ($offset,$value) = @_;
271    
272     if ($offset) {
273     $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
274     } else {
275    
276     if (defined($marc_leader)) {
277     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
278     return $marc_leader->[ $marc_record_offset ];
279     } else {
280     return;
281     }
282     }
283     }
284    
285     =head2 marc_fixed
286    
287     Create control/indentifier fields with values in fixed positions
288    
289     marc_fixed('008', 00, '070402');
290     marc_fixed('008', 39, '|');
291    
292     Positions not specified will be filled with spaces (C<0x20>).
293    
294     There will be no effort to extend last specified value to full length of
295     field in standard.
296    
297     =cut
298    
299     sub marc_fixed {
300     my ($f, $pos, $val) = @_;
301     die "need marc(field, position, value)" unless defined($f) && defined($pos);
302    
303     confess "need val" unless defined $val;
304    
305     my $update = 0;
306    
307     map {
308     if ($_->[0] eq $f) {
309     my $old = $_->[1];
310     if (length($old) <= $pos) {
311     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
312     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
313     } else {
314     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
315     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
316     }
317     $update++;
318     }
319     } @{ $marc_record->[ $marc_record_offset ] };
320    
321     if (! $update) {
322     my $v = ' ' x $pos . $val;
323     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
324     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
325     }
326     }
327    
328     =head2 marc
329    
330     Save value for MARC field
331    
332     marc('900','a', rec('200','a') );
333     marc('001', rec('000') );
334    
335     =cut
336    
337     sub marc {
338     my $f = shift or die "marc needs field";
339     die "marc field must be numer" unless ($f =~ /^\d+$/);
340    
341     my $sf;
342     if ($f >= 10) {
343     $sf = shift or die "marc needs subfield";
344     }
345    
346     foreach (@_) {
347     my $v = $_; # make var read-write for Encode
348     next unless (defined($v) && $v !~ /^\s*$/);
349 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
350 dpavlin 1036 if (defined $sf) {
351     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
352     } else {
353     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
354     }
355     }
356     }
357    
358     =head2 marc_repeatable_subfield
359    
360     Save values for MARC repetable subfield
361    
362     marc_repeatable_subfield('910', 'z', rec('909') );
363    
364     =cut
365    
366     sub marc_repeatable_subfield {
367     my ($f,$sf) = @_;
368     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
369     $marc_repeatable_subfield->{ $f . $sf }++;
370     marc(@_);
371     }
372    
373     =head2 marc_indicators
374    
375     Set both indicators for MARC field
376    
377     marc_indicators('900', ' ', 1);
378    
379     Any indicator value other than C<0-9> will be treated as undefined.
380    
381     =cut
382    
383     sub marc_indicators {
384     my $f = shift || die "marc_indicators need field!\n";
385     my ($i1,$i2) = @_;
386     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
387     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
388    
389     $i1 = ' ' if ($i1 !~ /^\d$/);
390     $i2 = ' ' if ($i2 !~ /^\d$/);
391     @{ $marc_indicators->{$f} } = ($i1,$i2);
392     }
393    
394 dpavlin 1047 sub _get_marc_indicators {
395     my $f = shift || confess "need field!\n";
396     return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
397     }
398    
399 dpavlin 1036 =head2 marc_compose
400    
401     Save values for each MARC subfield explicitly
402    
403     marc_compose('900',
404     'a', rec('200','a')
405     'b', rec('201','a')
406     'a', rec('200','b')
407     'c', rec('200','c')
408     );
409    
410     If you specify C<+> for subfield, value will be appended
411     to previous defined subfield.
412    
413     =cut
414    
415     sub marc_compose {
416     my $f = shift or die "marc_compose needs field";
417     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
418    
419 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $f );
420 dpavlin 1036 my $m = [ $f, $i1, $i2 ];
421    
422     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
423    
424     if ($#_ % 2 != 1) {
425     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
426     }
427    
428     while (@_) {
429     my $sf = shift;
430     my $v = shift;
431    
432     next unless (defined($v) && $v !~ /^\s*$/);
433     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
434     if ($sf ne '+') {
435     push @$m, ( $sf, $v );
436     } else {
437     $m->[ $#$m ] .= $v;
438     }
439     }
440    
441     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
442    
443     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
444     }
445    
446     =head2 marc_duplicate
447    
448     Generate copy of current MARC record and continue working on copy
449    
450     marc_duplicate();
451    
452     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
453     C<< _get_marc_fields( offset => 42 ) >>.
454    
455     =cut
456    
457     sub marc_duplicate {
458     my $m = $marc_record->[ -1 ];
459     die "can't duplicate record which isn't defined" unless ($m);
460     push @{ $marc_record }, dclone( $m );
461     push @{ $marc_leader }, dclone( marc_leader() );
462     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
463     $marc_record_offset = $#{ $marc_record };
464     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
465    
466     }
467    
468     =head2 marc_remove
469    
470     Remove some field or subfield from MARC record.
471    
472     marc_remove('200');
473     marc_remove('200','a');
474    
475     This will erase field C<200> or C<200^a> from current MARC record.
476    
477     marc_remove('*');
478    
479     Will remove all fields in current MARC record.
480    
481     This is useful after calling C<marc_duplicate> or on it's own (but, you
482     should probably just remove that subfield definition if you are not
483     using C<marc_duplicate>).
484    
485     FIXME: support fields < 10.
486    
487     =cut
488    
489     sub marc_remove {
490     my ($f, $sf) = @_;
491    
492     die "marc_remove needs record number" unless defined($f);
493    
494     my $marc = $marc_record->[ $marc_record_offset ];
495    
496     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
497    
498     if ($f eq '*') {
499    
500     delete( $marc_record->[ $marc_record_offset ] );
501     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
502    
503     } else {
504    
505     my $i = 0;
506     foreach ( 0 .. $#{ $marc } ) {
507     last unless (defined $marc->[$i]);
508     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
509     if ($marc->[$i]->[0] eq $f) {
510     if (! defined $sf) {
511     # remove whole field
512     splice @$marc, $i, 1;
513     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
514     $i--;
515     } else {
516     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
517     my $o = ($j * 2) + 3;
518     if ($marc->[$i]->[$o] eq $sf) {
519     # remove subfield
520     splice @{$marc->[$i]}, $o, 2;
521     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
522     # is record now empty?
523     if ($#{ $marc->[$i] } == 2) {
524     splice @$marc, $i, 1;
525     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
526     $i--;
527     };
528     }
529     }
530     }
531     }
532     $i++;
533     }
534    
535     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
536    
537     $marc_record->[ $marc_record_offset ] = $marc;
538     }
539    
540     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
541     }
542    
543     =head2 marc_original_order
544    
545     Copy all subfields preserving original order to marc field.
546    
547     marc_original_order( marc_field_number, original_input_field_number );
548    
549     Please note that field numbers are consistent with other commands (marc
550     field number first), but somewhat counter-intuitive (destination and then
551     source).
552    
553     You might want to use this command if you are just renaming subfields or
554     using pre-processing modify_record in C<config.yml> and don't need any
555     post-processing or want to preserve order of original subfields.
556    
557    
558     =cut
559    
560     sub marc_original_order {
561    
562     my ($to, $from) = @_;
563     die "marc_original_order needs from and to fields\n" unless ($from && $to);
564    
565     return unless defined($rec->{$from});
566    
567     my $r = $rec->{$from};
568     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
569    
570 dpavlin 1047 my ($i1,$i2) = _get_marc_indicators( $to );
571 dpavlin 1036 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
572    
573     foreach my $d (@$r) {
574    
575     if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
576     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
577     next;
578     }
579    
580     my @sfs = @{ $d->{subfields} };
581    
582     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
583    
584     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
585    
586     my $m = [ $to, $i1, $i2 ];
587    
588     while (my $sf = shift @sfs) {
589    
590     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
591     my $offset = shift @sfs;
592     die "corrupted sufields specification for field $from\n" unless defined($offset);
593    
594     my $v;
595     if (ref($d->{$sf}) eq 'ARRAY') {
596     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
597     } elsif ($offset == 0) {
598     $v = $d->{$sf};
599     } else {
600     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
601     }
602     push @$m, ( $sf, $v ) if (defined($v));
603     }
604    
605     if ($#{$m} > 2) {
606     push @{ $marc_record->[ $marc_record_offset ] }, $m;
607     }
608     }
609    
610     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
611     }
612    
613    
614     =head2 marc_count
615    
616     Return number of MARC records created using L</marc_duplicate>.
617    
618     print "created ", marc_count(), " records";
619    
620     =cut
621    
622     sub marc_count {
623     return $#{ $marc_record };
624     }
625    
626 dpavlin 1039 =head1 PRIVATE FUNCTIONS
627    
628 dpavlin 1036 =head2 _marc_push
629    
630     _marc_push( $marc );
631    
632     =cut
633    
634     sub _marc_push {
635     my $marc = shift || die "no marc?";
636     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
637     }
638    
639     =head2 _clean
640    
641     Clean internal structures
642    
643     =cut
644    
645     sub _clean {
646 dpavlin 1039 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
647 dpavlin 1036 ($marc_record_offset, $marc_fetch_offset) = (0,0);
648     }
649    
650    
651     =head2 _get_marc_fields
652    
653     Get all fields defined by calls to C<marc>
654    
655     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
656    
657     We are using I<magic> which detect repeatable fields only from
658     sequence of field/subfield data generated by normalization.
659    
660 dpavlin 1039 This magic is disabled for all records created with C<marc_template>.
661    
662 dpavlin 1036 Repeatable field is created when there is second occurence of same subfield or
663     if any of indicators are different.
664    
665     This is sane for most cases. Something like:
666    
667     900a-1 900b-1 900c-1
668     900a-2 900b-2
669     900a-3
670    
671     will be created from any combination of:
672    
673     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
674    
675     and following rules:
676    
677     marc('900','a', rec('200','a') );
678     marc('900','b', rec('200','b') );
679     marc('900','c', rec('200','c') );
680    
681     which might not be what you have in mind. If you need repeatable subfield,
682     define it using C<marc_repeatable_subfield> like this:
683    
684     marc_repeatable_subfield('900','a');
685     marc('900','a', rec('200','a') );
686     marc('900','b', rec('200','b') );
687     marc('900','c', rec('200','c') );
688    
689     will create:
690    
691     900a-1 900a-2 900a-3 900b-1 900c-1
692     900b-2
693    
694     There is also support for returning next or specific using:
695    
696     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
697     # do something with $mf
698     }
699    
700     will always return fields from next MARC record or
701    
702     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
703    
704     will return 42th copy record (if it exists).
705    
706     =cut
707    
708     my $fetch_pos;
709    
710     sub _get_marc_fields {
711    
712     my $arg = {@_};
713     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
714     $fetch_pos = $marc_fetch_offset;
715     if ($arg->{offset}) {
716     $fetch_pos = $arg->{offset};
717     } elsif($arg->{fetch_next}) {
718     $marc_fetch_offset++;
719     }
720    
721     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
722    
723     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
724    
725     my $marc_rec = $marc_record->[ $fetch_pos ];
726    
727     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
728    
729     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
730    
731     # first, sort all existing fields
732     # XXX might not be needed, but modern perl might randomize elements in hash
733 dpavlin 1039 # my @sorted_marc_record = sort {
734     # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
735     # } @{ $marc_rec };
736 dpavlin 1036
737 dpavlin 1040 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
738 dpavlin 1036
739     # output marc fields
740     my @m;
741    
742     # count unique field-subfields (used for offset when walking to next subfield)
743     my $u;
744     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
745    
746     if ($debug) {
747     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
748     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
749     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
750     warn "## subfield count = ", dump( $u ), $/;
751     }
752    
753     my $len = $#sorted_marc_record;
754     my $visited;
755     my $i = 0;
756     my $field;
757    
758 dpavlin 1040 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
759    
760 dpavlin 1036 foreach ( 0 .. $len ) {
761    
762     # find next element which isn't visited
763     while ($visited->{$i}) {
764     $i = ($i + 1) % ($len + 1);
765     }
766    
767     # mark it visited
768     $visited->{$i}++;
769    
770     my $row = dclone( $sorted_marc_record[$i] );
771    
772 dpavlin 1039 if ( $created_with_marc_template->{ $row->[0] } ) {
773     push @m, $row;
774     warn "## copied marc_template created ", dump( $row ),$/ if $debug;
775     next;
776     }
777 dpavlin 1040
778 dpavlin 1036 # field and subfield which is key for
779     # marc_repeatable_subfield and u
780     my $fsf = $row->[0] . ( $row->[3] || '' );
781    
782     if ($debug > 1) {
783    
784     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
785     print "### this [$i]: ", dump( $row ),$/;
786     print "### sf: ", $row->[3], " vs ", $field->[3],
787     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
788     if ($#$field >= 0);
789    
790     }
791    
792     # if field exists
793     if ( $#$field >= 0 ) {
794     if (
795     $row->[0] ne $field->[0] || # field
796     $row->[1] ne $field->[1] || # i1
797     $row->[2] ne $field->[2] # i2
798     ) {
799     push @m, $field;
800     warn "## saved/1 ", dump( $field ),$/ if ($debug);
801     $field = $row;
802    
803     } elsif (
804     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
805     ||
806     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
807     ! $marc_repeatable_subfield->{ $fsf }
808     )
809     ) {
810     push @m, $field;
811     warn "## saved/2 ", dump( $field ),$/ if ($debug);
812     $field = $row;
813    
814     } else {
815     # append new subfields to existing field
816     push @$field, ( $row->[3], $row->[4] );
817     }
818     } else {
819     # insert first field
820     $field = $row;
821     }
822    
823     if (! $marc_repeatable_subfield->{ $fsf }) {
824     # make step to next subfield
825     $i = ($i + $u->{ $fsf } ) % ($len + 1);
826     }
827     }
828    
829     if ($#$field >= 0) {
830     push @m, $field;
831     warn "## saved/3 ", dump( $field ),$/ if ($debug);
832     }
833    
834     return \@m;
835     }
836    
837     =head2 _get_marc_leader
838    
839     Return leader from currently fetched record by L</_get_marc_fields>
840    
841     print WebPAC::Normalize::MARC::_get_marc_leader();
842    
843     =cut
844    
845     sub _get_marc_leader {
846     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
847     return $marc_leader->[ $fetch_pos ];
848     }
849    
850     =head2 _created_marc_records
851    
852     my $nr_records = _created_marc_records;
853    
854     =cut
855    
856     sub _created_marc_records {
857     return $#{ $marc_record } + 1 if $marc_record;
858     }
859    
860 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26