/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1036 - (hide annotations)
Mon Nov 12 11:10:48 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 22281 byte(s)
 r1610@llin:  dpavlin | 2007-11-12 12:10:45 +0100
 split MARC handling routines into WebPAC::Normalize::MARC [2.31]

1 dpavlin 1021 package WebPAC::Normalize::MARC;
2     use Exporter 'import';
3 dpavlin 1036 our @EXPORT = qw/
4     marc marc_indicators marc_repeatable_subfield
5     marc_compose marc_leader marc_fixed
6     marc_duplicate marc_remove marc_count
7     marc_original_order
8 dpavlin 1021 marc_template
9     /;
10    
11     use strict;
12     use warnings;
13    
14 dpavlin 1036 use Storable qw/dclone/;
15 dpavlin 1021 use Data::Dump qw/dump/;
16     use Carp qw/confess/;
17    
18     use WebPAC::Normalize;
19    
20 dpavlin 1036 our $debug = 42;
21 dpavlin 1028
22 dpavlin 1036 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23     my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24    
25     our $rec;
26    
27 dpavlin 1021 =head1 NAME
28    
29     WebPAC::Normalize::MARC - create MARC/ISO2709 records
30    
31     =cut
32    
33     =head1 FUNCTIONS
34    
35     =head2 marc_template
36    
37     marc_template(
38     from => 225, to => 440,
39     subfields_rename => [
40     'a' => 'a',
41     'x' => 'x',
42     'v' => 'v',
43     'h' => 'n',
44     'i' => 'p',
45     'w' => 'v',
46     ],
47     isis_template => [
48     'a ; |v. |i',
49     'a. |i ; |w',
50     ],
51     marc_template => [
52     'a, |x ; |v. |n, |p ; |v',
53     'a ; |v. |p ; |v',
54     ],
55     );
56    
57 dpavlin 1026 Returns number of records produced.
58    
59 dpavlin 1021 =cut
60    
61     sub marc_template {
62     my $args = {@_};
63 dpavlin 1028 warn "## marc_template(",dump($args),")",$/ if $debug;
64 dpavlin 1021
65     foreach ( qw/subfields_rename isis_template marc_template/ ) {
66 dpavlin 1028 # warn "ref($_) = ",ref($args->{$_}) if $debug;
67 dpavlin 1021 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
68     }
69    
70     die "marc_template needs isis_template or marc_template"
71     if ! defined $args->{isis_template} && ! defined $args->{marc_template};
72    
73     my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
74     my $r = $rec->{ $args->{from} } || return;
75     die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
76    
77     my @subfields_rename = @{ $args->{subfields_rename} };
78 dpavlin 1028 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
79 dpavlin 1021
80     confess "need mapping in pairs for subfields_rename"
81     if $#subfields_rename % 2 != 1;
82    
83 dpavlin 1029 my ( $subfields_rename, $from_subfields );
84     our $to_subfields = {};
85 dpavlin 1021 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
86     my ( $f, $t ) = (
87     $from_subfields->{ $from }++,
88     $to_subfields->{ $to }++
89     );
90     $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
91     }
92 dpavlin 1028 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
93     warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
94     warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
95 dpavlin 1021
96 dpavlin 1025 our $_template;
97 dpavlin 1021
98 dpavlin 1026 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
99     $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
100 dpavlin 1025
101 dpavlin 1021 my @marc_out;
102    
103 dpavlin 1023 sub _parse_template {
104 dpavlin 1024 my ( $name, $templates ) = @_;
105 dpavlin 1021
106 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
107 dpavlin 1025
108 dpavlin 1023 foreach my $template ( @{ $templates } ) {
109     our $count = {};
110 dpavlin 1025 our @order = ();
111 dpavlin 1023 sub my_count {
112     my $sf = shift;
113     my $nr = $count->{$sf}++;
114 dpavlin 1025 push @order, [ $sf, $nr ];
115 dpavlin 1023 return $sf . $nr;
116     }
117     my $pos_template = $template;
118     $pos_template =~ s/($fields_re)/my_count($1)/ge;
119     my $count_key = dump( $count );
120 dpavlin 1028 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
121 dpavlin 1024 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
122 dpavlin 1025 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
123 dpavlin 1021 }
124 dpavlin 1028 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
125 dpavlin 1021 }
126    
127 dpavlin 1024 _parse_template( 'marc', $args->{marc_template} );
128     _parse_template( 'isis', $args->{isis_template} );
129 dpavlin 1028 warn "### _template = ",dump( $_template ),$/ if $debug;
130 dpavlin 1023
131 dpavlin 1021 my $m;
132    
133     foreach my $r ( @{ $rec->{ $args->{from} } } ) {
134    
135     my $i1 = $r->{i1} || ' ';
136     my $i2 = $r->{i2} || ' ';
137     $m = [ $args->{to}, $i1, $i2 ];
138    
139 dpavlin 1028 warn "### r = ",dump( $r ),$/ if $debug;
140 dpavlin 1021
141 dpavlin 1026 my ( $from_mapping, $to_mapping, $from_count, $to_count );
142 dpavlin 1029 foreach my $from_sf ( keys %{$r} ) {
143 dpavlin 1021 # skip everything which isn't one char subfield (e.g. 'subfields')
144 dpavlin 1029 next unless $from_sf =~ m/^\w$/;
145     my $from_nr = $from_count->{$from_sf}++;
146     my $rename_to = $subfields_rename->{ $from_sf } ||
147     die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
148     my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
149     $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
150 dpavlin 1021
151 dpavlin 1029 my $to_nr2 = $to_count->{ $to_sf }++;
152     $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
153    
154     warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
155 dpavlin 1021 }
156    
157 dpavlin 1029 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
158 dpavlin 1021
159 dpavlin 1025 my $count_key = {
160     from => dump( $from_count ),
161     to => dump( $to_count),
162     };
163 dpavlin 1021
164 dpavlin 1028 warn "### count_key = ",dump( $count_key ),$/ if $debug;
165 dpavlin 1021
166 dpavlin 1025 my $processed_templates = 0;
167 dpavlin 1021
168 dpavlin 1025 # this defines order of traversal
169     foreach ( qw/isis:from marc:to/ ) {
170     my ($name,$count_name) = split(/:/);
171 dpavlin 1021
172 dpavlin 1025 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
173 dpavlin 1021
174 dpavlin 1025 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
175     $processed_templates++;
176 dpavlin 1021
177 dpavlin 1031 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
178 dpavlin 1025
179     our $fill_in = {};
180    
181     my @templates = split(/\|/, $template );
182 dpavlin 1031 @templates = ( $template ) unless @templates;
183 dpavlin 1025
184 dpavlin 1035 warn "### templates = ",dump( @templates ),$/ if $debug;
185 dpavlin 1031
186 dpavlin 1025 foreach my $sf ( @templates ) {
187     sub fill_in {
188 dpavlin 1031 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
189     warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
190 dpavlin 1028 my ( $from_sf, $from_nr );
191 dpavlin 1025 if ( $name eq 'marc' ) {
192 dpavlin 1029 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), " form record ",dump( $r ) unless defined $to_mapping->{$sf}->[$nr];
193 dpavlin 1026 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
194 dpavlin 1025 } else {
195     ( $from_sf, $from_nr ) = ( $sf, $nr );
196     }
197     my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
198     if ( ref( $v ) eq 'ARRAY' ) {
199 dpavlin 1031 $v = $pre . $v->[$from_nr] . $post;
200 dpavlin 1025 } elsif ( $from_nr == 0 ) {
201 dpavlin 1031 $v = $pre . $v . $post;
202 dpavlin 1025 } else {
203     die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
204     }
205 dpavlin 1031 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
206     $fill_in->{$sf}->[$nr] = $v;
207     return $v;
208 dpavlin 1021 }
209 dpavlin 1026 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
210 dpavlin 1031 warn "#### $sf <<<< $fields_re\n" if $debug;
211     $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
212 dpavlin 1028 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
213 dpavlin 1021 }
214    
215 dpavlin 1028 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
216 dpavlin 1021
217 dpavlin 1025 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
218     my ( $sf, $nr ) = @$sf;
219     my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
220 dpavlin 1026 if ( $name eq 'isis') {
221     ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
222     }
223 dpavlin 1028 warn "++ $sf/$nr |$v|\n" if $debug;
224 dpavlin 1025 push @$m, ( $sf, $v );
225     }
226 dpavlin 1021
227 dpavlin 1028 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
228 dpavlin 1025
229     push @marc_out, $m;
230 dpavlin 1029
231     last;
232 dpavlin 1025 }
233    
234     die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
235 dpavlin 1028 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
236 dpavlin 1021 }
237    
238    
239 dpavlin 1026 my $recs = 0;
240 dpavlin 1025
241 dpavlin 1021 foreach my $marc ( @marc_out ) {
242 dpavlin 1028 warn "+++ ",dump( $marc ),$/ if $debug;
243 dpavlin 1036 _marc_push( $marc );
244 dpavlin 1026 $recs++;
245 dpavlin 1021 }
246 dpavlin 1026
247 dpavlin 1028 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
248    
249     return $recs;
250 dpavlin 1021 }
251    
252 dpavlin 1036 =head2 marc_leader
253    
254     Setup fields within MARC leader or get leader
255    
256     marc_leader('05','c');
257     my $leader = marc_leader();
258    
259     =cut
260    
261     sub marc_leader {
262     my ($offset,$value) = @_;
263    
264     if ($offset) {
265     $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
266     } else {
267    
268     if (defined($marc_leader)) {
269     die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
270     return $marc_leader->[ $marc_record_offset ];
271     } else {
272     return;
273     }
274     }
275     }
276    
277     =head2 marc_fixed
278    
279     Create control/indentifier fields with values in fixed positions
280    
281     marc_fixed('008', 00, '070402');
282     marc_fixed('008', 39, '|');
283    
284     Positions not specified will be filled with spaces (C<0x20>).
285    
286     There will be no effort to extend last specified value to full length of
287     field in standard.
288    
289     =cut
290    
291     sub marc_fixed {
292     my ($f, $pos, $val) = @_;
293     die "need marc(field, position, value)" unless defined($f) && defined($pos);
294    
295     confess "need val" unless defined $val;
296    
297     my $update = 0;
298    
299     map {
300     if ($_->[0] eq $f) {
301     my $old = $_->[1];
302     if (length($old) <= $pos) {
303     $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
304     warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
305     } else {
306     $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
307     warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
308     }
309     $update++;
310     }
311     } @{ $marc_record->[ $marc_record_offset ] };
312    
313     if (! $update) {
314     my $v = ' ' x $pos . $val;
315     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
316     warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
317     }
318     }
319    
320     =head2 marc
321    
322     Save value for MARC field
323    
324     marc('900','a', rec('200','a') );
325     marc('001', rec('000') );
326    
327     =cut
328    
329     sub marc {
330     my $f = shift or die "marc needs field";
331     die "marc field must be numer" unless ($f =~ /^\d+$/);
332    
333     my $sf;
334     if ($f >= 10) {
335     $sf = shift or die "marc needs subfield";
336     }
337    
338     foreach (@_) {
339     my $v = $_; # make var read-write for Encode
340     next unless (defined($v) && $v !~ /^\s*$/);
341     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
342     if (defined $sf) {
343     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
344     } else {
345     push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
346     }
347     }
348     }
349    
350     =head2 marc_repeatable_subfield
351    
352     Save values for MARC repetable subfield
353    
354     marc_repeatable_subfield('910', 'z', rec('909') );
355    
356     =cut
357    
358     sub marc_repeatable_subfield {
359     my ($f,$sf) = @_;
360     die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
361     $marc_repeatable_subfield->{ $f . $sf }++;
362     marc(@_);
363     }
364    
365     =head2 marc_indicators
366    
367     Set both indicators for MARC field
368    
369     marc_indicators('900', ' ', 1);
370    
371     Any indicator value other than C<0-9> will be treated as undefined.
372    
373     =cut
374    
375     sub marc_indicators {
376     my $f = shift || die "marc_indicators need field!\n";
377     my ($i1,$i2) = @_;
378     die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
379     die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
380    
381     $i1 = ' ' if ($i1 !~ /^\d$/);
382     $i2 = ' ' if ($i2 !~ /^\d$/);
383     @{ $marc_indicators->{$f} } = ($i1,$i2);
384     }
385    
386     =head2 marc_compose
387    
388     Save values for each MARC subfield explicitly
389    
390     marc_compose('900',
391     'a', rec('200','a')
392     'b', rec('201','a')
393     'a', rec('200','b')
394     'c', rec('200','c')
395     );
396    
397     If you specify C<+> for subfield, value will be appended
398     to previous defined subfield.
399    
400     =cut
401    
402     sub marc_compose {
403     my $f = shift or die "marc_compose needs field";
404     die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
405    
406     my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
407     my $m = [ $f, $i1, $i2 ];
408    
409     warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
410    
411     if ($#_ % 2 != 1) {
412     die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
413     }
414    
415     while (@_) {
416     my $sf = shift;
417     my $v = shift;
418    
419     next unless (defined($v) && $v !~ /^\s*$/);
420     warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
421     if ($sf ne '+') {
422     push @$m, ( $sf, $v );
423     } else {
424     $m->[ $#$m ] .= $v;
425     }
426     }
427    
428     warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
429    
430     push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
431     }
432    
433     =head2 marc_duplicate
434    
435     Generate copy of current MARC record and continue working on copy
436    
437     marc_duplicate();
438    
439     Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
440     C<< _get_marc_fields( offset => 42 ) >>.
441    
442     =cut
443    
444     sub marc_duplicate {
445     my $m = $marc_record->[ -1 ];
446     die "can't duplicate record which isn't defined" unless ($m);
447     push @{ $marc_record }, dclone( $m );
448     push @{ $marc_leader }, dclone( marc_leader() );
449     warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
450     $marc_record_offset = $#{ $marc_record };
451     warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
452    
453     }
454    
455     =head2 marc_remove
456    
457     Remove some field or subfield from MARC record.
458    
459     marc_remove('200');
460     marc_remove('200','a');
461    
462     This will erase field C<200> or C<200^a> from current MARC record.
463    
464     marc_remove('*');
465    
466     Will remove all fields in current MARC record.
467    
468     This is useful after calling C<marc_duplicate> or on it's own (but, you
469     should probably just remove that subfield definition if you are not
470     using C<marc_duplicate>).
471    
472     FIXME: support fields < 10.
473    
474     =cut
475    
476     sub marc_remove {
477     my ($f, $sf) = @_;
478    
479     die "marc_remove needs record number" unless defined($f);
480    
481     my $marc = $marc_record->[ $marc_record_offset ];
482    
483     warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
484    
485     if ($f eq '*') {
486    
487     delete( $marc_record->[ $marc_record_offset ] );
488     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
489    
490     } else {
491    
492     my $i = 0;
493     foreach ( 0 .. $#{ $marc } ) {
494     last unless (defined $marc->[$i]);
495     warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
496     if ($marc->[$i]->[0] eq $f) {
497     if (! defined $sf) {
498     # remove whole field
499     splice @$marc, $i, 1;
500     warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
501     $i--;
502     } else {
503     foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
504     my $o = ($j * 2) + 3;
505     if ($marc->[$i]->[$o] eq $sf) {
506     # remove subfield
507     splice @{$marc->[$i]}, $o, 2;
508     warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
509     # is record now empty?
510     if ($#{ $marc->[$i] } == 2) {
511     splice @$marc, $i, 1;
512     warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
513     $i--;
514     };
515     }
516     }
517     }
518     }
519     $i++;
520     }
521    
522     warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
523    
524     $marc_record->[ $marc_record_offset ] = $marc;
525     }
526    
527     warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
528     }
529    
530     =head2 marc_original_order
531    
532     Copy all subfields preserving original order to marc field.
533    
534     marc_original_order( marc_field_number, original_input_field_number );
535    
536     Please note that field numbers are consistent with other commands (marc
537     field number first), but somewhat counter-intuitive (destination and then
538     source).
539    
540     You might want to use this command if you are just renaming subfields or
541     using pre-processing modify_record in C<config.yml> and don't need any
542     post-processing or want to preserve order of original subfields.
543    
544    
545     =cut
546    
547     sub marc_original_order {
548    
549     my ($to, $from) = @_;
550     die "marc_original_order needs from and to fields\n" unless ($from && $to);
551    
552     return unless defined($rec->{$from});
553    
554     my $r = $rec->{$from};
555     die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
556    
557     my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
558     warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
559    
560     foreach my $d (@$r) {
561    
562     if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
563     warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
564     next;
565     }
566    
567     my @sfs = @{ $d->{subfields} };
568    
569     die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
570    
571     warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
572    
573     my $m = [ $to, $i1, $i2 ];
574    
575     while (my $sf = shift @sfs) {
576    
577     warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
578     my $offset = shift @sfs;
579     die "corrupted sufields specification for field $from\n" unless defined($offset);
580    
581     my $v;
582     if (ref($d->{$sf}) eq 'ARRAY') {
583     $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
584     } elsif ($offset == 0) {
585     $v = $d->{$sf};
586     } else {
587     die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
588     }
589     push @$m, ( $sf, $v ) if (defined($v));
590     }
591    
592     if ($#{$m} > 2) {
593     push @{ $marc_record->[ $marc_record_offset ] }, $m;
594     }
595     }
596    
597     warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
598     }
599    
600    
601     =head2 marc_count
602    
603     Return number of MARC records created using L</marc_duplicate>.
604    
605     print "created ", marc_count(), " records";
606    
607     =cut
608    
609     sub marc_count {
610     return $#{ $marc_record };
611     }
612    
613     =head2 _marc_push
614    
615     _marc_push( $marc );
616    
617     =cut
618    
619     sub _marc_push {
620     my $marc = shift || die "no marc?";
621     push @{ $marc_record->[ $marc_record_offset ] }, $marc;
622     }
623    
624     =head1 PRIVATE FUNCTIONS
625    
626     =head2 _clean
627    
628     Clean internal structures
629    
630     =cut
631    
632     sub _clean {
633     ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
634     ($marc_record_offset, $marc_fetch_offset) = (0,0);
635     }
636    
637    
638     =head2 _get_marc_fields
639    
640     Get all fields defined by calls to C<marc>
641    
642     $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
643    
644     We are using I<magic> which detect repeatable fields only from
645     sequence of field/subfield data generated by normalization.
646    
647     Repeatable field is created when there is second occurence of same subfield or
648     if any of indicators are different.
649    
650     This is sane for most cases. Something like:
651    
652     900a-1 900b-1 900c-1
653     900a-2 900b-2
654     900a-3
655    
656     will be created from any combination of:
657    
658     900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
659    
660     and following rules:
661    
662     marc('900','a', rec('200','a') );
663     marc('900','b', rec('200','b') );
664     marc('900','c', rec('200','c') );
665    
666     which might not be what you have in mind. If you need repeatable subfield,
667     define it using C<marc_repeatable_subfield> like this:
668    
669     marc_repeatable_subfield('900','a');
670     marc('900','a', rec('200','a') );
671     marc('900','b', rec('200','b') );
672     marc('900','c', rec('200','c') );
673    
674     will create:
675    
676     900a-1 900a-2 900a-3 900b-1 900c-1
677     900b-2
678    
679     There is also support for returning next or specific using:
680    
681     while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
682     # do something with $mf
683     }
684    
685     will always return fields from next MARC record or
686    
687     my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
688    
689     will return 42th copy record (if it exists).
690    
691     =cut
692    
693     my $fetch_pos;
694    
695     sub _get_marc_fields {
696    
697     my $arg = {@_};
698     warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
699     $fetch_pos = $marc_fetch_offset;
700     if ($arg->{offset}) {
701     $fetch_pos = $arg->{offset};
702     } elsif($arg->{fetch_next}) {
703     $marc_fetch_offset++;
704     }
705    
706     return if (! $marc_record || ref($marc_record) ne 'ARRAY');
707    
708     warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
709    
710     my $marc_rec = $marc_record->[ $fetch_pos ];
711    
712     warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
713    
714     return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
715    
716     # first, sort all existing fields
717     # XXX might not be needed, but modern perl might randomize elements in hash
718     my @sorted_marc_record = sort {
719     $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
720     } @{ $marc_rec };
721    
722     @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
723    
724     # output marc fields
725     my @m;
726    
727     # count unique field-subfields (used for offset when walking to next subfield)
728     my $u;
729     map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
730    
731     if ($debug) {
732     warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
733     warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
734     warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
735     warn "## subfield count = ", dump( $u ), $/;
736     }
737    
738     my $len = $#sorted_marc_record;
739     my $visited;
740     my $i = 0;
741     my $field;
742    
743     foreach ( 0 .. $len ) {
744    
745     # find next element which isn't visited
746     while ($visited->{$i}) {
747     $i = ($i + 1) % ($len + 1);
748     }
749    
750     # mark it visited
751     $visited->{$i}++;
752    
753     my $row = dclone( $sorted_marc_record[$i] );
754    
755     # field and subfield which is key for
756     # marc_repeatable_subfield and u
757     my $fsf = $row->[0] . ( $row->[3] || '' );
758    
759     if ($debug > 1) {
760    
761     print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
762     print "### this [$i]: ", dump( $row ),$/;
763     print "### sf: ", $row->[3], " vs ", $field->[3],
764     $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
765     if ($#$field >= 0);
766    
767     }
768    
769     # if field exists
770     if ( $#$field >= 0 ) {
771     if (
772     $row->[0] ne $field->[0] || # field
773     $row->[1] ne $field->[1] || # i1
774     $row->[2] ne $field->[2] # i2
775     ) {
776     push @m, $field;
777     warn "## saved/1 ", dump( $field ),$/ if ($debug);
778     $field = $row;
779    
780     } elsif (
781     ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
782     ||
783     ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
784     ! $marc_repeatable_subfield->{ $fsf }
785     )
786     ) {
787     push @m, $field;
788     warn "## saved/2 ", dump( $field ),$/ if ($debug);
789     $field = $row;
790    
791     } else {
792     # append new subfields to existing field
793     push @$field, ( $row->[3], $row->[4] );
794     }
795     } else {
796     # insert first field
797     $field = $row;
798     }
799    
800     if (! $marc_repeatable_subfield->{ $fsf }) {
801     # make step to next subfield
802     $i = ($i + $u->{ $fsf } ) % ($len + 1);
803     }
804     }
805    
806     if ($#$field >= 0) {
807     push @m, $field;
808     warn "## saved/3 ", dump( $field ),$/ if ($debug);
809     }
810    
811     return \@m;
812     }
813    
814     =head2 _get_marc_leader
815    
816     Return leader from currently fetched record by L</_get_marc_fields>
817    
818     print WebPAC::Normalize::MARC::_get_marc_leader();
819    
820     =cut
821    
822     sub _get_marc_leader {
823     die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
824     return $marc_leader->[ $fetch_pos ];
825     }
826    
827     =head2 _created_marc_records
828    
829     my $nr_records = _created_marc_records;
830    
831     =cut
832    
833     sub _created_marc_records {
834     return $#{ $marc_record } + 1 if $marc_record;
835     }
836    
837 dpavlin 1021 1;

  ViewVC Help
Powered by ViewVC 1.1.26