/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1044 - (show annotations)
Mon Nov 12 14:18:49 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 22804 byte(s)
fix variable scoping

1 package WebPAC::Normalize::MARC;
2 use Exporter 'import';
3 our @EXPORT = qw/
4 marc marc_indicators marc_repeatable_subfield
5 marc_compose marc_leader marc_fixed
6 marc_duplicate marc_remove marc_count
7 marc_original_order
8 marc_template
9 /;
10
11 use strict;
12 use warnings;
13
14 use Storable qw/dclone/;
15 use Data::Dump qw/dump/;
16 use Carp qw/confess/;
17
18 use WebPAC::Normalize;
19
20 our $debug = 0;
21
22 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24
25 our $rec;
26
27 =head1 NAME
28
29 WebPAC::Normalize::MARC - create MARC/ISO2709 records
30
31 =cut
32
33 =head1 FUNCTIONS
34
35 =head2 marc_template
36
37 marc_template(
38 from => 225, to => 440,
39 subfields_rename => [
40 'a' => 'a',
41 'x' => 'x',
42 'v' => 'v',
43 'h' => 'n',
44 'i' => 'p',
45 'w' => 'v',
46 ],
47 isis_template => [
48 'a ; |v. |i',
49 'a. |i ; |w',
50 ],
51 marc_template => [
52 'a, |x ; |v. |n, |p ; |v',
53 'a ; |v. |p ; |v',
54 ],
55 );
56
57 Returns number of records produced.
58
59 =cut
60
61 my $created_with_marc_template;
62
63 sub marc_template {
64 my $args = {@_};
65 warn "## marc_template(",dump($args),")",$/ if $debug;
66
67 foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70 }
71
72 die "marc_template needs isis_template or marc_template"
73 if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74
75 my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76 my $r = $rec->{ $args->{from} } || return;
77 die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78
79 my @subfields_rename = @{ $args->{subfields_rename} };
80 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81
82 confess "need mapping in pairs for subfields_rename"
83 if $#subfields_rename % 2 != 1;
84
85 my ( $subfields_rename, $from_subfields );
86 our $to_subfields = {};
87 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88 my ( $f, $t ) = (
89 $from_subfields->{ $from }++,
90 $to_subfields->{ $to }++
91 );
92 $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93 }
94 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95 warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96 warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97
98 our $_template;
99
100 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101 $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102
103 my @marc_out;
104
105 sub _parse_template {
106 my ( $name, $templates ) = @_;
107
108 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109
110 foreach my $template ( @{ $templates } ) {
111 our $count = {};
112 our @order = ();
113 sub my_count {
114 my $sf = shift;
115 my $nr = $count->{$sf}++;
116 push @order, [ $sf, $nr ];
117 return $sf . $nr;
118 }
119 my $pos_template = $template;
120 $pos_template =~ s/($fields_re)/my_count($1)/ge;
121 my $count_key = dump( $count );
122 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 }
126 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 }
128
129 _parse_template( 'marc', $args->{marc_template} );
130 _parse_template( 'isis', $args->{isis_template} );
131 warn "### _template = ",dump( $_template ),$/ if $debug;
132
133 my $m;
134
135 our $from_rec = $rec->{ $args->{from} };
136
137 foreach my $r ( @$from_rec ) {
138
139 my $i1 = $r->{i1} || ' ';
140 my $i2 = $r->{i2} || ' ';
141 my $to = $args->{to};
142 $m = [ $to, $i1, $i2 ];
143
144 $created_with_marc_template->{ $to }++;
145
146 warn "### r = ",dump( $r ),$/ if $debug;
147
148 my ( $from_mapping, $from_count, $to_count );
149 our $to_mapping;
150 foreach my $from_sf ( keys %{$r} ) {
151 # skip everything which isn't one char subfield (e.g. 'subfields')
152 next unless $from_sf =~ m/^\w$/;
153 my $from_nr = $from_count->{$from_sf}++;
154 my $rename_to = $subfields_rename->{ $from_sf } ||
155 die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
156 my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
157 $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
158
159 my $to_nr2 = $to_count->{ $to_sf }++;
160 $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
161
162 warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
163 }
164
165 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
166
167 my $count_key = {
168 from => dump( $from_count ),
169 to => dump( $to_count),
170 };
171
172 warn "### count_key = ",dump( $count_key ),$/ if $debug;
173
174 my $processed_templates = 0;
175
176 # this defines order of traversal
177 foreach ( qw/isis:from marc:to/ ) {
178 my ($name,$count_name) = split(/:/);
179
180 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
181
182 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
183 $processed_templates++;
184
185 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
186
187 our $fill_in = {};
188
189 my @templates = split(/\|/, $template );
190 @templates = ( $template ) unless @templates;
191
192 warn "### templates = ",dump( @templates ),$/ if $debug;
193
194 foreach my $sf ( @templates ) {
195 sub fill_in {
196 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
197 warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
198 my ( $from_sf, $from_nr );
199 if ( $name eq 'marc' ) {
200 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
201 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
202 } else {
203 ( $from_sf, $from_nr ) = ( $sf, $nr );
204 }
205 my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
206 if ( ref( $v ) eq 'ARRAY' ) {
207 $v = $pre . $v->[$from_nr] . $post;
208 } elsif ( $from_nr == 0 ) {
209 $v = $pre . $v . $post;
210 } else {
211 die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
212 }
213 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
214 $fill_in->{$sf}->[$nr] = $v;
215 return $v;
216 }
217 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
218 warn "#### $sf <<<< $fields_re\n" if $debug;
219 $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
220 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
221 }
222
223 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
224
225 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
226 my ( $sf, $nr ) = @$sf;
227 my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
228 if ( $name eq 'isis') {
229 ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
230 }
231 warn "++ $sf/$nr |$v|\n" if $debug;
232 push @$m, ( $sf, $v );
233 }
234
235 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
236
237 push @marc_out, $m;
238
239 last;
240 }
241
242 die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
243 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
244 }
245
246
247 my $recs = 0;
248
249 foreach my $marc ( @marc_out ) {
250 warn "+++ ",dump( $marc ),$/ if $debug;
251 _marc_push( $marc );
252 $recs++;
253 }
254
255 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
256
257 return $recs;
258 }
259
260 =head2 marc_leader
261
262 Setup fields within MARC leader or get leader
263
264 marc_leader('05','c');
265 my $leader = marc_leader();
266
267 =cut
268
269 sub marc_leader {
270 my ($offset,$value) = @_;
271
272 if ($offset) {
273 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
274 } else {
275
276 if (defined($marc_leader)) {
277 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
278 return $marc_leader->[ $marc_record_offset ];
279 } else {
280 return;
281 }
282 }
283 }
284
285 =head2 marc_fixed
286
287 Create control/indentifier fields with values in fixed positions
288
289 marc_fixed('008', 00, '070402');
290 marc_fixed('008', 39, '|');
291
292 Positions not specified will be filled with spaces (C<0x20>).
293
294 There will be no effort to extend last specified value to full length of
295 field in standard.
296
297 =cut
298
299 sub marc_fixed {
300 my ($f, $pos, $val) = @_;
301 die "need marc(field, position, value)" unless defined($f) && defined($pos);
302
303 confess "need val" unless defined $val;
304
305 my $update = 0;
306
307 map {
308 if ($_->[0] eq $f) {
309 my $old = $_->[1];
310 if (length($old) <= $pos) {
311 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
312 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
313 } else {
314 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
315 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
316 }
317 $update++;
318 }
319 } @{ $marc_record->[ $marc_record_offset ] };
320
321 if (! $update) {
322 my $v = ' ' x $pos . $val;
323 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
324 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
325 }
326 }
327
328 =head2 marc
329
330 Save value for MARC field
331
332 marc('900','a', rec('200','a') );
333 marc('001', rec('000') );
334
335 =cut
336
337 sub marc {
338 my $f = shift or die "marc needs field";
339 die "marc field must be numer" unless ($f =~ /^\d+$/);
340
341 my $sf;
342 if ($f >= 10) {
343 $sf = shift or die "marc needs subfield";
344 }
345
346 foreach (@_) {
347 my $v = $_; # make var read-write for Encode
348 next unless (defined($v) && $v !~ /^\s*$/);
349 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
350 if (defined $sf) {
351 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
352 } else {
353 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
354 }
355 }
356 }
357
358 =head2 marc_repeatable_subfield
359
360 Save values for MARC repetable subfield
361
362 marc_repeatable_subfield('910', 'z', rec('909') );
363
364 =cut
365
366 sub marc_repeatable_subfield {
367 my ($f,$sf) = @_;
368 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
369 $marc_repeatable_subfield->{ $f . $sf }++;
370 marc(@_);
371 }
372
373 =head2 marc_indicators
374
375 Set both indicators for MARC field
376
377 marc_indicators('900', ' ', 1);
378
379 Any indicator value other than C<0-9> will be treated as undefined.
380
381 =cut
382
383 sub marc_indicators {
384 my $f = shift || die "marc_indicators need field!\n";
385 my ($i1,$i2) = @_;
386 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
387 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
388
389 $i1 = ' ' if ($i1 !~ /^\d$/);
390 $i2 = ' ' if ($i2 !~ /^\d$/);
391 @{ $marc_indicators->{$f} } = ($i1,$i2);
392 }
393
394 =head2 marc_compose
395
396 Save values for each MARC subfield explicitly
397
398 marc_compose('900',
399 'a', rec('200','a')
400 'b', rec('201','a')
401 'a', rec('200','b')
402 'c', rec('200','c')
403 );
404
405 If you specify C<+> for subfield, value will be appended
406 to previous defined subfield.
407
408 =cut
409
410 sub marc_compose {
411 my $f = shift or die "marc_compose needs field";
412 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
413
414 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
415 my $m = [ $f, $i1, $i2 ];
416
417 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
418
419 if ($#_ % 2 != 1) {
420 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
421 }
422
423 while (@_) {
424 my $sf = shift;
425 my $v = shift;
426
427 next unless (defined($v) && $v !~ /^\s*$/);
428 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
429 if ($sf ne '+') {
430 push @$m, ( $sf, $v );
431 } else {
432 $m->[ $#$m ] .= $v;
433 }
434 }
435
436 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
437
438 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
439 }
440
441 =head2 marc_duplicate
442
443 Generate copy of current MARC record and continue working on copy
444
445 marc_duplicate();
446
447 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
448 C<< _get_marc_fields( offset => 42 ) >>.
449
450 =cut
451
452 sub marc_duplicate {
453 my $m = $marc_record->[ -1 ];
454 die "can't duplicate record which isn't defined" unless ($m);
455 push @{ $marc_record }, dclone( $m );
456 push @{ $marc_leader }, dclone( marc_leader() );
457 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
458 $marc_record_offset = $#{ $marc_record };
459 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
460
461 }
462
463 =head2 marc_remove
464
465 Remove some field or subfield from MARC record.
466
467 marc_remove('200');
468 marc_remove('200','a');
469
470 This will erase field C<200> or C<200^a> from current MARC record.
471
472 marc_remove('*');
473
474 Will remove all fields in current MARC record.
475
476 This is useful after calling C<marc_duplicate> or on it's own (but, you
477 should probably just remove that subfield definition if you are not
478 using C<marc_duplicate>).
479
480 FIXME: support fields < 10.
481
482 =cut
483
484 sub marc_remove {
485 my ($f, $sf) = @_;
486
487 die "marc_remove needs record number" unless defined($f);
488
489 my $marc = $marc_record->[ $marc_record_offset ];
490
491 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
492
493 if ($f eq '*') {
494
495 delete( $marc_record->[ $marc_record_offset ] );
496 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
497
498 } else {
499
500 my $i = 0;
501 foreach ( 0 .. $#{ $marc } ) {
502 last unless (defined $marc->[$i]);
503 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
504 if ($marc->[$i]->[0] eq $f) {
505 if (! defined $sf) {
506 # remove whole field
507 splice @$marc, $i, 1;
508 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
509 $i--;
510 } else {
511 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
512 my $o = ($j * 2) + 3;
513 if ($marc->[$i]->[$o] eq $sf) {
514 # remove subfield
515 splice @{$marc->[$i]}, $o, 2;
516 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
517 # is record now empty?
518 if ($#{ $marc->[$i] } == 2) {
519 splice @$marc, $i, 1;
520 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
521 $i--;
522 };
523 }
524 }
525 }
526 }
527 $i++;
528 }
529
530 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
531
532 $marc_record->[ $marc_record_offset ] = $marc;
533 }
534
535 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
536 }
537
538 =head2 marc_original_order
539
540 Copy all subfields preserving original order to marc field.
541
542 marc_original_order( marc_field_number, original_input_field_number );
543
544 Please note that field numbers are consistent with other commands (marc
545 field number first), but somewhat counter-intuitive (destination and then
546 source).
547
548 You might want to use this command if you are just renaming subfields or
549 using pre-processing modify_record in C<config.yml> and don't need any
550 post-processing or want to preserve order of original subfields.
551
552
553 =cut
554
555 sub marc_original_order {
556
557 my ($to, $from) = @_;
558 die "marc_original_order needs from and to fields\n" unless ($from && $to);
559
560 return unless defined($rec->{$from});
561
562 my $r = $rec->{$from};
563 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
564
565 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
566 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
567
568 foreach my $d (@$r) {
569
570 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
571 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
572 next;
573 }
574
575 my @sfs = @{ $d->{subfields} };
576
577 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
578
579 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
580
581 my $m = [ $to, $i1, $i2 ];
582
583 while (my $sf = shift @sfs) {
584
585 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
586 my $offset = shift @sfs;
587 die "corrupted sufields specification for field $from\n" unless defined($offset);
588
589 my $v;
590 if (ref($d->{$sf}) eq 'ARRAY') {
591 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
592 } elsif ($offset == 0) {
593 $v = $d->{$sf};
594 } else {
595 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
596 }
597 push @$m, ( $sf, $v ) if (defined($v));
598 }
599
600 if ($#{$m} > 2) {
601 push @{ $marc_record->[ $marc_record_offset ] }, $m;
602 }
603 }
604
605 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
606 }
607
608
609 =head2 marc_count
610
611 Return number of MARC records created using L</marc_duplicate>.
612
613 print "created ", marc_count(), " records";
614
615 =cut
616
617 sub marc_count {
618 return $#{ $marc_record };
619 }
620
621 =head1 PRIVATE FUNCTIONS
622
623 =head2 _marc_push
624
625 _marc_push( $marc );
626
627 =cut
628
629 sub _marc_push {
630 my $marc = shift || die "no marc?";
631 push @{ $marc_record->[ $marc_record_offset ] }, $marc;
632 }
633
634 =head2 _clean
635
636 Clean internal structures
637
638 =cut
639
640 sub _clean {
641 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
642 ($marc_record_offset, $marc_fetch_offset) = (0,0);
643 }
644
645
646 =head2 _get_marc_fields
647
648 Get all fields defined by calls to C<marc>
649
650 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
651
652 We are using I<magic> which detect repeatable fields only from
653 sequence of field/subfield data generated by normalization.
654
655 This magic is disabled for all records created with C<marc_template>.
656
657 Repeatable field is created when there is second occurence of same subfield or
658 if any of indicators are different.
659
660 This is sane for most cases. Something like:
661
662 900a-1 900b-1 900c-1
663 900a-2 900b-2
664 900a-3
665
666 will be created from any combination of:
667
668 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
669
670 and following rules:
671
672 marc('900','a', rec('200','a') );
673 marc('900','b', rec('200','b') );
674 marc('900','c', rec('200','c') );
675
676 which might not be what you have in mind. If you need repeatable subfield,
677 define it using C<marc_repeatable_subfield> like this:
678
679 marc_repeatable_subfield('900','a');
680 marc('900','a', rec('200','a') );
681 marc('900','b', rec('200','b') );
682 marc('900','c', rec('200','c') );
683
684 will create:
685
686 900a-1 900a-2 900a-3 900b-1 900c-1
687 900b-2
688
689 There is also support for returning next or specific using:
690
691 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
692 # do something with $mf
693 }
694
695 will always return fields from next MARC record or
696
697 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
698
699 will return 42th copy record (if it exists).
700
701 =cut
702
703 my $fetch_pos;
704
705 sub _get_marc_fields {
706
707 my $arg = {@_};
708 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
709 $fetch_pos = $marc_fetch_offset;
710 if ($arg->{offset}) {
711 $fetch_pos = $arg->{offset};
712 } elsif($arg->{fetch_next}) {
713 $marc_fetch_offset++;
714 }
715
716 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
717
718 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
719
720 my $marc_rec = $marc_record->[ $fetch_pos ];
721
722 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
723
724 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
725
726 # first, sort all existing fields
727 # XXX might not be needed, but modern perl might randomize elements in hash
728 # my @sorted_marc_record = sort {
729 # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
730 # } @{ $marc_rec };
731
732 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
733
734 # output marc fields
735 my @m;
736
737 # count unique field-subfields (used for offset when walking to next subfield)
738 my $u;
739 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
740
741 if ($debug) {
742 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
743 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
744 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
745 warn "## subfield count = ", dump( $u ), $/;
746 }
747
748 my $len = $#sorted_marc_record;
749 my $visited;
750 my $i = 0;
751 my $field;
752
753 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
754
755 foreach ( 0 .. $len ) {
756
757 # find next element which isn't visited
758 while ($visited->{$i}) {
759 $i = ($i + 1) % ($len + 1);
760 }
761
762 # mark it visited
763 $visited->{$i}++;
764
765 my $row = dclone( $sorted_marc_record[$i] );
766
767 if ( $created_with_marc_template->{ $row->[0] } ) {
768 push @m, $row;
769 warn "## copied marc_template created ", dump( $row ),$/ if $debug;
770 next;
771 }
772
773 # field and subfield which is key for
774 # marc_repeatable_subfield and u
775 my $fsf = $row->[0] . ( $row->[3] || '' );
776
777 if ($debug > 1) {
778
779 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
780 print "### this [$i]: ", dump( $row ),$/;
781 print "### sf: ", $row->[3], " vs ", $field->[3],
782 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
783 if ($#$field >= 0);
784
785 }
786
787 # if field exists
788 if ( $#$field >= 0 ) {
789 if (
790 $row->[0] ne $field->[0] || # field
791 $row->[1] ne $field->[1] || # i1
792 $row->[2] ne $field->[2] # i2
793 ) {
794 push @m, $field;
795 warn "## saved/1 ", dump( $field ),$/ if ($debug);
796 $field = $row;
797
798 } elsif (
799 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
800 ||
801 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
802 ! $marc_repeatable_subfield->{ $fsf }
803 )
804 ) {
805 push @m, $field;
806 warn "## saved/2 ", dump( $field ),$/ if ($debug);
807 $field = $row;
808
809 } else {
810 # append new subfields to existing field
811 push @$field, ( $row->[3], $row->[4] );
812 }
813 } else {
814 # insert first field
815 $field = $row;
816 }
817
818 if (! $marc_repeatable_subfield->{ $fsf }) {
819 # make step to next subfield
820 $i = ($i + $u->{ $fsf } ) % ($len + 1);
821 }
822 }
823
824 if ($#$field >= 0) {
825 push @m, $field;
826 warn "## saved/3 ", dump( $field ),$/ if ($debug);
827 }
828
829 return \@m;
830 }
831
832 =head2 _get_marc_leader
833
834 Return leader from currently fetched record by L</_get_marc_fields>
835
836 print WebPAC::Normalize::MARC::_get_marc_leader();
837
838 =cut
839
840 sub _get_marc_leader {
841 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
842 return $marc_leader->[ $fetch_pos ];
843 }
844
845 =head2 _created_marc_records
846
847 my $nr_records = _created_marc_records;
848
849 =cut
850
851 sub _created_marc_records {
852 return $#{ $marc_record } + 1 if $marc_record;
853 }
854
855 1;

  ViewVC Help
Powered by ViewVC 1.1.26