/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1040 - (show annotations)
Mon Nov 12 12:18:55 2007 UTC (16 years, 6 months ago) by dpavlin
File size: 22724 byte(s)
 r1618@llin:  dpavlin | 2007-11-12 13:18:54 +0100
 cleanup and fixes

1 package WebPAC::Normalize::MARC;
2 use Exporter 'import';
3 our @EXPORT = qw/
4 marc marc_indicators marc_repeatable_subfield
5 marc_compose marc_leader marc_fixed
6 marc_duplicate marc_remove marc_count
7 marc_original_order
8 marc_template
9 /;
10
11 use strict;
12 use warnings;
13
14 use Storable qw/dclone/;
15 use Data::Dump qw/dump/;
16 use Carp qw/confess/;
17
18 use WebPAC::Normalize;
19
20 our $debug = 0;
21
22 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24
25 our $rec;
26
27 =head1 NAME
28
29 WebPAC::Normalize::MARC - create MARC/ISO2709 records
30
31 =cut
32
33 =head1 FUNCTIONS
34
35 =head2 marc_template
36
37 marc_template(
38 from => 225, to => 440,
39 subfields_rename => [
40 'a' => 'a',
41 'x' => 'x',
42 'v' => 'v',
43 'h' => 'n',
44 'i' => 'p',
45 'w' => 'v',
46 ],
47 isis_template => [
48 'a ; |v. |i',
49 'a. |i ; |w',
50 ],
51 marc_template => [
52 'a, |x ; |v. |n, |p ; |v',
53 'a ; |v. |p ; |v',
54 ],
55 );
56
57 Returns number of records produced.
58
59 =cut
60
61 my $created_with_marc_template;
62
63 sub marc_template {
64 my $args = {@_};
65 warn "## marc_template(",dump($args),")",$/ if $debug;
66
67 foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70 }
71
72 die "marc_template needs isis_template or marc_template"
73 if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74
75 my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76 my $r = $rec->{ $args->{from} } || return;
77 die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78
79 my @subfields_rename = @{ $args->{subfields_rename} };
80 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81
82 confess "need mapping in pairs for subfields_rename"
83 if $#subfields_rename % 2 != 1;
84
85 my ( $subfields_rename, $from_subfields );
86 our $to_subfields = {};
87 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88 my ( $f, $t ) = (
89 $from_subfields->{ $from }++,
90 $to_subfields->{ $to }++
91 );
92 $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93 }
94 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95 warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96 warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97
98 our $_template;
99
100 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101 $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102
103 my @marc_out;
104
105 sub _parse_template {
106 my ( $name, $templates ) = @_;
107
108 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109
110 foreach my $template ( @{ $templates } ) {
111 our $count = {};
112 our @order = ();
113 sub my_count {
114 my $sf = shift;
115 my $nr = $count->{$sf}++;
116 push @order, [ $sf, $nr ];
117 return $sf . $nr;
118 }
119 my $pos_template = $template;
120 $pos_template =~ s/($fields_re)/my_count($1)/ge;
121 my $count_key = dump( $count );
122 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 }
126 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 }
128
129 _parse_template( 'marc', $args->{marc_template} );
130 _parse_template( 'isis', $args->{isis_template} );
131 warn "### _template = ",dump( $_template ),$/ if $debug;
132
133 my $m;
134
135 foreach my $r ( @{ $rec->{ $args->{from} } } ) {
136
137 my $i1 = $r->{i1} || ' ';
138 my $i2 = $r->{i2} || ' ';
139 my $to = $args->{to};
140 $m = [ $to, $i1, $i2 ];
141
142 $created_with_marc_template->{ $to }++;
143
144 warn "### r = ",dump( $r ),$/ if $debug;
145
146 my ( $from_mapping, $to_mapping, $from_count, $to_count );
147 foreach my $from_sf ( keys %{$r} ) {
148 # skip everything which isn't one char subfield (e.g. 'subfields')
149 next unless $from_sf =~ m/^\w$/;
150 my $from_nr = $from_count->{$from_sf}++;
151 my $rename_to = $subfields_rename->{ $from_sf } ||
152 die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
153 my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
154 $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
155
156 my $to_nr2 = $to_count->{ $to_sf }++;
157 $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
158
159 warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
160 }
161
162 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
163
164 my $count_key = {
165 from => dump( $from_count ),
166 to => dump( $to_count),
167 };
168
169 warn "### count_key = ",dump( $count_key ),$/ if $debug;
170
171 my $processed_templates = 0;
172
173 # this defines order of traversal
174 foreach ( qw/isis:from marc:to/ ) {
175 my ($name,$count_name) = split(/:/);
176
177 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
178
179 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
180 $processed_templates++;
181
182 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
183
184 our $fill_in = {};
185
186 my @templates = split(/\|/, $template );
187 @templates = ( $template ) unless @templates;
188
189 warn "### templates = ",dump( @templates ),$/ if $debug;
190
191 foreach my $sf ( @templates ) {
192 sub fill_in {
193 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
194 warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
195 my ( $from_sf, $from_nr );
196 if ( $name eq 'marc' ) {
197 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), " form record ",dump( $r ) unless defined $to_mapping->{$sf}->[$nr];
198 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
199 } else {
200 ( $from_sf, $from_nr ) = ( $sf, $nr );
201 }
202 my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
203 if ( ref( $v ) eq 'ARRAY' ) {
204 $v = $pre . $v->[$from_nr] . $post;
205 } elsif ( $from_nr == 0 ) {
206 $v = $pre . $v . $post;
207 } else {
208 die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
209 }
210 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
211 $fill_in->{$sf}->[$nr] = $v;
212 return $v;
213 }
214 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
215 warn "#### $sf <<<< $fields_re\n" if $debug;
216 $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
217 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
218 }
219
220 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
221
222 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
223 my ( $sf, $nr ) = @$sf;
224 my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
225 if ( $name eq 'isis') {
226 ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
227 }
228 warn "++ $sf/$nr |$v|\n" if $debug;
229 push @$m, ( $sf, $v );
230 }
231
232 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
233
234 push @marc_out, $m;
235
236 last;
237 }
238
239 die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
240 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
241 }
242
243
244 my $recs = 0;
245
246 foreach my $marc ( @marc_out ) {
247 warn "+++ ",dump( $marc ),$/ if $debug;
248 _marc_push( $marc );
249 $recs++;
250 }
251
252 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
253
254 return $recs;
255 }
256
257 =head2 marc_leader
258
259 Setup fields within MARC leader or get leader
260
261 marc_leader('05','c');
262 my $leader = marc_leader();
263
264 =cut
265
266 sub marc_leader {
267 my ($offset,$value) = @_;
268
269 if ($offset) {
270 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
271 } else {
272
273 if (defined($marc_leader)) {
274 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
275 return $marc_leader->[ $marc_record_offset ];
276 } else {
277 return;
278 }
279 }
280 }
281
282 =head2 marc_fixed
283
284 Create control/indentifier fields with values in fixed positions
285
286 marc_fixed('008', 00, '070402');
287 marc_fixed('008', 39, '|');
288
289 Positions not specified will be filled with spaces (C<0x20>).
290
291 There will be no effort to extend last specified value to full length of
292 field in standard.
293
294 =cut
295
296 sub marc_fixed {
297 my ($f, $pos, $val) = @_;
298 die "need marc(field, position, value)" unless defined($f) && defined($pos);
299
300 confess "need val" unless defined $val;
301
302 my $update = 0;
303
304 map {
305 if ($_->[0] eq $f) {
306 my $old = $_->[1];
307 if (length($old) <= $pos) {
308 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
309 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
310 } else {
311 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
312 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
313 }
314 $update++;
315 }
316 } @{ $marc_record->[ $marc_record_offset ] };
317
318 if (! $update) {
319 my $v = ' ' x $pos . $val;
320 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
321 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
322 }
323 }
324
325 =head2 marc
326
327 Save value for MARC field
328
329 marc('900','a', rec('200','a') );
330 marc('001', rec('000') );
331
332 =cut
333
334 sub marc {
335 my $f = shift or die "marc needs field";
336 die "marc field must be numer" unless ($f =~ /^\d+$/);
337
338 my $sf;
339 if ($f >= 10) {
340 $sf = shift or die "marc needs subfield";
341 }
342
343 foreach (@_) {
344 my $v = $_; # make var read-write for Encode
345 next unless (defined($v) && $v !~ /^\s*$/);
346 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
347 if (defined $sf) {
348 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
349 } else {
350 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
351 }
352 }
353 }
354
355 =head2 marc_repeatable_subfield
356
357 Save values for MARC repetable subfield
358
359 marc_repeatable_subfield('910', 'z', rec('909') );
360
361 =cut
362
363 sub marc_repeatable_subfield {
364 my ($f,$sf) = @_;
365 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
366 $marc_repeatable_subfield->{ $f . $sf }++;
367 marc(@_);
368 }
369
370 =head2 marc_indicators
371
372 Set both indicators for MARC field
373
374 marc_indicators('900', ' ', 1);
375
376 Any indicator value other than C<0-9> will be treated as undefined.
377
378 =cut
379
380 sub marc_indicators {
381 my $f = shift || die "marc_indicators need field!\n";
382 my ($i1,$i2) = @_;
383 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
384 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
385
386 $i1 = ' ' if ($i1 !~ /^\d$/);
387 $i2 = ' ' if ($i2 !~ /^\d$/);
388 @{ $marc_indicators->{$f} } = ($i1,$i2);
389 }
390
391 =head2 marc_compose
392
393 Save values for each MARC subfield explicitly
394
395 marc_compose('900',
396 'a', rec('200','a')
397 'b', rec('201','a')
398 'a', rec('200','b')
399 'c', rec('200','c')
400 );
401
402 If you specify C<+> for subfield, value will be appended
403 to previous defined subfield.
404
405 =cut
406
407 sub marc_compose {
408 my $f = shift or die "marc_compose needs field";
409 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
410
411 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
412 my $m = [ $f, $i1, $i2 ];
413
414 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
415
416 if ($#_ % 2 != 1) {
417 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
418 }
419
420 while (@_) {
421 my $sf = shift;
422 my $v = shift;
423
424 next unless (defined($v) && $v !~ /^\s*$/);
425 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
426 if ($sf ne '+') {
427 push @$m, ( $sf, $v );
428 } else {
429 $m->[ $#$m ] .= $v;
430 }
431 }
432
433 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
434
435 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
436 }
437
438 =head2 marc_duplicate
439
440 Generate copy of current MARC record and continue working on copy
441
442 marc_duplicate();
443
444 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
445 C<< _get_marc_fields( offset => 42 ) >>.
446
447 =cut
448
449 sub marc_duplicate {
450 my $m = $marc_record->[ -1 ];
451 die "can't duplicate record which isn't defined" unless ($m);
452 push @{ $marc_record }, dclone( $m );
453 push @{ $marc_leader }, dclone( marc_leader() );
454 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
455 $marc_record_offset = $#{ $marc_record };
456 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
457
458 }
459
460 =head2 marc_remove
461
462 Remove some field or subfield from MARC record.
463
464 marc_remove('200');
465 marc_remove('200','a');
466
467 This will erase field C<200> or C<200^a> from current MARC record.
468
469 marc_remove('*');
470
471 Will remove all fields in current MARC record.
472
473 This is useful after calling C<marc_duplicate> or on it's own (but, you
474 should probably just remove that subfield definition if you are not
475 using C<marc_duplicate>).
476
477 FIXME: support fields < 10.
478
479 =cut
480
481 sub marc_remove {
482 my ($f, $sf) = @_;
483
484 die "marc_remove needs record number" unless defined($f);
485
486 my $marc = $marc_record->[ $marc_record_offset ];
487
488 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
489
490 if ($f eq '*') {
491
492 delete( $marc_record->[ $marc_record_offset ] );
493 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
494
495 } else {
496
497 my $i = 0;
498 foreach ( 0 .. $#{ $marc } ) {
499 last unless (defined $marc->[$i]);
500 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
501 if ($marc->[$i]->[0] eq $f) {
502 if (! defined $sf) {
503 # remove whole field
504 splice @$marc, $i, 1;
505 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
506 $i--;
507 } else {
508 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
509 my $o = ($j * 2) + 3;
510 if ($marc->[$i]->[$o] eq $sf) {
511 # remove subfield
512 splice @{$marc->[$i]}, $o, 2;
513 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
514 # is record now empty?
515 if ($#{ $marc->[$i] } == 2) {
516 splice @$marc, $i, 1;
517 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
518 $i--;
519 };
520 }
521 }
522 }
523 }
524 $i++;
525 }
526
527 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
528
529 $marc_record->[ $marc_record_offset ] = $marc;
530 }
531
532 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
533 }
534
535 =head2 marc_original_order
536
537 Copy all subfields preserving original order to marc field.
538
539 marc_original_order( marc_field_number, original_input_field_number );
540
541 Please note that field numbers are consistent with other commands (marc
542 field number first), but somewhat counter-intuitive (destination and then
543 source).
544
545 You might want to use this command if you are just renaming subfields or
546 using pre-processing modify_record in C<config.yml> and don't need any
547 post-processing or want to preserve order of original subfields.
548
549
550 =cut
551
552 sub marc_original_order {
553
554 my ($to, $from) = @_;
555 die "marc_original_order needs from and to fields\n" unless ($from && $to);
556
557 return unless defined($rec->{$from});
558
559 my $r = $rec->{$from};
560 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
561
562 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
563 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
564
565 foreach my $d (@$r) {
566
567 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
568 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
569 next;
570 }
571
572 my @sfs = @{ $d->{subfields} };
573
574 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
575
576 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
577
578 my $m = [ $to, $i1, $i2 ];
579
580 while (my $sf = shift @sfs) {
581
582 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
583 my $offset = shift @sfs;
584 die "corrupted sufields specification for field $from\n" unless defined($offset);
585
586 my $v;
587 if (ref($d->{$sf}) eq 'ARRAY') {
588 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
589 } elsif ($offset == 0) {
590 $v = $d->{$sf};
591 } else {
592 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
593 }
594 push @$m, ( $sf, $v ) if (defined($v));
595 }
596
597 if ($#{$m} > 2) {
598 push @{ $marc_record->[ $marc_record_offset ] }, $m;
599 }
600 }
601
602 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
603 }
604
605
606 =head2 marc_count
607
608 Return number of MARC records created using L</marc_duplicate>.
609
610 print "created ", marc_count(), " records";
611
612 =cut
613
614 sub marc_count {
615 return $#{ $marc_record };
616 }
617
618 =head1 PRIVATE FUNCTIONS
619
620 =head2 _marc_push
621
622 _marc_push( $marc );
623
624 =cut
625
626 sub _marc_push {
627 my $marc = shift || die "no marc?";
628 push @{ $marc_record->[ $marc_record_offset ] }, $marc;
629 }
630
631 =head2 _clean
632
633 Clean internal structures
634
635 =cut
636
637 sub _clean {
638 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
639 ($marc_record_offset, $marc_fetch_offset) = (0,0);
640 }
641
642
643 =head2 _get_marc_fields
644
645 Get all fields defined by calls to C<marc>
646
647 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
648
649 We are using I<magic> which detect repeatable fields only from
650 sequence of field/subfield data generated by normalization.
651
652 This magic is disabled for all records created with C<marc_template>.
653
654 Repeatable field is created when there is second occurence of same subfield or
655 if any of indicators are different.
656
657 This is sane for most cases. Something like:
658
659 900a-1 900b-1 900c-1
660 900a-2 900b-2
661 900a-3
662
663 will be created from any combination of:
664
665 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
666
667 and following rules:
668
669 marc('900','a', rec('200','a') );
670 marc('900','b', rec('200','b') );
671 marc('900','c', rec('200','c') );
672
673 which might not be what you have in mind. If you need repeatable subfield,
674 define it using C<marc_repeatable_subfield> like this:
675
676 marc_repeatable_subfield('900','a');
677 marc('900','a', rec('200','a') );
678 marc('900','b', rec('200','b') );
679 marc('900','c', rec('200','c') );
680
681 will create:
682
683 900a-1 900a-2 900a-3 900b-1 900c-1
684 900b-2
685
686 There is also support for returning next or specific using:
687
688 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
689 # do something with $mf
690 }
691
692 will always return fields from next MARC record or
693
694 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
695
696 will return 42th copy record (if it exists).
697
698 =cut
699
700 my $fetch_pos;
701
702 sub _get_marc_fields {
703
704 my $arg = {@_};
705 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
706 $fetch_pos = $marc_fetch_offset;
707 if ($arg->{offset}) {
708 $fetch_pos = $arg->{offset};
709 } elsif($arg->{fetch_next}) {
710 $marc_fetch_offset++;
711 }
712
713 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
714
715 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
716
717 my $marc_rec = $marc_record->[ $fetch_pos ];
718
719 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
720
721 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
722
723 # first, sort all existing fields
724 # XXX might not be needed, but modern perl might randomize elements in hash
725 # my @sorted_marc_record = sort {
726 # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
727 # } @{ $marc_rec };
728
729 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
730
731 # output marc fields
732 my @m;
733
734 # count unique field-subfields (used for offset when walking to next subfield)
735 my $u;
736 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
737
738 if ($debug) {
739 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
740 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
741 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
742 warn "## subfield count = ", dump( $u ), $/;
743 }
744
745 my $len = $#sorted_marc_record;
746 my $visited;
747 my $i = 0;
748 my $field;
749
750 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
751
752 foreach ( 0 .. $len ) {
753
754 # find next element which isn't visited
755 while ($visited->{$i}) {
756 $i = ($i + 1) % ($len + 1);
757 }
758
759 # mark it visited
760 $visited->{$i}++;
761
762 my $row = dclone( $sorted_marc_record[$i] );
763
764 if ( $created_with_marc_template->{ $row->[0] } ) {
765 push @m, $row;
766 warn "## copied marc_template created ", dump( $row ),$/ if $debug;
767 next;
768 }
769
770 # field and subfield which is key for
771 # marc_repeatable_subfield and u
772 my $fsf = $row->[0] . ( $row->[3] || '' );
773
774 if ($debug > 1) {
775
776 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
777 print "### this [$i]: ", dump( $row ),$/;
778 print "### sf: ", $row->[3], " vs ", $field->[3],
779 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
780 if ($#$field >= 0);
781
782 }
783
784 # if field exists
785 if ( $#$field >= 0 ) {
786 if (
787 $row->[0] ne $field->[0] || # field
788 $row->[1] ne $field->[1] || # i1
789 $row->[2] ne $field->[2] # i2
790 ) {
791 push @m, $field;
792 warn "## saved/1 ", dump( $field ),$/ if ($debug);
793 $field = $row;
794
795 } elsif (
796 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
797 ||
798 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
799 ! $marc_repeatable_subfield->{ $fsf }
800 )
801 ) {
802 push @m, $field;
803 warn "## saved/2 ", dump( $field ),$/ if ($debug);
804 $field = $row;
805
806 } else {
807 # append new subfields to existing field
808 push @$field, ( $row->[3], $row->[4] );
809 }
810 } else {
811 # insert first field
812 $field = $row;
813 }
814
815 if (! $marc_repeatable_subfield->{ $fsf }) {
816 # make step to next subfield
817 $i = ($i + $u->{ $fsf } ) % ($len + 1);
818 }
819 }
820
821 if ($#$field >= 0) {
822 push @m, $field;
823 warn "## saved/3 ", dump( $field ),$/ if ($debug);
824 }
825
826 return \@m;
827 }
828
829 =head2 _get_marc_leader
830
831 Return leader from currently fetched record by L</_get_marc_fields>
832
833 print WebPAC::Normalize::MARC::_get_marc_leader();
834
835 =cut
836
837 sub _get_marc_leader {
838 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
839 return $marc_leader->[ $fetch_pos ];
840 }
841
842 =head2 _created_marc_records
843
844 my $nr_records = _created_marc_records;
845
846 =cut
847
848 sub _created_marc_records {
849 return $#{ $marc_record } + 1 if $marc_record;
850 }
851
852 1;

  ViewVC Help
Powered by ViewVC 1.1.26