/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1111 - (show annotations)
Sat Sep 6 10:54:25 2008 UTC (15 years, 8 months ago) by dpavlin
File size: 23263 byte(s)
a bit more work on marc_clone on some real records

1 package WebPAC::Normalize::MARC;
2 use Exporter 'import';
3 our @EXPORT = qw/
4 marc marc_indicators marc_repeatable_subfield
5 marc_compose marc_leader marc_fixed
6 marc_duplicate marc_remove marc_count
7 marc_original_order
8 marc_template
9 marc_clone
10 /;
11
12 use strict;
13 use warnings;
14
15 use Storable qw/dclone/;
16 use Data::Dump qw/dump/;
17 use Carp qw/confess/;
18
19 use WebPAC::Normalize;
20
21 our $debug = 0;
22
23 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
24 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
25
26 our $rec;
27
28 =head1 NAME
29
30 WebPAC::Normalize::MARC - create MARC/ISO2709 records
31
32 =cut
33
34 =head1 FUNCTIONS
35
36 =head2 marc_template
37
38 marc_template(
39 from => 225, to => 440,
40 subfields_rename => [
41 'a' => 'a',
42 'x' => 'x',
43 'v' => 'v',
44 'h' => 'n',
45 'i' => 'p',
46 'w' => 'v',
47 ],
48 isis_template => [
49 'a ; |v. |i',
50 'a. |i ; |w',
51 ],
52 marc_template => [
53 'a, |x ; |v. |n, |p ; |v',
54 'a ; |v. |p ; |v',
55 ],
56 );
57
58 Returns number of records produced.
59
60 =cut
61
62 my $created_with_marc_template;
63
64 sub marc_template {
65 my $args = {@_};
66 warn "## marc_template(",dump($args),")",$/ if $debug;
67
68 foreach ( qw/subfields_rename isis_template marc_template/ ) {
69 # warn "ref($_) = ",ref($args->{$_}) if $debug;
70 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
71 }
72
73 die "marc_template needs isis_template or marc_template"
74 if ! defined $args->{isis_template} && ! defined $args->{marc_template};
75
76 my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
77 my $r = $rec->{ $args->{from} } || return;
78 die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
79
80 my @subfields_rename = @{ $args->{subfields_rename} };
81 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
82
83 confess "need mapping in pairs for subfields_rename"
84 if $#subfields_rename % 2 != 1;
85
86 my ( $subfields_rename, $from_subfields );
87 our $to_subfields = {};
88 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
89 my ( $f, $t ) = (
90 $from_subfields->{ $from }++,
91 $to_subfields->{ $to }++
92 );
93 $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
94 }
95 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
96 warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
97 warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
98
99 our $_template;
100
101 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
102 $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
103
104 my @marc_out;
105
106 sub _parse_template {
107 my ( $name, $templates ) = @_;
108
109 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
110
111 foreach my $template ( @{ $templates } ) {
112 our $count = {};
113 our @order = ();
114 sub my_count {
115 my $sf = shift;
116 my $nr = $count->{$sf}++;
117 push @order, [ $sf, $nr ];
118 return $sf . $nr;
119 }
120 my $pos_template = $template;
121 $pos_template =~ s/($fields_re)/my_count($1)/ge;
122 my $count_key = dump( $count );
123 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
124 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
125 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
126 }
127 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
128 }
129
130 _parse_template( 'marc', $args->{marc_template} );
131 _parse_template( 'isis', $args->{isis_template} );
132 warn "### _template = ",dump( $_template ),$/ if $debug;
133
134 my $m;
135
136 our $from_rec = $rec->{ $args->{from} };
137
138 foreach my $r ( @$from_rec ) {
139
140 my $to = $args->{to};
141 my ($i1,$i2) = _get_marc_indicators( $to );
142 $m = [ $to, $i1, $i2 ];
143
144 $created_with_marc_template->{ $to }++;
145
146 warn "### r = ",dump( $r ),$/ if $debug;
147
148 my ( $from_mapping, $from_count, $to_count );
149 our $to_mapping;
150 foreach my $from_sf ( keys %{$r} ) {
151 # skip everything which isn't one char subfield (e.g. 'subfields')
152 next unless $from_sf =~ m/^\w$/;
153 my $from_nr = $from_count->{$from_sf}++;
154 my $rename_to = $subfields_rename->{ $from_sf } ||
155 die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
156 my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
157 $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
158
159 my $to_nr2 = $to_count->{ $to_sf }++;
160 $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
161
162 warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
163 }
164
165 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
166
167 my $count_key = {
168 from => dump( $from_count ),
169 to => dump( $to_count),
170 };
171
172 warn "### count_key = ",dump( $count_key ),$/ if $debug;
173
174 my $processed_templates = 0;
175
176 # this defines order of traversal
177 foreach ( qw/isis:from marc:to/ ) {
178 my ($name,$count_name) = split(/:/);
179
180 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
181
182 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
183 $processed_templates++;
184
185 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
186
187 our $fill_in = {};
188
189 my @templates = split(/\|/, $template );
190 @templates = ( $template ) unless @templates;
191
192 warn "### templates = ",dump( @templates ),$/ if $debug;
193
194 foreach my $sf ( @templates ) {
195 sub fill_in {
196 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
197 warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
198 my ( $from_sf, $from_nr );
199 if ( $name eq 'marc' ) {
200 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
201 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
202 } else {
203 ( $from_sf, $from_nr ) = ( $sf, $nr );
204 }
205 my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
206 if ( ref( $v ) eq 'ARRAY' ) {
207 $v = $pre . $v->[$from_nr] . $post;
208 } elsif ( $from_nr == 0 ) {
209 $v = $pre . $v . $post;
210 } else {
211 die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
212 }
213 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
214 $fill_in->{$sf}->[$nr] = $v;
215 return $v;
216 }
217 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
218 warn "#### $sf <<<< $fields_re\n" if $debug;
219 $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
220 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
221 }
222
223 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
224
225 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
226 my ( $sf, $nr ) = @$sf;
227 my $v = $fill_in->{$sf}->[$nr];
228 die "can't find fill_in $sf/$nr" unless defined $v;
229 if ( $name eq 'isis') {
230 ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
231 }
232 warn "++ $sf/$nr |$v|\n" if $debug;
233 push @$m, ( $sf, $v );
234 }
235
236 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
237
238 push @marc_out, $m;
239
240 last;
241 }
242
243 die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
244 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
245 }
246
247
248 my $recs = 0;
249
250 foreach my $marc ( @marc_out ) {
251 warn "+++ ",dump( $marc ),$/ if $debug;
252 _marc_push( $marc );
253 $recs++;
254 }
255
256 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
257
258 return $recs;
259 }
260
261 =head2 marc_leader
262
263 Setup fields within MARC leader or get leader
264
265 marc_leader('05','c');
266 my $leader = marc_leader();
267
268 =cut
269
270 sub marc_leader {
271 my ($offset,$value) = @_;
272
273 if ($offset) {
274 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
275 } else {
276
277 if (defined($marc_leader)) {
278 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
279 return $marc_leader->[ $marc_record_offset ];
280 } else {
281 return;
282 }
283 }
284 }
285
286 =head2 marc_fixed
287
288 Create control/indentifier fields with values in fixed positions
289
290 marc_fixed('008', 00, '070402');
291 marc_fixed('008', 39, '|');
292
293 Positions not specified will be filled with spaces (C<0x20>).
294
295 There will be no effort to extend last specified value to full length of
296 field in standard.
297
298 =cut
299
300 sub marc_fixed {
301 my ($f, $pos, $val) = @_;
302 die "need marc(field, position, value)" unless defined($f) && defined($pos);
303
304 confess "need val" unless defined $val;
305
306 my $update = 0;
307
308 map {
309 if ($_->[0] eq $f) {
310 my $old = $_->[1];
311 if (length($old) <= $pos) {
312 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
313 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
314 } else {
315 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
316 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
317 }
318 $update++;
319 }
320 } @{ $marc_record->[ $marc_record_offset ] };
321
322 if (! $update) {
323 my $v = ' ' x $pos . $val;
324 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
325 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
326 }
327 }
328
329 =head2 marc
330
331 Save value for MARC field
332
333 marc('900','a', rec('200','a') );
334 marc('001', rec('000') );
335
336 =cut
337
338 sub marc {
339 my $f = shift or die "marc needs field";
340 die "marc field must be numer" unless ($f =~ /^\d+$/);
341
342 my $sf;
343 if ($f >= 10) {
344 $sf = shift or die "marc needs subfield";
345 }
346
347 foreach (@_) {
348 my $v = $_; # make var read-write for Encode
349 next unless (defined($v) && $v !~ /^\s*$/);
350 my ($i1,$i2) = _get_marc_indicators( $f );
351 if (defined $sf) {
352 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
353 } else {
354 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
355 }
356 }
357 }
358
359 =head2 marc_repeatable_subfield
360
361 Save values for MARC repetable subfield
362
363 marc_repeatable_subfield('910', 'z', rec('909') );
364
365 =cut
366
367 sub marc_repeatable_subfield {
368 my ($f,$sf) = @_;
369 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
370 $marc_repeatable_subfield->{ $f . $sf }++;
371 marc(@_);
372 }
373
374 =head2 marc_indicators
375
376 Set both indicators for MARC field
377
378 marc_indicators('900', ' ', 1);
379
380 Any indicator value other than C<0-9> will be treated as undefined.
381
382 =cut
383
384 sub marc_indicators {
385 my $f = shift || die "marc_indicators need field!\n";
386 my ($i1,$i2) = @_;
387 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
388 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
389
390 $i1 = ' ' if ($i1 !~ /^\d$/);
391 $i2 = ' ' if ($i2 !~ /^\d$/);
392 @{ $marc_indicators->{$f} } = ($i1,$i2);
393 }
394
395 sub _get_marc_indicators {
396 my $f = shift || confess "need field!\n";
397 return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
398 }
399
400 =head2 marc_compose
401
402 Save values for each MARC subfield explicitly
403
404 marc_compose('900',
405 'a', rec('200','a')
406 'b', rec('201','a')
407 'a', rec('200','b')
408 'c', rec('200','c')
409 );
410
411 If you specify C<+> for subfield, value will be appended
412 to previous defined subfield.
413
414 =cut
415
416 sub marc_compose {
417 my $f = shift or die "marc_compose needs field";
418 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
419
420 my ($i1,$i2) = _get_marc_indicators( $f );
421 my $m = [ $f, $i1, $i2 ];
422
423 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
424
425 if ($#_ % 2 != 1) {
426 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
427 }
428
429 while (@_) {
430 my $sf = shift;
431 my $v = shift;
432
433 next unless (defined($v) && $v !~ /^\s*$/);
434 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
435 if ($sf ne '+') {
436 push @$m, ( $sf, $v );
437 } else {
438 $m->[ $#$m ] .= $v;
439 }
440 }
441
442 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
443
444 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
445 }
446
447 =head2 marc_duplicate
448
449 Generate copy of current MARC record and continue working on copy
450
451 marc_duplicate();
452
453 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
454 C<< _get_marc_fields( offset => 42 ) >>.
455
456 =cut
457
458 sub marc_duplicate {
459 my $m = $marc_record->[ -1 ];
460 die "can't duplicate record which isn't defined" unless ($m);
461 push @{ $marc_record }, dclone( $m );
462 push @{ $marc_leader }, dclone( marc_leader() );
463 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
464 $marc_record_offset = $#{ $marc_record };
465 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
466
467 }
468
469 =head2 marc_remove
470
471 Remove some field or subfield from MARC record.
472
473 marc_remove('200');
474 marc_remove('200','a');
475
476 This will erase field C<200> or C<200^a> from current MARC record.
477
478 marc_remove('*');
479
480 Will remove all fields in current MARC record.
481
482 This is useful after calling C<marc_duplicate> or on it's own (but, you
483 should probably just remove that subfield definition if you are not
484 using C<marc_duplicate>).
485
486 FIXME: support fields < 10.
487
488 =cut
489
490 sub marc_remove {
491 my ($f, $sf) = @_;
492
493 die "marc_remove needs record number" unless defined($f);
494
495 my $marc = $marc_record->[ $marc_record_offset ];
496
497 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
498
499 if ($f eq '*') {
500
501 delete( $marc_record->[ $marc_record_offset ] );
502 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
503
504 } else {
505
506 my $i = 0;
507 foreach ( 0 .. $#{ $marc } ) {
508 last unless (defined $marc->[$i]);
509 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
510 if ($marc->[$i]->[0] eq $f) {
511 if (! defined $sf) {
512 # remove whole field
513 splice @$marc, $i, 1;
514 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
515 $i--;
516 } else {
517 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
518 my $o = ($j * 2) + 3;
519 if ($marc->[$i]->[$o] eq $sf) {
520 # remove subfield
521 splice @{$marc->[$i]}, $o, 2;
522 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
523 # is record now empty?
524 if ($#{ $marc->[$i] } == 2) {
525 splice @$marc, $i, 1;
526 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
527 $i--;
528 };
529 }
530 }
531 }
532 }
533 $i++;
534 }
535
536 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
537
538 $marc_record->[ $marc_record_offset ] = $marc;
539 }
540
541 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
542 }
543
544 =head2 marc_original_order
545
546 Copy all subfields preserving original order to marc field.
547
548 marc_original_order( marc_field_number, original_input_field_number );
549
550 Please note that field numbers are consistent with other commands (marc
551 field number first), but somewhat counter-intuitive (destination and then
552 source).
553
554 You might want to use this command if you are just renaming subfields or
555 using pre-processing modify_record in C<config.yml> and don't need any
556 post-processing or want to preserve order of original subfields.
557
558
559 =cut
560
561 sub marc_original_order {
562
563 my ($to, $from) = @_;
564 die "marc_original_order needs from and to fields\n" unless ($from && $to);
565
566 return unless defined($rec->{$from});
567
568 my $r = $rec->{$from};
569 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
570
571 my ($i1,$i2) = _get_marc_indicators( $to );
572 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
573
574 foreach my $d (@$r) {
575
576 if ( ! ref($d) ) {
577 # scalar
578 warn "## marc_original_order($to,$from) skipped: ",dump( $d );
579 next;
580 }
581
582 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
583 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
584 next;
585 }
586
587 my @sfs = @{ $d->{subfields} };
588
589 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
590
591 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
592
593 my $m = [ $to, $i1, $i2 ];
594
595 while (my $sf = shift @sfs) {
596
597 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
598 my $offset = shift @sfs;
599 die "corrupted sufields specification for field $from\n" unless defined($offset);
600
601 my $v;
602 if (ref($d->{$sf}) eq 'ARRAY') {
603 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
604 } elsif ($offset == 0) {
605 $v = $d->{$sf};
606 } else {
607 die "field $from subfield '$sf' need occurence $offset which doesn't exist in ", dump($d);
608 }
609 push @$m, ( $sf, $v ) if (defined($v));
610 }
611
612 if ($#{$m} > 2) {
613 push @{ $marc_record->[ $marc_record_offset ] }, $m;
614 }
615 }
616
617 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
618 }
619
620
621 =head2 marc_count
622
623 Return number of MARC records created using L</marc_duplicate>.
624
625 print "created ", marc_count(), " records";
626
627 =cut
628
629 sub marc_count {
630 return $#{ $marc_record };
631 }
632
633 =head2 marc_clone
634
635 Clone marc records from input file, whole or just some fields/indicators
636
637 marc_clone; # whole record
638
639 =cut
640
641 sub marc_clone {
642 warn "### marc_clone rec: ",dump( $rec ) if $debug > 2;
643 foreach my $f ( keys %$rec ) {
644 warn "## marc_clone $f\n" if $debug;
645 marc_original_order( $f, $f );
646 }
647 }
648
649 =head1 PRIVATE FUNCTIONS
650
651 =head2 _marc_push
652
653 _marc_push( $marc );
654
655 =cut
656
657 sub _marc_push {
658 my $marc = shift || die "no marc?";
659 push @{ $marc_record->[ $marc_record_offset ] }, $marc;
660 }
661
662 =head2 _clean
663
664 Clean internal structures
665
666 =cut
667
668 sub _clean {
669 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
670 ($marc_record_offset, $marc_fetch_offset) = (0,0);
671 }
672
673
674 =head2 _get_marc_fields
675
676 Get all fields defined by calls to C<marc>
677
678 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
679
680 We are using I<magic> which detect repeatable fields only from
681 sequence of field/subfield data generated by normalization.
682
683 This magic is disabled for all records created with C<marc_template>.
684
685 Repeatable field is created when there is second occurence of same subfield or
686 if any of indicators are different.
687
688 This is sane for most cases. Something like:
689
690 900a-1 900b-1 900c-1
691 900a-2 900b-2
692 900a-3
693
694 will be created from any combination of:
695
696 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
697
698 and following rules:
699
700 marc('900','a', rec('200','a') );
701 marc('900','b', rec('200','b') );
702 marc('900','c', rec('200','c') );
703
704 which might not be what you have in mind. If you need repeatable subfield,
705 define it using C<marc_repeatable_subfield> like this:
706
707 marc_repeatable_subfield('900','a');
708 marc('900','a', rec('200','a') );
709 marc('900','b', rec('200','b') );
710 marc('900','c', rec('200','c') );
711
712 will create:
713
714 900a-1 900a-2 900a-3 900b-1 900c-1
715 900b-2
716
717 There is also support for returning next or specific using:
718
719 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
720 # do something with $mf
721 }
722
723 will always return fields from next MARC record or
724
725 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
726
727 will return 42th copy record (if it exists).
728
729 =cut
730
731 my $fetch_pos;
732
733 sub _get_marc_fields {
734
735 my $arg = {@_};
736 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
737 $fetch_pos = $marc_fetch_offset;
738 if ($arg->{offset}) {
739 $fetch_pos = $arg->{offset};
740 } elsif($arg->{fetch_next}) {
741 $marc_fetch_offset++;
742 }
743
744 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
745
746 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
747
748 my $marc_rec = $marc_record->[ $fetch_pos ];
749
750 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
751
752 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
753
754 # first, sort all existing fields
755 # XXX might not be needed, but modern perl might randomize elements in hash
756 # my @sorted_marc_record = sort {
757 # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
758 # } @{ $marc_rec };
759
760 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
761
762 # output marc fields
763 my @m;
764
765 # count unique field-subfields (used for offset when walking to next subfield)
766 my $u;
767 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
768
769 if ($debug) {
770 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
771 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
772 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
773 warn "## subfield count = ", dump( $u ), $/;
774 }
775
776 my $len = $#sorted_marc_record;
777 my $visited;
778 my $i = 0;
779 my $field;
780
781 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
782
783 foreach ( 0 .. $len ) {
784
785 # find next element which isn't visited
786 while ($visited->{$i}) {
787 $i = ($i + 1) % ($len + 1);
788 }
789
790 # mark it visited
791 $visited->{$i}++;
792
793 my $row = dclone( $sorted_marc_record[$i] );
794
795 if ( $created_with_marc_template->{ $row->[0] } ) {
796 push @m, $row;
797 warn "## copied marc_template created ", dump( $row ),$/ if $debug;
798 next;
799 }
800
801 # field and subfield which is key for
802 # marc_repeatable_subfield and u
803 my $fsf = $row->[0] . ( $row->[3] || '' );
804
805 if ($debug > 1) {
806
807 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
808 print "### this [$i]: ", dump( $row ),$/;
809 print "### sf: ", $row->[3], " vs ", $field->[3],
810 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
811 if ($#$field >= 0);
812
813 }
814
815 # if field exists
816 if ( $#$field >= 0 ) {
817 if (
818 $row->[0] ne $field->[0] || # field
819 $row->[1] ne $field->[1] || # i1
820 $row->[2] ne $field->[2] # i2
821 ) {
822 push @m, $field;
823 warn "## saved/1 ", dump( $field ),$/ if ($debug);
824 $field = $row;
825
826 } elsif (
827 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
828 ||
829 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
830 ! $marc_repeatable_subfield->{ $fsf }
831 )
832 ) {
833 push @m, $field;
834 warn "## saved/2 ", dump( $field ),$/ if ($debug);
835 $field = $row;
836
837 } else {
838 # append new subfields to existing field
839 push @$field, ( $row->[3], $row->[4] );
840 }
841 } else {
842 # insert first field
843 $field = $row;
844 }
845
846 if (! $marc_repeatable_subfield->{ $fsf }) {
847 # make step to next subfield
848 $i = ($i + $u->{ $fsf } ) % ($len + 1);
849 }
850 }
851
852 if ($#$field >= 0) {
853 push @m, $field;
854 warn "## saved/3 ", dump( $field ),$/ if ($debug);
855 }
856
857 return \@m;
858 }
859
860 =head2 _get_marc_leader
861
862 Return leader from currently fetched record by L</_get_marc_fields>
863
864 print WebPAC::Normalize::MARC::_get_marc_leader();
865
866 =cut
867
868 sub _get_marc_leader {
869 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
870 return $marc_leader->[ $fetch_pos ];
871 }
872
873 =head2 _created_marc_records
874
875 my $nr_records = _created_marc_records;
876
877 =cut
878
879 sub _created_marc_records {
880 return $#{ $marc_record } + 1 if $marc_record;
881 }
882
883 1;

  ViewVC Help
Powered by ViewVC 1.1.26