/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1047 - (show annotations)
Mon Nov 19 15:56:05 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 22808 byte(s)
 r1631@llin:  dpavlin | 2007-11-19 16:56:04 +0100
 marc_template now respect marc_indicators, code cleanup

1 package WebPAC::Normalize::MARC;
2 use Exporter 'import';
3 our @EXPORT = qw/
4 marc marc_indicators marc_repeatable_subfield
5 marc_compose marc_leader marc_fixed
6 marc_duplicate marc_remove marc_count
7 marc_original_order
8 marc_template
9 /;
10
11 use strict;
12 use warnings;
13
14 use Storable qw/dclone/;
15 use Data::Dump qw/dump/;
16 use Carp qw/confess/;
17
18 use WebPAC::Normalize;
19
20 our $debug = 0;
21
22 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24
25 our $rec;
26
27 =head1 NAME
28
29 WebPAC::Normalize::MARC - create MARC/ISO2709 records
30
31 =cut
32
33 =head1 FUNCTIONS
34
35 =head2 marc_template
36
37 marc_template(
38 from => 225, to => 440,
39 subfields_rename => [
40 'a' => 'a',
41 'x' => 'x',
42 'v' => 'v',
43 'h' => 'n',
44 'i' => 'p',
45 'w' => 'v',
46 ],
47 isis_template => [
48 'a ; |v. |i',
49 'a. |i ; |w',
50 ],
51 marc_template => [
52 'a, |x ; |v. |n, |p ; |v',
53 'a ; |v. |p ; |v',
54 ],
55 );
56
57 Returns number of records produced.
58
59 =cut
60
61 my $created_with_marc_template;
62
63 sub marc_template {
64 my $args = {@_};
65 warn "## marc_template(",dump($args),")",$/ if $debug;
66
67 foreach ( qw/subfields_rename isis_template marc_template/ ) {
68 # warn "ref($_) = ",ref($args->{$_}) if $debug;
69 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
70 }
71
72 die "marc_template needs isis_template or marc_template"
73 if ! defined $args->{isis_template} && ! defined $args->{marc_template};
74
75 my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
76 my $r = $rec->{ $args->{from} } || return;
77 die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
78
79 my @subfields_rename = @{ $args->{subfields_rename} };
80 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
81
82 confess "need mapping in pairs for subfields_rename"
83 if $#subfields_rename % 2 != 1;
84
85 my ( $subfields_rename, $from_subfields );
86 our $to_subfields = {};
87 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
88 my ( $f, $t ) = (
89 $from_subfields->{ $from }++,
90 $to_subfields->{ $to }++
91 );
92 $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
93 }
94 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
95 warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
96 warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
97
98 our $_template;
99
100 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
101 $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
102
103 my @marc_out;
104
105 sub _parse_template {
106 my ( $name, $templates ) = @_;
107
108 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
109
110 foreach my $template ( @{ $templates } ) {
111 our $count = {};
112 our @order = ();
113 sub my_count {
114 my $sf = shift;
115 my $nr = $count->{$sf}++;
116 push @order, [ $sf, $nr ];
117 return $sf . $nr;
118 }
119 my $pos_template = $template;
120 $pos_template =~ s/($fields_re)/my_count($1)/ge;
121 my $count_key = dump( $count );
122 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
123 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
124 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
125 }
126 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
127 }
128
129 _parse_template( 'marc', $args->{marc_template} );
130 _parse_template( 'isis', $args->{isis_template} );
131 warn "### _template = ",dump( $_template ),$/ if $debug;
132
133 my $m;
134
135 our $from_rec = $rec->{ $args->{from} };
136
137 foreach my $r ( @$from_rec ) {
138
139 my $to = $args->{to};
140 my ($i1,$i2) = _get_marc_indicators( $to );
141 $m = [ $to, $i1, $i2 ];
142
143 $created_with_marc_template->{ $to }++;
144
145 warn "### r = ",dump( $r ),$/ if $debug;
146
147 my ( $from_mapping, $from_count, $to_count );
148 our $to_mapping;
149 foreach my $from_sf ( keys %{$r} ) {
150 # skip everything which isn't one char subfield (e.g. 'subfields')
151 next unless $from_sf =~ m/^\w$/;
152 my $from_nr = $from_count->{$from_sf}++;
153 my $rename_to = $subfields_rename->{ $from_sf } ||
154 die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
155 my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
156 $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
157
158 my $to_nr2 = $to_count->{ $to_sf }++;
159 $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
160
161 warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
162 }
163
164 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
165
166 my $count_key = {
167 from => dump( $from_count ),
168 to => dump( $to_count),
169 };
170
171 warn "### count_key = ",dump( $count_key ),$/ if $debug;
172
173 my $processed_templates = 0;
174
175 # this defines order of traversal
176 foreach ( qw/isis:from marc:to/ ) {
177 my ($name,$count_name) = split(/:/);
178
179 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
180
181 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
182 $processed_templates++;
183
184 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
185
186 our $fill_in = {};
187
188 my @templates = split(/\|/, $template );
189 @templates = ( $template ) unless @templates;
190
191 warn "### templates = ",dump( @templates ),$/ if $debug;
192
193 foreach my $sf ( @templates ) {
194 sub fill_in {
195 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
196 warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
197 my ( $from_sf, $from_nr );
198 if ( $name eq 'marc' ) {
199 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
200 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
201 } else {
202 ( $from_sf, $from_nr ) = ( $sf, $nr );
203 }
204 my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
205 if ( ref( $v ) eq 'ARRAY' ) {
206 $v = $pre . $v->[$from_nr] . $post;
207 } elsif ( $from_nr == 0 ) {
208 $v = $pre . $v . $post;
209 } else {
210 die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
211 }
212 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
213 $fill_in->{$sf}->[$nr] = $v;
214 return $v;
215 }
216 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
217 warn "#### $sf <<<< $fields_re\n" if $debug;
218 $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
219 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
220 }
221
222 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
223
224 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
225 my ( $sf, $nr ) = @$sf;
226 my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
227 if ( $name eq 'isis') {
228 ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
229 }
230 warn "++ $sf/$nr |$v|\n" if $debug;
231 push @$m, ( $sf, $v );
232 }
233
234 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
235
236 push @marc_out, $m;
237
238 last;
239 }
240
241 die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
242 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
243 }
244
245
246 my $recs = 0;
247
248 foreach my $marc ( @marc_out ) {
249 warn "+++ ",dump( $marc ),$/ if $debug;
250 _marc_push( $marc );
251 $recs++;
252 }
253
254 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
255
256 return $recs;
257 }
258
259 =head2 marc_leader
260
261 Setup fields within MARC leader or get leader
262
263 marc_leader('05','c');
264 my $leader = marc_leader();
265
266 =cut
267
268 sub marc_leader {
269 my ($offset,$value) = @_;
270
271 if ($offset) {
272 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
273 } else {
274
275 if (defined($marc_leader)) {
276 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
277 return $marc_leader->[ $marc_record_offset ];
278 } else {
279 return;
280 }
281 }
282 }
283
284 =head2 marc_fixed
285
286 Create control/indentifier fields with values in fixed positions
287
288 marc_fixed('008', 00, '070402');
289 marc_fixed('008', 39, '|');
290
291 Positions not specified will be filled with spaces (C<0x20>).
292
293 There will be no effort to extend last specified value to full length of
294 field in standard.
295
296 =cut
297
298 sub marc_fixed {
299 my ($f, $pos, $val) = @_;
300 die "need marc(field, position, value)" unless defined($f) && defined($pos);
301
302 confess "need val" unless defined $val;
303
304 my $update = 0;
305
306 map {
307 if ($_->[0] eq $f) {
308 my $old = $_->[1];
309 if (length($old) <= $pos) {
310 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
311 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
312 } else {
313 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
314 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
315 }
316 $update++;
317 }
318 } @{ $marc_record->[ $marc_record_offset ] };
319
320 if (! $update) {
321 my $v = ' ' x $pos . $val;
322 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
323 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
324 }
325 }
326
327 =head2 marc
328
329 Save value for MARC field
330
331 marc('900','a', rec('200','a') );
332 marc('001', rec('000') );
333
334 =cut
335
336 sub marc {
337 my $f = shift or die "marc needs field";
338 die "marc field must be numer" unless ($f =~ /^\d+$/);
339
340 my $sf;
341 if ($f >= 10) {
342 $sf = shift or die "marc needs subfield";
343 }
344
345 foreach (@_) {
346 my $v = $_; # make var read-write for Encode
347 next unless (defined($v) && $v !~ /^\s*$/);
348 my ($i1,$i2) = _get_marc_indicators( $f );
349 if (defined $sf) {
350 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
351 } else {
352 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
353 }
354 }
355 }
356
357 =head2 marc_repeatable_subfield
358
359 Save values for MARC repetable subfield
360
361 marc_repeatable_subfield('910', 'z', rec('909') );
362
363 =cut
364
365 sub marc_repeatable_subfield {
366 my ($f,$sf) = @_;
367 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
368 $marc_repeatable_subfield->{ $f . $sf }++;
369 marc(@_);
370 }
371
372 =head2 marc_indicators
373
374 Set both indicators for MARC field
375
376 marc_indicators('900', ' ', 1);
377
378 Any indicator value other than C<0-9> will be treated as undefined.
379
380 =cut
381
382 sub marc_indicators {
383 my $f = shift || die "marc_indicators need field!\n";
384 my ($i1,$i2) = @_;
385 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
386 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
387
388 $i1 = ' ' if ($i1 !~ /^\d$/);
389 $i2 = ' ' if ($i2 !~ /^\d$/);
390 @{ $marc_indicators->{$f} } = ($i1,$i2);
391 }
392
393 sub _get_marc_indicators {
394 my $f = shift || confess "need field!\n";
395 return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
396 }
397
398 =head2 marc_compose
399
400 Save values for each MARC subfield explicitly
401
402 marc_compose('900',
403 'a', rec('200','a')
404 'b', rec('201','a')
405 'a', rec('200','b')
406 'c', rec('200','c')
407 );
408
409 If you specify C<+> for subfield, value will be appended
410 to previous defined subfield.
411
412 =cut
413
414 sub marc_compose {
415 my $f = shift or die "marc_compose needs field";
416 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
417
418 my ($i1,$i2) = _get_marc_indicators( $f );
419 my $m = [ $f, $i1, $i2 ];
420
421 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
422
423 if ($#_ % 2 != 1) {
424 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
425 }
426
427 while (@_) {
428 my $sf = shift;
429 my $v = shift;
430
431 next unless (defined($v) && $v !~ /^\s*$/);
432 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
433 if ($sf ne '+') {
434 push @$m, ( $sf, $v );
435 } else {
436 $m->[ $#$m ] .= $v;
437 }
438 }
439
440 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
441
442 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
443 }
444
445 =head2 marc_duplicate
446
447 Generate copy of current MARC record and continue working on copy
448
449 marc_duplicate();
450
451 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
452 C<< _get_marc_fields( offset => 42 ) >>.
453
454 =cut
455
456 sub marc_duplicate {
457 my $m = $marc_record->[ -1 ];
458 die "can't duplicate record which isn't defined" unless ($m);
459 push @{ $marc_record }, dclone( $m );
460 push @{ $marc_leader }, dclone( marc_leader() );
461 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
462 $marc_record_offset = $#{ $marc_record };
463 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
464
465 }
466
467 =head2 marc_remove
468
469 Remove some field or subfield from MARC record.
470
471 marc_remove('200');
472 marc_remove('200','a');
473
474 This will erase field C<200> or C<200^a> from current MARC record.
475
476 marc_remove('*');
477
478 Will remove all fields in current MARC record.
479
480 This is useful after calling C<marc_duplicate> or on it's own (but, you
481 should probably just remove that subfield definition if you are not
482 using C<marc_duplicate>).
483
484 FIXME: support fields < 10.
485
486 =cut
487
488 sub marc_remove {
489 my ($f, $sf) = @_;
490
491 die "marc_remove needs record number" unless defined($f);
492
493 my $marc = $marc_record->[ $marc_record_offset ];
494
495 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
496
497 if ($f eq '*') {
498
499 delete( $marc_record->[ $marc_record_offset ] );
500 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
501
502 } else {
503
504 my $i = 0;
505 foreach ( 0 .. $#{ $marc } ) {
506 last unless (defined $marc->[$i]);
507 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
508 if ($marc->[$i]->[0] eq $f) {
509 if (! defined $sf) {
510 # remove whole field
511 splice @$marc, $i, 1;
512 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
513 $i--;
514 } else {
515 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
516 my $o = ($j * 2) + 3;
517 if ($marc->[$i]->[$o] eq $sf) {
518 # remove subfield
519 splice @{$marc->[$i]}, $o, 2;
520 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
521 # is record now empty?
522 if ($#{ $marc->[$i] } == 2) {
523 splice @$marc, $i, 1;
524 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
525 $i--;
526 };
527 }
528 }
529 }
530 }
531 $i++;
532 }
533
534 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
535
536 $marc_record->[ $marc_record_offset ] = $marc;
537 }
538
539 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
540 }
541
542 =head2 marc_original_order
543
544 Copy all subfields preserving original order to marc field.
545
546 marc_original_order( marc_field_number, original_input_field_number );
547
548 Please note that field numbers are consistent with other commands (marc
549 field number first), but somewhat counter-intuitive (destination and then
550 source).
551
552 You might want to use this command if you are just renaming subfields or
553 using pre-processing modify_record in C<config.yml> and don't need any
554 post-processing or want to preserve order of original subfields.
555
556
557 =cut
558
559 sub marc_original_order {
560
561 my ($to, $from) = @_;
562 die "marc_original_order needs from and to fields\n" unless ($from && $to);
563
564 return unless defined($rec->{$from});
565
566 my $r = $rec->{$from};
567 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
568
569 my ($i1,$i2) = _get_marc_indicators( $to );
570 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
571
572 foreach my $d (@$r) {
573
574 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
575 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
576 next;
577 }
578
579 my @sfs = @{ $d->{subfields} };
580
581 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
582
583 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
584
585 my $m = [ $to, $i1, $i2 ];
586
587 while (my $sf = shift @sfs) {
588
589 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
590 my $offset = shift @sfs;
591 die "corrupted sufields specification for field $from\n" unless defined($offset);
592
593 my $v;
594 if (ref($d->{$sf}) eq 'ARRAY') {
595 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
596 } elsif ($offset == 0) {
597 $v = $d->{$sf};
598 } else {
599 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
600 }
601 push @$m, ( $sf, $v ) if (defined($v));
602 }
603
604 if ($#{$m} > 2) {
605 push @{ $marc_record->[ $marc_record_offset ] }, $m;
606 }
607 }
608
609 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
610 }
611
612
613 =head2 marc_count
614
615 Return number of MARC records created using L</marc_duplicate>.
616
617 print "created ", marc_count(), " records";
618
619 =cut
620
621 sub marc_count {
622 return $#{ $marc_record };
623 }
624
625 =head1 PRIVATE FUNCTIONS
626
627 =head2 _marc_push
628
629 _marc_push( $marc );
630
631 =cut
632
633 sub _marc_push {
634 my $marc = shift || die "no marc?";
635 push @{ $marc_record->[ $marc_record_offset ] }, $marc;
636 }
637
638 =head2 _clean
639
640 Clean internal structures
641
642 =cut
643
644 sub _clean {
645 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
646 ($marc_record_offset, $marc_fetch_offset) = (0,0);
647 }
648
649
650 =head2 _get_marc_fields
651
652 Get all fields defined by calls to C<marc>
653
654 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
655
656 We are using I<magic> which detect repeatable fields only from
657 sequence of field/subfield data generated by normalization.
658
659 This magic is disabled for all records created with C<marc_template>.
660
661 Repeatable field is created when there is second occurence of same subfield or
662 if any of indicators are different.
663
664 This is sane for most cases. Something like:
665
666 900a-1 900b-1 900c-1
667 900a-2 900b-2
668 900a-3
669
670 will be created from any combination of:
671
672 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
673
674 and following rules:
675
676 marc('900','a', rec('200','a') );
677 marc('900','b', rec('200','b') );
678 marc('900','c', rec('200','c') );
679
680 which might not be what you have in mind. If you need repeatable subfield,
681 define it using C<marc_repeatable_subfield> like this:
682
683 marc_repeatable_subfield('900','a');
684 marc('900','a', rec('200','a') );
685 marc('900','b', rec('200','b') );
686 marc('900','c', rec('200','c') );
687
688 will create:
689
690 900a-1 900a-2 900a-3 900b-1 900c-1
691 900b-2
692
693 There is also support for returning next or specific using:
694
695 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
696 # do something with $mf
697 }
698
699 will always return fields from next MARC record or
700
701 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
702
703 will return 42th copy record (if it exists).
704
705 =cut
706
707 my $fetch_pos;
708
709 sub _get_marc_fields {
710
711 my $arg = {@_};
712 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
713 $fetch_pos = $marc_fetch_offset;
714 if ($arg->{offset}) {
715 $fetch_pos = $arg->{offset};
716 } elsif($arg->{fetch_next}) {
717 $marc_fetch_offset++;
718 }
719
720 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
721
722 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
723
724 my $marc_rec = $marc_record->[ $fetch_pos ];
725
726 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
727
728 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
729
730 # first, sort all existing fields
731 # XXX might not be needed, but modern perl might randomize elements in hash
732 # my @sorted_marc_record = sort {
733 # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
734 # } @{ $marc_rec };
735
736 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
737
738 # output marc fields
739 my @m;
740
741 # count unique field-subfields (used for offset when walking to next subfield)
742 my $u;
743 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
744
745 if ($debug) {
746 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
747 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
748 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
749 warn "## subfield count = ", dump( $u ), $/;
750 }
751
752 my $len = $#sorted_marc_record;
753 my $visited;
754 my $i = 0;
755 my $field;
756
757 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
758
759 foreach ( 0 .. $len ) {
760
761 # find next element which isn't visited
762 while ($visited->{$i}) {
763 $i = ($i + 1) % ($len + 1);
764 }
765
766 # mark it visited
767 $visited->{$i}++;
768
769 my $row = dclone( $sorted_marc_record[$i] );
770
771 if ( $created_with_marc_template->{ $row->[0] } ) {
772 push @m, $row;
773 warn "## copied marc_template created ", dump( $row ),$/ if $debug;
774 next;
775 }
776
777 # field and subfield which is key for
778 # marc_repeatable_subfield and u
779 my $fsf = $row->[0] . ( $row->[3] || '' );
780
781 if ($debug > 1) {
782
783 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
784 print "### this [$i]: ", dump( $row ),$/;
785 print "### sf: ", $row->[3], " vs ", $field->[3],
786 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
787 if ($#$field >= 0);
788
789 }
790
791 # if field exists
792 if ( $#$field >= 0 ) {
793 if (
794 $row->[0] ne $field->[0] || # field
795 $row->[1] ne $field->[1] || # i1
796 $row->[2] ne $field->[2] # i2
797 ) {
798 push @m, $field;
799 warn "## saved/1 ", dump( $field ),$/ if ($debug);
800 $field = $row;
801
802 } elsif (
803 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
804 ||
805 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
806 ! $marc_repeatable_subfield->{ $fsf }
807 )
808 ) {
809 push @m, $field;
810 warn "## saved/2 ", dump( $field ),$/ if ($debug);
811 $field = $row;
812
813 } else {
814 # append new subfields to existing field
815 push @$field, ( $row->[3], $row->[4] );
816 }
817 } else {
818 # insert first field
819 $field = $row;
820 }
821
822 if (! $marc_repeatable_subfield->{ $fsf }) {
823 # make step to next subfield
824 $i = ($i + $u->{ $fsf } ) % ($len + 1);
825 }
826 }
827
828 if ($#$field >= 0) {
829 push @m, $field;
830 warn "## saved/3 ", dump( $field ),$/ if ($debug);
831 }
832
833 return \@m;
834 }
835
836 =head2 _get_marc_leader
837
838 Return leader from currently fetched record by L</_get_marc_fields>
839
840 print WebPAC::Normalize::MARC::_get_marc_leader();
841
842 =cut
843
844 sub _get_marc_leader {
845 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
846 return $marc_leader->[ $fetch_pos ];
847 }
848
849 =head2 _created_marc_records
850
851 my $nr_records = _created_marc_records;
852
853 =cut
854
855 sub _created_marc_records {
856 return $#{ $marc_record } + 1 if $marc_record;
857 }
858
859 1;

  ViewVC Help
Powered by ViewVC 1.1.26