/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1368 - (show annotations)
Mon Mar 12 19:00:00 2012 UTC (12 years, 2 months ago) by dpavlin
File size: 23577 byte(s)
marc_original_order support for leader and fixed fields

1 package WebPAC::Normalize::MARC;
2 use Exporter 'import';
3 our @EXPORT = qw/
4 marc marc_indicators marc_repeatable_subfield
5 marc_compose marc_leader marc_fixed
6 marc_duplicate marc_remove marc_count
7 marc_original_order
8 marc_template
9 marc_clone
10 /;
11
12 use strict;
13 use warnings;
14
15 use Storable qw/dclone/;
16 use Data::Dump qw/dump/;
17 use Carp qw/confess/;
18
19 use WebPAC::Normalize;
20
21 our $debug = 0;
22
23 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
24 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
25
26 our $rec;
27
28 =head1 NAME
29
30 WebPAC::Normalize::MARC - create MARC/ISO2709 records
31
32 =cut
33
34 =head1 FUNCTIONS
35
36 =head2 marc_template
37
38 marc_template(
39 from => 225, to => 440,
40 subfields_rename => [
41 'a' => 'a',
42 'x' => 'x',
43 'v' => 'v',
44 'h' => 'n',
45 'i' => 'p',
46 'w' => 'v',
47 ],
48 isis_template => [
49 'a ; |v. |i',
50 'a. |i ; |w',
51 ],
52 marc_template => [
53 'a, |x ; |v. |n, |p ; |v',
54 'a ; |v. |p ; |v',
55 ],
56 );
57
58 Returns number of records produced.
59
60 =cut
61
62 my $created_with_marc_template;
63
64 sub marc_template {
65 my $args = {@_};
66 warn "## marc_template(",dump($args),")",$/ if $debug;
67
68 foreach ( qw/subfields_rename isis_template marc_template/ ) {
69 # warn "ref($_) = ",ref($args->{$_}) if $debug;
70 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
71 }
72
73 die "marc_template needs isis_template or marc_template"
74 if ! defined $args->{isis_template} && ! defined $args->{marc_template};
75
76 my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
77 my $r = $rec->{ $args->{from} } || return;
78 die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
79
80 my @subfields_rename = @{ $args->{subfields_rename} };
81 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
82
83 confess "need mapping in pairs for subfields_rename"
84 if $#subfields_rename % 2 != 1;
85
86 my ( $subfields_rename, $from_subfields );
87 our $to_subfields = {};
88 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
89 my ( $f, $t ) = (
90 $from_subfields->{ $from }++,
91 $to_subfields->{ $to }++
92 );
93 $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
94 }
95 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
96 warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
97 warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
98
99 our $_template;
100
101 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
102 $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
103
104 my @marc_out;
105
106 sub _parse_template {
107 my ( $name, $templates ) = @_;
108
109 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
110
111 foreach my $template ( @{ $templates } ) {
112 our $count = {};
113 our @order = ();
114 sub my_count {
115 my $sf = shift;
116 my $nr = $count->{$sf}++;
117 push @order, [ $sf, $nr ];
118 return $sf . $nr;
119 }
120 my $pos_template = $template;
121 $pos_template =~ s/($fields_re)/my_count($1)/ge;
122 my $count_key = dump( $count );
123 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
124 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
125 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
126 }
127 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
128 }
129
130 _parse_template( 'marc', $args->{marc_template} );
131 _parse_template( 'isis', $args->{isis_template} );
132 warn "### _template = ",dump( $_template ),$/ if $debug;
133
134 my $m;
135
136 our $from_rec = $rec->{ $args->{from} };
137
138 foreach my $r ( @$from_rec ) {
139
140 my $to = $args->{to};
141 my ($i1,$i2) = _get_marc_indicators( $to );
142 $m = [ $to, $i1, $i2 ];
143
144 $created_with_marc_template->{ $to }++;
145
146 warn "### r = ",dump( $r ),$/ if $debug;
147
148 my ( $from_mapping, $from_count, $to_count );
149 our $to_mapping;
150 foreach my $from_sf ( keys %{$r} ) {
151 # skip everything which isn't one char subfield (e.g. 'subfields')
152 next unless $from_sf =~ m/^\w$/;
153 my $from_nr = $from_count->{$from_sf}++;
154 my $rename_to = $subfields_rename->{ $from_sf } ||
155 die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
156 my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
157 $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
158
159 my $to_nr2 = $to_count->{ $to_sf }++;
160 $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
161
162 warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
163 }
164
165 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
166
167 my $count_key = {
168 from => dump( $from_count ),
169 to => dump( $to_count),
170 };
171
172 warn "### count_key = ",dump( $count_key ),$/ if $debug;
173
174 my $processed_templates = 0;
175
176 # this defines order of traversal
177 foreach ( qw/isis:from marc:to/ ) {
178 my ($name,$count_name) = split(/:/);
179
180 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
181
182 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
183 $processed_templates++;
184
185 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
186
187 our $fill_in = {};
188
189 my @templates = split(/\|/, $template );
190 @templates = ( $template ) unless @templates;
191
192 warn "### templates = ",dump( @templates ),$/ if $debug;
193
194 foreach my $sf ( @templates ) {
195 sub fill_in {
196 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
197 warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
198 my ( $from_sf, $from_nr );
199 if ( $name eq 'marc' ) {
200 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), "\n>>>> from record ",dump( $r ), "\n>>>> full record = ",dump( $from_rec ) unless defined $to_mapping->{$sf}->[$nr];
201 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
202 } else {
203 ( $from_sf, $from_nr ) = ( $sf, $nr );
204 }
205 my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
206 if ( ref( $v ) eq 'ARRAY' ) {
207 $v = $pre . $v->[$from_nr] . $post;
208 } elsif ( $from_nr == 0 ) {
209 $v = $pre . $v . $post;
210 } else {
211 die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
212 }
213 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
214 $fill_in->{$sf}->[$nr] = $v;
215 return $v;
216 }
217 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
218 warn "#### $sf <<<< $fields_re\n" if $debug;
219 $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
220 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
221 }
222
223 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
224
225 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
226 my ( $sf, $nr ) = @$sf;
227 my $v = $fill_in->{$sf}->[$nr];
228 die "can't find fill_in $sf/$nr" unless defined $v;
229 if ( $name eq 'isis') {
230 ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
231 }
232 warn "++ $sf/$nr |$v|\n" if $debug;
233 push @$m, ( $sf, $v );
234 }
235
236 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
237
238 push @marc_out, $m;
239
240 last;
241 }
242
243 die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
244 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
245 }
246
247
248 my $recs = 0;
249
250 foreach my $marc ( @marc_out ) {
251 warn "+++ ",dump( $marc ),$/ if $debug;
252 _marc_push( $marc );
253 $recs++;
254 }
255
256 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
257
258 return $recs;
259 }
260
261 =head2 marc_leader
262
263 Setup fields within MARC leader or get leader
264
265 marc_leader('05','c');
266 my $leader = marc_leader();
267
268 =cut
269
270 sub marc_leader {
271 my ($offset,$value) = @_;
272
273 if ( defined $offset && defined $value ) {
274 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
275 } else {
276
277 if (defined($marc_leader)) {
278 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
279 return $marc_leader->[ $marc_record_offset ];
280 } else {
281 return;
282 }
283 }
284 }
285
286 =head2 marc_fixed
287
288 Create control/indentifier fields with values in fixed positions
289
290 marc_fixed('008', 00, '070402');
291 marc_fixed('008', 39, '|');
292
293 Positions not specified will be filled with spaces (C<0x20>).
294
295 There will be no effort to extend last specified value to full length of
296 field in standard.
297
298 =cut
299
300 sub marc_fixed {
301 my ($f, $pos, $val) = @_;
302 die "need marc(field, position, value)" unless defined($f) && defined($pos);
303
304 confess "need val" unless defined $val;
305
306 my $update = 0;
307
308 map {
309 if ($_->[0] eq $f) {
310 my $old = $_->[1];
311 if (length($old) <= $pos) {
312 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
313 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
314 } elsif ( defined $old ) {
315 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
316 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
317 }
318 $update++;
319 }
320 } @{ $marc_record->[ $marc_record_offset ] };
321
322 if (! $update) {
323 my $v = ' ' x $pos . $val;
324 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
325 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
326 }
327 }
328
329 =head2 marc
330
331 Save value for MARC field
332
333 marc('900','a', rec('200','a') );
334 marc('001', rec('000') );
335
336 =cut
337
338 sub marc {
339 my $f = shift or die "marc needs field";
340 die "marc field must be numer" unless ($f =~ /^\d+$/);
341
342 my $sf;
343 if ($f >= 10) {
344 $sf = shift or die "marc needs subfield";
345 }
346
347 foreach (@_) {
348 my $v = $_; # make var read-write for Encode
349 #Encode::_utf8_on($v); # FIXME we probably need this
350 next unless (defined($v) && $v !~ /^\s*$/);
351 my ($i1,$i2) = _get_marc_indicators( $f );
352 if (defined $sf) {
353 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
354 } else {
355 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
356 }
357 }
358 }
359
360 =head2 marc_repeatable_subfield
361
362 Save values for MARC repetable subfield
363
364 marc_repeatable_subfield('910', 'z', rec('909') );
365
366 =cut
367
368 sub marc_repeatable_subfield {
369 my ($f,$sf) = @_;
370 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
371 $marc_repeatable_subfield->{ $f . $sf }++;
372 marc(@_);
373 }
374
375 =head2 marc_indicators
376
377 Set both indicators for MARC field
378
379 marc_indicators('900', ' ', 1);
380
381 Any indicator value other than C<0-9> will be treated as undefined.
382
383 =cut
384
385 sub marc_indicators {
386 my $f = shift || die "marc_indicators need field!\n";
387 my ($i1,$i2) = @_;
388 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
389 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
390
391 $i1 = ' ' if ($i1 !~ /^\d$/);
392 $i2 = ' ' if ($i2 !~ /^\d$/);
393 @{ $marc_indicators->{$f} } = ($i1,$i2);
394 }
395
396 sub _get_marc_indicators {
397 my $f = shift || confess "need field!\n";
398 return defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
399 }
400
401 =head2 marc_compose
402
403 Save values for each MARC subfield explicitly
404
405 marc_compose('900',
406 'a', rec('200','a')
407 'b', rec('201','a')
408 'a', rec('200','b')
409 'c', rec('200','c')
410 );
411
412 If you specify C<+> for subfield, value will be appended
413 to previous defined subfield.
414
415 =cut
416
417 sub marc_compose {
418 my $f = shift or die "marc_compose needs field";
419 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
420
421 my ($i1,$i2) = _get_marc_indicators( $f );
422 my $m = [ $f, $i1, $i2 ];
423
424 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
425
426 if ($#_ % 2 != 1) {
427 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
428 }
429
430 while (@_) {
431 my $sf = shift;
432 my $v = shift;
433
434 next unless (defined($v) && $v !~ /^\s*$/);
435 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
436 if ($sf ne '+') {
437 push @$m, ( $sf, $v );
438 } else {
439 $m->[ $#$m ] .= $v;
440 }
441 }
442
443 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
444
445 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
446 }
447
448 =head2 marc_duplicate
449
450 Generate copy of current MARC record and continue working on copy
451
452 marc_duplicate();
453
454 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
455 C<< _get_marc_fields( offset => 42 ) >>.
456
457 =cut
458
459 sub marc_duplicate {
460 my $m = $marc_record->[ -1 ];
461 die "can't duplicate record which isn't defined" unless ($m);
462 push @{ $marc_record }, dclone( $m );
463 push @{ $marc_leader }, dclone( marc_leader() );
464 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
465 $marc_record_offset = $#{ $marc_record };
466 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
467
468 }
469
470 =head2 marc_remove
471
472 Remove some field or subfield from MARC record.
473
474 marc_remove('200');
475 marc_remove('200','a');
476
477 This will erase field C<200> or C<200^a> from current MARC record.
478
479 marc_remove('*');
480
481 Will remove all fields in current MARC record.
482
483 This is useful after calling C<marc_duplicate> or on it's own (but, you
484 should probably just remove that subfield definition if you are not
485 using C<marc_duplicate>).
486
487 FIXME: support fields < 10.
488
489 =cut
490
491 sub marc_remove {
492 my ($f, $sf) = @_;
493
494 die "marc_remove needs record number" unless defined($f);
495
496 my $marc = $marc_record->[ $marc_record_offset ];
497
498 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
499
500 if ($f eq '*') {
501
502 delete( $marc_record->[ $marc_record_offset ] );
503 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
504
505 } else {
506
507 my $i = 0;
508 foreach ( 0 .. $#{ $marc } ) {
509 last unless (defined $marc->[$i]);
510 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
511 if ($marc->[$i]->[0] eq $f) {
512 if (! defined $sf) {
513 # remove whole field
514 splice @$marc, $i, 1;
515 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
516 $i--;
517 } else {
518 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
519 my $o = ($j * 2) + 3;
520 if ($marc->[$i]->[$o] eq $sf) {
521 # remove subfield
522 splice @{$marc->[$i]}, $o, 2;
523 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
524 # is record now empty?
525 if ($#{ $marc->[$i] } == 2) {
526 splice @$marc, $i, 1;
527 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
528 $i--;
529 };
530 }
531 }
532 }
533 }
534 $i++;
535 }
536
537 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
538
539 $marc_record->[ $marc_record_offset ] = $marc;
540 }
541
542 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
543 }
544
545 =head2 marc_original_order
546
547 Copy all subfields preserving original order to marc field.
548
549 marc_original_order( marc_field_number, original_input_field_number );
550
551 Please note that field numbers are consistent with other commands (marc
552 field number first), but somewhat counter-intuitive (destination and then
553 source).
554
555 You might want to use this command if you are just renaming subfields or
556 using pre-processing modify_record in C<config.yml> and don't need any
557 post-processing or want to preserve order of original subfields.
558
559
560 =cut
561
562 sub marc_original_order {
563
564 my ($to, $from) = @_;
565 die "marc_original_order needs from and to fields\n" unless ($from && $to);
566
567 return unless defined($rec->{$from});
568
569 my $r = $rec->{$from};
570 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
571
572 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
573
574 foreach my $d (@$r) {
575
576 if ( ! ref($d) ) {
577 # scalar
578 if ( $to eq 'leader' ) {
579 marc_leader( 0, $d );
580 } elsif ( $to < 100 ) {
581 marc_fixed( $to, 0, $d );
582 } else {
583 warn "## marc_original_order($to,$from) skipped: ",dump( $d );
584 }
585 next;
586 }
587
588 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
589 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
590 next;
591 }
592
593 my @sfs = @{ $d->{subfields} };
594
595 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
596
597 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
598
599 my ($i1,$i2) = _get_marc_indicators( $from );
600 $i1 = $d->{i1} if exists $d->{i1};
601 $i2 = $d->{i2} if exists $d->{i2};
602
603 my $m = [ $to, $i1, $i2 ];
604
605 while (my $sf = shift @sfs) {
606
607 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
608 my $offset = shift @sfs;
609 die "corrupted sufields specification for field $from\n" unless defined($offset);
610
611 my $v;
612 if (ref($d->{$sf}) eq 'ARRAY') {
613 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
614 } elsif ($offset == 0) {
615 $v = $d->{$sf};
616 } else {
617 die "field $from subfield '$sf' need occurence $offset which doesn't exist in ", dump($d);
618 }
619 push @$m, ( $sf, $v ) if (defined($v));
620 }
621
622 if ($#{$m} > 2) {
623 push @{ $marc_record->[ $marc_record_offset ] }, $m;
624 }
625 }
626
627 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
628 }
629
630
631 =head2 marc_count
632
633 Return number of MARC records created using L</marc_duplicate>.
634
635 print "created ", marc_count(), " records";
636
637 =cut
638
639 sub marc_count {
640 return $#{ $marc_record };
641 }
642
643 =head2 marc_clone
644
645 Clone marc records from input file, whole or just some fields/indicators
646
647 marc_clone; # whole record
648
649 =cut
650
651 sub marc_clone {
652 warn "### marc_clone rec: ",dump( $rec ) if $debug > 2;
653 foreach my $f ( sort keys %$rec ) {
654 warn "## marc_clone $f\n" if $debug;
655 marc_original_order( $f, $f );
656 }
657 }
658
659 =head1 PRIVATE FUNCTIONS
660
661 =head2 _marc_push
662
663 _marc_push( $marc );
664
665 =cut
666
667 sub _marc_push {
668 my $marc = shift || die "no marc?";
669 push @{ $marc_record->[ $marc_record_offset ] }, $marc;
670 }
671
672 =head2 _clean
673
674 Clean internal structures
675
676 =cut
677
678 sub _clean {
679 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader, $created_with_marc_template) = ();
680 ($marc_record_offset, $marc_fetch_offset) = (0,0);
681 }
682
683
684 =head2 _get_marc_fields
685
686 Get all fields defined by calls to C<marc>
687
688 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
689
690 We are using I<magic> which detect repeatable fields only from
691 sequence of field/subfield data generated by normalization.
692
693 This magic is disabled for all records created with C<marc_template>.
694
695 Repeatable field is created when there is second occurence of same subfield or
696 if any of indicators are different.
697
698 This is sane for most cases. Something like:
699
700 900a-1 900b-1 900c-1
701 900a-2 900b-2
702 900a-3
703
704 will be created from any combination of:
705
706 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
707
708 and following rules:
709
710 marc('900','a', rec('200','a') );
711 marc('900','b', rec('200','b') );
712 marc('900','c', rec('200','c') );
713
714 which might not be what you have in mind. If you need repeatable subfield,
715 define it using C<marc_repeatable_subfield> like this:
716
717 marc_repeatable_subfield('900','a');
718 marc('900','a', rec('200','a') );
719 marc('900','b', rec('200','b') );
720 marc('900','c', rec('200','c') );
721
722 will create:
723
724 900a-1 900a-2 900a-3 900b-1 900c-1
725 900b-2
726
727 There is also support for returning next or specific using:
728
729 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
730 # do something with $mf
731 }
732
733 will always return fields from next MARC record or
734
735 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
736
737 will return 42th copy record (if it exists).
738
739 =cut
740
741 my $fetch_pos;
742
743 sub _get_marc_fields {
744
745 my $arg = {@_};
746 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
747 $fetch_pos = $marc_fetch_offset;
748 if ($arg->{offset}) {
749 $fetch_pos = $arg->{offset};
750 } elsif($arg->{fetch_next}) {
751 $marc_fetch_offset++;
752 }
753
754 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
755
756 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
757
758 my $marc_rec = $marc_record->[ $fetch_pos ];
759
760 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
761
762 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
763
764 # first, sort all existing fields
765 # XXX might not be needed, but modern perl might randomize elements in hash
766 # my @sorted_marc_record = sort {
767 # $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
768 # } @{ $marc_rec };
769
770 my @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
771
772 # output marc fields
773 my @m;
774
775 # count unique field-subfields (used for offset when walking to next subfield)
776 my $u;
777 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
778
779 if ($debug) {
780 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
781 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
782 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
783 warn "## subfield count = ", dump( $u ), $/;
784 }
785
786 my $len = $#sorted_marc_record;
787 my $visited;
788 my $i = 0;
789 my $field;
790
791 warn "## created_with_marc_template = ",dump( $created_with_marc_template ) if $debug;
792
793 foreach ( 0 .. $len ) {
794
795 # find next element which isn't visited
796 while ($visited->{$i}) {
797 $i = ($i + 1) % ($len + 1);
798 }
799
800 # mark it visited
801 $visited->{$i}++;
802
803 my $row = dclone( $sorted_marc_record[$i] );
804
805 if ( $created_with_marc_template->{ $row->[0] } ) {
806 push @m, $row;
807 warn "## copied marc_template created ", dump( $row ),$/ if $debug;
808 next;
809 }
810
811 # field and subfield which is key for
812 # marc_repeatable_subfield and u
813 my $fsf = $row->[0] . ( $row->[3] || '' );
814
815 if ($debug > 1) {
816
817 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
818 print "### this [$i]: ", dump( $row ),$/;
819 print "### sf: ", $row->[3], " vs ", $field->[3],
820 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
821 if ($#$field >= 0);
822
823 }
824
825 # if field exists
826 if ( $#$field >= 0 ) {
827 if (
828 $row->[0] ne $field->[0] || # field
829 $row->[1] ne $field->[1] || # i1
830 $row->[2] ne $field->[2] # i2
831 ) {
832 push @m, $field;
833 warn "## saved/1 ", dump( $field ),$/ if ($debug);
834 $field = $row;
835
836 } elsif (
837 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
838 ||
839 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
840 ! $marc_repeatable_subfield->{ $fsf }
841 )
842 ) {
843 push @m, $field;
844 warn "## saved/2 ", dump( $field ),$/ if ($debug);
845 $field = $row;
846
847 } else {
848 # append new subfields to existing field
849 push @$field, ( $row->[3], $row->[4] );
850 }
851 } else {
852 # insert first field
853 $field = $row;
854 }
855
856 if (! $marc_repeatable_subfield->{ $fsf }) {
857 # make step to next subfield
858 $i = ($i + $u->{ $fsf } ) % ($len + 1);
859 }
860 }
861
862 if ($#$field >= 0) {
863 push @m, $field;
864 warn "## saved/3 ", dump( $field ),$/ if ($debug);
865 }
866
867 return \@m;
868 }
869
870 =head2 _get_marc_leader
871
872 Return leader from currently fetched record by L</_get_marc_fields>
873
874 print WebPAC::Normalize::MARC::_get_marc_leader();
875
876 =cut
877
878 sub _get_marc_leader {
879 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
880 return $marc_leader->[ $fetch_pos ];
881 }
882
883 =head2 _created_marc_records
884
885 my $nr_records = _created_marc_records;
886
887 =cut
888
889 sub _created_marc_records {
890 return $#{ $marc_record } + 1 if $marc_record;
891 }
892
893 1;

  ViewVC Help
Powered by ViewVC 1.1.26