/[webpac2]/trunk/lib/WebPAC/Normalize/MARC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize/MARC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1038 - (show annotations)
Mon Nov 12 11:57:00 2007 UTC (16 years, 5 months ago) by dpavlin
File size: 22280 byte(s)
 r1614@llin:  dpavlin | 2007-11-12 12:56:56 +0100
 demostrate problems with marc records returned

1 package WebPAC::Normalize::MARC;
2 use Exporter 'import';
3 our @EXPORT = qw/
4 marc marc_indicators marc_repeatable_subfield
5 marc_compose marc_leader marc_fixed
6 marc_duplicate marc_remove marc_count
7 marc_original_order
8 marc_template
9 /;
10
11 use strict;
12 use warnings;
13
14 use Storable qw/dclone/;
15 use Data::Dump qw/dump/;
16 use Carp qw/confess/;
17
18 use WebPAC::Normalize;
19
20 our $debug = 0;
21
22 my ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader);
23 my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
24
25 our $rec;
26
27 =head1 NAME
28
29 WebPAC::Normalize::MARC - create MARC/ISO2709 records
30
31 =cut
32
33 =head1 FUNCTIONS
34
35 =head2 marc_template
36
37 marc_template(
38 from => 225, to => 440,
39 subfields_rename => [
40 'a' => 'a',
41 'x' => 'x',
42 'v' => 'v',
43 'h' => 'n',
44 'i' => 'p',
45 'w' => 'v',
46 ],
47 isis_template => [
48 'a ; |v. |i',
49 'a. |i ; |w',
50 ],
51 marc_template => [
52 'a, |x ; |v. |n, |p ; |v',
53 'a ; |v. |p ; |v',
54 ],
55 );
56
57 Returns number of records produced.
58
59 =cut
60
61 sub marc_template {
62 my $args = {@_};
63 warn "## marc_template(",dump($args),")",$/ if $debug;
64
65 foreach ( qw/subfields_rename isis_template marc_template/ ) {
66 # warn "ref($_) = ",ref($args->{$_}) if $debug;
67 die "$_ not ARRAY" if defined($args->{$_}) && ref($args->{$_}) ne 'ARRAY';
68 }
69
70 die "marc_template needs isis_template or marc_template"
71 if ! defined $args->{isis_template} && ! defined $args->{marc_template};
72
73 my $rec = WebPAC::Normalize::_get_rec() || die "_get_rec?";
74 my $r = $rec->{ $args->{from} } || return;
75 die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
76
77 my @subfields_rename = @{ $args->{subfields_rename} };
78 # warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename ) if $debug;
79
80 confess "need mapping in pairs for subfields_rename"
81 if $#subfields_rename % 2 != 1;
82
83 my ( $subfields_rename, $from_subfields );
84 our $to_subfields = {};
85 while ( my ( $from, $to ) = splice(@subfields_rename, 0, 2) ) {
86 my ( $f, $t ) = (
87 $from_subfields->{ $from }++,
88 $to_subfields->{ $to }++
89 );
90 $subfields_rename->{ $from }->[ $f ] = [ $to => $t ];
91 }
92 warn "### subfields_rename = ",dump( $subfields_rename ),$/ if $debug;
93 warn "### from_subfields = ", dump( $from_subfields ),$/ if $debug;
94 warn "### to_subfields = ", dump( $to_subfields ),$/ if $debug;
95
96 our $_template;
97
98 $_template->{isis}->{fields_re} = join('|', keys %$from_subfields );
99 $_template->{marc}->{fields_re} = join('|', keys %$to_subfields );
100
101 my @marc_out;
102
103 sub _parse_template {
104 my ( $name, $templates ) = @_;
105
106 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
107
108 foreach my $template ( @{ $templates } ) {
109 our $count = {};
110 our @order = ();
111 sub my_count {
112 my $sf = shift;
113 my $nr = $count->{$sf}++;
114 push @order, [ $sf, $nr ];
115 return $sf . $nr;
116 }
117 my $pos_template = $template;
118 $pos_template =~ s/($fields_re)/my_count($1)/ge;
119 my $count_key = dump( $count );
120 warn "### template: |$template| -> |$pos_template| count = $count_key order = ",dump( @order ),$/ if $debug;
121 $_template->{$name}->{pos}->{ $count_key } = $pos_template;
122 $_template->{$name}->{order}->{ $pos_template } = [ @order ];
123 }
124 warn "### from ",dump( $templates ), " using $fields_re created ", dump( $_template ),$/ if $debug;
125 }
126
127 _parse_template( 'marc', $args->{marc_template} );
128 _parse_template( 'isis', $args->{isis_template} );
129 warn "### _template = ",dump( $_template ),$/ if $debug;
130
131 my $m;
132
133 foreach my $r ( @{ $rec->{ $args->{from} } } ) {
134
135 my $i1 = $r->{i1} || ' ';
136 my $i2 = $r->{i2} || ' ';
137 $m = [ $args->{to}, $i1, $i2 ];
138
139 warn "### r = ",dump( $r ),$/ if $debug;
140
141 my ( $from_mapping, $to_mapping, $from_count, $to_count );
142 foreach my $from_sf ( keys %{$r} ) {
143 # skip everything which isn't one char subfield (e.g. 'subfields')
144 next unless $from_sf =~ m/^\w$/;
145 my $from_nr = $from_count->{$from_sf}++;
146 my $rename_to = $subfields_rename->{ $from_sf } ||
147 die "can't find subfield rename for $from_sf/$from_nr in ", dump( $subfields_rename );
148 my ( $to_sf, $to_nr ) = @{ $rename_to->[$from_nr] };
149 $to_mapping->{ $to_sf }->[ $to_nr ] = [ $from_sf => $from_nr ];
150
151 my $to_nr2 = $to_count->{ $to_sf }++;
152 $from_mapping->{ $from_sf }->[ $from_nr ] = [ $to_sf => $to_nr2 ];
153
154 warn "### from $from_sf/$from_nr -> $to_sf/$to_nr\tto $from_sf/$from_nr -> $to_sf/$to_nr2\n" if $debug;
155 }
156
157 warn "### from_mapping = ",dump( $from_mapping ), "\n### to_mapping = ",dump( $to_mapping ),$/ if $debug;
158
159 my $count_key = {
160 from => dump( $from_count ),
161 to => dump( $to_count),
162 };
163
164 warn "### count_key = ",dump( $count_key ),$/ if $debug;
165
166 my $processed_templates = 0;
167
168 # this defines order of traversal
169 foreach ( qw/isis:from marc:to/ ) {
170 my ($name,$count_name) = split(/:/);
171
172 my $ckey = $count_key->{$count_name} || die "can't find count_key $count_name in ",dump( $count_key );
173
174 my $template = $_template->{$name}->{pos}->{ $ckey } || next;
175 $processed_templates++;
176
177 warn "### traverse $name $count_name selected template: |$template|\n" if $debug;
178
179 our $fill_in = {};
180
181 my @templates = split(/\|/, $template );
182 @templates = ( $template ) unless @templates;
183
184 warn "### templates = ",dump( @templates ),$/ if $debug;
185
186 foreach my $sf ( @templates ) {
187 sub fill_in {
188 my ( $name, $r, $pre, $sf, $nr, $post ) = @_;
189 warn "#### fill_in( $name, r, '$pre', $sf, $nr, '$post' )\n" if $debug;
190 my ( $from_sf, $from_nr );
191 if ( $name eq 'marc' ) {
192 die "no $sf/$nr in to_mapping: ",dump( $to_mapping ), " form record ",dump( $r ) unless defined $to_mapping->{$sf}->[$nr];
193 ( $from_sf, $from_nr ) = @{ $to_mapping->{$sf}->[$nr] };
194 } else {
195 ( $from_sf, $from_nr ) = ( $sf, $nr );
196 }
197 my $v = $r->{ $from_sf }; # || die "no $from_sf/$from_nr";
198 if ( ref( $v ) eq 'ARRAY' ) {
199 $v = $pre . $v->[$from_nr] . $post;
200 } elsif ( $from_nr == 0 ) {
201 $v = $pre . $v . $post;
202 } else {
203 die "requested subfield $from_sf/$from_nr but it's ",dump( $v );
204 }
205 warn "#### fill_in( $sf, $nr ) = $from_sf/$from_nr >>>> ",dump( $v ),$/ if $debug;
206 $fill_in->{$sf}->[$nr] = $v;
207 return $v;
208 }
209 my $fields_re = $_template->{$name}->{fields_re} || die "can't find $name in ",dump( $_template->{$name}->{fields_re} );
210 warn "#### $sf <<<< $fields_re\n" if $debug;
211 $sf =~ s/^(.*?)($fields_re)(\d+)(.*?)$/fill_in($name,$r,$1,$2,$3,$4)/ge;
212 warn "#### >>>> $sf with fill_in = ",dump( $fill_in ),$/ if $debug;
213 }
214
215 warn "## template: |$template|\n## _template->$name = ",dump( $_template->{$name} ),$/ if $debug;
216
217 foreach my $sf ( @{ $_template->{$name}->{order}->{$template} } ) {
218 my ( $sf, $nr ) = @$sf;
219 my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
220 if ( $name eq 'isis') {
221 ( $sf, $nr ) = @{ $from_mapping->{$sf}->[$nr] };
222 }
223 warn "++ $sf/$nr |$v|\n" if $debug;
224 push @$m, ( $sf, $v );
225 }
226
227 warn "#### >>>> created MARC record: ", dump( $m ),$/ if $debug;
228
229 push @marc_out, $m;
230
231 last;
232 }
233
234 die "I don't have template for fields ",dump( $count_key ), "\n## available templates\n", dump( $_template ) unless $processed_templates;
235 warn ">>> $processed_templates templates applied to data\n",$/ if $debug;
236 }
237
238
239 my $recs = 0;
240
241 foreach my $marc ( @marc_out ) {
242 warn "+++ ",dump( $marc ),$/ if $debug;
243 _marc_push( $marc );
244 $recs++;
245 }
246
247 warn "### marc_template produced $recs MARC records: ",dump( @marc_out ),$/ if $debug;
248
249 return $recs;
250 }
251
252 =head2 marc_leader
253
254 Setup fields within MARC leader or get leader
255
256 marc_leader('05','c');
257 my $leader = marc_leader();
258
259 =cut
260
261 sub marc_leader {
262 my ($offset,$value) = @_;
263
264 if ($offset) {
265 $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
266 } else {
267
268 if (defined($marc_leader)) {
269 die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
270 return $marc_leader->[ $marc_record_offset ];
271 } else {
272 return;
273 }
274 }
275 }
276
277 =head2 marc_fixed
278
279 Create control/indentifier fields with values in fixed positions
280
281 marc_fixed('008', 00, '070402');
282 marc_fixed('008', 39, '|');
283
284 Positions not specified will be filled with spaces (C<0x20>).
285
286 There will be no effort to extend last specified value to full length of
287 field in standard.
288
289 =cut
290
291 sub marc_fixed {
292 my ($f, $pos, $val) = @_;
293 die "need marc(field, position, value)" unless defined($f) && defined($pos);
294
295 confess "need val" unless defined $val;
296
297 my $update = 0;
298
299 map {
300 if ($_->[0] eq $f) {
301 my $old = $_->[1];
302 if (length($old) <= $pos) {
303 $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
304 warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
305 } else {
306 $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
307 warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
308 }
309 $update++;
310 }
311 } @{ $marc_record->[ $marc_record_offset ] };
312
313 if (! $update) {
314 my $v = ' ' x $pos . $val;
315 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
316 warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
317 }
318 }
319
320 =head2 marc
321
322 Save value for MARC field
323
324 marc('900','a', rec('200','a') );
325 marc('001', rec('000') );
326
327 =cut
328
329 sub marc {
330 my $f = shift or die "marc needs field";
331 die "marc field must be numer" unless ($f =~ /^\d+$/);
332
333 my $sf;
334 if ($f >= 10) {
335 $sf = shift or die "marc needs subfield";
336 }
337
338 foreach (@_) {
339 my $v = $_; # make var read-write for Encode
340 next unless (defined($v) && $v !~ /^\s*$/);
341 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
342 if (defined $sf) {
343 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
344 } else {
345 push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
346 }
347 }
348 }
349
350 =head2 marc_repeatable_subfield
351
352 Save values for MARC repetable subfield
353
354 marc_repeatable_subfield('910', 'z', rec('909') );
355
356 =cut
357
358 sub marc_repeatable_subfield {
359 my ($f,$sf) = @_;
360 die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
361 $marc_repeatable_subfield->{ $f . $sf }++;
362 marc(@_);
363 }
364
365 =head2 marc_indicators
366
367 Set both indicators for MARC field
368
369 marc_indicators('900', ' ', 1);
370
371 Any indicator value other than C<0-9> will be treated as undefined.
372
373 =cut
374
375 sub marc_indicators {
376 my $f = shift || die "marc_indicators need field!\n";
377 my ($i1,$i2) = @_;
378 die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
379 die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
380
381 $i1 = ' ' if ($i1 !~ /^\d$/);
382 $i2 = ' ' if ($i2 !~ /^\d$/);
383 @{ $marc_indicators->{$f} } = ($i1,$i2);
384 }
385
386 =head2 marc_compose
387
388 Save values for each MARC subfield explicitly
389
390 marc_compose('900',
391 'a', rec('200','a')
392 'b', rec('201','a')
393 'a', rec('200','b')
394 'c', rec('200','c')
395 );
396
397 If you specify C<+> for subfield, value will be appended
398 to previous defined subfield.
399
400 =cut
401
402 sub marc_compose {
403 my $f = shift or die "marc_compose needs field";
404 die "marc_compose field must be numer" unless ($f =~ /^\d+$/);
405
406 my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
407 my $m = [ $f, $i1, $i2 ];
408
409 warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
410
411 if ($#_ % 2 != 1) {
412 die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
413 }
414
415 while (@_) {
416 my $sf = shift;
417 my $v = shift;
418
419 next unless (defined($v) && $v !~ /^\s*$/);
420 warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
421 if ($sf ne '+') {
422 push @$m, ( $sf, $v );
423 } else {
424 $m->[ $#$m ] .= $v;
425 }
426 }
427
428 warn "## marc_compose current marc = ", dump( $m ),$/ if ($debug > 1);
429
430 push @{ $marc_record->[ $marc_record_offset ] }, $m if ($#{$m} > 2);
431 }
432
433 =head2 marc_duplicate
434
435 Generate copy of current MARC record and continue working on copy
436
437 marc_duplicate();
438
439 Copies can be accessed using C<< _get_marc_fields( fetch_next => 1 ) >> or
440 C<< _get_marc_fields( offset => 42 ) >>.
441
442 =cut
443
444 sub marc_duplicate {
445 my $m = $marc_record->[ -1 ];
446 die "can't duplicate record which isn't defined" unless ($m);
447 push @{ $marc_record }, dclone( $m );
448 push @{ $marc_leader }, dclone( marc_leader() );
449 warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
450 $marc_record_offset = $#{ $marc_record };
451 warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
452
453 }
454
455 =head2 marc_remove
456
457 Remove some field or subfield from MARC record.
458
459 marc_remove('200');
460 marc_remove('200','a');
461
462 This will erase field C<200> or C<200^a> from current MARC record.
463
464 marc_remove('*');
465
466 Will remove all fields in current MARC record.
467
468 This is useful after calling C<marc_duplicate> or on it's own (but, you
469 should probably just remove that subfield definition if you are not
470 using C<marc_duplicate>).
471
472 FIXME: support fields < 10.
473
474 =cut
475
476 sub marc_remove {
477 my ($f, $sf) = @_;
478
479 die "marc_remove needs record number" unless defined($f);
480
481 my $marc = $marc_record->[ $marc_record_offset ];
482
483 warn "### marc_remove before = ", dump( $marc ), $/ if ($debug > 2);
484
485 if ($f eq '*') {
486
487 delete( $marc_record->[ $marc_record_offset ] );
488 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
489
490 } else {
491
492 my $i = 0;
493 foreach ( 0 .. $#{ $marc } ) {
494 last unless (defined $marc->[$i]);
495 warn "#### working on ",dump( @{ $marc->[$i] }), $/ if ($debug > 3);
496 if ($marc->[$i]->[0] eq $f) {
497 if (! defined $sf) {
498 # remove whole field
499 splice @$marc, $i, 1;
500 warn "#### slice \@\$marc, $i, 1 = ",dump( @{ $marc }), $/ if ($debug > 3);
501 $i--;
502 } else {
503 foreach my $j ( 0 .. (( $#{ $marc->[$i] } - 3 ) / 2) ) {
504 my $o = ($j * 2) + 3;
505 if ($marc->[$i]->[$o] eq $sf) {
506 # remove subfield
507 splice @{$marc->[$i]}, $o, 2;
508 warn "#### slice \@{\$marc->[$i]}, $o, 2 = ", dump( @{ $marc }), $/ if ($debug > 3);
509 # is record now empty?
510 if ($#{ $marc->[$i] } == 2) {
511 splice @$marc, $i, 1;
512 warn "#### slice \@\$marc, $i, 1 = ", dump( @{ $marc }), $/ if ($debug > 3);
513 $i--;
514 };
515 }
516 }
517 }
518 }
519 $i++;
520 }
521
522 warn "### marc_remove($f", $sf ? ",$sf" : "", ") after = ", dump( $marc ), $/ if ($debug > 2);
523
524 $marc_record->[ $marc_record_offset ] = $marc;
525 }
526
527 warn "## full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 1);
528 }
529
530 =head2 marc_original_order
531
532 Copy all subfields preserving original order to marc field.
533
534 marc_original_order( marc_field_number, original_input_field_number );
535
536 Please note that field numbers are consistent with other commands (marc
537 field number first), but somewhat counter-intuitive (destination and then
538 source).
539
540 You might want to use this command if you are just renaming subfields or
541 using pre-processing modify_record in C<config.yml> and don't need any
542 post-processing or want to preserve order of original subfields.
543
544
545 =cut
546
547 sub marc_original_order {
548
549 my ($to, $from) = @_;
550 die "marc_original_order needs from and to fields\n" unless ($from && $to);
551
552 return unless defined($rec->{$from});
553
554 my $r = $rec->{$from};
555 die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
556
557 my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
558 warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
559
560 foreach my $d (@$r) {
561
562 if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
563 warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
564 next;
565 }
566
567 my @sfs = @{ $d->{subfields} };
568
569 die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
570
571 warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
572
573 my $m = [ $to, $i1, $i2 ];
574
575 while (my $sf = shift @sfs) {
576
577 warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
578 my $offset = shift @sfs;
579 die "corrupted sufields specification for field $from\n" unless defined($offset);
580
581 my $v;
582 if (ref($d->{$sf}) eq 'ARRAY') {
583 $v = $d->{$sf}->[$offset] if (defined($d->{$sf}->[$offset]));
584 } elsif ($offset == 0) {
585 $v = $d->{$sf};
586 } else {
587 die "field $from subfield '$sf' need occurence $offset which doesn't exist", dump($d->{$sf});
588 }
589 push @$m, ( $sf, $v ) if (defined($v));
590 }
591
592 if ($#{$m} > 2) {
593 push @{ $marc_record->[ $marc_record_offset ] }, $m;
594 }
595 }
596
597 warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
598 }
599
600
601 =head2 marc_count
602
603 Return number of MARC records created using L</marc_duplicate>.
604
605 print "created ", marc_count(), " records";
606
607 =cut
608
609 sub marc_count {
610 return $#{ $marc_record };
611 }
612
613 =head2 _marc_push
614
615 _marc_push( $marc );
616
617 =cut
618
619 sub _marc_push {
620 my $marc = shift || die "no marc?";
621 push @{ $marc_record->[ $marc_record_offset ] }, $marc;
622 }
623
624 =head1 PRIVATE FUNCTIONS
625
626 =head2 _clean
627
628 Clean internal structures
629
630 =cut
631
632 sub _clean {
633 ($marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $marc_leader) = ();
634 ($marc_record_offset, $marc_fetch_offset) = (0,0);
635 }
636
637
638 =head2 _get_marc_fields
639
640 Get all fields defined by calls to C<marc>
641
642 $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
643
644 We are using I<magic> which detect repeatable fields only from
645 sequence of field/subfield data generated by normalization.
646
647 Repeatable field is created when there is second occurence of same subfield or
648 if any of indicators are different.
649
650 This is sane for most cases. Something like:
651
652 900a-1 900b-1 900c-1
653 900a-2 900b-2
654 900a-3
655
656 will be created from any combination of:
657
658 900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
659
660 and following rules:
661
662 marc('900','a', rec('200','a') );
663 marc('900','b', rec('200','b') );
664 marc('900','c', rec('200','c') );
665
666 which might not be what you have in mind. If you need repeatable subfield,
667 define it using C<marc_repeatable_subfield> like this:
668
669 marc_repeatable_subfield('900','a');
670 marc('900','a', rec('200','a') );
671 marc('900','b', rec('200','b') );
672 marc('900','c', rec('200','c') );
673
674 will create:
675
676 900a-1 900a-2 900a-3 900b-1 900c-1
677 900b-2
678
679 There is also support for returning next or specific using:
680
681 while (my $mf = WebPAC::Normalize:_get_marc_fields( fetch_next => 1 ) ) {
682 # do something with $mf
683 }
684
685 will always return fields from next MARC record or
686
687 my $mf = WebPAC::Normalize::_get_marc_fields( offset => 42 );
688
689 will return 42th copy record (if it exists).
690
691 =cut
692
693 my $fetch_pos;
694
695 sub _get_marc_fields {
696
697 my $arg = {@_};
698 warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
699 $fetch_pos = $marc_fetch_offset;
700 if ($arg->{offset}) {
701 $fetch_pos = $arg->{offset};
702 } elsif($arg->{fetch_next}) {
703 $marc_fetch_offset++;
704 }
705
706 return if (! $marc_record || ref($marc_record) ne 'ARRAY');
707
708 warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
709
710 my $marc_rec = $marc_record->[ $fetch_pos ];
711
712 warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
713
714 return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
715
716 # first, sort all existing fields
717 # XXX might not be needed, but modern perl might randomize elements in hash
718 my @sorted_marc_record = sort {
719 $a->[0] . ( $a->[3] || '' ) cmp $b->[0] . ( $b->[3] || '')
720 } @{ $marc_rec };
721
722 @sorted_marc_record = @{ $marc_rec }; ### FIXME disable sorting
723
724 # output marc fields
725 my @m;
726
727 # count unique field-subfields (used for offset when walking to next subfield)
728 my $u;
729 map { $u->{ $_->[0] . ( $_->[3] || '') }++ } @sorted_marc_record;
730
731 if ($debug) {
732 warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
733 warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
734 warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
735 warn "## subfield count = ", dump( $u ), $/;
736 }
737
738 my $len = $#sorted_marc_record;
739 my $visited;
740 my $i = 0;
741 my $field;
742
743 foreach ( 0 .. $len ) {
744
745 # find next element which isn't visited
746 while ($visited->{$i}) {
747 $i = ($i + 1) % ($len + 1);
748 }
749
750 # mark it visited
751 $visited->{$i}++;
752
753 my $row = dclone( $sorted_marc_record[$i] );
754
755 # field and subfield which is key for
756 # marc_repeatable_subfield and u
757 my $fsf = $row->[0] . ( $row->[3] || '' );
758
759 if ($debug > 1) {
760
761 print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
762 print "### this [$i]: ", dump( $row ),$/;
763 print "### sf: ", $row->[3], " vs ", $field->[3],
764 $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
765 if ($#$field >= 0);
766
767 }
768
769 # if field exists
770 if ( $#$field >= 0 ) {
771 if (
772 $row->[0] ne $field->[0] || # field
773 $row->[1] ne $field->[1] || # i1
774 $row->[2] ne $field->[2] # i2
775 ) {
776 push @m, $field;
777 warn "## saved/1 ", dump( $field ),$/ if ($debug);
778 $field = $row;
779
780 } elsif (
781 ( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
782 ||
783 ( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
784 ! $marc_repeatable_subfield->{ $fsf }
785 )
786 ) {
787 push @m, $field;
788 warn "## saved/2 ", dump( $field ),$/ if ($debug);
789 $field = $row;
790
791 } else {
792 # append new subfields to existing field
793 push @$field, ( $row->[3], $row->[4] );
794 }
795 } else {
796 # insert first field
797 $field = $row;
798 }
799
800 if (! $marc_repeatable_subfield->{ $fsf }) {
801 # make step to next subfield
802 $i = ($i + $u->{ $fsf } ) % ($len + 1);
803 }
804 }
805
806 if ($#$field >= 0) {
807 push @m, $field;
808 warn "## saved/3 ", dump( $field ),$/ if ($debug);
809 }
810
811 return \@m;
812 }
813
814 =head2 _get_marc_leader
815
816 Return leader from currently fetched record by L</_get_marc_fields>
817
818 print WebPAC::Normalize::MARC::_get_marc_leader();
819
820 =cut
821
822 sub _get_marc_leader {
823 die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
824 return $marc_leader->[ $fetch_pos ];
825 }
826
827 =head2 _created_marc_records
828
829 my $nr_records = _created_marc_records;
830
831 =cut
832
833 sub _created_marc_records {
834 return $#{ $marc_record } + 1 if $marc_record;
835 }
836
837 1;

  ViewVC Help
Powered by ViewVC 1.1.26