--- trunk/lib/WebPAC/Normalize.pm 2006/06/29 21:18:50 541 +++ trunk/lib/WebPAC/Normalize.pm 2006/06/29 23:29:02 548 @@ -5,7 +5,7 @@ _get_ds _clean_ds tag search display - marc21 + marc marc_indicators marc_repeatable_subfield rec1 rec2 rec regex prefix suffix surround @@ -43,7 +43,7 @@ Normalisation can generate multiple output normalized data. For now, supported output types (on the left side of definition) are: C, C, C and -C. +C. =head1 FUNCTIONS @@ -109,9 +109,7 @@ =cut -my $out; -my $marc21; -my $marc_encoding; +my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators); sub _get_ds { return $out; @@ -127,8 +125,7 @@ sub _clean_ds { my $a = {@_}; - $out = undef; - $marc21 = undef; + ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = (undef); $marc_encoding = $a->{marc_encoding}; } @@ -146,16 +143,50 @@ $lookup = shift; } -=head2 _get_marc21_fields +=head2 _get_marc_fields -Get all fields defined by calls to C +Get all fields defined by calls to C - $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() ); + $marc->add_fields( WebPAC::Normalize:_get_marc_fields() ); + + + +We are using I which detect repeatable fields only from +sequence of field/subfield data generated by normalization. + +Repeatable field is created if there is second occurence of same subfield or +if any of indicators are different. This is sane for most cases except for +non-repeatable fields with repeatable subfields. + +You can change behaviour of that using C. =cut -sub _get_marc21_fields { - return @{$marc21}; +sub _get_marc_fields { + my @m; + my $last; + foreach my $row (@{ $marc_record }) { + if ($last && + $last->[0] eq $row->[0] && # check if field is same + $last->[1] eq $row->[1] && # check for i1 + $last->[2] eq $row->[2] && # and for i2 + ( $last->[3] ne $row->[3] || # and subfield is different + $last->[3] eq $row->[3] && # or subfield is same, + $marc_repeatable_subfield->{ $row->[3] } # but is repeatable + ) + ) { + push @$last, ( $row->[3] , $row->[4] ); + next; + } elsif ($last) { + push @m, $last; + } + + $last = $row; + } + + push @m, $last if ($last); + + return @m; } =head1 Functions to create C @@ -212,28 +243,65 @@ $out->{$name}->{search} = \@o; } -=head2 marc21 +=head2 marc Save value for MARC field - marc21('900','a', rec('200','a') ); + marc('900','a', rec('200','a') ); =cut -sub marc21 { - my $f = shift or die "marc21 needs field"; - die "marc21 field must be numer" unless ($f =~ /^\d+$/); +sub marc { + my $f = shift or die "marc needs field"; + die "marc field must be numer" unless ($f =~ /^\d+$/); - my $sf = shift or die "marc21 needs subfield"; + my $sf = shift or die "marc needs subfield"; foreach (@_) { my $v = $_; # make var read-write for Encode - next unless (defined($v) && $v !~ /^\s+$/); + next unless (defined($v) && $v !~ /^\s*$/); from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding); - push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ]; + my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' '); + push @{ $marc_record }, [ $f, $i1, $i2, $sf => $v ]; } } +=head2 marc_repeatable_subfield + +Save values for MARC repetable subfield + + marc_repeatable_subfield('910', 'z', rec('909') ); + +=cut + +sub marc_repeatable_subfield { + die "marc_repeatable_subfield need subfield!\n" unless (defined($_[1])); + $marc_repeatable_subfield->{ $_[1] }++; + marc(@_); +} + +=head2 marc_indicators + +Set both indicators for MARC field + + marc_indicators('900', ' ', 1); + +Any indicator value other than C<0-9> will be treated as undefined. + +=cut + +sub marc_indicators { + my $f = shift || die "marc_indicators need field!\n"; + my ($i1,$i2) = @_; + die "marc_indicators($f, ...) need i1!\n" unless(defined($i1)); + die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2)); + + $i1 = ' ' if ($i1 !~ /^\d$/); + $i2 = ' ' if ($i2 !~ /^\d$/); + @{ $marc_indicators->{$f} } = ($i1,$i2); +} + + =head1 Functions to extract data from input This function should be used inside functions to create C described