--- trunk/lib/WebPAC/Normalize.pm 2006/06/29 15:29:41 540 +++ trunk/lib/WebPAC/Normalize.pm 2006/06/29 21:52:51 544 @@ -17,6 +17,7 @@ #use base qw/WebPAC::Common/; use Data::Dumper; +use Encode qw/from_to/; =head1 NAME @@ -24,11 +25,11 @@ =head1 VERSION -Version 0.05 +Version 0.06 =cut -our $VERSION = '0.05'; +our $VERSION = '0.06'; =head1 SYNOPSIS @@ -57,6 +58,7 @@ lookup => $lookup->lookup_hash, row => $row, rules => $normalize_pl_config, + marc_encoding => 'utf-8', ); Options C, C, C and C are mandatory while all @@ -78,8 +80,7 @@ no strict 'subs'; _set_lookup( $arg->{lookup} ); _set_rec( $arg->{row} ); - _clean_ds(); - + _clean_ds( %{ $arg } ); eval "$arg->{rules}"; die "error evaling $arg->{rules}: $@\n" if ($@); @@ -110,6 +111,7 @@ my $out; my $marc21; +my $marc_encoding; sub _get_ds { return $out; @@ -124,8 +126,10 @@ =cut sub _clean_ds { + my $a = {@_}; $out = undef; $marc21 = undef; + $marc_encoding = $a->{marc_encoding}; } =head2 _set_lookup @@ -148,10 +152,42 @@ $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() ); + + +We are using I which detect repeatable fields only from +sequence of field/subfield data generated by normalization. + +Repeatable field is created if there is second occurence of same subfield or +if any of indicators are different. This is sane for most cases except for +non-repeatable fields with repeatable subfields. + +B: implement exceptions to magic + =cut sub _get_marc21_fields { - return @{$marc21}; + my @m; + my $last; + foreach my $row (@{ $marc21 }) { + if ($last && + $last->[0] eq $row->[0] && # check if field is same + $last->[1] eq $row->[1] && # check for i1 + $last->[2] eq $row->[2] && # and for i2 + $last->[3] ne $row->[3] # and subfield is different + ) { + push @$last, ( $row->[3] , $row->[4] ); + warn "## ++ added $row->[0] ^$row->[3] to $last->[0]\n"; + next; + } elsif ($last) { + push @m, $last; + } + + $last = $row; + } + + push @m, $last if ($last); + + return @m; } =head1 Functions to create C @@ -222,7 +258,10 @@ my $sf = shift or die "marc21 needs subfield"; - foreach my $v (@_) { + foreach (@_) { + my $v = $_; # make var read-write for Encode + next unless (defined($v) && $v !~ /^\s*$/); + from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding); push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ]; } }