--- trunk/lib/WebPAC/Normalize.pm 2006/06/29 15:29:19 538 +++ trunk/lib/WebPAC/Normalize.pm 2006/06/29 21:18:59 542 @@ -5,6 +5,8 @@ _get_ds _clean_ds tag search display + marc21 + rec1 rec2 rec regex prefix suffix surround first lookup join_with @@ -15,6 +17,7 @@ #use base qw/WebPAC::Common/; use Data::Dumper; +use Encode qw/from_to/; =head1 NAME @@ -22,11 +25,11 @@ =head1 VERSION -Version 0.05 +Version 0.06 =cut -our $VERSION = '0.05'; +our $VERSION = '0.06'; =head1 SYNOPSIS @@ -39,7 +42,8 @@ C. Normalisation can generate multiple output normalized data. For now, supported output -types (on the left side of definition) are: C, C and C. +types (on the left side of definition) are: C, C, C and +C. =head1 FUNCTIONS @@ -54,8 +58,12 @@ lookup => $lookup->lookup_hash, row => $row, rules => $normalize_pl_config, + marc_encoding => 'utf-8', ); +Options C, C, C and C are mandatory while all +other are optional. + This function will B if normalizastion can't be evaled. Since this function isn't exported you have to call it with @@ -72,9 +80,10 @@ no strict 'subs'; _set_lookup( $arg->{lookup} ); _set_rec( $arg->{row} ); - _clean_ds(); + _clean_ds( %{ $arg } ); eval "$arg->{rules}"; die "error evaling $arg->{rules}: $@\n" if ($@); + return _get_ds(); } @@ -101,6 +110,8 @@ =cut my $out; +my $marc21; +my $marc_encoding; sub _get_ds { return $out; @@ -115,7 +126,10 @@ =cut sub _clean_ds { + my $a = {@_}; $out = undef; + $marc21 = undef; + $marc_encoding = $a->{marc_encoding}; } =head2 _set_lookup @@ -132,6 +146,45 @@ $lookup = shift; } +=head2 _get_marc21_fields + +Get all fields defined by calls to C + + $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() ); + +B: implement exceptions to magic which unrolls repeated subfields +as new field with that subfield. + +=cut + +sub _get_marc21_fields { + my @m; + my $last; + foreach my $row (@{ $marc21 }) { + if ($last && ( + $last->[0] eq $row->[0] || # check if field is same + $last->[1] eq $row->[1] || # check for i1 + $last->[2] eq $row->[2] # and for i2 + ) ) { + $last->[3]->{ $row->[3] } = $row->[4]; + warn "## ++ added $row->[0] ^$row->[3]\n"; + next; + } elsif ($last) { + push @m, $last; + } + + push @m, $row; + } + + push @m, $last if ($last); + + return @m; +} + +=head1 Functions to create C + +Those functions generally have to first in your normalization file. + =head2 tag Define new tag for I and I. @@ -182,6 +235,33 @@ $out->{$name}->{search} = \@o; } +=head2 marc21 + +Save value for MARC field + + marc21('900','a', rec('200','a') ); + +=cut + +sub marc21 { + my $f = shift or die "marc21 needs field"; + die "marc21 field must be numer" unless ($f =~ /^\d+$/); + + my $sf = shift or die "marc21 needs subfield"; + + foreach (@_) { + my $v = $_; # make var read-write for Encode + next unless (defined($v) && $v !~ /^\s+$/); + from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding); + push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ]; + } +} + +=head1 Functions to extract data from input + +This function should be used inside functions to create C described +above. + =head2 rec1 Return all values in some field