/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 538 by dpavlin, Thu Jun 29 15:29:19 2006 UTC revision 544 by dpavlin, Thu Jun 29 21:52:51 2006 UTC
# Line 5  use Exporter 'import'; Line 5  use Exporter 'import';
5          _get_ds _clean_ds          _get_ds _clean_ds
6    
7          tag search display          tag search display
8            marc21
9    
10          rec1 rec2 rec          rec1 rec2 rec
11          regex prefix suffix surround          regex prefix suffix surround
12          first lookup join_with          first lookup join_with
# Line 15  use strict; Line 17  use strict;
17    
18  #use base qw/WebPAC::Common/;  #use base qw/WebPAC::Common/;
19  use Data::Dumper;  use Data::Dumper;
20    use Encode qw/from_to/;
21    
22  =head1 NAME  =head1 NAME
23    
# Line 22  WebPAC::Normalize - describe normalisato Line 25  WebPAC::Normalize - describe normalisato
25    
26  =head1 VERSION  =head1 VERSION
27    
28  Version 0.05  Version 0.06
29    
30  =cut  =cut
31    
32  our $VERSION = '0.05';  our $VERSION = '0.06';
33    
34  =head1 SYNOPSIS  =head1 SYNOPSIS
35    
# Line 39  means that you check it's validity befor Line 42  means that you check it's validity befor
42  C<perl -c normalize.pl>.  C<perl -c normalize.pl>.
43    
44  Normalisation can generate multiple output normalized data. For now, supported output  Normalisation can generate multiple output normalized data. For now, supported output
45  types (on the left side of definition) are: C<tag>, C<display> and C<search>.  types (on the left side of definition) are: C<tag>, C<display>, C<search> and
46    C<marc21>.
47    
48  =head1 FUNCTIONS  =head1 FUNCTIONS
49    
# Line 54  Return data structure Line 58  Return data structure
58          lookup => $lookup->lookup_hash,          lookup => $lookup->lookup_hash,
59          row => $row,          row => $row,
60          rules => $normalize_pl_config,          rules => $normalize_pl_config,
61            marc_encoding => 'utf-8',
62    );    );
63    
64    Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
65    other are optional.
66    
67  This function will B<die> if normalizastion can't be evaled.  This function will B<die> if normalizastion can't be evaled.
68    
69  Since this function isn't exported you have to call it with  Since this function isn't exported you have to call it with
# Line 72  sub data_structure { Line 80  sub data_structure {
80          no strict 'subs';          no strict 'subs';
81          _set_lookup( $arg->{lookup} );          _set_lookup( $arg->{lookup} );
82          _set_rec( $arg->{row} );          _set_rec( $arg->{row} );
83          _clean_ds();          _clean_ds( %{ $arg } );
84          eval "$arg->{rules}";          eval "$arg->{rules}";
85          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
86    
87          return _get_ds();          return _get_ds();
88  }  }
89    
# Line 101  Return hash formatted as data structure Line 110  Return hash formatted as data structure
110  =cut  =cut
111    
112  my $out;  my $out;
113    my $marc21;
114    my $marc_encoding;
115    
116  sub _get_ds {  sub _get_ds {
117          return $out;          return $out;
# Line 115  Clean data structure hash for next recor Line 126  Clean data structure hash for next recor
126  =cut  =cut
127    
128  sub _clean_ds {  sub _clean_ds {
129            my $a = {@_};
130          $out = undef;          $out = undef;
131            $marc21 = undef;
132            $marc_encoding = $a->{marc_encoding};
133  }  }
134    
135  =head2 _set_lookup  =head2 _set_lookup
# Line 132  sub _set_lookup { Line 146  sub _set_lookup {
146          $lookup = shift;          $lookup = shift;
147  }  }
148    
149    =head2 _get_marc21_fields
150    
151    Get all fields defined by calls to C<marc21>
152    
153            $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );
154    
155    
156    
157    We are using I<magic> which detect repeatable fields only from
158    sequence of field/subfield data generated by normalization.
159    
160    Repeatable field is created if there is second occurence of same subfield or
161    if any of indicators are different. This is sane for most cases except for
162    non-repeatable fields with repeatable subfields.
163    
164    B<TODO>: implement exceptions to magic
165    
166    =cut
167    
168    sub _get_marc21_fields {
169            my @m;
170            my $last;
171            foreach my $row (@{ $marc21 }) {
172                    if ($last &&
173                            $last->[0] eq $row->[0] &&              # check if field is same
174                            $last->[1] eq $row->[1] &&              # check for i1
175                            $last->[2] eq $row->[2] &&              # and for i2
176                            $last->[3] ne $row->[3]                 # and subfield is different
177                    ) {
178                            push @$last, ( $row->[3] , $row->[4] );
179                            warn "## ++ added $row->[0] ^$row->[3] to $last->[0]\n";
180                            next;
181                    } elsif ($last) {
182                            push @m, $last;
183                    }
184    
185                    $last = $row;
186            }
187    
188            push @m, $last if ($last);
189    
190            return @m;
191    }
192    
193    =head1 Functions to create C<data_structure>
194    
195    Those functions generally have to first in your normalization file.
196    
197  =head2 tag  =head2 tag
198    
199  Define new tag for I<search> and I<display>.  Define new tag for I<search> and I<display>.
# Line 182  sub search { Line 244  sub search {
244          $out->{$name}->{search} = \@o;          $out->{$name}->{search} = \@o;
245  }  }
246    
247    =head2 marc21
248    
249    Save value for MARC field
250    
251      marc21('900','a', rec('200','a') );
252    
253    =cut
254    
255    sub marc21 {
256            my $f = shift or die "marc21 needs field";
257            die "marc21 field must be numer" unless ($f =~ /^\d+$/);
258    
259            my $sf = shift or die "marc21 needs subfield";
260    
261            foreach (@_) {
262                    my $v = $_;             # make var read-write for Encode
263                    next unless (defined($v) && $v !~ /^\s*$/);
264                    from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
265                    push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];
266            }
267    }
268    
269    =head1 Functions to extract data from input
270    
271    This function should be used inside functions to create C<data_structure> described
272    above.
273    
274  =head2 rec1  =head2 rec1
275    
276  Return all values in some field  Return all values in some field

Legend:
Removed from v.538  
changed lines
  Added in v.544

  ViewVC Help
Powered by ViewVC 1.1.26