/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 540 by dpavlin, Thu Jun 29 15:29:41 2006 UTC revision 547 by dpavlin, Thu Jun 29 23:19:26 2006 UTC
# Line 5  use Exporter 'import'; Line 5  use Exporter 'import';
5          _get_ds _clean_ds          _get_ds _clean_ds
6    
7          tag search display          tag search display
8          marc21          marc marc_indicators marc_repeatable_subfield
9    
10          rec1 rec2 rec          rec1 rec2 rec
11          regex prefix suffix surround          regex prefix suffix surround
# Line 17  use strict; Line 17  use strict;
17    
18  #use base qw/WebPAC::Common/;  #use base qw/WebPAC::Common/;
19  use Data::Dumper;  use Data::Dumper;
20    use Encode qw/from_to/;
21    
22  =head1 NAME  =head1 NAME
23    
# Line 24  WebPAC::Normalize - describe normalisato Line 25  WebPAC::Normalize - describe normalisato
25    
26  =head1 VERSION  =head1 VERSION
27    
28  Version 0.05  Version 0.06
29    
30  =cut  =cut
31    
32  our $VERSION = '0.05';  our $VERSION = '0.06';
33    
34  =head1 SYNOPSIS  =head1 SYNOPSIS
35    
# Line 42  C<perl -c normalize.pl>. Line 43  C<perl -c normalize.pl>.
43    
44  Normalisation can generate multiple output normalized data. For now, supported output  Normalisation can generate multiple output normalized data. For now, supported output
45  types (on the left side of definition) are: C<tag>, C<display>, C<search> and  types (on the left side of definition) are: C<tag>, C<display>, C<search> and
46  C<marc21>.  C<marc>.
47    
48  =head1 FUNCTIONS  =head1 FUNCTIONS
49    
# Line 57  Return data structure Line 58  Return data structure
58          lookup => $lookup->lookup_hash,          lookup => $lookup->lookup_hash,
59          row => $row,          row => $row,
60          rules => $normalize_pl_config,          rules => $normalize_pl_config,
61            marc_encoding => 'utf-8',
62    );    );
63    
64  Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all  Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
# Line 78  sub data_structure { Line 80  sub data_structure {
80          no strict 'subs';          no strict 'subs';
81          _set_lookup( $arg->{lookup} );          _set_lookup( $arg->{lookup} );
82          _set_rec( $arg->{row} );          _set_rec( $arg->{row} );
83          _clean_ds();          _clean_ds( %{ $arg } );
   
84          eval "$arg->{rules}";          eval "$arg->{rules}";
85          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
86    
# Line 108  Return hash formatted as data structure Line 109  Return hash formatted as data structure
109    
110  =cut  =cut
111    
112  my $out;  my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
 my $marc21;  
113    
114  sub _get_ds {  sub _get_ds {
115          return $out;          return $out;
# Line 124  Clean data structure hash for next recor Line 124  Clean data structure hash for next recor
124  =cut  =cut
125    
126  sub _clean_ds {  sub _clean_ds {
127          $out = undef;          my $a = {@_};
128          $marc21 = undef;          ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = (undef);
129            $marc_encoding = $a->{marc_encoding};
130  }  }
131    
132  =head2 _set_lookup  =head2 _set_lookup
# Line 142  sub _set_lookup { Line 143  sub _set_lookup {
143          $lookup = shift;          $lookup = shift;
144  }  }
145    
146  =head2 _get_marc21_fields  =head2 _get_marc_fields
147    
148    Get all fields defined by calls to C<marc>
149    
150            $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
151    
152    
 Get all fields defined by calls to C<marc21>  
153    
154          $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );  We are using I<magic> which detect repeatable fields only from
155    sequence of field/subfield data generated by normalization.
156    
157    Repeatable field is created if there is second occurence of same subfield or
158    if any of indicators are different. This is sane for most cases except for
159    non-repeatable fields with repeatable subfields.
160    
161    You can change behaviour of that using C<marc_repeatable_subfield>.
162    
163  =cut  =cut
164    
165  sub _get_marc21_fields {  sub _get_marc_fields {
166          return @{$marc21};          my @m;
167            my $last;
168            foreach my $row (@{ $marc_record }) {
169                    if ($last &&
170                            $last->[0] eq $row->[0] &&              # check if field is same
171                            $last->[1] eq $row->[1] &&              # check for i1
172                            $last->[2] eq $row->[2] &&              # and for i2
173                                    ( $last->[3] ne $row->[3] ||                            # and subfield is different
174                                    $last->[3] eq $row->[3] &&                                      # or subfield is same,
175                                    $marc_repeatable_subfield->{ $row->[3] }        # but is repeatable
176                            )
177                    ) {
178                            push @$last, ( $row->[3] , $row->[4] );
179                            warn "## ++ added $row->[0] ^$row->[3] to $last->[0]\n";
180                            next;
181                    } elsif ($last) {
182                            push @m, $last;
183                    }
184    
185                    $last = $row;
186            }
187    
188            push @m, $last if ($last);
189    
190            return @m;
191  }  }
192    
193  =head1 Functions to create C<data_structure>  =head1 Functions to create C<data_structure>
# Line 208  sub search { Line 244  sub search {
244          $out->{$name}->{search} = \@o;          $out->{$name}->{search} = \@o;
245  }  }
246    
247  =head2 marc21  =head2 marc
248    
249  Save value for MARC field  Save value for MARC field
250    
251    marc21('900','a', rec('200','a') );    marc('900','a', rec('200','a') );
252    
253  =cut  =cut
254    
255  sub marc21 {  sub marc {
256          my $f = shift or die "marc21 needs field";          my $f = shift or die "marc needs field";
257          die "marc21 field must be numer" unless ($f =~ /^\d+$/);          die "marc field must be numer" unless ($f =~ /^\d+$/);
258    
259            my $sf = shift or die "marc needs subfield";
260    
261            foreach (@_) {
262                    my $v = $_;             # make var read-write for Encode
263                    next unless (defined($v) && $v !~ /^\s*$/);
264                    from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
265                    push @{ $marc_record }, [
266                            $f,
267                            $marc_indicators->{$f}->{i1} || ' ',
268                            $marc_indicators->{$f}->{i2} || ' ',
269                            $sf => $v
270                    ];
271            }
272    }
273    
274          my $sf = shift or die "marc21 needs subfield";  =head2 marc_repeatable_subfield
275    
276          foreach my $v (@_) {  Save values for MARC repetable subfield
277                  push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];  
278          }    marc_repeatable_subfield('910', 'z', rec('909') );
279    
280    =cut
281    
282    sub marc_repeatable_subfield {
283            die "marc_repeatable_subfield need subfield!\n" unless (defined($_[1]));
284            $marc_repeatable_subfield->{ $_[1] }++;
285            marc(@_);
286  }  }
287    
288    =head2 marc_indicators
289    
290    Set both indicators for MARC field
291    
292      marc_indicators('900', ' ', 1);
293    
294    Any indicator value other than C<0-9> will be treated as undefined.
295    
296    =cut
297    
298    sub marc_indicators {
299            my $f = shift || die "marc_indicators need field!\n";
300            my ($i1,$i2) = @_;
301            die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
302            die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
303    
304            $i1 = ' ' if ($i1 !~ /^\d$/);
305            $i2 = ' ' if ($i2 !~ /^\d$/);
306            $marc_indicators->{$f}->{i1} = $i1;
307            $marc_indicators->{$f}->{i2} = $i2;
308    }
309    
310    
311  =head1 Functions to extract data from input  =head1 Functions to extract data from input
312    
313  This function should be used inside functions to create C<data_structure> described  This function should be used inside functions to create C<data_structure> described

Legend:
Removed from v.540  
changed lines
  Added in v.547

  ViewVC Help
Powered by ViewVC 1.1.26