/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 541 by dpavlin, Thu Jun 29 21:18:50 2006 UTC revision 548 by dpavlin, Thu Jun 29 23:29:02 2006 UTC
# Line 5  use Exporter 'import'; Line 5  use Exporter 'import';
5          _get_ds _clean_ds          _get_ds _clean_ds
6    
7          tag search display          tag search display
8          marc21          marc marc_indicators marc_repeatable_subfield
9    
10          rec1 rec2 rec          rec1 rec2 rec
11          regex prefix suffix surround          regex prefix suffix surround
# Line 43  C<perl -c normalize.pl>. Line 43  C<perl -c normalize.pl>.
43    
44  Normalisation can generate multiple output normalized data. For now, supported output  Normalisation can generate multiple output normalized data. For now, supported output
45  types (on the left side of definition) are: C<tag>, C<display>, C<search> and  types (on the left side of definition) are: C<tag>, C<display>, C<search> and
46  C<marc21>.  C<marc>.
47    
48  =head1 FUNCTIONS  =head1 FUNCTIONS
49    
# Line 109  Return hash formatted as data structure Line 109  Return hash formatted as data structure
109    
110  =cut  =cut
111    
112  my $out;  my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
 my $marc21;  
 my $marc_encoding;  
113    
114  sub _get_ds {  sub _get_ds {
115          return $out;          return $out;
# Line 127  Clean data structure hash for next recor Line 125  Clean data structure hash for next recor
125    
126  sub _clean_ds {  sub _clean_ds {
127          my $a = {@_};          my $a = {@_};
128          $out = undef;          ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = (undef);
         $marc21 = undef;  
129          $marc_encoding = $a->{marc_encoding};          $marc_encoding = $a->{marc_encoding};
130  }  }
131    
# Line 146  sub _set_lookup { Line 143  sub _set_lookup {
143          $lookup = shift;          $lookup = shift;
144  }  }
145    
146  =head2 _get_marc21_fields  =head2 _get_marc_fields
147    
148  Get all fields defined by calls to C<marc21>  Get all fields defined by calls to C<marc>
149    
150          $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );          $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
151    
152    
153    
154    We are using I<magic> which detect repeatable fields only from
155    sequence of field/subfield data generated by normalization.
156    
157    Repeatable field is created if there is second occurence of same subfield or
158    if any of indicators are different. This is sane for most cases except for
159    non-repeatable fields with repeatable subfields.
160    
161    You can change behaviour of that using C<marc_repeatable_subfield>.
162    
163  =cut  =cut
164    
165  sub _get_marc21_fields {  sub _get_marc_fields {
166          return @{$marc21};          my @m;
167            my $last;
168            foreach my $row (@{ $marc_record }) {
169                    if ($last &&
170                            $last->[0] eq $row->[0] &&              # check if field is same
171                            $last->[1] eq $row->[1] &&              # check for i1
172                            $last->[2] eq $row->[2] &&              # and for i2
173                                    ( $last->[3] ne $row->[3] ||                            # and subfield is different
174                                    $last->[3] eq $row->[3] &&                                      # or subfield is same,
175                                    $marc_repeatable_subfield->{ $row->[3] }        # but is repeatable
176                            )
177                    ) {
178                            push @$last, ( $row->[3] , $row->[4] );
179                            next;
180                    } elsif ($last) {
181                            push @m, $last;
182                    }
183    
184                    $last = $row;
185            }
186    
187            push @m, $last if ($last);
188    
189            return @m;
190  }  }
191    
192  =head1 Functions to create C<data_structure>  =head1 Functions to create C<data_structure>
# Line 212  sub search { Line 243  sub search {
243          $out->{$name}->{search} = \@o;          $out->{$name}->{search} = \@o;
244  }  }
245    
246  =head2 marc21  =head2 marc
247    
248  Save value for MARC field  Save value for MARC field
249    
250    marc21('900','a', rec('200','a') );    marc('900','a', rec('200','a') );
251    
252  =cut  =cut
253    
254  sub marc21 {  sub marc {
255          my $f = shift or die "marc21 needs field";          my $f = shift or die "marc needs field";
256          die "marc21 field must be numer" unless ($f =~ /^\d+$/);          die "marc field must be numer" unless ($f =~ /^\d+$/);
257    
258          my $sf = shift or die "marc21 needs subfield";          my $sf = shift or die "marc needs subfield";
259    
260          foreach (@_) {          foreach (@_) {
261                  my $v = $_;             # make var read-write for Encode                  my $v = $_;             # make var read-write for Encode
262                  next unless (defined($v) && $v !~ /^\s+$/);                  next unless (defined($v) && $v !~ /^\s*$/);
263                  from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);                  from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
264                  push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];                  my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
265                    push @{ $marc_record }, [ $f, $i1, $i2, $sf => $v ];
266          }          }
267  }  }
268    
269    =head2 marc_repeatable_subfield
270    
271    Save values for MARC repetable subfield
272    
273      marc_repeatable_subfield('910', 'z', rec('909') );
274    
275    =cut
276    
277    sub marc_repeatable_subfield {
278            die "marc_repeatable_subfield need subfield!\n" unless (defined($_[1]));
279            $marc_repeatable_subfield->{ $_[1] }++;
280            marc(@_);
281    }
282    
283    =head2 marc_indicators
284    
285    Set both indicators for MARC field
286    
287      marc_indicators('900', ' ', 1);
288    
289    Any indicator value other than C<0-9> will be treated as undefined.
290    
291    =cut
292    
293    sub marc_indicators {
294            my $f = shift || die "marc_indicators need field!\n";
295            my ($i1,$i2) = @_;
296            die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
297            die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
298    
299            $i1 = ' ' if ($i1 !~ /^\d$/);
300            $i2 = ' ' if ($i2 !~ /^\d$/);
301            @{ $marc_indicators->{$f} } = ($i1,$i2);
302    }
303    
304    
305  =head1 Functions to extract data from input  =head1 Functions to extract data from input
306    
307  This function should be used inside functions to create C<data_structure> described  This function should be used inside functions to create C<data_structure> described

Legend:
Removed from v.541  
changed lines
  Added in v.548

  ViewVC Help
Powered by ViewVC 1.1.26