/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 38 by dpavlin, Sat Nov 12 21:21:50 2005 UTC revision 295 by dpavlin, Mon Dec 19 15:34:47 2005 UTC
# Line 11  WebPAC::Normalize - data mungling for no Line 11  WebPAC::Normalize - data mungling for no
11    
12  =head1 VERSION  =head1 VERSION
13    
14  Version 0.01  Version 0.07
15    
16  =cut  =cut
17    
18  our $VERSION = '0.01';  our $VERSION = '0.07';
19    
20  =head1 SYNOPSIS  =head1 SYNOPSIS
21    
# Line 47  optional C<filter{filter_name}> at B<beg Line 47  optional C<filter{filter_name}> at B<beg
47  code defined as code ref on format after field substitution to producing  code defined as code ref on format after field substitution to producing
48  output  output
49    
50    There is one built-in filter called C<regex> which can be used like this:
51    
52      filter{regex(s/foo/bar/)}
53    
54  =item *  =item *
55    
56  optional C<lookup{...}> will be then performed. See C<WebPAC::Lookups>.  optional C<lookup{...}> will be then performed. See C<WebPAC::Lookups>.
# Line 82  Create new normalisation object Line 86  Create new normalisation object
86          db => $db_obj,          db => $db_obj,
87          lookup_regex => $lookup->regex,          lookup_regex => $lookup->regex,
88          lookup => $lookup_obj,          lookup => $lookup_obj,
89            prefix => 'foobar',
90    );    );
91    
92  Parameter C<filter> defines user supplied snippets of perl code which can  Parameter C<filter> defines user supplied snippets of perl code which can
93  be used with C<filter{...}> notation.  be used with C<filter{...}> notation.
94    
95    C<prefix> is used to form the filename for a database record (to support multiple
96    source files which are joined in one database).
97    
98  Recommended parameter C<lookup_regex> is used to enable parsing of lookups  Recommended parameter C<lookup_regex> is used to enable parsing of lookups
99  in structures. If you pass this parameter, you must also pass C<lookup>  in structures. If you pass this parameter, you must also pass C<lookup>
100  which is a C<WebPAC::Lookup> object.  which is a C<WebPAC::Lookup> object.
# Line 111  sub new { Line 119  sub new {
119    
120          $log->logdie("lookup must be WebPAC::Lookup object") if ($self->{'lookup'} && ! $self->{'lookup'}->isa('WebPAC::Lookup'));          $log->logdie("lookup must be WebPAC::Lookup object") if ($self->{'lookup'} && ! $self->{'lookup'}->isa('WebPAC::Lookup'));
121    
122            $log->warn("no prefix defined. please check that!") unless ($self->{'prefix'});
123    
124            $log->debug("using lookup regex: ", $self->{lookup_regex}) if ($r && $l);
125    
126            if (! $self->{filter} || ! $self->{filter}->{regex}) {
127                    $log->debug("adding built-in filter regex");
128                    $self->{filter}->{regex} = sub {
129                            my ($val, $regex) = @_;
130                            eval "\$val =~ $regex";
131                            return $val;
132                    };
133            }
134    
135          $self ? return $self : return undef;          $self ? return $self : return undef;
136  }  }
137    
# Line 122  C<conf/normalize/*.xml>. Line 143  C<conf/normalize/*.xml>.
143    
144  These structures are used to produce output.  These structures are used to produce output.
145    
146   my @ds = $webpac->data_structure($rec);   my $ds = $webpac->data_structure($rec);
   
 B<Note: historical oddity follows>  
   
 This method will also set C<< $webpac->{'currnet_filename'} >> if there is  
 C<< <filename> >> tag and C<< $webpac->{'headline'} >> if there is  
 C<< <headline> >> tag.  
147    
148  =cut  =cut
149    
# Line 140  sub data_structure { Line 155  sub data_structure {
155          my $rec = shift;          my $rec = shift;
156          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);          $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o);
157    
158            $log->debug("data_structure rec = ", sub { Dumper($rec) });
159    
160            $log->logdie("need unique ID (mfn) in field 000 of record ", sub { Dumper($rec) } ) unless (defined($rec->{'000'}));
161    
162            my $id = $rec->{'000'}->[0] || $log->logdie("field 000 isn't array!");
163    
164          my $cache_file;          my $cache_file;
165    
166          if ($self->{'db'}) {          if ($self->{'db'}) {
167                  my @ds = $self->{'db'}->load_ds($rec);                  my $ds = $self->{'db'}->load_ds( id => $id, prefix => $self->{prefix} );
168                  $log->debug("load_ds( rec = ", sub { Dumper($rec) }, ") = ", sub { Dumper(@ds) });                  $log->debug("load_ds( rec = ", sub { Dumper($rec) }, ") = ", sub { Dumper($ds) });
169                  return @ds if ($#ds > 0);                  return $ds if ($ds);
170                  $log->debug("cache miss, creating");                  $log->debug("cache miss, creating");
171          }          }
172    
         undef $self->{'currnet_filename'};  
         undef $self->{'headline'};  
   
173          my @sorted_tags;          my @sorted_tags;
174          if ($self->{tags_by_order}) {          if ($self->{tags_by_order}) {
175                  @sorted_tags = @{$self->{tags_by_order}};                  @sorted_tags = @{$self->{tags_by_order}};
# Line 160  sub data_structure { Line 178  sub data_structure {
178                  $self->{tags_by_order} = \@sorted_tags;                  $self->{tags_by_order} = \@sorted_tags;
179          }          }
180    
181          my @ds;          my $ds;
182    
183          $log->debug("tags: ",sub { join(", ",@sorted_tags) });          $log->debug("tags: ",sub { join(", ",@sorted_tags) });
184    
# Line 195  sub data_structure { Line 213  sub data_structure {
213                                  @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;                                  @v = map { $self->apply_format($tag->{'format_name'},$tag->{'format_delimiter'},$_) } @v;
214                          }                          }
215    
                         if ($field eq 'filename') {  
                                 $self->{'current_filename'} = join('',@v);  
                                 $log->debug("filename: ",$self->{'current_filename'});  
                         } elsif ($field eq 'headline') {  
                                 $self->{'headline'} .= join('',@v);  
                                 $log->debug("headline: ",$self->{'headline'});  
                                 next; # don't return headline in data_structure!  
                         }  
   
216                          # delimiter will join repeatable fields                          # delimiter will join repeatable fields
217                          if ($tag->{'delimiter'}) {                          if ($tag->{'delimiter'}) {
218                                  @v = ( join($tag->{'delimiter'}, @v) );                                  @v = ( join($tag->{'delimiter'}, @v) );
219                          }                          }
220    
221                          # default types                          # default types
222                          my @types = qw(display swish);                          my @types = qw(display search);
223                          # override by type attribute                          # override by type attribute
224                          @types = ( $tag->{'type'} ) if ($tag->{'type'});                          @types = ( $tag->{'type'} ) if ($tag->{'type'});
225    
226                          foreach my $type (@types) {                          foreach my $type (@types) {
227                                  # append to previous line?                                  # append to previous line?
228                                  $log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append');                                  $log->debug("type: $type ",sub { join(" ",@v) }, " ", $row->{'append'} || 'no append');
229                                  if ($tag->{'append'}) {                                  if ($tag->{'append'}) {
230    
231                                          # I will delimit appended part with                                          # I will delimit appended part with
# Line 243  sub data_structure { Line 252  sub data_structure {
252    
253                          # TODO: name_sigular, name_plural                          # TODO: name_sigular, name_plural
254                          my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};                          my $name = $self->{'import_xml'}->{'indexer'}->{$field}->{'name'};
255                          $row->{'name'} = $name ? $self->_x($name) : $field;                          my $row_name = $name ? $self->_x($name) : $field;
256    
257                          # post-sort all values in field                          # post-sort all values in field
258                          if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) {                          if ($self->{'import_xml'}->{'indexer'}->{$field}->{'sort'}) {
259                                  $log->warn("sort at field tag not implemented");                                  $log->warn("sort at field tag not implemented");
260                          }                          }
261    
262                          push @ds, $row;                          $ds->{$row_name} = $row;
263    
264                          $log->debug("row $field: ",sub { Dumper($row) });                          $log->debug("row $field: ",sub { Dumper($row) });
265                  }                  }
# Line 258  sub data_structure { Line 267  sub data_structure {
267          }          }
268    
269          $self->{'db'}->save_ds(          $self->{'db'}->save_ds(
270                  ds => \@ds,                  id => $id,
271                  current_filename => $self->{'current_filename'},                  ds => $ds,
272                  headline => $self->{'headline'},                  prefix => $self->{prefix},
273          ) if ($self->{'db'});          ) if ($self->{'db'});
274    
275          $log->debug("ds: ", sub { Dumper(@ds) });          $log->debug("ds: ", sub { Dumper($ds) });
276    
277          return @ds;          $log->logconfess("data structure returned is not array any more!") if wantarray;
278    
279            return $ds;
280    
281  }  }
282    
# Line 277  return output or nothing depending on ev Line 288  return output or nothing depending on ev
288    
289   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
290    
291    Filters are implemented here. While the simple form of a filter looks like this:
292    
293      filter{name_of_filter}
294    
295    but filters can also have a variable number of parameters like this:
296    
297      filter{name_of_filter(param,param,param)}
298    
299  =cut  =cut
300    
301    my $warn_once;
302    
303  sub parse {  sub parse {
304          my $self = shift;          my $self = shift;
305    
# Line 348  sub parse { Line 369  sub parse {
369                  return if (! $self->_eval($eval));                  return if (! $self->_eval($eval));
370          }          }
371                    
372          if ($filter_name && $self->{'filter'}->{$filter_name}) {          if ($filter_name) {
373                  $log->debug("about to filter{$filter_name} format: $out");                  my @filter_args;
374                  $out = $self->{'filter'}->{$filter_name}->($out);                  if ($filter_name =~ s/(\w+)\((.*)\)/$1/) {
375                  return unless(defined($out));                          @filter_args = split(/,/, $2);
376                  $log->debug("filter result: $out");                  }
377                    if ($self->{'filter'}->{$filter_name}) {
378                            $log->debug("about to filter{$filter_name} format: $out with arguments: ", join(",", @filter_args));
379                            unshift @filter_args, $out;
380                            $out = $self->{'filter'}->{$filter_name}->(@filter_args);
381                            return unless(defined($out));
382                            $log->debug("filter result: $out");
383                    } elsif (! $warn_once->{$filter_name}) {
384                            $log->warn("trying to use undefined filter $filter_name");
385                            $warn_once->{$filter_name}++;
386                    }
387          }          }
388    
389          return $out;          return $out;
# Line 462  sub fill_in { Line 493  sub fill_in {
493                  # do we have lookups?                  # do we have lookups?
494                  if ($self->{'lookup'}) {                  if ($self->{'lookup'}) {
495                          if ($self->{'lookup'}->can('lookup')) {                          if ($self->{'lookup'}->can('lookup')) {
496                                  return $self->{'lookup'}->lookup($format);                                  my @lookup = $self->{lookup}->lookup($format);
497                                    $log->debug("lookup $format", join(", ", @lookup));
498                                    return @lookup;
499                          } else {                          } else {
500                                  $log->warn("Have lookup object but can't invoke lookup method");                                  $log->warn("Have lookup object but can't invoke lookup method");
501                          }                          }
# Line 538  sub get_data { Line 571  sub get_data {
571                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {                  if ($sf && $$rec->{$f}->[$i]->{$sf}) {
572                          $$found++ if (defined($$found));                          $$found++ if (defined($$found));
573                          return $$rec->{$f}->[$i]->{$sf};                          return $$rec->{$f}->[$i]->{$sf};
574                  } elsif ($$rec->{$f}->[$i]) {                  } elsif (! $sf && $$rec->{$f}->[$i]) {
575                          $$found++ if (defined($$found));                          $$found++ if (defined($$found));
576                          # it still might have subfield, just                          # it still might have subfield, just
577                          # not specified, so we'll dump all                          # not specified, so we'll dump all
# Line 551  sub get_data { Line 584  sub get_data {
584                          } else {                          } else {
585                                  return $$rec->{$f}->[$i];                                  return $$rec->{$f}->[$i];
586                          }                          }
587                    } else {
588                            return '';
589                  }                  }
590          } else {          } else {
591                  return '';                  return '';
# Line 674  under the same terms as Perl itself. Line 709  under the same terms as Perl itself.
709    
710  =cut  =cut
711    
712  1; # End of WebPAC::DB  1; # End of WebPAC::Normalize

Legend:
Removed from v.38  
changed lines
  Added in v.295

  ViewVC Help
Powered by ViewVC 1.1.26