/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 125 by dpavlin, Thu Nov 24 11:47:15 2005 UTC revision 260 by dpavlin, Fri Dec 16 14:40:55 2005 UTC
# Line 11  WebPAC::Normalize - data mungling for no Line 11  WebPAC::Normalize - data mungling for no
11    
12  =head1 VERSION  =head1 VERSION
13    
14  Version 0.02  Version 0.06
15    
16  =cut  =cut
17    
18  our $VERSION = '0.02';  our $VERSION = '0.06';
19    
20  =head1 SYNOPSIS  =head1 SYNOPSIS
21    
# Line 47  optional C<filter{filter_name}> at B<beg Line 47  optional C<filter{filter_name}> at B<beg
47  code defined as code ref on format after field substitution to producing  code defined as code ref on format after field substitution to producing
48  output  output
49    
50    There is one built-in filter called C<regex> which can be used like this:
51    
52      filter{regex(s/foo/bar/)}
53    
54  =item *  =item *
55    
56  optional C<lookup{...}> will then be performed. See C<WebPAC::Lookup>.  optional C<lookup{...}> will then be performed. See C<WebPAC::Lookup>.
# Line 82  Create new normalisation object Line 86  Create new normalisation object
86          db => $db_obj,          db => $db_obj,
87          lookup_regex => $lookup->regex,          lookup_regex => $lookup->regex,
88          lookup => $lookup_obj,          lookup => $lookup_obj,
89            prefix => 'foobar',
90    );    );
91    
92  Parameter C<filter> defines user-supplied snippets of Perl code which can  Parameter C<filter> defines user-supplied snippets of Perl code which can
93  be used with C<filter{...}> notation.  be used with C<filter{...}> notation.
94    
95    C<prefix> is used to form the filename for a database record (to support multiple
96    source files which are joined into one database).
97    
98  Recommended parameter C<lookup_regex> is used to enable parsing of lookups  Recommended parameter C<lookup_regex> is used to enable parsing of lookups
99  in structures. If you pass this parameter, you must also pass C<lookup>,  in structures. If you pass this parameter, you must also pass C<lookup>,
100  which is a C<WebPAC::Lookup> object.  which is a C<WebPAC::Lookup> object.
# Line 111  sub new { Line 119  sub new {
119    
120          $log->logdie("lookup must be WebPAC::Lookup object") if ($self->{'lookup'} && ! $self->{'lookup'}->isa('WebPAC::Lookup'));          $log->logdie("lookup must be WebPAC::Lookup object") if ($self->{'lookup'} && ! $self->{'lookup'}->isa('WebPAC::Lookup'));
121    
122            $log->warn("no prefix defined. please check that!") unless ($self->{'prefix'});
123    
124            $log->debug("using lookup regex: ", $self->{lookup_regex}) if ($r && $l);
125    
126            if ($self->{filter} && ! $self->{filter}->{regex}) {
127                    $log->debug("adding built-in filter regex");
128                    $self->{filter}->{regex} = sub {
129                            my ($val, $regex) = @_;
130                            eval "\$val =~ $regex";
131                            return $val;
132                    };
133            }
134    
135          $self ? return $self : return undef;          $self ? return $self : return undef;
136  }  }
137    
# Line 138  sub data_structure { Line 159  sub data_structure {
159    
160          $log->logdie("need unique ID (mfn) in field 000 of record ", sub { Dumper($rec) } ) unless (defined($rec->{'000'}));          $log->logdie("need unique ID (mfn) in field 000 of record ", sub { Dumper($rec) } ) unless (defined($rec->{'000'}));
161    
162          my $mfn = $rec->{'000'}->[0] || $log->logdie("field 000 isn't array!");          my $id = $rec->{'000'}->[0] || $log->logdie("field 000 isn't array!");
163    
164          my $cache_file;          my $cache_file;
165    
166          if ($self->{'db'}) {          if ($self->{'db'}) {
167                  my $ds = $self->{'db'}->load_ds( $mfn );                  my $ds = $self->{'db'}->load_ds( id => $id, prefix => $self->{prefix} );
168                  $log->debug("load_ds( rec = ", sub { Dumper($rec) }, ") = ", sub { Dumper($ds) });                  $log->debug("load_ds( rec = ", sub { Dumper($rec) }, ") = ", sub { Dumper($ds) });
169                  return $ds if ($ds);                  return $ds if ($ds);
170                  $log->debug("cache miss, creating");                  $log->debug("cache miss, creating");
# Line 207  sub data_structure { Line 228  sub data_structure {
228    
229                          foreach my $type (@types) {                          foreach my $type (@types) {
230                                  # append to previous line?                                  # append to previous line?
231                                  $log->debug("type: $type ",sub { join(" ",@v) }, $row->{'append'} || 'no append');                                  $log->debug("type: $type ",sub { join(" ",@v) }, " ", $row->{'append'} || 'no append');
232                                  if ($tag->{'append'}) {                                  if ($tag->{'append'}) {
233    
234                                          # I will delimit appended part with                                          # I will delimit appended part with
# Line 249  sub data_structure { Line 270  sub data_structure {
270          }          }
271    
272          $self->{'db'}->save_ds(          $self->{'db'}->save_ds(
273                  id => $mfn,                  id => $id,
274                  ds => $ds,                  ds => $ds,
275                    prefix => $self->{prefix},
276          ) if ($self->{'db'});          ) if ($self->{'db'});
277    
278          $log->debug("ds: ", sub { Dumper($ds) });          $log->debug("ds: ", sub { Dumper($ds) });
# Line 269  return output or nothing depending on ev Line 291  return output or nothing depending on ev
291    
292   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
293    
294    Filters are implemented here. The simple form of a filter looks like this:
295    
296      filter{name_of_filter}
297    
298    Filters can also take a variable number of parameters, like this:
299    
300      filter{name_of_filter(param,param,param)}
301    
302  =cut  =cut
303    
304  sub parse {  sub parse {
# Line 340  sub parse { Line 370  sub parse {
370                  return if (! $self->_eval($eval));                  return if (! $self->_eval($eval));
371          }          }
372                    
373          if ($filter_name && $self->{'filter'}->{$filter_name}) {          if ($filter_name) {
374                  $log->debug("about to filter{$filter_name} format: $out");                  my @filter_args;
375                  $out = $self->{'filter'}->{$filter_name}->($out);                  if ($filter_name =~ s/(\w+)\((.*)\)/$1/) {
376                  return unless(defined($out));                          @filter_args = split(/,/, $2);
377                  $log->debug("filter result: $out");                  }
378                    if ($self->{'filter'}->{$filter_name}) {
379                            $log->debug("about to filter{$filter_name} format: $out with arguments: ", join(",", @filter_args));
380                            unshift @filter_args, $out;
381                            $out = $self->{'filter'}->{$filter_name}->(@filter_args);
382                            return unless(defined($out));
383                            $log->debug("filter result: $out");
384                    } else {
385                            $log->warn("trying to use undefined filter $filter_name");
386                    }
387          }          }
388    
389          return $out;          return $out;
# Line 454  sub fill_in { Line 493  sub fill_in {
493                  # do we have lookups?                  # do we have lookups?
494                  if ($self->{'lookup'}) {                  if ($self->{'lookup'}) {
495                          if ($self->{'lookup'}->can('lookup')) {                          if ($self->{'lookup'}->can('lookup')) {
496                                  return $self->{'lookup'}->lookup($format);                                  my @lookup = $self->{lookup}->lookup($format);
497                                    $log->debug("lookup $format", join(", ", @lookup));
498                                    return @lookup;
499                          } else {                          } else {
500                                  $log->warn("Have lookup object but can't invoke lookup method");                                  $log->warn("Have lookup object but can't invoke lookup method");
501                          }                          }
# Line 668  under the same terms as Perl itself. Line 709  under the same terms as Perl itself.
709    
710  =cut  =cut
711    
712  1; # End of WebPAC::DB  1; # End of WebPAC::Normalize

Legend:
Removed from v.125  
changed lines
  Added in v.260

  ViewVC Help
Powered by ViewVC 1.1.26