/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 253 by dpavlin, Thu Dec 15 17:01:10 2005 UTC revision 317 by dpavlin, Fri Dec 23 21:37:05 2005 UTC
# Line 11  WebPAC::Normalize - data mungling for no Line 11  WebPAC::Normalize - data mungling for no
11    
12  =head1 VERSION  =head1 VERSION
13    
14  Version 0.05  Version 0.08
15    
16  =cut  =cut
17    
18  our $VERSION = '0.05';  our $VERSION = '0.08';
19    
20  =head1 SYNOPSIS  =head1 SYNOPSIS
21    
# Line 47  optional C<filter{filter_name}> at B<beg Line 47  optional C<filter{filter_name}> at B<beg
47  code defined as code ref on format after field substitution to producing  code defined as code ref on format after field substitution to producing
48  output  output
49    
50    There is one built-in filter called C<regex> which can be used like this:
51    
52      filter{regex(s/foo/bar/)}
53    
54  =item *  =item *
55    
56  optional C<lookup{...}> will be then performed. See C<WebPAC::Lookups>.  optional C<lookup{...}> will be then performed. See C<WebPAC::Lookups>.
# Line 119  sub new { Line 123  sub new {
123    
124          $log->debug("using lookup regex: ", $self->{lookup_regex}) if ($r && $l);          $log->debug("using lookup regex: ", $self->{lookup_regex}) if ($r && $l);
125    
126            if (! $self->{filter} || ! $self->{filter}->{regex}) {
127                    $log->debug("adding built-in filter regex");
128                    $self->{filter}->{regex} = sub {
129                            my ($val, $regex) = @_;
130                            eval "\$val =~ $regex";
131                            return $val;
132                    };
133            }
134    
135          $self ? return $self : return undef;          $self ? return $self : return undef;
136  }  }
137    
# Line 144  sub data_structure { Line 157  sub data_structure {
157    
158          $log->debug("data_structure rec = ", sub { Dumper($rec) });          $log->debug("data_structure rec = ", sub { Dumper($rec) });
159    
160          $log->logdie("need unique ID (mfn) in field 000 of record ", sub { Dumper($rec) } ) unless (defined($rec->{'000'}));          $log->logdie("need unique ID (mfn) in field 000 of record " . Dumper($rec) ) unless (defined($rec->{'000'}));
161    
162          my $id = $rec->{'000'}->[0] || $log->logdie("field 000 isn't array!");          my $id = $rec->{'000'}->[0] || $log->logdie("field 000 isn't array!");
163    
# Line 157  sub data_structure { Line 170  sub data_structure {
170                  $log->debug("cache miss, creating");                  $log->debug("cache miss, creating");
171          }          }
172    
         undef $self->{'currnet_filename'};  
         undef $self->{'headline'};  
   
173          my @sorted_tags;          my @sorted_tags;
174          if ($self->{tags_by_order}) {          if ($self->{tags_by_order}) {
175                  @sorted_tags = @{$self->{tags_by_order}};                  @sorted_tags = @{$self->{tags_by_order}};
# Line 278  return output or nothing depending on ev Line 288  return output or nothing depending on ev
288    
289   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);   my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
290    
291    Filters are implemented here. While the simple form of a filter looks like this:
292    
293      filter{name_of_filter}
294    
295    but filters can also take a variable number of parameters, like this:
296    
297      filter{name_of_filter(param,param,param)}
298    
299  =cut  =cut
300    
301    my $warn_once;
302    
303  sub parse {  sub parse {
304          my $self = shift;          my $self = shift;
305    
# Line 307  sub parse { Line 327  sub parse {
327          # remove filter{...} from beginning          # remove filter{...} from beginning
328          $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s);          $filter_name = $1 if ($format =~ s/^filter{([^}]+)}//s);
329    
330            # did we find any field (at all) from format in row?
331            my $found_any = 0;
332            # prefix before first field, which we preserve if $found_any
333          my $prefix;          my $prefix;
334          my $all_found=0;  
335            my $f_step = 1;
336    
337          while ($format =~ s/^(.*?)(v|s)(\d+)(?:\^(\w))?//s) {          while ($format =~ s/^(.*?)(v|s)(\d+)(?:\^(\w))?//s) {
338    
339                  my $del = $1 || '';                  my $del = $1 || '';
340                  $prefix ||= $del if ($all_found == 0);                  $prefix = $del if ($f_step == 1);
341    
342                  # repeatable index                  # repeatable index
343                  my $r = $i;                  my $r = $i;
# Line 323  sub parse { Line 347  sub parse {
347                  my $tmp = $self->get_data(\$rec,$3,$4,$r,\$found);                  my $tmp = $self->get_data(\$rec,$3,$4,$r,\$found);
348    
349                  if ($found) {                  if ($found) {
350                          push @out, $del;                          $found_any += $found;
351    
352                            # we will skip delimiter before first occurrence of field!
353                            push @out, $del unless($found_any == 1);
354                          push @out, $tmp;                          push @out, $tmp;
                         $all_found += $found;  
355                  }                  }
356                    $f_step++;
357          }          }
358    
359          return if (! $all_found);          return if (! $found_any);
360    
361          my $out = join('',@out);          my $out = join('',@out);
362    
# Line 349  sub parse { Line 376  sub parse {
376                  return if (! $self->_eval($eval));                  return if (! $self->_eval($eval));
377          }          }
378                    
379          if ($filter_name && $self->{'filter'}->{$filter_name}) {          if ($filter_name) {
380                  $log->debug("about to filter{$filter_name} format: $out");                  my @filter_args;
381                  $out = $self->{'filter'}->{$filter_name}->($out);                  if ($filter_name =~ s/(\w+)\((.*)\)/$1/) {
382                  return unless(defined($out));                          @filter_args = split(/,/, $2);
383                  $log->debug("filter result: $out");                  }
384                    if ($self->{'filter'}->{$filter_name}) {
385                            $log->debug("about to filter{$filter_name} format: $out with arguments: ", join(",", @filter_args));
386                            unshift @filter_args, $out;
387                            $out = $self->{'filter'}->{$filter_name}->(@filter_args);
388                            return unless(defined($out));
389                            $log->debug("filter result: $out");
390                    } elsif (! $warn_once->{$filter_name}) {
391                            $log->warn("trying to use undefined filter $filter_name");
392                            $warn_once->{$filter_name}++;
393                    }
394          }          }
395    
396          return $out;          return $out;

Legend:
Removed from v.253  
changed lines
  Added in v.317

  ViewVC Help
Powered by ViewVC 1.1.26