--- trunk/lib/WebPAC/Normalize/Lookup.pm 2005/07/16 11:07:38 3 +++ trunk/lib/WebPAC/Lookup.pm 2006/06/26 16:39:51 536 @@ -1,100 +1,123 @@ -package WebPAC::Normalize::Lookup; +package WebPAC::Lookup; use warnings; use strict; -use WebPAC::Common; - -use base qw/WebPAC::Common/; +use base qw/WebPAC::Common WebPAC::Lookup::Normalize/; use File::Slurp; +use YAML qw/LoadFile/; +use Data::Dumper; =head1 NAME -WebPAC::Normalize::Lookup - simple normalisation plugin to produce lookup +WebPAC::Lookup - simple normalisation plugin to produce lookup =head1 VERSION -Version 0.01 +Version 0.03 =cut -our $VERSION = '0.01'; +our $VERSION = '0.03'; =head1 SYNOPSIS This module will produce in-memory lookups for easy resolution of lookups -to different records in source files. It can also be use with -C to produce tree hierarchies. +to different records in source files. This will enable you to resolve +relational data in source format. + +It can also be use with C to produce tree hierarchies. -Lookups are defined in C. +Lookups are defined in C. C argument is an array of lookups to create. Each lookup must have C and C. Optional parametar C is perl code to evaluate before storing -value in index. +value in lookup. - my $lookup => [ + @lookup = [ { 'key' => 'd:v900', 'val' => 'v250^a' }, { 'eval' => '"v901^a" eq "Područje"', 'key' => 'pa:v561^4:v562^4:v461^1', 'val' => 'v900' }, ]; - =head1 FUNCTIONS =head2 new Create new lookup object. - my $lookup = new WebPAC::Normalize::Lookup( - config => '/path/to/conf/lookup/lookup.pm', + my $lookup = new WebPAC::Lookup( + lookup_file => '/path/to/conf/lookup/lookup.pm', + is_lookup_regex => 'lookup{[^\{\}]+}'; + save_lookup_regex => 'lookup{([^\{\}]+)}'; ); =cut sub new { - my $class = shift; - my $self = {@_}; + my $class = shift; + my $self = {@_}; bless($self, $class); my $log = $self->_get_logger(); - my $config = $self->{'config'} || $log->logconfess("need path to lookup file in config parametar"); + my $lookup_file = $self->{'lookup_file'} || $log->logconfess("need path to lookup file in lookup_file parametar"); - my $lookup_code = read_file($config) || $log->logconfess("can't read lookup file $config: $!"); + my $lookup_code = read_file($lookup_file) || $log->logconfess("can't read lookup file $lookup_file: $!"); - { + if ($lookup_file =~ m#\.pm$#) { no strict 'vars'; - do $config or $log->logdie("Failed to read configuration parameters '$config' $! $@"); - $self->{'lookup_def'} = \@lookup || $log->logdie("lookup config doesn't produce \@lookup array"); + do $lookup_file or $log->logdie("Failed to read configuration parameters '$lookup_file' $! $@"); + $self->{'lookup_def'} = \@lookup || $log->logdie("lookup config $lookup_file doesn't produce \@lookup array"); + } elsif ($lookup_file =~ m#\.(:?yml|yaml)$#) { + my $yaml = LoadFile( $lookup_file ) || $log->logdie("lookup YAML file $lookup_file error: $!"); + $self->{'lookup_def'} = $yaml->{lookup} || $log->logdie("lookup YAML file $lookup_file should begin with 'lookup:'"); + } else { + $log->logide("unsupported lookup file $lookup_file"); } + $log->debug("lookup_def: " . Dumper( $self->{lookup_def} )); + + $log->logconfess("lookup config file isn't ARRAY but ", sub { Dumper( $self->{'lookup_def'} ) }) if ($self->{'lookup_def'} !~ /ARRAY/o); + + $self->{'is_lookup_regex'} ||= 'lookup{[^\{\}]+}'; + $self->{'save_lookup_regex'} ||= 'lookup{([^\{\}]+)}'; + - $log->logconfess("lookup config file isn't ARRAY") if ($self->{'lookup_def'} !~ /ARRAY/o); + $self->{'LOOKUP_REGEX'} = qr/$self->{'is_lookup_regex'}/; + $self->{'LOOKUP_REGEX_SAVE'} = qr/$self->{'save_lookup_regex'}/; + + $log->debug("regexps lookup:", $self->{'LOOKUP_REGEX'}, " save:", $self->{'LOOKUP_REGEX_SAVE'}); $self ? return $self : return undef; } -=head2 create_lookup +=head2 add Create lookup from record using lookup definition. - $self->create_lookup($rec, @lookups); + $self->add($rec); -Called internally by C methods. +Returns true if this record produced lookup. =cut -sub create_lookup { +sub add { my $self = shift; my $log = $self->_get_logger(); my $rec = shift || $log->logconfess("need record to create lookup"); $log->logconfess("need HASH as first argument!") if ($rec !~ /HASH/o); + $log->logconfess("need HASH as first argument!") if ($self->{'lookup_def'} !~ /ARRAY/o); + + my $n = 0; + + foreach my $i (@{ $self->{'lookup_def'} }) { + $log->logconfess("need key in ", Dumper($i) ) unless defined($i->{'key'}); + $log->logconfess("need val in ", Dumper($i) ) unless defined($i->{'val'}); - foreach my $i ($self->{'loookup_def'}) { - $log->logconfess("need key") unless defined($i->{'key'}); - $log->logconfess("need val") unless defined($i->{'val'}); + $n++; if (defined($i->{'eval'})) { # eval first, so we can skip fill_in for key and val @@ -103,28 +126,104 @@ my $key = $self->fill_in($rec,$i->{'key'}) || next; my @val = $self->fill_in($rec,$i->{'val'}) || next; $log->debug("stored $key = ",sub { join(" | ",@val) }); - push @{$self->{'lookup'}->{$key}}, @val; + push @{$self->{'_lookup_data'}->{$key}}, @val; } } else { my $key = $self->fill_in($rec,$i->{'key'}) || next; my @val = $self->fill_in($rec,$i->{'val'}) || next; $log->debug("stored $key = ",sub { join(" | ",@val) }); - push @{$self->{'lookup'}->{$key}}, @val; + push @{$self->{'_lookup_data'}->{$key}}, @val; + } + } + + return $n; +} + +=head2 lookup + +Perform lookups on format supplied to it. + + my $text = $lookup->lookup('lookup{v900}'); + +Lookups can be nested (like lookup{Blookup{Blookup{B}}}). + +=cut + +sub lookup { + my $self = shift; + + my $log = $self->_get_logger(); + + my $tmp = shift || $log->logconfess("need format"); + + if ($tmp =~ $self->{'LOOKUP_REGEX'}) { + my @in = ( $tmp ); + + my @out; + while (my $f = shift @in) { + if ($f =~ $self->{'LOOKUP_REGEX_SAVE'}) { + my $k = $1; + if ($self->{'_lookup_data'}->{$k}) { + foreach my $nv (@{$self->{'_lookup_data'}->{$k}}) { + my $tmp2 = $f; + $tmp2 =~ s/lookup{$k}/$nv/g; + push @in, $tmp2; + } + } else { + undef $f; + } + } elsif ($f) { + push @out, $f; + } } + $log->debug("lookup for: ",$tmp, " returned: ", join(", ",@out)); + + $log->logconfess("return is array and it's not expected!") unless wantarray; + + return @out; + } else { + return $tmp; } } +=head2 lookup_hash + +Returns hash representation of lookup data + + my $l_hash = $lookup->lookup_hash; + +=cut + +sub lookup_hash { + my $self = shift; + return $self->{_lookup_data}; +} + +=head2 regex + +Returns precompiled regex for lookup format. + + if ($foo =~ $lookup->reges) { ... } + +=cut + +sub regex { + my $self = shift; + + return $self->{'LOOKUP_REGEX'}; +} + =head1 AUTHOR Dobrica Pavlinusic, C<< >> =head1 COPYRIGHT & LICENSE -Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. +Copyright 2005-2006 Dobrica Pavlinusic, All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the same terms as Perl itself. =cut -1; # End of WebPAC::Normalize::Lookup +1; # End of WebPAC::Lookup