--- trunk/lib/WebPAC/Normalize.pm 2005/07/16 20:35:30 10 +++ trunk/lib/WebPAC/Normalize.pm 2006/06/29 15:29:19 538 @@ -1,45 +1,349 @@ package WebPAC::Normalize; +use Exporter 'import'; +@EXPORT = qw/ + _set_rec _set_lookup + _get_ds _clean_ds + + tag search display + rec1 rec2 rec + regex prefix suffix surround + first lookup join_with +/; use warnings; use strict; +#use base qw/WebPAC::Common/; +use Data::Dumper; + =head1 NAME -WebPAC::Normalize - normalisation of source file +WebPAC::Normalize - describe normalisaton rules using sets =head1 VERSION -Version 0.01 +Version 0.05 =cut -our $VERSION = '0.01'; +our $VERSION = '0.05'; =head1 SYNOPSIS -This package contains code that could be helpful in implementing different -normalisation front-ends. +This module uses C files to perform normalisation +from input records using perl functions which are specialized for set +processing. + +Sets are implemented as arrays, and normalisation file is valid perl, which +means that you check it's validity before running WebPAC using +C. + +Normalisation can generate multiple output normalized data. For now, supported output +types (on the left side of definition) are: C, C and C. =head1 FUNCTIONS -=head2 none_yet +Functions which start with C<_> are private and used by WebPAC internally. +All other functions are available for use within normalisation rules. + +=head2 data_structure + +Return data structure + + my $ds = WebPAC::Normalize::data_structure( + lookup => $lookup->lookup_hash, + row => $row, + rules => $normalize_pl_config, + ); + +This function will B if normalizastion can't be evaled. + +Since this function isn't exported you have to call it with +C. + +=cut + +sub data_structure { + my $arg = {@_}; + + die "need row argument" unless ($arg->{row}); + die "need normalisation argument" unless ($arg->{rules}); + + no strict 'subs'; + _set_lookup( $arg->{lookup} ); + _set_rec( $arg->{row} ); + _clean_ds(); + eval "$arg->{rules}"; + die "error evaling $arg->{rules}: $@\n" if ($@); + return _get_ds(); +} + +=head2 _set_rec + +Set current record hash + + _set_rec( $rec ); + +=cut + +my $rec; + +sub _set_rec { + $rec = shift or die "no record hash"; +} + +=head2 _get_ds + +Return hash formatted as data structure + + my $ds = _get_ds(); + +=cut + +my $out; + +sub _get_ds { + return $out; +} + +=head2 _clean_ds + +Clean data structure hash for next record + + _clean_ds(); + +=cut + +sub _clean_ds { + $out = undef; +} + +=head2 _set_lookup + +Set current lookup hash + + _set_lookup( $lookup ); + +=cut + +my $lookup; + +sub _set_lookup { + $lookup = shift; +} + +=head2 tag + +Define new tag for I and I. + + tag('Title', rec('200','a') ); + + +=cut + +sub tag { + my $name = shift or die "tag needs name as first argument"; + my @o = grep { defined($_) && $_ ne '' } @_; + return unless (@o); + $out->{$name}->{tag} = $name; + $out->{$name}->{search} = \@o; + $out->{$name}->{display} = \@o; +} + +=head2 display + +Define tag just for I + + @v = display('Title', rec('200','a') ); + +=cut + +sub display { + my $name = shift or die "display needs name as first argument"; + my @o = grep { defined($_) && $_ ne '' } @_; + return unless (@o); + $out->{$name}->{tag} = $name; + $out->{$name}->{display} = \@o; +} + +=head2 search + +Prepare values just for I + + @v = search('Title', rec('200','a') ); + +=cut + +sub search { + my $name = shift or die "search needs name as first argument"; + my @o = grep { defined($_) && $_ ne '' } @_; + return unless (@o); + $out->{$name}->{tag} = $name; + $out->{$name}->{search} = \@o; +} + +=head2 rec1 + +Return all values in some field + + @v = rec1('200') + +TODO: order of values is probably same as in source data, need to investigate that + +=cut + +sub rec1 { + my $f = shift; + return unless (defined($rec) && defined($rec->{$f})); + if (ref($rec->{$f}) eq 'ARRAY') { + return map { + if (ref($_) eq 'HASH') { + values %{$_}; + } else { + $_; + } + } @{ $rec->{$f} }; + } elsif( defined($rec->{$f}) ) { + return $rec->{$f}; + } +} + +=head2 rec2 + +Return all values in specific field and subfield + + @v = rec2('200','a') + +=cut + +sub rec2 { + my $f = shift; + return unless (defined($rec && $rec->{$f})); + my $sf = shift; + return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} }; +} + +=head2 rec + +syntaxtic sugar for + + @v = rec('200') + @v = rec('200','a') =cut -sub none_yet { +sub rec { + if ($#_ == 0) { + return rec1(@_); + } elsif ($#_ == 1) { + return rec2(@_); + } } -=head1 AUTHOR +=head2 regex + +Apply regex to some or all values + + @v = regex( 's/foo/bar/g', @v ); + +=cut -Dobrica Pavlinusic, C<< >> +sub regex { + my $r = shift; + my @out; + #warn "r: $r\n",Dumper(\@_); + foreach my $t (@_) { + next unless ($t); + eval "\$t =~ $r"; + push @out, $t if ($t && $t ne ''); + } + return @out; +} -=head1 COPYRIGHT & LICENSE +=head2 prefix -Copyright 2005 Dobrica Pavlinusic, All Rights Reserved. +Prefix all values with a string -This program is free software; you can redistribute it and/or modify it -under the same terms as Perl itself. + @v = prefix( 'my_', @v ); =cut -1; # End of WebPAC::DB +sub prefix { + my $p = shift or die "prefix needs string as first argument"; + return map { $p . $_ } grep { defined($_) } @_; +} + +=head2 suffix + +suffix all values with a string + + @v = suffix( '_my', @v ); + +=cut + +sub suffix { + my $s = shift or die "suffix needs string as first argument"; + return map { $_ . $s } grep { defined($_) } @_; +} + +=head2 surround + +surround all values with a two strings + + @v = surround( 'prefix_', '_suffix', @v ); + +=cut + +sub surround { + my $p = shift or die "surround need prefix as first argument"; + my $s = shift or die "surround needs suffix as second argument"; + return map { $p . $_ . $s } grep { defined($_) } @_; +} + +=head2 first + +Return first element + + $v = first( @v ); + +=cut + +sub first { + my $r = shift; + return $r; +} + +=head2 lookup + +Consult lookup hashes for some value + + @v = lookup( $v ); + @v = lookup( @v ); + +=cut + +sub lookup { + my $k = shift or return; + return unless (defined($lookup->{$k})); + if (ref($lookup->{$k}) eq 'ARRAY') { + return @{ $lookup->{$k} }; + } else { + return $lookup->{$k}; + } +} + +=head2 join_with + +Joins walues with some delimiter + + $v = join_with(", ", @v); + +=cut + +sub join_with { + my $d = shift; + return join($d, grep { defined($_) && $_ ne '' } @_); +} + +# END +1;