/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 707 by dpavlin, Mon Sep 25 15:26:12 2006 UTC revision 785 by dpavlin, Wed Dec 6 23:44:36 2006 UTC
# Line 2  package WebPAC::Normalize; Line 2  package WebPAC::Normalize;
2  use Exporter 'import';  use Exporter 'import';
3  @EXPORT = qw/  @EXPORT = qw/
4          _set_rec _set_lookup          _set_rec _set_lookup
5            _set_load_row
6          _get_ds _clean_ds          _get_ds _clean_ds
7          _debug          _debug
8          _pack_subfields_hash          _pack_subfields_hash
# Line 18  use Exporter 'import'; Line 19  use Exporter 'import';
19          save_into_lookup          save_into_lookup
20    
21          split_rec_on          split_rec_on
22    
23            get set
24  /;  /;
25    
26  use warnings;  use warnings;
# Line 26  use strict; Line 29  use strict;
29  #use base qw/WebPAC::Common/;  #use base qw/WebPAC::Common/;
30  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
31  use Storable qw/dclone/;  use Storable qw/dclone/;
32    use Carp qw/confess/;
33    
34  # debugging warn(s)  # debugging warn(s)
35  my $debug = 0;  my $debug = 0;
# Line 37  WebPAC::Normalize - describe normalisato Line 41  WebPAC::Normalize - describe normalisato
41    
42  =head1 VERSION  =head1 VERSION
43    
44  Version 0.20  Version 0.24
45    
46  =cut  =cut
47    
48  our $VERSION = '0.20';  our $VERSION = '0.24';
49    
50  =head1 SYNOPSIS  =head1 SYNOPSIS
51    
# Line 67  All other functions are available for us Line 71  All other functions are available for us
71  Return data structure  Return data structure
72    
73    my $ds = WebPAC::Normalize::data_structure(    my $ds = WebPAC::Normalize::data_structure(
74          lookup => $lookup_variable,          lookup => $lookup_hash,
75          row => $row,          row => $row,
76          rules => $normalize_pl_config,          rules => $normalize_pl_config,
77          marc_encoding => 'utf-8',          marc_encoding => 'utf-8',
78          config => $config,          config => $config,
79            load_row_coderef => sub {
80                    my ($database,$input,$mfn) = shift;
81                    $store->load_row( database => $database, input => $input, id => $mfn );
82            },
83    );    );
84    
85  Options C<row>, C<rules> and C<log> are mandatory while all  Options C<row>, C<rules> and C<log> are mandatory while all
86  other are optional.  other are optional.
87    
88    C<load_row_coderef> is closure only used when executing lookups, so they will
89    die if it's not defined.
90    
91  This function will B<die> if normalizastion can't be evaled.  This function will B<die> if normalizastion can't be evaled.
92    
93  Since this function isn't exported you have to call it with  Since this function isn't exported you have to call it with
# Line 84  C<WebPAC::Normalize::data_structure>. Line 95  C<WebPAC::Normalize::data_structure>.
95    
96  =cut  =cut
97    
98    my $load_row_coderef;
99    
100  sub data_structure {  sub data_structure {
101          my $arg = {@_};          my $arg = {@_};
102    
# Line 91  sub data_structure { Line 104  sub data_structure {
104          die "need normalisation argument" unless ($arg->{rules});          die "need normalisation argument" unless ($arg->{rules});
105    
106          no strict 'subs';          no strict 'subs';
107          _set_lookup( $arg->{lookup} ) if (defined( $arg->{lookup} ));          _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
108          _set_rec( $arg->{row} );          _set_rec( $arg->{row} );
109          _set_config( $arg->{config} ) if (defined( $arg->{config} ));          _set_config( $arg->{config} ) if defined($arg->{config});
110          _clean_ds( %{ $arg } );          _clean_ds( %{ $arg } );
111            $load_row_coderef = $arg->{load_row_coderef};
112    
113          eval "$arg->{rules}";          eval "$arg->{rules}";
114          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
115    
# Line 151  Return hash formatted as data structure Line 166  Return hash formatted as data structure
166    
167  =cut  =cut
168    
169  my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);  my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader);
170  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
171    
172  sub _get_ds {  sub _get_ds {
# Line 168  Clean data structure hash for next recor Line 183  Clean data structure hash for next recor
183    
184  sub _clean_ds {  sub _clean_ds {
185          my $a = {@_};          my $a = {@_};
186          ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();          ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader) = ();
187          ($marc_record_offset, $marc_fetch_offset) = (0,0);          ($marc_record_offset, $marc_fetch_offset) = (0,0);
188          $marc_encoding = $a->{marc_encoding};          $marc_encoding = $a->{marc_encoding};
189  }  }
# Line 199  sub _get_lookup { Line 214  sub _get_lookup {
214          return $lookup;          return $lookup;
215  }  }
216    
217    =head2 _set_load_row
218    
219    Setup code reference which will return L<data_structure> from
220    L<WebPAC::Store>
221    
222      _set_load_row(sub {
223                    my ($database,$input,$mfn) = @_;
224                    $store->load_row( database => $database, input => $input, id => $mfn );
225      });
226    
227    =cut
228    
229    sub _set_load_row {
230            my $coderef = shift;
231            confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
232    
233            $load_row_coderef = $coderef;
234    }
235    
236  =head2 _get_marc_fields  =head2 _get_marc_fields
237    
238  Get all fields defined by calls to C<marc>  Get all fields defined by calls to C<marc>
# Line 455  sub marc_leader { Line 489  sub marc_leader {
489          my ($offset,$value) = @_;          my ($offset,$value) = @_;
490    
491          if ($offset) {          if ($offset) {
492                  $out->{' leader'}->{ $offset } = $value;                  $leader->{ $offset } = $value;
493          } else {          } else {
494                  return $out->{' leader'};                  return $leader;
495          }          }
496  }  }
497    
# Line 848  syntaxtic sugar for Line 882  syntaxtic sugar for
882    @v = rec('200')    @v = rec('200')
883    @v = rec('200','a')    @v = rec('200','a')
884    
885    If rec() returns just single value, it will
886    return scalar, not array.
887    
888  =cut  =cut
889    
890  sub rec {  sub rec {
# Line 857  sub rec { Line 894  sub rec {
894          } elsif ($#_ == 1) {          } elsif ($#_ == 1) {
895                  @out = rec2(@_);                  @out = rec2(@_);
896          }          }
897          if (@out) {          if ($#out == 0 && ! wantarray) {
898                    return $out[0];
899            } elsif (@out) {
900                  return @out;                  return @out;
901          } else {          } else {
902                  return '';                  return '';
# Line 941  sub first { Line 980  sub first {
980    
981  Consult lookup hashes for some value  Consult lookup hashes for some value
982    
983    @v = lookup( $v );    @v = lookup(
984    @v = lookup( @v );          sub {
985                    'ffkk/peri/mfn'.rec('000')
986            },
987            'ffkk','peri','200-a-200-e',
988            sub {
989                    first(rec(200,'a')).' '.first(rec('200','e'))
990            }
991      );
992    
993    Code like above will be B<automatically generated> using L<WebPAC::Parse> from
994    normal lookup definition in C<conf/lookup/something.pl> which looks like:
995    
996      lookup(
997            # which results to return from record recorded in lookup
998            sub { 'ffkk/peri/mfn' . rec('000') },
999            # from which database and input
1000            'ffkk','peri',
1001            # such that following values match
1002            sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1003            # if this part is missing, we will try to match same fields
1004            # from lookup record and current one, or you can override
1005            # which records to use from current record using
1006            sub { rec('900','x') . ' ' . rec('900','y') },
1007      )
1008    
1009    You can think about this lookup as SQL (if that helps):
1010    
1011      select
1012            sub { what }
1013      from
1014            database, input
1015      where
1016        sub { filter from lookuped record }
1017      having
1018        sub { optional filter on current record }
1019    
1020  FIXME B<currently this one is broken!>  Easy as pie, right?
1021    
1022  =cut  =cut
1023    
1024  sub lookup {  sub lookup {
1025          my $k = shift or return;          my ($what, $database, $input, $key, $having) = @_;
1026          return unless (defined($lookup->{$k}));  
1027          if (ref($lookup->{$k}) eq 'ARRAY') {          confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1028                  return @{ $lookup->{$k} };  
1029            warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1030            return unless (defined($lookup->{$database}->{$input}->{$key}));
1031    
1032            confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1033    
1034            my $mfns;
1035            my @having = $having->();
1036    
1037            warn "## having = ", dump( @having ) if ($debug > 2);
1038    
1039            foreach my $h ( @having ) {
1040                    if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1041                            warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1042                            $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1043                    }
1044            }
1045    
1046            return unless ($mfns);
1047    
1048            my @mfns = sort keys %$mfns;
1049    
1050            warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1051    
1052            my $old_rec = $rec;
1053            my @out;
1054    
1055            foreach my $mfn (@mfns) {
1056                    $rec = $load_row_coderef->( $database, $input, $mfn );
1057    
1058                    warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1059    
1060                    my @vals = $what->();
1061    
1062                    push @out, ( @vals );
1063    
1064                    warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1065            }
1066    
1067    #       if (ref($lookup->{$k}) eq 'ARRAY') {
1068    #               return @{ $lookup->{$k} };
1069    #       } else {
1070    #               return $lookup->{$k};
1071    #       }
1072    
1073            $rec = $old_rec;
1074    
1075            warn "## lookup returns = ", dump(@out), $/ if ($debug);
1076    
1077            if ($#out == 0) {
1078                    return $out[0];
1079          } else {          } else {
1080                  return $lookup->{$k};                  return @out;
1081          }          }
1082  }  }
1083    
1084  =head2 save_into_lookup  =head2 save_into_lookup
1085    
1086  Save value into lookup.  Save value into lookup. It associates current database, input
1087    and specific keys with one or more values which will be
1088    associated over MFN.
1089    
1090    save_into_lookup($key,sub {  MFN will be extracted from first occurence current of field 000
1091    in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1092    
1093      my $nr = save_into_lookup($database,$input,$key,sub {
1094          # code which produce one or more values          # code which produce one or more values
1095    });    });
1096    
1097  This function shouldn't be called directly, it's called from code created by L<WebPAC::Parser>.  It returns number of items saved.
1098    
1099    This function shouldn't be called directly, it's called from code created by
1100    L<WebPAC::Parser>.
1101    
1102  =cut  =cut
1103    
1104  sub save_into_lookup {  sub save_into_lookup {
1105          my ($k,$coderef) = @_;          my ($database,$input,$key,$coderef) = @_;
1106          die "save_into_lookup needs key" unless defined($k);          die "save_into_lookup needs database" unless defined($database);
1107            die "save_into_lookup needs input" unless defined($input);
1108            die "save_into_lookup needs key" unless defined($key);
1109          die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );          die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1110          my $mfn = $rec->{'000'}->[0] || die "mfn not defined or zero";  
1111            warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1112    
1113            my $mfn =
1114                    defined($rec->{'000'}->[0])     ?       $rec->{'000'}->[0]      :
1115                    defined($config->{_mfn})        ?       $config->{_mfn}         :
1116                                                                                    die "mfn not defined or zero";
1117    
1118            my $nr = 0;
1119    
1120          foreach my $v ( $coderef->() ) {          foreach my $v ( $coderef->() ) {
1121                  $lookup->{$k}->{$v}->{$mfn}++;                  $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1122                  warn "# lookup $k $v $mfn saved\n";     # if ($debug > 1);                  warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1123                    $nr++;
1124          }          }
1125    
1126            return $nr;
1127  }  }
1128    
1129  =head2 config  =head2 config
# Line 1105  sub split_rec_on { Line 1250  sub split_rec_on {
1250          }          }
1251  }  }
1252    
1253    my $hash;
1254    
1255    =head2 set
1256    
1257      set( key => 'value' );
1258    
1259    =cut
1260    
1261    sub set {
1262            my ($k,$v) = @_;
1263            warn "## set ( $k => ", dump($v), " )", $/;
1264            $hash->{$k} = $v;
1265    };
1266    
1267    =head2 get
1268    
1269      get( 'key' );
1270    
1271    =cut
1272    
1273    sub get {
1274            my $k = shift || return;
1275            my $v = $hash->{$k};
1276            warn "## get $k = ", dump( $v ), $/;
1277            return $v;
1278    }
1279    
1280    
1281  # END  # END
1282  1;  1;

Legend:
Removed from v.707  
changed lines
  Added in v.785

  ViewVC Help
Powered by ViewVC 1.1.26