/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 661 by dpavlin, Fri Sep 8 17:47:58 2006 UTC revision 785 by dpavlin, Wed Dec 6 23:44:36 2006 UTC
# Line 2  package WebPAC::Normalize; Line 2  package WebPAC::Normalize;
2  use Exporter 'import';  use Exporter 'import';
3  @EXPORT = qw/  @EXPORT = qw/
4          _set_rec _set_lookup          _set_rec _set_lookup
5            _set_load_row
6          _get_ds _clean_ds          _get_ds _clean_ds
7          _debug          _debug
8          _pack_subfields_hash          _pack_subfields_hash
# Line 15  use Exporter 'import'; Line 16  use Exporter 'import';
16          rec1 rec2 rec          rec1 rec2 rec
17          regex prefix suffix surround          regex prefix suffix surround
18          first lookup join_with          first lookup join_with
19            save_into_lookup
20    
21          split_rec_on          split_rec_on
22    
23            get set
24  /;  /;
25    
26  use warnings;  use warnings;
# Line 25  use strict; Line 29  use strict;
29  #use base qw/WebPAC::Common/;  #use base qw/WebPAC::Common/;
30  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
31  use Storable qw/dclone/;  use Storable qw/dclone/;
32    use Carp qw/confess/;
33    
34  # debugging warn(s)  # debugging warn(s)
35  my $debug = 0;  my $debug = 0;
# Line 36  WebPAC::Normalize - describe normalisato Line 41  WebPAC::Normalize - describe normalisato
41    
42  =head1 VERSION  =head1 VERSION
43    
44  Version 0.18  Version 0.24
45    
46  =cut  =cut
47    
48  our $VERSION = '0.18';  our $VERSION = '0.24';
49    
50  =head1 SYNOPSIS  =head1 SYNOPSIS
51    
# Line 66  All other functions are available for us Line 71  All other functions are available for us
71  Return data structure  Return data structure
72    
73    my $ds = WebPAC::Normalize::data_structure(    my $ds = WebPAC::Normalize::data_structure(
74          lookup => $lookup->lookup_hash,          lookup => $lookup_hash,
75          row => $row,          row => $row,
76          rules => $normalize_pl_config,          rules => $normalize_pl_config,
77          marc_encoding => 'utf-8',          marc_encoding => 'utf-8',
78          config => $config,          config => $config,
79            load_row_coderef => sub {
80                    my ($database,$input,$mfn) = shift;
81                    $store->load_row( database => $database, input => $input, id => $mfn );
82            },
83    );    );
84    
85  Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all  Options C<row>, C<rules> and C<log> are mandatory while all
86  other are optional.  other are optional.
87    
88    C<load_row_coderef> is closure only used when executing lookups, so they will
89    die if it's not defined.
90    
91  This function will B<die> if normalizastion can't be evaled.  This function will B<die> if normalizastion can't be evaled.
92    
93  Since this function isn't exported you have to call it with  Since this function isn't exported you have to call it with
# Line 83  C<WebPAC::Normalize::data_structure>. Line 95  C<WebPAC::Normalize::data_structure>.
95    
96  =cut  =cut
97    
98    my $load_row_coderef;
99    
100  sub data_structure {  sub data_structure {
101          my $arg = {@_};          my $arg = {@_};
102    
# Line 90  sub data_structure { Line 104  sub data_structure {
104          die "need normalisation argument" unless ($arg->{rules});          die "need normalisation argument" unless ($arg->{rules});
105    
106          no strict 'subs';          no strict 'subs';
107          _set_lookup( $arg->{lookup} );          _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
108          _set_rec( $arg->{row} );          _set_rec( $arg->{row} );
109          _set_config( $arg->{config} );          _set_config( $arg->{config} ) if defined($arg->{config});
110          _clean_ds( %{ $arg } );          _clean_ds( %{ $arg } );
111            $load_row_coderef = $arg->{load_row_coderef};
112    
113          eval "$arg->{rules}";          eval "$arg->{rules}";
114          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
115    
# Line 150  Return hash formatted as data structure Line 166  Return hash formatted as data structure
166    
167  =cut  =cut
168    
169  my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);  my ($out, $marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader);
170  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
171    
172  sub _get_ds {  sub _get_ds {
# Line 167  Clean data structure hash for next recor Line 183  Clean data structure hash for next recor
183    
184  sub _clean_ds {  sub _clean_ds {
185          my $a = {@_};          my $a = {@_};
186          ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();          ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators, $leader) = ();
187          ($marc_record_offset, $marc_fetch_offset) = (0,0);          ($marc_record_offset, $marc_fetch_offset) = (0,0);
188          $marc_encoding = $a->{marc_encoding};          $marc_encoding = $a->{marc_encoding};
189  }  }
# Line 186  sub _set_lookup { Line 202  sub _set_lookup {
202          $lookup = shift;          $lookup = shift;
203  }  }
204    
205    =head2 _get_lookup
206    
207    Get current lookup hash
208    
209      my $lookup = _get_lookup();
210    
211    =cut
212    
213    sub _get_lookup {
214            return $lookup;
215    }
216    
217    =head2 _set_load_row
218    
219    Setup code reference which will return L<data_structure> from
220    L<WebPAC::Store>
221    
222      _set_load_row(sub {
223                    my ($database,$input,$mfn) = @_;
224                    $store->load_row( database => $database, input => $input, id => $mfn );
225      });
226    
227    =cut
228    
229    sub _set_load_row {
230            my $coderef = shift;
231            confess "argument isn't CODE" unless ref($coderef) eq 'CODE';
232    
233            $load_row_coderef = $coderef;
234    }
235    
236  =head2 _get_marc_fields  =head2 _get_marc_fields
237    
238  Get all fields defined by calls to C<marc>  Get all fields defined by calls to C<marc>
# Line 442  sub marc_leader { Line 489  sub marc_leader {
489          my ($offset,$value) = @_;          my ($offset,$value) = @_;
490    
491          if ($offset) {          if ($offset) {
492                  $out->{' leader'}->{ $offset } = $value;                  $leader->{ $offset } = $value;
493          } else {          } else {
494                  return $out->{' leader'};                  return $leader;
495          }          }
496  }  }
497    
# Line 722  above. Line 769  above.
769    
770  =head2 _pack_subfields_hash  =head2 _pack_subfields_hash
771    
772   @values = _pack_subfields_hash( $h, $include_subfields )   @subfields = _pack_subfields_hash( $h );
773     $subfields = _pack_subfields_hash( $h, 1 );
774    
775    Return each subfield value in array or pack them all together and return scalar
776    with subfields (denoted by C<^>) and values.
777    
778  =cut  =cut
779    
# Line 732  sub _pack_subfields_hash { Line 783  sub _pack_subfields_hash {
783    
784          my ($h,$include_subfields) = @_;          my ($h,$include_subfields) = @_;
785    
   
786          if ( defined($h->{subfields}) ) {          if ( defined($h->{subfields}) ) {
787                  my $sfs = delete $h->{subfields} || die "no subfields?";                  my $sfs = delete $h->{subfields} || die "no subfields?";
788                  my @out;                  my @out;
# Line 742  sub _pack_subfields_hash { Line 792  sub _pack_subfields_hash {
792                          my $o = shift @$sfs;                          my $o = shift @$sfs;
793                          if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {                          if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
794                                  # single element subfields are not arrays                                  # single element subfields are not arrays
795    #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
796    
797                                  push @out, $h->{$sf};                                  push @out, $h->{$sf};
798                          } else {                          } else {
799  #warn "====> $f $sf $o $#$sfs ", dump( $sfs ), "\n";  #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
800                                  push @out, $h->{$sf}->[$o];                                  push @out, $h->{$sf}->[$o];
801                          }                          }
802                  }                  }
803                  return @out;                  if ($include_subfields) {
804                            return join('', @out);
805                    } else {
806                            return @out;
807                    }
808          } else {          } else {
809                  # FIXME this should probably be in alphabetical order instead of hash order                  if ($include_subfields) {
810                  values %{$h};                          my $out = '';
811                            foreach my $sf (sort keys %$h) {
812                                    if (ref($h->{$sf}) eq 'ARRAY') {
813                                            $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
814                                    } else {
815                                            $out .= '^' . $sf . $h->{$sf};
816                                    }
817                            }
818                            return $out;
819                    } else {
820                            # FIXME this should probably be in alphabetical order instead of hash order
821                            values %{$h};
822                    }
823          }          }
824  }  }
825    
# Line 814  syntaxtic sugar for Line 882  syntaxtic sugar for
882    @v = rec('200')    @v = rec('200')
883    @v = rec('200','a')    @v = rec('200','a')
884    
885    If rec() returns just single value, it will
886    return scalar, not array.
887    
888  =cut  =cut
889    
890  sub rec {  sub rec {
# Line 823  sub rec { Line 894  sub rec {
894          } elsif ($#_ == 1) {          } elsif ($#_ == 1) {
895                  @out = rec2(@_);                  @out = rec2(@_);
896          }          }
897          if (@out) {          if ($#out == 0 && ! wantarray) {
898                    return $out[0];
899            } elsif (@out) {
900                  return @out;                  return @out;
901          } else {          } else {
902                  return '';                  return '';
# Line 907  sub first { Line 980  sub first {
980    
981  Consult lookup hashes for some value  Consult lookup hashes for some value
982    
983    @v = lookup( $v );    @v = lookup(
984    @v = lookup( @v );          sub {
985                    'ffkk/peri/mfn'.rec('000')
986            },
987            'ffkk','peri','200-a-200-e',
988            sub {
989                    first(rec(200,'a')).' '.first(rec('200','e'))
990            }
991      );
992    
993    Code like above will be B<automatically generated> using L<WebPAC::Parse> from
994    normal lookup definition in C<conf/lookup/something.pl> which looks like:
995    
996      lookup(
997            # which results to return from record recorded in lookup
998            sub { 'ffkk/peri/mfn' . rec('000') },
999            # from which database and input
1000            'ffkk','peri',
1001            # such that following values match
1002            sub { first(rec(200,'a')) . ' ' . first(rec('200','e')) },
1003            # if this part is missing, we will try to match same fields
1004            # from lookup record and current one, or you can override
1005            # which records to use from current record using
1006            sub { rec('900','x') . ' ' . rec('900','y') },
1007      )
1008    
1009    You can think about this lookup as SQL (if that helps):
1010    
1011      select
1012            sub { what }
1013      from
1014            database, input
1015      where
1016        sub { filter from lookuped record }
1017      having
1018        sub { optional filter on current record }
1019    
1020    Easy as pie, right?
1021    
1022  =cut  =cut
1023    
1024  sub lookup {  sub lookup {
1025          my $k = shift or return;          my ($what, $database, $input, $key, $having) = @_;
1026          return unless (defined($lookup->{$k}));  
1027          if (ref($lookup->{$k}) eq 'ARRAY') {          confess "lookup needs 5 arguments: what, database, input, key, having\n" unless ($#_ == 4);
1028                  return @{ $lookup->{$k} };  
1029            warn "## lookup ($database, $input, $key)", $/ if ($debug > 1);
1030            return unless (defined($lookup->{$database}->{$input}->{$key}));
1031    
1032            confess "lookup really need load_row_coderef added to data_structure\n" unless ($load_row_coderef);
1033    
1034            my $mfns;
1035            my @having = $having->();
1036    
1037            warn "## having = ", dump( @having ) if ($debug > 2);
1038    
1039            foreach my $h ( @having ) {
1040                    if (defined($lookup->{$database}->{$input}->{$key}->{$h})) {
1041                            warn "lookup for $database/$input/$key/$h return ",dump($lookup->{$database}->{$input}->{$key}->{$h}),"\n" if ($debug);
1042                            $mfns->{$_}++ foreach keys %{ $lookup->{$database}->{$input}->{$key}->{$h} };
1043                    }
1044            }
1045    
1046            return unless ($mfns);
1047    
1048            my @mfns = sort keys %$mfns;
1049    
1050            warn "# lookup loading $database/$input/$key mfn ", join(",",@mfns)," having ",dump(@having),"\n" if ($debug);
1051    
1052            my $old_rec = $rec;
1053            my @out;
1054    
1055            foreach my $mfn (@mfns) {
1056                    $rec = $load_row_coderef->( $database, $input, $mfn );
1057    
1058                    warn "got $database/$input/$mfn = ", dump($rec), $/ if ($debug);
1059    
1060                    my @vals = $what->();
1061    
1062                    push @out, ( @vals );
1063    
1064                    warn "lookup for mfn $mfn returned ", dump(@vals), $/ if ($debug);
1065            }
1066    
1067    #       if (ref($lookup->{$k}) eq 'ARRAY') {
1068    #               return @{ $lookup->{$k} };
1069    #       } else {
1070    #               return $lookup->{$k};
1071    #       }
1072    
1073            $rec = $old_rec;
1074    
1075            warn "## lookup returns = ", dump(@out), $/ if ($debug);
1076    
1077            if ($#out == 0) {
1078                    return $out[0];
1079          } else {          } else {
1080                  return $lookup->{$k};                  return @out;
1081            }
1082    }
1083    
1084    =head2 save_into_lookup
1085    
1086    Save value into lookup. It associates current database, input
1087    and specific keys with one or more values which will be
1088    associated over MFN.
1089    
1090    MFN will be extracted from first occurence current of field 000
1091    in current record, or if it doesn't exist from L<_set_config> C<_mfn>.
1092    
1093      my $nr = save_into_lookup($database,$input,$key,sub {
1094            # code which produce one or more values
1095      });
1096    
1097    It returns number of items saved.
1098    
1099    This function shouldn't be called directly, it's called from code created by
1100    L<WebPAC::Parser>.
1101    
1102    =cut
1103    
1104    sub save_into_lookup {
1105            my ($database,$input,$key,$coderef) = @_;
1106            die "save_into_lookup needs database" unless defined($database);
1107            die "save_into_lookup needs input" unless defined($input);
1108            die "save_into_lookup needs key" unless defined($key);
1109            die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
1110    
1111            warn "## save_into_lookup rec = ", dump($rec), " config = ", dump($config), $/ if ($debug > 2);
1112    
1113            my $mfn =
1114                    defined($rec->{'000'}->[0])     ?       $rec->{'000'}->[0]      :
1115                    defined($config->{_mfn})        ?       $config->{_mfn}         :
1116                                                                                    die "mfn not defined or zero";
1117    
1118            my $nr = 0;
1119    
1120            foreach my $v ( $coderef->() ) {
1121                    $lookup->{$database}->{$input}->{$key}->{$v}->{$mfn}++;
1122                    warn "# saved lookup $database/$input/$key [$v] $mfn\n" if ($debug > 1);
1123                    $nr++;
1124          }          }
1125    
1126            return $nr;
1127  }  }
1128    
1129  =head2 config  =head2 config
# Line 1046  sub split_rec_on { Line 1250  sub split_rec_on {
1250          }          }
1251  }  }
1252    
1253    my $hash;
1254    
1255    =head2 set
1256    
1257      set( key => 'value' );
1258    
1259    =cut
1260    
1261    sub set {
1262            my ($k,$v) = @_;
1263            warn "## set ( $k => ", dump($v), " )", $/;
1264            $hash->{$k} = $v;
1265    };
1266    
1267    =head2 get
1268    
1269      get( 'key' );
1270    
1271    =cut
1272    
1273    sub get {
1274            my $k = shift || return;
1275            my $v = $hash->{$k};
1276            warn "## get $k = ", dump( $v ), $/;
1277            return $v;
1278    }
1279    
1280    
1281  # END  # END
1282  1;  1;

Legend:
Removed from v.661  
changed lines
  Added in v.785

  ViewVC Help
Powered by ViewVC 1.1.26