/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 889 by dpavlin, Thu Sep 6 19:12:15 2007 UTC revision 1021 by dpavlin, Sat Nov 10 11:11:16 2007 UTC
# Line 1  Line 1 
1  package WebPAC::Normalize;  package WebPAC::Normalize;
2  use Exporter 'import';  use Exporter 'import';
3  @EXPORT = qw/  our @EXPORT = qw/
4          _set_rec _set_lookup          _set_ds _set_lookup
5          _set_load_row          _set_load_row
6          _get_ds _clean_ds          _get_ds _clean_ds
7          _debug          _debug
8          _pack_subfields_hash          _pack_subfields_hash
9    
10          tag search display          search_display search display sorted
11    
12          marc marc_indicators marc_repeatable_subfield          marc marc_indicators marc_repeatable_subfield
13          marc_compose marc_leader marc_fixed          marc_compose marc_leader marc_fixed
14          marc_duplicate marc_remove marc_count          marc_duplicate marc_remove marc_count
15          marc_original_order          marc_original_order
16            marc_template
17    
18          rec1 rec2 rec          rec1 rec2 rec
19            frec frec_eq frec_ne
20          regex prefix suffix surround          regex prefix suffix surround
21          first lookup join_with          first lookup join_with
22          save_into_lookup          save_into_lookup
# Line 22  use Exporter 'import'; Line 25  use Exporter 'import';
25    
26          get set          get set
27          count          count
28    
29  /;  /;
30    
31  use warnings;  use warnings;
# Line 35  use Carp qw/confess/; Line 39  use Carp qw/confess/;
39  # debugging warn(s)  # debugging warn(s)
40  my $debug = 0;  my $debug = 0;
41    
42    # FIXME
43    use WebPAC::Normalize::ISBN;
44    push @EXPORT, ( 'isbn_10', 'isbn_13' );
45    
46    use WebPAC::Normalize::MARC;
47    push @EXPORT, ( 'marc_template' );
48    
49  =head1 NAME  =head1 NAME
50    
51  WebPAC::Normalize - describe normalisaton rules using sets  WebPAC::Normalize - describe normalisaton rules using sets
52    
 =head1 VERSION  
   
 Version 0.29  
   
53  =cut  =cut
54    
55  our $VERSION = '0.29';  our $VERSION = '0.35';
56    
57  =head1 SYNOPSIS  =head1 SYNOPSIS
58    
# Line 59  means that you check it's validity befor Line 65  means that you check it's validity befor
65  C<perl -c normalize.pl>.  C<perl -c normalize.pl>.
66    
67  Normalisation can generate multiple output normalized data. For now, supported output  Normalisation can generate multiple output normalized data. For now, supported output
68  types (on the left side of definition) are: C<tag>, C<display>, C<search> and  types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
69  C<marc>.  C<marc>.
70    
71  =head1 FUNCTIONS  =head1 FUNCTIONS
# Line 78  Return data structure Line 84  Return data structure
84          marc_encoding => 'utf-8',          marc_encoding => 'utf-8',
85          config => $config,          config => $config,
86          load_row_coderef => sub {          load_row_coderef => sub {
87                  my ($database,$input,$mfn) = shift;                  my ($database,$input,$mfn) = @_;
88                  $store->load_row( database => $database, input => $input, id => $mfn );                  $store->load_row( database => $database, input => $input, id => $mfn );
89          },          },
90    );    );
# Line 104  sub data_structure { Line 110  sub data_structure {
110          die "need row argument" unless ($arg->{row});          die "need row argument" unless ($arg->{row});
111          die "need normalisation argument" unless ($arg->{rules});          die "need normalisation argument" unless ($arg->{rules});
112    
         no strict 'subs';  
113          _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});          _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
114          _set_rec( $arg->{row} );          _set_ds( $arg->{row} );
115          _set_config( $arg->{config} ) if defined($arg->{config});          _set_config( $arg->{config} ) if defined($arg->{config});
116          _clean_ds( %{ $arg } );          _clean_ds( %{ $arg } );
117          $load_row_coderef = $arg->{load_row_coderef};          $load_row_coderef = $arg->{load_row_coderef};
118    
119          eval "$arg->{rules}";          no strict 'subs';
120            no warnings 'redefine';
121            eval "$arg->{rules};";
122          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
123    
124          return _get_ds();          return _get_ds();
125  }  }
126    
127  =head2 _set_rec  =head2 _set_ds
128    
129  Set current record hash  Set current record hash
130    
131    _set_rec( $rec );    _set_ds( $rec );
132    
133  =cut  =cut
134    
135  my $rec;  my $rec;
136    
137  sub _set_rec {  sub _set_ds {
138          $rec = shift or die "no record hash";          $rec = shift or die "no record hash";
139  }  }
140    
141    =head2
142    
143      my $rec = _get_rec();
144    
145    =cut
146    
147    sub _get_rec { $rec };
148    
149  =head2 _set_config  =head2 _set_config
150    
151  Set current config hash  Set current config hash
# Line 171  my ($out, $marc_record, $marc_encoding, Line 186  my ($out, $marc_record, $marc_encoding,
186  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
187    
188  sub _get_ds {  sub _get_ds {
189    #warn "## out = ",dump($out);
190          return $out;          return $out;
191  }  }
192    
# Line 442  sub _debug { Line 458  sub _debug {
458    
459  Those functions generally have to first in your normalization file.  Those functions generally have to first in your normalization file.
460    
461  =head2 tag  =head2 search_display
462    
463  Define new tag for I<search> and I<display>.  Define output for L<search> and L<display> at the same time
464    
465    tag('Title', rec('200','a') );    search_display('Title', rec('200','a') );
466    
467    
468  =cut  =cut
469    
470  sub tag {  sub search_display {
471          my $name = shift or die "tag needs name as first argument";          my $name = shift or die "search_display needs name as first argument";
472          my @o = grep { defined($_) && $_ ne '' } @_;          my @o = grep { defined($_) && $_ ne '' } @_;
473          return unless (@o);          return unless (@o);
         $out->{$name}->{tag} = $name;  
474          $out->{$name}->{search} = \@o;          $out->{$name}->{search} = \@o;
475          $out->{$name}->{display} = \@o;          $out->{$name}->{display} = \@o;
476  }  }
477    
478    =head2 tag
479    
480    Old name for L<search_display>, but supported
481    
482    =cut
483    
484    sub tag {
485            search_display( @_ );
486    }
487    
488  =head2 display  =head2 display
489    
490  Define tag just for I<display>  Define output just for I<display>
491    
492    @v = display('Title', rec('200','a') );    @v = display('Title', rec('200','a') );
493    
494  =cut  =cut
495    
496  sub display {  sub _field {
497          my $name = shift or die "display needs name as first argument";          my $type = shift or confess "need type -- BUG?";
498            my $name = shift or confess "needs name as first argument";
499          my @o = grep { defined($_) && $_ ne '' } @_;          my @o = grep { defined($_) && $_ ne '' } @_;
500          return unless (@o);          return unless (@o);
501          $out->{$name}->{tag} = $name;          $out->{$name}->{$type} = \@o;
         $out->{$name}->{display} = \@o;  
502  }  }
503    
504    sub display { _field( 'display', @_ ) }
505    
506  =head2 search  =head2 search
507    
508  Prepare values just for I<search>  Prepare values just for I<search>
# Line 484  Prepare values just for I<search> Line 511  Prepare values just for I<search>
511    
512  =cut  =cut
513    
514  sub search {  sub search { _field( 'search', @_ ) }
515          my $name = shift or die "search needs name as first argument";  
516          my @o = grep { defined($_) && $_ ne '' } @_;  =head2 sorted
517          return unless (@o);  
518          $out->{$name}->{tag} = $name;  Insert into lists which will be automatically sorted
519          $out->{$name}->{search} = \@o;  
520  }   sorted('Title', rec('200','a') );
521    
522    =cut
523    
524    sub sorted { _field( 'sorted', @_ ) }
525    
526    
527  =head2 marc_leader  =head2 marc_leader
528    
# Line 795  sub marc_original_order { Line 827  sub marc_original_order {
827          return unless defined($rec->{$from});          return unless defined($rec->{$from});
828    
829          my $r = $rec->{$from};          my $r = $rec->{$from};
830          die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');          die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
831    
832          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
833          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
# Line 840  sub marc_original_order { Line 872  sub marc_original_order {
872          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
873  }  }
874    
875    
876  =head2 marc_count  =head2 marc_count
877    
878  Return number of MARC records created using L</marc_duplicate>.  Return number of MARC records created using L</marc_duplicate>.
# Line 852  sub marc_count { Line 885  sub marc_count {
885          return $#{ $marc_record };          return $#{ $marc_record };
886  }  }
887    
888    =head2 _marc_push
889    
890     _marc_push( $marc );
891    
892    =cut
893    
894    sub _marc_push {
895            my $marc = shift || die "no marc?";
896            push @{ $marc_record->[ $marc_record_offset ] }, $marc;
897    }
898    
899    
900  =head1 Functions to extract data from input  =head1 Functions to extract data from input
901    
# Line 997  sub rec { Line 1041  sub rec {
1041          }          }
1042  }  }
1043    
1044    =head2 frec
1045    
1046    Returns first value from field
1047    
1048      $v = frec('200');
1049      $v = frec('200','a');
1050    
1051    =cut
1052    
1053    sub frec {
1054            my @out = rec(@_);
1055            warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1056            return shift @out;
1057    }
1058    
1059    =head2 frec_eq
1060    
1061    =head2 frec_ne
1062    
1063    Check if first values from two fields are same or different
1064    
1065      if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
1066            # values are same
1067      } else {
1068        # values are different
1069      }
1070    
1071    Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
1072    could write something like:
1073    
1074      if ( frec( '900','a' ) eq frec( '910','c' ) ) {
1075            # yada tada
1076      }
1077    
1078    but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
1079    in order to parse text and create invalid function C<eqfrec>.
1080    
1081    =cut
1082    
1083    sub frec_eq {
1084            my ( $f1,$sf1, $f2, $sf2 ) = @_;
1085            return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
1086    }
1087    
1088    sub frec_ne {
1089            return ! frec_eq( @_ );
1090    }
1091    
1092  =head2 regex  =head2 regex
1093    
1094  Apply regex to some or all values  Apply regex to some or all values
# Line 1232  Consult config values stored in C<config Line 1324  Consult config values stored in C<config
1324    $database_code = config();    # use _ from hash    $database_code = config();    # use _ from hash
1325    $database_name = config('name');    $database_name = config('name');
1326    $database_input_name = config('input name');    $database_input_name = config('input name');
   $tag = config('input normalize tag');  
1327    
1328  Up to three levels are supported.  Up to three levels are supported.
1329    

Legend:
Removed from v.889  
changed lines
  Added in v.1021

  ViewVC Help
Powered by ViewVC 1.1.26