/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 980 by dpavlin, Sat Nov 3 13:33:19 2007 UTC revision 1021 by dpavlin, Sat Nov 10 11:11:16 2007 UTC
# Line 1  Line 1 
1  package WebPAC::Normalize;  package WebPAC::Normalize;
2  use Exporter 'import';  use Exporter 'import';
3  our @EXPORT = qw/  our @EXPORT = qw/
4          _set_rec _set_lookup          _set_ds _set_lookup
5          _set_load_row          _set_load_row
6          _get_ds _clean_ds          _get_ds _clean_ds
7          _debug          _debug
# Line 13  our @EXPORT = qw/ Line 13  our @EXPORT = qw/
13          marc_compose marc_leader marc_fixed          marc_compose marc_leader marc_fixed
14          marc_duplicate marc_remove marc_count          marc_duplicate marc_remove marc_count
15          marc_original_order          marc_original_order
16            marc_template
17    
18          rec1 rec2 rec          rec1 rec2 rec
19          frec          frec frec_eq frec_ne
20          regex prefix suffix surround          regex prefix suffix surround
21          first lookup join_with          first lookup join_with
22          save_into_lookup          save_into_lookup
# Line 38  use Carp qw/confess/; Line 39  use Carp qw/confess/;
39  # debugging warn(s)  # debugging warn(s)
40  my $debug = 0;  my $debug = 0;
41    
42    # FIXME
43  use WebPAC::Normalize::ISBN;  use WebPAC::Normalize::ISBN;
44  push @EXPORT, ( 'isbn_10', 'isbn_13' );  push @EXPORT, ( 'isbn_10', 'isbn_13' );
45    
46    use WebPAC::Normalize::MARC;
47    push @EXPORT, ( 'marc_template' );
48    
49  =head1 NAME  =head1 NAME
50    
51  WebPAC::Normalize - describe normalisaton rules using sets  WebPAC::Normalize - describe normalisaton rules using sets
52    
53  =cut  =cut
54    
55  our $VERSION = '0.31';  our $VERSION = '0.35';
56    
57  =head1 SYNOPSIS  =head1 SYNOPSIS
58    
# Line 105  sub data_structure { Line 110  sub data_structure {
110          die "need row argument" unless ($arg->{row});          die "need row argument" unless ($arg->{row});
111          die "need normalisation argument" unless ($arg->{rules});          die "need normalisation argument" unless ($arg->{rules});
112    
         no strict 'subs';  
113          _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});          _set_lookup( $arg->{lookup} ) if defined($arg->{lookup});
114          _set_rec( $arg->{row} );          _set_ds( $arg->{row} );
115          _set_config( $arg->{config} ) if defined($arg->{config});          _set_config( $arg->{config} ) if defined($arg->{config});
116          _clean_ds( %{ $arg } );          _clean_ds( %{ $arg } );
117          $load_row_coderef = $arg->{load_row_coderef};          $load_row_coderef = $arg->{load_row_coderef};
118    
119          eval "$arg->{rules}";          no strict 'subs';
120            no warnings 'redefine';
121            eval "$arg->{rules};";
122          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
123    
124          return _get_ds();          return _get_ds();
125  }  }
126    
127  =head2 _set_rec  =head2 _set_ds
128    
129  Set current record hash  Set current record hash
130    
131    _set_rec( $rec );    _set_ds( $rec );
132    
133  =cut  =cut
134    
135  my $rec;  my $rec;
136    
137  sub _set_rec {  sub _set_ds {
138          $rec = shift or die "no record hash";          $rec = shift or die "no record hash";
139  }  }
140    
141    =head2
142    
143      my $rec = _get_rec();
144    
145    =cut
146    
147    sub _get_rec { $rec };
148    
149  =head2 _set_config  =head2 _set_config
150    
151  Set current config hash  Set current config hash
# Line 172  my ($out, $marc_record, $marc_encoding, Line 186  my ($out, $marc_record, $marc_encoding,
186  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
187    
188  sub _get_ds {  sub _get_ds {
189    #warn "## out = ",dump($out);
 warn "## out = ",dump($out);  
   
190          return $out;          return $out;
191  }  }
192    
# Line 815  sub marc_original_order { Line 827  sub marc_original_order {
827          return unless defined($rec->{$from});          return unless defined($rec->{$from});
828    
829          my $r = $rec->{$from};          my $r = $rec->{$from};
830          die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');          die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
831    
832          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
833          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
# Line 860  sub marc_original_order { Line 872  sub marc_original_order {
872          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
873  }  }
874    
875    
876  =head2 marc_count  =head2 marc_count
877    
878  Return number of MARC records created using L</marc_duplicate>.  Return number of MARC records created using L</marc_duplicate>.
# Line 872  sub marc_count { Line 885  sub marc_count {
885          return $#{ $marc_record };          return $#{ $marc_record };
886  }  }
887    
888    =head2 _marc_push
889    
890     _marc_push( $marc );
891    
892    =cut
893    
894    sub _marc_push {
895            my $marc = shift || die "no marc?";
896            push @{ $marc_record->[ $marc_record_offset ] }, $marc;
897    }
898    
899    
900  =head1 Functions to extract data from input  =head1 Functions to extract data from input
901    
# Line 1001  return scalar, not array. Line 1025  return scalar, not array.
1025    
1026  =cut  =cut
1027    
 sub frec {  
         my @out = rec(@_);  
         warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;  
         return shift @out;  
 }  
   
1028  sub rec {  sub rec {
1029          my @out;          my @out;
1030          if ($#_ == 0) {          if ($#_ == 0) {
# Line 1023  sub rec { Line 1041  sub rec {
1041          }          }
1042  }  }
1043    
1044    =head2 frec
1045    
1046    Returns first value from field
1047    
1048      $v = frec('200');
1049      $v = frec('200','a');
1050    
1051    =cut
1052    
1053    sub frec {
1054            my @out = rec(@_);
1055            warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1056            return shift @out;
1057    }
1058    
1059    =head2 frec_eq
1060    
1061    =head2 frec_ne
1062    
1063    Check if first values from two fields are same or different
1064    
1065      if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
1066            # values are same
1067      } else {
1068        # values are different
1069      }
1070    
1071    Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
1072    could write something like:
1073    
1074      if ( frec( '900','a' ) eq frec( '910','c' ) ) {
1075            # yada tada
1076      }
1077    
1078    but you can't since our parser L<WebPAC::Parser> will remove all whitespaces
1079    in order to parse text and create invalid function C<eqfrec>.
1080    
1081    =cut
1082    
1083    sub frec_eq {
1084            my ( $f1,$sf1, $f2, $sf2 ) = @_;
1085            return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
1086    }
1087    
1088    sub frec_ne {
1089            return ! frec_eq( @_ );
1090    }
1091    
1092  =head2 regex  =head2 regex
1093    
1094  Apply regex to some or all values  Apply regex to some or all values

Legend:
Removed from v.980  
changed lines
  Added in v.1021

  ViewVC Help
Powered by ViewVC 1.1.26