/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory | Revision Log | View Patch

revision 1013 by dpavlin, Wed Nov 7 11:54:34 2007 UTC revision 1018 by dpavlin, Fri Nov 9 23:46:44 2007 UTC
# Line 16  our @EXPORT = qw/ Line 16  our @EXPORT = qw/
16          marc_template          marc_template
17    
18          rec1 rec2 rec          rec1 rec2 rec
19          frec          frec frec_eq frec_ne
20          regex prefix suffix surround          regex prefix suffix surround
21          first lookup join_with          first lookup join_with
22          save_into_lookup          save_into_lookup
# Line 48  WebPAC::Normalize - describe normalisato Line 48  WebPAC::Normalize - describe normalisato
48    
49  =cut  =cut
50    
51  our $VERSION = '0.33';  our $VERSION = '0.34';
52    
53  =head1 SYNOPSIS  =head1 SYNOPSIS
54    
# Line 815  sub marc_original_order { Line 815  sub marc_original_order {
815          return unless defined($rec->{$from});          return unless defined($rec->{$from});
816    
817          my $r = $rec->{$from};          my $r = $rec->{$from};
818          die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');          die "record field $from isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
819    
820          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
821          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
# Line 873  sub marc_template { Line 873  sub marc_template {
873                  die "$_ not ARRAY" if ref($args->{$_}) ne 'ARRAY';                  die "$_ not ARRAY" if ref($args->{$_}) ne 'ARRAY';
874          }          }
875    
876          my $r = $rec->{ $args->{from} }; # || return;          my $r = $rec->{ $args->{from} } || return;
877          die "record field ", $args->{from}, " isn't array\n" unless (ref($r) eq 'ARRAY');          die "record field ", $args->{from}, " isn't array ",dump( $rec ) unless (ref($r) eq 'ARRAY');
878    
879          my @subfields_rename = @{ $args->{subfields_rename} };          my @subfields_rename = @{ $args->{subfields_rename} };
880  #       warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename );  #       warn "### subfields_rename [$#subfields_rename] = ",dump( @subfields_rename );
# Line 933  sub marc_template { Line 933  sub marc_template {
933    
934                  my ( $new_r, $from_count, $to_count );                  my ( $new_r, $from_count, $to_count );
935                  foreach my $sf ( keys %{$r} ) {                  foreach my $sf ( keys %{$r} ) {
936                            # skip everything which isn't one char subfield (e.g. 'subfields')
937                            next unless $sf =~ m/^\w$/;
938                          my $nr = $from_count->{$sf}++;                          my $nr = $from_count->{$sf}++;
939                          my $rename_to = $subfields_rename->{ $sf };     # ||                          my $rename_to = $subfields_rename->{ $sf } ||
940  #                               die "can't find subfield rename for $sf/$nr in ", dump( $subfields_rename );                                  die "can't find subfield rename for $sf/$nr in ", dump( $subfields_rename );
941                          warn "### rename $sf/$nr to ", dump( $rename_to->[$nr] ), $/;                          warn "### rename $sf/$nr to ", dump( $rename_to->[$nr] ), $/;
942                          my ( $to_sf, $to_nr ) = @{ $rename_to->[$nr] };                          my ( $to_sf, $to_nr ) = @{ $rename_to->[$nr] };
943                          $new_r->{ $to_sf }->[ $to_nr ] = [ $sf => $nr ];                          $new_r->{ $to_sf }->[ $to_nr ] = [ $sf => $nr ];
# Line 984  sub marc_template { Line 986  sub marc_template {
986                          my ( $sf, $nr ) = @$sf;                          my ( $sf, $nr ) = @$sf;
987                          my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";                          my $v = $fill_in->{$sf}->[$nr] || die "can't find fill_in $sf/$nr";
988                          warn "++ $sf/$nr |$v|\n";                          warn "++ $sf/$nr |$v|\n";
989                          push @$m, [ $sf, $v ];                          push @$m, ( $sf, $v );
990                  }                  }
991    
992                  warn "#### >>>> created marc: ", dump( $m );                  warn "#### >>>> created marc: ", dump( $m );
# Line 993  sub marc_template { Line 995  sub marc_template {
995          }          }
996    
997          warn "### marc_template produced: ",dump( @marc_out );          warn "### marc_template produced: ",dump( @marc_out );
998          return @marc_out;  
999            foreach my $marc ( @marc_out ) {
1000                    warn "+++ ",dump( $marc );
1001                    push @{ $marc_record->[ $marc_record_offset ] }, $marc;
1002            }
1003  }  }
1004    
1005  =head2 marc_count  =head2 marc_count
# Line 1168  sub frec { Line 1174  sub frec {
1174          return shift @out;          return shift @out;
1175  }  }
1176    
1177    =head2 frec_eq
1178    
1179    =head2 frec_ne
1180    
1181    Check whether the first values of two fields are the same or different
1182    
1183      if ( frec_eq( 900 => 'a', 910 => 'c' ) ) {
1184            # values are same
1185      } else {
1186        # values are different
1187      }
1188    
1189    Strictly speaking C<frec_eq> and C<frec_ne> wouldn't be needed if you
1190    could write something like:
1191    
1192      if ( frec( '900','a' ) eq frec( '910','c' ) ) {
1193            # yada tada
1194      }
1195    
1196    but you can't, since our parser L<WebPAC::Parser> removes all whitespace
1197    in order to parse the text, which would create the invalid function name C<eqfrec>.
1198    
1199    =cut
1200    
1201    sub frec_eq {
1202            my ( $f1,$sf1, $f2, $sf2 ) = @_;
1203            return (rec( $f1, $sf1 ))[0] eq (rec( $f2, $sf2 ))[0];
1204    }
1205    
1206    sub frec_ne {
1207            return ! frec_eq( @_ );
1208    }
1209    
1210  =head2 regex  =head2 regex
1211    
1212  Apply regex to some or all values  Apply regex to some or all values

Legend:
Removed from v.1013  
changed lines
  Added in v.1018

  ViewVC Help
Powered by ViewVC 1.1.26