/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 605 by dpavlin, Sun Jul 30 14:23:23 2006 UTC revision 711 by dpavlin, Tue Sep 26 10:05:25 2006 UTC
# Line 4  use Exporter 'import'; Line 4  use Exporter 'import';
4          _set_rec _set_lookup          _set_rec _set_lookup
5          _get_ds _clean_ds          _get_ds _clean_ds
6          _debug          _debug
7            _pack_subfields_hash
8    
9          tag search display          tag search display
10          marc marc_indicators marc_repeatable_subfield          marc marc_indicators marc_repeatable_subfield
# Line 14  use Exporter 'import'; Line 15  use Exporter 'import';
15          rec1 rec2 rec          rec1 rec2 rec
16          regex prefix suffix surround          regex prefix suffix surround
17          first lookup join_with          first lookup join_with
18            save_into_lookup
19    
20          split_rec_on          split_rec_on
21  /;  /;
# Line 23  use strict; Line 25  use strict;
25    
26  #use base qw/WebPAC::Common/;  #use base qw/WebPAC::Common/;
27  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
 use Encode qw/from_to/;  
28  use Storable qw/dclone/;  use Storable qw/dclone/;
29    
30  # debugging warn(s)  # debugging warn(s)
# Line 36  WebPAC::Normalize - describe normalisato Line 37  WebPAC::Normalize - describe normalisato
37    
38  =head1 VERSION  =head1 VERSION
39    
40  Version 0.15  Version 0.20
41    
42  =cut  =cut
43    
44  our $VERSION = '0.15';  our $VERSION = '0.20';
45    
46  =head1 SYNOPSIS  =head1 SYNOPSIS
47    
# Line 66  All other functions are available for us Line 67  All other functions are available for us
67  Return data structure  Return data structure
68    
69    my $ds = WebPAC::Normalize::data_structure(    my $ds = WebPAC::Normalize::data_structure(
70          lookup => $lookup->lookup_hash,          lookup => $lookup_variable,
71          row => $row,          row => $row,
72          rules => $normalize_pl_config,          rules => $normalize_pl_config,
73          marc_encoding => 'utf-8',          marc_encoding => 'utf-8',
74          config => $config,          config => $config,
75    );    );
76    
77  Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all  Options C<row>, C<rules> and C<log> are mandatory while all
78  other are optional.  other are optional.
79    
80  This function will B<die> if normalizastion can't be evaled.  This function will B<die> if normalizastion can't be evaled.
# Line 90  sub data_structure { Line 91  sub data_structure {
91          die "need normalisation argument" unless ($arg->{rules});          die "need normalisation argument" unless ($arg->{rules});
92    
93          no strict 'subs';          no strict 'subs';
94          _set_lookup( $arg->{lookup} );          _set_lookup( $arg->{lookup} ) if (defined( $arg->{lookup} ));
95          _set_rec( $arg->{row} );          _set_rec( $arg->{row} );
96          _set_config( $arg->{config} );          _set_config( $arg->{config} ) if (defined( $arg->{config} ));
97          _clean_ds( %{ $arg } );          _clean_ds( %{ $arg } );
98          eval "$arg->{rules}";          eval "$arg->{rules}";
99          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
# Line 186  sub _set_lookup { Line 187  sub _set_lookup {
187          $lookup = shift;          $lookup = shift;
188  }  }
189    
190    =head2 _get_lookup
191    
192    Get current lookup hash
193    
194      my $lookup = _get_lookup();
195    
196    =cut
197    
198    sub _get_lookup {
199            return $lookup;
200    }
201    
202  =head2 _get_marc_fields  =head2 _get_marc_fields
203    
204  Get all fields defined by calls to C<marc>  Get all fields defined by calls to C<marc>
# Line 469  sub marc { Line 482  sub marc {
482          foreach (@_) {          foreach (@_) {
483                  my $v = $_;             # make var read-write for Encode                  my $v = $_;             # make var read-write for Encode
484                  next unless (defined($v) && $v !~ /^\s*$/);                  next unless (defined($v) && $v !~ /^\s*$/);
                 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);  
485                  my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');                  my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
486                  if (defined $sf) {                  if (defined $sf) {
487                          push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];                          push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $i1, $i2, $sf => $v ];
# Line 540  sub marc_compose { Line 552  sub marc_compose {
552    
553          warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);          warn "### marc_compose input subfields = ", dump(@_),$/ if ($debug > 2);
554    
555            if ($#_ % 2 != 1) {
556                    die "ERROR: marc_compose",dump($f,@_)," not valid (must be even).\nDo you need to add first() or join() around some argument?\n";
557            }
558    
559          while (@_) {          while (@_) {
560                  my $sf = shift or die "marc_compose $f needs subfield";                  my $sf = shift;
561                  my $v = shift;                  my $v = shift;
562    
563                  next unless (defined($v) && $v !~ /^\s*$/);                  next unless (defined($v) && $v !~ /^\s*$/);
                 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);  
564                  warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);                  warn "## ++ marc_compose($f,$sf,$v) ", dump( $m ),$/ if ($debug > 1);
565                  if ($sf ne '+') {                  if ($sf ne '+') {
566                          push @$m, ( $sf, $v );                          push @$m, ( $sf, $v );
# Line 646  sub marc_remove { Line 661  sub marc_remove {
661    
662  Copy all subfields preserving original order to marc field.  Copy all subfields preserving original order to marc field.
663    
664    marc_original_order(210, 260);    marc_original_order( marc_field_number, original_input_field_number );
665    
666    Please note that field numbers are consistent with other commands (marc
667    field number first), but somewhat counter-intuitive (destination and then
668    source).
669    
670  You might want to use this command if you are just renaming subfields or  You might want to use this command if you are just renaming subfields or
671  using pre-processing modify_record in C<config.yml> and don't need any  using pre-processing modify_record in C<config.yml> and don't need any
672  post-processing or want to preserve order of original subfields.  post-processing or want to preserve order of original subfields.
673    
674    
675  =cut  =cut
676    
677  sub marc_original_order {  sub marc_original_order {
678    
679          my ($from, $to) = @_;          my ($to, $from) = @_;
680          die "marc_original_order needs from and to fields\n" unless ($from && $to);          die "marc_original_order needs from and to fields\n" unless ($from && $to);
681    
682          my $r = $rec->{$from} || return;          return unless defined($rec->{$from});
683    
684            my $r = $rec->{$from};
685          die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');          die "record field $from isn't array\n" unless (ref($r) eq 'ARRAY');
686    
687          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');          my ($i1,$i2) = defined($marc_indicators->{$to}) ? @{ $marc_indicators->{$to} } : (' ',' ');
688          warn "## marc_original_order($from,$to) source = ", dump( $r ),$/ if ($debug > 1);          warn "## marc_original_order($to,$from) source = ", dump( $r ),$/ if ($debug > 1);
689    
690          foreach my $d (@$r) {          foreach my $d (@$r) {
691    
692                  if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {                  if (! defined($d->{subfields}) && ref($d->{subfields}) ne 'ARRAY') {
693                          warn "# marc_original_order($from,$to): field $from doesn't have subfields specification\n";                          warn "# marc_original_order($to,$from): field $from doesn't have subfields specification\n";
694                          next;                          next;
695                  }                  }
696                    
# Line 676  sub marc_original_order { Line 698  sub marc_original_order {
698    
699                  die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);                  die "field $from doesn't have even number of subfields specifications\n" unless($#sfs % 2 == 1);
700    
701  warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/;                  warn "#--> d: ",dump($d), "\n#--> sfs: ",dump(@sfs),$/ if ($debug > 2);
702    
703                  my $m = [ $to, $i1, $i2 ];                  my $m = [ $to, $i1, $i2 ];
704    
705                  while (my $sf = shift @sfs) {                  while (my $sf = shift @sfs) {
706  warn "#--> sf: ",dump($sf), $/;  
707                            warn "#--> sf: ",dump($sf), $/ if ($debug > 2);
708                          my $offset = shift @sfs;                          my $offset = shift @sfs;
709                          die "corrupted sufields specification for field $from\n" unless defined($offset);                          die "corrupted sufields specification for field $from\n" unless defined($offset);
710    
# Line 702  warn "#--> sf: ",dump($sf), $/; Line 725  warn "#--> sf: ",dump($sf), $/;
725          }          }
726    
727          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
   
         warn "# marc_original_order is partly implemented";  
728  }  }
729    
730    
# Line 712  warn "#--> sf: ",dump($sf), $/; Line 733  warn "#--> sf: ",dump($sf), $/;
733  This function should be used inside functions to create C<data_structure> described  This function should be used inside functions to create C<data_structure> described
734  above.  above.
735    
736    =head2 _pack_subfields_hash
737    
738     @subfields = _pack_subfields_hash( $h );
739     $subfields = _pack_subfields_hash( $h, 1 );
740    
741    Return each subfield value in array or pack them all together and return scalar
742    with subfields (denoted by C<^>) and values.
743    
744    =cut
745    
746    sub _pack_subfields_hash {
747    
748            warn "## _pack_subfields_hash( ",dump(@_), " )\n" if ($debug > 1);
749    
750            my ($h,$include_subfields) = @_;
751    
752            if ( defined($h->{subfields}) ) {
753                    my $sfs = delete $h->{subfields} || die "no subfields?";
754                    my @out;
755                    while (@$sfs) {
756                            my $sf = shift @$sfs;
757                            push @out, '^' . $sf if ($include_subfields);
758                            my $o = shift @$sfs;
759                            if ($o == 0 && ref( $h->{$sf} ) ne 'ARRAY' ) {
760                                    # single element subfields are not arrays
761    #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
762    
763                                    push @out, $h->{$sf};
764                            } else {
765    #warn "====> $sf $o / $#$sfs ", dump( $sfs, $h->{$sf} ), "\n";
766                                    push @out, $h->{$sf}->[$o];
767                            }
768                    }
769                    if ($include_subfields) {
770                            return join('', @out);
771                    } else {
772                            return @out;
773                    }
774            } else {
775                    if ($include_subfields) {
776                            my $out = '';
777                            foreach my $sf (sort keys %$h) {
778                                    if (ref($h->{$sf}) eq 'ARRAY') {
779                                            $out .= '^' . $sf . join('^' . $sf, @{ $h->{$sf} });
780                                    } else {
781                                            $out .= '^' . $sf . $h->{$sf};
782                                    }
783                            }
784                            return $out;
785                    } else {
786                            # FIXME this should probably be in alphabetical order instead of hash order
787                            values %{$h};
788                    }
789            }
790    }
791    
792  =head2 rec1  =head2 rec1
793    
794  Return all values in some field  Return all values in some field
# Line 728  sub rec1 { Line 805  sub rec1 {
805          return unless (defined($rec) && defined($rec->{$f}));          return unless (defined($rec) && defined($rec->{$f}));
806          warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);          warn "rec1($f) = ", dump( $rec->{$f} ), $/ if ($debug > 1);
807          if (ref($rec->{$f}) eq 'ARRAY') {          if (ref($rec->{$f}) eq 'ARRAY') {
808                  return map {                  my @out;
809                          if (ref($_) eq 'HASH') {                  foreach my $h ( @{ $rec->{$f} } ) {
810                                  values %{$_};                          if (ref($h) eq 'HASH') {
811                                    push @out, ( _pack_subfields_hash( $h ) );
812                          } else {                          } else {
813                                  $_;                                  push @out, $h;
814                          }                          }
815                  } @{ $rec->{$f} };                  }
816                    return @out;
817          } elsif( defined($rec->{$f}) ) {          } elsif( defined($rec->{$f}) ) {
818                  return $rec->{$f};                  return $rec->{$f};
819          }          }
# Line 865  Consult lookup hashes for some value Line 944  Consult lookup hashes for some value
944    @v = lookup( $v );    @v = lookup( $v );
945    @v = lookup( @v );    @v = lookup( @v );
946    
947    FIXME B<currently this one is broken!>
948    
949  =cut  =cut
950    
951  sub lookup {  sub lookup {
# Line 877  sub lookup { Line 958  sub lookup {
958          }          }
959  }  }
960    
961    =head2 save_into_lookup
962    
963    Save value into lookup.
964    
965      save_into_lookup($key,sub {
966            # code which produce one or more values
967      });
968    
969    This function shouldn't be called directly, it's called from code created by L<WebPAC::Parser>.
970    
971    =cut
972    
973    sub save_into_lookup {
974            my ($k,$coderef) = @_;
975            die "save_into_lookup needs key" unless defined($k);
976            die "save_into_lookup needs CODE" unless ( defined($coderef) && ref($coderef) eq 'CODE' );
977            my $mfn = $rec->{'000'}->[0] || die "mfn not defined or zero";
978            foreach my $v ( $coderef->() ) {
979                    $lookup->{$k}->{$v}->{$mfn}++;
980                    warn "# lookup $k $v $mfn saved\n" if ($debug > 1);
981            }
982    }
983    
984  =head2 config  =head2 config
985    
986  Consult config values stored in C<config.yml>  Consult config values stored in C<config.yml>

Legend:
Removed from v.605  
changed lines
  Added in v.711

  ViewVC Help
Powered by ViewVC 1.1.26