/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 812 by dpavlin, Sun Apr 1 21:47:42 2007 UTC revision 982 by dpavlin, Sat Nov 3 13:35:03 2007 UTC
# Line 1  Line 1 
1  package WebPAC::Normalize;  package WebPAC::Normalize;
2  use Exporter 'import';  use Exporter 'import';
3  @EXPORT = qw/  our @EXPORT = qw/
4          _set_rec _set_lookup          _set_rec _set_lookup
5          _set_load_row          _set_load_row
6          _get_ds _clean_ds          _get_ds _clean_ds
7          _debug          _debug
8          _pack_subfields_hash          _pack_subfields_hash
9    
10          tag search display          search_display search display sorted
11    
12          marc marc_indicators marc_repeatable_subfield          marc marc_indicators marc_repeatable_subfield
13          marc_compose marc_leader          marc_compose marc_leader marc_fixed
14          marc_duplicate marc_remove          marc_duplicate marc_remove marc_count
15          marc_original_order          marc_original_order
16    
17          rec1 rec2 rec          rec1 rec2 rec
18            frec
19          regex prefix suffix surround          regex prefix suffix surround
20          first lookup join_with          first lookup join_with
21          save_into_lookup          save_into_lookup
# Line 22  use Exporter 'import'; Line 24  use Exporter 'import';
24    
25          get set          get set
26          count          count
27    
28  /;  /;
29    
30  use warnings;  use warnings;
# Line 35  use Carp qw/confess/; Line 38  use Carp qw/confess/;
38  # debugging warn(s)  # debugging warn(s)
39  my $debug = 0;  my $debug = 0;
40    
41    use WebPAC::Normalize::ISBN;
42    push @EXPORT, ( 'isbn_10', 'isbn_13' );
43    
44  =head1 NAME  =head1 NAME
45    
46  WebPAC::Normalize - describe normalisaton rules using sets  WebPAC::Normalize - describe normalisaton rules using sets
47    
 =head1 VERSION  
   
 Version 0.26  
   
48  =cut  =cut
49    
50  our $VERSION = '0.26';  our $VERSION = '0.31';
51    
52  =head1 SYNOPSIS  =head1 SYNOPSIS
53    
# Line 59  means that you check it's validity befor Line 60  means that you check it's validity befor
60  C<perl -c normalize.pl>.  C<perl -c normalize.pl>.
61    
62  Normalisation can generate multiple output normalized data. For now, supported output  Normalisation can generate multiple output normalized data. For now, supported output
63  types (on the left side of definition) are: C<tag>, C<display>, C<search> and  types (on the left side of definition) are: C<search_display>, C<display>, C<search> and
64  C<marc>.  C<marc>.
65    
66  =head1 FUNCTIONS  =head1 FUNCTIONS
# Line 78  Return data structure Line 79  Return data structure
79          marc_encoding => 'utf-8',          marc_encoding => 'utf-8',
80          config => $config,          config => $config,
81          load_row_coderef => sub {          load_row_coderef => sub {
82                  my ($database,$input,$mfn) = shift;                  my ($database,$input,$mfn) = @_;
83                  $store->load_row( database => $database, input => $input, id => $mfn );                  $store->load_row( database => $database, input => $input, id => $mfn );
84          },          },
85    );    );
# Line 171  my ($out, $marc_record, $marc_encoding, Line 172  my ($out, $marc_record, $marc_encoding,
172  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);  my ($marc_record_offset, $marc_fetch_offset) = (0, 0);
173    
174  sub _get_ds {  sub _get_ds {
175    #warn "## out = ",dump($out);
176          return $out;          return $out;
177  }  }
178    
# Line 289  will return 42th copy record (if it exis Line 291  will return 42th copy record (if it exis
291    
292  =cut  =cut
293    
294    my $fetch_pos;
295    
296  sub _get_marc_fields {  sub _get_marc_fields {
297    
298          my $arg = {@_};          my $arg = {@_};
299          warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);          warn "### _get_marc_fields arg: ", dump($arg), $/ if ($debug > 2);
300          my $offset = $marc_fetch_offset;          $fetch_pos = $marc_fetch_offset;
301          if ($arg->{offset}) {          if ($arg->{offset}) {
302                  $offset = $arg->{offset};                  $fetch_pos = $arg->{offset};
303          } elsif($arg->{fetch_next}) {          } elsif($arg->{fetch_next}) {
304                  $marc_fetch_offset++;                  $marc_fetch_offset++;
305          }          }
# Line 304  sub _get_marc_fields { Line 308  sub _get_marc_fields {
308    
309          warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);          warn "### full marc_record = ", dump( @{ $marc_record }), $/ if ($debug > 2);
310    
311          my $marc_rec = $marc_record->[ $offset ];          my $marc_rec = $marc_record->[ $fetch_pos ];
312    
313          warn "## _get_marc_fields (at offset: $offset) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);          warn "## _get_marc_fields (at offset: $fetch_pos) -- marc_record = ", dump( @$marc_rec ), $/ if ($debug > 1);
314    
315          return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);          return if (! $marc_rec || ref($marc_rec) ne 'ARRAY' || $#{ $marc_rec } < 0);
316    
# Line 327  sub _get_marc_fields { Line 331  sub _get_marc_fields {
331    
332          if ($debug) {          if ($debug) {
333                  warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );                  warn "## marc_repeatable_subfield = ", dump( $marc_repeatable_subfield ), $/ if ( $marc_repeatable_subfield );
334                  warn "## marc_record[$offset] = ", dump( $marc_rec ), $/;                  warn "## marc_record[$fetch_pos] = ", dump( $marc_rec ), $/;
335                  warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;                  warn "## sorted_marc_record = ", dump( \@sorted_marc_record ), $/;
336                  warn "## subfield count = ", dump( $u ), $/;                  warn "## subfield count = ", dump( $u ), $/;
337          }          }
# Line 408  sub _get_marc_fields { Line 412  sub _get_marc_fields {
412          return \@m;          return \@m;
413  }  }
414    
415    =head2 _get_marc_leader
416    
417    Return leader from currently fetched record by L</_get_marc_fields>
418    
419      print WebPAC::Normalize::_get_marc_leader();
420    
421    =cut
422    
423    sub _get_marc_leader {
424            die "no fetch_pos, did you called _get_marc_fields first?" unless ( defined( $fetch_pos ) );
425            return $marc_leader->[ $fetch_pos ];
426    }
427    
428  =head2 _debug  =head2 _debug
429    
430  Change level of debug warnings  Change level of debug warnings
# Line 427  sub _debug { Line 444  sub _debug {
444    
445  Those functions generally have to first in your normalization file.  Those functions generally have to first in your normalization file.
446    
447  =head2 tag  =head2 search_display
448    
449  Define new tag for I<search> and I<display>.  Define output for L<search> and L<display> at the same time
450    
451    tag('Title', rec('200','a') );    search_display('Title', rec('200','a') );
452    
453    
454  =cut  =cut
455    
456  sub tag {  sub search_display {
457          my $name = shift or die "tag needs name as first argument";          my $name = shift or die "search_display needs name as first argument";
458          my @o = grep { defined($_) && $_ ne '' } @_;          my @o = grep { defined($_) && $_ ne '' } @_;
459          return unless (@o);          return unless (@o);
         $out->{$name}->{tag} = $name;  
460          $out->{$name}->{search} = \@o;          $out->{$name}->{search} = \@o;
461          $out->{$name}->{display} = \@o;          $out->{$name}->{display} = \@o;
462  }  }
463    
464    =head2 tag
465    
466    Old name for L<search_display>, but supported
467    
468    =cut
469    
470    sub tag {
471            search_display( @_ );
472    }
473    
474  =head2 display  =head2 display
475    
476  Define tag just for I<display>  Define output just for I<display>
477    
478    @v = display('Title', rec('200','a') );    @v = display('Title', rec('200','a') );
479    
480  =cut  =cut
481    
482  sub display {  sub _field {
483          my $name = shift or die "display needs name as first argument";          my $type = shift or confess "need type -- BUG?";
484            my $name = shift or confess "needs name as first argument";
485          my @o = grep { defined($_) && $_ ne '' } @_;          my @o = grep { defined($_) && $_ ne '' } @_;
486          return unless (@o);          return unless (@o);
487          $out->{$name}->{tag} = $name;          $out->{$name}->{$type} = \@o;
         $out->{$name}->{display} = \@o;  
488  }  }
489    
490    sub display { _field( 'display', @_ ) }
491    
492  =head2 search  =head2 search
493    
494  Prepare values just for I<search>  Prepare values just for I<search>
# Line 469  Prepare values just for I<search> Line 497  Prepare values just for I<search>
497    
498  =cut  =cut
499    
500  sub search {  sub search { _field( 'search', @_ ) }
501          my $name = shift or die "search needs name as first argument";  
502          my @o = grep { defined($_) && $_ ne '' } @_;  =head2 sorted
503          return unless (@o);  
504          $out->{$name}->{tag} = $name;  Insert into lists which will be automatically sorted
505          $out->{$name}->{search} = \@o;  
506  }   sorted('Title', rec('200','a') );
507    
508    =cut
509    
510    sub sorted { _field( 'sorted', @_ ) }
511    
512    
513  =head2 marc_leader  =head2 marc_leader
514    
# Line 490  sub marc_leader { Line 523  sub marc_leader {
523          my ($offset,$value) = @_;          my ($offset,$value) = @_;
524    
525          if ($offset) {          if ($offset) {
526                  $marc_leader->{ $offset } = $value;                  $marc_leader->[ $marc_record_offset ]->{ $offset } = $value;
527          } else {          } else {
528                  return $marc_leader;                  
529                    if (defined($marc_leader)) {
530                            die "marc_leader not array = ", dump( $marc_leader ) unless (ref($marc_leader) eq 'ARRAY');
531                            return $marc_leader->[ $marc_record_offset ];
532                    } else {
533                            return;
534                    }
535            }
536    }
537    
538    =head2 marc_fixed
539    
540    Create control/indentifier fields with values in fixed positions
541    
542      marc_fixed('008', 00, '070402');
543      marc_fixed('008', 39, '|');
544    
545    Positions not specified will be filled with spaces (C<0x20>).
546    
547    There will be no effort to extend last specified value to full length of
548    field in standard.
549    
550    =cut
551    
552    sub marc_fixed {
553            my ($f, $pos, $val) = @_;
554            die "need marc(field, position, value)" unless defined($f) && defined($pos);
555    
556            confess "need val" unless defined $val;
557    
558            my $update = 0;
559    
560            map {
561                    if ($_->[0] eq $f) {
562                            my $old = $_->[1];
563                            if (length($old) <= $pos) {
564                                    $_->[1] .= ' ' x ( $pos - length($old) ) . $val;
565                                    warn "## marc_fixed($f,$pos,'$val') append '$old' -> '$_->[1]'\n" if ($debug > 1);
566                            } else {
567                                    $_->[1] = substr($old, 0, $pos) . $val . substr($old, $pos + length($val));
568                                    warn "## marc_fixed($f,$pos,'$val') update '$old' -> '$_->[1]'\n" if ($debug > 1);
569                            }
570                            $update++;
571                    }
572            } @{ $marc_record->[ $marc_record_offset ] };
573    
574            if (! $update) {
575                    my $v = ' ' x $pos . $val;
576                    push @{ $marc_record->[ $marc_record_offset ] }, [ $f, $v ];
577                    warn "## marc_fixed($f,$pos,'val') created '$v'\n" if ($debug > 1);
578          }          }
579  }  }
580    
# Line 624  sub marc_duplicate { Line 706  sub marc_duplicate {
706           my $m = $marc_record->[ -1 ];           my $m = $marc_record->[ -1 ];
707           die "can't duplicate record which isn't defined" unless ($m);           die "can't duplicate record which isn't defined" unless ($m);
708           push @{ $marc_record }, dclone( $m );           push @{ $marc_record }, dclone( $m );
709           warn "## marc_duplicate = ", dump(@$marc_record), $/ if ($debug > 1);           push @{ $marc_leader }, dclone( marc_leader() );
710             warn "## marc_duplicate = ", dump(@$marc_leader, @$marc_record), $/ if ($debug > 1);
711           $marc_record_offset = $#{ $marc_record };           $marc_record_offset = $#{ $marc_record };
712           warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);           warn "## marc_record_offset = $marc_record_offset", $/ if ($debug > 1);
713    
714  }  }
715    
716  =head2 marc_remove  =head2 marc_remove
# Line 774  sub marc_original_order { Line 858  sub marc_original_order {
858          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);          warn "## marc_record = ", dump( $marc_record ),$/ if ($debug > 1);
859  }  }
860    
861    =head2 marc_count
862    
863    Return number of MARC records created using L</marc_duplicate>.
864    
865      print "created ", marc_count(), " records";
866    
867    =cut
868    
869    sub marc_count {
870            return $#{ $marc_record };
871    }
872    
873    
874  =head1 Functions to extract data from input  =head1 Functions to extract data from input
875    
# Line 796  sub _pack_subfields_hash { Line 892  sub _pack_subfields_hash {
892    
893          my ($h,$include_subfields) = @_;          my ($h,$include_subfields) = @_;
894    
895            # sanity and ease of use
896            return $h if (ref($h) ne 'HASH');
897    
898          if ( defined($h->{subfields}) ) {          if ( defined($h->{subfields}) ) {
899                  my $sfs = delete $h->{subfields} || die "no subfields?";                  my $sfs = delete $h->{subfields} || die "no subfields?";
900                  my @out;                  my @out;
# Line 900  return scalar, not array. Line 999  return scalar, not array.
999    
1000  =cut  =cut
1001    
1002    sub frec {
1003            my @out = rec(@_);
1004            warn "rec(",dump(@_),") has more than one return value, ignoring\n" if $#out > 0;
1005            return shift @out;
1006    }
1007    
1008  sub rec {  sub rec {
1009          my @out;          my @out;
1010          if ($#_ == 0) {          if ($#_ == 0) {
# Line 945  Prefix all values with a string Line 1050  Prefix all values with a string
1050  =cut  =cut
1051    
1052  sub prefix {  sub prefix {
1053          my $p = shift or return;          my $p = shift;
1054            return @_ unless defined( $p );
1055          return map { $p . $_ } grep { defined($_) } @_;          return map { $p . $_ } grep { defined($_) } @_;
1056  }  }
1057    
# Line 958  suffix all values with a string Line 1064  suffix all values with a string
1064  =cut  =cut
1065    
1066  sub suffix {  sub suffix {
1067          my $s = shift or die "suffix needs string as first argument";          my $s = shift;
1068            return @_ unless defined( $s );
1069          return map { $_ . $s } grep { defined($_) } @_;          return map { $_ . $s } grep { defined($_) } @_;
1070  }  }
1071    
# Line 971  surround all values with a two strings Line 1078  surround all values with a two strings
1078  =cut  =cut
1079    
1080  sub surround {  sub surround {
1081          my $p = shift or die "surround need prefix as first argument";          my $p = shift;
1082          my $s = shift or die "surround needs suffix as second argument";          my $s = shift;
1083            $p = '' unless defined( $p );
1084            $s = '' unless defined( $s );
1085          return map { $p . $_ . $s } grep { defined($_) } @_;          return map { $p . $_ . $s } grep { defined($_) } @_;
1086  }  }
1087    
# Line 1147  Consult config values stored in C<config Line 1256  Consult config values stored in C<config
1256    $database_code = config();    # use _ from hash    $database_code = config();    # use _ from hash
1257    $database_name = config('name');    $database_name = config('name');
1258    $database_input_name = config('input name');    $database_input_name = config('input name');
   $tag = config('input normalize tag');  
1259    
1260  Up to three levels are supported.  Up to three levels are supported.
1261    

Legend:
Removed from v.812  
changed lines
  Added in v.982

  ViewVC Help
Powered by ViewVC 1.1.26