/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 536 by dpavlin, Mon Jun 26 16:39:51 2006 UTC revision 542 by dpavlin, Thu Jun 29 21:18:59 2006 UTC
# Line 1  Line 1 
1  package WebPAC::Normalize;  package WebPAC::Normalize;
2  use Exporter 'import';  use Exporter 'import';
3  @EXPORT = qw/  @EXPORT = qw/
4          set_rec set_lookup          _set_rec _set_lookup
5          get_ds clean_ds          _get_ds _clean_ds
6    
7          tag search display          tag search display
8            marc21
9    
10          rec1 rec2 rec          rec1 rec2 rec
11          regex prefix suffix surround          regex prefix suffix surround
12          first lookup join_with          first lookup join_with
# Line 14  use strict; Line 17  use strict;
17    
18  #use base qw/WebPAC::Common/;  #use base qw/WebPAC::Common/;
19  use Data::Dumper;  use Data::Dumper;
20    use Encode qw/from_to/;
21    
22  =head1 NAME  =head1 NAME
23    
# Line 21  WebPAC::Normalize - describe normalisato Line 25  WebPAC::Normalize - describe normalisato
25    
26  =head1 VERSION  =head1 VERSION
27    
28  Version 0.04  Version 0.06
29    
30  =cut  =cut
31    
32  our $VERSION = '0.04';  our $VERSION = '0.06';
33    
34  =head1 SYNOPSIS  =head1 SYNOPSIS
35    
# Line 38  means that you check it's validity befor Line 42  means that you check it's validity befor
42  C<perl -c normalize.pl>.  C<perl -c normalize.pl>.
43    
44  Normalisation can generate multiple output normalized data. For now, supported output  Normalisation can generate multiple output normalized data. For now, supported output
45  types (on the left side of definition) are: C<tag>, C<display> and C<search>.  types (on the left side of definition) are: C<tag>, C<display>, C<search> and
46    C<marc21>.
47    
48  =head1 FUNCTIONS  =head1 FUNCTIONS
49    
50    Functions which start with C<_> are private and used by WebPAC internally.
51    All other functions are available for use within normalisation rules.
52    
53  =head2 data_structure  =head2 data_structure
54    
55  Return data structure  Return data structure
56    
57    my $ds = WebPAC::Normalize(    my $ds = WebPAC::Normalize::data_structure(
58          lookup => $lookup->lookup_hash,          lookup => $lookup->lookup_hash,
59          row => $row,          row => $row,
60          rules => $normalize_pl_config,          rules => $normalize_pl_config,
61            marc_encoding => 'utf-8',
62    );    );
63    
64    Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
65    other are optional.
66    
67  This function will B<die> if normalizastion can't be evaled.  This function will B<die> if normalizastion can't be evaled.
68    
69    Since this function isn't exported you have to call it with
70    C<WebPAC::Normalize::data_structure>.
71    
72  =cut  =cut
73    
74  sub data_structure {  sub data_structure {
# Line 63  sub data_structure { Line 78  sub data_structure {
78          die "need normalisation argument" unless ($arg->{rules});          die "need normalisation argument" unless ($arg->{rules});
79    
80          no strict 'subs';          no strict 'subs';
81          set_lookup( $arg->{lookup} );          _set_lookup( $arg->{lookup} );
82          set_rec( $arg->{row} );          _set_rec( $arg->{row} );
83          clean_ds();          _clean_ds( %{ $arg } );
84          eval "$arg->{rules}";          eval "$arg->{rules}";
85          die "error evaling $arg->{rules}: $@\n" if ($@);          die "error evaling $arg->{rules}: $@\n" if ($@);
86          return get_ds();  
87            return _get_ds();
88  }  }
89    
90  =head2 set_rec  =head2 _set_rec
91    
92  Set current record hash  Set current record hash
93    
94    set_rec( $rec );    _set_rec( $rec );
95    
96  =cut  =cut
97    
98  my $rec;  my $rec;
99    
100  sub set_rec {  sub _set_rec {
101          $rec = shift or die "no record hash";          $rec = shift or die "no record hash";
102  }  }
103    
104    =head2 _get_ds
105    
106    Return hash formatted as data structure
107    
108      my $ds = _get_ds();
109    
110    =cut
111    
112    my $out;
113    my $marc21;
114    my $marc_encoding;
115    
116    sub _get_ds {
117            return $out;
118    }
119    
120    =head2 _clean_ds
121    
122    Clean data structure hash for next record
123    
124      _clean_ds();
125    
126    =cut
127    
128    sub _clean_ds {
129            my $a = {@_};
130            $out = undef;
131            $marc21 = undef;
132            $marc_encoding = $a->{marc_encoding};
133    }
134    
135    =head2 _set_lookup
136    
137    Set current lookup hash
138    
139      _set_lookup( $lookup );
140    
141    =cut
142    
143    my $lookup;
144    
145    sub _set_lookup {
146            $lookup = shift;
147    }
148    
149    =head2 _get_marc21_fields
150    
151    Get all fields defined by calls to C<marc21>
152    
153            $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );
154    
155    B<TODO>: implement exceptions to magic which unrolls repeated subfields
156    as new field with that subfield.
157    
158    =cut
159    
160    sub _get_marc21_fields {
161            my @m;
162            my $last;
163            foreach my $row (@{ $marc21 }) {
164                    if ($last && (
165                            $last->[0] eq $row->[0] ||              # check if field is same
166                            $last->[1] eq $row->[1] ||              # check for i1
167                            $last->[2] eq $row->[2]                 # and for i2
168                    ) ) {
169                            $last->[3]->{ $row->[3] } = $row->[4];
170                            warn "## ++ added $row->[0] ^$row->[3]\n";
171                            next;
172                    } elsif ($last) {
173                            push @m, $last;
174                    }
175    
176                    push @m, $row;
177            }
178    
179            push @m, $last if ($last);
180    
181            return @m;
182    }
183    
184    =head1 Functions to create C<data_structure>
185    
186    Those functions generally have to first in your normalization file.
187    
188  =head2 tag  =head2 tag
189    
190  Define new tag for I<search> and I<display>.  Define new tag for I<search> and I<display>.
# Line 94  Define new tag for I<search> and I<displ Line 194  Define new tag for I<search> and I<displ
194    
195  =cut  =cut
196    
 my $out;  
   
197  sub tag {  sub tag {
198          my $name = shift or die "tag needs name as first argument";          my $name = shift or die "tag needs name as first argument";
199          my @o = grep { defined($_) && $_ ne '' } @_;          my @o = grep { defined($_) && $_ ne '' } @_;
# Line 137  sub search { Line 235  sub search {
235          $out->{$name}->{search} = \@o;          $out->{$name}->{search} = \@o;
236  }  }
237    
238  =head2 get_ds  =head2 marc21
   
 Return hash formatted as data structure  
   
   my $ds = get_ds();  
   
 =cut  
   
 sub get_ds {  
         return $out;  
 }  
   
 =head2 clean_ds  
239    
240  Clean data structure hash for next record  Save value for MARC field
241    
242    clean_ds();    marc21('900','a', rec('200','a') );
243    
244  =cut  =cut
245    
246  sub clean_ds {  sub marc21 {
247          $out = undef;          my $f = shift or die "marc21 needs field";
248            die "marc21 field must be numer" unless ($f =~ /^\d+$/);
249    
250            my $sf = shift or die "marc21 needs subfield";
251    
252            foreach (@_) {
253                    my $v = $_;             # make var read-write for Encode
254                    next unless (defined($v) && $v !~ /^\s+$/);
255                    from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
256                    push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];
257            }
258  }  }
259    
260  =head2 set_lookup  =head1 Functions to extract data from input
261    
262  Set current lookup hash  This function should be used inside functions to create C<data_structure> described
263    above.
   set_lookup( $lookup );  
   
 =cut  
   
 my $lookup;  
   
 sub set_lookup {  
         $lookup = shift;  
 }  
264    
265  =head2 rec1  =head2 rec1
266    

Legend:
Removed from v.536  
changed lines
  Added in v.542

  ViewVC Help
Powered by ViewVC 1.1.26