/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 536 - (hide annotations)
Mon Jun 26 16:39:51 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 5409 byte(s)
 r719@llin:  dpavlin | 2006-06-26 18:40:57 +0200
 big refacture: depriciate and remove all normalisation formats except .pl sets (but
 old code is still available in WebPAC::Lookup::Normalize because lookups use it) [2.20]

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3     @EXPORT = qw/
4     set_rec set_lookup
5     get_ds clean_ds
6     tag search display
7     rec1 rec2 rec
8     regex prefix suffix surround
9     first lookup join_with
10     /;
11 dpavlin 10
12     use warnings;
13     use strict;
14 dpavlin 536
15     #use base qw/WebPAC::Common/;
16 dpavlin 13 use Data::Dumper;
17 dpavlin 10
18     =head1 NAME
19    
20 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
21 dpavlin 10
22     =head1 VERSION
23    
24 dpavlin 536 Version 0.04
25 dpavlin 10
26     =cut
27    
28 dpavlin 536 our $VERSION = '0.04';
29 dpavlin 10
30     =head1 SYNOPSIS
31    
32 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
33     from input records using perl functions which are specialized for set
34     processing.
35 dpavlin 10
36 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
37     means that you check it's validity before running WebPAC using
38     C<perl -c normalize.pl>.
39 dpavlin 15
40 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
41     types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42 dpavlin 15
43 dpavlin 10 =head1 FUNCTIONS
44    
45 dpavlin 536 =head2 data_structure
46 dpavlin 10
47 dpavlin 536 Return data structure
48 dpavlin 13
49 dpavlin 536 my $ds = WebPAC::Normalize(
50     lookup => $lookup->lookup_hash,
51     row => $row,
52     rules => $normalize_pl_config,
53 dpavlin 13 );
54    
55 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
56 dpavlin 15
57 dpavlin 10 =cut
58    
59 dpavlin 536 sub data_structure {
60     my $arg = {@_};
61 dpavlin 13
62 dpavlin 536 die "need row argument" unless ($arg->{row});
63     die "need normalisation argument" unless ($arg->{rules});
64 dpavlin 31
65 dpavlin 536 no strict 'subs';
66     set_lookup( $arg->{lookup} );
67     set_rec( $arg->{row} );
68     clean_ds();
69     eval "$arg->{rules}";
70     die "error evaling $arg->{rules}: $@\n" if ($@);
71     return get_ds();
72 dpavlin 10 }
73    
74 dpavlin 536 =head2 set_rec
75 dpavlin 13
76 dpavlin 536 Set current record hash
77 dpavlin 433
78 dpavlin 536 set_rec( $rec );
79 dpavlin 433
80     =cut
81    
82 dpavlin 536 my $rec;
83 dpavlin 433
84 dpavlin 536 sub set_rec {
85     $rec = shift or die "no record hash";
86 dpavlin 433 }
87    
88 dpavlin 536 =head2 tag
89 dpavlin 433
90 dpavlin 536 Define new tag for I<search> and I<display>.
91 dpavlin 433
92 dpavlin 536 tag('Title', rec('200','a') );
93 dpavlin 13
94    
95     =cut
96    
97 dpavlin 536 my $out;
98 dpavlin 13
99 dpavlin 536 sub tag {
100     my $name = shift or die "tag needs name as first argument";
101     my @o = grep { defined($_) && $_ ne '' } @_;
102     return unless (@o);
103     $out->{$name}->{tag} = $name;
104     $out->{$name}->{search} = \@o;
105     $out->{$name}->{display} = \@o;
106     }
107 dpavlin 13
108 dpavlin 536 =head2 display
109 dpavlin 13
110 dpavlin 536 Define tag just for I<display>
111 dpavlin 125
112 dpavlin 536 @v = display('Title', rec('200','a') );
113 dpavlin 125
114 dpavlin 536 =cut
115 dpavlin 125
116 dpavlin 536 sub display {
117     my $name = shift or die "display needs name as first argument";
118     my @o = grep { defined($_) && $_ ne '' } @_;
119     return unless (@o);
120     $out->{$name}->{tag} = $name;
121     $out->{$name}->{display} = \@o;
122     }
123 dpavlin 13
124 dpavlin 536 =head2 search
125 dpavlin 13
126 dpavlin 536 Prepare values just for I<search>
127 dpavlin 13
128 dpavlin 536 @v = search('Title', rec('200','a') );
129 dpavlin 433
130 dpavlin 536 =cut
131 dpavlin 13
132 dpavlin 536 sub search {
133     my $name = shift or die "search needs name as first argument";
134     my @o = grep { defined($_) && $_ ne '' } @_;
135     return unless (@o);
136     $out->{$name}->{tag} = $name;
137     $out->{$name}->{search} = \@o;
138 dpavlin 13 }
139    
140 dpavlin 536 =head2 get_ds
141 dpavlin 13
142 dpavlin 536 Return hash formatted as data structure
143 dpavlin 13
144 dpavlin 536 my $ds = get_ds();
145 dpavlin 13
146     =cut
147    
148 dpavlin 536 sub get_ds {
149 dpavlin 13 return $out;
150     }
151    
152 dpavlin 536 =head2 clean_ds
153 dpavlin 15
154 dpavlin 536 Clean data structure hash for next record
155 dpavlin 15
156 dpavlin 536 clean_ds();
157 dpavlin 15
158 dpavlin 536 =cut
159 dpavlin 15
160 dpavlin 536 sub clean_ds {
161     $out = undef;
162     }
163 dpavlin 15
164 dpavlin 536 =head2 set_lookup
165 dpavlin 15
166 dpavlin 536 Set current lookup hash
167 dpavlin 15
168 dpavlin 536 set_lookup( $lookup );
169 dpavlin 371
170 dpavlin 15 =cut
171    
172 dpavlin 536 my $lookup;
173 dpavlin 15
174 dpavlin 536 sub set_lookup {
175     $lookup = shift;
176     }
177 dpavlin 15
178 dpavlin 536 =head2 rec1
179 dpavlin 371
180 dpavlin 536 Return all values in some field
181 dpavlin 371
182 dpavlin 536 @v = rec1('200')
183 dpavlin 15
184 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
185 dpavlin 15
186 dpavlin 536 =cut
187 dpavlin 15
188 dpavlin 536 sub rec1 {
189     my $f = shift;
190     return unless (defined($rec) && defined($rec->{$f}));
191     if (ref($rec->{$f}) eq 'ARRAY') {
192     return map {
193     if (ref($_) eq 'HASH') {
194     values %{$_};
195 dpavlin 31 } else {
196 dpavlin 536 $_;
197 dpavlin 31 }
198 dpavlin 536 } @{ $rec->{$f} };
199     } elsif( defined($rec->{$f}) ) {
200     return $rec->{$f};
201 dpavlin 15 }
202     }
203    
204 dpavlin 536 =head2 rec2
205 dpavlin 15
206 dpavlin 536 Return all values in specific field and subfield
207 dpavlin 13
208 dpavlin 536 @v = rec2('200','a')
209 dpavlin 13
210     =cut
211    
212 dpavlin 536 sub rec2 {
213     my $f = shift;
214     return unless (defined($rec && $rec->{$f}));
215     my $sf = shift;
216     return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
217     }
218 dpavlin 13
219 dpavlin 536 =head2 rec
220 dpavlin 13
221 dpavlin 536 syntaxtic sugar for
222 dpavlin 13
223 dpavlin 536 @v = rec('200')
224     @v = rec('200','a')
225 dpavlin 13
226 dpavlin 536 =cut
227 dpavlin 373
228 dpavlin 536 sub rec {
229     if ($#_ == 0) {
230     return rec1(@_);
231     } elsif ($#_ == 1) {
232     return rec2(@_);
233 dpavlin 13 }
234     }
235    
236 dpavlin 536 =head2 regex
237 dpavlin 15
238 dpavlin 536 Apply regex to some or all values
239 dpavlin 15
240 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
241 dpavlin 15
242     =cut
243    
244 dpavlin 536 sub regex {
245     my $r = shift;
246     my @out;
247     #warn "r: $r\n",Dumper(\@_);
248     foreach my $t (@_) {
249     next unless ($t);
250     eval "\$t =~ $r";
251     push @out, $t if ($t && $t ne '');
252 dpavlin 368 }
253 dpavlin 536 return @out;
254 dpavlin 15 }
255    
256 dpavlin 536 =head2 prefix
257 dpavlin 15
258 dpavlin 536 Prefix all values with a string
259 dpavlin 15
260 dpavlin 536 @v = prefix( 'my_', @v );
261 dpavlin 15
262     =cut
263    
264 dpavlin 536 sub prefix {
265     my $p = shift or die "prefix needs string as first argument";
266     return map { $p . $_ } grep { defined($_) } @_;
267     }
268 dpavlin 15
269 dpavlin 536 =head2 suffix
270 dpavlin 15
271 dpavlin 536 suffix all values with a string
272 dpavlin 15
273 dpavlin 536 @v = suffix( '_my', @v );
274 dpavlin 15
275 dpavlin 536 =cut
276 dpavlin 15
277 dpavlin 536 sub suffix {
278     my $s = shift or die "suffix needs string as first argument";
279     return map { $_ . $s } grep { defined($_) } @_;
280 dpavlin 15 }
281    
282 dpavlin 536 =head2 surround
283 dpavlin 13
284 dpavlin 536 surround all values with a two strings
285 dpavlin 13
286 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
287 dpavlin 13
288     =cut
289    
290 dpavlin 536 sub surround {
291     my $p = shift or die "surround need prefix as first argument";
292     my $s = shift or die "surround needs suffix as second argument";
293     return map { $p . $_ . $s } grep { defined($_) } @_;
294 dpavlin 13 }
295    
296 dpavlin 536 =head2 first
297 dpavlin 13
298 dpavlin 536 Return first element
299 dpavlin 15
300 dpavlin 536 $v = first( @v );
301 dpavlin 13
302     =cut
303    
304 dpavlin 536 sub first {
305     my $r = shift;
306     return $r;
307 dpavlin 13 }
308    
309 dpavlin 536 =head2 lookup
310 dpavlin 13
311 dpavlin 536 Consult lookup hashes for some value
312 dpavlin 13
313 dpavlin 536 @v = lookup( $v );
314     @v = lookup( @v );
315 dpavlin 13
316     =cut
317    
318 dpavlin 536 sub lookup {
319     my $k = shift or return;
320     return unless (defined($lookup->{$k}));
321     if (ref($lookup->{$k}) eq 'ARRAY') {
322     return @{ $lookup->{$k} };
323     } else {
324     return $lookup->{$k};
325     }
326 dpavlin 13 }
327    
328 dpavlin 536 =head2 join_with
329 dpavlin 13
330 dpavlin 536 Joins walues with some delimiter
331 dpavlin 10
332 dpavlin 536 $v = join_with(", ", @v);
333 dpavlin 10
334 dpavlin 536 =cut
335 dpavlin 10
336 dpavlin 536 sub join_with {
337     my $d = shift;
338     return join($d, grep { defined($_) && $_ ne '' } @_);
339     }
340 dpavlin 10
341 dpavlin 536 # END
342     1;

  ViewVC Help
Powered by ViewVC 1.1.26