/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 538 - (hide annotations)
Thu Jun 29 15:29:19 2006 UTC (16 years, 7 months ago) by dpavlin
File size: 5692 byte(s)
 r722@llin:  dpavlin | 2006-06-26 21:29:56 +0200
 make private funtions with _

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3     @EXPORT = qw/
4 dpavlin 538 _set_rec _set_lookup
5     _get_ds _clean_ds
6    
7 dpavlin 536 tag search display
8     rec1 rec2 rec
9     regex prefix suffix surround
10     first lookup join_with
11     /;
12 dpavlin 10
13     use warnings;
14     use strict;
15 dpavlin 536
16     #use base qw/WebPAC::Common/;
17 dpavlin 13 use Data::Dumper;
18 dpavlin 10
19     =head1 NAME
20    
21 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
22 dpavlin 10
23     =head1 VERSION
24    
25 dpavlin 538 Version 0.05
26 dpavlin 10
27     =cut
28    
29 dpavlin 538 our $VERSION = '0.05';
30 dpavlin 10
31     =head1 SYNOPSIS
32    
33 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
34     from input records using perl functions which are specialized for set
35     processing.
36 dpavlin 10
37 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
38     means that you check it's validity before running WebPAC using
39     C<perl -c normalize.pl>.
40 dpavlin 15
41 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
42     types (on the left side of definition) are: C<tag>, C<display> and C<search>.
43 dpavlin 15
44 dpavlin 10 =head1 FUNCTIONS
45    
46 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
47     All other functions are available for use within normalisation rules.
48    
49 dpavlin 536 =head2 data_structure
50 dpavlin 10
51 dpavlin 536 Return data structure
52 dpavlin 13
53 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
54 dpavlin 536 lookup => $lookup->lookup_hash,
55     row => $row,
56     rules => $normalize_pl_config,
57 dpavlin 13 );
58    
59 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
60 dpavlin 15
61 dpavlin 538 Since this function isn't exported you have to call it with
62     C<WebPAC::Normalize::data_structure>.
63    
64 dpavlin 10 =cut
65    
66 dpavlin 536 sub data_structure {
67     my $arg = {@_};
68 dpavlin 13
69 dpavlin 536 die "need row argument" unless ($arg->{row});
70     die "need normalisation argument" unless ($arg->{rules});
71 dpavlin 31
72 dpavlin 536 no strict 'subs';
73 dpavlin 538 _set_lookup( $arg->{lookup} );
74     _set_rec( $arg->{row} );
75     _clean_ds();
76 dpavlin 536 eval "$arg->{rules}";
77     die "error evaling $arg->{rules}: $@\n" if ($@);
78 dpavlin 538 return _get_ds();
79 dpavlin 10 }
80    
81 dpavlin 538 =head2 _set_rec
82 dpavlin 13
83 dpavlin 536 Set current record hash
84 dpavlin 433
85 dpavlin 538 _set_rec( $rec );
86 dpavlin 433
87     =cut
88    
89 dpavlin 536 my $rec;
90 dpavlin 433
91 dpavlin 538 sub _set_rec {
92 dpavlin 536 $rec = shift or die "no record hash";
93 dpavlin 433 }
94    
95 dpavlin 538 =head2 _get_ds
96    
97     Return hash formatted as data structure
98    
99     my $ds = _get_ds();
100    
101     =cut
102    
103     my $out;
104    
105     sub _get_ds {
106     return $out;
107     }
108    
109     =head2 _clean_ds
110    
111     Clean data structure hash for next record
112    
113     _clean_ds();
114    
115     =cut
116    
117     sub _clean_ds {
118     $out = undef;
119     }
120    
121     =head2 _set_lookup
122    
123     Set current lookup hash
124    
125     _set_lookup( $lookup );
126    
127     =cut
128    
129     my $lookup;
130    
131     sub _set_lookup {
132     $lookup = shift;
133     }
134    
135 dpavlin 536 =head2 tag
136 dpavlin 433
137 dpavlin 536 Define new tag for I<search> and I<display>.
138 dpavlin 433
139 dpavlin 536 tag('Title', rec('200','a') );
140 dpavlin 13
141    
142     =cut
143    
144 dpavlin 536 sub tag {
145     my $name = shift or die "tag needs name as first argument";
146     my @o = grep { defined($_) && $_ ne '' } @_;
147     return unless (@o);
148     $out->{$name}->{tag} = $name;
149     $out->{$name}->{search} = \@o;
150     $out->{$name}->{display} = \@o;
151     }
152 dpavlin 13
153 dpavlin 536 =head2 display
154 dpavlin 13
155 dpavlin 536 Define tag just for I<display>
156 dpavlin 125
157 dpavlin 536 @v = display('Title', rec('200','a') );
158 dpavlin 125
159 dpavlin 536 =cut
160 dpavlin 125
161 dpavlin 536 sub display {
162     my $name = shift or die "display needs name as first argument";
163     my @o = grep { defined($_) && $_ ne '' } @_;
164     return unless (@o);
165     $out->{$name}->{tag} = $name;
166     $out->{$name}->{display} = \@o;
167     }
168 dpavlin 13
169 dpavlin 536 =head2 search
170 dpavlin 13
171 dpavlin 536 Prepare values just for I<search>
172 dpavlin 13
173 dpavlin 536 @v = search('Title', rec('200','a') );
174 dpavlin 433
175 dpavlin 536 =cut
176 dpavlin 13
177 dpavlin 536 sub search {
178     my $name = shift or die "search needs name as first argument";
179     my @o = grep { defined($_) && $_ ne '' } @_;
180     return unless (@o);
181     $out->{$name}->{tag} = $name;
182     $out->{$name}->{search} = \@o;
183 dpavlin 13 }
184    
185 dpavlin 536 =head2 rec1
186 dpavlin 371
187 dpavlin 536 Return all values in some field
188 dpavlin 371
189 dpavlin 536 @v = rec1('200')
190 dpavlin 15
191 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
192 dpavlin 15
193 dpavlin 536 =cut
194 dpavlin 15
195 dpavlin 536 sub rec1 {
196     my $f = shift;
197     return unless (defined($rec) && defined($rec->{$f}));
198     if (ref($rec->{$f}) eq 'ARRAY') {
199     return map {
200     if (ref($_) eq 'HASH') {
201     values %{$_};
202 dpavlin 31 } else {
203 dpavlin 536 $_;
204 dpavlin 31 }
205 dpavlin 536 } @{ $rec->{$f} };
206     } elsif( defined($rec->{$f}) ) {
207     return $rec->{$f};
208 dpavlin 15 }
209     }
210    
211 dpavlin 536 =head2 rec2
212 dpavlin 15
213 dpavlin 536 Return all values in specific field and subfield
214 dpavlin 13
215 dpavlin 536 @v = rec2('200','a')
216 dpavlin 13
217     =cut
218    
219 dpavlin 536 sub rec2 {
220     my $f = shift;
221     return unless (defined($rec && $rec->{$f}));
222     my $sf = shift;
223     return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
224     }
225 dpavlin 13
226 dpavlin 536 =head2 rec
227 dpavlin 13
228 dpavlin 536 syntaxtic sugar for
229 dpavlin 13
230 dpavlin 536 @v = rec('200')
231     @v = rec('200','a')
232 dpavlin 13
233 dpavlin 536 =cut
234 dpavlin 373
235 dpavlin 536 sub rec {
236     if ($#_ == 0) {
237     return rec1(@_);
238     } elsif ($#_ == 1) {
239     return rec2(@_);
240 dpavlin 13 }
241     }
242    
243 dpavlin 536 =head2 regex
244 dpavlin 15
245 dpavlin 536 Apply regex to some or all values
246 dpavlin 15
247 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
248 dpavlin 15
249     =cut
250    
251 dpavlin 536 sub regex {
252     my $r = shift;
253     my @out;
254     #warn "r: $r\n",Dumper(\@_);
255     foreach my $t (@_) {
256     next unless ($t);
257     eval "\$t =~ $r";
258     push @out, $t if ($t && $t ne '');
259 dpavlin 368 }
260 dpavlin 536 return @out;
261 dpavlin 15 }
262    
263 dpavlin 536 =head2 prefix
264 dpavlin 15
265 dpavlin 536 Prefix all values with a string
266 dpavlin 15
267 dpavlin 536 @v = prefix( 'my_', @v );
268 dpavlin 15
269     =cut
270    
271 dpavlin 536 sub prefix {
272     my $p = shift or die "prefix needs string as first argument";
273     return map { $p . $_ } grep { defined($_) } @_;
274     }
275 dpavlin 15
276 dpavlin 536 =head2 suffix
277 dpavlin 15
278 dpavlin 536 suffix all values with a string
279 dpavlin 15
280 dpavlin 536 @v = suffix( '_my', @v );
281 dpavlin 15
282 dpavlin 536 =cut
283 dpavlin 15
284 dpavlin 536 sub suffix {
285     my $s = shift or die "suffix needs string as first argument";
286     return map { $_ . $s } grep { defined($_) } @_;
287 dpavlin 15 }
288    
289 dpavlin 536 =head2 surround
290 dpavlin 13
291 dpavlin 536 surround all values with a two strings
292 dpavlin 13
293 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
294 dpavlin 13
295     =cut
296    
297 dpavlin 536 sub surround {
298     my $p = shift or die "surround need prefix as first argument";
299     my $s = shift or die "surround needs suffix as second argument";
300     return map { $p . $_ . $s } grep { defined($_) } @_;
301 dpavlin 13 }
302    
303 dpavlin 536 =head2 first
304 dpavlin 13
305 dpavlin 536 Return first element
306 dpavlin 15
307 dpavlin 536 $v = first( @v );
308 dpavlin 13
309     =cut
310    
311 dpavlin 536 sub first {
312     my $r = shift;
313     return $r;
314 dpavlin 13 }
315    
316 dpavlin 536 =head2 lookup
317 dpavlin 13
318 dpavlin 536 Consult lookup hashes for some value
319 dpavlin 13
320 dpavlin 536 @v = lookup( $v );
321     @v = lookup( @v );
322 dpavlin 13
323     =cut
324    
325 dpavlin 536 sub lookup {
326     my $k = shift or return;
327     return unless (defined($lookup->{$k}));
328     if (ref($lookup->{$k}) eq 'ARRAY') {
329     return @{ $lookup->{$k} };
330     } else {
331     return $lookup->{$k};
332     }
333 dpavlin 13 }
334    
335 dpavlin 536 =head2 join_with
336 dpavlin 13
337 dpavlin 536 Joins walues with some delimiter
338 dpavlin 10
339 dpavlin 536 $v = join_with(", ", @v);
340 dpavlin 10
341 dpavlin 536 =cut
342 dpavlin 10
343 dpavlin 536 sub join_with {
344     my $d = shift;
345     return join($d, grep { defined($_) && $_ ne '' } @_);
346     }
347 dpavlin 10
348 dpavlin 536 # END
349     1;

  ViewVC Help
Powered by ViewVC 1.1.26