/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 536 - (show annotations)
Mon Jun 26 16:39:51 2006 UTC (16 years, 7 months ago) by dpavlin
File size: 5409 byte(s)
 r719@llin:  dpavlin | 2006-06-26 18:40:57 +0200
 big refacture: depriciate and remove all normalisation formats except .pl sets (but
 old code is still available in WebPAC::Lookup::Normalize because lookups use it) [2.20]

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 set_rec set_lookup
5 get_ds clean_ds
6 tag search display
7 rec1 rec2 rec
8 regex prefix suffix surround
9 first lookup join_with
10 /;
11
12 use warnings;
13 use strict;
14
15 #use base qw/WebPAC::Common/;
16 use Data::Dumper;
17
18 =head1 NAME
19
20 WebPAC::Normalize - describe normalisaton rules using sets
21
22 =head1 VERSION
23
24 Version 0.04
25
26 =cut
27
28 our $VERSION = '0.04';
29
30 =head1 SYNOPSIS
31
32 This module uses C<conf/normalize/*.pl> files to perform normalisation
33 from input records using perl functions which are specialized for set
34 processing.
35
36 Sets are implemented as arrays, and normalisation file is valid perl, which
37 means that you check it's validity before running WebPAC using
38 C<perl -c normalize.pl>.
39
40 Normalisation can generate multiple output normalized data. For now, supported output
41 types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42
43 =head1 FUNCTIONS
44
45 =head2 data_structure
46
47 Return data structure
48
49 my $ds = WebPAC::Normalize(
50 lookup => $lookup->lookup_hash,
51 row => $row,
52 rules => $normalize_pl_config,
53 );
54
55 This function will B<die> if normalizastion can't be evaled.
56
57 =cut
58
59 sub data_structure {
60 my $arg = {@_};
61
62 die "need row argument" unless ($arg->{row});
63 die "need normalisation argument" unless ($arg->{rules});
64
65 no strict 'subs';
66 set_lookup( $arg->{lookup} );
67 set_rec( $arg->{row} );
68 clean_ds();
69 eval "$arg->{rules}";
70 die "error evaling $arg->{rules}: $@\n" if ($@);
71 return get_ds();
72 }
73
74 =head2 set_rec
75
76 Set current record hash
77
78 set_rec( $rec );
79
80 =cut
81
82 my $rec;
83
84 sub set_rec {
85 $rec = shift or die "no record hash";
86 }
87
88 =head2 tag
89
90 Define new tag for I<search> and I<display>.
91
92 tag('Title', rec('200','a') );
93
94
95 =cut
96
97 my $out;
98
99 sub tag {
100 my $name = shift or die "tag needs name as first argument";
101 my @o = grep { defined($_) && $_ ne '' } @_;
102 return unless (@o);
103 $out->{$name}->{tag} = $name;
104 $out->{$name}->{search} = \@o;
105 $out->{$name}->{display} = \@o;
106 }
107
108 =head2 display
109
110 Define tag just for I<display>
111
112 @v = display('Title', rec('200','a') );
113
114 =cut
115
116 sub display {
117 my $name = shift or die "display needs name as first argument";
118 my @o = grep { defined($_) && $_ ne '' } @_;
119 return unless (@o);
120 $out->{$name}->{tag} = $name;
121 $out->{$name}->{display} = \@o;
122 }
123
124 =head2 search
125
126 Prepare values just for I<search>
127
128 @v = search('Title', rec('200','a') );
129
130 =cut
131
132 sub search {
133 my $name = shift or die "search needs name as first argument";
134 my @o = grep { defined($_) && $_ ne '' } @_;
135 return unless (@o);
136 $out->{$name}->{tag} = $name;
137 $out->{$name}->{search} = \@o;
138 }
139
140 =head2 get_ds
141
142 Return hash formatted as data structure
143
144 my $ds = get_ds();
145
146 =cut
147
148 sub get_ds {
149 return $out;
150 }
151
152 =head2 clean_ds
153
154 Clean data structure hash for next record
155
156 clean_ds();
157
158 =cut
159
160 sub clean_ds {
161 $out = undef;
162 }
163
164 =head2 set_lookup
165
166 Set current lookup hash
167
168 set_lookup( $lookup );
169
170 =cut
171
172 my $lookup;
173
174 sub set_lookup {
175 $lookup = shift;
176 }
177
178 =head2 rec1
179
180 Return all values in some field
181
182 @v = rec1('200')
183
184 TODO: order of values is probably same as in source data, need to investigate that
185
186 =cut
187
188 sub rec1 {
189 my $f = shift;
190 return unless (defined($rec) && defined($rec->{$f}));
191 if (ref($rec->{$f}) eq 'ARRAY') {
192 return map {
193 if (ref($_) eq 'HASH') {
194 values %{$_};
195 } else {
196 $_;
197 }
198 } @{ $rec->{$f} };
199 } elsif( defined($rec->{$f}) ) {
200 return $rec->{$f};
201 }
202 }
203
204 =head2 rec2
205
206 Return all values in specific field and subfield
207
208 @v = rec2('200','a')
209
210 =cut
211
212 sub rec2 {
213 my $f = shift;
214 return unless (defined($rec && $rec->{$f}));
215 my $sf = shift;
216 return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
217 }
218
219 =head2 rec
220
221 syntaxtic sugar for
222
223 @v = rec('200')
224 @v = rec('200','a')
225
226 =cut
227
228 sub rec {
229 if ($#_ == 0) {
230 return rec1(@_);
231 } elsif ($#_ == 1) {
232 return rec2(@_);
233 }
234 }
235
236 =head2 regex
237
238 Apply regex to some or all values
239
240 @v = regex( 's/foo/bar/g', @v );
241
242 =cut
243
244 sub regex {
245 my $r = shift;
246 my @out;
247 #warn "r: $r\n",Dumper(\@_);
248 foreach my $t (@_) {
249 next unless ($t);
250 eval "\$t =~ $r";
251 push @out, $t if ($t && $t ne '');
252 }
253 return @out;
254 }
255
256 =head2 prefix
257
258 Prefix all values with a string
259
260 @v = prefix( 'my_', @v );
261
262 =cut
263
264 sub prefix {
265 my $p = shift or die "prefix needs string as first argument";
266 return map { $p . $_ } grep { defined($_) } @_;
267 }
268
269 =head2 suffix
270
271 suffix all values with a string
272
273 @v = suffix( '_my', @v );
274
275 =cut
276
277 sub suffix {
278 my $s = shift or die "suffix needs string as first argument";
279 return map { $_ . $s } grep { defined($_) } @_;
280 }
281
282 =head2 surround
283
284 surround all values with a two strings
285
286 @v = surround( 'prefix_', '_suffix', @v );
287
288 =cut
289
290 sub surround {
291 my $p = shift or die "surround need prefix as first argument";
292 my $s = shift or die "surround needs suffix as second argument";
293 return map { $p . $_ . $s } grep { defined($_) } @_;
294 }
295
296 =head2 first
297
298 Return first element
299
300 $v = first( @v );
301
302 =cut
303
304 sub first {
305 my $r = shift;
306 return $r;
307 }
308
309 =head2 lookup
310
311 Consult lookup hashes for some value
312
313 @v = lookup( $v );
314 @v = lookup( @v );
315
316 =cut
317
318 sub lookup {
319 my $k = shift or return;
320 return unless (defined($lookup->{$k}));
321 if (ref($lookup->{$k}) eq 'ARRAY') {
322 return @{ $lookup->{$k} };
323 } else {
324 return $lookup->{$k};
325 }
326 }
327
328 =head2 join_with
329
330 Joins walues with some delimiter
331
332 $v = join_with(", ", @v);
333
334 =cut
335
336 sub join_with {
337 my $d = shift;
338 return join($d, grep { defined($_) && $_ ne '' } @_);
339 }
340
341 # END
342 1;

  ViewVC Help
Powered by ViewVC 1.1.26