/[webpac2]/trunk/lib/WebPAC/Normalize/Set.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/Set.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 501 - (hide annotations)
Sun May 14 22:08:51 2006 UTC (17 years, 11 months ago) by dpavlin
File size: 5424 byte(s)
added suffix and surround, regex now skips empty and undef values

1 dpavlin 460 package WebPAC::Normalize::Set;
2     use Exporter 'import';
3     @EXPORT = qw/
4     set_rec set_lookup
5 dpavlin 461 get_ds clean_ds
6 dpavlin 460 tag search display
7 dpavlin 463 rec1 rec2 rec
8 dpavlin 501 regex prefix suffix surround
9 dpavlin 463 first lookup join_with
10 dpavlin 460 /;
11    
12     use warnings;
13     use strict;
14    
15     #use base qw/WebPAC::Common/;
16     use Data::Dumper;
17    
18     =head1 NAME
19    
20     WebPAC::Normalize::Set - describe normalisaton rules using sets
21    
22     =head1 VERSION
23    
24 dpavlin 490 Version 0.04
25 dpavlin 460
26     =cut
27    
28 dpavlin 490 our $VERSION = '0.04';
29 dpavlin 460
30     =head1 SYNOPSIS
31    
32     This module uses C<conf/normalize/*.pl> files to perform normalisation
33 dpavlin 467 from input records using perl functions which are specialized for set
34     processing.
35 dpavlin 460
36 dpavlin 467 Sets are implemented as arrays, and normalisation file is valid perl, which
37     means that you check it's validity before running WebPAC using
38     C<perl -c normalize.pl>.
39    
40     Normalisation can generate multiple output normalized data. For now, supported output
41     types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42    
43 dpavlin 460 =head1 FUNCTIONS
44    
45 dpavlin 490 =head2 data_structure
46    
47     Return data structure
48    
49     my $ds = WebPAC::Normalize::Set(
50     lookup => $lookup->lookup_hash,
51     row => $row,
52     rules => $normalize_pl_config,
53     );
54    
55     This function will B<die> if normalizastion can't be evaled.
56    
57     =cut
58    
59     sub data_structure {
60     my $arg = {@_};
61    
62     die "need row argument" unless ($arg->{row});
63     die "need normalisation argument" unless ($arg->{rules});
64    
65     no strict 'subs';
66     set_lookup( $arg->{lookup} );
67     set_rec( $arg->{row} );
68     clean_ds();
69     eval "$arg->{rules}";
70     die "error evaling $arg->{rules}: $@\n" if ($@);
71     return get_ds();
72     }
73    
74 dpavlin 460 =head2 set_rec
75    
76     Set current record hash
77    
78     set_rec( $rec );
79    
80     =cut
81    
82     my $rec;
83    
84     sub set_rec {
85     $rec = shift or die "no record hash";
86     }
87    
88     =head2 tag
89    
90 dpavlin 467 Define new tag for I<search> and I<display>.
91 dpavlin 460
92     tag('Title', rec('200','a') );
93    
94    
95     =cut
96    
97     my $out;
98    
99     sub tag {
100 dpavlin 463 my $name = shift or die "tag needs name as first argument";
101 dpavlin 472 my @o = grep { defined($_) && $_ ne '' } @_;
102     return unless (@o);
103 dpavlin 460 $out->{$name}->{tag} = $name;
104 dpavlin 472 $out->{$name}->{search} = \@o;
105     $out->{$name}->{display} = \@o;
106 dpavlin 460 }
107    
108     =head2 display
109    
110     Define tag just for I<display>
111    
112     @v = display('Title', rec('200','a') );
113    
114     =cut
115    
116     sub display {
117 dpavlin 463 my $name = shift or die "display needs name as first argument";
118 dpavlin 472 my @o = grep { defined($_) && $_ ne '' } @_;
119     return unless (@o);
120 dpavlin 460 $out->{$name}->{tag} = $name;
121 dpavlin 472 $out->{$name}->{display} = \@o;
122 dpavlin 460 }
123    
124     =head2 search
125    
126     Prepare values just for I<search>
127    
128     @v = search('Title', rec('200','a') );
129    
130     =cut
131    
132     sub search {
133 dpavlin 463 my $name = shift or die "search needs name as first argument";
134 dpavlin 472 my @o = grep { defined($_) && $_ ne '' } @_;
135     return unless (@o);
136 dpavlin 460 $out->{$name}->{tag} = $name;
137 dpavlin 472 $out->{$name}->{search} = \@o;
138 dpavlin 460 }
139    
140     =head2 get_ds
141    
142     Return hash formatted as data structure
143    
144     my $ds = get_ds();
145    
146     =cut
147    
148     sub get_ds {
149     return $out;
150     }
151    
152 dpavlin 461 =head2 clean_ds
153    
154     Clean data structure hash for next record
155    
156     clean_ds();
157    
158     =cut
159    
160     sub clean_ds {
161     $out = undef;
162     }
163    
164     =head2 set_lookup
165    
166     Set current lookup hash
167    
168     set_lookup( $lookup );
169    
170     =cut
171    
172 dpavlin 460 my $lookup;
173    
174     sub set_lookup {
175 dpavlin 475 $lookup = shift;
176 dpavlin 460 }
177    
178     =head2 rec1
179    
180     Return all values in some field
181    
182     @v = rec1('200')
183    
184     TODO: order of values is probably same as in source data, need to investigate that
185    
186     =cut
187    
188     sub rec1 {
189     my $f = shift;
190 dpavlin 490 return unless (defined($rec) && defined($rec->{$f}));
191 dpavlin 460 if (ref($rec->{$f}) eq 'ARRAY') {
192 dpavlin 490 return map {
193     if (ref($_) eq 'HASH') {
194     values %{$_};
195     } else {
196     $_;
197     }
198     } @{ $rec->{$f} };
199 dpavlin 460 } elsif( defined($rec->{$f}) ) {
200     return $rec->{$f};
201     }
202     }
203    
204     =head2 rec2
205    
206     Return all values in specific field and subfield
207    
208     @v = rec2('200','a')
209    
210     =cut
211    
212     sub rec2 {
213     my $f = shift;
214 dpavlin 461 return unless (defined($rec && $rec->{$f}));
215 dpavlin 460 my $sf = shift;
216 dpavlin 490 return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
217 dpavlin 460 }
218    
219     =head2 rec
220    
221     syntaxtic sugar for
222    
223     @v = rec('200')
224     @v = rec('200','a')
225    
226     =cut
227    
228     sub rec {
229     if ($#_ == 0) {
230     return rec1(@_);
231     } elsif ($#_ == 1) {
232     return rec2(@_);
233     }
234     }
235    
236     =head2 regex
237    
238     Apply regex to some or all values
239    
240     @v = regex( 's/foo/bar/g', @v );
241    
242     =cut
243    
244     sub regex {
245     my $r = shift;
246     my @out;
247 dpavlin 465 #warn "r: $r\n",Dumper(\@_);
248 dpavlin 460 foreach my $t (@_) {
249 dpavlin 501 next unless ($t);
250 dpavlin 460 eval "\$t =~ $r";
251 dpavlin 501 push @out, $t if ($t && $t ne '');
252 dpavlin 460 }
253     return @out;
254     }
255    
256 dpavlin 463 =head2 prefix
257    
258     Prefix all values with a string
259    
260     @v = prefix( 'my_', @v );
261    
262     =cut
263    
264     sub prefix {
265     my $p = shift or die "prefix needs string as first argument";
266 dpavlin 467 return map { $p . $_ } grep { defined($_) } @_;
267 dpavlin 463 }
268    
269 dpavlin 501 =head2 suffix
270    
271     suffix all values with a string
272    
273     @v = suffix( '_my', @v );
274    
275     =cut
276    
277     sub suffix {
278     my $s = shift or die "suffix needs string as first argument";
279     return map { $_ . $s } grep { defined($_) } @_;
280     }
281    
282     =head2 surround
283    
284     surround all values with a two strings
285    
286     @v = surround( 'prefix_', '_suffix', @v );
287    
288     =cut
289    
290     sub surround {
291     my $p = shift or die "surround need prefix as first argument";
292     my $s = shift or die "surround needs suffix as second argument";
293     return map { $p . $_ . $s } grep { defined($_) } @_;
294     }
295    
296 dpavlin 460 =head2 first
297    
298     Return first element
299    
300     $v = first( @v );
301    
302     =cut
303    
304     sub first {
305     my $r = shift;
306     return $r;
307     }
308    
309     =head2 lookup
310    
311     Consult lookup hashes for some value
312    
313     @v = lookup( $v );
314     @v = lookup( @v );
315    
316     =cut
317    
318     sub lookup {
319 dpavlin 465 my $k = shift or return;
320     return unless (defined($lookup->{$k}));
321 dpavlin 460 if (ref($lookup->{$k}) eq 'ARRAY') {
322     return @{ $lookup->{$k} };
323     } else {
324     return $lookup->{$k};
325     }
326     }
327    
328     =head2 join_with
329    
330     Joins walues with some delimiter
331    
332     $v = join_with(", ", @v);
333    
334     =cut
335    
336     sub join_with {
337     my $d = shift;
338 dpavlin 486 return join($d, grep { defined($_) && $_ ne '' } @_);
339 dpavlin 460 }
340    
341     # END
342     1;

  ViewVC Help
Powered by ViewVC 1.1.26