/[webpac2]/trunk/lib/WebPAC/Normalize/Set.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize/Set.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 490 - (hide annotations)
Sun May 14 12:35:20 2006 UTC (17 years, 11 months ago) by dpavlin
File size: 4846 byte(s)
 r637@llin:  dpavlin | 2006-05-14 14:38:22 +0200
 added data_structure (which does most of magic), support for semi-valid data structures

1 dpavlin 460 package WebPAC::Normalize::Set;
2     use Exporter 'import';
3     @EXPORT = qw/
4     set_rec set_lookup
5 dpavlin 461 get_ds clean_ds
6 dpavlin 460 tag search display
7 dpavlin 463 rec1 rec2 rec
8     regex prefix
9     first lookup join_with
10 dpavlin 460 /;
11    
12     use warnings;
13     use strict;
14    
15     #use base qw/WebPAC::Common/;
16     use Data::Dumper;
17    
18     =head1 NAME
19    
20     WebPAC::Normalize::Set - describe normalisaton rules using sets
21    
22     =head1 VERSION
23    
24 dpavlin 490 Version 0.04
25 dpavlin 460
26     =cut
27    
28 dpavlin 490 our $VERSION = '0.04';
29 dpavlin 460
30     =head1 SYNOPSIS
31    
32     This module uses C<conf/normalize/*.pl> files to perform normalisation
33 dpavlin 467 from input records using perl functions which are specialized for set
34     processing.
35 dpavlin 460
36 dpavlin 467 Sets are implemented as arrays, and normalisation file is valid perl, which
37     means that you check it's validity before running WebPAC using
38     C<perl -c normalize.pl>.
39    
40     Normalisation can generate multiple output normalized data. For now, supported output
41     types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42    
43 dpavlin 460 =head1 FUNCTIONS
44    
45 dpavlin 490 =head2 data_structure
46    
47     Return data structure
48    
49     my $ds = WebPAC::Normalize::Set(
50     lookup => $lookup->lookup_hash,
51     row => $row,
52     rules => $normalize_pl_config,
53     );
54    
55     This function will B<die> if normalizastion can't be evaled.
56    
57     =cut
58    
59     sub data_structure {
60     my $arg = {@_};
61    
62     die "need row argument" unless ($arg->{row});
63     die "need normalisation argument" unless ($arg->{rules});
64    
65     no strict 'subs';
66     set_lookup( $arg->{lookup} );
67     set_rec( $arg->{row} );
68     clean_ds();
69     eval "$arg->{rules}";
70     die "error evaling $arg->{rules}: $@\n" if ($@);
71     return get_ds();
72     }
73    
74 dpavlin 460 =head2 set_rec
75    
76     Set current record hash
77    
78     set_rec( $rec );
79    
80     =cut
81    
82     my $rec;
83    
84     sub set_rec {
85     $rec = shift or die "no record hash";
86     }
87    
88     =head2 tag
89    
90 dpavlin 467 Define new tag for I<search> and I<display>.
91 dpavlin 460
92     tag('Title', rec('200','a') );
93    
94    
95     =cut
96    
97     my $out;
98    
99     sub tag {
100 dpavlin 463 my $name = shift or die "tag needs name as first argument";
101 dpavlin 472 my @o = grep { defined($_) && $_ ne '' } @_;
102     return unless (@o);
103 dpavlin 460 $out->{$name}->{tag} = $name;
104 dpavlin 472 $out->{$name}->{search} = \@o;
105     $out->{$name}->{display} = \@o;
106 dpavlin 460 }
107    
108     =head2 display
109    
110     Define tag just for I<display>
111    
112     @v = display('Title', rec('200','a') );
113    
114     =cut
115    
116     sub display {
117 dpavlin 463 my $name = shift or die "display needs name as first argument";
118 dpavlin 472 my @o = grep { defined($_) && $_ ne '' } @_;
119     return unless (@o);
120 dpavlin 460 $out->{$name}->{tag} = $name;
121 dpavlin 472 $out->{$name}->{display} = \@o;
122 dpavlin 460 }
123    
124     =head2 search
125    
126     Prepare values just for I<search>
127    
128     @v = search('Title', rec('200','a') );
129    
130     =cut
131    
132     sub search {
133 dpavlin 463 my $name = shift or die "search needs name as first argument";
134 dpavlin 472 my @o = grep { defined($_) && $_ ne '' } @_;
135     return unless (@o);
136 dpavlin 460 $out->{$name}->{tag} = $name;
137 dpavlin 472 $out->{$name}->{search} = \@o;
138 dpavlin 460 }
139    
140     =head2 get_ds
141    
142     Return hash formatted as data structure
143    
144     my $ds = get_ds();
145    
146     =cut
147    
148     sub get_ds {
149     return $out;
150     }
151    
152 dpavlin 461 =head2 clean_ds
153    
154     Clean data structure hash for next record
155    
156     clean_ds();
157    
158     =cut
159    
160     sub clean_ds {
161     $out = undef;
162     }
163    
164     =head2 set_lookup
165    
166     Set current lookup hash
167    
168     set_lookup( $lookup );
169    
170     =cut
171    
172 dpavlin 460 my $lookup;
173    
174     sub set_lookup {
175 dpavlin 475 $lookup = shift;
176 dpavlin 460 }
177    
178     =head2 rec1
179    
180     Return all values in some field
181    
182     @v = rec1('200')
183    
184     TODO: order of values is probably same as in source data, need to investigate that
185    
186     =cut
187    
188     sub rec1 {
189     my $f = shift;
190 dpavlin 490 return unless (defined($rec) && defined($rec->{$f}));
191 dpavlin 460 if (ref($rec->{$f}) eq 'ARRAY') {
192 dpavlin 490 return map {
193     if (ref($_) eq 'HASH') {
194     values %{$_};
195     } else {
196     $_;
197     }
198     } @{ $rec->{$f} };
199 dpavlin 460 } elsif( defined($rec->{$f}) ) {
200     return $rec->{$f};
201     }
202     }
203    
204     =head2 rec2
205    
206     Return all values in specific field and subfield
207    
208     @v = rec2('200','a')
209    
210     =cut
211    
212     sub rec2 {
213     my $f = shift;
214 dpavlin 461 return unless (defined($rec && $rec->{$f}));
215 dpavlin 460 my $sf = shift;
216 dpavlin 490 return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
217 dpavlin 460 }
218    
219     =head2 rec
220    
221     syntaxtic sugar for
222    
223     @v = rec('200')
224     @v = rec('200','a')
225    
226     =cut
227    
228     sub rec {
229     if ($#_ == 0) {
230     return rec1(@_);
231     } elsif ($#_ == 1) {
232     return rec2(@_);
233     }
234     }
235    
236     =head2 regex
237    
238     Apply regex to some or all values
239    
240     @v = regex( 's/foo/bar/g', @v );
241    
242     =cut
243    
244     sub regex {
245     my $r = shift;
246     my @out;
247 dpavlin 465 #warn "r: $r\n",Dumper(\@_);
248 dpavlin 460 foreach my $t (@_) {
249     eval "\$t =~ $r";
250     push @out, $t;
251     }
252     return @out;
253     }
254    
255 dpavlin 463 =head2 prefix
256    
257     Prefix all values with a string
258    
259     @v = prefix( 'my_', @v );
260    
261     =cut
262    
263     sub prefix {
264     my $p = shift or die "prefix needs string as first argument";
265 dpavlin 467 return map { $p . $_ } grep { defined($_) } @_;
266 dpavlin 463 }
267    
268 dpavlin 460 =head2 first
269    
270     Return first element
271    
272     $v = first( @v );
273    
274     =cut
275    
276     sub first {
277     my $r = shift;
278     return $r;
279     }
280    
281     =head2 lookup
282    
283     Consult lookup hashes for some value
284    
285     @v = lookup( $v );
286     @v = lookup( @v );
287    
288     =cut
289    
290     sub lookup {
291 dpavlin 465 my $k = shift or return;
292     return unless (defined($lookup->{$k}));
293 dpavlin 460 if (ref($lookup->{$k}) eq 'ARRAY') {
294     return @{ $lookup->{$k} };
295     } else {
296     return $lookup->{$k};
297     }
298     }
299    
300     =head2 join_with
301    
302     Joins walues with some delimiter
303    
304     $v = join_with(", ", @v);
305    
306     =cut
307    
308     sub join_with {
309     my $d = shift;
310 dpavlin 486 return join($d, grep { defined($_) && $_ ne '' } @_);
311 dpavlin 460 }
312    
313     # END
314     1;

  ViewVC Help
Powered by ViewVC 1.1.26