/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 540 - (hide annotations)
Thu Jun 29 15:29:41 2006 UTC (16 years, 7 months ago) by dpavlin
File size: 6608 byte(s)
 r726@llin:  dpavlin | 2006-06-29 17:31:13 +0200
 add marc21 to normalize and create MARC file from those data [2.22]

1 dpavlin 10 package WebPAC::Normalize;
2 dpavlin 536 use Exporter 'import';
3     @EXPORT = qw/
4 dpavlin 538 _set_rec _set_lookup
5     _get_ds _clean_ds
6    
7 dpavlin 536 tag search display
8 dpavlin 540 marc21
9    
10 dpavlin 536 rec1 rec2 rec
11     regex prefix suffix surround
12     first lookup join_with
13     /;
14 dpavlin 10
15     use warnings;
16     use strict;
17 dpavlin 536
18     #use base qw/WebPAC::Common/;
19 dpavlin 13 use Data::Dumper;
20 dpavlin 10
21     =head1 NAME
22    
23 dpavlin 536 WebPAC::Normalize - describe normalisaton rules using sets
24 dpavlin 10
25     =head1 VERSION
26    
27 dpavlin 538 Version 0.05
28 dpavlin 10
29     =cut
30    
31 dpavlin 538 our $VERSION = '0.05';
32 dpavlin 10
33     =head1 SYNOPSIS
34    
35 dpavlin 536 This module uses C<conf/normalize/*.pl> files to perform normalisation
36     from input records using perl functions which are specialized for set
37     processing.
38 dpavlin 10
39 dpavlin 536 Sets are implemented as arrays, and normalisation file is valid perl, which
40     means that you check it's validity before running WebPAC using
41     C<perl -c normalize.pl>.
42 dpavlin 15
43 dpavlin 536 Normalisation can generate multiple output normalized data. For now, supported output
44 dpavlin 540 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
45     C<marc21>.
46 dpavlin 15
47 dpavlin 10 =head1 FUNCTIONS
48    
49 dpavlin 538 Functions which start with C<_> are private and used by WebPAC internally.
50     All other functions are available for use within normalisation rules.
51    
52 dpavlin 536 =head2 data_structure
53 dpavlin 10
54 dpavlin 536 Return data structure
55 dpavlin 13
56 dpavlin 538 my $ds = WebPAC::Normalize::data_structure(
57 dpavlin 536 lookup => $lookup->lookup_hash,
58     row => $row,
59     rules => $normalize_pl_config,
60 dpavlin 13 );
61    
62 dpavlin 540 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
63     other are optional.
64    
65 dpavlin 536 This function will B<die> if normalizastion can't be evaled.
66 dpavlin 15
67 dpavlin 538 Since this function isn't exported you have to call it with
68     C<WebPAC::Normalize::data_structure>.
69    
70 dpavlin 10 =cut
71    
72 dpavlin 536 sub data_structure {
73     my $arg = {@_};
74 dpavlin 13
75 dpavlin 536 die "need row argument" unless ($arg->{row});
76     die "need normalisation argument" unless ($arg->{rules});
77 dpavlin 31
78 dpavlin 536 no strict 'subs';
79 dpavlin 538 _set_lookup( $arg->{lookup} );
80     _set_rec( $arg->{row} );
81     _clean_ds();
82 dpavlin 540
83 dpavlin 536 eval "$arg->{rules}";
84     die "error evaling $arg->{rules}: $@\n" if ($@);
85 dpavlin 540
86 dpavlin 538 return _get_ds();
87 dpavlin 10 }
88    
89 dpavlin 538 =head2 _set_rec
90 dpavlin 13
91 dpavlin 536 Set current record hash
92 dpavlin 433
93 dpavlin 538 _set_rec( $rec );
94 dpavlin 433
95     =cut
96    
97 dpavlin 536 my $rec;
98 dpavlin 433
99 dpavlin 538 sub _set_rec {
100 dpavlin 536 $rec = shift or die "no record hash";
101 dpavlin 433 }
102    
103 dpavlin 538 =head2 _get_ds
104    
105     Return hash formatted as data structure
106    
107     my $ds = _get_ds();
108    
109     =cut
110    
111     my $out;
112 dpavlin 540 my $marc21;
113 dpavlin 538
114     sub _get_ds {
115     return $out;
116     }
117    
118     =head2 _clean_ds
119    
120     Clean data structure hash for next record
121    
122     _clean_ds();
123    
124     =cut
125    
126     sub _clean_ds {
127     $out = undef;
128 dpavlin 540 $marc21 = undef;
129 dpavlin 538 }
130    
131     =head2 _set_lookup
132    
133     Set current lookup hash
134    
135     _set_lookup( $lookup );
136    
137     =cut
138    
139     my $lookup;
140    
141     sub _set_lookup {
142     $lookup = shift;
143     }
144    
145 dpavlin 540 =head2 _get_marc21_fields
146    
147     Get all fields defined by calls to C<marc21>
148    
149     $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );
150    
151     =cut
152    
153     sub _get_marc21_fields {
154     return @{$marc21};
155     }
156    
157     =head1 Functions to create C<data_structure>
158    
159     Those functions generally have to first in your normalization file.
160    
161 dpavlin 536 =head2 tag
162 dpavlin 433
163 dpavlin 536 Define new tag for I<search> and I<display>.
164 dpavlin 433
165 dpavlin 536 tag('Title', rec('200','a') );
166 dpavlin 13
167    
168     =cut
169    
170 dpavlin 536 sub tag {
171     my $name = shift or die "tag needs name as first argument";
172     my @o = grep { defined($_) && $_ ne '' } @_;
173     return unless (@o);
174     $out->{$name}->{tag} = $name;
175     $out->{$name}->{search} = \@o;
176     $out->{$name}->{display} = \@o;
177     }
178 dpavlin 13
179 dpavlin 536 =head2 display
180 dpavlin 13
181 dpavlin 536 Define tag just for I<display>
182 dpavlin 125
183 dpavlin 536 @v = display('Title', rec('200','a') );
184 dpavlin 125
185 dpavlin 536 =cut
186 dpavlin 125
187 dpavlin 536 sub display {
188     my $name = shift or die "display needs name as first argument";
189     my @o = grep { defined($_) && $_ ne '' } @_;
190     return unless (@o);
191     $out->{$name}->{tag} = $name;
192     $out->{$name}->{display} = \@o;
193     }
194 dpavlin 13
195 dpavlin 536 =head2 search
196 dpavlin 13
197 dpavlin 536 Prepare values just for I<search>
198 dpavlin 13
199 dpavlin 536 @v = search('Title', rec('200','a') );
200 dpavlin 433
201 dpavlin 536 =cut
202 dpavlin 13
203 dpavlin 536 sub search {
204     my $name = shift or die "search needs name as first argument";
205     my @o = grep { defined($_) && $_ ne '' } @_;
206     return unless (@o);
207     $out->{$name}->{tag} = $name;
208     $out->{$name}->{search} = \@o;
209 dpavlin 13 }
210    
211 dpavlin 540 =head2 marc21
212    
213     Save value for MARC field
214    
215     marc21('900','a', rec('200','a') );
216    
217     =cut
218    
219     sub marc21 {
220     my $f = shift or die "marc21 needs field";
221     die "marc21 field must be numer" unless ($f =~ /^\d+$/);
222    
223     my $sf = shift or die "marc21 needs subfield";
224    
225     foreach my $v (@_) {
226     push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];
227     }
228     }
229    
230     =head1 Functions to extract data from input
231    
232     This function should be used inside functions to create C<data_structure> described
233     above.
234    
235 dpavlin 536 =head2 rec1
236 dpavlin 371
237 dpavlin 536 Return all values in some field
238 dpavlin 371
239 dpavlin 536 @v = rec1('200')
240 dpavlin 15
241 dpavlin 536 TODO: order of values is probably same as in source data, need to investigate that
242 dpavlin 15
243 dpavlin 536 =cut
244 dpavlin 15
245 dpavlin 536 sub rec1 {
246     my $f = shift;
247     return unless (defined($rec) && defined($rec->{$f}));
248     if (ref($rec->{$f}) eq 'ARRAY') {
249     return map {
250     if (ref($_) eq 'HASH') {
251     values %{$_};
252 dpavlin 31 } else {
253 dpavlin 536 $_;
254 dpavlin 31 }
255 dpavlin 536 } @{ $rec->{$f} };
256     } elsif( defined($rec->{$f}) ) {
257     return $rec->{$f};
258 dpavlin 15 }
259     }
260    
261 dpavlin 536 =head2 rec2
262 dpavlin 15
263 dpavlin 536 Return all values in specific field and subfield
264 dpavlin 13
265 dpavlin 536 @v = rec2('200','a')
266 dpavlin 13
267     =cut
268    
269 dpavlin 536 sub rec2 {
270     my $f = shift;
271     return unless (defined($rec && $rec->{$f}));
272     my $sf = shift;
273     return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
274     }
275 dpavlin 13
276 dpavlin 536 =head2 rec
277 dpavlin 13
278 dpavlin 536 syntaxtic sugar for
279 dpavlin 13
280 dpavlin 536 @v = rec('200')
281     @v = rec('200','a')
282 dpavlin 13
283 dpavlin 536 =cut
284 dpavlin 373
285 dpavlin 536 sub rec {
286     if ($#_ == 0) {
287     return rec1(@_);
288     } elsif ($#_ == 1) {
289     return rec2(@_);
290 dpavlin 13 }
291     }
292    
293 dpavlin 536 =head2 regex
294 dpavlin 15
295 dpavlin 536 Apply regex to some or all values
296 dpavlin 15
297 dpavlin 536 @v = regex( 's/foo/bar/g', @v );
298 dpavlin 15
299     =cut
300    
301 dpavlin 536 sub regex {
302     my $r = shift;
303     my @out;
304     #warn "r: $r\n",Dumper(\@_);
305     foreach my $t (@_) {
306     next unless ($t);
307     eval "\$t =~ $r";
308     push @out, $t if ($t && $t ne '');
309 dpavlin 368 }
310 dpavlin 536 return @out;
311 dpavlin 15 }
312    
313 dpavlin 536 =head2 prefix
314 dpavlin 15
315 dpavlin 536 Prefix all values with a string
316 dpavlin 15
317 dpavlin 536 @v = prefix( 'my_', @v );
318 dpavlin 15
319     =cut
320    
321 dpavlin 536 sub prefix {
322     my $p = shift or die "prefix needs string as first argument";
323     return map { $p . $_ } grep { defined($_) } @_;
324     }
325 dpavlin 15
326 dpavlin 536 =head2 suffix
327 dpavlin 15
328 dpavlin 536 suffix all values with a string
329 dpavlin 15
330 dpavlin 536 @v = suffix( '_my', @v );
331 dpavlin 15
332 dpavlin 536 =cut
333 dpavlin 15
334 dpavlin 536 sub suffix {
335     my $s = shift or die "suffix needs string as first argument";
336     return map { $_ . $s } grep { defined($_) } @_;
337 dpavlin 15 }
338    
339 dpavlin 536 =head2 surround
340 dpavlin 13
341 dpavlin 536 surround all values with a two strings
342 dpavlin 13
343 dpavlin 536 @v = surround( 'prefix_', '_suffix', @v );
344 dpavlin 13
345     =cut
346    
347 dpavlin 536 sub surround {
348     my $p = shift or die "surround need prefix as first argument";
349     my $s = shift or die "surround needs suffix as second argument";
350     return map { $p . $_ . $s } grep { defined($_) } @_;
351 dpavlin 13 }
352    
353 dpavlin 536 =head2 first
354 dpavlin 13
355 dpavlin 536 Return first element
356 dpavlin 15
357 dpavlin 536 $v = first( @v );
358 dpavlin 13
359     =cut
360    
361 dpavlin 536 sub first {
362     my $r = shift;
363     return $r;
364 dpavlin 13 }
365    
366 dpavlin 536 =head2 lookup
367 dpavlin 13
368 dpavlin 536 Consult lookup hashes for some value
369 dpavlin 13
370 dpavlin 536 @v = lookup( $v );
371     @v = lookup( @v );
372 dpavlin 13
373     =cut
374    
375 dpavlin 536 sub lookup {
376     my $k = shift or return;
377     return unless (defined($lookup->{$k}));
378     if (ref($lookup->{$k}) eq 'ARRAY') {
379     return @{ $lookup->{$k} };
380     } else {
381     return $lookup->{$k};
382     }
383 dpavlin 13 }
384    
385 dpavlin 536 =head2 join_with
386 dpavlin 13
387 dpavlin 536 Joins walues with some delimiter
388 dpavlin 10
389 dpavlin 536 $v = join_with(", ", @v);
390 dpavlin 10
391 dpavlin 536 =cut
392 dpavlin 10
393 dpavlin 536 sub join_with {
394     my $d = shift;
395     return join($d, grep { defined($_) && $_ ne '' } @_);
396     }
397 dpavlin 10
398 dpavlin 536 # END
399     1;

  ViewVC Help
Powered by ViewVC 1.1.26