/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 540 - (show annotations)
Thu Jun 29 15:29:41 2006 UTC (17 years, 9 months ago) by dpavlin
File size: 6608 byte(s)
 r726@llin:  dpavlin | 2006-06-29 17:31:13 +0200
 add marc21 to normalize and create MARC file from those data [2.22]

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6
7 tag search display
8 marc21
9
10 rec1 rec2 rec
11 regex prefix suffix surround
12 first lookup join_with
13 /;
14
15 use warnings;
16 use strict;
17
18 #use base qw/WebPAC::Common/;
19 use Data::Dumper;
20
21 =head1 NAME
22
23 WebPAC::Normalize - describe normalisaton rules using sets
24
25 =head1 VERSION
26
27 Version 0.05
28
29 =cut
30
31 our $VERSION = '0.05';
32
33 =head1 SYNOPSIS
34
35 This module uses C<conf/normalize/*.pl> files to perform normalisation
36 from input records using perl functions which are specialized for set
37 processing.
38
39 Sets are implemented as arrays, and normalisation file is valid perl, which
40 means that you check it's validity before running WebPAC using
41 C<perl -c normalize.pl>.
42
43 Normalisation can generate multiple output normalized data. For now, supported output
44 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
45 C<marc21>.
46
47 =head1 FUNCTIONS
48
49 Functions which start with C<_> are private and used by WebPAC internally.
50 All other functions are available for use within normalisation rules.
51
52 =head2 data_structure
53
54 Return data structure
55
56 my $ds = WebPAC::Normalize::data_structure(
57 lookup => $lookup->lookup_hash,
58 row => $row,
59 rules => $normalize_pl_config,
60 );
61
62 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
63 other are optional.
64
65 This function will B<die> if normalizastion can't be evaled.
66
67 Since this function isn't exported you have to call it with
68 C<WebPAC::Normalize::data_structure>.
69
70 =cut
71
72 sub data_structure {
73 my $arg = {@_};
74
75 die "need row argument" unless ($arg->{row});
76 die "need normalisation argument" unless ($arg->{rules});
77
78 no strict 'subs';
79 _set_lookup( $arg->{lookup} );
80 _set_rec( $arg->{row} );
81 _clean_ds();
82
83 eval "$arg->{rules}";
84 die "error evaling $arg->{rules}: $@\n" if ($@);
85
86 return _get_ds();
87 }
88
89 =head2 _set_rec
90
91 Set current record hash
92
93 _set_rec( $rec );
94
95 =cut
96
97 my $rec;
98
99 sub _set_rec {
100 $rec = shift or die "no record hash";
101 }
102
103 =head2 _get_ds
104
105 Return hash formatted as data structure
106
107 my $ds = _get_ds();
108
109 =cut
110
111 my $out;
112 my $marc21;
113
114 sub _get_ds {
115 return $out;
116 }
117
118 =head2 _clean_ds
119
120 Clean data structure hash for next record
121
122 _clean_ds();
123
124 =cut
125
126 sub _clean_ds {
127 $out = undef;
128 $marc21 = undef;
129 }
130
131 =head2 _set_lookup
132
133 Set current lookup hash
134
135 _set_lookup( $lookup );
136
137 =cut
138
139 my $lookup;
140
141 sub _set_lookup {
142 $lookup = shift;
143 }
144
145 =head2 _get_marc21_fields
146
147 Get all fields defined by calls to C<marc21>
148
149 $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );
150
151 =cut
152
153 sub _get_marc21_fields {
154 return @{$marc21};
155 }
156
157 =head1 Functions to create C<data_structure>
158
159 Those functions generally have to first in your normalization file.
160
161 =head2 tag
162
163 Define new tag for I<search> and I<display>.
164
165 tag('Title', rec('200','a') );
166
167
168 =cut
169
170 sub tag {
171 my $name = shift or die "tag needs name as first argument";
172 my @o = grep { defined($_) && $_ ne '' } @_;
173 return unless (@o);
174 $out->{$name}->{tag} = $name;
175 $out->{$name}->{search} = \@o;
176 $out->{$name}->{display} = \@o;
177 }
178
179 =head2 display
180
181 Define tag just for I<display>
182
183 @v = display('Title', rec('200','a') );
184
185 =cut
186
187 sub display {
188 my $name = shift or die "display needs name as first argument";
189 my @o = grep { defined($_) && $_ ne '' } @_;
190 return unless (@o);
191 $out->{$name}->{tag} = $name;
192 $out->{$name}->{display} = \@o;
193 }
194
195 =head2 search
196
197 Prepare values just for I<search>
198
199 @v = search('Title', rec('200','a') );
200
201 =cut
202
203 sub search {
204 my $name = shift or die "search needs name as first argument";
205 my @o = grep { defined($_) && $_ ne '' } @_;
206 return unless (@o);
207 $out->{$name}->{tag} = $name;
208 $out->{$name}->{search} = \@o;
209 }
210
211 =head2 marc21
212
213 Save value for MARC field
214
215 marc21('900','a', rec('200','a') );
216
217 =cut
218
219 sub marc21 {
220 my $f = shift or die "marc21 needs field";
221 die "marc21 field must be numer" unless ($f =~ /^\d+$/);
222
223 my $sf = shift or die "marc21 needs subfield";
224
225 foreach my $v (@_) {
226 push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];
227 }
228 }
229
230 =head1 Functions to extract data from input
231
232 This function should be used inside functions to create C<data_structure> described
233 above.
234
235 =head2 rec1
236
237 Return all values in some field
238
239 @v = rec1('200')
240
241 TODO: order of values is probably same as in source data, need to investigate that
242
243 =cut
244
245 sub rec1 {
246 my $f = shift;
247 return unless (defined($rec) && defined($rec->{$f}));
248 if (ref($rec->{$f}) eq 'ARRAY') {
249 return map {
250 if (ref($_) eq 'HASH') {
251 values %{$_};
252 } else {
253 $_;
254 }
255 } @{ $rec->{$f} };
256 } elsif( defined($rec->{$f}) ) {
257 return $rec->{$f};
258 }
259 }
260
261 =head2 rec2
262
263 Return all values in specific field and subfield
264
265 @v = rec2('200','a')
266
267 =cut
268
269 sub rec2 {
270 my $f = shift;
271 return unless (defined($rec && $rec->{$f}));
272 my $sf = shift;
273 return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
274 }
275
276 =head2 rec
277
278 syntaxtic sugar for
279
280 @v = rec('200')
281 @v = rec('200','a')
282
283 =cut
284
285 sub rec {
286 if ($#_ == 0) {
287 return rec1(@_);
288 } elsif ($#_ == 1) {
289 return rec2(@_);
290 }
291 }
292
293 =head2 regex
294
295 Apply regex to some or all values
296
297 @v = regex( 's/foo/bar/g', @v );
298
299 =cut
300
301 sub regex {
302 my $r = shift;
303 my @out;
304 #warn "r: $r\n",Dumper(\@_);
305 foreach my $t (@_) {
306 next unless ($t);
307 eval "\$t =~ $r";
308 push @out, $t if ($t && $t ne '');
309 }
310 return @out;
311 }
312
313 =head2 prefix
314
315 Prefix all values with a string
316
317 @v = prefix( 'my_', @v );
318
319 =cut
320
321 sub prefix {
322 my $p = shift or die "prefix needs string as first argument";
323 return map { $p . $_ } grep { defined($_) } @_;
324 }
325
326 =head2 suffix
327
328 suffix all values with a string
329
330 @v = suffix( '_my', @v );
331
332 =cut
333
334 sub suffix {
335 my $s = shift or die "suffix needs string as first argument";
336 return map { $_ . $s } grep { defined($_) } @_;
337 }
338
339 =head2 surround
340
341 surround all values with a two strings
342
343 @v = surround( 'prefix_', '_suffix', @v );
344
345 =cut
346
347 sub surround {
348 my $p = shift or die "surround need prefix as first argument";
349 my $s = shift or die "surround needs suffix as second argument";
350 return map { $p . $_ . $s } grep { defined($_) } @_;
351 }
352
353 =head2 first
354
355 Return first element
356
357 $v = first( @v );
358
359 =cut
360
361 sub first {
362 my $r = shift;
363 return $r;
364 }
365
366 =head2 lookup
367
368 Consult lookup hashes for some value
369
370 @v = lookup( $v );
371 @v = lookup( @v );
372
373 =cut
374
375 sub lookup {
376 my $k = shift or return;
377 return unless (defined($lookup->{$k}));
378 if (ref($lookup->{$k}) eq 'ARRAY') {
379 return @{ $lookup->{$k} };
380 } else {
381 return $lookup->{$k};
382 }
383 }
384
385 =head2 join_with
386
387 Joins walues with some delimiter
388
389 $v = join_with(", ", @v);
390
391 =cut
392
393 sub join_with {
394 my $d = shift;
395 return join($d, grep { defined($_) && $_ ne '' } @_);
396 }
397
398 # END
399 1;

  ViewVC Help
Powered by ViewVC 1.1.26