/[webpac2]/trunk/lib/WebPAC/Normalize.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Normalize.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 541 - (show annotations)
Thu Jun 29 21:18:50 2006 UTC (16 years, 7 months ago) by dpavlin
File size: 6895 byte(s)
 r730@llin:  dpavlin | 2006-06-29 21:33:48 +0200
 use MARC::Record 2.0 to support utf-8 encoding in MARC
 http://marcpm.sourceforge.net/

1 package WebPAC::Normalize;
2 use Exporter 'import';
3 @EXPORT = qw/
4 _set_rec _set_lookup
5 _get_ds _clean_ds
6
7 tag search display
8 marc21
9
10 rec1 rec2 rec
11 regex prefix suffix surround
12 first lookup join_with
13 /;
14
15 use warnings;
16 use strict;
17
18 #use base qw/WebPAC::Common/;
19 use Data::Dumper;
20 use Encode qw/from_to/;
21
22 =head1 NAME
23
24 WebPAC::Normalize - describe normalisaton rules using sets
25
26 =head1 VERSION
27
28 Version 0.06
29
30 =cut
31
32 our $VERSION = '0.06';
33
34 =head1 SYNOPSIS
35
36 This module uses C<conf/normalize/*.pl> files to perform normalisation
37 from input records using perl functions which are specialized for set
38 processing.
39
40 Sets are implemented as arrays, and normalisation file is valid perl, which
41 means that you check it's validity before running WebPAC using
42 C<perl -c normalize.pl>.
43
44 Normalisation can generate multiple output normalized data. For now, supported output
45 types (on the left side of definition) are: C<tag>, C<display>, C<search> and
46 C<marc21>.
47
48 =head1 FUNCTIONS
49
50 Functions which start with C<_> are private and used by WebPAC internally.
51 All other functions are available for use within normalisation rules.
52
53 =head2 data_structure
54
55 Return data structure
56
57 my $ds = WebPAC::Normalize::data_structure(
58 lookup => $lookup->lookup_hash,
59 row => $row,
60 rules => $normalize_pl_config,
61 marc_encoding => 'utf-8',
62 );
63
64 Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
65 other are optional.
66
67 This function will B<die> if normalizastion can't be evaled.
68
69 Since this function isn't exported you have to call it with
70 C<WebPAC::Normalize::data_structure>.
71
72 =cut
73
74 sub data_structure {
75 my $arg = {@_};
76
77 die "need row argument" unless ($arg->{row});
78 die "need normalisation argument" unless ($arg->{rules});
79
80 no strict 'subs';
81 _set_lookup( $arg->{lookup} );
82 _set_rec( $arg->{row} );
83 _clean_ds( %{ $arg } );
84 eval "$arg->{rules}";
85 die "error evaling $arg->{rules}: $@\n" if ($@);
86
87 return _get_ds();
88 }
89
90 =head2 _set_rec
91
92 Set current record hash
93
94 _set_rec( $rec );
95
96 =cut
97
98 my $rec;
99
100 sub _set_rec {
101 $rec = shift or die "no record hash";
102 }
103
104 =head2 _get_ds
105
106 Return hash formatted as data structure
107
108 my $ds = _get_ds();
109
110 =cut
111
112 my $out;
113 my $marc21;
114 my $marc_encoding;
115
116 sub _get_ds {
117 return $out;
118 }
119
120 =head2 _clean_ds
121
122 Clean data structure hash for next record
123
124 _clean_ds();
125
126 =cut
127
128 sub _clean_ds {
129 my $a = {@_};
130 $out = undef;
131 $marc21 = undef;
132 $marc_encoding = $a->{marc_encoding};
133 }
134
135 =head2 _set_lookup
136
137 Set current lookup hash
138
139 _set_lookup( $lookup );
140
141 =cut
142
143 my $lookup;
144
145 sub _set_lookup {
146 $lookup = shift;
147 }
148
149 =head2 _get_marc21_fields
150
151 Get all fields defined by calls to C<marc21>
152
153 $marc->add_fields( WebPAC::Normalize:_get_marc21_fields() );
154
155 =cut
156
157 sub _get_marc21_fields {
158 return @{$marc21};
159 }
160
161 =head1 Functions to create C<data_structure>
162
163 Those functions generally have to first in your normalization file.
164
165 =head2 tag
166
167 Define new tag for I<search> and I<display>.
168
169 tag('Title', rec('200','a') );
170
171
172 =cut
173
174 sub tag {
175 my $name = shift or die "tag needs name as first argument";
176 my @o = grep { defined($_) && $_ ne '' } @_;
177 return unless (@o);
178 $out->{$name}->{tag} = $name;
179 $out->{$name}->{search} = \@o;
180 $out->{$name}->{display} = \@o;
181 }
182
183 =head2 display
184
185 Define tag just for I<display>
186
187 @v = display('Title', rec('200','a') );
188
189 =cut
190
191 sub display {
192 my $name = shift or die "display needs name as first argument";
193 my @o = grep { defined($_) && $_ ne '' } @_;
194 return unless (@o);
195 $out->{$name}->{tag} = $name;
196 $out->{$name}->{display} = \@o;
197 }
198
199 =head2 search
200
201 Prepare values just for I<search>
202
203 @v = search('Title', rec('200','a') );
204
205 =cut
206
207 sub search {
208 my $name = shift or die "search needs name as first argument";
209 my @o = grep { defined($_) && $_ ne '' } @_;
210 return unless (@o);
211 $out->{$name}->{tag} = $name;
212 $out->{$name}->{search} = \@o;
213 }
214
215 =head2 marc21
216
217 Save value for MARC field
218
219 marc21('900','a', rec('200','a') );
220
221 =cut
222
223 sub marc21 {
224 my $f = shift or die "marc21 needs field";
225 die "marc21 field must be numer" unless ($f =~ /^\d+$/);
226
227 my $sf = shift or die "marc21 needs subfield";
228
229 foreach (@_) {
230 my $v = $_; # make var read-write for Encode
231 next unless (defined($v) && $v !~ /^\s+$/);
232 from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
233 push @{ $marc21 }, [ $f, ' ', ' ', $sf => $v ];
234 }
235 }
236
237 =head1 Functions to extract data from input
238
239 This function should be used inside functions to create C<data_structure> described
240 above.
241
242 =head2 rec1
243
244 Return all values in some field
245
246 @v = rec1('200')
247
248 TODO: order of values is probably same as in source data, need to investigate that
249
250 =cut
251
252 sub rec1 {
253 my $f = shift;
254 return unless (defined($rec) && defined($rec->{$f}));
255 if (ref($rec->{$f}) eq 'ARRAY') {
256 return map {
257 if (ref($_) eq 'HASH') {
258 values %{$_};
259 } else {
260 $_;
261 }
262 } @{ $rec->{$f} };
263 } elsif( defined($rec->{$f}) ) {
264 return $rec->{$f};
265 }
266 }
267
268 =head2 rec2
269
270 Return all values in specific field and subfield
271
272 @v = rec2('200','a')
273
274 =cut
275
276 sub rec2 {
277 my $f = shift;
278 return unless (defined($rec && $rec->{$f}));
279 my $sf = shift;
280 return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
281 }
282
283 =head2 rec
284
285 syntaxtic sugar for
286
287 @v = rec('200')
288 @v = rec('200','a')
289
290 =cut
291
292 sub rec {
293 if ($#_ == 0) {
294 return rec1(@_);
295 } elsif ($#_ == 1) {
296 return rec2(@_);
297 }
298 }
299
300 =head2 regex
301
302 Apply regex to some or all values
303
304 @v = regex( 's/foo/bar/g', @v );
305
306 =cut
307
308 sub regex {
309 my $r = shift;
310 my @out;
311 #warn "r: $r\n",Dumper(\@_);
312 foreach my $t (@_) {
313 next unless ($t);
314 eval "\$t =~ $r";
315 push @out, $t if ($t && $t ne '');
316 }
317 return @out;
318 }
319
320 =head2 prefix
321
322 Prefix all values with a string
323
324 @v = prefix( 'my_', @v );
325
326 =cut
327
328 sub prefix {
329 my $p = shift or die "prefix needs string as first argument";
330 return map { $p . $_ } grep { defined($_) } @_;
331 }
332
333 =head2 suffix
334
335 suffix all values with a string
336
337 @v = suffix( '_my', @v );
338
339 =cut
340
341 sub suffix {
342 my $s = shift or die "suffix needs string as first argument";
343 return map { $_ . $s } grep { defined($_) } @_;
344 }
345
346 =head2 surround
347
348 surround all values with a two strings
349
350 @v = surround( 'prefix_', '_suffix', @v );
351
352 =cut
353
354 sub surround {
355 my $p = shift or die "surround need prefix as first argument";
356 my $s = shift or die "surround needs suffix as second argument";
357 return map { $p . $_ . $s } grep { defined($_) } @_;
358 }
359
360 =head2 first
361
362 Return first element
363
364 $v = first( @v );
365
366 =cut
367
368 sub first {
369 my $r = shift;
370 return $r;
371 }
372
373 =head2 lookup
374
375 Consult lookup hashes for some value
376
377 @v = lookup( $v );
378 @v = lookup( @v );
379
380 =cut
381
382 sub lookup {
383 my $k = shift or return;
384 return unless (defined($lookup->{$k}));
385 if (ref($lookup->{$k}) eq 'ARRAY') {
386 return @{ $lookup->{$k} };
387 } else {
388 return $lookup->{$k};
389 }
390 }
391
392 =head2 join_with
393
394 Joins walues with some delimiter
395
396 $v = join_with(", ", @v);
397
398 =cut
399
400 sub join_with {
401 my $d = shift;
402 return join($d, grep { defined($_) && $_ ne '' } @_);
403 }
404
405 # END
406 1;

  ViewVC Help
Powered by ViewVC 1.1.26