lib/WebPAC/Normalize.pm

package WebPAC::Normalize;
use Exporter 'import';
@EXPORT = qw/
        _set_rec _set_lookup
        _get_ds _clean_ds

        tag search display
        rec1 rec2 rec
        regex prefix suffix surround
        first lookup join_with
/;

use warnings;
use strict;

#use base qw/WebPAC::Common/;
use Data::Dumper;

=head1 NAME

WebPAC::Normalize - describe normalisaton rules using sets

=head1 VERSION

Version 0.05

=cut

our $VERSION = '0.05';

=head1 SYNOPSIS

This module uses C<conf/normalize/*.pl> files to perform normalisation
from input records using perl functions which are specialized for set
processing.

Sets are implemented as arrays, and normalisation file is valid perl, which
means that you check it's validity before running WebPAC using
C<perl -c normalize.pl>.

Normalisation can generate multiple output normalized data. For now, supported output
types (on the left side of definition) are: C<tag>, C<display> and C<search>.

=head1 FUNCTIONS

Functions which start with C<_> are private and used by WebPAC internally.
All other functions are available for use within normalisation rules.

=head2 data_structure

Return data structure

  my $ds = WebPAC::Normalize::data_structure(
        lookup => $lookup->lookup_hash,
        row => $row,
        rules => $normalize_pl_config,
  );

This function will B<die> if normalizastion can't be evaled.

Since this function isn't exported you have to call it with 
C<WebPAC::Normalize::data_structure>.

=cut

sub data_structure {
        my $arg = {@_};

        die "need row argument" unless ($arg->{row});
        die "need normalisation argument" unless ($arg->{rules});

        no strict 'subs';
        _set_lookup( $arg->{lookup} );
        _set_rec( $arg->{row} );
        _clean_ds();
        eval "$arg->{rules}";
        die "error evaling $arg->{rules}: $@\n" if ($@);
        return _get_ds();
}

=head2 _set_rec

Set current record hash

  _set_rec( $rec );

=cut

my $rec;

sub _set_rec {
        $rec = shift or die "no record hash";
}

=head2 _get_ds

Return hash formatted as data structure

  my $ds = _get_ds();

=cut

my $out;

sub _get_ds {
        return $out;
}

=head2 _clean_ds

Clean data structure hash for next record

  _clean_ds();

=cut

sub _clean_ds {
        $out = undef;
}

=head2 _set_lookup

Set current lookup hash

  _set_lookup( $lookup );

=cut

my $lookup;

sub _set_lookup {
        $lookup = shift;
}

=head2 tag

Define new tag for I<search> and I<display>.

  tag('Title', rec('200','a') );


=cut

sub tag {
        my $name = shift or die "tag needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@o;
        $out->{$name}->{display} = \@o;
}

=head2 display

Define tag just for I<display>

  @v = display('Title', rec('200','a') );

=cut

sub display {
        my $name = shift or die "display needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{display} = \@o;
}

=head2 search

Prepare values just for I<search>

  @v = search('Title', rec('200','a') );

=cut

sub search {
        my $name = shift or die "search needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@o;
}

=head2 rec1

Return all values in some field

  @v = rec1('200')

TODO: order of values is probably same as in source data, need to investigate that

=cut

sub rec1 {
        my $f = shift;
        return unless (defined($rec) && defined($rec->{$f}));
        if (ref($rec->{$f}) eq 'ARRAY') {
                return map { 
                        if (ref($_) eq 'HASH') {
                                values %{$_};
                        } else {
                                $_;
                        }
                } @{ $rec->{$f} };
        } elsif( defined($rec->{$f}) ) {
                return $rec->{$f};
        }
}

=head2 rec2

Return all values in specific field and subfield

  @v = rec2('200','a')

=cut

sub rec2 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        my $sf = shift;
        return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
}

=head2 rec

syntaxtic sugar for

  @v = rec('200')
  @v = rec('200','a')

=cut

sub rec {
        if ($#_ == 0) {
                return rec1(@_);
        } elsif ($#_ == 1) {
                return rec2(@_);
        }
}

=head2 regex

Apply regex to some or all values

  @v = regex( 's/foo/bar/g', @v );

=cut

sub regex {
        my $r = shift;
        my @out;
        #warn "r: $r\n",Dumper(\@_);
        foreach my $t (@_) {
                next unless ($t);
                eval "\$t =~ $r";
                push @out, $t if ($t && $t ne '');
        }
        return @out;
}

=head2 prefix

Prefix all values with a string

  @v = prefix( 'my_', @v );

=cut

sub prefix {
        my $p = shift or die "prefix needs string as first argument";
        return map { $p . $_ } grep { defined($_) } @_;
}

=head2 suffix

suffix all values with a string

  @v = suffix( '_my', @v );

=cut

sub suffix {
        my $s = shift or die "suffix needs string as first argument";
        return map { $_ . $s } grep { defined($_) } @_;
}

=head2 surround

surround all values with a two strings

  @v = surround( 'prefix_', '_suffix', @v );

=cut

sub surround {
        my $p = shift or die "surround need prefix as first argument";
        my $s = shift or die "surround needs suffix as second argument";
        return map { $p . $_ . $s } grep { defined($_) } @_;
}

=head2 first

Return first element

  $v = first( @v );

=cut

sub first {
        my $r = shift;
        return $r;
}

=head2 lookup

Consult lookup hashes for some value

  @v = lookup( $v );
  @v = lookup( @v );

=cut

sub lookup {
        my $k = shift or return;
        return unless (defined($lookup->{$k}));
        if (ref($lookup->{$k}) eq 'ARRAY') {
                return @{ $lookup->{$k} };
        } else {
                return $lookup->{$k};
        }
}

=head2 join_with

Joins walues with some delimiter

  $v = join_with(", ", @v);

=cut

sub join_with {
        my $d = shift;
        return join($d, grep { defined($_) && $_ ne '' } @_);
}

# END
1;
1	package WebPAC::Normalize;
2	use Exporter 'import';
3	@EXPORT = qw/
4	_set_rec _set_lookup
5	_get_ds _clean_ds
6
7	tag search display
8	rec1 rec2 rec
9	regex prefix suffix surround
10	first lookup join_with
11	/;
12
13	use warnings;
14	use strict;
15
16	#use base qw/WebPAC::Common/;
17	use Data::Dumper;
18
19	=head1 NAME
20
21	WebPAC::Normalize - describe normalisaton rules using sets
22
23	=head1 VERSION
24
25	Version 0.05
26
27	=cut
28
29	our $VERSION = '0.05';
30
31	=head1 SYNOPSIS
32
33	This module uses C<conf/normalize/*.pl> files to perform normalisation
34	from input records using perl functions which are specialized for set
35	processing.
36
37	Sets are implemented as arrays, and normalisation file is valid perl, which
38	means that you check it's validity before running WebPAC using
39	C<perl -c normalize.pl>.
40
41	Normalisation can generate multiple output normalized data. For now, supported output
42	types (on the left side of definition) are: C<tag>, C<display> and C<search>.
43
44	=head1 FUNCTIONS
45
46	Functions which start with C<_> are private and used by WebPAC internally.
47	All other functions are available for use within normalisation rules.
48
49	=head2 data_structure
50
51	Return data structure
52
53	my $ds = WebPAC::Normalize::data_structure(
54	lookup => $lookup->lookup_hash,
55	row => $row,
56	rules => $normalize_pl_config,
57	);
58
59	This function will B<die> if normalizastion can't be evaled.
60
61	Since this function isn't exported you have to call it with
62	C<WebPAC::Normalize::data_structure>.
63
64	=cut
65
66	sub data_structure {
67	my $arg = {@_};
68
69	die "need row argument" unless ($arg->{row});
70	die "need normalisation argument" unless ($arg->{rules});
71
72	no strict 'subs';
73	_set_lookup( $arg->{lookup} );
74	_set_rec( $arg->{row} );
75	_clean_ds();
76	eval "$arg->{rules}";
77	die "error evaling $arg->{rules}: $@\n" if ($@);
78	return _get_ds();
79	}
80
81	=head2 _set_rec
82
83	Set current record hash
84
85	_set_rec( $rec );
86
87	=cut
88
89	my $rec;
90
91	sub _set_rec {
92	$rec = shift or die "no record hash";
93	}
94
95	=head2 _get_ds
96
97	Return hash formatted as data structure
98
99	my $ds = _get_ds();
100
101	=cut
102
103	my $out;
104
105	sub _get_ds {
106	return $out;
107	}
108
109	=head2 _clean_ds
110
111	Clean data structure hash for next record
112
113	_clean_ds();
114
115	=cut
116
117	sub _clean_ds {
118	$out = undef;
119	}
120
121	=head2 _set_lookup
122
123	Set current lookup hash
124
125	_set_lookup( $lookup );
126
127	=cut
128
129	my $lookup;
130
131	sub _set_lookup {
132	$lookup = shift;
133	}
134
135	=head2 tag
136
137	Define new tag for I<search> and I<display>.
138
139	tag('Title', rec('200','a') );
140
141
142	=cut
143
144	sub tag {
145	my $name = shift or die "tag needs name as first argument";
146	my @o = grep { defined($_) && $_ ne '' } @_;
147	return unless (@o);
148	$out->{$name}->{tag} = $name;
149	$out->{$name}->{search} = \@o;
150	$out->{$name}->{display} = \@o;
151	}
152
153	=head2 display
154
155	Define tag just for I<display>
156
157	@v = display('Title', rec('200','a') );
158
159	=cut
160
161	sub display {
162	my $name = shift or die "display needs name as first argument";
163	my @o = grep { defined($_) && $_ ne '' } @_;
164	return unless (@o);
165	$out->{$name}->{tag} = $name;
166	$out->{$name}->{display} = \@o;
167	}
168
169	=head2 search
170
171	Prepare values just for I<search>
172
173	@v = search('Title', rec('200','a') );
174
175	=cut
176
177	sub search {
178	my $name = shift or die "search needs name as first argument";
179	my @o = grep { defined($_) && $_ ne '' } @_;
180	return unless (@o);
181	$out->{$name}->{tag} = $name;
182	$out->{$name}->{search} = \@o;
183	}
184
185	=head2 rec1
186
187	Return all values in some field
188
189	@v = rec1('200')
190
191	TODO: order of values is probably same as in source data, need to investigate that
192
193	=cut
194
195	sub rec1 {
196	my $f = shift;
197	return unless (defined($rec) && defined($rec->{$f}));
198	if (ref($rec->{$f}) eq 'ARRAY') {
199	return map {
200	if (ref($_) eq 'HASH') {
201	values %{$_};
202	} else {
203	$_;
204	}
205	} @{ $rec->{$f} };
206	} elsif( defined($rec->{$f}) ) {
207	return $rec->{$f};
208	}
209	}
210
211	=head2 rec2
212
213	Return all values in specific field and subfield
214
215	@v = rec2('200','a')
216
217	=cut
218
219	sub rec2 {
220	my $f = shift;
221	return unless (defined($rec && $rec->{$f}));
222	my $sf = shift;
223	return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
224	}
225
226	=head2 rec
227
228	syntaxtic sugar for
229
230	@v = rec('200')
231	@v = rec('200','a')
232
233	=cut
234
235	sub rec {
236	if ($#_ == 0) {
237	return rec1(@_);
238	} elsif ($#_ == 1) {
239	return rec2(@_);
240	}
241	}
242
243	=head2 regex
244
245	Apply regex to some or all values
246
247	@v = regex( 's/foo/bar/g', @v );
248
249	=cut
250
251	sub regex {
252	my $r = shift;
253	my @out;
254	#warn "r: $r\n",Dumper(\@_);
255	foreach my $t (@_) {
256	next unless ($t);
257	eval "\$t =~ $r";
258	push @out, $t if ($t && $t ne '');
259	}
260	return @out;
261	}
262
263	=head2 prefix
264
265	Prefix all values with a string
266
267	@v = prefix( 'my_', @v );
268
269	=cut
270
271	sub prefix {
272	my $p = shift or die "prefix needs string as first argument";
273	return map { $p . $_ } grep { defined($_) } @_;
274	}
275
276	=head2 suffix
277
278	suffix all values with a string
279
280	@v = suffix( '_my', @v );
281
282	=cut
283
284	sub suffix {
285	my $s = shift or die "suffix needs string as first argument";
286	return map { $_ . $s } grep { defined($_) } @_;
287	}
288
289	=head2 surround
290
291	surround all values with a two strings
292
293	@v = surround( 'prefix_', '_suffix', @v );
294
295	=cut
296
297	sub surround {
298	my $p = shift or die "surround need prefix as first argument";
299	my $s = shift or die "surround needs suffix as second argument";
300	return map { $p . $_ . $s } grep { defined($_) } @_;
301	}
302
303	=head2 first
304
305	Return first element
306
307	$v = first( @v );
308
309	=cut
310
311	sub first {
312	my $r = shift;
313	return $r;
314	}
315
316	=head2 lookup
317
318	Consult lookup hashes for some value
319
320	@v = lookup( $v );
321	@v = lookup( @v );
322
323	=cut
324
325	sub lookup {
326	my $k = shift or return;
327	return unless (defined($lookup->{$k}));
328	if (ref($lookup->{$k}) eq 'ARRAY') {
329	return @{ $lookup->{$k} };
330	} else {
331	return $lookup->{$k};
332	}
333	}
334
335	=head2 join_with
336
337	Joins walues with some delimiter
338
339	$v = join_with(", ", @v);
340
341	=cut
342
343	sub join_with {
344	my $d = shift;
345	return join($d, grep { defined($_) && $_ ne '' } @_);
346	}
347
348	# END
349	1;