lib/WebPAC/Normalize.pm

package WebPAC::Normalize;
use Exporter 'import';
@EXPORT = qw/
        set_rec set_lookup
        get_ds clean_ds
        tag search display
        rec1 rec2 rec
        regex prefix suffix surround
        first lookup join_with
/;

use warnings;
use strict;

#use base qw/WebPAC::Common/;
use Data::Dumper;

=head1 NAME

WebPAC::Normalize - describe normalisation rules using sets

=head1 VERSION

Version 0.04

=cut

our $VERSION = '0.04';

=head1 SYNOPSIS

This module uses C<conf/normalize/*.pl> files to perform normalisation
from input records using perl functions which are specialized for set
processing.

Sets are implemented as arrays, and a normalisation file is valid perl, which
means that you can check its validity before running WebPAC using
C<perl -c normalize.pl>.

Normalisation can generate several kinds of normalized output data. For now, supported output
types (on the left side of definition) are: C<tag>, C<display> and C<search>.

=head1 FUNCTIONS

=head2 data_structure

Return data structure

  my $ds = WebPAC::Normalize::data_structure(
        lookup => $lookup->lookup_hash,
        row => $row,
        rules => $normalize_pl_config,
  );

This function will B<die> if the normalisation rules can't be evaled.

=cut

# Normalise one input row with the given rules and return the result.
#
# Takes named arguments (a flat key/value list):
#   lookup - lookup hash handed to set_lookup() (may be omitted)
#   row    - record handed to set_rec(); required
#   rules  - Perl source text of the normalisation rules; required
#
# Returns whatever get_ds() holds after the rules have run.
# Dies if row or rules is missing (false), or if evaluating the rules fails.
sub data_structure {
        my $arg = {@_};

        die "need row argument" unless ($arg->{row});
        die "need normalisation argument" unless ($arg->{rules});

        # relax strict 'subs' for the rest of this sub; NOTE(review):
        # presumably so that the rules may use barewords -- confirm
        no strict 'subs';
        # reset the per-record state before the rules run
        set_lookup( $arg->{lookup} );
        set_rec( $arg->{row} );
        clean_ds();
        # the rules are executed as Perl code via string eval, so they must
        # come from a trusted source
        eval "$arg->{rules}";
        # the error message includes the complete rules text
        die "error evaling $arg->{rules}: $@\n" if ($@);
        return get_ds();
}

=head2 set_rec

Set current record hash

  set_rec( $rec );

=cut

# Record currently being normalised; read by rec1() and rec2().
my $rec;

# Store the record that later field lookups operate on.
# Dies with "no record hash" when the argument is missing or false
# (the false value has already been stored at that point).
# Returns the stored record.
sub set_rec {
        ($rec) = @_;
        die "no record hash" unless $rec;
        return $rec;
}

=head2 tag

Define new tag for I<search> and I<display>.

  tag('Title', rec('200','a') );


=cut

# Output data structure for the current record, keyed by tag name.
my $out;

# Register values under $name for both search and display.
# Undefined and empty-string values are discarded; when nothing is left
# the call is a no-op and returns nothing.
# Dies when the name is missing or false.
# Returns the array reference stored for the tag (the same reference is
# used for the search and display slots).
sub tag {
        my ($name, @values) = @_;
        die "tag needs name as first argument" unless $name;

        my @o;
        foreach my $value (@values) {
                next unless defined($value);
                next if $value eq '';
                push @o, $value;
        }
        return unless (@o);

        my $entry = ($out->{$name} ||= {});
        $entry->{tag} = $name;
        $entry->{search} = \@o;
        return $entry->{display} = \@o;
}

=head2 display

Define tag just for I<display>

  @v = display('Title', rec('200','a') );

=cut

# Register values under $name for display only.
# Undefined and empty-string values are discarded; when nothing is left
# the call is a no-op and returns nothing.
# Dies when the name is missing or false.
# Returns the array reference stored in the display slot.
sub display {
        my ($name, @values) = @_;
        $name or die "display needs name as first argument";

        my @o = grep { $_ ne '' } grep { defined($_) } @values;
        @o or return;

        $out->{$name}->{tag} = $name;
        return $out->{$name}->{display} = \@o;
}

=head2 search

Prepare values just for I<search>

  @v = search('Title', rec('200','a') );

=cut

# Register values under $name for search only.
# Undefined and empty-string values are discarded; when nothing is left
# the call is a no-op and returns nothing.
# Dies when the name is missing or false.
# Returns the array reference stored in the search slot.
sub search {
        my $name = shift;
        die "search needs name as first argument" if !$name;

        my @o;
        for my $candidate (@_) {
                push @o, $candidate if defined($candidate) && $candidate ne '';
        }
        return if !@o;

        $out->{$name}->{tag} = $name;
        return $out->{$name}->{search} = \@o;
}

=head2 get_ds

Return hash formatted as data structure

  my $ds = get_ds();

=cut

# Hand back the data structure collected so far in $out
# (undef if nothing has been stored since it was last cleared).
sub get_ds {
        my $ds = $out;
        return $ds;
}

=head2 clean_ds

Clean data structure hash for next record

  clean_ds();

=cut

# Forget everything collected in $out so the next record starts empty.
sub clean_ds {
        undef $out;
}

=head2 set_lookup

Set current lookup hash

  set_lookup( $lookup );

=cut

# Lookup hash consulted by lookup().
my $lookup;

# Replace the current lookup hash with the first argument (undef when
# called without arguments) and return it.
sub set_lookup {
        ($lookup) = @_;
        return $lookup;
}

=head2 rec1

Return all values in some field

  @v = rec1('200')

TODO: order of values is probably same as in source data, need to investigate that

=cut

# Return every value stored in field $f of the current record.
#
# Returns nothing when no record is set or the field is undefined.
# A field that is not an array reference is returned as-is.
# For an array field, hash elements contribute all of their values
# (in whatever order the hash yields them) and any other element is
# passed through unchanged.
sub rec1 {
        my ($f) = @_;
        return if !defined($rec) || !defined($rec->{$f});

        my $field = $rec->{$f};
        return $field unless ref($field) eq 'ARRAY';

        return map { ref($_) eq 'HASH' ? values(%$_) : $_ } @$field;
}

=head2 rec2

Return all values in specific field and subfield

  @v = rec2('200','a')

=cut

# Return the values of subfield $sf from every repetition of field $f
# in the current record.
#
# Returns nothing when no record is set, when either argument is
# undefined, or when the field is not an array reference (rec1() copes
# with scalar fields, so rec2() must not die on them either).
# Only hash elements are considered. Undefined and empty subfields are
# skipped, but a literal "0" is a valid value and is kept.
sub rec2 {
        my ($f, $sf) = @_;
        return unless defined($f) && defined($sf);
        return unless defined($rec) && ref($rec->{$f}) eq 'ARRAY';

        return map  { $_->{$sf} }
               grep { ref($_) eq 'HASH' && defined($_->{$sf}) && $_->{$sf} ne '' }
               @{ $rec->{$f} };
}

=head2 rec

Syntactic sugar for

  @v = rec('200')
  @v = rec('200','a')

=cut

# Dispatch on the number of arguments:
#   one argument  (field)           -> rec1()
#   two arguments (field, subfield) -> rec2()
# Any other argument count yields an explicit empty list, instead of
# whatever falling off the end of the sub would otherwise return.
sub rec {
        return rec1(@_) if @_ == 1;
        return rec2(@_) if @_ == 2;
        return;
}

=head2 regex

Apply regex to some or all values

  @v = regex( 's/foo/bar/g', @v );

=cut

# Apply the Perl regex operation $r (source text such as 's/foo/bar/g')
# to each value and return the results.
#
# Undefined and empty values are skipped, and results that end up empty
# are dropped; a literal "0" is kept. The operation is applied to a
# private copy of each value, so the caller's variables are never
# modified and read-only literals can be processed.
#
# $r is compiled with string eval, so it must come from a trusted
# source. If it fails to compile or run, a warning is emitted and the
# value is passed through unchanged.
sub regex {
        my $r = shift;
        my @out;
        foreach my $value (@_) {
                next unless defined($value) && $value ne '';
                # copy: the loop variable aliases the caller's argument
                my $t = $value;
                {
                        local $@;
                        eval "\$t =~ $r";
                        warn "regex: can't apply '$r': $@" if $@;
                }
                push @out, $t if defined($t) && $t ne '';
        }
        return @out;
}

=head2 prefix

Prefix all values with a string

  @v = prefix( 'my_', @v );

=cut

# Prepend $p to every defined value and return the resulting list.
# Dies when the prefix is undefined or empty; the string "0" is a valid
# prefix. Undefined values are skipped.
sub prefix {
        my ($p, @values) = @_;
        die "prefix needs string as first argument"
                unless defined($p) && $p ne '';
        return map { $p . $_ } grep { defined($_) } @values;
}

=head2 suffix

suffix all values with a string

  @v = suffix( '_my', @v );

=cut

# Append $s to every defined value and return the resulting list.
# Dies when the suffix is undefined or empty; the string "0" is a valid
# suffix. Undefined values are skipped.
sub suffix {
        my ($s, @values) = @_;
        die "suffix needs string as first argument"
                unless defined($s) && $s ne '';
        return map { $_ . $s } grep { defined($_) } @values;
}

=head2 surround

Surround all values with two strings

  @v = surround( 'prefix_', '_suffix', @v );

=cut

# Wrap every defined value between prefix $p and suffix $s and return
# the resulting list.
# Dies when either string is undefined or empty (the prefix is checked
# first); the string "0" is valid for both. Undefined values are skipped.
sub surround {
        my ($p, $s, @values) = @_;
        die "surround need prefix as first argument"
                unless defined($p) && $p ne '';
        die "surround needs suffix as second argument"
                unless defined($s) && $s ne '';
        return map { $p . $_ . $s } grep { defined($_) } @values;
}

=head2 first

Return first element

  $v = first( @v );

=cut

# Return the first argument (undef when called without arguments);
# all further arguments are ignored.
sub first {
        my ($head) = @_;
        return $head;
}

=head2 lookup

Consult lookup hashes for some value

  @v = lookup( $v );
  @v = lookup( @v );

=cut

# Look keys up in the current lookup hash.
#
# With a single key: returns nothing when the key is false or has no
# defined entry; otherwise returns the entry, expanding an array
# reference into its elements.
# With several keys in list context: every key is looked up in turn and
# the results are concatenated, so lookup(@v) covers all of @v instead
# of only its first element. In scalar context only the first key is
# used.
sub lookup {
        return map { lookup($_) } @_ if (wantarray && @_ > 1);

        my $k = shift or return;
        return unless (defined($lookup->{$k}));

        my $found = $lookup->{$k};
        return ref($found) eq 'ARRAY' ? @{$found} : $found;
}

=head2 join_with

Joins values with some delimiter

  $v = join_with(", ", @v);

=cut

# Join the usable values with delimiter $d and return the string.
# Undefined and empty-string values are left out of the result.
sub join_with {
        my ($d, @parts) = @_;

        my @kept;
        foreach my $part (@parts) {
                next if !defined($part) || $part eq '';
                push @kept, $part;
        }
        return join($d, @kept);
}

# END
1;
1	package WebPAC::Normalize;
2	use Exporter 'import';
3	@EXPORT = qw/
4	set_rec set_lookup
5	get_ds clean_ds
6	tag search display
7	rec1 rec2 rec
8	regex prefix suffix surround
9	first lookup join_with
10	/;
11
12	use warnings;
13	use strict;
14
15	#use base qw/WebPAC::Common/;
16	use Data::Dumper;
17
18	=head1 NAME
19
20	WebPAC::Normalize - describe normalisaton rules using sets
21
22	=head1 VERSION
23
24	Version 0.04
25
26	=cut
27
28	our $VERSION = '0.04';
29
30	=head1 SYNOPSIS
31
32	This module uses C<conf/normalize/*.pl> files to perform normalisation
33	from input records using perl functions which are specialized for set
34	processing.
35
36	Sets are implemented as arrays, and normalisation file is valid perl, which
37	means that you check it's validity before running WebPAC using
38	C<perl -c normalize.pl>.
39
40	Normalisation can generate multiple output normalized data. For now, supported output
41	types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42
43	=head1 FUNCTIONS
44
45	=head2 data_structure
46
47	Return data structure
48
49	my $ds = WebPAC::Normalize(
50	lookup => $lookup->lookup_hash,
51	row => $row,
52	rules => $normalize_pl_config,
53	);
54
55	This function will B<die> if normalizastion can't be evaled.
56
57	=cut
58
59	sub data_structure {
60	my $arg = {@_};
61
62	die "need row argument" unless ($arg->{row});
63	die "need normalisation argument" unless ($arg->{rules});
64
65	no strict 'subs';
66	set_lookup( $arg->{lookup} );
67	set_rec( $arg->{row} );
68	clean_ds();
69	eval "$arg->{rules}";
70	die "error evaling $arg->{rules}: $@\n" if ($@);
71	return get_ds();
72	}
73
74	=head2 set_rec
75
76	Set current record hash
77
78	set_rec( $rec );
79
80	=cut
81
82	my $rec;
83
84	sub set_rec {
85	$rec = shift or die "no record hash";
86	}
87
88	=head2 tag
89
90	Define new tag for I<search> and I<display>.
91
92	tag('Title', rec('200','a') );
93
94
95	=cut
96
97	my $out;
98
99	sub tag {
100	my $name = shift or die "tag needs name as first argument";
101	my @o = grep { defined($_) && $_ ne '' } @_;
102	return unless (@o);
103	$out->{$name}->{tag} = $name;
104	$out->{$name}->{search} = \@o;
105	$out->{$name}->{display} = \@o;
106	}
107
108	=head2 display
109
110	Define tag just for I<display>
111
112	@v = display('Title', rec('200','a') );
113
114	=cut
115
116	sub display {
117	my $name = shift or die "display needs name as first argument";
118	my @o = grep { defined($_) && $_ ne '' } @_;
119	return unless (@o);
120	$out->{$name}->{tag} = $name;
121	$out->{$name}->{display} = \@o;
122	}
123
124	=head2 search
125
126	Prepare values just for I<search>
127
128	@v = search('Title', rec('200','a') );
129
130	=cut
131
132	sub search {
133	my $name = shift or die "search needs name as first argument";
134	my @o = grep { defined($_) && $_ ne '' } @_;
135	return unless (@o);
136	$out->{$name}->{tag} = $name;
137	$out->{$name}->{search} = \@o;
138	}
139
140	=head2 get_ds
141
142	Return hash formatted as data structure
143
144	my $ds = get_ds();
145
146	=cut
147
148	sub get_ds {
149	return $out;
150	}
151
152	=head2 clean_ds
153
154	Clean data structure hash for next record
155
156	clean_ds();
157
158	=cut
159
160	sub clean_ds {
161	$out = undef;
162	}
163
164	=head2 set_lookup
165
166	Set current lookup hash
167
168	set_lookup( $lookup );
169
170	=cut
171
172	my $lookup;
173
174	sub set_lookup {
175	$lookup = shift;
176	}
177
178	=head2 rec1
179
180	Return all values in some field
181
182	@v = rec1('200')
183
184	TODO: order of values is probably same as in source data, need to investigate that
185
186	=cut
187
188	sub rec1 {
189	my $f = shift;
190	return unless (defined($rec) && defined($rec->{$f}));
191	if (ref($rec->{$f}) eq 'ARRAY') {
192	return map {
193	if (ref($_) eq 'HASH') {
194	values %{$_};
195	} else {
196	$_;
197	}
198	} @{ $rec->{$f} };
199	} elsif( defined($rec->{$f}) ) {
200	return $rec->{$f};
201	}
202	}
203
204	=head2 rec2
205
206	Return all values in specific field and subfield
207
208	@v = rec2('200','a')
209
210	=cut
211
212	sub rec2 {
213	my $f = shift;
214	return unless (defined($rec && $rec->{$f}));
215	my $sf = shift;
216	return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
217	}
218
219	=head2 rec
220
221	syntaxtic sugar for
222
223	@v = rec('200')
224	@v = rec('200','a')
225
226	=cut
227
228	sub rec {
229	if ($#_ == 0) {
230	return rec1(@_);
231	} elsif ($#_ == 1) {
232	return rec2(@_);
233	}
234	}
235
236	=head2 regex
237
238	Apply regex to some or all values
239
240	@v = regex( 's/foo/bar/g', @v );
241
242	=cut
243
244	sub regex {
245	my $r = shift;
246	my @out;
247	#warn "r: $r\n",Dumper(\@_);
248	foreach my $t (@_) {
249	next unless ($t);
250	eval "\$t =~ $r";
251	push @out, $t if ($t && $t ne '');
252	}
253	return @out;
254	}
255
256	=head2 prefix
257
258	Prefix all values with a string
259
260	@v = prefix( 'my_', @v );
261
262	=cut
263
264	sub prefix {
265	my $p = shift or die "prefix needs string as first argument";
266	return map { $p . $_ } grep { defined($_) } @_;
267	}
268
269	=head2 suffix
270
271	suffix all values with a string
272
273	@v = suffix( '_my', @v );
274
275	=cut
276
277	sub suffix {
278	my $s = shift or die "suffix needs string as first argument";
279	return map { $_ . $s } grep { defined($_) } @_;
280	}
281
282	=head2 surround
283
284	surround all values with a two strings
285
286	@v = surround( 'prefix_', '_suffix', @v );
287
288	=cut
289
290	sub surround {
291	my $p = shift or die "surround need prefix as first argument";
292	my $s = shift or die "surround needs suffix as second argument";
293	return map { $p . $_ . $s } grep { defined($_) } @_;
294	}
295
296	=head2 first
297
298	Return first element
299
300	$v = first( @v );
301
302	=cut
303
304	sub first {
305	my $r = shift;
306	return $r;
307	}
308
309	=head2 lookup
310
311	Consult lookup hashes for some value
312
313	@v = lookup( $v );
314	@v = lookup( @v );
315
316	=cut
317
318	sub lookup {
319	my $k = shift or return;
320	return unless (defined($lookup->{$k}));
321	if (ref($lookup->{$k}) eq 'ARRAY') {
322	return @{ $lookup->{$k} };
323	} else {
324	return $lookup->{$k};
325	}
326	}
327
328	=head2 join_with
329
330	Joins walues with some delimiter
331
332	$v = join_with(", ", @v);
333
334	=cut
335
336	sub join_with {
337	my $d = shift;
338	return join($d, grep { defined($_) && $_ ne '' } @_);
339	}
340
341	# END
342	1;