WebPAC/Normalize/Set.pm

package WebPAC::Normalize::Set;
use Exporter 'import';
@EXPORT = qw/
        set_rec set_lookup
        get_ds clean_ds
        tag search display
        rec1 rec2 rec
        regex prefix
        first lookup join_with
/;

use warnings;
use strict;

#use base qw/WebPAC::Common/;
use Data::Dumper;

=head1 NAME

WebPAC::Normalize::Set - describe normalisaton rules using sets

=head1 VERSION

Version 0.03

=cut

our $VERSION = '0.03';

=head1 SYNOPSIS

This module uses C<conf/normalize/*.pl> files to perform normalisation
from input records using perl functions which are specialized for set
processing.

Sets are implemented as arrays, and normalisation file is valid perl, which
means that you check it's validity before running WebPAC using
C<perl -c normalize.pl>.

Normalisation can generate multiple output normalized data. For now, supported output
types (on the left side of definition) are: C<tag>, C<display> and C<search>.

=head1 FUNCTIONS

=head2 set_rec

Set current record hash

  set_rec( $rec );

=cut

my $rec;

sub set_rec {
        $rec = shift or die "no record hash";
}

=head2 tag

Define new tag for I<search> and I<display>.

  tag('Title', rec('200','a') );


=cut

my $out;

sub tag {
        my $name = shift or die "tag needs name as first argument";
        return unless (@_);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@_;
        $out->{$name}->{display} = \@_;
}

=head2 display

Define tag just for I<display>

  @v = display('Title', rec('200','a') );

=cut

sub display {
        my $name = shift or die "display needs name as first argument";
        return unless (@_);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{display} = \@_;
}

=head2 search

Prepare values just for I<search>

  @v = search('Title', rec('200','a') );

=cut

sub search {
        my $name = shift or die "search needs name as first argument";
        return unless (@_);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@_;
}

=head2 get_ds

Return hash formatted as data structure

  my $ds = get_ds();

=cut

sub get_ds {
        return $out;
}

=head2 clean_ds

Clean data structure hash for next record

  clean_ds();

=cut

sub clean_ds {
        $out = undef;
}

=head2 set_lookup

Set current lookup hash

  set_lookup( $lookup );

=cut

my $lookup;

sub set_lookup {
        $lookup = shift or die "no lookup hash";
}

=head2 rec1

Return all values in some field

  @v = rec1('200')

TODO: order of values is probably same as in source data, need to investigate that

=cut

sub rec1 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        if (ref($rec->{$f}) eq 'ARRAY') {
                if (ref($rec->{$f}->[0]) eq 'HASH') {
                        return map { values %{$_} } @{ $rec->{$f} };
                } else {
                        return @{ $rec->{$f} };
                }
        } elsif( defined($rec->{$f}) ) {
                return $rec->{$f};
        }
}

=head2 rec2

Return all values in specific field and subfield

  @v = rec2('200','a')

=cut

sub rec2 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        my $sf = shift;
        return map { $_->{$sf} } grep { $_->{$sf} } @{ $rec->{$f} };
}

=head2 rec

syntaxtic sugar for

  @v = rec('200')
  @v = rec('200','a')

=cut

sub rec {
        if ($#_ == 0) {
                return rec1(@_);
        } elsif ($#_ == 1) {
                return rec2(@_);
        }
}

=head2 regex

Apply regex to some or all values

  @v = regex( 's/foo/bar/g', @v );

=cut

sub regex {
        my $r = shift;
        my @out;
        #warn "r: $r\n",Dumper(\@_);
        foreach my $t (@_) {
                eval "\$t =~ $r";
                push @out, $t;
        }
        return @out;
}

=head2 prefix

Prefix all values with a string

  @v = prefix( 'my_', @v );

=cut

sub prefix {
        my $p = shift or die "prefix needs string as first argument";
        return map { $p . $_ } grep { defined($_) } @_;
}

=head2 first

Return first element

  $v = first( @v );

=cut

sub first {
        my $r = shift;
        return $r;
}

=head2 lookup

Consult lookup hashes for some value

  @v = lookup( $v );
  @v = lookup( @v );

=cut

sub lookup {
        my $k = shift or return;
        return unless (defined($lookup->{$k}));
        if (ref($lookup->{$k}) eq 'ARRAY') {
                return @{ $lookup->{$k} };
        } else {
                return $lookup->{$k};
        }
}

=head2 join_with

Joins walues with some delimiter

  $v = join_with(", ", @v);

=cut

sub join_with {
        my $d = shift;
        return join($d, @_);
}

# END
1;
1	package WebPAC::Normalize::Set;
2	use Exporter 'import';
3	@EXPORT = qw/
4	set_rec set_lookup
5	get_ds clean_ds
6	tag search display
7	rec1 rec2 rec
8	regex prefix
9	first lookup join_with
10	/;
11
12	use warnings;
13	use strict;
14
15	#use base qw/WebPAC::Common/;
16	use Data::Dumper;
17
18	=head1 NAME
19
20	WebPAC::Normalize::Set - describe normalisaton rules using sets
21
22	=head1 VERSION
23
24	Version 0.03
25
26	=cut
27
28	our $VERSION = '0.03';
29
30	=head1 SYNOPSIS
31
32	This module uses C<conf/normalize/*.pl> files to perform normalisation
33	from input records using perl functions which are specialized for set
34	processing.
35
36	Sets are implemented as arrays, and normalisation file is valid perl, which
37	means that you check it's validity before running WebPAC using
38	C<perl -c normalize.pl>.
39
40	Normalisation can generate multiple output normalized data. For now, supported output
41	types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42
43	=head1 FUNCTIONS
44
45	=head2 set_rec
46
47	Set current record hash
48
49	set_rec( $rec );
50
51	=cut
52
53	my $rec;
54
55	sub set_rec {
56	$rec = shift or die "no record hash";
57	}
58
59	=head2 tag
60
61	Define new tag for I<search> and I<display>.
62
63	tag('Title', rec('200','a') );
64
65
66	=cut
67
68	my $out;
69
70	sub tag {
71	my $name = shift or die "tag needs name as first argument";
72	return unless (@_);
73	$out->{$name}->{tag} = $name;
74	$out->{$name}->{search} = \@_;
75	$out->{$name}->{display} = \@_;
76	}
77
78	=head2 display
79
80	Define tag just for I<display>
81
82	@v = display('Title', rec('200','a') );
83
84	=cut
85
86	sub display {
87	my $name = shift or die "display needs name as first argument";
88	return unless (@_);
89	$out->{$name}->{tag} = $name;
90	$out->{$name}->{display} = \@_;
91	}
92
93	=head2 search
94
95	Prepare values just for I<search>
96
97	@v = search('Title', rec('200','a') );
98
99	=cut
100
101	sub search {
102	my $name = shift or die "search needs name as first argument";
103	return unless (@_);
104	$out->{$name}->{tag} = $name;
105	$out->{$name}->{search} = \@_;
106	}
107
108	=head2 get_ds
109
110	Return hash formatted as data structure
111
112	my $ds = get_ds();
113
114	=cut
115
116	sub get_ds {
117	return $out;
118	}
119
120	=head2 clean_ds
121
122	Clean data structure hash for next record
123
124	clean_ds();
125
126	=cut
127
128	sub clean_ds {
129	$out = undef;
130	}
131
132	=head2 set_lookup
133
134	Set current lookup hash
135
136	set_lookup( $lookup );
137
138	=cut
139
140	my $lookup;
141
142	sub set_lookup {
143	$lookup = shift or die "no lookup hash";
144	}
145
146	=head2 rec1
147
148	Return all values in some field
149
150	@v = rec1('200')
151
152	TODO: order of values is probably same as in source data, need to investigate that
153
154	=cut
155
156	sub rec1 {
157	my $f = shift;
158	return unless (defined($rec && $rec->{$f}));
159	if (ref($rec->{$f}) eq 'ARRAY') {
160	if (ref($rec->{$f}->[0]) eq 'HASH') {
161	return map { values %{$_} } @{ $rec->{$f} };
162	} else {
163	return @{ $rec->{$f} };
164	}
165	} elsif( defined($rec->{$f}) ) {
166	return $rec->{$f};
167	}
168	}
169
170	=head2 rec2
171
172	Return all values in specific field and subfield
173
174	@v = rec2('200','a')
175
176	=cut
177
178	sub rec2 {
179	my $f = shift;
180	return unless (defined($rec && $rec->{$f}));
181	my $sf = shift;
182	return map { $_->{$sf} } grep { $_->{$sf} } @{ $rec->{$f} };
183	}
184
185	=head2 rec
186
187	syntaxtic sugar for
188
189	@v = rec('200')
190	@v = rec('200','a')
191
192	=cut
193
194	sub rec {
195	if ($#_ == 0) {
196	return rec1(@_);
197	} elsif ($#_ == 1) {
198	return rec2(@_);
199	}
200	}
201
202	=head2 regex
203
204	Apply regex to some or all values
205
206	@v = regex( 's/foo/bar/g', @v );
207
208	=cut
209
210	sub regex {
211	my $r = shift;
212	my @out;
213	#warn "r: $r\n",Dumper(\@_);
214	foreach my $t (@_) {
215	eval "\$t =~ $r";
216	push @out, $t;
217	}
218	return @out;
219	}
220
221	=head2 prefix
222
223	Prefix all values with a string
224
225	@v = prefix( 'my_', @v );
226
227	=cut
228
229	sub prefix {
230	my $p = shift or die "prefix needs string as first argument";
231	return map { $p . $_ } grep { defined($_) } @_;
232	}
233
234	=head2 first
235
236	Return first element
237
238	$v = first( @v );
239
240	=cut
241
242	sub first {
243	my $r = shift;
244	return $r;
245	}
246
247	=head2 lookup
248
249	Consult lookup hashes for some value
250
251	@v = lookup( $v );
252	@v = lookup( @v );
253
254	=cut
255
256	sub lookup {
257	my $k = shift or return;
258	return unless (defined($lookup->{$k}));
259	if (ref($lookup->{$k}) eq 'ARRAY') {
260	return @{ $lookup->{$k} };
261	} else {
262	return $lookup->{$k};
263	}
264	}
265
266	=head2 join_with
267
268	Joins walues with some delimiter
269
270	$v = join_with(", ", @v);
271
272	=cut
273
274	sub join_with {
275	my $d = shift;
276	return join($d, @_);
277	}
278
279	# END
280	1;