WebPAC/Normalize/Set.pm

package WebPAC::Normalize::Set;
use Exporter 'import';
@EXPORT = qw/
        set_rec set_lookup
        get_ds clean_ds
        tag search display
        rec1 rec2 rec
        regex prefix
        first lookup join_with
/;

use warnings;
use strict;

#use base qw/WebPAC::Common/;
use Data::Dumper;

=head1 NAME

WebPAC::Normalize::Set - describe normalisaton rules using sets

=head1 VERSION

Version 0.03

=cut

our $VERSION = '0.03';

=head1 SYNOPSIS

This module uses C<conf/normalize/*.pl> files to perform normalisation
from input records using perl functions which are specialized for set
processing.

Sets are implemented as arrays, and normalisation file is valid perl, which
means that you check it's validity before running WebPAC using
C<perl -c normalize.pl>.

Normalisation can generate multiple output normalized data. For now, supported output
types (on the left side of definition) are: C<tag>, C<display> and C<search>.

=head1 FUNCTIONS

=head2 set_rec

Set current record hash

  set_rec( $rec );

=cut

my $rec;

sub set_rec {
        $rec = shift or die "no record hash";
}

=head2 tag

Define new tag for I<search> and I<display>.

  tag('Title', rec('200','a') );


=cut

my $out;

sub tag {
        my $name = shift or die "tag needs name as first argument";
        return unless (@_);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@_;
        $out->{$name}->{display} = \@_;
}

=head2 display

Define tag just for I<display>

  @v = display('Title', rec('200','a') );

=cut

sub display {
        my $name = shift or die "display needs name as first argument";
        return unless (@_);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{display} = \@_;
}

=head2 search

Prepare values just for I<search>

  @v = search('Title', rec('200','a') );

=cut

sub search {
        my $name = shift or die "search needs name as first argument";
        return unless (@_);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@_;
}

=head2 get_ds

Return hash formatted as data structure

  my $ds = get_ds();

=cut

sub get_ds {
        return $out;
}

=head2 clean_ds

Clean data structure hash for next record

  clean_ds();

=cut

sub clean_ds {
        $out = undef;
}

=head2 set_lookup

Set current lookup hash

  set_lookup( $lookup );

=cut

my $lookup;

sub set_lookup {
        $lookup = shift or die "no lookup hash";
}

=head2 rec1

Return all values in some field

  @v = rec1('200')

TODO: order of values is probably same as in source data, need to investigate that

=cut

sub rec1 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        if (ref($rec->{$f}) eq 'ARRAY') {
                if (ref($rec->{$f}->[0]) eq 'HASH') {
                        return map { values %{$_} } @{ $rec->{$f} };
                } else {
                        return @{ $rec->{$f} };
                }
        } elsif( defined($rec->{$f}) ) {
                return $rec->{$f};
        }
}

=head2 rec2

Return all values in specific field and subfield

  @v = rec2('200','a')

=cut

sub rec2 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        my $sf = shift;
        return map { $_->{$sf} } grep { $_->{$sf} } @{ $rec->{$f} };
}

=head2 rec

syntaxtic sugar for

  @v = rec('200')
  @v = rec('200','a')

=cut

sub rec {
        if ($#_ == 0) {
                return rec1(@_);
        } elsif ($#_ == 1) {
                return rec2(@_);
        }
}

=head2 regex

Apply regex to some or all values

  @v = regex( 's/foo/bar/g', @v );

=cut

sub regex {
        my $r = shift;
        my @out;
        #warn "r: $r\n",Dumper(\@_);
        foreach my $t (@_) {
                eval "\$t =~ $r";
                push @out, $t;
        }
        return @out;
}

=head2 prefix

Prefix all values with a string

  @v = prefix( 'my_', @v );

=cut

sub prefix {
        my $p = shift or die "prefix needs string as first argument";
        return map { $p . $_ } grep { defined($_) } @_;
}

=head2 first

Return first element

  $v = first( @v );

=cut

sub first {
        my $r = shift;
        return $r;
}

=head2 lookup

Consult lookup hashes for some value

  @v = lookup( $v );
  @v = lookup( @v );

=cut

sub lookup {
        my $k = shift or return;
        return unless (defined($lookup->{$k}));
        if (ref($lookup->{$k}) eq 'ARRAY') {
                return @{ $lookup->{$k} };
        } else {
                return $lookup->{$k};
        }
}

=head2 join_with

Joins walues with some delimiter

  $v = join_with(", ", @v);

=cut

sub join_with {
        my $d = shift;
        return join($d, @_);
}

# END
1;
1	dpavlin	460	package WebPAC::Normalize::Set;
2			use Exporter 'import';
3			@EXPORT = qw/
4			set_rec set_lookup
5	dpavlin	461	get_ds clean_ds
6	dpavlin	460	tag search display
7	dpavlin	463	rec1 rec2 rec
8			regex prefix
9			first lookup join_with
10	dpavlin	460	/;
11
12			use warnings;
13			use strict;
14
15			#use base qw/WebPAC::Common/;
16			use Data::Dumper;
17
18			=head1 NAME
19
20			WebPAC::Normalize::Set - describe normalisaton rules using sets
21
22			=head1 VERSION
23
24	dpavlin	463	Version 0.03
25	dpavlin	460
26			=cut
27
28	dpavlin	463	our $VERSION = '0.03';
29	dpavlin	460
30			=head1 SYNOPSIS
31
32			This module uses C<conf/normalize/*.pl> files to perform normalisation
33	dpavlin	467	from input records using perl functions which are specialized for set
34			processing.
35	dpavlin	460
36	dpavlin	467	Sets are implemented as arrays, and normalisation file is valid perl, which
37			means that you check it's validity before running WebPAC using
38			C<perl -c normalize.pl>.
39
40			Normalisation can generate multiple output normalized data. For now, supported output
41			types (on the left side of definition) are: C<tag>, C<display> and C<search>.
42
43	dpavlin	460	=head1 FUNCTIONS
44
45			=head2 set_rec
46
47			Set current record hash
48
49			set_rec( $rec );
50
51			=cut
52
53			my $rec;
54
55			sub set_rec {
56			$rec = shift or die "no record hash";
57			}
58
59			=head2 tag
60
61	dpavlin	467	Define new tag for I<search> and I<display>.
62	dpavlin	460
63			tag('Title', rec('200','a') );
64
65
66			=cut
67
68			my $out;
69
70			sub tag {
71	dpavlin	463	my $name = shift or die "tag needs name as first argument";
72	dpavlin	460	return unless (@_);
73			$out->{$name}->{tag} = $name;
74			$out->{$name}->{search} = \@_;
75			$out->{$name}->{display} = \@_;
76			}
77
78			=head2 display
79
80			Define tag just for I<display>
81
82			@v = display('Title', rec('200','a') );
83
84			=cut
85
86			sub display {
87	dpavlin	463	my $name = shift or die "display needs name as first argument";
88	dpavlin	460	return unless (@_);
89			$out->{$name}->{tag} = $name;
90			$out->{$name}->{display} = \@_;
91			}
92
93			=head2 search
94
95			Prepare values just for I<search>
96
97			@v = search('Title', rec('200','a') );
98
99			=cut
100
101			sub search {
102	dpavlin	463	my $name = shift or die "search needs name as first argument";
103	dpavlin	460	return unless (@_);
104			$out->{$name}->{tag} = $name;
105			$out->{$name}->{search} = \@_;
106			}
107
108			=head2 get_ds
109
110			Return hash formatted as data structure
111
112			my $ds = get_ds();
113
114			=cut
115
116			sub get_ds {
117			return $out;
118			}
119
120	dpavlin	461	=head2 clean_ds
121
122			Clean data structure hash for next record
123
124			clean_ds();
125
126			=cut
127
128			sub clean_ds {
129			$out = undef;
130			}
131
132			=head2 set_lookup
133
134			Set current lookup hash
135
136			set_lookup( $lookup );
137
138			=cut
139
140	dpavlin	460	my $lookup;
141
142			sub set_lookup {
143			$lookup = shift or die "no lookup hash";
144			}
145
146			=head2 rec1
147
148			Return all values in some field
149
150			@v = rec1('200')
151
152			TODO: order of values is probably same as in source data, need to investigate that
153
154			=cut
155
156			sub rec1 {
157			my $f = shift;
158	dpavlin	461	return unless (defined($rec && $rec->{$f}));
159	dpavlin	460	if (ref($rec->{$f}) eq 'ARRAY') {
160	dpavlin	461	if (ref($rec->{$f}->[0]) eq 'HASH') {
161			return map { values %{$_} } @{ $rec->{$f} };
162			} else {
163			return @{ $rec->{$f} };
164			}
165	dpavlin	460	} elsif( defined($rec->{$f}) ) {
166			return $rec->{$f};
167			}
168			}
169
170			=head2 rec2
171
172			Return all values in specific field and subfield
173
174			@v = rec2('200','a')
175
176			=cut
177
178			sub rec2 {
179			my $f = shift;
180	dpavlin	461	return unless (defined($rec && $rec->{$f}));
181	dpavlin	460	my $sf = shift;
182			return map { $_->{$sf} } grep { $_->{$sf} } @{ $rec->{$f} };
183			}
184
185			=head2 rec
186
187			syntaxtic sugar for
188
189			@v = rec('200')
190			@v = rec('200','a')
191
192			=cut
193
194			sub rec {
195			if ($#_ == 0) {
196			return rec1(@_);
197			} elsif ($#_ == 1) {
198			return rec2(@_);
199			}
200			}
201
202			=head2 regex
203
204			Apply regex to some or all values
205
206			@v = regex( 's/foo/bar/g', @v );
207
208			=cut
209
210			sub regex {
211			my $r = shift;
212			my @out;
213	dpavlin	465	#warn "r: $r\n",Dumper(\@_);
214	dpavlin	460	foreach my $t (@_) {
215			eval "\$t =~ $r";
216			push @out, $t;
217			}
218			return @out;
219			}
220
221	dpavlin	463	=head2 prefix
222
223			Prefix all values with a string
224
225			@v = prefix( 'my_', @v );
226
227			=cut
228
229			sub prefix {
230			my $p = shift or die "prefix needs string as first argument";
231	dpavlin	467	return map { $p . $_ } grep { defined($_) } @_;
232	dpavlin	463	}
233
234	dpavlin	460	=head2 first
235
236			Return first element
237
238			$v = first( @v );
239
240			=cut
241
242			sub first {
243			my $r = shift;
244			return $r;
245			}
246
247			=head2 lookup
248
249			Consult lookup hashes for some value
250
251			@v = lookup( $v );
252			@v = lookup( @v );
253
254			=cut
255
256			sub lookup {
257	dpavlin	465	my $k = shift or return;
258			return unless (defined($lookup->{$k}));
259	dpavlin	460	if (ref($lookup->{$k}) eq 'ARRAY') {
260			return @{ $lookup->{$k} };
261			} else {
262			return $lookup->{$k};
263			}
264			}
265
266			=head2 join_with
267
268			Joins walues with some delimiter
269
270			$v = join_with(", ", @v);
271
272			=cut
273
274			sub join_with {
275			my $d = shift;
276			return join($d, @_);
277			}
278
279			# END
280			1;