lib/WebPAC/Normalize.pm

package WebPAC::Normalize;
use Exporter 'import';
@EXPORT = qw/
        _set_rec _set_lookup
        _get_ds _clean_ds

        tag search display
        marc marc_indicators marc_repeatable_subfield

        rec1 rec2 rec
        regex prefix suffix surround
        first lookup join_with
/;

use warnings;
use strict;

#use base qw/WebPAC::Common/;
use Data::Dump qw/dump/;
use Encode qw/from_to/;

# debugging warn(s)
my $debug = 0;


=head1 NAME

WebPAC::Normalize - describe normalisaton rules using sets

=head1 VERSION

Version 0.06

=cut

our $VERSION = '0.06';

=head1 SYNOPSIS

This module uses C<conf/normalize/*.pl> files to perform normalisation
from input records using perl functions which are specialized for set
processing.

Sets are implemented as arrays, and normalisation file is valid perl, which
means that you check it's validity before running WebPAC using
C<perl -c normalize.pl>.

Normalisation can generate multiple output normalized data. For now, supported output
types (on the left side of definition) are: C<tag>, C<display>, C<search> and
C<marc>.

=head1 FUNCTIONS

Functions which start with C<_> are private and used by WebPAC internally.
All other functions are available for use within normalisation rules.

=head2 data_structure

Return data structure

  my $ds = WebPAC::Normalize::data_structure(
        lookup => $lookup->lookup_hash,
        row => $row,
        rules => $normalize_pl_config,
        marc_encoding => 'utf-8',
  );

Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
other are optional.

This function will B<die> if normalizastion can't be evaled.

Since this function isn't exported you have to call it with 
C<WebPAC::Normalize::data_structure>.

=cut

sub data_structure {
        my $arg = {@_};

        die "need row argument" unless ($arg->{row});
        die "need normalisation argument" unless ($arg->{rules});

        no strict 'subs';
        _set_lookup( $arg->{lookup} );
        _set_rec( $arg->{row} );
        _clean_ds( %{ $arg } );
        eval "$arg->{rules}";
        die "error evaling $arg->{rules}: $@\n" if ($@);

        return _get_ds();
}

=head2 _set_rec

Set current record hash

  _set_rec( $rec );

=cut

my $rec;

sub _set_rec {
        $rec = shift or die "no record hash";
}

=head2 _get_ds

Return hash formatted as data structure

  my $ds = _get_ds();

=cut

my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);

sub _get_ds {
        return $out;
}

=head2 _clean_ds

Clean data structure hash for next record

  _clean_ds();

=cut

sub _clean_ds {
        my $a = {@_};
        ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
        $marc_encoding = $a->{marc_encoding};
}

=head2 _set_lookup

Set current lookup hash

  _set_lookup( $lookup );

=cut

my $lookup;

sub _set_lookup {
        $lookup = shift;
}

=head2 _get_marc_fields

Get all fields defined by calls to C<marc>

        $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );


We are using I<magic> which detect repeatable fields only from
sequence of field/subfield data generated by normalization.

Repeatable field is created if there is second occurence of same subfield or
if any of indicators are different. This is sane for most cases except for
non-repeatable fields with repeatable subfields.

You can change behaviour of that using C<marc_repeatable_subfield>.

=cut

sub _get_marc_fields {

        return if (! $marc_record || ref($marc_record) ne 'ARRAY' || $#{ $marc_record } < 0);

        # first, sort all existing fields 
        # XXX might not be needed, but modern perl might randomize elements in hash
        my @sorted_marc_record = sort {
                $a->[0] . $a->[3] cmp $b->[0] . $b->[3] 
        } @{ $marc_record };

        # output marc fields
        my @m;

        # count unique field-subfields (used for offset when walking to next subfield)
        my $u;
        map { $u->{ $_->[0] . $_->[3]  }++ } @sorted_marc_record;

        if ($debug) {
                warn "## marc_repeatable_subfield ", dump( $marc_repeatable_subfield ), $/;
                warn "## marc_record ", dump( $marc_record ), $/;
                warn "## sorted_marc_record ", dump( \@sorted_marc_record ), $/;
                warn "## subfield count ", dump( $u ), $/;
        }

        my $len = $#sorted_marc_record;
        my $visited;
        my $i = 0;
        my $field;

        foreach ( 0 .. $len ) {

                # find next element which isn't visited
                while ($visited->{$i}) {
                        $i = ($i + 1) % ($len + 1);
                }

                # mark it visited
                $visited->{$i}++;

                my $row = $sorted_marc_record[$i];

                # field and subfield which is key for
                # marc_repeatable_subfield and u
                my $fsf = $row->[0] . $row->[3];

                if ($debug > 1) {

                        print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
                        print "### this [$i]: ", dump( $row ),$/;
                        print "### sf: ", $row->[3], " vs ", $field->[3],
                                $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
                                if ($#$field >= 0);

                }

                # if field exists
                if ( $#$field >= 0 ) {
                        if (
                                $row->[0] ne $field->[0] ||             # field
                                $row->[1] ne $field->[1] ||             # i1
                                $row->[2] ne $field->[2]                # i2
                        ) {
                                push @m, $field;
                                warn "## saved/1 ", dump( $field ),$/ if ($debug);
                                $field = $row;

                        } elsif (
                                ( $row->[3] lt $field->[-2] )           # subfield which is not next (e.g. a after c)
                                ||
                                ( $row->[3] eq $field->[-2] &&          # same subfield, but not repeatable
                                        ! $marc_repeatable_subfield->{ $fsf }
                                )
                        ) {
                                push @m, $field;
                                warn "## saved/2 ", dump( $field ),$/ if ($debug);
                                $field = $row;

                        } else {
                                # append new subfields to existing field
                                push @$field, ( $row->[3], $row->[4] );
                        }
                } else {
                        # insert first field
                        $field = $row;
                }

                if (! $marc_repeatable_subfield->{ $fsf }) {
                        # make step to next subfield
                        $i = ($i + $u->{ $fsf } ) % ($len + 1);
                }
        }

        if ($#$field >= 0) {
                push @m, $field;
                warn "## saved/3 ", dump( $field ),$/ if ($debug);
        }

        return @m;
}

=head1 Functions to create C<data_structure>

Those functions generally have to first in your normalization file.

=head2 tag

Define new tag for I<search> and I<display>.

  tag('Title', rec('200','a') );


=cut

sub tag {
        my $name = shift or die "tag needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@o;
        $out->{$name}->{display} = \@o;
}

=head2 display

Define tag just for I<display>

  @v = display('Title', rec('200','a') );

=cut

sub display {
        my $name = shift or die "display needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{display} = \@o;
}

=head2 search

Prepare values just for I<search>

  @v = search('Title', rec('200','a') );

=cut

sub search {
        my $name = shift or die "search needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@o;
}

=head2 marc

Save value for MARC field

  marc('900','a', rec('200','a') );

=cut

sub marc {
        my $f = shift or die "marc needs field";
        die "marc field must be numer" unless ($f =~ /^\d+$/);

        my $sf = shift or die "marc needs subfield";

        foreach (@_) {
                my $v = $_;             # make var read-write for Encode
                next unless (defined($v) && $v !~ /^\s*$/);
                from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
                my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
                push @{ $marc_record }, [ $f, $i1, $i2, $sf => $v ];
        }
}

=head2 marc_repeatable_subfield

Save values for MARC repetable subfield

  marc_repeatable_subfield('910', 'z', rec('909') );

=cut

sub marc_repeatable_subfield {
        my ($f,$sf) = @_;
        die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
        $marc_repeatable_subfield->{ $f . $sf }++;
        marc(@_);
}

=head2 marc_indicators

Set both indicators for MARC field

  marc_indicators('900', ' ', 1);

Any indicator value other than C<0-9> will be treated as undefined.

=cut

sub marc_indicators {
        my $f = shift || die "marc_indicators need field!\n";
        my ($i1,$i2) = @_;
        die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
        die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));

        $i1 = ' ' if ($i1 !~ /^\d$/);
        $i2 = ' ' if ($i2 !~ /^\d$/);
        @{ $marc_indicators->{$f} } = ($i1,$i2);
}


=head1 Functions to extract data from input

This function should be used inside functions to create C<data_structure> described
above.

=head2 rec1

Return all values in some field

  @v = rec1('200')

TODO: order of values is probably same as in source data, need to investigate that

=cut

sub rec1 {
        my $f = shift;
        return unless (defined($rec) && defined($rec->{$f}));
        if (ref($rec->{$f}) eq 'ARRAY') {
                return map { 
                        if (ref($_) eq 'HASH') {
                                values %{$_};
                        } else {
                                $_;
                        }
                } @{ $rec->{$f} };
        } elsif( defined($rec->{$f}) ) {
                return $rec->{$f};
        }
}

=head2 rec2

Return all values in specific field and subfield

  @v = rec2('200','a')

=cut

sub rec2 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        my $sf = shift;
        return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
}

=head2 rec

syntaxtic sugar for

  @v = rec('200')
  @v = rec('200','a')

=cut

sub rec {
        if ($#_ == 0) {
                return rec1(@_);
        } elsif ($#_ == 1) {
                return rec2(@_);
        }
}

=head2 regex

Apply regex to some or all values

  @v = regex( 's/foo/bar/g', @v );

=cut

sub regex {
        my $r = shift;
        my @out;
        #warn "r: $r\n", dump(\@_);
        foreach my $t (@_) {
                next unless ($t);
                eval "\$t =~ $r";
                push @out, $t if ($t && $t ne '');
        }
        return @out;
}

=head2 prefix

Prefix all values with a string

  @v = prefix( 'my_', @v );

=cut

sub prefix {
        my $p = shift or die "prefix needs string as first argument";
        return map { $p . $_ } grep { defined($_) } @_;
}

=head2 suffix

suffix all values with a string

  @v = suffix( '_my', @v );

=cut

sub suffix {
        my $s = shift or die "suffix needs string as first argument";
        return map { $_ . $s } grep { defined($_) } @_;
}

=head2 surround

surround all values with a two strings

  @v = surround( 'prefix_', '_suffix', @v );

=cut

sub surround {
        my $p = shift or die "surround need prefix as first argument";
        my $s = shift or die "surround needs suffix as second argument";
        return map { $p . $_ . $s } grep { defined($_) } @_;
}

=head2 first

Return first element

  $v = first( @v );

=cut

sub first {
        my $r = shift;
        return $r;
}

=head2 lookup

Consult lookup hashes for some value

  @v = lookup( $v );
  @v = lookup( @v );

=cut

sub lookup {
        my $k = shift or return;
        return unless (defined($lookup->{$k}));
        if (ref($lookup->{$k}) eq 'ARRAY') {
                return @{ $lookup->{$k} };
        } else {
                return $lookup->{$k};
        }
}

=head2 join_with

Joins walues with some delimiter

  $v = join_with(", ", @v);

=cut

sub join_with {
        my $d = shift;
        return join($d, grep { defined($_) && $_ ne '' } @_);
}

# END
1;
1	package WebPAC::Normalize;
2	use Exporter 'import';
3	@EXPORT = qw/
4	_set_rec _set_lookup
5	_get_ds _clean_ds
6
7	tag search display
8	marc marc_indicators marc_repeatable_subfield
9
10	rec1 rec2 rec
11	regex prefix suffix surround
12	first lookup join_with
13	/;
14
15	use warnings;
16	use strict;
17
18	#use base qw/WebPAC::Common/;
19	use Data::Dump qw/dump/;
20	use Encode qw/from_to/;
21
22	# debugging warn(s)
23	my $debug = 0;
24
25
26	=head1 NAME
27
28	WebPAC::Normalize - describe normalisaton rules using sets
29
30	=head1 VERSION
31
32	Version 0.06
33
34	=cut
35
36	our $VERSION = '0.06';
37
38	=head1 SYNOPSIS
39
40	This module uses C<conf/normalize/*.pl> files to perform normalisation
41	from input records using perl functions which are specialized for set
42	processing.
43
44	Sets are implemented as arrays, and normalisation file is valid perl, which
45	means that you check it's validity before running WebPAC using
46	C<perl -c normalize.pl>.
47
48	Normalisation can generate multiple output normalized data. For now, supported output
49	types (on the left side of definition) are: C<tag>, C<display>, C<search> and
50	C<marc>.
51
52	=head1 FUNCTIONS
53
54	Functions which start with C<_> are private and used by WebPAC internally.
55	All other functions are available for use within normalisation rules.
56
57	=head2 data_structure
58
59	Return data structure
60
61	my $ds = WebPAC::Normalize::data_structure(
62	lookup => $lookup->lookup_hash,
63	row => $row,
64	rules => $normalize_pl_config,
65	marc_encoding => 'utf-8',
66	);
67
68	Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
69	other are optional.
70
71	This function will B<die> if normalizastion can't be evaled.
72
73	Since this function isn't exported you have to call it with
74	C<WebPAC::Normalize::data_structure>.
75
76	=cut
77
78	sub data_structure {
79	my $arg = {@_};
80
81	die "need row argument" unless ($arg->{row});
82	die "need normalisation argument" unless ($arg->{rules});
83
84	no strict 'subs';
85	_set_lookup( $arg->{lookup} );
86	_set_rec( $arg->{row} );
87	_clean_ds( %{ $arg } );
88	eval "$arg->{rules}";
89	die "error evaling $arg->{rules}: $@\n" if ($@);
90
91	return _get_ds();
92	}
93
94	=head2 _set_rec
95
96	Set current record hash
97
98	_set_rec( $rec );
99
100	=cut
101
102	my $rec;
103
104	sub _set_rec {
105	$rec = shift or die "no record hash";
106	}
107
108	=head2 _get_ds
109
110	Return hash formatted as data structure
111
112	my $ds = _get_ds();
113
114	=cut
115
116	my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
117
118	sub _get_ds {
119	return $out;
120	}
121
122	=head2 _clean_ds
123
124	Clean data structure hash for next record
125
126	_clean_ds();
127
128	=cut
129
130	sub _clean_ds {
131	my $a = {@_};
132	($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
133	$marc_encoding = $a->{marc_encoding};
134	}
135
136	=head2 _set_lookup
137
138	Set current lookup hash
139
140	_set_lookup( $lookup );
141
142	=cut
143
144	my $lookup;
145
146	sub _set_lookup {
147	$lookup = shift;
148	}
149
150	=head2 _get_marc_fields
151
152	Get all fields defined by calls to C<marc>
153
154	$marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
155
156
157
158	We are using I<magic> which detect repeatable fields only from
159	sequence of field/subfield data generated by normalization.
160
161	Repeatable field is created if there is second occurence of same subfield or
162	if any of indicators are different. This is sane for most cases except for
163	non-repeatable fields with repeatable subfields.
164
165	You can change behaviour of that using C<marc_repeatable_subfield>.
166
167	=cut
168
169	sub _get_marc_fields {
170
171	return if (! $marc_record \|\| ref($marc_record) ne 'ARRAY' \|\| $#{ $marc_record } < 0);
172
173	# first, sort all existing fields
174	# XXX might not be needed, but modern perl might randomize elements in hash
175	my @sorted_marc_record = sort {
176	$a->[0] . $a->[3] cmp $b->[0] . $b->[3]
177	} @{ $marc_record };
178
179	# output marc fields
180	my @m;
181
182	# count unique field-subfields (used for offset when walking to next subfield)
183	my $u;
184	map { $u->{ $_->[0] . $_->[3] }++ } @sorted_marc_record;
185
186	if ($debug) {
187	warn "## marc_repeatable_subfield ", dump( $marc_repeatable_subfield ), $/;
188	warn "## marc_record ", dump( $marc_record ), $/;
189	warn "## sorted_marc_record ", dump( \@sorted_marc_record ), $/;
190	warn "## subfield count ", dump( $u ), $/;
191	}
192
193	my $len = $#sorted_marc_record;
194	my $visited;
195	my $i = 0;
196	my $field;
197
198	foreach ( 0 .. $len ) {
199
200	# find next element which isn't visited
201	while ($visited->{$i}) {
202	$i = ($i + 1) % ($len + 1);
203	}
204
205	# mark it visited
206	$visited->{$i}++;
207
208	my $row = $sorted_marc_record[$i];
209
210	# field and subfield which is key for
211	# marc_repeatable_subfield and u
212	my $fsf = $row->[0] . $row->[3];
213
214	if ($debug > 1) {
215
216	print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
217	print "### this [$i]: ", dump( $row ),$/;
218	print "### sf: ", $row->[3], " vs ", $field->[3],
219	$marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
220	if ($#$field >= 0);
221
222	}
223
224	# if field exists
225	if ( $#$field >= 0 ) {
226	if (
227	$row->[0] ne $field->[0] \|\| # field
228	$row->[1] ne $field->[1] \|\| # i1
229	$row->[2] ne $field->[2] # i2
230	) {
231	push @m, $field;
232	warn "## saved/1 ", dump( $field ),$/ if ($debug);
233	$field = $row;
234
235	} elsif (
236	( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
237	\|\|
238	( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
239	! $marc_repeatable_subfield->{ $fsf }
240	)
241	) {
242	push @m, $field;
243	warn "## saved/2 ", dump( $field ),$/ if ($debug);
244	$field = $row;
245
246	} else {
247	# append new subfields to existing field
248	push @$field, ( $row->[3], $row->[4] );
249	}
250	} else {
251	# insert first field
252	$field = $row;
253	}
254
255	if (! $marc_repeatable_subfield->{ $fsf }) {
256	# make step to next subfield
257	$i = ($i + $u->{ $fsf } ) % ($len + 1);
258	}
259	}
260
261	if ($#$field >= 0) {
262	push @m, $field;
263	warn "## saved/3 ", dump( $field ),$/ if ($debug);
264	}
265
266	return @m;
267	}
268
269	=head1 Functions to create C<data_structure>
270
271	Those functions generally have to first in your normalization file.
272
273	=head2 tag
274
275	Define new tag for I<search> and I<display>.
276
277	tag('Title', rec('200','a') );
278
279
280	=cut
281
282	sub tag {
283	my $name = shift or die "tag needs name as first argument";
284	my @o = grep { defined($_) && $_ ne '' } @_;
285	return unless (@o);
286	$out->{$name}->{tag} = $name;
287	$out->{$name}->{search} = \@o;
288	$out->{$name}->{display} = \@o;
289	}
290
291	=head2 display
292
293	Define tag just for I<display>
294
295	@v = display('Title', rec('200','a') );
296
297	=cut
298
299	sub display {
300	my $name = shift or die "display needs name as first argument";
301	my @o = grep { defined($_) && $_ ne '' } @_;
302	return unless (@o);
303	$out->{$name}->{tag} = $name;
304	$out->{$name}->{display} = \@o;
305	}
306
307	=head2 search
308
309	Prepare values just for I<search>
310
311	@v = search('Title', rec('200','a') );
312
313	=cut
314
315	sub search {
316	my $name = shift or die "search needs name as first argument";
317	my @o = grep { defined($_) && $_ ne '' } @_;
318	return unless (@o);
319	$out->{$name}->{tag} = $name;
320	$out->{$name}->{search} = \@o;
321	}
322
323	=head2 marc
324
325	Save value for MARC field
326
327	marc('900','a', rec('200','a') );
328
329	=cut
330
331	sub marc {
332	my $f = shift or die "marc needs field";
333	die "marc field must be numer" unless ($f =~ /^\d+$/);
334
335	my $sf = shift or die "marc needs subfield";
336
337	foreach (@_) {
338	my $v = $_; # make var read-write for Encode
339	next unless (defined($v) && $v !~ /^\s*$/);
340	from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
341	my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
342	push @{ $marc_record }, [ $f, $i1, $i2, $sf => $v ];
343	}
344	}
345
346	=head2 marc_repeatable_subfield
347
348	Save values for MARC repetable subfield
349
350	marc_repeatable_subfield('910', 'z', rec('909') );
351
352	=cut
353
354	sub marc_repeatable_subfield {
355	my ($f,$sf) = @_;
356	die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
357	$marc_repeatable_subfield->{ $f . $sf }++;
358	marc(@_);
359	}
360
361	=head2 marc_indicators
362
363	Set both indicators for MARC field
364
365	marc_indicators('900', ' ', 1);
366
367	Any indicator value other than C<0-9> will be treated as undefined.
368
369	=cut
370
371	sub marc_indicators {
372	my $f = shift \|\| die "marc_indicators need field!\n";
373	my ($i1,$i2) = @_;
374	die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
375	die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
376
377	$i1 = ' ' if ($i1 !~ /^\d$/);
378	$i2 = ' ' if ($i2 !~ /^\d$/);
379	@{ $marc_indicators->{$f} } = ($i1,$i2);
380	}
381
382
383	=head1 Functions to extract data from input
384
385	This function should be used inside functions to create C<data_structure> described
386	above.
387
388	=head2 rec1
389
390	Return all values in some field
391
392	@v = rec1('200')
393
394	TODO: order of values is probably same as in source data, need to investigate that
395
396	=cut
397
398	sub rec1 {
399	my $f = shift;
400	return unless (defined($rec) && defined($rec->{$f}));
401	if (ref($rec->{$f}) eq 'ARRAY') {
402	return map {
403	if (ref($_) eq 'HASH') {
404	values %{$_};
405	} else {
406	$_;
407	}
408	} @{ $rec->{$f} };
409	} elsif( defined($rec->{$f}) ) {
410	return $rec->{$f};
411	}
412	}
413
414	=head2 rec2
415
416	Return all values in specific field and subfield
417
418	@v = rec2('200','a')
419
420	=cut
421
422	sub rec2 {
423	my $f = shift;
424	return unless (defined($rec && $rec->{$f}));
425	my $sf = shift;
426	return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
427	}
428
429	=head2 rec
430
431	syntaxtic sugar for
432
433	@v = rec('200')
434	@v = rec('200','a')
435
436	=cut
437
438	sub rec {
439	if ($#_ == 0) {
440	return rec1(@_);
441	} elsif ($#_ == 1) {
442	return rec2(@_);
443	}
444	}
445
446	=head2 regex
447
448	Apply regex to some or all values
449
450	@v = regex( 's/foo/bar/g', @v );
451
452	=cut
453
454	sub regex {
455	my $r = shift;
456	my @out;
457	#warn "r: $r\n", dump(\@_);
458	foreach my $t (@_) {
459	next unless ($t);
460	eval "\$t =~ $r";
461	push @out, $t if ($t && $t ne '');
462	}
463	return @out;
464	}
465
466	=head2 prefix
467
468	Prefix all values with a string
469
470	@v = prefix( 'my_', @v );
471
472	=cut
473
474	sub prefix {
475	my $p = shift or die "prefix needs string as first argument";
476	return map { $p . $_ } grep { defined($_) } @_;
477	}
478
479	=head2 suffix
480
481	suffix all values with a string
482
483	@v = suffix( '_my', @v );
484
485	=cut
486
487	sub suffix {
488	my $s = shift or die "suffix needs string as first argument";
489	return map { $_ . $s } grep { defined($_) } @_;
490	}
491
492	=head2 surround
493
494	surround all values with a two strings
495
496	@v = surround( 'prefix_', '_suffix', @v );
497
498	=cut
499
500	sub surround {
501	my $p = shift or die "surround need prefix as first argument";
502	my $s = shift or die "surround needs suffix as second argument";
503	return map { $p . $_ . $s } grep { defined($_) } @_;
504	}
505
506	=head2 first
507
508	Return first element
509
510	$v = first( @v );
511
512	=cut
513
514	sub first {
515	my $r = shift;
516	return $r;
517	}
518
519	=head2 lookup
520
521	Consult lookup hashes for some value
522
523	@v = lookup( $v );
524	@v = lookup( @v );
525
526	=cut
527
528	sub lookup {
529	my $k = shift or return;
530	return unless (defined($lookup->{$k}));
531	if (ref($lookup->{$k}) eq 'ARRAY') {
532	return @{ $lookup->{$k} };
533	} else {
534	return $lookup->{$k};
535	}
536	}
537
538	=head2 join_with
539
540	Joins walues with some delimiter
541
542	$v = join_with(", ", @v);
543
544	=cut
545
546	sub join_with {
547	my $d = shift;
548	return join($d, grep { defined($_) && $_ ne '' } @_);
549	}
550
551	# END
552	1;