lib/WebPAC/Normalize.pm

package WebPAC::Normalize;
use Exporter 'import';
@EXPORT = qw/
        _set_rec _set_lookup
        _get_ds _clean_ds
        _debug

        tag search display
        marc marc_indicators marc_repeatable_subfield

        rec1 rec2 rec
        regex prefix suffix surround
        first lookup join_with
/;

use warnings;
use strict;

#use base qw/WebPAC::Common/;
use Data::Dump qw/dump/;
use Encode qw/from_to/;

# debugging warn(s)
my $debug = 0;


=head1 NAME

WebPAC::Normalize - describe normalisaton rules using sets

=head1 VERSION

Version 0.07

=cut

our $VERSION = '0.07';

=head1 SYNOPSIS

This module uses C<conf/normalize/*.pl> files to perform normalisation
from input records using perl functions which are specialized for set
processing.

Sets are implemented as arrays, and normalisation file is valid perl, which
means that you check it's validity before running WebPAC using
C<perl -c normalize.pl>.

Normalisation can generate multiple output normalized data. For now, supported output
types (on the left side of definition) are: C<tag>, C<display>, C<search> and
C<marc>.

=head1 FUNCTIONS

Functions which start with C<_> are private and used by WebPAC internally.
All other functions are available for use within normalisation rules.

=head2 data_structure

Return data structure

  my $ds = WebPAC::Normalize::data_structure(
        lookup => $lookup->lookup_hash,
        row => $row,
        rules => $normalize_pl_config,
        marc_encoding => 'utf-8',
  );

Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
other are optional.

This function will B<die> if normalizastion can't be evaled.

Since this function isn't exported you have to call it with 
C<WebPAC::Normalize::data_structure>.

=cut

sub data_structure {
        my $arg = {@_};

        die "need row argument" unless ($arg->{row});
        die "need normalisation argument" unless ($arg->{rules});

        no strict 'subs';
        _set_lookup( $arg->{lookup} );
        _set_rec( $arg->{row} );
        _clean_ds( %{ $arg } );
        eval "$arg->{rules}";
        die "error evaling $arg->{rules}: $@\n" if ($@);

        return _get_ds();
}

=head2 _set_rec

Set current record hash

  _set_rec( $rec );

=cut

my $rec;

sub _set_rec {
        $rec = shift or die "no record hash";
}

=head2 _get_ds

Return hash formatted as data structure

  my $ds = _get_ds();

=cut

my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);

sub _get_ds {
        return $out;
}

=head2 _clean_ds

Clean data structure hash for next record

  _clean_ds();

=cut

sub _clean_ds {
        my $a = {@_};
        ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
        $marc_encoding = $a->{marc_encoding};
}

=head2 _set_lookup

Set current lookup hash

  _set_lookup( $lookup );

=cut

my $lookup;

sub _set_lookup {
        $lookup = shift;
}

=head2 _get_marc_fields

Get all fields defined by calls to C<marc>

        $marc->add_fields( WebPAC::Normalize:_get_marc_fields() );

We are using I<magic> which detect repeatable fields only from
sequence of field/subfield data generated by normalization.

Repeatable field is created when there is second occurence of same subfield or
if any of indicators are different.

This is sane for most cases. Something like:

  900a-1 900b-1 900c-1
  900a-2 900b-2
  900a-3

will be created from any combination of:

  900a-1 900a-2 900a-3 900b-1 900b-2 900c-1

and following rules:

  marc('900','a', rec('200','a') );
  marc('900','b', rec('200','b') );
  marc('900','c', rec('200','c') );

which might not be what you have in mind. If you need repeatable subfield,
define it using C<marc_repeatable_subfield> like this:

....

=cut

sub _get_marc_fields {

        return if (! $marc_record || ref($marc_record) ne 'ARRAY' || $#{ $marc_record } < 0);

        # first, sort all existing fields 
        # XXX might not be needed, but modern perl might randomize elements in hash
        my @sorted_marc_record = sort {
                $a->[0] . $a->[3] cmp $b->[0] . $b->[3] 
        } @{ $marc_record };

        # output marc fields
        my @m;

        # count unique field-subfields (used for offset when walking to next subfield)
        my $u;
        map { $u->{ $_->[0] . $_->[3]  }++ } @sorted_marc_record;

        if ($debug) {
                warn "## marc_repeatable_subfield ", dump( $marc_repeatable_subfield ), $/;
                warn "## marc_record ", dump( $marc_record ), $/;
                warn "## sorted_marc_record ", dump( \@sorted_marc_record ), $/;
                warn "## subfield count ", dump( $u ), $/;
        }

        my $len = $#sorted_marc_record;
        my $visited;
        my $i = 0;
        my $field;

        foreach ( 0 .. $len ) {

                # find next element which isn't visited
                while ($visited->{$i}) {
                        $i = ($i + 1) % ($len + 1);
                }

                # mark it visited
                $visited->{$i}++;

                my $row = $sorted_marc_record[$i];

                # field and subfield which is key for
                # marc_repeatable_subfield and u
                my $fsf = $row->[0] . $row->[3];

                if ($debug > 1) {

                        print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
                        print "### this [$i]: ", dump( $row ),$/;
                        print "### sf: ", $row->[3], " vs ", $field->[3],
                                $marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
                                if ($#$field >= 0);

                }

                # if field exists
                if ( $#$field >= 0 ) {
                        if (
                                $row->[0] ne $field->[0] ||             # field
                                $row->[1] ne $field->[1] ||             # i1
                                $row->[2] ne $field->[2]                # i2
                        ) {
                                push @m, $field;
                                warn "## saved/1 ", dump( $field ),$/ if ($debug);
                                $field = $row;

                        } elsif (
                                ( $row->[3] lt $field->[-2] )           # subfield which is not next (e.g. a after c)
                                ||
                                ( $row->[3] eq $field->[-2] &&          # same subfield, but not repeatable
                                        ! $marc_repeatable_subfield->{ $fsf }
                                )
                        ) {
                                push @m, $field;
                                warn "## saved/2 ", dump( $field ),$/ if ($debug);
                                $field = $row;

                        } else {
                                # append new subfields to existing field
                                push @$field, ( $row->[3], $row->[4] );
                        }
                } else {
                        # insert first field
                        $field = $row;
                }

                if (! $marc_repeatable_subfield->{ $fsf }) {
                        # make step to next subfield
                        $i = ($i + $u->{ $fsf } ) % ($len + 1);
                }
        }

        if ($#$field >= 0) {
                push @m, $field;
                warn "## saved/3 ", dump( $field ),$/ if ($debug);
        }

        return @m;
}

=head2 _debug

Change level of debug warnings

  _debug( 2 );

=cut

sub _debug {
        my $l = shift;
        return $debug unless defined($l);
        $debug = $l;
}

=head1 Functions to create C<data_structure>

Those functions generally have to first in your normalization file.

=head2 tag

Define new tag for I<search> and I<display>.

  tag('Title', rec('200','a') );


=cut

sub tag {
        my $name = shift or die "tag needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@o;
        $out->{$name}->{display} = \@o;
}

=head2 display

Define tag just for I<display>

  @v = display('Title', rec('200','a') );

=cut

sub display {
        my $name = shift or die "display needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{display} = \@o;
}

=head2 search

Prepare values just for I<search>

  @v = search('Title', rec('200','a') );

=cut

sub search {
        my $name = shift or die "search needs name as first argument";
        my @o = grep { defined($_) && $_ ne '' } @_;
        return unless (@o);
        $out->{$name}->{tag} = $name;
        $out->{$name}->{search} = \@o;
}

=head2 marc

Save value for MARC field

  marc('900','a', rec('200','a') );

=cut

sub marc {
        my $f = shift or die "marc needs field";
        die "marc field must be numer" unless ($f =~ /^\d+$/);

        my $sf = shift or die "marc needs subfield";

        foreach (@_) {
                my $v = $_;             # make var read-write for Encode
                next unless (defined($v) && $v !~ /^\s*$/);
                from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
                my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
                push @{ $marc_record }, [ $f, $i1, $i2, $sf => $v ];
        }
}

=head2 marc_repeatable_subfield

Save values for MARC repetable subfield

  marc_repeatable_subfield('910', 'z', rec('909') );

=cut

sub marc_repeatable_subfield {
        my ($f,$sf) = @_;
        die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
        $marc_repeatable_subfield->{ $f . $sf }++;
        marc(@_);
}

=head2 marc_indicators

Set both indicators for MARC field

  marc_indicators('900', ' ', 1);

Any indicator value other than C<0-9> will be treated as undefined.

=cut

sub marc_indicators {
        my $f = shift || die "marc_indicators need field!\n";
        my ($i1,$i2) = @_;
        die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
        die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));

        $i1 = ' ' if ($i1 !~ /^\d$/);
        $i2 = ' ' if ($i2 !~ /^\d$/);
        @{ $marc_indicators->{$f} } = ($i1,$i2);
}


=head1 Functions to extract data from input

This function should be used inside functions to create C<data_structure> described
above.

=head2 rec1

Return all values in some field

  @v = rec1('200')

TODO: order of values is probably same as in source data, need to investigate that

=cut

sub rec1 {
        my $f = shift;
        return unless (defined($rec) && defined($rec->{$f}));
        if (ref($rec->{$f}) eq 'ARRAY') {
                return map { 
                        if (ref($_) eq 'HASH') {
                                values %{$_};
                        } else {
                                $_;
                        }
                } @{ $rec->{$f} };
        } elsif( defined($rec->{$f}) ) {
                return $rec->{$f};
        }
}

=head2 rec2

Return all values in specific field and subfield

  @v = rec2('200','a')

=cut

sub rec2 {
        my $f = shift;
        return unless (defined($rec && $rec->{$f}));
        my $sf = shift;
        return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
}

=head2 rec

syntaxtic sugar for

  @v = rec('200')
  @v = rec('200','a')

=cut

sub rec {
        if ($#_ == 0) {
                return rec1(@_);
        } elsif ($#_ == 1) {
                return rec2(@_);
        }
}

=head2 regex

Apply regex to some or all values

  @v = regex( 's/foo/bar/g', @v );

=cut

sub regex {
        my $r = shift;
        my @out;
        #warn "r: $r\n", dump(\@_);
        foreach my $t (@_) {
                next unless ($t);
                eval "\$t =~ $r";
                push @out, $t if ($t && $t ne '');
        }
        return @out;
}

=head2 prefix

Prefix all values with a string

  @v = prefix( 'my_', @v );

=cut

sub prefix {
        my $p = shift or die "prefix needs string as first argument";
        return map { $p . $_ } grep { defined($_) } @_;
}

=head2 suffix

suffix all values with a string

  @v = suffix( '_my', @v );

=cut

sub suffix {
        my $s = shift or die "suffix needs string as first argument";
        return map { $_ . $s } grep { defined($_) } @_;
}

=head2 surround

surround all values with a two strings

  @v = surround( 'prefix_', '_suffix', @v );

=cut

sub surround {
        my $p = shift or die "surround need prefix as first argument";
        my $s = shift or die "surround needs suffix as second argument";
        return map { $p . $_ . $s } grep { defined($_) } @_;
}

=head2 first

Return first element

  $v = first( @v );

=cut

sub first {
        my $r = shift;
        return $r;
}

=head2 lookup

Consult lookup hashes for some value

  @v = lookup( $v );
  @v = lookup( @v );

=cut

sub lookup {
        my $k = shift or return;
        return unless (defined($lookup->{$k}));
        if (ref($lookup->{$k}) eq 'ARRAY') {
                return @{ $lookup->{$k} };
        } else {
                return $lookup->{$k};
        }
}

=head2 join_with

Joins walues with some delimiter

  $v = join_with(", ", @v);

=cut

sub join_with {
        my $d = shift;
        return join($d, grep { defined($_) && $_ ne '' } @_);
}

# END
1;
1	package WebPAC::Normalize;
2	use Exporter 'import';
3	@EXPORT = qw/
4	_set_rec _set_lookup
5	_get_ds _clean_ds
6	_debug
7
8	tag search display
9	marc marc_indicators marc_repeatable_subfield
10
11	rec1 rec2 rec
12	regex prefix suffix surround
13	first lookup join_with
14	/;
15
16	use warnings;
17	use strict;
18
19	#use base qw/WebPAC::Common/;
20	use Data::Dump qw/dump/;
21	use Encode qw/from_to/;
22
23	# debugging warn(s)
24	my $debug = 0;
25
26
27	=head1 NAME
28
29	WebPAC::Normalize - describe normalisaton rules using sets
30
31	=head1 VERSION
32
33	Version 0.07
34
35	=cut
36
37	our $VERSION = '0.07';
38
39	=head1 SYNOPSIS
40
41	This module uses C<conf/normalize/*.pl> files to perform normalisation
42	from input records using perl functions which are specialized for set
43	processing.
44
45	Sets are implemented as arrays, and normalisation file is valid perl, which
46	means that you check it's validity before running WebPAC using
47	C<perl -c normalize.pl>.
48
49	Normalisation can generate multiple output normalized data. For now, supported output
50	types (on the left side of definition) are: C<tag>, C<display>, C<search> and
51	C<marc>.
52
53	=head1 FUNCTIONS
54
55	Functions which start with C<_> are private and used by WebPAC internally.
56	All other functions are available for use within normalisation rules.
57
58	=head2 data_structure
59
60	Return data structure
61
62	my $ds = WebPAC::Normalize::data_structure(
63	lookup => $lookup->lookup_hash,
64	row => $row,
65	rules => $normalize_pl_config,
66	marc_encoding => 'utf-8',
67	);
68
69	Options C<lookup>, C<row>, C<rules> and C<log> are mandatory while all
70	other are optional.
71
72	This function will B<die> if normalizastion can't be evaled.
73
74	Since this function isn't exported you have to call it with
75	C<WebPAC::Normalize::data_structure>.
76
77	=cut
78
79	sub data_structure {
80	my $arg = {@_};
81
82	die "need row argument" unless ($arg->{row});
83	die "need normalisation argument" unless ($arg->{rules});
84
85	no strict 'subs';
86	_set_lookup( $arg->{lookup} );
87	_set_rec( $arg->{row} );
88	_clean_ds( %{ $arg } );
89	eval "$arg->{rules}";
90	die "error evaling $arg->{rules}: $@\n" if ($@);
91
92	return _get_ds();
93	}
94
95	=head2 _set_rec
96
97	Set current record hash
98
99	_set_rec( $rec );
100
101	=cut
102
103	my $rec;
104
105	sub _set_rec {
106	$rec = shift or die "no record hash";
107	}
108
109	=head2 _get_ds
110
111	Return hash formatted as data structure
112
113	my $ds = _get_ds();
114
115	=cut
116
117	my ($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators);
118
119	sub _get_ds {
120	return $out;
121	}
122
123	=head2 _clean_ds
124
125	Clean data structure hash for next record
126
127	_clean_ds();
128
129	=cut
130
131	sub _clean_ds {
132	my $a = {@_};
133	($out,$marc_record, $marc_encoding, $marc_repeatable_subfield, $marc_indicators) = ();
134	$marc_encoding = $a->{marc_encoding};
135	}
136
137	=head2 _set_lookup
138
139	Set current lookup hash
140
141	_set_lookup( $lookup );
142
143	=cut
144
145	my $lookup;
146
147	sub _set_lookup {
148	$lookup = shift;
149	}
150
151	=head2 _get_marc_fields
152
153	Get all fields defined by calls to C<marc>
154
155	$marc->add_fields( WebPAC::Normalize:_get_marc_fields() );
156
157	We are using I<magic> which detect repeatable fields only from
158	sequence of field/subfield data generated by normalization.
159
160	Repeatable field is created when there is second occurence of same subfield or
161	if any of indicators are different.
162
163	This is sane for most cases. Something like:
164
165	900a-1 900b-1 900c-1
166	900a-2 900b-2
167	900a-3
168
169	will be created from any combination of:
170
171	900a-1 900a-2 900a-3 900b-1 900b-2 900c-1
172
173	and following rules:
174
175	marc('900','a', rec('200','a') );
176	marc('900','b', rec('200','b') );
177	marc('900','c', rec('200','c') );
178
179	which might not be what you have in mind. If you need repeatable subfield,
180	define it using C<marc_repeatable_subfield> like this:
181
182	....
183
184	=cut
185
186	sub _get_marc_fields {
187
188	return if (! $marc_record \|\| ref($marc_record) ne 'ARRAY' \|\| $#{ $marc_record } < 0);
189
190	# first, sort all existing fields
191	# XXX might not be needed, but modern perl might randomize elements in hash
192	my @sorted_marc_record = sort {
193	$a->[0] . $a->[3] cmp $b->[0] . $b->[3]
194	} @{ $marc_record };
195
196	# output marc fields
197	my @m;
198
199	# count unique field-subfields (used for offset when walking to next subfield)
200	my $u;
201	map { $u->{ $_->[0] . $_->[3] }++ } @sorted_marc_record;
202
203	if ($debug) {
204	warn "## marc_repeatable_subfield ", dump( $marc_repeatable_subfield ), $/;
205	warn "## marc_record ", dump( $marc_record ), $/;
206	warn "## sorted_marc_record ", dump( \@sorted_marc_record ), $/;
207	warn "## subfield count ", dump( $u ), $/;
208	}
209
210	my $len = $#sorted_marc_record;
211	my $visited;
212	my $i = 0;
213	my $field;
214
215	foreach ( 0 .. $len ) {
216
217	# find next element which isn't visited
218	while ($visited->{$i}) {
219	$i = ($i + 1) % ($len + 1);
220	}
221
222	# mark it visited
223	$visited->{$i}++;
224
225	my $row = $sorted_marc_record[$i];
226
227	# field and subfield which is key for
228	# marc_repeatable_subfield and u
229	my $fsf = $row->[0] . $row->[3];
230
231	if ($debug > 1) {
232
233	print "### field so far [", $#$field, "] : ", dump( $field ), " ", $field ? 'T' : 'F', $/;
234	print "### this [$i]: ", dump( $row ),$/;
235	print "### sf: ", $row->[3], " vs ", $field->[3],
236	$marc_repeatable_subfield->{ $row->[0] . $row->[3] } ? ' (repeatable)' : '', $/,
237	if ($#$field >= 0);
238
239	}
240
241	# if field exists
242	if ( $#$field >= 0 ) {
243	if (
244	$row->[0] ne $field->[0] \|\| # field
245	$row->[1] ne $field->[1] \|\| # i1
246	$row->[2] ne $field->[2] # i2
247	) {
248	push @m, $field;
249	warn "## saved/1 ", dump( $field ),$/ if ($debug);
250	$field = $row;
251
252	} elsif (
253	( $row->[3] lt $field->[-2] ) # subfield which is not next (e.g. a after c)
254	\|\|
255	( $row->[3] eq $field->[-2] && # same subfield, but not repeatable
256	! $marc_repeatable_subfield->{ $fsf }
257	)
258	) {
259	push @m, $field;
260	warn "## saved/2 ", dump( $field ),$/ if ($debug);
261	$field = $row;
262
263	} else {
264	# append new subfields to existing field
265	push @$field, ( $row->[3], $row->[4] );
266	}
267	} else {
268	# insert first field
269	$field = $row;
270	}
271
272	if (! $marc_repeatable_subfield->{ $fsf }) {
273	# make step to next subfield
274	$i = ($i + $u->{ $fsf } ) % ($len + 1);
275	}
276	}
277
278	if ($#$field >= 0) {
279	push @m, $field;
280	warn "## saved/3 ", dump( $field ),$/ if ($debug);
281	}
282
283	return @m;
284	}
285
286	=head2 _debug
287
288	Change level of debug warnings
289
290	_debug( 2 );
291
292	=cut
293
294	sub _debug {
295	my $l = shift;
296	return $debug unless defined($l);
297	$debug = $l;
298	}
299
300	=head1 Functions to create C<data_structure>
301
302	Those functions generally have to first in your normalization file.
303
304	=head2 tag
305
306	Define new tag for I<search> and I<display>.
307
308	tag('Title', rec('200','a') );
309
310
311	=cut
312
313	sub tag {
314	my $name = shift or die "tag needs name as first argument";
315	my @o = grep { defined($_) && $_ ne '' } @_;
316	return unless (@o);
317	$out->{$name}->{tag} = $name;
318	$out->{$name}->{search} = \@o;
319	$out->{$name}->{display} = \@o;
320	}
321
322	=head2 display
323
324	Define tag just for I<display>
325
326	@v = display('Title', rec('200','a') );
327
328	=cut
329
330	sub display {
331	my $name = shift or die "display needs name as first argument";
332	my @o = grep { defined($_) && $_ ne '' } @_;
333	return unless (@o);
334	$out->{$name}->{tag} = $name;
335	$out->{$name}->{display} = \@o;
336	}
337
338	=head2 search
339
340	Prepare values just for I<search>
341
342	@v = search('Title', rec('200','a') );
343
344	=cut
345
346	sub search {
347	my $name = shift or die "search needs name as first argument";
348	my @o = grep { defined($_) && $_ ne '' } @_;
349	return unless (@o);
350	$out->{$name}->{tag} = $name;
351	$out->{$name}->{search} = \@o;
352	}
353
354	=head2 marc
355
356	Save value for MARC field
357
358	marc('900','a', rec('200','a') );
359
360	=cut
361
362	sub marc {
363	my $f = shift or die "marc needs field";
364	die "marc field must be numer" unless ($f =~ /^\d+$/);
365
366	my $sf = shift or die "marc needs subfield";
367
368	foreach (@_) {
369	my $v = $_; # make var read-write for Encode
370	next unless (defined($v) && $v !~ /^\s*$/);
371	from_to($v, 'iso-8859-2', $marc_encoding) if ($marc_encoding);
372	my ($i1,$i2) = defined($marc_indicators->{$f}) ? @{ $marc_indicators->{$f} } : (' ',' ');
373	push @{ $marc_record }, [ $f, $i1, $i2, $sf => $v ];
374	}
375	}
376
377	=head2 marc_repeatable_subfield
378
379	Save values for MARC repetable subfield
380
381	marc_repeatable_subfield('910', 'z', rec('909') );
382
383	=cut
384
385	sub marc_repeatable_subfield {
386	my ($f,$sf) = @_;
387	die "marc_repeatable_subfield need field and subfield!\n" unless ($f && $sf);
388	$marc_repeatable_subfield->{ $f . $sf }++;
389	marc(@_);
390	}
391
392	=head2 marc_indicators
393
394	Set both indicators for MARC field
395
396	marc_indicators('900', ' ', 1);
397
398	Any indicator value other than C<0-9> will be treated as undefined.
399
400	=cut
401
402	sub marc_indicators {
403	my $f = shift \|\| die "marc_indicators need field!\n";
404	my ($i1,$i2) = @_;
405	die "marc_indicators($f, ...) need i1!\n" unless(defined($i1));
406	die "marc_indicators($f, $i1, ...) need i2!\n" unless(defined($i2));
407
408	$i1 = ' ' if ($i1 !~ /^\d$/);
409	$i2 = ' ' if ($i2 !~ /^\d$/);
410	@{ $marc_indicators->{$f} } = ($i1,$i2);
411	}
412
413
414	=head1 Functions to extract data from input
415
416	This function should be used inside functions to create C<data_structure> described
417	above.
418
419	=head2 rec1
420
421	Return all values in some field
422
423	@v = rec1('200')
424
425	TODO: order of values is probably same as in source data, need to investigate that
426
427	=cut
428
429	sub rec1 {
430	my $f = shift;
431	return unless (defined($rec) && defined($rec->{$f}));
432	if (ref($rec->{$f}) eq 'ARRAY') {
433	return map {
434	if (ref($_) eq 'HASH') {
435	values %{$_};
436	} else {
437	$_;
438	}
439	} @{ $rec->{$f} };
440	} elsif( defined($rec->{$f}) ) {
441	return $rec->{$f};
442	}
443	}
444
445	=head2 rec2
446
447	Return all values in specific field and subfield
448
449	@v = rec2('200','a')
450
451	=cut
452
453	sub rec2 {
454	my $f = shift;
455	return unless (defined($rec && $rec->{$f}));
456	my $sf = shift;
457	return map { $_->{$sf} } grep { ref($_) eq 'HASH' && $_->{$sf} } @{ $rec->{$f} };
458	}
459
460	=head2 rec
461
462	syntaxtic sugar for
463
464	@v = rec('200')
465	@v = rec('200','a')
466
467	=cut
468
469	sub rec {
470	if ($#_ == 0) {
471	return rec1(@_);
472	} elsif ($#_ == 1) {
473	return rec2(@_);
474	}
475	}
476
477	=head2 regex
478
479	Apply regex to some or all values
480
481	@v = regex( 's/foo/bar/g', @v );
482
483	=cut
484
485	sub regex {
486	my $r = shift;
487	my @out;
488	#warn "r: $r\n", dump(\@_);
489	foreach my $t (@_) {
490	next unless ($t);
491	eval "\$t =~ $r";
492	push @out, $t if ($t && $t ne '');
493	}
494	return @out;
495	}
496
497	=head2 prefix
498
499	Prefix all values with a string
500
501	@v = prefix( 'my_', @v );
502
503	=cut
504
505	sub prefix {
506	my $p = shift or die "prefix needs string as first argument";
507	return map { $p . $_ } grep { defined($_) } @_;
508	}
509
510	=head2 suffix
511
512	suffix all values with a string
513
514	@v = suffix( '_my', @v );
515
516	=cut
517
518	sub suffix {
519	my $s = shift or die "suffix needs string as first argument";
520	return map { $_ . $s } grep { defined($_) } @_;
521	}
522
523	=head2 surround
524
525	surround all values with a two strings
526
527	@v = surround( 'prefix_', '_suffix', @v );
528
529	=cut
530
531	sub surround {
532	my $p = shift or die "surround need prefix as first argument";
533	my $s = shift or die "surround needs suffix as second argument";
534	return map { $p . $_ . $s } grep { defined($_) } @_;
535	}
536
537	=head2 first
538
539	Return first element
540
541	$v = first( @v );
542
543	=cut
544
545	sub first {
546	my $r = shift;
547	return $r;
548	}
549
550	=head2 lookup
551
552	Consult lookup hashes for some value
553
554	@v = lookup( $v );
555	@v = lookup( @v );
556
557	=cut
558
559	sub lookup {
560	my $k = shift or return;
561	return unless (defined($lookup->{$k}));
562	if (ref($lookup->{$k}) eq 'ARRAY') {
563	return @{ $lookup->{$k} };
564	} else {
565	return $lookup->{$k};
566	}
567	}
568
569	=head2 join_with
570
571	Joins walues with some delimiter
572
573	$v = join_with(", ", @v);
574
575	=cut
576
577	sub join_with {
578	my $d = shift;
579	return join($d, grep { defined($_) && $_ ne '' } @_);
580	}
581
582	# END
583	1;