trunk2/lib/WebPAC.pm

package WebPAC;

use warnings;
use strict;

use Carp;
use Text::Iconv;
use Config::IniFiles;
use XML::Simple;
use Template;

use Data::Dumper;

=head1 NAME

WebPAC - base class for WebPAC

=head1 DESCRIPTION

This module implements methods used by WebPAC.

=head1 METHODS

=head2 new

This will create new instance of WebPAC using configuration specified by C<config_file>.

 my $webpac = new WebPAC(
        config_file => 'name.conf',
        [code_page => 'ISO-8859-2',]
 );

Default C<code_page> is C<ISO-8859-2>.

It will also read configuration files
C<global.conf> (used by indexer and Web font-end)
and configuration file specified by C<config_file>
which describes databases to be indexed.

=cut

# mapping between data type and tag which specify
# format in XML file
my %type2tag = (
        'isis' => 'isis',
#       'excel' => 'column',
#       'marc' => 'marc',
#       'feed' => 'feed'
);

sub new {
        my $class = shift;
        my $self = {@_};
        bless($self, $class);

        # fill in default values
        # output codepage
        $self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});

        #
        # read global.conf
        #

        my $config = new Config::IniFiles( -file => 'global.conf' ) || croak "can't open 'global.conf'";

        # read global config parametars
        foreach my $var (qw(
                        dbi_dbd
                        dbi_dsn
                        dbi_user
                        dbi_passwd
                        show_progress
                        my_unac_filter
                        output_template
                )) {
                $self->{'global_config'}->{$var} = $config->val('global', $var);
        }

        #
        # read indexer config file
        #

        $self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'";

        # create UTF-8 convertor for import_xml files
        $self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});

        # create Template toolkit instance
        $self->{'tt'} = Template->new(
                INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} || './output_template'),
#               FILTERS => {
#                       'foo' => \&foo_filter,
#               },
                EVAL_PERL => 1,
        );

        return $self;
}

=head2 open_isis

Open CDS/ISIS database using OpenIsis module and read all records to memory.

 $webpac->open_isis(
        filename => '/data/ISIS/ISIS',
        code_page => '852',
        limit_mfn => '500',
        lookup => [ ... ],
 );

By default, ISIS code page is assumed to be C<852>.

If optional parametar C<limit_mfn> is set, it will read just 500 records
from database in example above.

C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
C<val>. Optional parametar C<eval> is perl code to evaluate before storing
value in index.

 lookup => [
  { 'key' => 'd:v900', 'val' => 'v250^a' },
  { 'eval' => '"v901^a" eq "Podruèje"',
    'key' => 'pa:v561^4:v562^4:v461^1',
    'val' => 'v900' },
 ]

Returns number of last record read into memory (size of database, really).

=cut

sub open_isis {
        my $self = shift;
        my $arg = {@_};

        croak "need filename" if (! $arg->{'filename'});
        my $code_page = $arg->{'code_page'} || '852';

        use OpenIsis;

        #$self->{'isis_code_page'} = $code_page;

        # create Text::Iconv object
        my $cp = Text::Iconv->new($code_page,$self->{'code_page'});

        print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});

        my $isis_db = OpenIsis::open($arg->{'filename'});

        my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1;

        $maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});

        print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});

        # read database
        for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {

                # read record
                my $row = OpenIsis::read( $isis_db, $mfn );
                foreach my $k (keys %{$row}) {
                        if ($k ne "mfn") {
                                foreach my $l (@{$row->{$k}}) {
                                        $l = $cp->convert($l);
                                        # has subfields?
                                        my $val;
                                        if ($l =~ m/\^/) {
                                                foreach my $t (split(/\^/,$l)) {
                                                        next if (! $t);
                                                        $val->{substr($t,0,1)} = substr($t,1);
                                                }
                                        } else {
                                                $val = $l;
                                        }

                                        push @{$self->{'data'}->{$mfn}->{$k}}, $val;
                                }
                        }

                }

                # create lookup
                my $rec = $self->{'data'}->{$mfn};
                $self->create_lookup($rec, @{$arg->{'lookup'}});

        }

        $self->{'current_mfn'} = 1;

        # store max mfn and return it.
        return $self->{'max_mfn'} = $maxmfn;
}

=head2 fetch_rec

Fetch next record from database. It will also display progress bar (once
it's implemented, that is).

 my $rec = $webpac->fetch_rec;

=cut

sub fetch_rec {
        my $self = shift;

        my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!";

        if ($mfn > $self->{'max_mfn'}) {
                $self->{'current_mfn'} = $self->{'max_mfn'};
                return;
        }

        return $self->{'data'}->{$mfn};
}

=head2 open_import_xml

Read file from C<import_xml/> directory and parse it.

 $webpac->open_import_xml(type => 'isis');

=cut

sub open_import_xml {
        my $self = shift;

        my $arg = {@_};
        confess "need type to load file from import_xml/" if (! $arg->{'type'});

        $self->{'type'} = $arg->{'type'};

        my $type_base = $arg->{'type'};
        $type_base =~ s/_.*$//g;

        $self->{'tag'} = $type2tag{$type_base};

        print STDERR "using type '",$self->{'type'},"' tag <",$self->{'tag'},">\n" if ($self->{'debug'});

        my $f = "./import_xml/".$self->{'type'}.".xml";
        confess "import_xml file '$f' doesn't exist!" if (! -e "$f");

        print STDERR "reading '$f'\n" if ($self->{'debug'});

        $self->{'import_xml'} = XMLin($f,
                ForceArray => [ $self->{'tag'}, 'config', 'format' ],
                ForceContent => 1
        );

}

=head2 create_lookup

Create lookup from record using lookup definition.

 $self->create_lookup($rec, @lookups);

Called internally by C<open_*> methods.

=cut

sub create_lookup {
        my $self = shift;

        my $rec = shift || confess "need record to create lookup";
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        foreach my $i (@_) {
                if ($i->{'eval'}) {
                        my $eval = $self->fill_in($rec,$i->{'eval'});
                        my $key = $self->fill_in($rec,$i->{'key'});
                        my @val = $self->fill_in($rec,$i->{'val'});
                        if ($key && @val && eval $eval) {
                                push @{$self->{'lookup'}->{$key}}, @val;
                        }
                } else {
                        my $key = $self->fill_in($rec,$i->{'key'});
                        my @val = $self->fill_in($rec,$i->{'val'});
                        if ($key && @val) {
                                push @{$self->{'lookup'}->{$key}}, @val;
                        }
                }
        }
}

=head2 get_data

Returns value from record.

 my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);

Arguments are:
record reference C<$rec>,
field C<$f>,
optional subfiled C<$sf>,
index for repeatable values C<$i>.

Optinal variable C<$found> will be incremeted if there
is field.

Returns value or empty string.

=cut

sub get_data {
        my $self = shift;

        my ($rec,$f,$sf,$i,$found) = @_;

        if ($$rec->{$f}) {
                return '' if (! $$rec->{$f}->[$i]);
                if ($sf && $$rec->{$f}->[$i]->{$sf}) {
                        $$found++ if (defined($$found));
                        return $$rec->{$f}->[$i]->{$sf};
                } elsif ($$rec->{$f}->[$i]) {
                        $$found++ if (defined($$found));
                        # it still might have subfield, just
                        # not specified, so we'll dump all
                        if ($$rec->{$f}->[$i] =~ /HASH/o) {
                                my $out;
                                foreach my $k (keys %{$$rec->{$f}->[$i]}) {
                                        $out .= $$rec->{$f}->[$i]->{$k}." ";
                                }
                                return $out;
                        } else {
                                return $$rec->{$f}->[$i];
                        }
                }
        } else {
                return '';
        }
}

=head2 fill_in

Workhourse of all: takes record from in-memory structure of database and
strings with placeholders and returns string or array of with substituted
values from record.

 my $text = $webpac->fill_in($rec,'v250^a');

Optional argument is ordinal number for repeatable fields. By default,
it's assume to be first repeatable field (fields are perl array, so first
element is 0).
Following example will read second value from repeatable field.

 my $text = $webpac->fill_in($rec,'Title: v250^a',1);

This function B<does not> perform parsing of format to inteligenty skip
delimiters before fields which aren't used.

=cut

# internal function to eval code
sub _eval {
        my $self = shift;

        my $code = shift || return;
        no strict 'subs';
        my $ret = eval $code;
        if ($@) {
                print STDERR "problem with eval code [$code]: $@\n";
        }
        return $ret;
}

sub fill_in {
        my $self = shift;

        my $rec = shift || confess "need data record";
        my $format = shift || confess "need format to parse";
        # iteration (for repeatable fields)
        my $i = shift || 0;

        # FIXME remove for speedup?
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my $found = 0;

        my $eval_code;
        # remove eval{...} from beginning
        $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);

        # do actual replacement of placeholders
        $format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;

        if ($found) {
                if ($eval_code) {
                        my $eval = $self->fill_in($rec,$eval_code,$i);
                        return if (! $self->_eval($eval));
                }
                # do we have lookups?
                if ($format =~ /\[[^\[\]]+\]/o) {
print "## probable lookup: $format\n";
                        return $self->lookup($format);
                } else {
                        return $format;
                }
        } else {
                return;
        }
}

=head2 lookup

Perform lookups on format supplied to it.

 my $text = $self->lookup('[v900]');

Lookups can be nested (like C<[d:[a:[v900]]]>).

=cut

sub lookup {
        my $self = shift;

        my $tmp = shift || confess "need format";

        if ($tmp =~ /\[[^\[\]]+\]/o) {
                my @in = ( $tmp );
print "## lookup $tmp\n";
                my @out;
                while (my $f = shift @in) {
                        if ($f =~ /\[([^\[\]]+)\]/) {
                                my $k = $1;
                                if ($self->{'lookup'}->{$k}) {
                                        foreach my $nv (@{$self->{'lookup'}->{$k}}) {
                                                my $tmp2 = $f;
                                                $tmp2 =~ s/\[$k\]/$nv/g;
                                                push @in, $tmp2;
                                        }
                                } else {
                                        undef $f;
                                }
                        } elsif ($f) {
                                push @out, $f;
                        }
                }
                return @out;
        } else {
                return $tmp;
        }
}

=head2 parse

Perform smart parsing of string, skipping delimiters for fields which aren't
defined. It can also eval code in format starting with C<eval{...}> and
return output or nothing depending on eval code.

 my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);

=cut

sub parse {
        my $self = shift;

        my ($rec, $format_utf8, $i) = @_;

        return if (! $format_utf8);

        confess("need HASH as first argument!") if ($rec !~ /HASH/o);
        confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});

        $i = 0 if (! $i);

        my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});

        my @out;

        my $eval_code;
        # remove eval{...} from beginning
        $eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);

        my $prefix;
        my $all_found=0;

        while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) {

                my $del = $1 || '';
                $prefix ||= $del if ($all_found == 0);

                my $found = 0;
                my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);

                if ($found) {
                        push @out, $del;
                        push @out, $tmp;
                        $all_found += $found;
                }
        }

        return if (! $all_found);

        my $out = join('',@out) . $format;

        # add prefix if not there
        $out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);

        if ($eval_code) {
                my $eval = $self->fill_in($rec,$eval_code,$i);
                return if (! $self->_eval($eval));
        }

        return $out;
}

=head2 parse_to_arr

Similar to C<parse>, but returns array of all repeatable fields

 my @arr = $webpac->parse_to_arr($rec,'v250^a');

=cut

sub parse_to_arr {
        my $self = shift;

        my ($rec, $format_utf8) = @_;

        confess("need HASH as first argument!") if ($rec !~ /HASH/o);
        return if (! $format_utf8);

        my $i = 0;
        my @arr;

        while (my $v = $self->parse($rec,$format_utf8,$i++)) {
                push @arr, $v;
        }

        return @arr;
}

=head2 data_structure

Create in-memory data structure which represents layout from C<import_xml>.
It is used later to produce output.

 my @ds = $webpac->data_structure($rec);

=cut

# private method _sort_by_order
# sort subrouting using order="" attribute
sub _sort_by_order {
        my $self = shift;

        my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} ||
                $self->{'import_xml'}->{'indexer'}->{$a};
        my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} ||
                $self->{'import_xml'}->{'indexer'}->{$b};

        return $va <=> $vb;
}

sub data_structure {
        my $self = shift;

        my $rec = shift;
        confess("need HASH as first argument!") if ($rec !~ /HASH/o);

        my @sorted_tags;
        if ($self->{tags_by_order}) {
                @sorted_tags = @{$self->{tags_by_order}};
        } else {
                @sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
                $self->{tags_by_order} = \@sorted_tags;
        }

        my @ds;

        foreach my $field (@sorted_tags) {

                my $row;

#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});

                foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
                        my @v = $self->parse_to_arr($rec,$tag->{'content'});

                        next if (! @v);

                        # does tag have type?
                        if ($tag->{'type'}) {
                                push @{$row->{$tag->{'type'}}}, @v;
                        } else {
                                push @{$row->{'display'}}, @v;
                                push @{$row->{'swish'}}, @v;
                        }
                }

                if ($row) {
                        $row->{'tag'} = $field;
                        push @ds, $row;
                }

        }

        return @ds;

}

=head2 output

Create output from in-memory data structure using Template Toolkit template.

my $text = $webpac->output( template => 'text.tt', data => @ds );

=cut

sub output {
        my $self = shift;

        my $args = {@_};

        confess("need template name") if (! $args->{'template'});
        confess("need data array") if (! $args->{'data'});

        my $out;

        $self->{'tt'}->process(
                $args->{'template'},
                $args,
                \$out
        ) || confess $self->{'tt'}->error();

        return $out;
}

1;
1	package WebPAC;
2
3	use warnings;
4	use strict;
5
6	use Carp;
7	use Text::Iconv;
8	use Config::IniFiles;
9	use XML::Simple;
10	use Template;
11
12	use Data::Dumper;
13
14	=head1 NAME
15
16	WebPAC - base class for WebPAC
17
18	=head1 DESCRIPTION
19
20	This module implements methods used by WebPAC.
21
22	=head1 METHODS
23
24	=head2 new
25
26	This will create new instance of WebPAC using configuration specified by C<config_file>.
27
28	my $webpac = new WebPAC(
29	config_file => 'name.conf',
30	[code_page => 'ISO-8859-2',]
31	);
32
33	Default C<code_page> is C<ISO-8859-2>.
34
35	It will also read configuration files
36	C<global.conf> (used by indexer and Web font-end)
37	and configuration file specified by C<config_file>
38	which describes databases to be indexed.
39
40	=cut
41
42	# mapping between data type and tag which specify
43	# format in XML file
44	my %type2tag = (
45	'isis' => 'isis',
46	# 'excel' => 'column',
47	# 'marc' => 'marc',
48	# 'feed' => 'feed'
49	);
50
51	sub new {
52	my $class = shift;
53	my $self = {@_};
54	bless($self, $class);
55
56	# fill in default values
57	# output codepage
58	$self->{'code_page'} = 'ISO-8859-2' if (! $self->{'code_page'});
59
60	#
61	# read global.conf
62	#
63
64	my $config = new Config::IniFiles( -file => 'global.conf' ) \|\| croak "can't open 'global.conf'";
65
66	# read global config parametars
67	foreach my $var (qw(
68	dbi_dbd
69	dbi_dsn
70	dbi_user
71	dbi_passwd
72	show_progress
73	my_unac_filter
74	output_template
75	)) {
76	$self->{'global_config'}->{$var} = $config->val('global', $var);
77	}
78
79	#
80	# read indexer config file
81	#
82
83	$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) \|\| croak "can't open '$self->{config_file}'";
84
85	# create UTF-8 convertor for import_xml files
86	$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'});
87
88	# create Template toolkit instance
89	$self->{'tt'} = Template->new(
90	INCLUDE_PATH => ($self->{'global_config_file'}->{'output_template'} \|\| './output_template'),
91	# FILTERS => {
92	# 'foo' => \&foo_filter,
93	# },
94	EVAL_PERL => 1,
95	);
96
97	return $self;
98	}
99
100	=head2 open_isis
101
102	Open CDS/ISIS database using OpenIsis module and read all records to memory.
103
104	$webpac->open_isis(
105	filename => '/data/ISIS/ISIS',
106	code_page => '852',
107	limit_mfn => '500',
108	lookup => [ ... ],
109	);
110
111	By default, ISIS code page is assumed to be C<852>.
112
113	If optional parametar C<limit_mfn> is set, it will read just 500 records
114	from database in example above.
115
116	C<lookup> argument is an array of lookups to create. Each lookup must have C<key> and
117	C<val>. Optional parametar C<eval> is perl code to evaluate before storing
118	value in index.
119
120	lookup => [
121	{ 'key' => 'd:v900', 'val' => 'v250^a' },
122	{ 'eval' => '"v901^a" eq "Podruèje"',
123	'key' => 'pa:v561^4:v562^4:v461^1',
124	'val' => 'v900' },
125	]
126
127	Returns number of last record read into memory (size of database, really).
128
129	=cut
130
131	sub open_isis {
132	my $self = shift;
133	my $arg = {@_};
134
135	croak "need filename" if (! $arg->{'filename'});
136	my $code_page = $arg->{'code_page'} \|\| '852';
137
138	use OpenIsis;
139
140	#$self->{'isis_code_page'} = $code_page;
141
142	# create Text::Iconv object
143	my $cp = Text::Iconv->new($code_page,$self->{'code_page'});
144
145	print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'});
146
147	my $isis_db = OpenIsis::open($arg->{'filename'});
148
149	my $maxmfn = OpenIsis::maxRowid( $isis_db ) \|\| 1;
150
151	$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn});
152
153	print STDERR "processing $maxmfn records...\n" if ($self->{'debug'});
154
155	# read database
156	for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) {
157
158	# read record
159	my $row = OpenIsis::read( $isis_db, $mfn );
160	foreach my $k (keys %{$row}) {
161	if ($k ne "mfn") {
162	foreach my $l (@{$row->{$k}}) {
163	$l = $cp->convert($l);
164	# has subfields?
165	my $val;
166	if ($l =~ m/\^/) {
167	foreach my $t (split(/\^/,$l)) {
168	next if (! $t);
169	$val->{substr($t,0,1)} = substr($t,1);
170	}
171	} else {
172	$val = $l;
173	}
174
175	push @{$self->{'data'}->{$mfn}->{$k}}, $val;
176	}
177	}
178
179	}
180
181	# create lookup
182	my $rec = $self->{'data'}->{$mfn};
183	$self->create_lookup($rec, @{$arg->{'lookup'}});
184
185	}
186
187	$self->{'current_mfn'} = 1;
188
189	# store max mfn and return it.
190	return $self->{'max_mfn'} = $maxmfn;
191	}
192
193	=head2 fetch_rec
194
195	Fetch next record from database. It will also display progress bar (once
196	it's implemented, that is).
197
198	my $rec = $webpac->fetch_rec;
199
200	=cut
201
202	sub fetch_rec {
203	my $self = shift;
204
205	my $mfn = $self->{'current_mfn'}++ \|\| confess "it seems that you didn't load database!";
206
207	if ($mfn > $self->{'max_mfn'}) {
208	$self->{'current_mfn'} = $self->{'max_mfn'};
209	return;
210	}
211
212	return $self->{'data'}->{$mfn};
213	}
214
215	=head2 open_import_xml
216
217	Read file from C<import_xml/> directory and parse it.
218
219	$webpac->open_import_xml(type => 'isis');
220
221	=cut
222
223	sub open_import_xml {
224	my $self = shift;
225
226	my $arg = {@_};
227	confess "need type to load file from import_xml/" if (! $arg->{'type'});
228
229	$self->{'type'} = $arg->{'type'};
230
231	my $type_base = $arg->{'type'};
232	$type_base =~ s/_.*$//g;
233
234	$self->{'tag'} = $type2tag{$type_base};
235
236	print STDERR "using type '",$self->{'type'},"' tag <",$self->{'tag'},">\n" if ($self->{'debug'});
237
238	my $f = "./import_xml/".$self->{'type'}.".xml";
239	confess "import_xml file '$f' doesn't exist!" if (! -e "$f");
240
241	print STDERR "reading '$f'\n" if ($self->{'debug'});
242
243	$self->{'import_xml'} = XMLin($f,
244	ForceArray => [ $self->{'tag'}, 'config', 'format' ],
245	ForceContent => 1
246	);
247
248	}
249
250	=head2 create_lookup
251
252	Create lookup from record using lookup definition.
253
254	$self->create_lookup($rec, @lookups);
255
256	Called internally by C<open_*> methods.
257
258	=cut
259
260	sub create_lookup {
261	my $self = shift;
262
263	my $rec = shift \|\| confess "need record to create lookup";
264	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
265
266	foreach my $i (@_) {
267	if ($i->{'eval'}) {
268	my $eval = $self->fill_in($rec,$i->{'eval'});
269	my $key = $self->fill_in($rec,$i->{'key'});
270	my @val = $self->fill_in($rec,$i->{'val'});
271	if ($key && @val && eval $eval) {
272	push @{$self->{'lookup'}->{$key}}, @val;
273	}
274	} else {
275	my $key = $self->fill_in($rec,$i->{'key'});
276	my @val = $self->fill_in($rec,$i->{'val'});
277	if ($key && @val) {
278	push @{$self->{'lookup'}->{$key}}, @val;
279	}
280	}
281	}
282	}
283
284	=head2 get_data
285
286	Returns value from record.
287
288	my $text = $self->get_data(\$rec,$f,$sf,$i,\$found);
289
290	Arguments are:
291	record reference C<$rec>,
292	field C<$f>,
293	optional subfiled C<$sf>,
294	index for repeatable values C<$i>.
295
296	Optinal variable C<$found> will be incremeted if there
297	is field.
298
299	Returns value or empty string.
300
301	=cut
302
303	sub get_data {
304	my $self = shift;
305
306	my ($rec,$f,$sf,$i,$found) = @_;
307
308	if ($$rec->{$f}) {
309	return '' if (! $$rec->{$f}->[$i]);
310	if ($sf && $$rec->{$f}->[$i]->{$sf}) {
311	$$found++ if (defined($$found));
312	return $$rec->{$f}->[$i]->{$sf};
313	} elsif ($$rec->{$f}->[$i]) {
314	$$found++ if (defined($$found));
315	# it still might have subfield, just
316	# not specified, so we'll dump all
317	if ($$rec->{$f}->[$i] =~ /HASH/o) {
318	my $out;
319	foreach my $k (keys %{$$rec->{$f}->[$i]}) {
320	$out .= $$rec->{$f}->[$i]->{$k}." ";
321	}
322	return $out;
323	} else {
324	return $$rec->{$f}->[$i];
325	}
326	}
327	} else {
328	return '';
329	}
330	}
331
332	=head2 fill_in
333
334	Workhourse of all: takes record from in-memory structure of database and
335	strings with placeholders and returns string or array of with substituted
336	values from record.
337
338	my $text = $webpac->fill_in($rec,'v250^a');
339
340	Optional argument is ordinal number for repeatable fields. By default,
341	it's assume to be first repeatable field (fields are perl array, so first
342	element is 0).
343	Following example will read second value from repeatable field.
344
345	my $text = $webpac->fill_in($rec,'Title: v250^a',1);
346
347	This function B<does not> perform parsing of format to inteligenty skip
348	delimiters before fields which aren't used.
349
350	=cut
351
352	# internal function to eval code
353	sub _eval {
354	my $self = shift;
355
356	my $code = shift \|\| return;
357	no strict 'subs';
358	my $ret = eval $code;
359	if ($@) {
360	print STDERR "problem with eval code [$code]: $@\n";
361	}
362	return $ret;
363	}
364
365	sub fill_in {
366	my $self = shift;
367
368	my $rec = shift \|\| confess "need data record";
369	my $format = shift \|\| confess "need format to parse";
370	# iteration (for repeatable fields)
371	my $i = shift \|\| 0;
372
373	# FIXME remove for speedup?
374	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
375
376	my $found = 0;
377
378	my $eval_code;
379	# remove eval{...} from beginning
380	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
381
382	# do actual replacement of placeholders
383	$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges;
384
385	if ($found) {
386	if ($eval_code) {
387	my $eval = $self->fill_in($rec,$eval_code,$i);
388	return if (! $self->_eval($eval));
389	}
390	# do we have lookups?
391	if ($format =~ /\[[^\[\]]+\]/o) {
392	print "## probable lookup: $format\n";
393	return $self->lookup($format);
394	} else {
395	return $format;
396	}
397	} else {
398	return;
399	}
400	}
401
402	=head2 lookup
403
404	Perform lookups on format supplied to it.
405
406	my $text = $self->lookup('[v900]');
407
408	Lookups can be nested (like C<[d:[a:[v900]]]>).
409
410	=cut
411
412	sub lookup {
413	my $self = shift;
414
415	my $tmp = shift \|\| confess "need format";
416
417	if ($tmp =~ /\[[^\[\]]+\]/o) {
418	my @in = ( $tmp );
419	print "## lookup $tmp\n";
420	my @out;
421	while (my $f = shift @in) {
422	if ($f =~ /\[([^\[\]]+)\]/) {
423	my $k = $1;
424	if ($self->{'lookup'}->{$k}) {
425	foreach my $nv (@{$self->{'lookup'}->{$k}}) {
426	my $tmp2 = $f;
427	$tmp2 =~ s/\[$k\]/$nv/g;
428	push @in, $tmp2;
429	}
430	} else {
431	undef $f;
432	}
433	} elsif ($f) {
434	push @out, $f;
435	}
436	}
437	return @out;
438	} else {
439	return $tmp;
440	}
441	}
442
443	=head2 parse
444
445	Perform smart parsing of string, skipping delimiters for fields which aren't
446	defined. It can also eval code in format starting with C<eval{...}> and
447	return output or nothing depending on eval code.
448
449	my $text = $webpac->parse($rec,'eval{"v901^a" eq "Deskriptor"}descriptor: v250^a', $i);
450
451	=cut
452
453	sub parse {
454	my $self = shift;
455
456	my ($rec, $format_utf8, $i) = @_;
457
458	return if (! $format_utf8);
459
460	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
461	confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'});
462
463	$i = 0 if (! $i);
464
465	my $format = $self->{'utf2cp'}->convert($format_utf8) \|\| confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'});
466
467	my @out;
468
469	my $eval_code;
470	# remove eval{...} from beginning
471	$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s);
472
473	my $prefix;
474	my $all_found=0;
475
476	while ($format =~ s/^(.?)v(\d+)(?:\^(\w))//s) {
477
478	my $del = $1 \|\| '';
479	$prefix \|\|= $del if ($all_found == 0);
480
481	my $found = 0;
482	my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found);
483
484	if ($found) {
485	push @out, $del;
486	push @out, $tmp;
487	$all_found += $found;
488	}
489	}
490
491	return if (! $all_found);
492
493	my $out = join('',@out) . $format;
494
495	# add prefix if not there
496	$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/);
497
498	if ($eval_code) {
499	my $eval = $self->fill_in($rec,$eval_code,$i);
500	return if (! $self->_eval($eval));
501	}
502
503	return $out;
504	}
505
506	=head2 parse_to_arr
507
508	Similar to C<parse>, but returns array of all repeatable fields
509
510	my @arr = $webpac->parse_to_arr($rec,'v250^a');
511
512	=cut
513
514	sub parse_to_arr {
515	my $self = shift;
516
517	my ($rec, $format_utf8) = @_;
518
519	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
520	return if (! $format_utf8);
521
522	my $i = 0;
523	my @arr;
524
525	while (my $v = $self->parse($rec,$format_utf8,$i++)) {
526	push @arr, $v;
527	}
528
529	return @arr;
530	}
531
532	=head2 data_structure
533
534	Create in-memory data structure which represents layout from C<import_xml>.
535	It is used later to produce output.
536
537	my @ds = $webpac->data_structure($rec);
538
539	=cut
540
541	# private method _sort_by_order
542	# sort subrouting using order="" attribute
543	sub _sort_by_order {
544	my $self = shift;
545
546	my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} \|\|
547	$self->{'import_xml'}->{'indexer'}->{$a};
548	my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} \|\|
549	$self->{'import_xml'}->{'indexer'}->{$b};
550
551	return $va <=> $vb;
552	}
553
554	sub data_structure {
555	my $self = shift;
556
557	my $rec = shift;
558	confess("need HASH as first argument!") if ($rec !~ /HASH/o);
559
560	my @sorted_tags;
561	if ($self->{tags_by_order}) {
562	@sorted_tags = @{$self->{tags_by_order}};
563	} else {
564	@sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}};
565	$self->{tags_by_order} = \@sorted_tags;
566	}
567
568	my @ds;
569
570	foreach my $field (@sorted_tags) {
571
572	my $row;
573
574	#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}});
575
576	foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) {
577	my @v = $self->parse_to_arr($rec,$tag->{'content'});
578
579	next if (! @v);
580
581	# does tag have type?
582	if ($tag->{'type'}) {
583	push @{$row->{$tag->{'type'}}}, @v;
584	} else {
585	push @{$row->{'display'}}, @v;
586	push @{$row->{'swish'}}, @v;
587	}
588	}
589
590	if ($row) {
591	$row->{'tag'} = $field;
592	push @ds, $row;
593	}
594
595	}
596
597	return @ds;
598
599	}
600
601	=head2 output
602
603	Create output from in-memory data structure using Template Toolkit template.
604
605	my $text = $webpac->output( template => 'text.tt', data => @ds );
606
607	=cut
608
609	sub output {
610	my $self = shift;
611
612	my $args = {@_};
613
614	confess("need template name") if (! $args->{'template'});
615	confess("need data array") if (! $args->{'data'});
616
617	my $out;
618
619	$self->{'tt'}->process(
620	$args->{'template'},
621	$args,
622	\$out
623	) \|\| confess $self->{'tt'}->error();
624
625	return $out;
626	}
627
628	1;