3 |
use Carp; |
use Carp; |
4 |
use Text::Iconv; |
use Text::Iconv; |
5 |
use Config::IniFiles; |
use Config::IniFiles; |
6 |
|
use XML::Simple; |
7 |
|
|
8 |
|
use Data::Dumper; |
9 |
|
|
10 |
=head1 NAME |
=head1 NAME |
11 |
|
|
35 |
|
|
36 |
=cut |
=cut |
37 |
|
|
38 |
|
# mapping between data type and tag which specify |
39 |
|
# format in XML file |
40 |
|
my %type2tag = ( |
41 |
|
'isis' => 'isis', |
42 |
|
# 'excel' => 'column', |
43 |
|
# 'marc' => 'marc', |
44 |
|
# 'feed' => 'feed' |
45 |
|
); |
46 |
|
|
47 |
sub new { |
sub new { |
48 |
my $class = shift; |
my $class = shift; |
49 |
my $self = {@_}; |
my $self = {@_}; |
77 |
|
|
78 |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
$self->{indexer_config_file} = new Config::IniFiles( -file => $self->{config_file} ) || croak "can't open '$self->{config_file}'"; |
79 |
|
|
80 |
# read global config parametars |
$self->{'utf2cp'} = Text::Iconv->new('UTF-8' ,$self->{'code_page'}); |
|
foreach my $var (qw( |
|
|
dbi_dbd |
|
|
dbi_dsn |
|
|
dbi_user |
|
|
dbi_passwd |
|
|
show_progress |
|
|
my_unac_filter |
|
|
)) { |
|
|
$self->{global_config}->{$var} = $self->{global_config_file}->val('global', $var); |
|
|
} |
|
|
|
|
81 |
return $self; |
return $self; |
82 |
} |
} |
83 |
|
|
126 |
# create Text::Iconv object |
# create Text::Iconv object |
127 |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
my $cp = Text::Iconv->new($code_page,$self->{'code_page'}); |
128 |
|
|
129 |
|
print STDERR "reading ISIS database '",$arg->{'filename'},"'\n" if ($self->{'debug'}); |
130 |
|
|
131 |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
my $isis_db = OpenIsis::open($arg->{'filename'}); |
132 |
|
|
133 |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
my $maxmfn = OpenIsis::maxRowid( $isis_db ) || 1; |
134 |
|
|
135 |
|
$maxmfn = $self->{limit_mfn} if ($self->{limit_mfn}); |
136 |
|
|
137 |
|
print STDERR "processing $maxmfn records...\n" if ($self->{'debug'}); |
138 |
|
|
139 |
# read database |
# read database |
140 |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
for (my $mfn = 1; $mfn <= $maxmfn; $mfn++) { |
141 |
|
|
168 |
|
|
169 |
} |
} |
170 |
|
|
171 |
|
$self->{'current_mfn'} = 1; |
172 |
|
|
173 |
# store max mfn and return it. |
# store max mfn and return it. |
174 |
return $self->{'max_mfn'} = $maxmfn; |
return $self->{'max_mfn'} = $maxmfn; |
175 |
} |
} |
176 |
|
|
177 |
|
=head2 fetch_rec |
178 |
|
|
179 |
|
Fetch next record from database. It will also display progress bar (once |
180 |
|
it's implemented, that is). |
181 |
|
|
182 |
|
my $rec = $webpac->fetch_rec; |
183 |
|
|
184 |
|
=cut |
185 |
|
|
186 |
|
sub fetch_rec { |
187 |
|
my $self = shift; |
188 |
|
|
189 |
|
my $mfn = $self->{'current_mfn'}++ || confess "it seems that you didn't load database!"; |
190 |
|
|
191 |
|
if ($mfn > $self->{'max_mfn'}) { |
192 |
|
$self->{'current_mfn'} = $self->{'max_mfn'}; |
193 |
|
return; |
194 |
|
} |
195 |
|
|
196 |
|
return $self->{'data'}->{$mfn}; |
197 |
|
} |
198 |
|
|
199 |
|
=head2 open_import_xml |
200 |
|
|
201 |
|
Read file from C<import_xml/> directory and parse it. |
202 |
|
|
203 |
|
$webpac->open_import_xml(type => 'isis'); |
204 |
|
|
205 |
|
=cut |
206 |
|
|
207 |
|
sub open_import_xml { |
208 |
|
my $self = shift; |
209 |
|
|
210 |
|
my $arg = {@_}; |
211 |
|
confess "need type to load file from import_xml/" if (! $arg->{'type'}); |
212 |
|
|
213 |
|
$self->{'type'} = $arg->{'type'}; |
214 |
|
|
215 |
|
my $type_base = $arg->{'type'}; |
216 |
|
$type_base =~ s/_.*$//g; |
217 |
|
|
218 |
|
$self->{'tag'} = $type2tag{$type_base}; |
219 |
|
|
220 |
|
print STDERR "using type ",$self->{'type'}," tag ",$self->{'tag'},"\n" if ($self->{'debug'}); |
221 |
|
|
222 |
|
my $f = "./import_xml/".$self->{'type'}.".xml"; |
223 |
|
confess "import_xml file '$f' doesn't exist!" if (! -e "$f"); |
224 |
|
|
225 |
|
print STDERR "reading '$f'\n" if ($self->{'debug'}); |
226 |
|
|
227 |
|
$self->{'import_xml'} = XMLin($f, |
228 |
|
ForceArray => [ $self->{'tag'}, 'config', 'format' ], |
229 |
|
ForceContent => 1 |
230 |
|
); |
231 |
|
|
232 |
|
print Dumper($self->{'import_xml'}); |
233 |
|
|
234 |
|
} |
235 |
|
|
236 |
=head2 create_lookup |
=head2 create_lookup |
237 |
|
|
238 |
Create lookup from record using lookup definition. |
Create lookup from record using lookup definition. |
292 |
return $$rec->{$f}->[$i]->{$sf}; |
return $$rec->{$f}->[$i]->{$sf}; |
293 |
} elsif ($$rec->{$f}->[$i]) { |
} elsif ($$rec->{$f}->[$i]) { |
294 |
$$found++ if (defined($$found)); |
$$found++ if (defined($$found)); |
295 |
return $$rec->{$f}->[$i]; |
# it still might have subfield, just |
296 |
|
# not specified, so we'll dump all |
297 |
|
if ($$rec->{$f}->[$i] =~ /HASH/o) { |
298 |
|
my $out; |
299 |
|
foreach my $k (keys %{$$rec->{$f}->[$i]}) { |
300 |
|
$out .= $$rec->{$f}->[$i]->{$k}." "; |
301 |
|
} |
302 |
|
return $out; |
303 |
|
} else { |
304 |
|
return $$rec->{$f}->[$i]; |
305 |
|
} |
306 |
} |
} |
307 |
} else { |
} else { |
308 |
return ''; |
return ''; |
342 |
|
|
343 |
my $found = 0; |
my $found = 0; |
344 |
|
|
345 |
|
my $eval_code; |
346 |
|
# remove eval{...} from beginning |
347 |
|
$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); |
348 |
|
|
349 |
# do actual replacement of placeholders |
# do actual replacement of placeholders |
350 |
$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; |
$format =~ s/v(\d+)(?:\^(\w))*/$self->get_data(\$rec,$1,$2,$i,\$found)/ges; |
351 |
|
|
352 |
if ($found) { |
if ($found) { |
353 |
|
if ($eval_code) { |
354 |
|
my $eval = $self->fill_in($rec,$eval_code,$i); |
355 |
|
return if (! eval $eval); |
356 |
|
} |
357 |
# do we have lookups? |
# do we have lookups? |
358 |
if ($format =~ /\[[^\[\]]+\]/o) { |
if ($format =~ /\[[^\[\]]+\]/o) { |
359 |
return $self->lookup($format); |
return $self->lookup($format); |
422 |
sub parse { |
sub parse { |
423 |
my $self = shift; |
my $self = shift; |
424 |
|
|
425 |
my ($rec, $format, $i) = @_; |
my ($rec, $format_utf8, $i) = @_; |
426 |
|
|
427 |
|
return if (! $format_utf8); |
428 |
|
|
429 |
|
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
430 |
|
confess("need utf2cp Text::Iconv object!") if (! $self->{'utf2cp'}); |
431 |
|
|
432 |
|
$i = 0 if (! $i); |
433 |
|
|
434 |
|
my $format = $self->{'utf2cp'}->convert($format_utf8) || confess("can't convert '$format_utf8' from UTF-8 to ",$self->{'code_page'}); |
435 |
|
|
436 |
my @out; |
my @out; |
437 |
|
|
439 |
# remove eval{...} from beginning |
# remove eval{...} from beginning |
440 |
$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); |
$eval_code = $1 if ($format =~ s/^eval{([^}]+)}//s); |
441 |
|
|
442 |
my $prefix = ''; |
my $prefix; |
443 |
$prefix = $1 if ($format =~ s/^(.+)(v\d+(?:\^\w)*)/$2/s); |
my $all_found=0; |
444 |
|
|
445 |
|
#print "## $format\n"; |
446 |
|
while ($format =~ s/^(.*?)v(\d+)(?:\^(\w))*//s) { |
447 |
|
#print "## [ $1 | $2 | $3 ] $format\n"; |
448 |
|
|
449 |
sub f_sf_del { |
my $del = $1 || ''; |
450 |
my ($self,$rec,$out,$f,$sf,$del,$i) = @_; |
$prefix ||= $del if ($all_found == 0); |
451 |
|
|
452 |
|
my $found = 0; |
453 |
|
my $tmp = $self->get_data(\$rec,$2,$3,$i,\$found); |
454 |
|
|
|
my $found=0; |
|
|
my $tmp = $self->get_data($rec,$f,$sf,$i,\$found); |
|
455 |
if ($found) { |
if ($found) { |
456 |
push @{$$out}, $tmp; |
push @out, $del; |
457 |
push @{$$out}, $del; |
push @out, $tmp; |
458 |
|
$all_found += $found; |
459 |
} |
} |
|
return ''; |
|
460 |
} |
} |
461 |
|
|
462 |
#$format =~ s/(.*)v(\d+)(?:\^(\w))*/f_sf_del($self,\$rec,\@out,$2,$3,$1,$i/ges; |
return if (! $all_found); |
463 |
|
|
464 |
|
my $out = join('',@out) . $format; |
465 |
|
|
466 |
|
# add prefix if not there |
467 |
|
$out = $prefix . $out if ($out !~ m/^\Q$prefix\E/); |
468 |
|
|
469 |
|
if ($eval_code) { |
470 |
|
my $eval = $self->fill_in($rec,$eval_code,$i); |
471 |
|
return if (! eval $eval); |
472 |
|
} |
473 |
|
|
474 |
|
return $out; |
475 |
|
} |
476 |
|
|
477 |
|
=head2 data_structure |
478 |
|
|
479 |
|
Create in-memory data structure which represents layout from C<import_xml>. |
480 |
|
It is used later to produce output. |
481 |
|
|
482 |
|
my $ds = $webpac->data_structure($rec); |
483 |
|
|
484 |
|
=cut |
485 |
|
|
486 |
|
# private method _sort_by_order |
487 |
|
# sort subrouting using order="" attribute |
488 |
|
sub _sort_by_order { |
489 |
|
my $self = shift; |
490 |
|
|
491 |
|
my $va = $self->{'import_xml'}->{'indexer'}->{$a}->{'order'} || |
492 |
|
$self->{'import_xml'}->{'indexer'}->{$a}; |
493 |
|
my $vb = $self->{'import_xml'}->{'indexer'}->{$b}->{'order'} || |
494 |
|
$self->{'import_xml'}->{'indexer'}->{$b}; |
495 |
|
|
496 |
|
return $va <=> $vb; |
497 |
|
} |
498 |
|
|
499 |
|
sub data_structure { |
500 |
|
my $self = shift; |
501 |
|
|
502 |
|
my $rec = shift; |
503 |
|
confess("need HASH as first argument!") if ($rec !~ /HASH/o); |
504 |
|
|
505 |
|
my @sorted_tags; |
506 |
|
if ($self->{tags_by_order}) { |
507 |
|
@sorted_tags = @{$self->{tags_by_order}}; |
508 |
|
} else { |
509 |
|
@sorted_tags = sort { $self->_sort_by_order } keys %{$self->{'import_xml'}->{'indexer'}}; |
510 |
|
$self->{tags_by_order} = \@sorted_tags; |
511 |
|
} |
512 |
|
|
513 |
|
my $ds; |
514 |
|
|
515 |
|
foreach my $field (@sorted_tags) { |
516 |
|
|
517 |
|
my $row; |
518 |
|
my $i = 0; |
519 |
|
|
520 |
|
#print "field $field [",$self->{'tag'},"] = ",Dumper($self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}); |
521 |
|
|
522 |
|
foreach my $tag (@{$self->{'import_xml'}->{'indexer'}->{$field}->{$self->{'tag'}}}) { |
523 |
|
|
524 |
|
my $v = $self->parse($rec,$tag->{'content'},$i); |
525 |
|
print "## $i:",$tag->{'content'}," = ",($v || 'null'),"\n"; |
526 |
|
|
527 |
|
next if (!$v || $v && $v eq ''); |
528 |
|
|
529 |
|
# does tag have type? |
530 |
|
if ($tag->{'type'}) { |
531 |
|
push @{$row->{$tag->{'type'}}}, $v; |
532 |
|
} else { |
533 |
|
push @{$row->{'display'}}, $v; |
534 |
|
push @{$row->{'swish'}}, $v; |
535 |
|
} |
536 |
|
} |
537 |
|
|
538 |
|
push @{$ds->{$field}}, $row if ($row); |
539 |
|
|
540 |
|
} |
541 |
|
|
542 |
print Dumper(@out); |
print Dumper($ds); |
543 |
|
|
544 |
} |
} |
545 |
|
|