9 |
use WebPAC::Common 0.02; |
use WebPAC::Common 0.02; |
10 |
use WebPAC::Parser 0.08; |
use WebPAC::Parser 0.08; |
11 |
use WebPAC::Input 0.16; |
use WebPAC::Input 0.16; |
12 |
use WebPAC::Store 0.14; |
use WebPAC::Store 0.15; |
13 |
use WebPAC::Normalize 0.22; |
use WebPAC::Normalize 0.22; |
14 |
use WebPAC::Output::TT; |
use WebPAC::Output::TT; |
15 |
use WebPAC::Validate 0.11; |
use WebPAC::Validate 0.11; |
255 |
|
|
256 |
} elsif ($use_indexer eq 'kinosearch') { |
} elsif ($use_indexer eq 'kinosearch') { |
257 |
|
|
258 |
# open KinoSearch |
die "no longer supported"; |
|
require WebPAC::Output::KinoSearch; |
|
|
$indexer_config->{clean} = 1 unless (-e $indexer_config->{index_path}); |
|
|
$indexer = new WebPAC::Output::KinoSearch( %{ $indexer_config } ); |
|
259 |
|
|
260 |
} else { |
} else { |
261 |
$log->logdie("unknown use_indexer: $use_indexer"); |
$log->logdie("unknown use_indexer: $use_indexer"); |
295 |
# |
# |
296 |
# now WebPAC::Store |
# now WebPAC::Store |
297 |
# |
# |
298 |
my $abs_path = abs_path($0); |
my $store = new WebPAC::Store({ |
299 |
$abs_path =~ s#/[^/]*$#/#; |
debug => $debug, |
300 |
|
}); |
301 |
|
|
|
my $db_path = $config->webpac('db_path'); |
|
302 |
|
|
303 |
if ($clean) { |
# |
304 |
$log->info("creating new database '$database' in $db_path"); |
# prepare output |
305 |
rmtree( $db_path ) || $log->warn("can't remove $db_path: $!"); |
# |
306 |
} else { |
my @outputs = force_array( $db_config->{output}, sub { |
307 |
$log->info("working on database '$database' in $db_path"); |
$log->error("Database $database doesn't have any outputs defined. Do you want to remove it from configuration?" ); |
308 |
} |
} ); |
309 |
|
|
310 |
my $store = new WebPAC::Store( |
my @output_modules; |
311 |
path => $db_path, |
|
312 |
debug => $debug, |
foreach my $output ( @outputs ) { |
313 |
); |
|
314 |
|
warn '## output = ',dump( $output ); |
315 |
|
|
316 |
|
my $module = $output->{module} || $log->logdie("need module in output section of $database"); |
317 |
|
$module = 'WebPAC::Output::' . $module unless $module =~ m/::/; |
318 |
|
|
319 |
|
$log->debug("loading output module $module"); |
320 |
|
eval "require $module"; |
321 |
|
|
322 |
|
# add database to arguments for output filter |
323 |
|
$output->{database} = $database; |
324 |
|
|
325 |
|
$log->debug("calling $module->new(",dump( $output ),")"); |
326 |
|
my $out = new $module->new( $output ); |
327 |
|
$out->init; |
328 |
|
|
329 |
|
push @output_modules, $out; |
330 |
|
} |
331 |
|
|
332 |
|
|
333 |
# |
# |
334 |
# now, iterate through input formats |
# now, iterate through input formats |
335 |
# |
# |
336 |
|
|
337 |
my @inputs; |
|
338 |
if (ref($db_config->{input}) eq 'ARRAY') { |
my @inputs = force_array( $db_config->{input}, sub { |
|
@inputs = @{ $db_config->{input} }; |
|
|
} elsif ($db_config->{input}) { |
|
|
push @inputs, $db_config->{input}; |
|
|
} else { |
|
339 |
$log->info("database $database doesn't have inputs defined"); |
$log->info("database $database doesn't have inputs defined"); |
340 |
} |
} ); |
341 |
|
|
342 |
foreach my $input (@inputs) { |
foreach my $input (@inputs) { |
343 |
|
|
509 |
} |
} |
510 |
|
|
511 |
|
|
512 |
|
# setup input name for all output filters |
513 |
|
foreach my $out ( @output_modules ) { |
514 |
|
if ( $out->can('input') ) { |
515 |
|
$out->input( $input_name ); |
516 |
|
} else { |
517 |
|
$log->warn("output filter ",ref($out)," doesn't support input name"); |
518 |
|
} |
519 |
|
} |
520 |
|
|
521 |
|
|
522 |
foreach my $pos ( 0 ... $input_db->size ) { |
foreach my $pos ( 0 ... $input_db->size ) { |
523 |
|
|
524 |
my $row = $input_db->fetch || next; |
my $row = $input_db->fetch || next; |
527 |
|
|
528 |
my $mfn = $row->{'000'}->[0]; |
my $mfn = $row->{'000'}->[0]; |
529 |
|
|
530 |
if (! $mfn || $mfn !~ m#^\d+$#) { |
if (! $mfn || $mfn !~ m{^\d+$}) { |
531 |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
532 |
$mfn = $pos; |
$mfn = $pos; |
533 |
push @{ $row->{'000'} }, $pos; |
push @{ $row->{'000'} }, $pos; |
589 |
|
|
590 |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
591 |
} |
} |
592 |
|
|
593 |
|
foreach my $out ( @output_modules ) { |
594 |
|
$out->add( $mfn, $ds ) if $out->can('add'); |
595 |
|
} |
596 |
|
|
597 |
} |
} |
598 |
|
|
599 |
if ($validate) { |
if ($validate) { |
624 |
|
|
625 |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
626 |
|
|
627 |
|
foreach my $out ( @output_modules ) { |
628 |
|
$out->finish if $out->can('finish'); |
629 |
|
} |
630 |
|
|
631 |
my $dt = time() - $start_t; |
my $dt = time() - $start_t; |
632 |
$log->info("$total_rows records ", $indexer ? "indexed " : "", |
$log->info("$total_rows records ", $indexer ? "indexed " : "", |
633 |
sprintf("in %.2f sec [%.2f rec/sec]", |
sprintf("in %.2f sec [%.2f rec/sec]", |