9 |
use WebPAC::Common 0.02; |
use WebPAC::Common 0.02; |
10 |
use WebPAC::Parser 0.08; |
use WebPAC::Parser 0.08; |
11 |
use WebPAC::Input 0.16; |
use WebPAC::Input 0.16; |
12 |
use WebPAC::Store 0.14; |
use WebPAC::Store 0.15; |
13 |
use WebPAC::Normalize 0.22; |
use WebPAC::Normalize 0.22; |
14 |
use WebPAC::Output::TT; |
use WebPAC::Output::TT; |
15 |
use WebPAC::Validate 0.11; |
use WebPAC::Validate 0.11; |
255 |
|
|
256 |
} elsif ($use_indexer eq 'kinosearch') { |
} elsif ($use_indexer eq 'kinosearch') { |
257 |
|
|
258 |
# open KinoSearch |
die "no longer supported"; |
|
require WebPAC::Output::KinoSearch; |
|
|
$indexer_config->{clean} = 1 unless (-e $indexer_config->{index_path}); |
|
|
$indexer = new WebPAC::Output::KinoSearch( %{ $indexer_config } ); |
|
259 |
|
|
260 |
} else { |
} else { |
261 |
$log->logdie("unknown use_indexer: $use_indexer"); |
$log->logdie("unknown use_indexer: $use_indexer"); |
295 |
# |
# |
296 |
# now WebPAC::Store |
# now WebPAC::Store |
297 |
# |
# |
298 |
my $abs_path = abs_path($0); |
my $store = new WebPAC::Store({ |
299 |
$abs_path =~ s#/[^/]*$#/#; |
debug => $debug, |
300 |
|
}); |
|
my $db_path = $config->webpac('db_path'); |
|
301 |
|
|
302 |
if ($clean) { |
sub iterate_over { |
303 |
$log->info("creating new database '$database' in $db_path"); |
my ( $what, $error ) = @_; |
304 |
rmtree( $db_path ) || $log->warn("can't remove $db_path: $!"); |
my @result; |
305 |
} else { |
if ( ref( $what ) eq 'ARRAY' ) { |
306 |
$log->info("working on database '$database' in $db_path"); |
@result = @{ $what }; |
307 |
|
} elsif ($db_config->{input}) { |
308 |
|
@result = ( $what ); |
309 |
|
} else { |
310 |
|
$error->() if ref($error) eq 'CODE'; |
311 |
|
} |
312 |
|
return @result; |
313 |
} |
} |
314 |
|
|
315 |
my $store = new WebPAC::Store( |
# |
316 |
path => $db_path, |
# prepare output |
317 |
debug => $debug, |
# |
318 |
); |
my @outputs = iterate_over( $db_config->{output}, sub { |
319 |
|
$log->error("Database $database doesn't have any outputs defined. Do you want to remove it from configuration?" ); |
320 |
|
} ); |
321 |
|
|
322 |
|
my @output_modules; |
323 |
|
|
324 |
|
foreach my $output ( @outputs ) { |
325 |
|
|
326 |
|
warn '## output = ',dump( $output ); |
327 |
|
|
328 |
|
my $module = $output->{module} || $log->logdie("need module in output section of $database"); |
329 |
|
$module = 'WebPAC::Output::' . $module unless $module =~ m/::/; |
330 |
|
|
331 |
|
$log->debug("loading output module $module"); |
332 |
|
eval "require $module"; |
333 |
|
|
334 |
|
$output->{database} = $database; |
335 |
|
|
336 |
|
$log->debug("calling $module->new(",dump( $output ),")"); |
337 |
|
my $out = new $module->new( $output ); |
338 |
|
$out->init; |
339 |
|
|
340 |
|
push @output_modules, $out; |
341 |
|
} |
342 |
|
|
343 |
|
|
344 |
# |
# |
345 |
# now, iterate through input formats |
# now, iterate through input formats |
346 |
# |
# |
347 |
|
|
348 |
my @inputs; |
|
349 |
if (ref($db_config->{input}) eq 'ARRAY') { |
my @inputs = iterate_over( $db_config->{input}, sub { |
|
@inputs = @{ $db_config->{input} }; |
|
|
} elsif ($db_config->{input}) { |
|
|
push @inputs, $db_config->{input}; |
|
|
} else { |
|
350 |
$log->info("database $database doesn't have inputs defined"); |
$log->info("database $database doesn't have inputs defined"); |
351 |
} |
} ); |
352 |
|
|
353 |
foreach my $input (@inputs) { |
foreach my $input (@inputs) { |
354 |
|
|
383 |
stats => $stats, |
stats => $stats, |
384 |
modify_records => $input->{modify_records}, |
modify_records => $input->{modify_records}, |
385 |
modify_file => $input->{modify_file}, |
modify_file => $input->{modify_file}, |
386 |
|
input_config => $input, |
387 |
); |
); |
388 |
$log->logdie("can't create input using $input_module") unless ($input); |
$log->logdie("can't create input using $input_module") unless ($input); |
389 |
|
|
528 |
|
|
529 |
my $mfn = $row->{'000'}->[0]; |
my $mfn = $row->{'000'}->[0]; |
530 |
|
|
531 |
if (! $mfn || $mfn !~ m#^\d+$#) { |
if (! $mfn || $mfn !~ m{^\d+$}) { |
532 |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
533 |
$mfn = $pos; |
$mfn = $pos; |
534 |
push @{ $row->{'000'} }, $pos; |
push @{ $row->{'000'} }, $pos; |
590 |
|
|
591 |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
592 |
} |
} |
593 |
|
|
594 |
|
foreach my $out ( @output_modules ) { |
595 |
|
$out->add( $mfn, $ds ) if $out->can('add'); |
596 |
|
} |
597 |
|
|
598 |
} |
} |
599 |
|
|
600 |
if ($validate) { |
if ($validate) { |
625 |
|
|
626 |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
627 |
|
|
628 |
|
foreach my $out ( @output_modules ) { |
629 |
|
$out->finish if $out->can('finish'); |
630 |
|
} |
631 |
|
|
632 |
my $dt = time() - $start_t; |
my $dt = time() - $start_t; |
633 |
$log->info("$total_rows records ", $indexer ? "indexed " : "", |
$log->info("$total_rows records ", $indexer ? "indexed " : "", |
634 |
sprintf("in %.2f sec [%.2f rec/sec]", |
sprintf("in %.2f sec [%.2f rec/sec]", |