9 |
use WebPAC::Common 0.02; |
use WebPAC::Common 0.02; |
10 |
use WebPAC::Parser 0.08; |
use WebPAC::Parser 0.08; |
11 |
use WebPAC::Input 0.16; |
use WebPAC::Input 0.16; |
12 |
use WebPAC::Store 0.14; |
use WebPAC::Store 0.15; |
13 |
use WebPAC::Normalize 0.22; |
use WebPAC::Normalize 0.22; |
14 |
use WebPAC::Output::TT; |
use WebPAC::Output::TT; |
15 |
use WebPAC::Validate 0.11; |
use WebPAC::Validate 0.11; |
177 |
|
|
178 |
my $validate; |
my $validate; |
179 |
$validate = new WebPAC::Validate( |
$validate = new WebPAC::Validate( |
|
path => $validate_path, |
|
180 |
delimiters => $config->webpac('delimiters'), |
delimiters => $config->webpac('delimiters'), |
|
delimiters_path => $validate_delimiters_path, |
|
181 |
) if ($validate_path || $validate_delimiters_path); |
) if ($validate_path || $validate_delimiters_path); |
182 |
|
|
183 |
my $use_indexer = $config->use_indexer; |
my $use_indexer = $config->use_indexer; |
185 |
if ($stats) { |
if ($stats) { |
186 |
$log->debug("disabled indexing for stats collection"); |
$log->debug("disabled indexing for stats collection"); |
187 |
$use_indexer = undef; |
$use_indexer = undef; |
188 |
} else { |
} elsif ( $use_indexer ) { |
189 |
$log->info("using $use_indexer indexing engine..."); |
$log->info("using $use_indexer indexing engine..."); |
190 |
} |
} |
191 |
|
|
212 |
return $c; |
return $c; |
213 |
} |
} |
214 |
|
|
215 |
while (my ($database, $db_config) = each %{ $config->databases }) { |
foreach my $database ( sort keys %{ $config->databases } ) { |
216 |
|
my $db_config = $config->databases->{$database}; |
217 |
|
|
218 |
my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); |
my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); |
219 |
next if ($only_database && $database !~ m/$only_database/i); |
next if ($only_database && $database !~ m/$only_database/i); |
244 |
if ($use_indexer eq 'hyperestraier') { |
if ($use_indexer eq 'hyperestraier') { |
245 |
|
|
246 |
# open Hyper Estraier database |
# open Hyper Estraier database |
247 |
use WebPAC::Output::Estraier '0.10'; |
require WebPAC::Output::Estraier; |
248 |
$indexer = new WebPAC::Output::Estraier( %{ $indexer_config } ); |
$indexer = new WebPAC::Output::Estraier( %{ $indexer_config } ); |
249 |
|
|
250 |
} elsif ($use_indexer eq 'hyperestraier-native') { |
} elsif ($use_indexer eq 'hyperestraier-native') { |
251 |
|
|
252 |
# open Hyper Estraier database |
# open Hyper Estraier database |
253 |
use WebPAC::Output::EstraierNative; |
require WebPAC::Output::EstraierNative; |
254 |
$indexer = new WebPAC::Output::EstraierNative( %{ $indexer_config } ); |
$indexer = new WebPAC::Output::EstraierNative( %{ $indexer_config } ); |
255 |
|
|
256 |
} elsif ($use_indexer eq 'kinosearch') { |
} elsif ($use_indexer eq 'kinosearch') { |
257 |
|
|
258 |
# open KinoSearch |
die "no longer supported"; |
|
use WebPAC::Output::KinoSearch; |
|
|
$indexer_config->{clean} = 1 unless (-e $indexer_config->{index_path}); |
|
|
$indexer = new WebPAC::Output::KinoSearch( %{ $indexer_config } ); |
|
259 |
|
|
260 |
} else { |
} else { |
261 |
$log->logdie("unknown use_indexer: $use_indexer"); |
$log->logdie("unknown use_indexer: $use_indexer"); |
262 |
} |
} |
263 |
|
|
264 |
$log->logide("can't continue without valid indexer") unless ($indexer); |
$log->logdie("can't continue without valid indexer") unless ($indexer); |
265 |
} |
} |
266 |
|
|
267 |
|
|
295 |
# |
# |
296 |
# now WebPAC::Store |
# now WebPAC::Store |
297 |
# |
# |
298 |
my $abs_path = abs_path($0); |
my $store = new WebPAC::Store({ |
299 |
$abs_path =~ s#/[^/]*$#/#; |
debug => $debug, |
300 |
|
}); |
301 |
|
|
|
my $db_path = $config->webpac('db_path'); |
|
302 |
|
|
303 |
if ($clean) { |
# |
304 |
$log->info("creating new database '$database' in $db_path"); |
# prepare output |
305 |
rmtree( $db_path ) || $log->warn("can't remove $db_path: $!"); |
# |
306 |
} else { |
my @outputs = force_array( $db_config->{output}, sub { |
307 |
$log->info("working on database '$database' in $db_path"); |
$log->error("Database $database doesn't have any outputs defined. Do you want to remove it from configuration?" ); |
308 |
} |
} ); |
309 |
|
|
310 |
my $store = new WebPAC::Store( |
my @output_modules; |
311 |
path => $db_path, |
|
312 |
debug => $debug, |
foreach my $output ( @outputs ) { |
313 |
); |
|
314 |
|
warn '## output = ',dump( $output ); |
315 |
|
|
316 |
|
my $module = $output->{module} || $log->logdie("need module in output section of $database"); |
317 |
|
$module = 'WebPAC::Output::' . $module unless $module =~ m/::/; |
318 |
|
|
319 |
|
$log->debug("loading output module $module"); |
320 |
|
eval "require $module"; |
321 |
|
|
322 |
|
# add database to arguments for output filter |
323 |
|
$output->{database} = $database; |
324 |
|
|
325 |
|
$log->debug("calling $module->new(",dump( $output ),")"); |
326 |
|
my $out = new $module->new( $output ); |
327 |
|
$out->init; |
328 |
|
|
329 |
|
push @output_modules, $out; |
330 |
|
} |
331 |
|
|
332 |
|
|
333 |
# |
# |
334 |
# now, iterate through input formats |
# now, iterate through input formats |
335 |
# |
# |
336 |
|
|
337 |
my @inputs; |
|
338 |
if (ref($db_config->{input}) eq 'ARRAY') { |
my @inputs = force_array( $db_config->{input}, sub { |
|
@inputs = @{ $db_config->{input} }; |
|
|
} elsif ($db_config->{input}) { |
|
|
push @inputs, $db_config->{input}; |
|
|
} else { |
|
339 |
$log->info("database $database doesn't have inputs defined"); |
$log->info("database $database doesn't have inputs defined"); |
340 |
} |
} ); |
341 |
|
|
342 |
foreach my $input (@inputs) { |
foreach my $input (@inputs) { |
343 |
|
|
372 |
stats => $stats, |
stats => $stats, |
373 |
modify_records => $input->{modify_records}, |
modify_records => $input->{modify_records}, |
374 |
modify_file => $input->{modify_file}, |
modify_file => $input->{modify_file}, |
375 |
|
input_config => $input, |
376 |
); |
); |
377 |
$log->logdie("can't create input using $input_module") unless ($input); |
$log->logdie("can't create input using $input_module") unless ($input); |
378 |
|
|
457 |
print $report_fh "Report for database '$database' input '$input_name' records ", |
print $report_fh "Report for database '$database' input '$input_name' records ", |
458 |
$offset || 1, "-", $limit || $input->{limit} || $maxmfn, "\n\n"; |
$offset || 1, "-", $limit || $input->{limit} || $maxmfn, "\n\n"; |
459 |
$log->info("Generating report file $path"); |
$log->info("Generating report file $path"); |
460 |
|
|
461 |
|
if ( $validate ) { |
462 |
|
$validate->read_validate_file( $validate->fill_in( $validate_path, database => $database, input => $input_name ) ) if ( $validate_path ); |
463 |
|
$validate->read_validate_delimiters_file( $validate->fill_in( $validate_delimiters_path, database => $database, input => $input_name ) ) if ( $validate_delimiters_path ); |
464 |
|
} |
465 |
} |
} |
466 |
|
|
467 |
my $marc; |
my $marc; |
509 |
} |
} |
510 |
|
|
511 |
|
|
512 |
|
# set up input name for all output filters |
513 |
|
foreach my $out ( @output_modules ) { |
514 |
|
if ( $out->can('input') ) { |
515 |
|
$out->input( $input_name ); |
516 |
|
} else { |
517 |
|
$log->warn("output filter ",ref($out)," doesn't support input name"); |
518 |
|
} |
519 |
|
} |
520 |
|
|
521 |
|
|
522 |
foreach my $pos ( 0 ... $input_db->size ) { |
foreach my $pos ( 0 ... $input_db->size ) { |
523 |
|
|
524 |
my $row = $input_db->fetch || next; |
my $row = $input_db->fetch || next; |
527 |
|
|
528 |
my $mfn = $row->{'000'}->[0]; |
my $mfn = $row->{'000'}->[0]; |
529 |
|
|
530 |
if (! $mfn || $mfn !~ m#^\d+$#) { |
if (! $mfn || $mfn !~ m{^\d+$}) { |
531 |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
532 |
$mfn = $pos; |
$mfn = $pos; |
533 |
push @{ $row->{'000'} }, $pos; |
push @{ $row->{'000'} }, $pos; |
589 |
|
|
590 |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
591 |
} |
} |
592 |
|
|
593 |
|
foreach my $out ( @output_modules ) { |
594 |
|
$out->add( $mfn, $ds ) if $out->can('add'); |
595 |
|
} |
596 |
|
|
597 |
} |
} |
598 |
|
|
599 |
if ($validate) { |
if ($validate) { |
624 |
|
|
625 |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
626 |
|
|
627 |
|
foreach my $out ( @output_modules ) { |
628 |
|
$out->finish if $out->can('finish'); |
629 |
|
} |
630 |
|
|
631 |
my $dt = time() - $start_t; |
my $dt = time() - $start_t; |
632 |
$log->info("$total_rows records ", $indexer ? "indexed " : "", |
$log->info("$total_rows records ", $indexer ? "indexed " : "", |
633 |
sprintf("in %.2f sec [%.2f rec/sec]", |
sprintf("in %.2f sec [%.2f rec/sec]", |
651 |
} |
} |
652 |
|
|
653 |
# save new delimiters if needed |
# save new delimiters if needed |
654 |
$validate->save_delimiters_templates if ( $validate ); |
$validate->save_delimiters_templates if ( $validate_delimiters_path ); |
655 |
|
|
656 |
# |
# |
657 |
# handle links or merge after indexing |
# handle links or merge after indexing |