10 |
use WebPAC::Parser 0.04; |
use WebPAC::Parser 0.04; |
11 |
use WebPAC::Input 0.13; |
use WebPAC::Input 0.13; |
12 |
use WebPAC::Store 0.11; |
use WebPAC::Store 0.11; |
13 |
use WebPAC::Normalize 0.11; |
use WebPAC::Normalize 0.22; |
14 |
use WebPAC::Output::TT; |
use WebPAC::Output::TT; |
15 |
use WebPAC::Validate 0.06; |
use WebPAC::Validate 0.06; |
16 |
use WebPAC::Output::MARC; |
use WebPAC::Output::MARC; |
354 |
delete( $input->{lookup} ); |
delete( $input->{lookup} ); |
355 |
} |
} |
356 |
|
|
|
my $lookup; |
|
357 |
my $lookup_coderef; |
my $lookup_coderef; |
358 |
|
|
359 |
if (@lookups) { |
if (@lookups) { |
364 |
my $rec = shift || die "need rec!"; |
my $rec = shift || die "need rec!"; |
365 |
my $mfn = $rec->{'000'}->[0] || die "need mfn in 000"; |
my $mfn = $rec->{'000'}->[0] || die "need mfn in 000"; |
366 |
|
|
367 |
|
$store->save_row( |
368 |
|
database => $database, |
369 |
|
input => $input_name, |
370 |
|
id => $mfn, |
371 |
|
row => $rec, |
372 |
|
); |
373 |
|
|
374 |
WebPAC::Normalize::data_structure( |
WebPAC::Normalize::data_structure( |
375 |
row => $rec, |
row => $rec, |
376 |
rules => $rules, |
rules => $rules, |
|
lookup => $lookup, |
|
377 |
config => create_ds_config( $db_config, $database, $input, $mfn ), |
config => create_ds_config( $db_config, $database, $input, $mfn ), |
378 |
); |
); |
379 |
|
|
380 |
warn "current lookup = ", dump($lookup) if ($lookup); |
#warn "current lookup: ", dump(WebPAC::Normalize::_get_lookup()); |
381 |
}; |
}; |
382 |
|
|
383 |
WebPAC::Normalize::_set_lookup( undef ); |
WebPAC::Normalize::_set_lookup( undef ); |
386 |
|
|
387 |
}; |
}; |
388 |
|
|
389 |
|
my $lookup_jar; |
390 |
|
|
391 |
my $maxmfn = $input_db->open( |
my $maxmfn = $input_db->open( |
392 |
path => $input->{path}, |
path => $input->{path}, |
393 |
code_page => $input->{encoding}, # database encoding |
code_page => $input->{encoding}, # database encoding |
394 |
lookup_coderef => $lookup_coderef, |
lookup_coderef => $lookup_coderef, |
395 |
|
lookup => $lookup_jar, |
396 |
%{ $input }, |
%{ $input }, |
397 |
); |
); |
398 |
|
|
421 |
$log->info("Generating report file $path"); |
$log->info("Generating report file $path"); |
422 |
} |
} |
423 |
|
|
424 |
my @norm_array = ref($input->{normalize}) eq 'ARRAY' ? |
my $marc; |
|
@{ $input->{normalize} } : ( $input->{normalize} ); |
|
|
|
|
425 |
if ($marc_normalize) { |
if ($marc_normalize) { |
426 |
@norm_array = ( { |
$marc = new WebPAC::Output::MARC( |
427 |
path => $marc_normalize, |
path => $marc_output || "out/marc/${database}-${input_name}.marc", |
|
output => $marc_output || "out/marc/${database}-${input_name}.marc", |
|
|
} ); |
|
|
} |
|
|
|
|
|
foreach my $normalize (@norm_array) { |
|
|
|
|
|
my $normalize_path = $normalize->{path} || $log->logdie("can't find normalize path in config"); |
|
|
|
|
|
$log->logdie("Found '$normalize_path' as normalization file which isn't supported any more!") unless ( $normalize_path =~ m!\.pl$!i ); |
|
|
|
|
|
my $rules = read_file( $normalize_path ) or die "can't open $normalize_path: $!"; |
|
|
|
|
|
$log->info("Using $normalize_path for normalization..."); |
|
|
|
|
|
my $marc = new WebPAC::Output::MARC( |
|
|
path => $normalize->{output}, |
|
428 |
lint => $marc_lint, |
lint => $marc_lint, |
429 |
dump => $marc_dump, |
dump => $marc_dump, |
430 |
) if ($normalize->{output}); |
); |
431 |
|
} |
|
# reset position in database |
|
|
$input_db->seek(1); |
|
|
|
|
|
# generate name of config key for indexer (strip everything after -) |
|
|
my $indexer_config = $use_indexer; |
|
|
$indexer_config =~ s/^(\w+)-?.*$/$1/g if ($indexer_config); |
|
|
|
|
|
foreach my $pos ( 0 ... $input_db->size ) { |
|
|
|
|
|
my $row = $input_db->fetch || next; |
|
|
|
|
|
my $mfn = $row->{'000'}->[0]; |
|
432 |
|
|
433 |
if (! $mfn || $mfn !~ m#^\d+$#) { |
my $rules = $parser->normalize_rules($database,$input_name) || $log->logdie("no normalize rules found for $database/$input_name"); |
434 |
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
$log->debug("parsed normalize rules:\n$rules"); |
|
$mfn = $pos; |
|
|
push @{ $row->{'000'} }, $pos; |
|
|
} |
|
435 |
|
|
436 |
|
# reset position in database |
437 |
|
$input_db->seek(1); |
438 |
|
|
439 |
if ($validate) { |
# generate name of config key for indexer (strip everything after -) |
440 |
if ( my $errors = $validate->validate_errors( $row, $input_db->dump ) ) { |
my $indexer_config = $use_indexer; |
441 |
$log->error( "MFN $mfn validation error:\n", |
$indexer_config =~ s/^(\w+)-?.*$/$1/g if ($indexer_config); |
442 |
$validate->report_error( $errors ) |
|
443 |
|
my $lookup_hash; |
444 |
|
my $depends = $parser->depends($database,$input_name); |
445 |
|
|
446 |
|
if ($depends) { |
447 |
|
$log->debug("$database/$input_name depends on: ", dump($depends)) if ($depends); |
448 |
|
$log->logdie("parser->depends didn't return HASH") unless (ref($depends) eq 'HASH'); |
449 |
|
|
450 |
|
foreach my $db (keys %$depends) { |
451 |
|
foreach my $i (keys %{$depends->{$db}}) { |
452 |
|
foreach my $k (keys %{$depends->{$db}->{$i}}) { |
453 |
|
$log->debug("loading lookup $db/$i"); |
454 |
|
$lookup_hash->{$db}->{$i}->{$k} = $store->load_lookup( |
455 |
|
database => $db, |
456 |
|
input => $i, |
457 |
|
key => $k, |
458 |
); |
); |
459 |
} |
} |
460 |
} |
} |
461 |
|
} |
462 |
|
|
463 |
my $ds = WebPAC::Normalize::data_structure( |
$log->debug("lookup_hash = ", dump( $lookup_hash )); |
464 |
row => $row, |
} |
|
rules => $rules, |
|
|
lookup => $lookup ? $lookup->lookup_hash : undef, |
|
|
config => create_ds_config( $db_config, $database, $input, $mfn ), |
|
|
marc_encoding => 'utf-8', |
|
|
); |
|
465 |
|
|
|
$store->save_ds( |
|
|
database => $database, |
|
|
input => $input_name, |
|
|
id => $mfn, |
|
|
ds => $ds, |
|
|
) if ($ds && !$stats); |
|
466 |
|
|
467 |
$indexer->add( |
foreach my $pos ( 0 ... $input_db->size ) { |
|
id => "${input_name}/${mfn}", |
|
|
ds => $ds, |
|
|
type => $config->get($indexer_config)->{type}, |
|
|
) if ($indexer && $ds); |
|
|
|
|
|
if ($marc) { |
|
|
my $i = 0; |
|
|
|
|
|
while (my $fields = WebPAC::Normalize::_get_marc_fields( fetch_next => 1 ) ) { |
|
|
$marc->add( |
|
|
id => $mfn . ( $i ? "/$i" : '' ), |
|
|
fields => $fields, |
|
|
leader => WebPAC::Normalize::marc_leader(), |
|
|
row => $row, |
|
|
); |
|
|
$i++; |
|
|
} |
|
468 |
|
|
469 |
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
my $row = $input_db->fetch || next; |
|
} |
|
470 |
|
|
471 |
$total_rows++; |
my $mfn = $row->{'000'}->[0]; |
472 |
|
|
473 |
|
if (! $mfn || $mfn !~ m#^\d+$#) { |
474 |
|
$log->warn("record $pos doesn't have valid MFN but '$mfn', using $pos"); |
475 |
|
$mfn = $pos; |
476 |
|
push @{ $row->{'000'} }, $pos; |
477 |
} |
} |
478 |
|
|
479 |
|
|
480 |
if ($validate) { |
if ($validate) { |
481 |
my $errors = $validate->report; |
if ( my $errors = $validate->validate_errors( $row, $input_db->dump ) ) { |
482 |
if ($errors) { |
$log->error( "MFN $mfn validation error:\n", |
483 |
$log->info("validation errors:\n$errors\n" ); |
$validate->report_error( $errors ) |
484 |
print $report_fh "$errors\n" if ($report_fh); |
); |
485 |
} |
} |
486 |
} |
} |
487 |
|
|
488 |
if ($stats) { |
my $ds = WebPAC::Normalize::data_structure( |
489 |
my $s = $input_db->stats; |
row => $row, |
490 |
$log->info("statistics of fields usage:\n$s"); |
rules => $rules, |
491 |
print $report_fh "Statistics of fields usage:\n$s" if ($report_fh); |
lookup => $lookup_hash, |
492 |
|
config => create_ds_config( $db_config, $database, $input, $mfn ), |
493 |
|
marc_encoding => 'utf-8', |
494 |
|
load_row_coderef => sub { |
495 |
|
my ($database,$input,$mfn) = @_; |
496 |
|
return $store->load_row( |
497 |
|
database => $database, |
498 |
|
input => $input, |
499 |
|
id => $mfn, |
500 |
|
); |
501 |
|
}, |
502 |
|
); |
503 |
|
|
504 |
|
$log->debug("ds = ",dump($ds)); |
505 |
|
|
506 |
|
$store->save_ds( |
507 |
|
database => $database, |
508 |
|
input => $input_name, |
509 |
|
id => $mfn, |
510 |
|
ds => $ds, |
511 |
|
) if ($ds && !$stats); |
512 |
|
|
513 |
|
$indexer->add( |
514 |
|
id => "${input_name}/${mfn}", |
515 |
|
ds => $ds, |
516 |
|
type => $config->get($indexer_config)->{type}, |
517 |
|
) if ($indexer && $ds); |
518 |
|
|
519 |
|
if ($marc) { |
520 |
|
my $i = 0; |
521 |
|
|
522 |
|
while (my $fields = WebPAC::Normalize::_get_marc_fields( fetch_next => 1 ) ) { |
523 |
|
$marc->add( |
524 |
|
id => $mfn . ( $i ? "/$i" : '' ), |
525 |
|
fields => $fields, |
526 |
|
leader => WebPAC::Normalize::marc_leader(), |
527 |
|
row => $row, |
528 |
|
); |
529 |
|
$i++; |
530 |
|
} |
531 |
|
|
532 |
|
$log->info("Created $i instances of MFN $mfn\n") if ($i > 1); |
533 |
} |
} |
534 |
|
|
535 |
# close MARC file |
$total_rows++; |
536 |
$marc->finish if ($marc); |
} |
537 |
|
|
538 |
# close report |
if ($validate) { |
539 |
close($report_fh) if ($report_fh) |
my $errors = $validate->report; |
540 |
|
if ($errors) { |
541 |
|
$log->info("validation errors:\n$errors\n" ); |
542 |
|
print $report_fh "$errors\n" if ($report_fh); |
543 |
|
} |
544 |
} |
} |
545 |
|
|
546 |
|
if ($stats) { |
547 |
|
my $s = $input_db->stats; |
548 |
|
$log->info("statistics of fields usage:\n$s"); |
549 |
|
print $report_fh "Statistics of fields usage:\n$s" if ($report_fh); |
550 |
|
} |
551 |
|
|
552 |
|
# close MARC file |
553 |
|
$marc->finish if ($marc); |
554 |
|
|
555 |
|
# close report |
556 |
|
close($report_fh) if ($report_fh) |
557 |
|
|
558 |
} |
} |
559 |
|
|
560 |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |
eval { $indexer->finish } if ($indexer && $indexer->can('finish')); |