--- trunk/run.pl	2006/08/25 12:31:06	619
+++ trunk/run.pl	2006/09/07 15:57:48	653
@@ -12,7 +12,7 @@
 use WebPAC::Store 0.03;
 use WebPAC::Normalize 0.11;
 use WebPAC::Output::TT;
-use WebPAC::Validate;
+use WebPAC::Validate 0.06;
 use WebPAC::Output::MARC;
 use YAML qw/LoadFile/;
 use Getopt::Long;
@@ -60,8 +60,8 @@
 
 =item --stats
 
-disable indexing and dump statistics about field and subfield
-usage for each input
+disable indexing, modify_* in configuration and dump statistics about field
+and subfield usage for each input
 
 =item --validate path/to/validation_file
 
@@ -208,7 +208,11 @@
 
 	my $indexer;
 	if ($use_indexer) {
-		my $indexer_config = $config->{$use_indexer} || $log->logdie("can't find '$use_indexer' part in confguration");
+
+		my $cfg_name = $use_indexer;
+		$cfg_name =~ s/\-.*$//;
+
+		my $indexer_config = $config->{$cfg_name} || $log->logdie("can't find '$cfg_name' part in confguration");
 		$indexer_config->{database} = $database;
 		$indexer_config->{clean} = $clean;
 		$indexer_config->{label} = $db_config->{name};
@@ -222,6 +226,14 @@
 			use WebPAC::Output::Estraier '0.10';
 			$indexer = new WebPAC::Output::Estraier( %{ $indexer_config } );
 
+		} elsif ($use_indexer eq 'hyperestraier-native') {
+
+			# open Hyper Estraier database
+			use WebPAC::Output::EstraierNative;
+			$indexer = new WebPAC::Output::EstraierNative( %{ $indexer_config } );
+
+			$use_indexer = 'hyperestraier';
+
 		} elsif ($use_indexer eq 'kinosearch') {
 
 			# open KinoSearch
@@ -323,6 +335,12 @@
 			$input->{lookup} ?
 			"lookup '$input->{lookup}'" : ""
 		);
+		if ($stats) {
+			# disable modification of records if --stats is in use
+			delete($input->{modify_records});
+			delete($input->{modify_file});
+		}
+
 		my $input_db = new WebPAC::Input(
 			module => $input_module,
 			encoding => $config->{webpac}->{webpac_encoding},
@@ -335,6 +353,7 @@
 			recode => $input->{recode},
 			stats => $stats,
 			modify_records => $input->{modify_records},
+			modify_file => $input->{modify_file},
 		);
 		$log->logdie("can't create input using $input_module") unless ($input);
 
@@ -387,8 +406,11 @@
 
 
 				if ($validate) {
-					my @errors = $validate->validate_errors( $row );
-					$log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors);
+					if ( my $errors = $validate->validate_errors( $row, $input_db->dump ) ) {
+						$log->error( "MFN $mfn validation error:\n",
+							dump( $errors )
+						);
+					}
 				}
 
 				my $ds_config = dclone($db_config);
@@ -441,6 +463,8 @@
 			$total_rows++;
 		}
 
+		$log->info("validation errors:\n", dump( $validate->all_errors ) ) if ($validate && defined($validate->all_errors));
+
 		$log->info("statistics of fields usage:\n", $input_db->stats) if ($stats);
 
 		# close MARC file