--- trunk/run.pl 2006/09/06 19:25:22 636 +++ trunk/run.pl 2006/09/13 17:32:49 675 @@ -12,7 +12,7 @@ use WebPAC::Store 0.03; use WebPAC::Normalize 0.11; use WebPAC::Output::TT; -use WebPAC::Validate; +use WebPAC::Validate 0.06; use WebPAC::Output::MARC; use YAML qw/LoadFile/; use Getopt::Long; @@ -60,8 +60,8 @@ =item --stats -disable indexing and dump statistics about field and subfield -usage for each input +disable indexing, modify_* in configuration and dump statistics about field +and subfield usage for each input =item --validate path/to/validation_file @@ -232,8 +232,6 @@ use WebPAC::Output::EstraierNative; $indexer = new WebPAC::Output::EstraierNative( %{ $indexer_config } ); - $use_indexer = 'hyperestraier'; - } elsif ($use_indexer eq 'kinosearch') { # open KinoSearch @@ -335,6 +333,12 @@ $input->{lookup} ? "lookup '$input->{lookup}'" : "" ); + if ($stats) { + # disable modification of records if --stats is in use + delete($input->{modify_records}); + delete($input->{modify_file}); + } + my $input_db = new WebPAC::Input( module => $input_module, encoding => $config->{webpac}->{webpac_encoding}, @@ -357,6 +361,16 @@ %{ $input }, ); + my $report_fh; + if ($stats || $validate) { + my $path = "out/report/" . $database . '-' . $input->{name} . '.txt'; + open($report_fh, '>', $path) || $log->logdie("can't open $path: $!"); + + print $report_fh "Report for database '$database' input '$input->{name}' records ", + $offset || 1, "-", $limit || $input->{limit} || $maxmfn, "\n\n"; + $log->info("Generating report file $path"); + } + my @norm_array = ref($input->{normalize}) eq 'ARRAY' ? @{ $input->{normalize} } : ( $input->{normalize} ); @@ -386,6 +400,10 @@ # reset position in database $input_db->seek(1); + # generate name of config key for indexer (strip everything after -) + my $indexer_config = $use_indexer; + $indexer_config =~ s/^(\w+)-?.*$/$1/g if ($indexer_config); + foreach my $pos ( 0 ... $input_db->size ) { my $row = $input_db->fetch || next; @@ -400,8 +418,11 @@ if ($validate) { - my @errors = $validate->validate_errors( $row ); - $log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors); + if ( my $errors = $validate->validate_errors( $row, $input_db->dump ) ) { + $log->error( "MFN $mfn validation error:\n", + $validate->report_error( $errors ) + ); + } } my $ds_config = dclone($db_config); @@ -432,7 +453,7 @@ $indexer->add( id => $input->{name} . "/" . $mfn, ds => $ds, - type => $config->{$use_indexer}->{type}, + type => $config->{$indexer_config}->{type}, ) if ($indexer && $ds); if ($marc) { @@ -454,11 +475,25 @@ $total_rows++; } - $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats); + if ($validate) { + my $errors = $validate->report; + if ($errors) { + $log->info("validation errors:\n$errors\n" ); + print $report_fh "$errors\n" if ($report_fh); + } + } + + if ($stats) { + my $s = $input_db->stats; + $log->info("statistics of fields usage:\n$s"); + print $report_fh "Statistics of fields usage:\n$s" if ($report_fh); + } # close MARC file $marc->finish if ($marc); + # close report + close($report_fh) if ($report_fh) } }