--- trunk/run.pl 2006/09/05 15:14:14 627
+++ trunk/run.pl 2006/09/11 15:59:35 671
@@ -12,7 +12,7 @@
 use WebPAC::Store 0.03;
 use WebPAC::Normalize 0.11;
 use WebPAC::Output::TT;
-use WebPAC::Validate;
+use WebPAC::Validate 0.06;
 use WebPAC::Output::MARC;
 use YAML qw/LoadFile/;
 use Getopt::Long;
@@ -60,8 +60,8 @@
 
 =item --stats
 
-disable indexing and dump statistics about field and subfield
-usage for each input
+disable indexing, modify_* in configuration and dump statistics about field
+and subfield usage for each input
 
 =item --validate path/to/validation_file
 
@@ -335,6 +335,12 @@
 			$input->{lookup} ? "lookup '$input->{lookup}'" : ""
 		);
 
+		if ($stats) {
+			# disable modification of records if --stats is in use
+			delete($input->{modify_records});
+			delete($input->{modify_file});
+		}
+
 		my $input_db = new WebPAC::Input(
 			module => $input_module,
 			encoding => $config->{webpac}->{webpac_encoding},
@@ -347,6 +353,7 @@
 			recode => $input->{recode},
 			stats => $stats,
 			modify_records => $input->{modify_records},
+			modify_file => $input->{modify_file},
 		);
 		$log->logdie("can't create input using $input_module") unless ($input);
 
@@ -356,6 +363,16 @@
 			%{ $input },
 		);
 
+		my $report_fh;
+		if ($stats || $validate) {
+			my $path = "out/report/" . $database . '-' . $input->{name} . '.txt';
+			open($report_fh, '>', $path) || $log->logdie("can't open $path: $!");
+
+			print $report_fh "Report for database '$database' input '$input->{name}' records ",
+				$offset || 1, "-", $limit || $input->{limit} || $maxmfn, "\n\n";
+			$log->info("Generating report file $path");
+		}
+
 		my @norm_array = ref($input->{normalize}) eq 'ARRAY' ?
 			@{ $input->{normalize} } : ( $input->{normalize} );
 
@@ -399,8 +416,11 @@
 
 
 			if ($validate) {
-				my @errors = $validate->validate_errors( $row );
-				$log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors);
+				if ( my $errors = $validate->validate_errors( $row, $input_db->dump ) ) {
+					$log->error( "MFN $mfn validation error:\n",
+						$validate->report_error( $errors )
+					);
+				}
 			}
 
 			my $ds_config = dclone($db_config);
@@ -453,11 +473,25 @@
 			$total_rows++;
 		}
 
-		$log->info("statistics of fields usage:\n", $input_db->stats) if ($stats);
+		if ($validate) {
+			my $errors = $validate->report;
+			if ($errors) {
+				$log->info("validation errors:\n$errors\n" );
+				print $report_fh "$errors\n" if ($report_fh);
+			}
+		}
+
+		if ($stats) {
+			my $s = $input_db->stats;
+			$log->info("statistics of fields usage:\n$s");
+			print $report_fh "Statistics of fields usage:\n$s" if ($report_fh);
+		}
 
 		# close MARC file
 		$marc->finish if ($marc);
 
+		# close report (the handle is only opened when --stats or --validate is in use)
+		close($report_fh) || $log->logdie("can't close report: $!") if ($report_fh);
 	}
 
 }