--- trunk/run.pl 2006/05/15 17:49:01 511 +++ trunk/run.pl 2006/05/18 13:48:58 520 @@ -14,6 +14,7 @@ use WebPAC::Normalize::XML; use WebPAC::Normalize::Set; use WebPAC::Output::TT; +use WebPAC::Validate; use YAML qw/LoadFile/; use Getopt::Long; use File::Path; @@ -63,6 +64,10 @@ disable indexing and dump statistics about field and subfield usage for each input +=item --validate path/to/validation_file + +turn on extra validation of input records, see L<WebPAC::Validate> + =back =cut @@ -76,6 +81,7 @@ my $only_filter; my $force_set = 0; my $stats = 0; +my $validate_path; GetOptions( "limit=i" => \$limit, @@ -87,6 +93,7 @@ "debug" => \$debug, "force-set" => \$force_set, "stats" => \$stats, + "validate=s" => \$validate_path, ); $config = LoadFile($config); @@ -98,6 +105,11 @@ my $log = _new WebPAC::Common()->_get_logger(); $log->info( "-" x 79 ); +my $validate; +$validate = new WebPAC::Validate( + path => $validate_path, +) if ($validate_path); + my $use_indexer = $config->{use_indexer} || 'hyperestraier'; if ($stats) { $log->debug("option --stats disables update of indexing engine..."); @@ -111,7 +123,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { - my ($only_database,$only_input) = split(m#/#, $only_filter); + my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); next if ($only_database && $database !~ m/$only_database/i); my $indexer; @@ -182,7 +194,7 @@ foreach my $input (@inputs) { - next if ($only_input && $input->{name} =~ m#$only_input#i || $input->{type} =~ m#$only_input#i); + next if ($only_input && ($input->{name} !~ m#$only_input#i && $input->{type} !~ m#$only_input#i)); my $type = lc($input->{type}); @@ -190,7 +202,7 @@ my $lookup = new WebPAC::Lookup( lookup_file => $input->{lookup}, - ); + ) if ($input->{lookup}); my $input_module = $config->{webpac}->{inputs}->{$type}; @@ -215,7 +227,7 @@ my $n = new WebPAC::Normalize::XML( # filter => { 'foo' => sub { shift } }, db => $db, - lookup_regex => $lookup->regex, + 
lookup_regex => $lookup ? $lookup->regex : undef, lookup => $lookup, prefix => $input->{name}, ); @@ -262,6 +274,12 @@ push @{ $row->{'000'} }, $pos; } + + if ($validate) { + my @errors = $validate->validate_errors( $row ); + $log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors); + } + my $ds; if ($n) { @@ -270,7 +288,7 @@ $ds = WebPAC::Normalize::Set::data_structure( row => $row, rules => $rules, - lookup => $lookup->lookup_hash, + lookup => $lookup ? $lookup->lookup_hash : undef, ); $db->save_ds(