--- trunk/run.pl 2006/05/15 17:49:01 511 +++ trunk/run.pl 2006/05/21 19:29:26 523 @@ -14,6 +14,7 @@ use WebPAC::Normalize::XML; use WebPAC::Normalize::Set; use WebPAC::Output::TT; +use WebPAC::Validate; use YAML qw/LoadFile/; use Getopt::Long; use File::Path; @@ -63,6 +64,10 @@ disable indexing and dump statistics about field and subfield usage for each input +=item --validate path/to/validation_file + +turn on extra validation of input records, see L<WebPAC::Validate> + =back =cut @@ -76,6 +81,7 @@ my $only_filter; my $force_set = 0; my $stats = 0; +my $validate_path; GetOptions( "limit=i" => \$limit, @@ -87,6 +93,7 @@ "debug" => \$debug, "force-set" => \$force_set, "stats" => \$stats, + "validate=s" => \$validate_path, ); $config = LoadFile($config); @@ -98,6 +105,11 @@ my $log = _new WebPAC::Common()->_get_logger(); $log->info( "-" x 79 ); +my $validate; +$validate = new WebPAC::Validate( + path => $validate_path, +) if ($validate_path); + my $use_indexer = $config->{use_indexer} || 'hyperestraier'; if ($stats) { $log->debug("option --stats disables update of indexing engine..."); @@ -111,7 +123,7 @@ while (my ($database, $db_config) = each %{ $config->{databases} }) { - my ($only_database,$only_input) = split(m#/#, $only_filter); + my ($only_database,$only_input) = split(m#/#, $only_filter) if ($only_filter); next if ($only_database && $database !~ m/$only_database/i); my $indexer; @@ -182,7 +194,7 @@ foreach my $input (@inputs) { - next if ($only_input && $input->{name} =~ m#$only_input#i || $input->{type} =~ m#$only_input#i); + next if ($only_input && ($input->{name} !~ m#$only_input#i && $input->{type} !~ m#$only_input#i)); my $type = lc($input->{type}); @@ -190,11 +202,13 @@ my $lookup = new WebPAC::Lookup( lookup_file => $input->{lookup}, - ); + ) if ($input->{lookup}); my $input_module = $config->{webpac}->{inputs}->{$type}; - $log->info("working on input '$input->{name}' in $input->{path} [type: $input->{type}] using $input_module lookup '$input->{lookup}'"); + 
$log->info("working on input '$input->{name}' in $input->{path} [type: $input->{type}] using $input_module", + $input->{lookup} ? "lookup '$input->{lookup}'" : "" + ); my $input_db = new WebPAC::Input( module => $input_module, @@ -210,12 +224,13 @@ my $maxmfn = $input_db->open( path => $input->{path}, code_page => $input->{encoding}, # database encoding + %{ $input }, ); my $n = new WebPAC::Normalize::XML( # filter => { 'foo' => sub { shift } }, db => $db, - lookup_regex => $lookup->regex, + lookup_regex => $lookup ? $lookup->regex : undef, lookup => $lookup, prefix => $input->{name}, ); @@ -262,6 +277,12 @@ push @{ $row->{'000'} }, $pos; } + + if ($validate) { + my @errors = $validate->validate_errors( $row ); + $log->error( "MFN $mfn validation errors:\n", join("\n", @errors) ) if (@errors); + } + my $ds; if ($n) { @@ -270,7 +291,7 @@ $ds = WebPAC::Normalize::Set::data_structure( row => $row, rules => $rules, - lookup => $lookup->lookup_hash, + lookup => $lookup ? $lookup->lookup_hash : undef, ); $db->save_ds(