--- trunk/run.pl 2006/05/14 22:24:18 504 +++ trunk/run.pl 2006/05/15 13:32:18 508 @@ -52,9 +52,13 @@ =item --force-set -force conversion C<normalize->path> in C<config.yml> from +force conversion C<< normalize->path >> in C<config.yml> from C<.xml> to C<.pl> +=item --stats + +dump statistics about used fields and subfields in each input + =back =cut @@ -67,6 +71,7 @@ my $debug = 0; my $only_db_name; my $force_set = 0; +my $stats = 0; GetOptions( "limit=i" => \$limit, @@ -77,6 +82,7 @@ "config" => \$config, "debug" => \$debug, "force-set" => \$force_set, + "stats" => \$stats, ); $config = LoadFile($config); @@ -181,6 +187,7 @@ offset => $offset, lookup => $lookup, recode => $input->{recode}, + stats => $stats, ); $log->logdie("can't create input using $input_module") unless ($input); @@ -239,13 +246,24 @@ push @{ $row->{'000'} }, $pos; } - my $ds = $n ? $n->data_structure($row) : - WebPAC::Normalize::Set::data_structure( + + my $ds; + if ($n) { + $ds = $n->data_structure($row); + } else { + $ds = WebPAC::Normalize::Set::data_structure( row => $row, rules => $rules, lookup => $lookup->lookup_hash, ); + $db->save_ds( + id => $mfn, + ds => $ds, + prefix => $input->{name}, + ) if ($ds); + } + $indexer->add( id => $input->{name} . "/" . $mfn, ds => $ds, @@ -255,6 +273,8 @@ $total_rows++; } + $log->info("statistics of fields usage:\n", $input_db->stats) if ($stats); + }; eval { $indexer->finish } if ($indexer->can('finish'));