Patch summary (trunk/run.pl, r507 -> r509):
  * --stats now disables the indexing engine: $use_indexer is set to undef, no
    indexer object is created, and $indexer->add / $indexer->finish are skipped.
  * When no $n normalizer object is used (the WebPAC::Normalize::Set branch), the
    resulting data structure is stored with $db->save_ds unless --stats is given.
  * Review fix: the relocated validity check calls $log->logdie; the line it
    replaces spelled the method "logide".
  * Line breaks and indentation were reconstructed from a whitespace-collapsed
    copy; the POD text in the first hunk is reproduced as found and looks truncated.

--- trunk/run.pl	2006/05/15 13:15:01	507
+++ trunk/run.pl	2006/05/15 17:23:38	509
@@ -52,12 +52,13 @@
 
 =item --force-set
 
-force conversion Cpath> in C from
+force conversion C<< normalize->path >> in C from
 C<.xml> to C<.pl>
 
 =item --stats
 
-dump statistics about used fields and subfields in each input
+disable indexing and dump statistics about field and subfield
+usage for each input
 
 =back
 
@@ -92,9 +93,15 @@
 die "no databases in config file!\n" unless ($config->{databases});
 
 my $log = _new WebPAC::Common()->_get_logger();
+$log->info( "-" x 79 );
 
 my $use_indexer = $config->{use_indexer} || 'hyperestraier';
-$log->info("using $use_indexer indexing engine...");
+if ($stats) {
+	$log->debug("option --stats disables update of indexing engine...");
+	$use_indexer = undef;
+} else {
+	$log->info("using $use_indexer indexing engine...");
+}
 
 my $total_rows = 0;
 my $start_t = time();
@@ -105,29 +112,32 @@
 
 	my $indexer;
 
-	my $indexer_config = $config->{$use_indexer} || $log->logdie("can't find '$use_indexer' part in confguration");
-	$indexer_config->{database} = $database;
-	$indexer_config->{clean} = $clean;
-	$indexer_config->{label} = $db_config->{name};
-
-	if ($use_indexer eq 'hyperestraier') {
-
-		# open Hyper Estraier database
-		use WebPAC::Output::Estraier '0.10';
-		$indexer = new WebPAC::Output::Estraier( %{ $indexer_config } );
-
-	} elsif ($use_indexer eq 'kinosearch') {
-
-		# open KinoSearch
-		use WebPAC::Output::KinoSearch;
-		$indexer_config->{clean} = 1 unless (-e $indexer_config->{index_path});
-		$indexer = new WebPAC::Output::KinoSearch( %{ $indexer_config } );
+	if ($use_indexer) {
+		my $indexer_config = $config->{$use_indexer} || $log->logdie("can't find '$use_indexer' part in confguration");
+		$indexer_config->{database} = $database;
+		$indexer_config->{clean} = $clean;
+		$indexer_config->{label} = $db_config->{name};
+
+		if ($use_indexer eq 'hyperestraier') {
+
+			# open Hyper Estraier database
+			use WebPAC::Output::Estraier '0.10';
+			$indexer = new WebPAC::Output::Estraier( %{ $indexer_config } );
+
+		} elsif ($use_indexer eq 'kinosearch') {
+
+			# open KinoSearch
+			use WebPAC::Output::KinoSearch;
+			$indexer_config->{clean} = 1 unless (-e $indexer_config->{index_path});
+			$indexer = new WebPAC::Output::KinoSearch( %{ $indexer_config } );
 
-	} else {
-		$log->logdie("unknown use_indexer: $use_indexer");
+		} else {
+			$log->logdie("unknown use_indexer: $use_indexer");
+		}
+
+		$log->logdie("can't continue without valid indexer") unless ($indexer);
 	}
 
-	$log->logide("can't continue without valid indexer") unless ($indexer);
 
 	#
 	# now WebPAC::Store
@@ -246,18 +256,29 @@
 		push @{ $row->{'000'} }, $pos;
 	}
 
-	my $ds = $n ? $n->data_structure($row) :
-		WebPAC::Normalize::Set::data_structure(
+
+	my $ds;
+	if ($n) {
+		$ds = $n->data_structure($row);
+	} else {
+		$ds = WebPAC::Normalize::Set::data_structure(
 			row => $row,
 			rules => $rules,
 			lookup => $lookup->lookup_hash,
 		);
 
+		$db->save_ds(
+			id => $mfn,
+			ds => $ds,
+			prefix => $input->{name},
+		) if ($ds && !$stats);
+	}
+
 	$indexer->add(
 		id => $input->{name} . "/" . $mfn,
 		ds => $ds,
 		type => $config->{$use_indexer}->{type},
-	);
+	) if ($indexer);
 
 	$total_rows++;
 }
@@ -266,7 +287,7 @@
 
 };
 
-eval { $indexer->finish } if ($indexer->can('finish'));
+eval { $indexer->finish } if ($indexer && $indexer->can('finish'));
 
 my $dt = time() - $start_t;
 $log->info("$total_rows records indexed in " .