--- trunk/lib/WebPAC/Output/KinoSearch.pm	2006/04/17 16:50:53	434
+++ trunk/lib/WebPAC/Output/KinoSearch.pm	2006/08/25 16:20:21	620
@@ -9,6 +9,7 @@
 use KinoSearch::Analysis::PolyAnalyzer;
 use Encode qw/from_to/;
 use Data::Dumper;
+use Storable;
 
 =head1 NAME
 
@@ -16,11 +17,11 @@
 
 =head1 VERSION
 
-Version 0.01
+Version 0.03
 
 =cut
 
-our $VERSION = '0.01';
+our $VERSION = '0.03';
 
 =head1 SYNOPSIS
 
@@ -89,7 +90,13 @@
 
 	$self->{encoding} ||= 'ISO-8859-2';
 
-	$log->info("using index $self->{index_path} with encoding $self->{encoding}");
+	# every database gets its own sub-directory below the configured index path
+	$self->{index_path} .= '/' . $self->{database};
+
+	# an index directory without a 'segments' file is treated as a new index
+	$self->{clean} = 1 if (! -e $self->{index_path} . '/segments');
+
+	$log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}");
 
 	my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
 
@@ -99,6 +106,20 @@
 		analyzer => $analyzer,
 	);
 
+	# field names collected on the previous run are written to this file by finish()
+	my $fields_path = $self->{index_path} . '/fields.storable';
+	$fields_path =~ s#//#/#g;
+	if (-e $fields_path) {
+		# retrieve() can die or return undef; never let the logger's return value end up in {fields}
+		$self->{fields} = eval { retrieve($fields_path) };
+		$log->warn("can't open $fields_path: ", $@ || $!) unless ($self->{fields});
+	} else {
+		$log->error("This will be dummy run since no fields statistics are found!");
+		$log->error("You will have to re-run indexing to get search results!");
+		$self->{dummy_run} = 1;
+	}
+	$self->{fields_path} = $fields_path;
+
 	foreach my $f (@{ $self->{fields} }) {
 		$self->{invindex}->spec_field(
 			name => $f,
@@ -155,13 +176,20 @@
 
 	my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
 
-	sub add_value($$$) {
-		my ($doc,$n,$v) = @_;
+	# $self and $log are passed in explicitly: a named sub cannot reliably share the enclosing sub's lexicals
+	sub _add_value($$$$$) {
+		my ($self,$log,$doc,$n,$v) = @_;
+		return unless ($v);
+
+		# count the field even on a dummy run, so finish() can save its name for the next run
+		$self->{value_usage}->{$n}++;
+		return if ($self->{dummy_run});
+
 		eval { $doc->set_value($n, $self->convert($v) ) };
 		$log->warn("can't insert: $n = $v") if ($@);
 	}
 
-	add_value($doc, 'uri', $uri);
+	_add_value($self,$log,$doc, 'uri', $uri);
 
 	$log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
 
@@ -183,11 +211,11 @@
 
 		$vals = $self->convert( $vals ) or $log->logdie("can't convert '$vals' to UTF-8");
 
-		add_value($doc, $tag, $vals );
+		_add_value($self, $log, $doc, $tag, $vals );
 	}
 
 	if (my $text = $args->{'text'}) {
-		add_value($doc, 'bodytext', $text );
+		_add_value($self, $log, $doc, 'bodytext', $text );
 	}
 
 	#$log->debug("adding ", sub { $doc->dump_draft } );
@@ -207,8 +235,22 @@
 sub finish {
 	my $self = shift;
 
-	$self->_get_logger()->info("finish index writing to disk");
+	my $log = $self->_get_logger();
+
+	$log->info("finish index writing to disk");
 	$self->{invindex}->finish;
+
+	$log->info("writing value usage file");
+
+	# add fields from last run
+	$self->{value_usage}->{$_}++ for @{ $self->{fields} || [] };
+
+	# store() can die as well as return undef; low-precedence "or" ties the
+	# warning to the call itself instead of to its (always true) path argument
+	my @fields = keys %{ $self->{value_usage} };
+	eval { store(\@fields, $self->{fields_path}) }
+		or $log->warn("can't write $self->{fields_path}: ", $@ || $!);
+
 }
 
 =head2 convert