/[webpac2]/trunk/lib/WebPAC/Output/KinoSearch.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/KinoSearch.pm

Parent Directory | Revision Log | View Patch

revision 434 by dpavlin, Mon Apr 17 16:50:53 2006 UTC | revision 887 by dpavlin, Mon Sep 3 15:26:46 2007 UTC
# Line 8  use base qw/WebPAC::Common/; Line 8  use base qw/WebPAC::Common/;
8  use KinoSearch::InvIndexer;  use KinoSearch::InvIndexer;
9  use KinoSearch::Analysis::PolyAnalyzer;  use KinoSearch::Analysis::PolyAnalyzer;
10  use Encode qw/from_to/;  use Encode qw/from_to/;
11  use Data::Dumper;  use Data::Dump qw/dump/;
12    use Storable;
13    
14  =head1 NAME  =head1 NAME
15    
# Line 16  WebPAC::Output::KinoSearch - Create Kino Line 17  WebPAC::Output::KinoSearch - Create Kino
17    
18  =head1 VERSION  =head1 VERSION
19    
20  Version 0.01  Version 0.03
21    
22  =cut  =cut
23    
24  our $VERSION = '0.01';  our $VERSION = '0.03';
25    
26  =head1 SYNOPSIS  =head1 SYNOPSIS
27    
# Line 79  sub new { Line 80  sub new {
80    
81          my $log = $self->_get_logger;          my $log = $self->_get_logger;
82    
83          #$log->debug("self: ", sub { Dumper($self) });          #$log->debug("self: ", sub { dump($self) });
84    
85          foreach my $p (qw/index_path fields database/) {          foreach my $p (qw/index_path fields database/) {
86                  $log->logdie("need $p") unless ($self->{$p});                  $log->logdie("need $p") unless ($self->{$p});
# Line 89  sub new { Line 90  sub new {
90    
91          $self->{encoding} ||= 'ISO-8859-2';          $self->{encoding} ||= 'ISO-8859-2';
92    
93          $log->info("using index $self->{index_path} with encoding $self->{encoding}");          $self->{index_path} .= '/' . $self->{database};
94    
95            $self->{clean} = 1 if (! -e $self->{index_path} . '/segments');
96    
97            $log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}");
98    
99          my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );          my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
100    
# Line 99  sub new { Line 104  sub new {
104                  analyzer => $analyzer,                  analyzer => $analyzer,
105          );          );
106    
107            my $fields_path = $self->{index_path} . '/fields.storable';
108            $fields_path =~ s#//#/#g;
109            if (-e $fields_path) {
110                    $self->{fields} = retrieve($fields_path) ||
111                            $log->warn("can't open $fields_path: $!");
112            } else {
113                    $log->error("This will be dummy run since no fields statistics are found!");
114                    $log->error("You will have to re-run indexing to get search results!");
115                    $self->{dummy_run} = 1;
116            }
117            $self->{fields_path} = $fields_path;
118    
119          foreach my $f (@{ $self->{fields} }) {          foreach my $f (@{ $self->{fields} }) {
120                  $self->{invindex}->spec_field(                  $self->{invindex}->spec_field(
121                          name  => $f,                          name  => $f,
# Line 155  sub add { Line 172  sub add {
172    
173          my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");          my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");
174    
175          sub add_value($$$) {          sub _add_value($$$$$) {
176                  my ($doc,$n,$v) = @_;                  my ($self,$log,$doc,$n,$v) = @_;
177                    return unless ($v);
178    
179                    $self->{value_usage}->{$n}++;
180                    return if ($self->{dummy_run});
181    
182                  eval { $doc->set_value($n, $self->convert($v) ) };                  eval { $doc->set_value($n, $self->convert($v) ) };
183                  $log->warn("can't insert: $n = $v") if ($@);                  $log->warn("can't insert: $n = $v") if ($@);
184          }          }
185    
186          add_value($doc, 'uri', $uri);          _add_value($self,$log,$doc, 'uri', $uri);
187    
188          $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );          $log->debug("ds = ", sub { dump($args->{'ds'}) } );
189    
190          # filter all tags which have type defined          # filter all tags which have type defined
191          my @tags = grep {          my @tags = grep {
# Line 183  sub add { Line 205  sub add {
205                  $vals = $self->convert( $vals ) or                  $vals = $self->convert( $vals ) or
206                          $log->logdie("can't convert '$vals' to UTF-8");                          $log->logdie("can't convert '$vals' to UTF-8");
207    
208                  add_value($doc, $tag, $vals );                  _add_value($self, $log, $doc, $tag, $vals );
209          }          }
210    
211          if (my $text = $args->{'text'}) {          if (my $text = $args->{'text'}) {
212                  add_value($doc, 'bodytext', $text );                  _add_value($self, $log, $doc, 'bodytext', $text );
213          }          }
214    
215          #$log->debug("adding ", sub { $doc->dump_draft } );          #$log->debug("adding ", sub { $doc->dump_draft } );
# Line 207  Close index Line 229  Close index
229  sub finish {  sub finish {
230          my $self = shift;          my $self = shift;
231    
232          $self->_get_logger()->info("finish index writing to disk");          my $log = $self->_get_logger();
233    
234            $log->info("finish index writing to disk");
235          $self->{invindex}->finish;          $self->{invindex}->finish;
236    
237            $log->info("writing value usage file");
238    
239            # add fields from last run
240            map { $self->{value_usage}->{$_}++ } @{ $self->{fields} };
241    
242            my @fields = keys %{ $self->{value_usage} };
243            store \@fields, $self->{fields_path} ||
244                    $log->warn("can't write $self->{fields_path}: $!");
245    
246  }  }
247    
248  =head2 convert  =head2 convert

Legend:
Removed from v.434  
changed lines
  Added in v.887

  ViewVC Help
Powered by ViewVC 1.1.26