/[webpac2]/trunk/lib/WebPAC/Output/KinoSearch.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 437 by dpavlin, Sun Apr 30 19:06:09 2006 UTC revision 919 by dpavlin, Tue Oct 30 22:07:11 2007 UTC
# Line 3  package WebPAC::Output::KinoSearch; Line 3  package WebPAC::Output::KinoSearch;
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common WebPAC::Output Class::Accessor/;
7    __PACKAGE__->mk_accessors(qw(
8            path
9            database
10            encoding
11            clean
12    
13  use KinoSearch::InvIndexer;          index
14  use KinoSearch::Analysis::PolyAnalyzer;  ));
15    
16    use KinoSearch::Simple;
17    use File::Path;
18  use Encode qw/from_to/;  use Encode qw/from_to/;
19  use Data::Dumper;  use Data::Dump qw/dump/;
20    use Storable;
21    
22  =head1 NAME  =head1 NAME
23    
# Line 16  WebPAC::Output::KinoSearch - Create Kino Line 25  WebPAC::Output::KinoSearch - Create Kino
25    
26  =head1 VERSION  =head1 VERSION
27    
28  Version 0.01  Version 0.05
29    
30  =cut  =cut
31    
32  our $VERSION = '0.01';  our $VERSION = '0.05';
33    
34  =head1 SYNOPSIS  =head1 SYNOPSIS
35    
# Line 33  type C<search>. Line 42  type C<search>.
42    
43  Open KinoSearch index  Open KinoSearch index
44    
45   my $est = new WebPAC::Output::KinoSearch(   my $out = new WebPAC::Output::KinoSearch({
46          index_path => '/path/to/invindex',          path => '/path/to/invindex',
         fields => qw/name of all filelds used/,  
47          database => 'demo',          database => 'demo',
         label => 'node label',  
48          encoding => 'iso-8859-2',          encoding => 'iso-8859-2',
49          clean => 1,          clean => 1,
50   );   });
51    
52  Options are:  Options are:
53    
54  =over 4  =over 4
55    
56  =item index_path  =item path
57    
58  path to KinoSearch index to use  path to KinoSearch index to use
59    
 =item fields  
   
 name of all fields used in this index  
   
60  =item database  =item database
61    
62  name of database from which data comes  name of database from which data comes
63    
 =item label  
   
 label for node (optional)  
   
64  =item encoding  =item encoding
65    
66  character encoding of C<data_structure> if it's different than C<ISO-8859-2>  character encoding of C<data_structure> if it's different than C<ISO-8859-2>
# Line 70  index. Line 69  index.
69    
70  =back  =back
71    
72    =head2 init
73    
74      $out->init;
75    
76  =cut  =cut
77    
78  sub new {  sub init {
79          my $class = shift;          my $self = shift;
         my $self = {@_};  
         bless($self, $class);  
80    
81          my $log = $self->_get_logger;          my $log = $self->_get_logger;
82    
83          #$log->debug("self: ", sub { Dumper($self) });          #$log->debug("self: ", sub { dump($self) });
84    
85          foreach my $p (qw/index_path fields database/) {          foreach my $p (qw/path database/) {
86                  $log->logdie("need $p") unless ($self->{$p});                  $log->logdie("need $p") unless ($self->$p);
87          }          }
88    
89          $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');  #       $log->logdie("fields is not ARRAY") unless (ref($self->{fields}) eq 'ARRAY');
90    
91          $self->{encoding} ||= 'ISO-8859-2';          $self->encoding( 'ISO-8859-2' ) unless $self->encoding;
92    
93            if ( ! -e $self->path ) {
94                    mkpath $self->path || $log->logdie("can't create ", $self->path,": $!");
95                    $log->info("created ", $self->path);
96            } elsif ( $self->clean ) {
97                    $log->info("removing existing ", $self->path);
98                    rmtree $self->path || $log->logdie("can't remove ", $self->path,": $!");
99                    mkpath $self->path || $log->logdie("can't create ", $self->path,": $!");
100            }
101    
102          $log->info("using index $self->{index_path} with encoding $self->{encoding}");          my $path = $self->path . '/' . $self->database;
103    
104          my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );          $log->info("using index $path with encoding ", $self->encoding);
105    
106          $self->{invindex} = KinoSearch::InvIndexer->new(          my $index = KinoSearch::Simple->new(
107                  invindex => $self->{index_path},                  path => $path,
108                  create   => $self->{clean},                  language => 'en',
                 analyzer => $analyzer,  
109          );          );
110    
111          foreach my $f (@{ $self->{fields} }) {          $log->logdie("can't open $path: $!") unless $index;
112                  $self->{invindex}->spec_field(  
113                          name  => $f,          $self->index( $index );
 #                       boost => 10,  
                         stored => 1,  
                         indexed => 1,  
                         vectorized => 0,  
                 );  
         }  
114    
         $self ? return $self : return undef;  
115  }  }
116    
117    
118  =head2 add  =head2 add
119    
120  Adds one entry to database.  Adds one entry
   
   $est->add(  
         id => 42,  
         ds => $ds,  
         type => 'display',  
         text => 'optional text from which snippet is created',  
   );  
   
 This function will create  entries in index using following URI format:  
   
   C<file:///type/database%20name/000>  
121    
122  Each tag in C<data_structure> with specified C<type> will create one    $out->add( 42, $ds );
 attribute and corresponding hidden text (used for search).  
123    
124  =cut  =cut
125    
126  sub add {  sub add {
127          my $self = shift;          my $self = shift;
128    
129          my $args = {@_};          my ( $id, $ds ) = @_;
130    
131          my $log = $self->_get_logger;          my $log = $self->_get_logger;
132            $log->logdie("need id") unless defined $id;
133            $log->logdie("need ds") unless $ds;
134    
135          my $database = $self->{'database'} || $log->logconfess('no database in $self');          $log->debug("id: $id ds = ",dump($ds));
         $log->logconfess('need invindex in object') unless ($self->{'invindex'});  
   
         foreach my $p (qw/id ds type/) {  
                 $log->logdie("need $p") unless ($args->{$p});  
         }  
   
         my $type = $args->{'type'};  
         my $id = $args->{'id'};  
   
         my $uri = "file:///$type/$database/$id";  
         $log->debug("creating $uri");  
136    
137          my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )");          my $hash = $self->ds_to_hash( $ds, 'search' ) || return;
138    
139          sub add_value($$$$$) {          $hash->{database} ||= $self->database;
140                  my ($self,$log,$doc,$n,$v) = @_;          $hash->{id} ||= $id;
                 return unless ($v);  
                 eval { $doc->set_value($n, $self->convert($v) ) };  
                 $log->warn("can't insert: $n = $v") if ($@);  
         }  
   
         add_value($self,$log,$doc, 'uri', $uri);  
   
         $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );  
   
         # filter all tags which have type defined  
         my @tags = grep {  
                 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )  
         } keys %{ $args->{'ds'} };  
   
         $log->debug("tags = ", join(",", @tags));  
141    
142          return unless (@tags);          $log->debug("add( $id, ", sub { dump($ds) }," ) => ", sub { dump( $hash ) });
143    
144          foreach my $tag (@tags) {          $self->index->add_doc( $hash );
   
                 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });  
   
                 next if (! $vals);  
   
                 $vals = $self->convert( $vals ) or  
                         $log->logdie("can't convert '$vals' to UTF-8");  
   
                 add_value($self, $log, $doc, $tag, $vals );  
         }  
   
         if (my $text = $args->{'text'}) {  
                 add_value($self, $log, $doc, 'bodytext', $text );  
         }  
   
         #$log->debug("adding ", sub { $doc->dump_draft } );  
         $self->{invindex}->add_doc($doc) || $log->warn("can't add document $uri");  
145    
146          return 1;          return 1;
147  }  }
# Line 201  sub add { Line 150  sub add {
150    
151  Close index  Close index
152    
153   $index->finish;   $out->finish;
154    
155  =cut  =cut
156    
157  sub finish {  sub finish {
158          my $self = shift;          my $self = shift;
159    
160          $self->_get_logger()->info("finish index writing to disk");          my $log = $self->_get_logger();
         $self->{invindex}->finish;  
 }  
   
 =head2 convert  
161    
162   my $utf8_string = $self->convert('string in codepage');          $log->info("dummy finish");
   
 =cut  
   
 sub convert {  
         my $self = shift;  
163    
         my $text = shift || return;  
         from_to($text, $self->{encoding}, 'UTF-8');  
         return $text;  
164  }  }
165    
166  =head1 AUTHOR  =head1 AUTHOR
# Line 232  Dobrica Pavlinusic, C<< <dpavlin@rot13.o Line 169  Dobrica Pavlinusic, C<< <dpavlin@rot13.o
169    
170  =head1 COPYRIGHT & LICENSE  =head1 COPYRIGHT & LICENSE
171    
172  Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.  Copyright 2005-2007 Dobrica Pavlinusic, All Rights Reserved.
173    
174  This program is free software; you can redistribute it and/or modify it  This program is free software; you can redistribute it and/or modify it
175  under the same terms as Perl itself.  under the same terms as Perl itself.

Legend:
Removed from v.437  
changed lines
  Added in v.919

  ViewVC Help
Powered by ViewVC 1.1.26