/[Grep]/lib/Grep/Search/KinoSearch.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Search/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 189 - (hide annotations)
Fri May 23 18:28:19 2008 UTC (15 years, 9 months ago) by dpavlin
File size: 6699 byte(s)
move KinoSearch support into its own package
1 dpavlin 189 package Grep::Search::KinoSearch;
2 dpavlin 47
3     use strict;
4     use warnings;
5 dpavlin 112 use base qw( Class::Accessor );
6 dpavlin 189 Grep::Search::KinoSearch->mk_accessors( qw( create index_path hits ) );
7 dpavlin 47
8     use Data::Dump qw/dump/;
9 dpavlin 127 use KinoSearch::InvIndexer;
10     use KinoSearch::Searcher;
11 dpavlin 47 use Jifty::Util;
12    
13 dpavlin 189 my $debug = 0;
14 dpavlin 47
15 dpavlin 109 =head1 NAME
16 dpavlin 47
17 dpavlin 189 Grep::Search::KinoSearch - full text search using L<KinoSearch>
18 dpavlin 47
19 dpavlin 109 =head1 METHODS
20 dpavlin 47
21 dpavlin 109 =head2 new
22 dpavlin 58
23 dpavlin 189 my $search = Grep::Search::KinoSearch->new();
24 dpavlin 109
25 dpavlin 189 my $search = Grep::Search::KinoSearch->new( create => 1 );
26 dpavlin 144
27 dpavlin 109 =cut
28    
# Shortcut to the application logger, so methods in this class can simply
# write $self->log->debug(...).
sub log {
    return Jifty->web->log;
}
30    
# Constructor.
#
# Accepts either a single hash reference or a flat key/value list, e.g.
#   Grep::Search::KinoSearch->new( create => 1 );
#   Grep::Search::KinoSearch->new( { create => 1 } );
#
# Recognised fields are the accessors declared for this class
# (create, index_path, hits).  When no index_path is supplied it defaults
# to "<app_root>/var/invindex".
#
# Returns the new object.
sub new {
    my $class = shift;

    # Class::Accessor->new dereferences its first argument as a hash
    # reference, so the flat list documented in the POD has to be wrapped.
    # A single reference (or undef) is passed through untouched.
    my $fields
        = ( @_ == 1 && ( ref $_[0] || !defined $_[0] ) )
        ? $_[0]
        : +{ @_ };

    my $self = $class->SUPER::new( $fields );

    # keep a caller-supplied path, fall back to the application default
    $self->index_path( Jifty::Util->app_root . '/var/invindex' )
        unless defined $self->index_path;

    return $self;
}
41    
42     =head2 invindexer
43    
44     Accessor to call any method defined on L<KinoSearch::InvIndexer>
45    
46     $search->invindexer->delete_by_term( 'id', 42 );
47    
48     =cut
49    
# Cache of KinoSearch::InvIndexer objects, keyed by index path.
our $indexes;

# Return the KinoSearch::InvIndexer for $self->index_path.
#
# A cached indexer is reused when one exists for the path.  Otherwise the
# index is created from scratch (clobber) when the "create" flag is set or
# nothing exists at the path yet, and opened in every other case.  The
# "create" flag is reset after a successful creation.
#
# Dies if index_path is not set or the indexer can't be constructed.
sub invindexer {
    my $self = shift;
    my $index_path = $self->index_path or die "no index_path?";

    if ( my $cached = $indexes->{$index_path} ) {
        $self->log->debug("Using cached index $index_path");
        return $cached;
    }

    my $invindexer;

    # clobber only on explicit request or when there is no index yet;
    # an existing index must be opened, not wiped
    if ( $self->create || ! -e $index_path ) {
        $self->log->debug("Creating new index $index_path");
        $invindexer = KinoSearch::InvIndexer->new(
            invindex => Grep::Search::KinoSearch::Schema->clobber( $index_path ),
        ) or die "can't create index $index_path: $!";
        $self->create( 0 );
    } else {
        $self->log->debug("Opening index: $index_path");
        $invindexer = KinoSearch::InvIndexer->new(
            invindex => Grep::Search::KinoSearch::Schema->open( $index_path ),
        ) or die "can't open index $index_path: $!";
    }

    $indexes->{$index_path} = $invindexer;

    return $invindexer;
}
75    
76 dpavlin 47 =head2 add
77    
78 dpavlin 109 $search->add( $record, $owner_id );
79 dpavlin 47
80     =cut
81    
# Add one record to the full-text index.
#
#   $search->add( $record, $owner_id );
#
# $record   - object which isa Jifty::Record (dies otherwise)
# $owner_id - optional; defaults to the id of the current web user
#
# Every column of the record is copied into the document.  Columns whose
# value is an object contribute "<column>_id", "<column>_name" and
# "<column>_title" fields when the object has such methods, and the value
# itself when it is a Jifty::DateTime.  Plain values get anything that
# looks like an HTML tag replaced by a space.  The owner id is stored in
# the "_owner_id" field.
#
# Returns 1.
sub add {
    my $self = shift;

    my $i = shift or die "no record to add";
    my $uid = shift;

    require Scalar::Util;

    die "record not Jifty::Record but ", ref($i)
        unless ( Scalar::Util::blessed($i) && $i->isa('Jifty::Record') );

    my $pk = { $i->primary_keys };

    my $doc = {};

    foreach my $c ( map { $_->name } $i->columns ) {

        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # plain (unblessed) references have no methods to ask and
            # nothing we know how to index
            unless ( Scalar::Util::blessed($v) ) {
                warn " s $c = $v [",ref($v),"]\n" if ($debug);
                next;
            }

            foreach my $f_c ( qw/id name title/ ) {
                next unless $v->can( $f_c );

                # NOTE(review): the {values} fallback presumably reaches into
                # the record's internal storage -- confirm against Jifty::DBI
                my $f_v = $v->$f_c || $v->{values}->{ $f_c };
                my $col = $c . '_' . $f_c;
                if ( $f_v ) {
                    warn " # $col = $f_v\n" if ($debug);
                    $doc->{ $col } = $f_v;
                } else {
                    warn " . $col is NULL\n" if ($debug);
                }
            }

            if ( $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->{$c} = $v;
            } else {
                warn " s $c = $v [",ref($v),"]\n" if ($debug);
            }
            next;
        }

        next if ( ! defined($v) || $v eq '' );

        # replace anything which looks like an HTML tag with a space
        eval { $v =~ s/<[^>]+>/ /gs; };
        if ($@) {
            $self->log->error("can't strip html from $c in item ", $i->id);
            next;
        }

        $doc->{ $c } = $v;

        if ($debug) {
            # primary keys are shown in full, everything else shortened
            if ( defined( $pk->{$c} ) ) {
                warn " * $c = $v\n";
            } else {
                warn " + $c = ", $self->snippet( 50, $v ), "\n";
            }
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->{ '_owner_id' } = $uid;

    $self->invindexer->add_doc( $doc );

    $self->log->debug("added ", $i->id, " for user $uid to index");

    return 1;
}
151    
152 dpavlin 109 =head2 collection
153 dpavlin 47
154 dpavlin 141 Return C<Grep::Model::ItemCollection> which is result of C<search query>
155    
156 dpavlin 109 my $ItemCollection = $search->collection( 'search query' );
157 dpavlin 47
158 dpavlin 141 =head2 hits
159    
160     Return number of results from last C<collection> call
161    
162     my $num_results = $search->hits;
163    
164 dpavlin 47 =cut
165    
# Run a full-text search and return the matching items.
#
#   my $ItemCollection = $search->collection( 'search query' );
#
# The query is restricted to documents whose _owner_id equals the id of
# the current web user (when that id is defined).  The total number of
# hits is stored in $self->hits.  Hits which can't be loaded as
# Grep::Model::Item produce a warning and are left out.
#
# Dies when called without a query.
# Returns a Grep::Model::ItemCollection.
sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    # used below, but not loaded at the top of this file
    require KinoSearch::QueryParser;

    my $searcher = KinoSearch::Searcher->new(
        invindex => Grep::Search::KinoSearch::Schema->open( $self->index_path ),
    );
    $self->log->debug("$searcher created");

    # NOTE(review): $q is spliced into the query string as-is, so a query
    # containing unbalanced parentheses may escape the _owner_id restriction
    # added below -- confirm whether $q is sanitised by the caller
    my $full_q = "($q)";

    my $uid = Jifty->web->current_user->id;
    $full_q .= ' AND _owner_id:' . $uid if (defined $uid);

    $self->log->debug("searching for '$q' using $full_q");

    my $query_parser = KinoSearch::QueryParser->new(
        schema => Grep::Search::KinoSearch::Schema->new,
        fields => [ qw/ title link content summary category author / ],
    );
    $query_parser->set_heed_colons(1); # enable field:value AND/OR/NOT syntax
    my $query = $query_parser->parse( $full_q );
    my $hits = $searcher->search(
        query => $query,
#       offset => $offset,
#       num_wanted => $hits_per_page,
    );

    $self->hits( $hits->total_hits );

    $self->log->debug("found ", $self->hits, " results");

    my $collection = Grep::Model::ItemCollection->new();

    my $i = 0;
    while ( my $hit = $hits->fetch_hit ) {

        my $score = $hit->{score};
        my $title = $hit->{title};
        my $id    = $hit->{id};

        $self->log->debug("result $i [$id] $title $score");
        $i++;    # position of the hit in the result list, for the log only

        my $item = Grep::Model::Item->new();
        my ($ok,$msg) = $item->load_by_cols( id => $id );

        if ( $ok ) {
            $collection->add_record( $item );
        } else {
            warn "can't load item $id\n";
        }

    }

    $self->log->debug("finished search");

    return $collection;
}
226    
227     =head2 finish
228    
229 dpavlin 109 $search->finish
230 dpavlin 47
231     =cut
232    
# Finish the indexer for this object's index_path and forget it.
#
#   $search->finish;
#
# The indexer is removed from the $indexes cache afterwards, so the next
# call to invindexer builds a fresh one instead of handing out an object
# which was already finished.
#
# Returns 1.
sub finish {
    my $self = shift;

    if ( my $invindexer = $self->invindexer ) {
        $self->log->debug("closing index");
        $invindexer->finish;

        # a finished indexer must not be reused by later add() calls
        delete $indexes->{ $self->index_path };
    }

    $self->log->debug("finish");

    return 1;
}
246    
247 dpavlin 53 =head2 snippet
248    
249     my $short = $self->snippet( 50, $text );
250    
251     =cut
252    
# Shorten text for display.
#
#   my $short = $self->snippet( 50, $text );
#
# All remaining arguments are joined with a single space and every run of
# whitespace is collapsed into one space.  When the result is longer than
# the requested length it is cut to that length and '...' is appended.
#
# Dies when the length is missing (or false).
sub snippet {
    my ( $self, $len, @parts ) = @_;

    die "no len?" unless $len;

    ( my $text = join( " ", @parts ) ) =~ s/\s+/ /gs;

    return $text if length($text) <= $len;

    return substr( $text, 0, $len ) . '...';
}
267    
package Grep::Search::KinoSearch::KeywordField;

# Field spec which switches off the "analyzed" and "vectorized" flags of
# KinoSearch::Schema::FieldSpec; everything else is inherited.
# NOTE(review): presumably meant for values matched as a whole (ids, urls,
# dates) -- confirm against the KinoSearch::Schema::FieldSpec documentation.
use base qw( KinoSearch::Schema::FieldSpec );

sub analyzed   { return 0 }
#sub indexed { 1 }
#sub stored { 1 }
sub vectorized { return 0 }
274 dpavlin 127
package Grep::Search::KinoSearch::Schema;

=head1 NAME

Grep::Search::KinoSearch::Schema - schema definition for full-text search

=cut

use base 'KinoSearch::Schema';
use KinoSearch::Analysis::PolyAnalyzer;

# Field name => field spec class, grouped by spec class.
our %fields = (

    # fields using the keyword spec defined in this file
    (
        map { ( $_ => 'Grep::Search::KinoSearch::KeywordField' ) } qw(
            id
            in_feed_id in_feed_url in_feed_owner in_feed_created_on
            link created_on last_update
            _owner_id
        )
    ),

    # fields using the stock KinoSearch field spec
    (
        map { ( $_ => 'KinoSearch::Schema::FieldSpec' ) } qw(
            in_feed_title
            title content summary category author
        )
    ),
);

# Analyzer for this schema: a PolyAnalyzer constructed with language 'en'.
sub analyzer {
    return KinoSearch::Analysis::PolyAnalyzer->new(
        language => 'en',
    );
}
310    
311 dpavlin 47 1;

  ViewVC Help
Powered by ViewVC 1.1.26