/[Grep]/lib/Grep/Search/KinoSearch.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Search/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 72 - (hide annotations)
Fri Feb 23 09:54:28 2007 UTC (17 years ago) by dpavlin
Original Path: lib/Grep/Search.pm
File size: 4373 byte(s)
another great refactoring: added a new Source object which implements
searching within a feed (which can now be anything, as long as it produces fields
which somewhat resemble an RSS feed). Source plugins implement just the (site or
source format specific) fetching of items.

Sample implementation of a MoinMoin scraper, which fetches full pages from the wiki
for results, so it has a performance impact on the remote wiki; be kind to it.
package Grep::Search;

use strict;
use warnings;

use Data::Dump qw/dump/;
use Lucene;
use Jifty::Util;

# Directory of the on-disk Lucene index, below the application root.
my $index_path = Jifty::Util->app_root . '/var/lucene';

# Lucene handles shared by every caller in this process. They are built
# on first use by analyzer(), store() and writer().
my ( $analyzer, $store, $writer );

# When true, indexing traces every field it looks at through warn.
my $debug = 1;

# Cached answer of create(): 1 = index has to be created, 0 = index is
# already there, undef = not decided yet.
my $create;

=head2 create

  my $must_create = Grep::Search->create;

Returns 1 when the Lucene index has to be created (there is no
F<segments> file below the index path) and 0 when an existing index can
be opened. The answer is cached in a file-level variable, so the file
system is examined only once until the cache is cleared again.

=cut

sub create {

    # reuse the previous decision instead of looking at the disk again
    if ( defined($create) ) {
        Jifty->log->debug("using previous create $create");
        return $create;
    }

    if ( !-e "$index_path/segments" ) {
        $create = 1;
        Jifty->log->debug("create index $index_path");
    } else {
        $create = 0;
        Jifty->log->debug("open index: $index_path");
    }
    return $create;
}

=head2 analyzer

  my $analyzer = Grep::Search->analyzer;

Returns the shared C<Lucene::Analysis::Standard::StandardAnalyzer>,
constructing it on the first call.

=cut

sub analyzer {
    my $self = shift;

    # explicit class-method call instead of indirect object syntax
    $analyzer ||= Lucene::Analysis::Standard::StandardAnalyzer->new();
    return $analyzer;
}

=head2 store

  my $store = Grep::Search->store;

Returns the shared C<Lucene::Store::FSDirectory> for the index path,
opening it on the first call. The result of C<create> is passed as the
second argument of C<getDirectory>.

=cut

sub store {
    my $self = shift;

    $store ||= Lucene::Store::FSDirectory->getDirectory( $index_path, $self->create );
    return $store;
}

=head2 writer

  my $writer = Grep::Search->writer;

Returns the shared C<Lucene::Index::IndexWriter>, constructing it on the
first call from C<store>, C<analyzer> and the C<create> flag.

=cut

sub writer {
    my $self = shift;

    # explicit class-method call instead of indirect object syntax
    $writer ||= Lucene::Index::IndexWriter->new( $self->store, $self->analyzer, $self->create );
    return $writer;
}

=head2 add

  Grep::Search->add( $record, $owner_id );

Adds one record to the Lucene index. Dies when no record is passed or
when it is not a C<Jifty::Record>.

Each column of the record is examined in turn:

=over 4

=item *

undefined or empty values are skipped

=item *

plain values have anything that looks like a markup tag replaced by a
space; primary key columns are then added as Keyword fields, all other
columns as Text fields

=item *

C<Jifty::DateTime> values are added as Keyword fields in stringified form

=item *

for object values the C<id>, C<name> and C<title> accessors (where
available) are added as Text fields named C<column_accessor>

=item *

references which are not objects are only traced, never indexed

=back

C<$owner_id> is optional and defaults to the id of the current user. It
is stored in the C<_owner_id> Keyword field.

=cut

sub add {
    my $self = shift;

    my $record = shift or die "no record to add";
    my $uid    = shift;

    # core module, loaded here so this sub does not depend on the file header
    require Scalar::Util;

    # blessed() first: calling ->isa on an unblessed reference would die
    # with an unrelated "Can't call method" error
    die "record not Jifty::Record but ", ref($record)
        unless ( Scalar::Util::blessed($record) && $record->isa('Jifty::Record') );

    my $pk = { $record->primary_keys };

    my $doc = Lucene::Document->new;

    my @columns = map { $_->name } $record->columns;

    foreach my $c (@columns) {

        my $v = $record->$c;

        if ( ref($v) ne '' ) {

            # plain array/hash/code references have no methods to ask
            if ( !Scalar::Util::blessed($v) ) {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
                next;
            }

            # index the identifying fields of a related object; $v is
            # reused so the column accessor is not invoked over and over
            foreach my $f_c (qw/id name title/) {
                next unless $v->can($f_c);

                my $f_v = $v->$f_c || $v->{values}->{$f_c};
                my $col = $c . '_' . $f_c;
                if ($f_v) {
                    warn " # $col = $f_v\n" if ($debug);
                    $doc->add( Lucene::Document::Field->Text( $col, $f_v ) );
                } else {
                    warn " . $col is NULL\n" if ($debug);
                }
            }

            if ( $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->add( Lucene::Document::Field->Keyword( $c, "$v" ) );
            } else {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
            }
            next;
        }

        next if ( !defined($v) || $v eq '' );

        # replace anything that looks like a markup tag by a space
        $v =~ s/<[^>]+>/ /gs;

        if ( defined( $pk->{$c} ) ) {
            $doc->add( Lucene::Document::Field->Keyword( $c, $v ) );
            warn " * $c = $v\n" if ($debug);
        } else {
            $doc->add( Lucene::Document::Field->Text( $c, $v ) );
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->add( Lucene::Document::Field->Keyword( '_owner_id', $uid ) );

    $self->writer->addDocument($doc);

    Jifty->log->debug( "added ", $record->id, " for user $uid to index" );
}

=head2 collection

  my $ItemCollection = Grep::Search->collection( 'search query' );

Runs the query against the Lucene index, restricted to documents whose
C<_owner_id> is the id of the current user, and returns a
C<Grep::Model::ItemCollection> holding every hit which could be loaded
as a C<Grep::Model::Item>.

Dies when no query is given. Returns nothing when the index does not
exist yet.

=cut

sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    # an index which still has to be created can't contain anything
    return if ( $self->create );

    my $searcher = Lucene::Search::IndexSearcher->new( $self->store );
    my $parser   = Lucene::QueryParser->new( "content", $self->analyzer );

    my $uid = Jifty->web->current_user->id;

    # NOTE(review): $q comes from the caller and is pasted into the query
    # string as-is, so query syntax inside $q can defeat the owner clause.
    # The owner is therefore verified again on every hit below.
    my $full_q = "($q) AND _owner_id:" . $uid;

    my $query = $parser->parse($full_q);

    Jifty->log->debug( "searching for '$q' using ", $query->toString );

    my $hits     = $searcher->search($query);
    my $num_hits = $hits->length();

    Jifty->log->debug("found $num_hits results");

    my $collection = Grep::Model::ItemCollection->new();

    for my $i ( 0 .. $num_hits - 1 ) {

        my $doc = $hits->doc($i);

        # never hand out documents which belong to somebody else
        my $owner = $doc->get("_owner_id");
        next unless ( defined($owner) && defined($uid) && $owner eq $uid );

        my $score = $hits->score($i);
        my $title = $doc->get("title");
        my $id    = $doc->get("id");

        # a hit is not guaranteed to have a title
        $title = '' unless defined($title);

        warn "## $i $score $title\n" if ($debug);

        my $item = Grep::Model::Item->new();
        my ( $ok, $msg ) = $item->load_by_cols( id => $id );

        if ($ok) {
            $collection->add_record($item);
        } else {
            warn "can't load item $id\n";
        }

    }

    # release the Lucene objects explicitly, in the order the original
    # code used
    undef $hits;
    undef $query;
    undef $parser;
    undef $searcher;

    return $collection;
}

=head2 finish

  Grep::Search->finish

Closes the index writer if one was opened, then forgets the cached
writer, store and C<create> decision so that they are built afresh on
next use. Always returns nothing.

=cut

sub finish {
    my $self = shift;
    if ($writer) {
        warn "closing index\n";
        $writer->close;
    }

    # drop the cached handles; the analyzer is deliberately left alone
    undef $writer;
    undef $store;
    undef $create;

    return;
}

=begin TODO

Disabled signal handling, kept for later. A C<=begin>/C<=end> region is
used because C<=for> covers a single paragraph only, which would let the
code below show up as ordinary text in the rendered documentation.

NOTE(review): before enabling this, confirm that a C<__DIE__> hook which
calls exit(0) is really wanted.

sub _signal {
    my $s = shift;
    warn "catched SIG $s\n";
    finish();
    exit(0);
}

$SIG{'__DIE__'} = \&_signal;
$SIG{'INT'} = \&_signal;
$SIG{'QUIT'} = \&_signal;

=end TODO

=cut

=head2 snippet

  my $short = $self->snippet( 50, $text );

Joins all arguments after the length with a single space, collapses
every run of whitespace into one space and returns the result. When the
result is longer than the requested length it is cut to that length and
C<...> is appended.

Dies when the length is missing or zero. Undefined text arguments are
ignored.

=cut

sub snippet {
    my $self = shift;

    my $len = shift or die "no len?";

    # skip undefined pieces so join doesn't emit "uninitialized" warnings
    my $m = join( " ", grep { defined } @_ );

    $m =~ s/\s+/ /gs;

    if ( length($m) > $len ) {
        return substr( $m, 0, $len ) . '...';
    } else {
        return $m;
    }
}

1;

  ViewVC Help
Powered by ViewVC 1.1.26