/[Grep]/lib/Grep/Search.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Search.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 72 - (show annotations)
Fri Feb 23 09:54:28 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 4373 byte(s)
another great refactoring: added a new Source object which implements
searching within a feed (which can now be anything, as long as it produces fields
which somewhat resemble an RSS feed). Source plugins implement just the (site- or
source-format-specific) fetching of items.

Sample implementation of a MoinMoin scraper, which fetches full pages from the wiki
for results, so it has a performance impact on the remote wiki; be kind to it.
1 package Grep::Search;
2
3 use strict;
4 use warnings;
5
6 use Data::Dump qw/dump/;
7 use Lucene;
8 use Jifty::Util;
9
10 my $index_path = Jifty::Util->app_root . '/var/lucene'; # directory of the Lucene index, below the application root
11
12 my ( $analyzer, $store, $writer ); # shared Lucene objects, each built on first use by the sub of the same name
13
14 my $debug = 1; # when true, add() prints a per-field trace with warn
15 my $create; # cached answer of create(); undef until create() has run, reset to undef by finish()
16
# Decide whether the Lucene index has to be created or can simply be opened.
#
# Returns 1 when "$index_path/segments" does not exist (the index must be
# created) and 0 when it does (an existing index is opened). The answer is
# cached in the file-scoped $create, so later calls return the same value
# until finish() clears it.
sub create {

    # An earlier call already decided: reuse that answer.
    if ( defined $create ) {
        Jifty->log->debug("using previous create $create");
        return $create;
    }

    if ( -e "$index_path/segments" ) {
        $create = 0;
        Jifty->log->debug("open index: $index_path");
    }
    else {
        $create = 1;
        Jifty->log->debug("create index $index_path");
    }

    return $create;
}
33
# Return the shared Lucene StandardAnalyzer, constructing it on first use.
#
# The instance is kept in the file-scoped $analyzer and reused by every
# later call.
sub analyzer {
    my $self = shift;

    # Direct Class->new call: the indirect-object form "new Class()" is
    # discouraged by perlobj because its parsing depends on which subs
    # happen to be in scope.
    $analyzer ||= Lucene::Analysis::Standard::StandardAnalyzer->new();
    return $analyzer;
}
39
# Return the shared Lucene FSDirectory for $index_path, opening it on the
# first call. The create/open flag handed to getDirectory comes from
# $self->create.
sub store {
    my ($self) = @_;

    unless ($store) {
        $store = Lucene::Store::FSDirectory->getDirectory( $index_path, $self->create );
    }

    return $store;
}
46
# Return the shared Lucene IndexWriter, constructing it on first use from
# $self->store, $self->analyzer and the create/open flag of $self->create.
#
# The instance is kept in the file-scoped $writer until finish() closes
# and discards it.
sub writer {
    my $self = shift;

    # Direct Class->new call instead of the indirect-object "new Class(...)"
    # form, which perlobj discourages because of its ambiguous parsing.
    $writer ||= Lucene::Index::IndexWriter->new( $self->store, $self->analyzer, $self->create );
    return $writer;
}
52
=head2 add

  Grep::Search->add( $record, $owner_id );

Build one Lucene document from C<$record> (which must be a C<Jifty::Record>)
and add it to the index through C<< $self->writer >>.

Every column of the record is visited:

=over 4

=item *

Object values: for each of C<id>, C<name> and C<title> that the object
supports, a non-empty value is indexed as a Text field named
C<< <column>_<method> >>. C<Jifty::DateTime> values are also stored,
stringified, as a Keyword field under the column name.

=item *

Unblessed references and other objects are only reported in the debug trace.

=item *

Plain values: undefined or empty values are skipped. Anything that looks
like a markup tag is replaced by a space. Primary key columns are stored as
Keyword fields, all other columns as Text fields.

=back

A Keyword field C<_owner_id> is added with C<$owner_id>, or with the id of
the current Jifty user when C<$owner_id> is false.

Dies when no record is given or when it is not a C<Jifty::Record>.

=cut

sub add {
    my $self = shift;

    my $i = shift or die "no record to add";
    my $uid = shift;

    # Loaded here so the block does not depend on a file-level import.
    require Scalar::Util;

    # blessed() first: calling ->isa on an unblessed reference would die
    # with an unrelated "Can't call method" error instead of this message.
    die "record not Jifty::Record but ", ref($i)
        unless ( Scalar::Util::blessed($i) && $i->isa('Jifty::Record') );

    my $pk = { $i->primary_keys };

    my $doc = Lucene::Document->new;

    my @columns = map { $_->name } $i->columns;

    foreach my $c (@columns) {

        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # Plain HASH/ARRAY/... references have no methods, so ->can and
            # ->isa below would die on them; just trace and move on.
            if ( !Scalar::Util::blessed($v) ) {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
                next;
            }

            # Reuse $v rather than calling the column accessor again for
            # every probe.
            foreach my $f_c (qw/id name title/) {
                next unless $v->can($f_c);

                my $f_v = $v->$f_c || $v->{values}->{$f_c};
                my $col = $c . '_' . $f_c;
                if ($f_v) {
                    warn " # $col = $f_v\n" if ($debug);
                    $doc->add( Lucene::Document::Field->Text( $col, $f_v ) );
                }
                else {
                    warn " . $col is NULL\n" if ($debug);
                }
            }

            if ( $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->add( Lucene::Document::Field->Keyword( $c, "$v" ) );
            }
            else {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
            }
            next;
        }

        next if ( !defined($v) || $v eq '' );

        # Replace anything tag-shaped with a space before indexing.
        $v =~ s/<[^>]+>/ /gs;

        if ( defined( $pk->{$c} ) ) {
            $doc->add( Lucene::Document::Field->Keyword( $c, $v ) );
            warn " * $c = $v\n" if ($debug);
        }
        else {
            $doc->add( Lucene::Document::Field->Text( $c, $v ) );
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->add( Lucene::Document::Field->Keyword( '_owner_id', $uid ) );

    $self->writer->addDocument($doc);

    Jifty->log->debug( "added ", $i->id, " for user $uid to index" );
}
122
=head2 collection

  my $ItemCollection = Grep::Search->collection( 'search query' );

Run C<'search query'> against the Lucene index, restricted to documents
whose C<_owner_id> is the id of the current Jifty user, and return a
C<Grep::Model::ItemCollection> holding every hit that could be loaded as a
C<Grep::Model::Item> by its C<id> field.

Returns nothing (a bare C<return>) when C<< $self->create >> is true, i.e.
when no index exists to search. Dies when no query is given.

=cut

sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    # create() is true when the index still has to be created: nothing to search.
    return if ( $self->create );

    my $searcher = Lucene::Search::IndexSearcher->new( $self->store );

    # NOTE(review): "content" is the default field for unqualified terms, but
    # add() in this file only indexes per-column fields -- confirm intent.
    my $parser = Lucene::QueryParser->new( "content", $self->analyzer );

    # NOTE(review): $q is spliced into the query string unescaped, so query
    # syntax inside it can alter the _owner_id restriction -- verify callers.
    my $full_q = "($q) AND _owner_id:" . Jifty->web->current_user->id;

    my $query = $parser->parse($full_q);

    Jifty->log->debug( "searching for '$q' using ", $query->toString );

    my $hits     = $searcher->search($query);
    my $num_hits = $hits->length();

    Jifty->log->debug("found $num_hits results");

    my $collection = Grep::Model::ItemCollection->new();

    foreach my $i ( 0 .. $num_hits - 1 ) {

        my $doc = $hits->doc($i);

        my $score = $hits->score($i);
        my $title = $doc->get("title");
        my $id    = $doc->get("id");

        # Per-hit trace, gated by $debug like the traces in add(); a missing
        # title is shown as empty instead of raising an "uninitialized" warning.
        if ($debug) {
            $title = '' unless defined $title;
            warn "## $i $score $title\n";
        }

        my $item = Grep::Model::Item->new();
        my ( $ok, $msg ) = $item->load_by_cols( id => $id );

        if ($ok) {
            $collection->add_record($item);
        }
        else {
            warn "can't load item $id\n";
        }

    }

    # Release the Lucene objects explicitly, in this order (presumably to
    # control destruction order of the underlying handles -- TODO confirm).
    undef $hits;
    undef $query;
    undef $parser;
    undef $searcher;

    return $collection;
}
182
=head2 finish

  Grep::Search->finish

Close the index writer if one was created, then forget the cached writer,
the cached store and the cached create/open decision, so the next use
starts from scratch. The cached analyzer is left in place.

Returns nothing.

=cut

sub finish {
    my ($self) = @_;

    if ($writer) {
        warn "closing index\n";
        $writer->close;
    }

    # Drop the cached state; writer first, then store, then the create flag.
    $writer = undef;
    $store  = undef;
    $create = undef;

    return;
}
201
202 =for TODO
203
204 sub _signal {
205 my $s = shift;
206 warn "catched SIG $s\n";
207 finish();
208 exit(0);
209 }
210
211 $SIG{'__DIE__'} = \&_signal;
212 $SIG{'INT'} = \&_signal;
213 $SIG{'QUIT'} = \&_signal;
214
215 =cut
216
=head2 snippet

  my $short = $self->snippet( 50, $text );

Join all text arguments with single spaces, squeeze every run of
whitespace into one space, and return the result. When it is longer than
the given length, only that many leading characters are returned, followed
by C<...>.

Dies with C<no len?> when the length is missing or false.

=cut

sub snippet {
    my ( $self, $len, @parts ) = @_;

    die "no len?" unless $len;

    my $m = join( " ", @parts );
    $m =~ s/\s+/ /gs;

    return length($m) > $len
        ? substr( $m, 0, $len ) . '...'
        : $m;
}
238
239 1;

  ViewVC Help
Powered by ViewVC 1.1.26