/[Grep]/lib/Grep/Search.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Search.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 98 - (show annotations)
Sat Feb 24 12:16:57 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 4749 byte(s)
code cleanup, now isa Jifty::Object, more debug logging
1 package Grep::Search;
2
3 use strict;
4 use warnings;
5 use base 'Jifty::Object';
6
7 use Data::Dump qw/dump/;
8 use Lucene;
9 use Jifty::Util;
10
# Location of the Lucene index on disk, below the application root.
my $index_path = Jifty::Util->app_root . '/var/lucene';

# Lazily built Lucene handles; each one stays undef until first requested.
my $analyzer;
my $store;
my $writer;

# Cached answer of create(); undef means "not decided yet".
my $create;

# Diagnostics switch: when true, extra details are printed with warn.
my $debug = 1;
17
# Decide whether the Lucene index has to be created from scratch.
#
# Returns 1 when "$index_path/segments" does not exist and 0 when it does.
# The answer is remembered in $create and handed back unchanged on every
# later call until $create is reset.
sub create {
    my ($self) = @_;

    unless ( defined $create ) {

        # First call: look at the file system and remember the verdict.
        if ( -e "$index_path/segments" ) {
            $create = 0;
            $self->log->debug("open index: $index_path");
        }
        else {
            $create = 1;
            $self->log->debug("create index $index_path");
        }

        return $create;
    }

    $self->log->debug("using previous create $create");
    return $create;
}
35
# Return the shared Lucene StandardAnalyzer, building it on first use.
#
# The instance is kept in the file-level $analyzer variable, so repeated
# calls hand back the same object.
sub analyzer {
    my ($self) = @_;

    return $analyzer if defined $analyzer;

    # Direct class-method call: the indirect object form ("new Class()")
    # is parsed heuristically and is discouraged by perlobj.
    $analyzer = Lucene::Analysis::Standard::StandardAnalyzer->new();
    $self->log->debug("$analyzer created");

    return $analyzer;
}
44
# Return the shared Lucene directory object for $index_path, opening it on
# first use. The create flag passed to getDirectory comes from create().
sub store {
    my ($self) = @_;

    return $store if defined $store;

    my $must_create = $self->create;
    $store = Lucene::Store::FSDirectory->getDirectory( $index_path, $must_create );
    $self->log->debug("$store created");

    return $store;
}
53
# Return the shared Lucene IndexWriter, building it on first use from the
# store, the analyzer and the create flag.
#
# The instance is kept in the file-level $writer variable until it is
# cleared again.
sub writer {
    my ($self) = @_;

    return $writer if defined $writer;

    # Direct class-method call: the indirect object form ("new Class(...)")
    # is parsed heuristically and is discouraged by perlobj.
    $writer = Lucene::Index::IndexWriter->new(
        $self->store, $self->analyzer, $self->create,
    );
    $self->log->debug("$writer created");

    return $writer;
}
62
=head2 add

  Grep::Search->add( $record, $owner_id );

Add one C<Jifty::Record> to the Lucene index.

Every column of the record is visited:

=over 4

=item *

plain values have markup-like C<< <...> >> sequences replaced by a space;
primary key columns are stored as Keyword fields, all others as Text fields

=item *

object values contribute C<column_id>, C<column_name> and C<column_title>
Text fields for each of those methods the object supports and that yields
a true value

=item *

C<Jifty::DateTime> values are stored stringified as a Keyword field

=back

Undefined and empty values are skipped. The document also gets an
C<_owner_id> Keyword field holding C<$owner_id>, or the id of the current
user when no owner is given.

Dies if no record is passed or if it is not a C<Jifty::Record> object.

=cut

sub add {
    my $self = shift;

    my $i   = shift or die "no record to add";
    my $uid = shift;

    require Scalar::Util;

    # blessed() first, so that an unblessed reference produces our own
    # message instead of "Can't call method isa on unblessed reference"
    die "record not Jifty::Record but ", ref $i
        unless ( Scalar::Util::blessed($i) && $i->isa('Jifty::Record') );

    my $pk = { $i->primary_keys };

    my $doc = Lucene::Document->new;

    my @columns = map { $_->name } $i->columns;

    foreach my $c (@columns) {

        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # only objects can answer ->can / ->isa; anything else is
            # reported and skipped
            if ( !Scalar::Util::blessed($v) ) {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
                next;
            }

            foreach my $f_c (qw/id name title/) {
                next unless $v->can($f_c);

                # reuse $v instead of invoking the column accessor again
                # for every sub-field
                my $f_v = $v->$f_c || $v->{values}->{$f_c};
                my $col = $c . '_' . $f_c;
                if ($f_v) {
                    warn " # $col = $f_v\n" if ($debug);
                    $doc->add( Lucene::Document::Field->Text( $col, $f_v ) );
                }
                else {
                    warn " . $col is NULL\n" if ($debug);
                }
            }

            if ( $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->add( Lucene::Document::Field->Keyword( $c, "$v" ) );
            }
            else {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
            }
            next;
        }

        next if ( !defined($v) || $v eq '' );

        # replace anything that looks like a tag with a space
        $v =~ s/<[^>]+>/ /gs;

        if ( defined( $pk->{$c} ) ) {
            $doc->add( Lucene::Document::Field->Keyword( $c, $v ) );
            warn " * $c = $v\n" if ($debug);
        }
        else {
            $doc->add( Lucene::Document::Field->Text( $c, $v ) );
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->add( Lucene::Document::Field->Keyword( '_owner_id', $uid ) );

    $self->writer->addDocument($doc);

    $self->log->debug( "added ", $i->id, " for user $uid to index" );
}
132
=head2 collection

  my $ItemCollection = Grep::Search->collection( 'search query' );

Run C<search query> against the Lucene index, restricted to documents whose
C<_owner_id> is the id of the current user, and return a
C<Grep::Model::ItemCollection> holding every hit that could be loaded as a
C<Grep::Model::Item>.

Returns nothing when the index does not exist yet. Dies if no query is
given. An error raised while parsing or searching is re-thrown after the
searcher has been closed.

=cut

sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    return if ( $self->create );

    my $uid = Jifty->web->current_user->id;

    my $searcher = Lucene::Search::IndexSearcher->new( $self->store );
    $self->log->debug("$searcher created");

    my $collection;

    # Parser, query and hits live only inside the eval block, so they are
    # released before the searcher is closed, even when something dies.
    my $ok = eval {
        my $parser = Lucene::QueryParser->new( "content", $self->analyzer );
        $self->log->debug("$parser created");

        my $full_q = "($q) AND _owner_id:" . $uid;

        my $query = $parser->parse($full_q);

        $self->log->debug( "searching for '$q' using ", $query->toString );

        my $hits     = $searcher->search($query);
        my $num_hits = $hits->length();

        $self->log->debug("found $num_hits results");

        $collection = Grep::Model::ItemCollection->new();

        foreach my $i ( 0 .. $num_hits - 1 ) {

            my $doc = $hits->doc($i);

            my $score = $hits->score($i);
            my $title = $doc->get("title");
            my $id    = $doc->get("id");

            warn "## $i $score ", ( defined $title ? $title : '' ), "\n"
                if ($debug);

            # SECURITY: $q is user input spliced into the query string, so
            # unbalanced parentheses could detach the _owner_id clause.
            # Re-check the stored owner of every hit before returning it.
            my $owner = $doc->get("_owner_id");
            unless ( defined $owner && defined $uid && $owner eq $uid ) {
                warn "skipping item $id: not owned by current user\n"
                    if ($debug);
                next;
            }

            my $item = Grep::Model::Item->new();
            my ($loaded) = $item->load_by_cols( id => $id );

            if ($loaded) {
                $collection->add_record($item);
            }
            else {
                warn "can't load item $id\n";
            }
        }

        1;
    };
    my $error = $@;

    # always release the searcher, also on failure
    $searcher->close;
    undef $searcher;

    die( $error || "search failed\n" ) unless $ok;

    return $collection;
}
195
=head2 finish

  Grep::Search->finish

Close the index writer if one is open, then forget the cached writer,
store, create flag and analyzer. Returns nothing.

=cut

sub finish {
    my ($self) = @_;

    if ($writer) {
        warn "closing index\n";
        $writer->close;
    }

    # reset the cached state in the same order: writer, store, create, analyzer
    undef $$_ for \( $writer, $store, $create, $analyzer );

    return;
}
215
216 =for TODO
217
218 sub _signal {
219 my $s = shift;
220 warn "catched SIG $s\n";
221 finish();
222 exit(0);
223 }
224
225 $SIG{'__DIE__'} = \&_signal;
226 $SIG{'INT'} = \&_signal;
227 $SIG{'QUIT'} = \&_signal;
228
229 =cut
230
=head2 snippet

  my $short = $self->snippet( 50, $text );

Join all text arguments with a single space, collapse every run of
whitespace into one space and return the result. When it is longer than
the requested length it is cut to that length and C<...> is appended.

Dies with C<no len?> when the length is missing or false.

=cut

sub snippet {
    my ( $self, $len, @parts ) = @_;

    die "no len?" unless $len;

    ( my $text = join( " ", @parts ) ) =~ s/\s+/ /gs;

    return length($text) > $len
        ? substr( $text, 0, $len ) . '...'
        : $text;
}
252
253 1;

  ViewVC Help
Powered by ViewVC 1.1.26