/[Grep]/lib/Grep/Search.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /lib/Grep/Search.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 112 - (show annotations)
Wed Mar 14 21:10:53 2007 UTC (17 years, 2 months ago) by dpavlin
File size: 4772 byte(s)
Grep::Search really shouldn't be a Jifty::Object because its serialization
within Jifty confuses Lucene locks. We just need ->log anyway...
package Grep::Search;

use strict;
use warnings;

# Plain Class::Accessor object; the generated accessors hold the Lucene
# analyzer, store and writer, the "create" flag and the index path.
use base qw( Class::Accessor );
__PACKAGE__->mk_accessors( qw( analyzer store writer create index_path ) );

use Data::Dump qw/dump/;
use Lucene;
use Jifty::Util;

# Set to a true value to make add() emit per-field diagnostics via warn.
my $debug = 0;
13
14 =head1 NAME
15
16 Grep::Search - full text search
17
18 =head1 METHODS
19
20 =head2 new
21
22 my $search = Grep::Search->new();
23
24 =cut
25
# Return the logger object obtained from Jifty->web->log.
sub log {
    return Jifty->web->log;
}
27
# Constructor.  Builds the object through the parent class, then prepares
# the Lucene analyzer and the on-disk store under <app_root>/var/lucene.
#
# The "create" flag is set to 1 when no "segments" file exists in the index
# directory yet and to 0 otherwise; the same flag is handed to
# FSDirectory->getDirectory when the store is opened.
#
# Returns the new Grep::Search object.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    my $index_path = Jifty::Util->app_root . '/var/lucene';

    $self->index_path( $index_path );

    if ( ! -e "$index_path/segments" ) {
        $self->create( 1 );
        $self->log->debug("Creating new index $index_path");
    } else {
        $self->create( 0 );
        $self->log->debug("Opening index: $index_path");
    }

    # Class->new rather than the indirect-object form "new Class()", which
    # perl can mis-parse depending on what is in scope.
    $self->analyzer( Lucene::Analysis::Standard::StandardAnalyzer->new() );
    $self->log->debug($self->analyzer . " created");

    $self->store( Lucene::Store::FSDirectory->getDirectory( $index_path, $self->create ) );
    $self->log->debug($self->store, " created");

    return $self;
}
52
=head2 add

  $search->add( $record, $owner_id );

Add one C<Jifty::Record> to the index as a single Lucene document.

For every column of the record:

=over 4

=item *

If the value is an object, its C<id>, C<name> and C<title> (where the object
provides such a method and the value is true) are added as Text fields named
C<< <column>_<field> >>.  A C<Jifty::DateTime> value is additionally added,
stringified, as a Keyword field under the column name.  Plain (unblessed)
references are skipped.

=item *

Undefined and empty values are skipped.

=item *

Otherwise anything that looks like a C<< <tag> >> is replaced by a space and
the value is added as a Keyword field for primary key columns or as a Text
field for all other columns.

=back

A C<_owner_id> Keyword field is always added; it is taken from C<$owner_id>
or, when that is false, from the id of the current Jifty user.

The index writer is created on the first call and kept in C<writer>.

Dies if no record is given or if it is not a C<Jifty::Record>.

=cut

sub add {
    my $self = shift;

    my $i   = shift or die "no record to add";
    my $uid = shift;

    # Loaded here so this sub does not rely on a file-level import.
    require Scalar::Util;

    # blessed() first: calling ->isa on an unblessed reference would die with
    # an unrelated "Can't call method" error instead of the message below.
    die "record not Jifty::Record but ", ref($i)
        unless ( Scalar::Util::blessed($i) && $i->isa('Jifty::Record') );

    my $pk = { $i->primary_keys };

    my $doc = Lucene::Document->new;

    my @columns = map { $_->name } $i->columns;

    foreach my $c ( @columns ) {

        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # Only objects have methods we can ask for id/name/title.
            my $is_object = Scalar::Util::blessed($v);

            if ( $is_object ) {
                foreach my $f_c ( qw/id name title/ ) {
                    next unless $v->can( $f_c );

                    # Reuse $v instead of calling the column accessor again
                    # for every sub-field.
                    my $f_v = $v->$f_c || $v->{values}->{ $f_c };
                    my $col = $c . '_' . $f_c;
                    if ( $f_v ) {
                        warn " # $col = $f_v\n" if ($debug);
                        $doc->add(Lucene::Document::Field->Text( $col, $f_v ));
                    } else {
                        warn " . $col is NULL\n" if ($debug);
                    }
                }
            }

            if ( $is_object && $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->add(Lucene::Document::Field->Keyword( $c, "$v" ));
            } else {
                warn " s $c = $v [",ref($v),"]\n" if ($debug);
            }
            next;
        }

        next if (! defined($v) || $v eq '');

        # crude markup removal: every <...> run becomes a single space
        $v =~ s/<[^>]+>/ /gs;

        if ( defined( $pk->{$c} ) ) {
            $doc->add(Lucene::Document::Field->Keyword( $c, $v ));
            warn " * $c = $v\n" if ($debug);
        } else {
            $doc->add(Lucene::Document::Field->Text( $c, $v ));
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->add(Lucene::Document::Field->Keyword( '_owner_id', $uid ));

    # the writer is opened lazily, on the first document added
    if (! defined( $self->writer )) {
        $self->writer( Lucene::Index::IndexWriter->new( $self->store, $self->analyzer, $self->create ) );
        $self->log->debug($self->writer, " created");
    }

    $self->writer->addDocument($doc);

    $self->log->debug("added ", $i->id, " for user $uid to index");
}
127
=head2 collection

  my $ItemCollection = $search->collection( 'search query' );

Run C<search query> against the index, restricted to documents whose
C<_owner_id> is the id of the current Jifty user, and return a
C<Grep::Model::ItemCollection> holding every hit that could be loaded as a
C<Grep::Model::Item> by its C<id>.  Hits that cannot be loaded are reported
with C<warn> and left out.

Returns nothing while the C<create> flag is set.  Dies when no query is
given.

=cut

sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    return if ( $self->create );

    my $searcher = Lucene::Search::IndexSearcher->new($self->store);
    $self->log->debug("$searcher created");
    my $parser = Lucene::QueryParser->new("content", $self->analyzer);
    $self->log->debug("$parser created");

    my $owner_id = Jifty->web->current_user->id;

    # NOTE(review): the owner restriction is spliced into a query string that
    # contains untrusted user input.  Each hit is therefore re-checked against
    # its stored _owner_id below instead of trusting the parsed query alone.
    my $full_q = "($q) AND _owner_id:" . $owner_id;

    my $query = $parser->parse( $full_q );

    $self->log->debug("searching for '$q' using ", $query->toString);

    my $hits = $searcher->search($query);
    my $num_hits = $hits->length();

    $self->log->debug("found $num_hits results");

    my $collection = Grep::Model::ItemCollection->new();

    for my $i ( 0 .. $num_hits - 1 ) {

        my $doc = $hits->doc( $i );

        my $score = $hits->score($i);
        my $title = $doc->get("title");
        my $id = $doc->get("id");

        # a document without a title must not trigger an "uninitialized"
        # warning in the log line
        $title = '' unless defined $title;

        $self->log->debug("result $i $score $title");

        # defence in depth: never hand out a document of another owner
        my $doc_owner = $doc->get("_owner_id");
        unless ( defined $doc_owner && defined $owner_id && $doc_owner eq $owner_id ) {
            $self->log->debug("result $i skipped, owner mismatch");
            next;
        }

        my $item = Grep::Model::Item->new();
        my ($ok,$msg) = $item->load_by_cols( id => $id );

        if ( $ok ) {
            $collection->add_record( $item );
        } else {
            warn "can't load item $id\n";
        }

    }

    # release the Lucene objects explicitly, in the same order as before,
    # and close the searcher before dropping it
    undef $hits;
    undef $query;
    undef $parser;
    $searcher->close;
    undef $searcher;

    return $collection;
}
190
=head2 finish

  $search->finish

Close the index writer, if one was opened by C<add>, and forget it so that a
later C<add> opens a fresh writer instead of using the closed one.  The
C<create> flag is cleared at the same time, so that a re-opened writer is
not asked to create the index again.

Safe to call more than once.  Returns nothing.

=cut

sub finish {
    my $self = shift;

    if ( $self->writer ) {
        $self->log->debug("closing index");
        $self->writer->close;

        # The closed writer must not be reused, and the index it wrote now
        # exists, so it must not be opened in create mode again.
        $self->writer( undef );
        $self->create( 0 );
    }

    $self->log->debug("finish");

    return;
}
210
211 =for TODO
212
213 sub _signal {
214 my $s = shift;
215 warn "catched SIG $s\n";
216 finish();
217 exit(0);
218 }
219
220 $SIG{'__DIE__'} = \&_signal;
221 $SIG{'INT'} = \&_signal;
222 $SIG{'QUIT'} = \&_signal;
223
224 =cut
225
=head2 snippet

  my $short = $self->snippet( 50, $text );

Join all text arguments with a single space, collapse every run of
whitespace into one space and return the result.  When it is longer than the
given length it is cut to that many characters and C<...> is appended.

Dies when the length is missing or false.

=cut

sub snippet {
    my ( $self, $limit, @parts ) = @_;

    die "no len?" unless $limit;

    ( my $text = join( " ", @parts ) ) =~ s/\s+/ /gs;

    return $text unless length($text) > $limit;
    return substr( $text, 0, $limit ) . '...';
}
246
247 1;

  ViewVC Help
Powered by ViewVC 1.1.26