/[Grep]/lib/Grep/Search.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Search.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 118 - (hide annotations)
Sun Apr 1 11:53:22 2007 UTC (17 years, 1 month ago) by dpavlin
File size: 4800 byte(s)
tweaks
1 dpavlin 47 package Grep::Search;
2    
3     use strict;
4     use warnings;
5 dpavlin 112 use base qw( Class::Accessor );
6 dpavlin 109 Grep::Search->mk_accessors( qw( analyzer store writer create index_path ) );
7 dpavlin 47
8     use Data::Dump qw/dump/;
9     use Lucene;
10     use Jifty::Util;
11    
12 dpavlin 109 my $debug = 0;
13 dpavlin 47
14 dpavlin 109 =head1 NAME
15 dpavlin 47
16 dpavlin 109 Grep::Search - full text search
17 dpavlin 47
18 dpavlin 109 =head1 METHODS
19 dpavlin 47
20 dpavlin 109 =head2 new
21 dpavlin 58
22 dpavlin 109 my $search = Grep::Search->new();
23    
24     =cut
25    
# Shortcut to the logger object exposed by Jifty's web layer; the rest of
# this module logs through $self->log->debug(...).
sub log {
    return Jifty->web->log;
}
27    
# Constructor.
#
# Builds the object through the Class::Accessor parent constructor, then
# fills in the accessors the other methods rely on:
#
#   index_path - <application root>/var/lucene
#   create     - 1 when "$index_path/segments" does not exist yet, else 0
#   analyzer   - a Lucene standard analyzer
#   store      - FSDirectory for index_path, opened with the create flag
#
# The writer accessor is left unset here; add() creates it on first use.
# Returns the new object.
sub new {
    my $class = shift;
    my $self  = $class->SUPER::new(@_);

    my $index_path = Jifty::Util->app_root . '/var/lucene';
    $self->index_path($index_path);

    # A missing "segments" file is treated as "no index yet"
    if ( !-e "$index_path/segments" ) {
        $self->create(1);
        $self->log->debug("Creating new index $index_path");
    } else {
        $self->create(0);
        $self->log->debug("Opening index: $index_path");
    }

    # Direct method-call syntax instead of the ambiguous "new Class()" form
    $self->analyzer( Lucene::Analysis::Standard::StandardAnalyzer->new() );
    $self->log->debug( $self->analyzer . " created" );

    $self->store(
        Lucene::Store::FSDirectory->getDirectory( $index_path, $self->create )
    );
    $self->log->debug( $self->store, " created" );

    return $self;
}
52    
53 dpavlin 47 =head2 add
54    
55 dpavlin 109 $search->add( $record, $owner_id );
56 dpavlin 47
57     =cut
58    
# Add one record to the Lucene index.
#
#   $search->add( $record, $owner_id );
#
# $record   - a Jifty::Record object (dies otherwise)
# $owner_id - optional; defaults to the id of the current Jifty user
#
# Every column of the record becomes a field of the Lucene document:
#   * object values contribute "<column>_id", "<column>_name" and
#     "<column>_title" text fields for whichever of those methods the
#     object supports and that yield a true value
#   * Jifty::DateTime values are additionally stored as a keyword under
#     the column name (stringified)
#   * plain values have anything that looks like an <...> tag replaced by
#     a space; primary-key columns are stored as keywords, everything
#     else as text
# An "_owner_id" keyword is appended so searches can be restricted to one
# user. The index writer is created lazily on the first call.
sub add {
    my $self = shift;

    my $i   = shift or die "no record to add";
    my $uid = shift;

    require Scalar::Util;

    # blessed() first: calling ->isa on an unblessed reference would die
    # with a confusing "Can't call method" error instead of this message
    die "record not Jifty::Record but ", ref $i
        unless ( Scalar::Util::blessed($i) && $i->isa('Jifty::Record') );

    my $pk = { $i->primary_keys };

    my $doc = Lucene::Document->new;

    my @columns = map { $_->name } $i->columns;

    foreach my $c (@columns) {

        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # Unblessed references (ARRAY, HASH, ...) have no methods to
            # probe, so there is nothing indexable in them; skip instead
            # of dying on ->can / ->isa
            if ( !Scalar::Util::blessed($v) ) {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
                next;
            }

            # Reuse $v rather than calling the column accessor again for
            # every probed sub-field
            foreach my $f_c (qw/id name title/) {
                next unless $v->can($f_c);

                my $f_v = $v->$f_c || $v->{values}->{$f_c};
                my $col = $c . '_' . $f_c;
                if ($f_v) {
                    warn " # $col = $f_v\n" if ($debug);
                    $doc->add( Lucene::Document::Field->Text( $col, $f_v ) );
                } else {
                    warn " . $col is NULL\n" if ($debug);
                }
            }

            if ( $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->add( Lucene::Document::Field->Keyword( $c, "$v" ) );
            } else {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
            }
            next;
        }

        next if ( !defined($v) || $v eq '' );

        # crude markup removal: each <...> run becomes a single space
        $v =~ s/<[^>]+>/ /gs;

        if ( defined( $pk->{$c} ) ) {
            $doc->add( Lucene::Document::Field->Keyword( $c, $v ) );
            warn " * $c = $v\n" if ($debug);
        } else {
            $doc->add( Lucene::Document::Field->Text( $c, $v ) );
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->add( Lucene::Document::Field->Keyword( '_owner_id', $uid ) );

    if ( !defined( $self->writer ) ) {
        $self->writer(
            Lucene::Index::IndexWriter->new(
                $self->store, $self->analyzer, $self->create
            )
        );
        $self->log->debug( $self->writer, " created" );
    }

    $self->writer->addDocument($doc);

    $self->log->debug( "added ", $i->id, " for user $uid to index" );
}
127    
128 dpavlin 109 =head2 collection
129 dpavlin 47
130 dpavlin 109 my $ItemCollection = $search->collection( 'search query' );
131 dpavlin 47
132     =cut
133    
# Run a full-text query and return the matching items.
#
#   my $ItemCollection = $search->collection( 'search query' );
#
# Dies when no query is given. Returns nothing (bare return) while the
# create flag is set, i.e. when no index existed when this object was
# constructed. Otherwise returns a Grep::Model::ItemCollection holding
# every hit that could be loaded by id; hits that fail to load only
# produce a warning.
#
# The query is parsed with "content" as the default field and combined
# with an "_owner_id" clause for the current Jifty user.
sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    return if ( $self->create );

    my $searcher = Lucene::Search::IndexSearcher->new( $self->store );
    $self->log->debug("$searcher created");
    my $parser = Lucene::QueryParser->new( "content", $self->analyzer );
    $self->log->debug("$parser created");

    my $uid    = Jifty->web->current_user->id;
    my $full_q = "($q) AND _owner_id:" . $uid;

    my $query = $parser->parse($full_q);

    $self->log->debug( "searching for '$q' using ", $query->toString );

    my $hits     = $searcher->search($query);
    my $num_hits = $hits->length();

    $self->log->debug("found $num_hits results");

    my $collection = Grep::Model::ItemCollection->new();

    foreach my $i ( 0 .. $num_hits - 1 ) {

        my $doc = $hits->doc($i);

        # $q is pasted into the query string verbatim, so a crafted query
        # (e.g. one containing ") OR (") can escape the owner clause.
        # Re-check the stored owner of every hit before exposing it.
        my $owner = $doc->get("_owner_id");
        if ( !defined($owner) || !defined($uid) || $owner ne $uid ) {
            $self->log->debug("skipping result $i: not owned by current user");
            next;
        }

        my $score = $hits->score($i);
        my $title = $doc->get("title");
        my $id    = $doc->get("id");

        # documents without a title would otherwise trigger an
        # "uninitialized value" warning in the debug message
        $title = '' unless defined $title;

        $self->log->debug("result $i $score $title");

        my $item = Grep::Model::Item->new();
        my ( $ok, $msg ) = $item->load_by_cols( id => $id );

        if ($ok) {
            $collection->add_record($item);
        } else {
            warn "can't load item $id\n";
        }

    }

    # Release the Lucene objects explicitly, hits first and searcher last,
    # closing the searcher so its index reader is not left open
    undef $hits;
    undef $query;
    undef $parser;
    $searcher->close;
    undef $searcher;

    $self->log->debug("finished Lucene search");

    return $collection;
}
191    
192     =head2 finish
193    
194 dpavlin 109 $search->finish
195 dpavlin 47
196     =cut
197    
# Flush and close the index writer, if add() ever created one.
#
#   $search->finish
#
# After a successful close the writer accessor is cleared, so calling
# finish() again is harmless and a later add() opens a fresh writer. The
# create flag is reset to 0 at the same time: the index now exists on
# disk, so a writer opened afterwards must append rather than start a new
# index. Always returns nothing.
sub finish {
    my $self = shift;

    if ( $self->writer ) {
        $self->log->debug("closing index");
        $self->writer->close;

        # drop the closed handle and stop treating the index as new
        $self->writer(undef);
        $self->create(0);
    }

    $self->log->debug("finish");

    return;
}
211    
212 dpavlin 57 =for TODO
213    
214 dpavlin 47 sub _signal {
215     my $s = shift;
216     warn "catched SIG $s\n";
217     finish();
218     exit(0);
219     }
220    
221     $SIG{'__DIE__'} = \&_signal;
222     $SIG{'INT'} = \&_signal;
223     $SIG{'QUIT'} = \&_signal;
224    
225 dpavlin 57 =cut
226 dpavlin 53
227     =head2 snippet
228    
229     my $short = $self->snippet( 50, $text );
230    
231     =cut
232    
# Shorten text for display.
#
#   my $short = $self->snippet( 50, $text );
#
# All arguments after the length are joined with single spaces and every
# run of whitespace is collapsed to one space. If the result is longer
# than $len characters it is cut to $len characters and '...' is
# appended; otherwise it is returned unchanged. Dies when $len is
# missing or false.
sub snippet {
    my ( $self, $len, @parts ) = @_;

    die "no len?" unless $len;

    ( my $text = join( " ", @parts ) ) =~ s/\s+/ /gs;

    return $text if length($text) <= $len;
    return substr( $text, 0, $len ) . '...';
}
247    
248 dpavlin 47 1;

  ViewVC Help
Powered by ViewVC 1.1.26