/[Grep]/lib/Grep/Search/KinoSearch.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /lib/Grep/Search/KinoSearch.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 144 - (hide annotations)
Tue May 8 14:11:38 2007 UTC (17 years, 1 month ago) by dpavlin
Original Path: lib/Grep/Search.pm
File size: 5393 byte(s)
added --duplicates switch to reindex.pl to keep duplicate items, create new
index every time to prevent duplicate results for same item
package Grep::Search;

use strict;
use warnings;
use base qw( Class::Accessor );

# Generate get/set accessors for the per-object state used by the methods
# of this package.
Grep::Search->mk_accessors( qw( invindexer create index_path hits ) );

use Data::Dump qw/dump/;
use KinoSearch::InvIndexer;
use KinoSearch::Searcher;
use Jifty::Util;

# Set to a true value to make add() emit per-column diagnostics via warn.
my $debug = 0;
14 dpavlin 47
15 dpavlin 109 =head1 NAME
16 dpavlin 47
17 dpavlin 127 Grep::Search - full text search using L<KinoSearch>
18 dpavlin 47
19 dpavlin 109 =head1 METHODS
20 dpavlin 47
21 dpavlin 109 =head2 new
22 dpavlin 58
23 dpavlin 109 my $search = Grep::Search->new();
24    
25 dpavlin 144 my $search = Grep::Search->new( create => 1 );
26    
27 dpavlin 109 =cut
28    
# Return the application logger (Jifty->web->log), so methods can write
# $self->log->debug(...).
sub log {
    return Jifty->web->log;
}
30    
# Constructor.
#
# Accepts either a single hash reference or a key => value list, e.g.
# Grep::Search->new( create => 1 ).  The only option read here is
# "create": when true, or when the index directory does not exist yet,
# a fresh index is created (clobbering any existing one); otherwise the
# existing index is opened.
#
# Returns the new object with index_path and invindexer populated.
sub new {
    my $class = shift;

    # Class::Accessor's new() is documented as taking one hash reference,
    # so a flat key => value list is folded into a hash first.  A single
    # argument is passed through untouched to stay backward compatible.
    my $fields = @_ == 1 ? $_[0] : +{ @_ };
    my $self   = $class->SUPER::new($fields);

    my $index_path = Jifty::Util->app_root . '/var/invindex';
    $self->index_path($index_path);

    my $invindex;
    if ( ! -e $index_path || $self->create ) {
        $self->log->debug("Creating new index $index_path");
        $invindex = Grep::Search::Schema->clobber($index_path);
    }
    else {
        $self->log->debug("Opening index: $index_path");
        $invindex = Grep::Search::Schema->open($index_path);
    }

    $self->invindexer( KinoSearch::InvIndexer->new( invindex => $invindex ) );

    return $self;
}
49    
=head2 add

  $search->add( $record, $owner_id );

Add one C<Jifty::Record> to the index.

Columns holding plain, non-empty values are stored under the column name
after markup tags (C<< <...> >>) have been replaced by spaces.  For columns
holding objects, the object's C<id>, C<name> and C<title> (when available
and true) are stored as C<< <column>_id >>, C<< <column>_name >> and
C<< <column>_title >>; C<Jifty::DateTime> objects are additionally stored
under the column name itself.  Unblessed references are skipped.

C<_owner_id> is set to C<$owner_id>, or to the id of the current user when
C<$owner_id> is not given.

Dies if no record is passed or if it is not a C<Jifty::Record>.

=cut

sub add {
    my $self = shift;

    my $i   = shift or die "no record to add";
    my $uid = shift;

    # Loaded here so this sub does not depend on the file's import list.
    require Scalar::Util;

    # Check blessed() first: calling ->isa on an unblessed reference dies
    # with a generic "Can't call method" error instead of this message.
    die "record not Jifty::Record but ", ref $i
        unless Scalar::Util::blessed($i) && $i->isa('Jifty::Record');

    my $pk  = { $i->primary_keys };
    my $doc = {};

    foreach my $c ( map { $_->name } $i->columns ) {

        # Fetch the value once; it is reused for every check below.
        my $v = $i->$c;

        if ( ref($v) ne '' ) {

            # A plain reference has no methods to query, so there is
            # nothing to index for it.
            if ( !Scalar::Util::blessed($v) ) {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
                next;
            }

            foreach my $f_c (qw/id name title/) {
                next unless $v->can($f_c);

                # NOTE(review): the fallback reads the object's internal
                # {values} hash directly -- presumably the record's raw
                # column storage; confirm against Jifty::DBI.
                my $f_v = $v->$f_c || $v->{values}->{$f_c};
                my $col = $c . '_' . $f_c;

                if ($f_v) {
                    warn " # $col = $f_v\n" if ($debug);
                    $doc->{$col} = $f_v;
                }
                else {
                    warn " . $col is NULL\n" if ($debug);
                }
            }

            if ( $v->isa('Jifty::DateTime') ) {
                warn " d $c = $v\n" if ($debug);
                $doc->{$c} = $v;
            }
            else {
                warn " s $c = $v [", ref($v), "]\n" if ($debug);
            }
            next;
        }

        next if ( !defined($v) || $v eq '' );

        # Replace anything that looks like a markup tag with a space.
        $v =~ s/<[^>]+>/ /gs;

        $doc->{$c} = $v;

        # Primary keys and ordinary columns are stored the same way; only
        # the debug output differs.
        if ( defined( $pk->{$c} ) ) {
            warn " * $c = $v\n" if ($debug);
        }
        else {
            warn " + $c = ", $self->snippet( 50, $v ), "\n" if ($debug);
        }
    }

    # add _owner_id to speed up filtering of search results
    $uid ||= Jifty->web->current_user->id;
    $doc->{'_owner_id'} = $uid;

    $self->invindexer->add_doc($doc);

    $self->log->debug( "added ", $i->id, " for user $uid to index" );
}
119    
=head2 collection

Return C<Grep::Model::ItemCollection> which is result of C<search query>

  my $ItemCollection = $search->collection( 'search query' );

Results are restricted to documents whose C<_owner_id> is the id of the
current user.  Dies if no query is given.

=head2 hits

Return number of results from last C<collection> call

  my $num_results = $search->hits;

=cut

sub collection {
    my $self = shift;

    my $q = shift or die "no q?";

    my $searcher = KinoSearch::Searcher->new(
        invindex => Grep::Search::Schema->open( $self->index_path ),
    );
    $self->log->debug("$searcher created");

    # NOTE(review): $q is interpolated verbatim, so a crafted query
    # (unbalanced parentheses, a top-level OR) could possibly defeat the
    # owner restriction.  Consider applying the owner filter as a separate
    # query object rather than by string concatenation.
    my $full_q = "($q) AND _owner_id:" . Jifty->web->current_user->id;

    $self->log->debug("searching for '$q' using $full_q");
    my $hits = $searcher->search(
        query => $full_q,
        # offset => $offset,
        # num_wanted => $hits_per_page,
    );

    $self->hits( $hits->total_hits );

    $self->log->debug( "found ", $self->hits, " results" );

    my $collection = Grep::Model::ItemCollection->new();

    # Zero-based position of the current hit, used only in the debug log.
    my $i = 0;
    while ( my $hit = $hits->fetch_hit_hashref ) {

        my $score = $hit->{score};
        my $title = $hit->{title};
        my $id    = $hit->{id};

        $self->log->debug("result $i [$id] $title $score");
        $i++;

        # Load the real record; hits whose record cannot be loaded are
        # reported and left out of the collection.
        my $item = Grep::Model::Item->new();
        my ( $ok, $msg ) = $item->load_by_cols( id => $id );

        if ($ok) {
            $collection->add_record($item);
        }
        else {
            warn "can't load item $id\n";
        }
    }

    $self->log->debug("finished search");

    return $collection;
}
184    
=head2 finish

  $search->finish

Finish the attached indexer, if there is one, and detach it from the
object, so calling C<finish> again is harmless.  Returns nothing.

=cut

sub finish {
    my $self = shift;

    if ( my $invindexer = $self->invindexer ) {
        $self->log->debug("closing index");
        $invindexer->finish;

        # Drop the finished indexer so that a repeated finish() does not
        # call finish on it a second time.
        $self->invindexer(undef);
    }

    $self->log->debug("finish");

    return;
}
204    
=head2 snippet

  my $short = $self->snippet( 50, $text );

Join all text arguments with a single space, collapse every run of
whitespace into one space and, if the result is longer than the given
length, cut it to that length and append C<...>.  Dies if the length is
missing or zero.

=cut

sub snippet {
    my $self = shift;

    my $len = shift or die "no len?";
    my $m   = join( " ", @_ );

    # Newlines and tabs count as whitespace too, so the result is one line.
    $m =~ s/\s+/ /gs;

    return length($m) > $len
        ? substr( $m, 0, $len ) . '...'
        : $m;
}
225    
# Field spec whose analyzed() returns false.  Presumably this tells
# KinoSearch to index the value as-is instead of running it through the
# schema's analyzer -- confirm against KinoSearch::Schema::FieldSpec.
package Grep::Search::KeywordField;
use base qw( KinoSearch::Schema::FieldSpec );
sub analyzed { 0 }
229    
package Grep::Search::Schema;

=head1 NAME

Grep::Search::Schema - schema definition for full-text search

=cut

use base 'KinoSearch::Schema';
use KinoSearch::Analysis::PolyAnalyzer;

# Field name => field spec class.  Grep::Search::KeywordField is the
# non-analyzed spec defined in this file; the remaining fields use the
# stock KinoSearch::Schema::FieldSpec.
our %fields = (
    id                 => 'Grep::Search::KeywordField',

    in_feed_id         => 'Grep::Search::KeywordField',
    in_feed_url        => 'Grep::Search::KeywordField',
    in_feed_title      => 'KinoSearch::Schema::FieldSpec',
    in_feed_owner      => 'Grep::Search::KeywordField',
    in_feed_created_on => 'Grep::Search::KeywordField',

    title              => 'KinoSearch::Schema::FieldSpec',
    link               => 'Grep::Search::KeywordField',
    content            => 'KinoSearch::Schema::FieldSpec',
    summary            => 'KinoSearch::Schema::FieldSpec',
    category           => 'KinoSearch::Schema::FieldSpec',
    author             => 'KinoSearch::Schema::FieldSpec',
    issued             => 'Grep::Search::KeywordField',
    modified           => 'Grep::Search::KeywordField',

    _owner_id          => 'Grep::Search::KeywordField',
);

# Analyzer used by this schema: an English PolyAnalyzer.
sub analyzer {
    return KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' );
}

1;

  ViewVC Help
Powered by ViewVC 1.1.26