1 |
#!/usr/bin/perl -w |
#!/usr/bin/perl -w |
2 |
|
|
3 |
|
package MWS::SWISH; |
4 |
use strict; |
use strict; |
5 |
|
use warnings; |
6 |
|
|
7 |
# |
use MWS::Indexer; |
8 |
# simple implementation to use SWISH-e with queries like |
our @ISA=qw(MWS::Indexer); |
9 |
# Lucene (subject:something) |
|
10 |
# |
our $VERSION = '1.00'; |
11 |
|
|
12 |
use SWISH::API; |
use SWISH::API; |
13 |
use Text::Iconv; |
use Text::Iconv; |
|
use Data::Dumper; |
|
14 |
use File::Temp qw/ :mktemp /; |
use File::Temp qw/ :mktemp /; |
15 |
use Text::Soundex; |
use Text::Soundex; |
16 |
|
use Carp; |
17 |
|
|
18 |
my $iso2utf = Text::Iconv->new('ISO-8859-2','UTF-8'); |
my $iso2utf = Text::Iconv->new('ISO-8859-2','UTF-8'); |
19 |
my $utf2iso = Text::Iconv->new('UTF-8','ISO-8859-2'); |
my $utf2iso = Text::Iconv->new('UTF-8','ISO-8859-2'); |
20 |
|
|
21 |
|
=head1 NAME |
22 |
|
|
23 |
|
MWS::SWISH - index your data using swish-e |
24 |
|
|
25 |
|
=head1 DESCRIPTION |
26 |
|
|
27 |
|
This is a simple implementation to use SWISH-e with queries like |
28 |
|
Lucene (subject:something) |
29 |
|
|
30 |
|
=head1 METHODS |
31 |
|
|
32 |
|
=head2 open_index |
33 |
|
|
34 |
|
This will open the index located in the directory C<index_dir> (from the configuration file), |
35 |
|
whose name is the same as the name of the configuration. |
36 |
|
|
37 |
|
my $index = $self->open_index; |
38 |
|
|
39 |
|
It saves the index handle into $self->{index} and returns it. |
40 |
|
|
41 |
|
=cut |
42 |
|
|
43 |
sub open_index { |
sub open_index { |
44 |
my $self = shift; |
my $self = shift; |
45 |
|
|
58 |
return $swish; |
return $swish; |
59 |
} |
} |
60 |
|
|
61 |
|
=head2 search_index |
62 |
|
|
63 |
|
Takes an array of terms and operators like this: |
64 |
|
|
65 |
|
my @results = $self->search_index('field:search'); |
66 |
|
my @results = $self->search_index('fld1:word1', 'and', 'fld2:word2' ...); |
67 |
|
|
68 |
|
It returns an array of hashes with results. |
69 |
|
|
70 |
|
=cut |
71 |
|
|
72 |
sub search_index { |
sub search_index { |
73 |
my $self = shift; |
my $self = shift; |
74 |
|
|
124 |
|
|
125 |
# Fetch property $prop of search result $r, turn the stored "##lf##"
# markers back into real newlines, and convert the text with the
# file-level $utf2iso converter (UTF-8 to ISO-8859-2).
#
# Parameters:
#   $r    - result object; only its Property() method is used here
#   $prop - name of the property to fetch
#
# Returns the converted string, or nothing (empty list, undef in scalar
# context) when the property is undefined or empty, so callers can keep
# testing the result for truth.
sub p($$) {
	my ($r, $prop) = @_;

	my $value = $r->Property($prop);

	# Test for definedness and length instead of plain truth: a property
	# whose value is the string "0" is real data and must not be dropped.
	return unless defined $value && length $value;

	$value =~ s/##lf##/\n/gs;
	return $utf2iso->convert($value);
}
134 |
|
|
135 |
foreach my $p (qw(from to cc bcc)) { |
foreach my $p (qw(from to cc bcc)) { |
136 |
@{$self->{cache}->{$id}->{$p}} = (); |
@{$self->{cache}->{$id}->{$p}} = (); |
137 |
foreach my $v (split(/##/, p($r,$p.'_phrase'))) { |
my $props = p($r,$p.'_phrase') || last; |
138 |
|
foreach my $v (split(/##/, $props)) { |
139 |
push @{$self->{cache}->{$id}->{$p}}, $v; |
push @{$self->{cache}->{$id}->{$p}}, $v; |
140 |
$self->add_counter($p,$v); |
$self->add_counter($p,$v); |
141 |
} |
} |
156 |
return @res_ids; |
return @res_ids; |
157 |
} |
} |
158 |
|
|
159 |
# this function can be null for indexes which don't need special |
=head2 create_index |
160 |
# setup before add_index is called. however, swish-e support will |
|
161 |
# fork swish binary to create index at this point |
This function can be empty for indexes which don't need special |
162 |
|
setup before add_index is called. However, swish-e support will |
163 |
|
fork the swish binary to create the index at this point. |
164 |
|
|
165 |
|
=cut |
166 |
|
|
167 |
sub create_index { |
sub create_index { |
168 |
my $self = shift; |
my $self = shift; |
169 |
|
|
222 |
|
|
223 |
} |
} |
224 |
|
|
225 |
|
=head2 add_index |
226 |
|
|
227 |
|
This function will add a document to the index. |
228 |
|
|
229 |
|
$self->add_index('mailbox messageid', $document); |
230 |
|
|
231 |
|
=cut |
232 |
|
|
233 |
sub add_index { |
sub add_index { |
234 |
my $self = shift; |
my $self = shift; |
235 |
|
|
257 |
|
|
258 |
} |
} |
259 |
|
|
260 |
|
=head2 close_index |
261 |
|
|
262 |
|
Close the index at the end (dummy for swish-e). |
263 |
|
|
264 |
|
=cut |
265 |
|
|
266 |
# Intentionally does nothing: per the POD above, closing the index is a
# dummy operation for swish-e. The invocant is still taken off @_, and
# because that assignment is the last statement it remains the sub's
# implicit return value, as before.
sub close_index {
	my $self = shift @_;
}
270 |
|
|
271 |
# this is optional function which return words which sound like |
=head2 apropos_index |
272 |
|
|
273 |
|
This method is optional (it can return undef) and it returns words which |
274 |
|
sound like the specified word. |
275 |
|
|
276 |
|
my @words = $self->apropos_index('word') |
277 |
|
|
278 |
|
This implementation uses L<Text::Soundex>. |
279 |
|
|
280 |
|
=cut |
281 |
|
|
282 |
sub apropos_index { |
sub apropos_index { |
283 |
my $self = shift; |
my $self = shift; |
284 |
|
|