1 |
dpavlin |
12 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
use strict; |
4 |
|
|
|
5 |
|
|
# |
6 |
|
|
# simple implementation to use SWISH-e with queries like |
7 |
|
|
# Lucene (subject:something) |
8 |
|
|
# |
9 |
|
|
|
10 |
|
|
use SWISH::API; |
11 |
dpavlin |
14 |
use Text::Iconv; |
12 |
|
|
use Data::Dumper; |
13 |
dpavlin |
12 |
|
14 |
dpavlin |
14 |
# File-wide charset converters, shared by the subs below.
# $iso2utf is applied to text on its way to the index (queries, documents).
my $iso2utf = Text::Iconv->new( 'ISO-8859-2', 'UTF-8' );

# $utf2iso is applied to property values read back from search results.
my $utf2iso = Text::Iconv->new( 'UTF-8', 'ISO-8859-2' );
16 |
|
|
|
17 |
dpavlin |
12 |
# Return the SWISH::API handle for this object, opening the index on first use.
#
# Reads:
#   $self->{index}      - handle cached by an earlier call; returned as-is
#   $self->{index_file} - path to which "/swish-e" is appended to form the
#                         name passed to SWISH::API->new (required when no
#                         handle is cached yet)
#
# Returns the handle and stores it in $self->{index}.
# Croaks when no handle is cached and index_file is not set. If the freshly
# created handle reports an error, its AbortLastError method is invoked.
sub open_index {
    my $self = shift;

    # Reuse the handle from a previous call.
    return $self->{index} if $self->{index};

    # Carp is not imported at file level, so call croak fully qualified.
    require Carp;
    my $index_file = $self->{index_file}
        || Carp::croak("open_index needs index filename");
    $index_file .= "/swish-e";

    print STDERR "opening index '$index_file'\n";
    my $swish = SWISH::API->new($index_file);
    $swish->AbortLastError if $swish->Error;

    $self->{index} = $swish;

    return $swish;
}
35 |
|
|
|
36 |
|
|
# Run a query against the index and cache the properties of every hit.
#
# Arguments:
#   $s - query string (required). A "field:value" query is rewritten to
#        "field=(value)" before it is sent to the index.
#
# Side effects:
#   - $self->{total_hits} is set to the hit count reported by the results
#   - $self->{cache}->{$id} is filled for each returned hit with
#       from, to, cc, bcc  - array refs, built by splitting the matching
#                            "<name>_phrase" property on "##"
#       subject, body, date, id
#   - $self->add_counter is called for every from/to/cc/bcc value and
#     $self->add_counter_calendar with every hit's date_utime property
#   - diagnostics are written to STDERR
#
# Returns the list of hit ids (each hit's swishdocpath property).
# Croaks when no query is given.
sub search_index {
    my $self = shift;

    # Carp is not imported at file level, so call croak fully qualified.
    require Carp;
    my $s = shift || Carp::croak("search_index needs query");

    my $index = $self->open_index;

    if ($s =~ /:/) {
        my ($fld, $val) = split(/:/, $s, 2);
        $s = "$fld=($val)";
    }

    print STDERR "swish search: $s\n";

    # convert to UTF-8; keep the original string if conversion yields nothing
    $s = $iso2utf->convert($s) || $s;
    my $results = $index->Query($s);

    # Report a failed query instead of silently returning no hits.
    if ($index->Error) {
        print STDERR "swish query error: ", $index->ErrorString, "\n";
    }

    # store total number of hits
    $self->{'total_hits'} = $results->Hits;

    my @res_ids;

    # NOTE(review): with max_results unset or 0 the loop below stops after
    # the first hit (2 > 0) -- confirm that callers always set it.
    my $max_results = $self->{max_results} || 0;
    my $count = 1;

    while ( my $r = $results->NextResult ) {

        my $id = _result_property($r, "swishdocpath");
        push @res_ids, $id;

        my $cached = $self->{cache}->{$id} ||= {};

        foreach my $p (qw(from to cc bcc)) {
            # empty the list in place so an existing array ref is reused
            my $values = $cached->{$p} ||= [];
            @$values = ();

            my $joined = _result_property($r, $p . '_phrase');
            next unless defined $joined;

            foreach my $v (split(/##/, $joined)) {
                push @$values, $v;
                $self->add_counter($p, $v);
            }
        }

        foreach my $p (qw(subject body date)) {
            $cached->{$p} = _result_property($r, $p);
        }

        $self->add_counter_calendar(_result_property($r, 'date_utime'));

        # this is redundant, but needed for templates later...
        $cached->{'id'} = $id;

        last if (++$count > $max_results);
    }

    return @res_ids;
}

# Fetch one property of a search result, turn the "##lf##" markers written
# by add_index back into newlines and convert the text with $utf2iso.
# Returns undef when the result has no value for the property.
sub _result_property {
    my ($result, $name) = @_;

    my $value = $result->Property($name);

    # always hand back exactly one value, also in list context
    return undef unless defined $value;

    $value =~ s/##lf##/\n/g;
    return $utf2iso->convert($value);
}
95 |
|
|
|
96 |
|
|
# Print one document to STDOUT as an XML <message> preceded by Path-Name,
# Content-Length and Document-Type headers.
# NOTE(review): this looks like the input format of swish-e's "prog"
# input method -- confirm against the indexer invocation.
#
# Arguments:
#   $mbox_id  - "<mbox> <id>"; split on the first whitespace character and
#               emitted in the Path-Name header (required)
#   $document - hash ref of tag name => text; each non-empty value becomes
#               a <tag> element wrapped in CDATA (required)
#
# Line breaks in the values are stored as "##lf##" markers. The assembled
# XML is converted with $iso2utf before it is printed.
#
# Returns the result of the final print. Returns nothing, after a message
# on STDERR, when the XML cannot be converted.
# Croaks when either argument is missing.
sub add_index {
    my $self = shift;

    # Carp is not imported at file level, so call croak fully qualified.
    require Carp;
    my $mbox_id  = shift || Carp::croak("add_index needs mbox_id");
    my $document = shift || Carp::croak("add_index needs document");

    my ($mbox, $id) = split(/\s/, $mbox_id, 2);

    my $xml = qq{<message>};
    # sorted so that the same document always produces the same output
    foreach my $tag (sort keys %$document) {
        my $data = $document->{$tag};

        # skip only missing or empty values; the string "0" is real data
        next unless defined $data && length $data;

        # save [cr/]lf before conversion to XML
        # (CRLF, bare LF and the LF+CR pair each become one marker)
        $data =~ s/\r\n|\n\r?/##lf##/g;

        # "]]>" would end the CDATA section early: split it over two sections
        $data =~ s/\]\]>/]]]]><![CDATA[>/g;

        $xml .= "<$tag><![CDATA[" . $data . "]]></$tag>\n";
    }
    $xml .= qq{</message>};

    $xml = $iso2utf->convert($xml);
    if (! defined $xml) {
        # printing a header with an empty body would only confuse the reader
        print STDERR "add_index: can't convert document '$mbox_id' to UTF-8, skipping\n";
        return;
    }

    # length in bytes, as opposed to chars; +1 for the newline printed
    # after the document
    my $content_length = do { use bytes; length($xml) + 1 };

    print "Path-Name: $mbox $id\n";
    print "Content-Length: " . $content_length . "\n";
    print "Document-Type: XML\n\n$xml\n";
}
122 |
|
|
|
123 |
|
|
# Counterpart of open_index. Nothing is released here: the handle cached
# in $self->{index} is left untouched.
# Returns the invocant.
sub close_index {
    my ($self) = @_;

    return $self;
}
127 |
|
|
|
128 |
|
|
1; |