/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 19 - (show annotations)
Fri May 7 20:52:34 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 6602 byte(s)
added From, To, Cc lists with unique feature: it will count same names
in different order as, well, same name :-) It will also remove accented
characters when counting names, to detect false duplicates (using
Text::Unaccent)

1 #!/usr/bin/perl -w
2
3 package MWS;
4
5 use strict;
6 use warnings;
7 use Carp;
8
9 use Mail::Box::Manager;
10 use Config::IniFiles;
11 use POSIX qw(strftime);
12 use Text::Autoformat;
13 use Text::Iconv;
14 use Text::Unaccent;
15
16 #use MWS_plucene;
17 use MWS_swish;
18
19 require Exporter;
20
21 our @ISA = qw(Exporter);
22
23 our %EXPORT_TAGS = ();
24 our @EXPORT_OK;
25 our @EXPORT;
26
27 our $VERSION = '1.00';
28
29 my $folder; # placeholder for folders
30
31 my $debug = 2;
32
33 sub new {
34 my $class = shift;
35 my $self = {};
36 bless($self, $class);
37
38 my $config_file = shift || die "need index file";
39
40 $self->{config} = new Config::IniFiles( -file => $config_file );
41
42 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
43
44 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
45 $self->{index_file} = $index_file;
46
47 # placeholder for opened folders
48 $self->{folder} = {};
49
50 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
51 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
52 $self->reset_counters;
53
54 return $self;
55 }
56
57 sub normalize_string {
58 my $self = shift;
59
60 my $v = shift || return;
61
62 $v = unac_string('ISO-8859-2', $v);
63 $v = join('',sort split(/\s+/,$v));
64 $v =~ s/\W+//g;
65
66 return $v;
67 }
68
69 # reset tables for search results
70 sub reset_counters {
71 my $self = shift;
72
73 $self->{counter} = {};
74
75 # foreach my $c (qw(thread from to cc bcc lists links att)) {
76 # $self->{counter}->{$c} = {};
77 # }
78
79 }
80
81 sub add_counter($$) {
82 my $self = shift;
83
84 my ($c,$v) = @_;
85 my $k = $self->normalize_string($v);
86
87 $self->{counter}->{$c}->{$k}->{name} = $v;
88 return $self->{counter}->{$c}->{$k}->{usage}++;
89 }
90
91 sub counter {
92 my $self = shift;
93
94 my $c = shift || return;
95
96 return if (! $self->{counter}->{$c});
97
98 return $self->{counter}->{$c};
99 }
100
101 sub mbox_name2path {
102 my $self = shift;
103
104 my $mbox = shift || croak "folder_name2path needs mbox name";
105
106 return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
107 }
108
109 sub open_folder {
110 my $self = shift;
111
112 my $mbox = shift || croak "open_folder needs mbox name";
113
114 if (! $self->{folder}->{$mbox}) {
115 my $mbox_path = $self->mbox_name2path($mbox);
116
117 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
118
119 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
120
121 print STDERR "open_folder($mbox) ok\n" if ($debug);
122 }
123
124 $self->{fetch_count} = 0;
125
126 return $self->{folder}->{$mbox};
127
128 }
129
130 sub close_folder {
131 my $self = shift;
132
133 my $mbox = shift || croak "open_folder needs mbox name";
134
135 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
136
137 # XXX this is rather agressive!!!
138 $self->{folder} = {};
139 return
140 }
141
142 sub fetch_message {
143 my $self = shift;
144
145 my $mbox_id = shift || die "need mbox_id!";
146 my ($mbox,$id) = split(/ /,$mbox_id);
147
148 # return message with ID
149 print STDERR "fetch $id from $mbox\n" if ($debug);
150
151 if ($self->{fetch_count}++ > 100) {
152 $self->close_folder($mbox);
153 print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
154 }
155
156 my $msg = $self->open_folder($mbox)->find($id);
157 if ($msg) {
158 return $msg;
159 } else {
160 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
161 return;
162 }
163 }
164
165
166 sub search {
167 my $self = shift;
168
169 my $s = shift || carp "search called without argument!";
170
171 $self->reset_counters;
172
173 print STDERR "search_index($s)\n" if ($debug == 2);
174 my @index_ids = $self->search_index($s);
175
176 $self->{'index_ids'} = \@index_ids;
177
178 #my $results = $#index_ids + 1;
179 #$self->{'results'} = $results;
180
181 my $results = $self->{'total_hits'} || ($#index_ids + 1);
182
183 $self->{'curr_result'} = 0;
184
185 print STDERR "$results results\n" if ($debug == 2);
186
187 return $results || 'error';
188 }
189
190 sub decode_qp($) {
191 my $self = shift;
192
193 my $tmp = shift || return;
194
195 sub decode($$) {
196 my ($cp,$qp) = @_;
197 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
198 print STDERR "decode($cp,$qp) -> " if ($debug == 2);
199 $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
200 $qp =~ s/_/ /g;
201 print STDERR "$qp\n" if ($debug == 2);
202 return $iconv->convert($qp) || $qp;
203 }
204
205 $tmp =~ s/=\?([^\?]+)\?Q\?(.+?)\?=/decode($1,$2)/ex;
206 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
207 return $tmp;
208 }
209
210 sub unroll($$$) {
211 my $self = shift;
212
213 my ($message,$part,$sub) = @_;
214
215 my @arr;
216
217 return if (! $message->$part);
218
219 foreach my $from ($message->$part) {
220 my $tmp = $from->$sub || next;
221
222 $tmp = $self->decode_qp($tmp);
223 push @arr, $tmp;
224 }
225
226 return @arr;
227 }
228
229 sub fetch_all_results {
230 my $self = shift;
231
232 croak "results called before search!" if (! $self->{'index_ids'});
233
234 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
235
236 my @arr;
237
238 foreach my $id (@{$self->{'index_ids'}}) {
239 push @arr, $self->fetch_result_by_id($id);
240 }
241
242
243 return @arr;
244 }
245
246 sub fetch_result {
247 my $self = shift;
248
249 my $args = {@_};
250
251 croak "results called before search!" if (! $self->{'index_ids'});
252
253 my $curr = $self->{'curr_result'}++;
254
255 my $id = $self->{'index_ids'}->[$curr];
256
257 print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
258
259 return $self->fetch_result_by_id($id);
260 }
261
262 sub plain_text_body {
263 my $self = shift;
264 my $message = shift || croak "plain_text_body needs message!";
265
266 my $body;
267
268 if (! $message->isMultipart) {
269 $body = $message->decoded->string;
270 } else {
271 foreach my $part ($message->parts) {
272 if ($part->body->mimeType eq 'text/plain') {
273 $body = $part->decoded->string;
274 last;
275 }
276 }
277 }
278
279 # reformat with Text::Autoformat
280 my $wrap = $self->{wrap_margin};
281 if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
282 $body =~ s/[\r\n]/\n/gs;
283 $body = autoformat($body, {right=>$wrap});
284 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
285 }
286
287 return $body;
288 }
289
290
291 sub fetch_result_by_id {
292 my $self = shift;
293
294 my $id = shift || return;
295
296 my $row = $self->{cache}->{$id};
297
298 if (! $row) {
299
300 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
301
302 my $message = $self->fetch_message($id) || return;
303
304 $row->{'id'} = $id;
305
306 foreach my $p (qw(from to cc bcc)) {
307 foreach my $v ($self->unroll($message,'from','phrase')) {
308 push @{$row->{$p}},$v;
309 $self->add_counter($p,$v);
310 }
311 }
312 $row->{'subject'} = $self->decode_qp($message->subject);
313 $row->{'body'} = $self->plain_text_body($message);
314 $row->{'date'} = $message->date;
315
316 # XXX store in cache?
317 $self->{cache}->{$id} = $row;
318 print STDERR "$id stored in cache\n" if ($debug == 2);
319 } else {
320 print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
321 }
322
323 return $row;
324
325 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26