/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 23 - (show annotations)
Sat May 8 02:21:50 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 7910 byte(s)
misc fixes and improvements -- this should be another all-working version

1 #!/usr/bin/perl -w
2
3 package MWS;
4
5 use strict;
6 use warnings;
7 use Carp;
8
9 use Mail::Box::Manager;
10 use Config::IniFiles;
11 use POSIX qw(strftime);
12 use Text::Autoformat;
13 use Text::Iconv;
14 use Text::Unaccent;
15 use Date::Parse;
16 use POSIX qw(strftime);
17 use MIME::Base64;
18
19 #use MWS_plucene;
20 use MWS_swish;
21
22 require Exporter;
23
24 our @ISA = qw(Exporter);
25
26 our %EXPORT_TAGS = ();
27 our @EXPORT_OK;
28 our @EXPORT;
29
30 our $VERSION = '1.00';
31
32 my $folder; # placeholder for folders
33
34 my $debug = 2;
35
36 sub new {
37 my $class = shift;
38 my $self = {};
39 bless($self, $class);
40
41 my $config_file = shift || die "need index file";
42
43 $self->{config} = new Config::IniFiles( -file => $config_file );
44
45 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
46
47 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
48 $self->{index_file} = $index_file;
49
50 # placeholder for opened folders
51 $self->{folder} = {};
52
53 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
54 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
55 $self->reset_counters;
56
57 return $self;
58 }
59
60 sub normalize_string {
61 my $self = shift;
62
63 my $v = shift || return;
64
65 $v = unac_string('ISO-8859-2', $v);
66 $v = join('',sort split(/\s+/,$v));
67 $v =~ s/\W+//g;
68
69 return lc($v);
70 }
71
72 # reset tables for search results
73 sub reset_counters {
74 my $self = shift;
75
76 $self->{counter} = {};
77
78 # foreach my $c (qw(thread from to cc bcc lists links att)) {
79 # $self->{counter}->{$c} = {};
80 # }
81
82 }
83
84 sub add_counter($$) {
85 my $self = shift;
86
87 my ($c,$v) = @_;
88 my $k = $self->normalize_string($v);
89
90 $self->{counter}->{$c}->{$k}->{name} = $v;
91 return $self->{counter}->{$c}->{$k}->{usage}++;
92 }
93
94 sub yyyymmdd {
95 my $self = shift;
96
97 my $t = shift || time;
98
99 my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
100 $mm++;
101 $yyyy+=1900;
102 return ($yyyy,$mm,$dd);
103 }
104
105 sub fmtdate {
106 my $self = shift;
107
108 my @out;
109 my @formats = qw(%04d %02d %02d);
110 while (my $v = shift) {
111 my $f = shift @formats;
112 push @out, sprintf($f, $v);
113 }
114
115 print STDERR "fmtdate: ",join('|',@out),"\n";
116
117 return (wantarray ? @out : join("-",@out));
118 }
119
120 sub add_counter_calendar($) {
121 my $self = shift;
122
123 my $t = shift || croak "add_counter_calendar without argument!";
124
125 my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
126
127 return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
128 }
129
130
131 sub counter {
132 my $self = shift;
133
134 my $c = shift || return;
135
136 return if (! $self->{counter}->{$c});
137
138 return $self->{counter}->{$c};
139 }
140
141 sub mbox_name2path {
142 my $self = shift;
143
144 my $mbox = shift || croak "folder_name2path needs mbox name";
145
146 return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
147 }
148
149 sub open_folder {
150 my $self = shift;
151
152 my $mbox = shift || croak "open_folder needs mbox name";
153
154 if (! $self->{folder}->{$mbox}) {
155 my $mbox_path = $self->mbox_name2path($mbox);
156
157 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
158
159 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
160
161 print STDERR "open_folder($mbox) ok\n" if ($debug);
162 }
163
164 $self->{fetch_count} = 0;
165
166 return $self->{folder}->{$mbox};
167
168 }
169
170 sub close_folder {
171 my $self = shift;
172
173 my $mbox = shift || croak "open_folder needs mbox name";
174
175 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
176
177 # XXX this is rather agressive!!!
178 $self->{folder} = {};
179 return
180 }
181
182 sub fetch_message {
183 my $self = shift;
184
185 my $mbox_id = shift || die "need mbox_id!";
186 my ($mbox,$id) = split(/ /,$mbox_id);
187
188 # return message with ID
189 print STDERR "fetch $id from $mbox\n" if ($debug);
190
191 if ($self->{fetch_count}++ > 100) {
192 $self->close_folder($mbox);
193 print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
194 }
195
196 my $msg = $self->open_folder($mbox)->find($id);
197 if ($msg) {
198 return $msg;
199 } else {
200 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
201 return;
202 }
203 }
204
205
206 sub search {
207 my $self = shift;
208
209 carp "search called without argument!" if (! @_);
210
211 $self->reset_counters;
212
213 print STDERR "search(",join(" ",@_),")\n" if ($debug == 2);
214 my @index_ids = $self->search_index(@_);
215
216 $self->{'index_ids'} = \@index_ids;
217
218 #my $results = $#index_ids + 1;
219 #$self->{'results'} = $results;
220
221 my $results = $self->{'total_hits'} || ($#index_ids + 1);
222
223 $self->{'curr_result'} = 0;
224
225 $self->reset_counters;
226
227 print STDERR "$results results\n" if ($debug == 2);
228
229 return $results || 'error';
230 }
231
232 sub decode_qp($) {
233 my $self = shift;
234
235 my $tmp = shift || return;
236
237 sub decode($$$) {
238 my ($cp,$enc,$qp) = @_;
239
240 print STDERR "decode($cp,$qp) -> " if ($debug == 2);
241
242 if (uc($enc) eq "Q") {
243 $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
244 $qp =~ s/_/ /g;
245 } elsif (uc($enc) eq "B") {
246 $qp = decode_base64($qp);
247 } else {
248 croak "unsupported encoding '$enc' in decode_qp\n";
249 return $qp;
250 }
251
252 print STDERR "$qp\n" if ($debug == 2);
253
254 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
255 return $iconv->convert($qp) || '';
256 }
257
258 $tmp =~ s/=\?([^\?]+)\?([QB])\?(.+?)\?=/decode($1,$2,$3)/ige;
259 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
260 #print STDERR "$tmp\n" if ($debug == 2);
261 return $tmp;
262 }
263
264 sub unroll($$$) {
265 my $self = shift;
266
267 my ($message,$part,$sub) = @_;
268
269 my @arr;
270
271 return if (! $message->$part);
272
273 foreach my $from ($message->$part) {
274 my $tmp = $from->$sub || next;
275
276 $tmp = $self->decode_qp($tmp);
277 push @arr, $tmp;
278 }
279
280 return @arr;
281 }
282
283 sub fetch_all_results {
284 my $self = shift;
285
286 croak "results called before search!" if (! $self->{'index_ids'});
287
288 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
289
290 my @arr;
291
292 foreach my $id (@{$self->{'index_ids'}}) {
293 push @arr, $self->fetch_result_by_id($id);
294 }
295
296
297 return @arr;
298 }
299
300 sub fetch_result {
301 my $self = shift;
302
303 my $args = {@_};
304
305 croak "results called before search!" if (! $self->{'index_ids'});
306
307 my $curr = $self->{'curr_result'}++;
308
309 my $id = $self->{'index_ids'}->[$curr];
310
311 print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
312
313 return $self->fetch_result_by_id($id);
314 }
315
316 sub plain_text_body {
317 my $self = shift;
318 my $message = shift || croak "plain_text_body needs message!";
319
320 my $body;
321
322 if (! $message->isMultipart) {
323 $body = $message->decoded->string;
324 } else {
325 foreach my $part ($message->parts) {
326 if ($part->body->mimeType eq 'text/plain') {
327 $body = $part->decoded->string;
328 last;
329 }
330 }
331 }
332
333 # reformat with Text::Autoformat
334 my $wrap = $self->{wrap_margin};
335 if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
336 $body =~ s/[\r\n]/\n/gs;
337 $body = autoformat($body, {right=>$wrap, all=>1});
338 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
339 }
340
341 return $body;
342 }
343
344
345 sub fetch_result_by_id {
346 my $self = shift;
347
348 my $id = shift || return;
349
350 my $row = $self->{cache}->{$id};
351
352 if (! $row) {
353
354 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
355
356 my $message = $self->fetch_message($id) || return;
357
358 $row->{'id'} = $id;
359
360 foreach my $p (qw(from to cc bcc)) {
361 foreach my $v ($self->unroll($message,$p,'phrase')) {
362 push @{$row->{$p}},$v;
363 $self->add_counter($p,$v);
364 }
365 }
366 $row->{'subject'} = $self->decode_qp($message->subject);
367 $row->{'body'} = $self->plain_text_body($message);
368 my $utime = str2time($message->date);
369
370 $row->{'date_utime'} = $utime;
371
372 $row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
373 $self->add_counter_calendar($utime);
374
375 # XXX store in cache?
376 $self->{cache}->{$id} = $row;
377 print STDERR "$id stored in cache\n" if ($debug == 2);
378 } else {
379 print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
380 foreach my $p (qw(from to cc bcc)) {
381 foreach my $v (@{$row->{$p}}) {
382 $self->add_counter($p,$v);
383 }
384 }
385
386 $self->add_counter_calendar($row->{date_utime});
387 }
388
389 return $row;
390
391 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26