/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 27 - (show annotations)
Sat May 8 20:34:26 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 8135 byte(s)
this is 0.9-rc1:
- scripts now accept configuration file as parametar, if none is specified,
  they will use global.conf in current directory
- more css design
- how to install using PAR
- mbox2index can now call swish-e by itself, swish-e configuration
  moved to MWS_swish.pm
- httpd server now shows 30 newest messages in this year when accessed
  through root URL

1 #!/usr/bin/perl -w
2
3 package MWS;
4
5 use strict;
6 use warnings;
7 use Carp;
8
9 use Mail::Box::Manager;
10 use Config::IniFiles;
11 use POSIX qw(strftime);
12 use Text::Autoformat;
13 use Text::Iconv;
14 use Text::Unaccent;
15 use Date::Parse;
16 use POSIX qw(strftime);
17 use MIME::Base64;
18
19 #use MWS_plucene;
20 use MWS_swish;
21
22 require Exporter;
23
24 our @ISA = qw(Exporter);
25
26 our %EXPORT_TAGS = ();
27 our @EXPORT_OK;
28 our @EXPORT;
29
30 our $VERSION = '1.00';
31
32 my $folder; # placeholder for folders
33
34 my $debug = 2;
35
36 sub new {
37 my $class = shift;
38 my $self = {};
39 bless($self, $class);
40
41 my $config_file = shift || die "need index file";
42
43 $self->{config} = new Config::IniFiles( -file => $config_file );
44
45 $self->{config_file} = $config_file;
46 $config_file =~ s/\.conf.*$//;
47 $self->{config_name} = $config_file;
48
49 my $index_dir = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
50
51 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
52 $self->{index_dir} = $index_dir;
53
54 # placeholder for opened folders
55 $self->{folder} = {};
56
57 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
58 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
59 $self->reset_counters;
60
61 return $self;
62 }
63
64 sub normalize_string {
65 my $self = shift;
66
67 my $v = shift || return;
68
69 $v = unac_string('ISO-8859-2', $v);
70 $v = join('',sort split(/\s+/,$v));
71 $v =~ s/\W+//g;
72
73 return lc($v);
74 }
75
76 # reset tables for search results
77 sub reset_counters {
78 my $self = shift;
79
80 $self->{counter} = {};
81
82 # foreach my $c (qw(thread from to cc bcc lists links att)) {
83 # $self->{counter}->{$c} = {};
84 # }
85
86 }
87
88 sub add_counter($$) {
89 my $self = shift;
90
91 my ($c,$v) = @_;
92 my $k = $self->normalize_string($v);
93
94 $self->{counter}->{$c}->{$k}->{name} = $v;
95 return $self->{counter}->{$c}->{$k}->{usage}++;
96 }
97
98 sub yyyymmdd {
99 my $self = shift;
100
101 my $t = shift || time;
102
103 my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
104 $mm++;
105 $yyyy+=1900;
106 return ($yyyy,$mm,$dd);
107 }
108
109 sub fmtdate {
110 my $self = shift;
111
112 my @out;
113 my @formats = qw(%04d %02d %02d);
114 while (my $v = shift) {
115 my $f = shift @formats;
116 push @out, sprintf($f, $v);
117 }
118
119 print STDERR "fmtdate: ",join('|',@out),"\n";
120
121 return (wantarray ? @out : join("-",@out));
122 }
123
124 sub add_counter_calendar($) {
125 my $self = shift;
126
127 my $t = shift || croak "add_counter_calendar without argument!";
128
129 my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
130
131 return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
132 }
133
134
135 sub counter {
136 my $self = shift;
137
138 my $c = shift || return;
139
140 return if (! $self->{counter}->{$c});
141
142 return $self->{counter}->{$c};
143 }
144
145 sub mbox_name2path {
146 my $self = shift;
147
148 my $mbox = shift || croak "folder_name2path needs mbox name";
149
150 return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
151 }
152
153 sub open_folder {
154 my $self = shift;
155
156 my $mbox = shift || croak "open_folder needs mbox name";
157
158 if (! $self->{folder}->{$mbox}) {
159 my $mbox_path = $self->mbox_name2path($mbox);
160
161 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
162
163 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
164
165 print STDERR "open_folder($mbox) ok\n" if ($debug);
166 }
167
168 $self->{fetch_count} = 0;
169
170 return $self->{folder}->{$mbox};
171
172 }
173
174 sub close_folder {
175 my $self = shift;
176
177 my $mbox = shift || croak "open_folder needs mbox name";
178
179 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
180
181 # XXX this is rather agressive!!!
182 $self->{folder} = {};
183 return
184 }
185
186 sub fetch_message {
187 my $self = shift;
188
189 my $mbox_id = shift || die "need mbox_id!";
190 my ($mbox,$id) = split(/ /,$mbox_id);
191
192 # return message with ID
193 print STDERR "fetch $id from $mbox\n" if ($debug);
194
195 if ($self->{fetch_count}++ > 100) {
196 $self->close_folder($mbox);
197 print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
198 }
199
200 my $msg = $self->open_folder($mbox)->find($id);
201 if ($msg) {
202 return $msg;
203 } else {
204 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
205 return;
206 }
207 }
208
209
210 sub search {
211 my $self = shift;
212
213 carp "search called without argument!" if (! @_);
214
215 $self->reset_counters;
216
217 print STDERR "search(",join(" ",@_),")\n" if ($debug == 2);
218 my @index_ids = $self->search_index(@_);
219
220 $self->{'index_ids'} = \@index_ids;
221
222 #my $results = $#index_ids + 1;
223 #$self->{'results'} = $results;
224
225 my $results = $self->{'total_hits'} || ($#index_ids + 1);
226
227 $self->{'curr_result'} = 0;
228
229 $self->reset_counters;
230
231 print STDERR "$results results\n" if ($debug == 2);
232
233 return $results || 'error';
234 }
235
236 sub decode_qp($) {
237 my $self = shift;
238
239 my $tmp = shift || return;
240
241 sub decode($$$) {
242 my ($cp,$enc,$qp) = @_;
243
244 print STDERR "decode($cp,$qp) -> " if ($debug == 2);
245
246 if (uc($enc) eq "Q") {
247 $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
248 $qp =~ s/_/ /g;
249 } elsif (uc($enc) eq "B") {
250 $qp = decode_base64($qp);
251 } else {
252 croak "unsupported encoding '$enc' in decode_qp\n";
253 return $qp;
254 }
255
256 print STDERR "$qp\n" if ($debug == 2);
257
258 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
259 return $iconv->convert($qp) || '';
260 }
261
262 $tmp =~ s/=\?([^\?]+)\?([QB])\?(.+?)\?=/decode($1,$2,$3)/ige;
263 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
264 #print STDERR "$tmp\n" if ($debug == 2);
265 return $tmp;
266 }
267
268 sub unroll($$$) {
269 my $self = shift;
270
271 my ($message,$part,$sub) = @_;
272
273 my @arr;
274
275 return if (! $message->$part);
276
277 foreach my $from ($message->$part) {
278 my $tmp = $from->$sub || next;
279
280 $tmp = $self->decode_qp($tmp);
281 push @arr, $tmp;
282 }
283
284 return @arr;
285 }
286
287 sub fetch_all_results {
288 my $self = shift;
289
290 croak "results called before search!" if (! $self->{'index_ids'});
291
292 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
293
294 my @arr;
295
296 foreach my $id (@{$self->{'index_ids'}}) {
297 push @arr, $self->fetch_result_by_id($id);
298 }
299
300
301 return @arr;
302 }
303
304 sub fetch_result {
305 my $self = shift;
306
307 my $args = {@_};
308
309 croak "results called before search!" if (! $self->{'index_ids'});
310
311 my $curr = $self->{'curr_result'}++;
312
313 my $id = $self->{'index_ids'}->[$curr];
314
315 print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
316
317 return $self->fetch_result_by_id($id);
318 }
319
320 sub plain_text_body {
321 my $self = shift;
322 my $message = shift || croak "plain_text_body needs message!";
323
324 my $body;
325
326 if (! $message->isMultipart) {
327 $body = $message->decoded->string;
328 } else {
329 foreach my $part ($message->parts) {
330 if ($part->body->mimeType eq 'text/plain') {
331 $body = $part->decoded->string;
332 last;
333 }
334 }
335 }
336
337 if (! $body) {
338 $body = "[plain/text body not found]" if ($debug == 2);
339 print STDERR "plain/text body not found\n" if ($debug);
340 return;
341 }
342
343 # reformat with Text::Autoformat
344 my $wrap = $self->{wrap_margin};
345 if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
346 $body = autoformat($body, {right=>$wrap, all=>1});
347 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
348 }
349
350 return $body;
351 }
352
353
354 sub fetch_result_by_id {
355 my $self = shift;
356
357 my $id = shift || return;
358
359 my $row = $self->{cache}->{$id};
360
361 if (! $row) {
362
363 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
364
365 my $message = $self->fetch_message($id) || return;
366
367 $row->{'id'} = $id;
368
369 foreach my $p (qw(from to cc bcc)) {
370 foreach my $v ($self->unroll($message,$p,'phrase')) {
371 push @{$row->{$p}},$v;
372 $self->add_counter($p,$v);
373 }
374 }
375 $row->{'subject'} = $self->decode_qp($message->subject);
376 $row->{'body'} = $self->plain_text_body($message);
377 my $utime = str2time($message->date);
378
379 $row->{'date_utime'} = $utime;
380
381 $row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
382 $self->add_counter_calendar($utime);
383
384 # XXX store in cache?
385 $self->{cache}->{$id} = $row;
386 print STDERR "$id stored in cache\n" if ($debug == 2);
387 } else {
388 print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
389 foreach my $p (qw(from to cc bcc)) {
390 foreach my $v (@{$row->{$p}}) {
391 $self->add_counter($p,$v);
392 }
393 }
394
395 $self->add_counter_calendar($row->{date_utime});
396 }
397
398 return $row;
399
400 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26