/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 27 - (hide annotations)
Sat May 8 20:34:26 2004 UTC (20 years ago) by dpavlin
File size: 8135 byte(s)
this is 0.9-rc1:
- scripts now accept configuration file as parametar, if none is specified,
  they will use global.conf in current directory
- more css design
- how to install using PAR
- mbox2index can now call swish-e by itself, swish-e configuration
  moved to MWS_swish.pm
- httpd server now shows 30 newest messages in this year when accessed
  through root URL

1 dpavlin 2 #!/usr/bin/perl -w
2    
3     package MWS;
4    
5     use strict;
6     use warnings;
7     use Carp;
8    
9 dpavlin 3 use Mail::Box::Manager;
10 dpavlin 4 use Config::IniFiles;
11 dpavlin 13 use POSIX qw(strftime);
12 dpavlin 14 use Text::Autoformat;
13 dpavlin 16 use Text::Iconv;
14 dpavlin 19 use Text::Unaccent;
15 dpavlin 20 use Date::Parse;
16     use POSIX qw(strftime);
17 dpavlin 21 use MIME::Base64;
18 dpavlin 14
19 dpavlin 12 #use MWS_plucene;
20     use MWS_swish;
21 dpavlin 3
22 dpavlin 2 require Exporter;
23    
24     our @ISA = qw(Exporter);
25    
26     our %EXPORT_TAGS = ();
27     our @EXPORT_OK;
28     our @EXPORT;
29    
30     our $VERSION = '1.00';
31    
32     my $folder; # placeholder for folders
33    
34 dpavlin 13 my $debug = 2;
35 dpavlin 2
36     sub new {
37     my $class = shift;
38     my $self = {};
39     bless($self, $class);
40    
41 dpavlin 4 my $config_file = shift || die "need index file";
42 dpavlin 2
43 dpavlin 4 $self->{config} = new Config::IniFiles( -file => $config_file );
44 dpavlin 2
45 dpavlin 27 $self->{config_file} = $config_file;
46     $config_file =~ s/\.conf.*$//;
47     $self->{config_name} = $config_file;
48 dpavlin 4
49 dpavlin 27 my $index_dir = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
50    
51 dpavlin 12 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
52 dpavlin 27 $self->{index_dir} = $index_dir;
53 dpavlin 4
54 dpavlin 2 # placeholder for opened folders
55     $self->{folder} = {};
56    
57 dpavlin 14 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
58 dpavlin 17 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
59 dpavlin 19 $self->reset_counters;
60 dpavlin 14
61 dpavlin 2 return $self;
62     }
63    
64 dpavlin 19 sub normalize_string {
65     my $self = shift;
66    
67     my $v = shift || return;
68    
69     $v = unac_string('ISO-8859-2', $v);
70     $v = join('',sort split(/\s+/,$v));
71     $v =~ s/\W+//g;
72    
73 dpavlin 23 return lc($v);
74 dpavlin 19 }
75    
76     # reset tables for search results
77     sub reset_counters {
78     my $self = shift;
79    
80     $self->{counter} = {};
81    
82     # foreach my $c (qw(thread from to cc bcc lists links att)) {
83     # $self->{counter}->{$c} = {};
84     # }
85    
86     }
87    
88     sub add_counter($$) {
89     my $self = shift;
90    
91     my ($c,$v) = @_;
92     my $k = $self->normalize_string($v);
93    
94     $self->{counter}->{$c}->{$k}->{name} = $v;
95     return $self->{counter}->{$c}->{$k}->{usage}++;
96     }
97    
98 dpavlin 20 sub yyyymmdd {
99     my $self = shift;
100    
101     my $t = shift || time;
102    
103     my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
104     $mm++;
105     $yyyy+=1900;
106     return ($yyyy,$mm,$dd);
107     }
108    
109     sub fmtdate {
110     my $self = shift;
111    
112     my @out;
113     my @formats = qw(%04d %02d %02d);
114     while (my $v = shift) {
115     my $f = shift @formats;
116     push @out, sprintf($f, $v);
117     }
118    
119     print STDERR "fmtdate: ",join('|',@out),"\n";
120    
121     return (wantarray ? @out : join("-",@out));
122     }
123    
124     sub add_counter_calendar($) {
125     my $self = shift;
126    
127     my $t = shift || croak "add_counter_calendar without argument!";
128    
129     my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
130    
131     return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
132     }
133    
134    
135 dpavlin 19 sub counter {
136     my $self = shift;
137    
138     my $c = shift || return;
139    
140     return if (! $self->{counter}->{$c});
141    
142     return $self->{counter}->{$c};
143     }
144    
145 dpavlin 14 sub mbox_name2path {
146     my $self = shift;
147    
148     my $mbox = shift || croak "folder_name2path needs mbox name";
149    
150     return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
151     }
152    
153 dpavlin 7 sub open_folder {
154 dpavlin 2 my $self = shift;
155    
156 dpavlin 7 my $mbox = shift || croak "open_folder needs mbox name";
157 dpavlin 2
158     if (! $self->{folder}->{$mbox}) {
159 dpavlin 14 my $mbox_path = $self->mbox_name2path($mbox);
160    
161 dpavlin 13 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
162 dpavlin 14
163 dpavlin 4 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
164 dpavlin 14
165     print STDERR "open_folder($mbox) ok\n" if ($debug);
166 dpavlin 2 }
167    
168 dpavlin 16 $self->{fetch_count} = 0;
169    
170 dpavlin 7 return $self->{folder}->{$mbox};
171    
172     }
173    
174 dpavlin 14 sub close_folder {
175     my $self = shift;
176    
177     my $mbox = shift || croak "open_folder needs mbox name";
178    
179 dpavlin 16 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
180    
181     # XXX this is rather agressive!!!
182     $self->{folder} = {};
183     return
184 dpavlin 14 }
185    
186 dpavlin 7 sub fetch_message {
187     my $self = shift;
188    
189     my $mbox_id = shift || die "need mbox_id!";
190     my ($mbox,$id) = split(/ /,$mbox_id);
191    
192     # return message with ID
193 dpavlin 13 print STDERR "fetch $id from $mbox\n" if ($debug);
194 dpavlin 16
195     if ($self->{fetch_count}++ > 100) {
196     $self->close_folder($mbox);
197     print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
198     }
199    
200 dpavlin 17 my $msg = $self->open_folder($mbox)->find($id);
201     if ($msg) {
202     return $msg;
203     } else {
204 dpavlin 2 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
205 dpavlin 17 return;
206     }
207 dpavlin 2 }
208    
209    
210     sub search {
211     my $self = shift;
212    
213 dpavlin 21 carp "search called without argument!" if (! @_);
214 dpavlin 2
215 dpavlin 19 $self->reset_counters;
216    
217 dpavlin 21 print STDERR "search(",join(" ",@_),")\n" if ($debug == 2);
218     my @index_ids = $self->search_index(@_);
219 dpavlin 2
220     $self->{'index_ids'} = \@index_ids;
221    
222 dpavlin 17 #my $results = $#index_ids + 1;
223     #$self->{'results'} = $results;
224    
225     my $results = $self->{'total_hits'} || ($#index_ids + 1);
226 dpavlin 2
227     $self->{'curr_result'} = 0;
228    
229 dpavlin 23 $self->reset_counters;
230    
231 dpavlin 13 print STDERR "$results results\n" if ($debug == 2);
232    
233 dpavlin 2 return $results || 'error';
234     }
235    
236 dpavlin 16 sub decode_qp($) {
237     my $self = shift;
238    
239     my $tmp = shift || return;
240    
241 dpavlin 21 sub decode($$$) {
242     my ($cp,$enc,$qp) = @_;
243    
244     print STDERR "decode($cp,$qp) -> " if ($debug == 2);
245    
246     if (uc($enc) eq "Q") {
247     $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
248     $qp =~ s/_/ /g;
249     } elsif (uc($enc) eq "B") {
250     $qp = decode_base64($qp);
251     } else {
252     croak "unsupported encoding '$enc' in decode_qp\n";
253     return $qp;
254     }
255    
256 dpavlin 22 print STDERR "$qp\n" if ($debug == 2);
257 dpavlin 21
258 dpavlin 20 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
259     return $iconv->convert($qp) || '';
260 dpavlin 16 }
261    
262 dpavlin 21 $tmp =~ s/=\?([^\?]+)\?([QB])\?(.+?)\?=/decode($1,$2,$3)/ige;
263 dpavlin 17 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
264 dpavlin 20 #print STDERR "$tmp\n" if ($debug == 2);
265 dpavlin 16 return $tmp;
266     }
267    
268 dpavlin 2 sub unroll($$$) {
269 dpavlin 13 my $self = shift;
270    
271 dpavlin 2 my ($message,$part,$sub) = @_;
272    
273     my @arr;
274    
275 dpavlin 17 return if (! $message->$part);
276    
277 dpavlin 2 foreach my $from ($message->$part) {
278 dpavlin 16 my $tmp = $from->$sub || next;
279    
280     $tmp = $self->decode_qp($tmp);
281     push @arr, $tmp;
282 dpavlin 2 }
283 dpavlin 14
284     return @arr;
285 dpavlin 2 }
286 dpavlin 4
287     sub fetch_all_results {
288     my $self = shift;
289    
290     croak "results called before search!" if (! $self->{'index_ids'});
291    
292 dpavlin 14 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
293 dpavlin 13
294 dpavlin 4 my @arr;
295    
296     foreach my $id (@{$self->{'index_ids'}}) {
297     push @arr, $self->fetch_result_by_id($id);
298     }
299    
300 dpavlin 19
301 dpavlin 4 return @arr;
302     }
303    
304 dpavlin 2 sub fetch_result {
305     my $self = shift;
306    
307 dpavlin 3 my $args = {@_};
308    
309 dpavlin 2 croak "results called before search!" if (! $self->{'index_ids'});
310    
311     my $curr = $self->{'curr_result'}++;
312    
313     my $id = $self->{'index_ids'}->[$curr];
314 dpavlin 14
315     print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
316 dpavlin 2
317 dpavlin 4 return $self->fetch_result_by_id($id);
318     }
319 dpavlin 2
320 dpavlin 7 sub plain_text_body {
321     my $self = shift;
322     my $message = shift || croak "plain_text_body needs message!";
323    
324 dpavlin 14 my $body;
325    
326 dpavlin 7 if (! $message->isMultipart) {
327 dpavlin 14 $body = $message->decoded->string;
328 dpavlin 7 } else {
329     foreach my $part ($message->parts) {
330     if ($part->body->mimeType eq 'text/plain') {
331 dpavlin 14 $body = $part->decoded->string;
332     last;
333 dpavlin 7 }
334     }
335     }
336 dpavlin 14
337 dpavlin 25 if (! $body) {
338 dpavlin 26 $body = "[plain/text body not found]" if ($debug == 2);
339     print STDERR "plain/text body not found\n" if ($debug);
340     return;
341 dpavlin 25 }
342    
343 dpavlin 14 # reformat with Text::Autoformat
344     my $wrap = $self->{wrap_margin};
345     if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
346 dpavlin 21 $body = autoformat($body, {right=>$wrap, all=>1});
347 dpavlin 14 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
348     }
349    
350     return $body;
351 dpavlin 7 }
352    
353    
354 dpavlin 4 sub fetch_result_by_id {
355     my $self = shift;
356    
357     my $id = shift || return;
358    
359 dpavlin 13 my $row = $self->{cache}->{$id};
360 dpavlin 2
361 dpavlin 13 if (! $row) {
362 dpavlin 2
363 dpavlin 13 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
364 dpavlin 2
365 dpavlin 17 my $message = $self->fetch_message($id) || return;
366 dpavlin 13
367     $row->{'id'} = $id;
368 dpavlin 19
369     foreach my $p (qw(from to cc bcc)) {
370 dpavlin 22 foreach my $v ($self->unroll($message,$p,'phrase')) {
371 dpavlin 19 push @{$row->{$p}},$v;
372     $self->add_counter($p,$v);
373     }
374     }
375 dpavlin 16 $row->{'subject'} = $self->decode_qp($message->subject);
376 dpavlin 13 $row->{'body'} = $self->plain_text_body($message);
377 dpavlin 20 my $utime = str2time($message->date);
378 dpavlin 13
379 dpavlin 20 $row->{'date_utime'} = $utime;
380    
381     $row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
382     $self->add_counter_calendar($utime);
383    
384 dpavlin 13 # XXX store in cache?
385     $self->{cache}->{$id} = $row;
386     print STDERR "$id stored in cache\n" if ($debug == 2);
387     } else {
388     print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
389 dpavlin 23 foreach my $p (qw(from to cc bcc)) {
390     foreach my $v (@{$row->{$p}}) {
391     $self->add_counter($p,$v);
392     }
393     }
394    
395     $self->add_counter_calendar($row->{date_utime});
396 dpavlin 13 }
397    
398 dpavlin 2 return $row;
399    
400     }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26