/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 47 - (hide annotations)
Tue May 11 22:59:27 2004 UTC (20 years ago) by dpavlin
File size: 8325 byte(s)
Major code update: 0.9-rc3 if no serious problems are found, this will
become first public version (0.9).

- search.pl is working again
- fixed Mail::Box problem with unimplemented lock_type => 'none' on Maildir
- documented Mozilla 1.5 problem with sidebar float: right
- don't output anything from swish-e while indexing
- remove (e-mail) from addresses (it seems that Exchange like to add those)
- added progress report while indexing
- documented all command-line utilities

1 dpavlin 2 #!/usr/bin/perl -w
2    
3     package MWS;
4    
5     use strict;
6     use warnings;
7 dpavlin 41
8     use lib '.';
9    
10 dpavlin 2 use Carp;
11 dpavlin 3 use Mail::Box::Manager;
12 dpavlin 4 use Config::IniFiles;
13 dpavlin 13 use POSIX qw(strftime);
14 dpavlin 14 use Text::Autoformat;
15 dpavlin 16 use Text::Iconv;
16 dpavlin 19 use Text::Unaccent;
17 dpavlin 20 use Date::Parse;
18     use POSIX qw(strftime);
19 dpavlin 21 use MIME::Base64;
20 dpavlin 14
21 dpavlin 2 our $VERSION = '1.00';
22    
23 dpavlin 30 my $debug = 1;
24 dpavlin 2
25 dpavlin 42 my @counters = qw(from to cc bcc folder);
26    
27 dpavlin 2 sub new {
28     my $class = shift;
29 dpavlin 41 my $self = {@_};
30 dpavlin 2 bless($self, $class);
31    
32    
33 dpavlin 41 my $config_name = $self->{config_file} || croak "need config_file";
34     $config_name =~ s/\.conf.*$//;
35     $self->{config_name} = $config_name;
36 dpavlin 2
37 dpavlin 41 $self->{config} = new Config::IniFiles( -file => $self->{config_file} );
38 dpavlin 4
39 dpavlin 27 my $index_dir = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
40    
41 dpavlin 12 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
42 dpavlin 27 $self->{index_dir} = $index_dir;
43 dpavlin 4
44 dpavlin 2 # placeholder for opened folders
45     $self->{folder} = {};
46    
47 dpavlin 14 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
48 dpavlin 17 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
49 dpavlin 19 $self->reset_counters;
50 dpavlin 14
51 dpavlin 2 return $self;
52     }
53    
54 dpavlin 19 sub normalize_string {
55     my $self = shift;
56    
57     my $v = shift || return;
58    
59     $v = unac_string('ISO-8859-2', $v);
60     $v = join('',sort split(/\s+/,$v));
61     $v =~ s/\W+//g;
62    
63 dpavlin 23 return lc($v);
64 dpavlin 19 }
65    
66     # reset tables for search results
67     sub reset_counters {
68     my $self = shift;
69    
70     $self->{counter} = {};
71    
72     # foreach my $c (qw(thread from to cc bcc lists links att)) {
73     # $self->{counter}->{$c} = {};
74     # }
75    
76     }
77    
78     sub add_counter($$) {
79     my $self = shift;
80    
81     my ($c,$v) = @_;
82     my $k = $self->normalize_string($v);
83    
84 dpavlin 42 $self->{counter}->{$c}->{$k}->{name} = $v || return;
85 dpavlin 19 return $self->{counter}->{$c}->{$k}->{usage}++;
86     }
87    
88 dpavlin 20 sub yyyymmdd {
89     my $self = shift;
90    
91     my $t = shift || time;
92    
93     my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
94     $mm++;
95     $yyyy+=1900;
96     return ($yyyy,$mm,$dd);
97     }
98    
99     sub fmtdate {
100     my $self = shift;
101    
102     my @out;
103     my @formats = qw(%04d %02d %02d);
104     while (my $v = shift) {
105     my $f = shift @formats;
106     push @out, sprintf($f, $v);
107     }
108    
109 dpavlin 30 print STDERR "fmtdate: ",join('|',@out),"\n" if ($debug == 2);
110 dpavlin 20
111     return (wantarray ? @out : join("-",@out));
112     }
113    
114     sub add_counter_calendar($) {
115     my $self = shift;
116    
117     my $t = shift || croak "add_counter_calendar without argument!";
118    
119     my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
120    
121     return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
122     }
123    
124    
125 dpavlin 19 sub counter {
126     my $self = shift;
127    
128     my $c = shift || return;
129    
130     return if (! $self->{counter}->{$c});
131    
132     return $self->{counter}->{$c};
133     }
134    
135 dpavlin 14 sub mbox_name2path {
136     my $self = shift;
137    
138     my $mbox = shift || croak "folder_name2path needs mbox name";
139    
140     return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
141     }
142    
143 dpavlin 7 sub open_folder {
144 dpavlin 2 my $self = shift;
145    
146 dpavlin 7 my $mbox = shift || croak "open_folder needs mbox name";
147 dpavlin 2
148     if (! $self->{folder}->{$mbox}) {
149 dpavlin 14 my $mbox_path = $self->mbox_name2path($mbox);
150    
151 dpavlin 13 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
152 dpavlin 14
153 dpavlin 47 my $folder;
154 dpavlin 14
155 dpavlin 47 # is this maildir (no locking anyway)
156     if (-d $mbox_path) {
157     $folder = $self->{mgr}->open($mbox_path);
158     } else {
159     $folder = $self->{mgr}->open($mbox_path, lock_type=>'none' );
160     }
161    
162     $self->{folder}->{$mbox} = $folder || croak "can't open folder $mbox at '$mbox_path': $!";
163    
164 dpavlin 14 print STDERR "open_folder($mbox) ok\n" if ($debug);
165 dpavlin 2 }
166    
167 dpavlin 16 $self->{fetch_count} = 0;
168    
169 dpavlin 7 return $self->{folder}->{$mbox};
170    
171     }
172    
173 dpavlin 14 sub close_folder {
174     my $self = shift;
175    
176     my $mbox = shift || croak "open_folder needs mbox name";
177    
178 dpavlin 16 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
179    
180     # XXX this is rather agressive!!!
181     $self->{folder} = {};
182     return
183 dpavlin 14 }
184    
185 dpavlin 7 sub fetch_message {
186     my $self = shift;
187    
188     my $mbox_id = shift || die "need mbox_id!";
189     my ($mbox,$id) = split(/ /,$mbox_id);
190    
191     # return message with ID
192 dpavlin 13 print STDERR "fetch $id from $mbox\n" if ($debug);
193 dpavlin 16
194     if ($self->{fetch_count}++ > 100) {
195     $self->close_folder($mbox);
196     print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
197     }
198    
199 dpavlin 17 my $msg = $self->open_folder($mbox)->find($id);
200     if ($msg) {
201     return $msg;
202     } else {
203 dpavlin 2 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
204 dpavlin 17 return;
205     }
206 dpavlin 2 }
207    
208    
209     sub search {
210     my $self = shift;
211    
212 dpavlin 21 carp "search called without argument!" if (! @_);
213 dpavlin 2
214 dpavlin 19 $self->reset_counters;
215    
216 dpavlin 21 print STDERR "search(",join(" ",@_),")\n" if ($debug == 2);
217     my @index_ids = $self->search_index(@_);
218 dpavlin 2
219     $self->{'index_ids'} = \@index_ids;
220    
221 dpavlin 17 #my $results = $#index_ids + 1;
222     #$self->{'results'} = $results;
223    
224     my $results = $self->{'total_hits'} || ($#index_ids + 1);
225 dpavlin 2
226     $self->{'curr_result'} = 0;
227    
228 dpavlin 23 $self->reset_counters;
229    
230 dpavlin 13 print STDERR "$results results\n" if ($debug == 2);
231    
232 dpavlin 2 return $results || 'error';
233     }
234    
235 dpavlin 16 sub decode_qp($) {
236     my $self = shift;
237    
238     my $tmp = shift || return;
239    
240 dpavlin 21 sub decode($$$) {
241     my ($cp,$enc,$qp) = @_;
242    
243     print STDERR "decode($cp,$qp) -> " if ($debug == 2);
244    
245     if (uc($enc) eq "Q") {
246     $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
247     $qp =~ s/_/ /g;
248     } elsif (uc($enc) eq "B") {
249     $qp = decode_base64($qp);
250     } else {
251     croak "unsupported encoding '$enc' in decode_qp\n";
252     return $qp;
253     }
254    
255 dpavlin 22 print STDERR "$qp\n" if ($debug == 2);
256 dpavlin 21
257 dpavlin 20 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
258     return $iconv->convert($qp) || '';
259 dpavlin 16 }
260    
261 dpavlin 21 $tmp =~ s/=\?([^\?]+)\?([QB])\?(.+?)\?=/decode($1,$2,$3)/ige;
262 dpavlin 17 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
263 dpavlin 20 #print STDERR "$tmp\n" if ($debug == 2);
264 dpavlin 16 return $tmp;
265     }
266    
267 dpavlin 2 sub unroll($$$) {
268 dpavlin 13 my $self = shift;
269    
270 dpavlin 2 my ($message,$part,$sub) = @_;
271    
272     my @arr;
273    
274 dpavlin 17 return if (! $message->$part);
275    
276 dpavlin 2 foreach my $from ($message->$part) {
277 dpavlin 16 my $tmp = $from->$sub || next;
278    
279     $tmp = $self->decode_qp($tmp);
280     push @arr, $tmp;
281 dpavlin 2 }
282 dpavlin 14
283     return @arr;
284 dpavlin 2 }
285 dpavlin 4
286     sub fetch_all_results {
287     my $self = shift;
288    
289     croak "results called before search!" if (! $self->{'index_ids'});
290    
291 dpavlin 14 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
292 dpavlin 13
293 dpavlin 4 my @arr;
294    
295     foreach my $id (@{$self->{'index_ids'}}) {
296     push @arr, $self->fetch_result_by_id($id);
297     }
298    
299 dpavlin 19
300 dpavlin 4 return @arr;
301     }
302    
303 dpavlin 2 sub fetch_result {
304     my $self = shift;
305    
306 dpavlin 3 my $args = {@_};
307    
308 dpavlin 2 croak "results called before search!" if (! $self->{'index_ids'});
309    
310     my $curr = $self->{'curr_result'}++;
311    
312     my $id = $self->{'index_ids'}->[$curr];
313 dpavlin 14
314     print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
315 dpavlin 2
316 dpavlin 4 return $self->fetch_result_by_id($id);
317     }
318 dpavlin 2
319 dpavlin 7 sub plain_text_body {
320     my $self = shift;
321     my $message = shift || croak "plain_text_body needs message!";
322    
323 dpavlin 14 my $body;
324    
325 dpavlin 7 if (! $message->isMultipart) {
326 dpavlin 14 $body = $message->decoded->string;
327 dpavlin 7 } else {
328     foreach my $part ($message->parts) {
329     if ($part->body->mimeType eq 'text/plain') {
330 dpavlin 14 $body = $part->decoded->string;
331     last;
332 dpavlin 7 }
333     }
334     }
335 dpavlin 14
336 dpavlin 25 if (! $body) {
337 dpavlin 26 $body = "[plain/text body not found]" if ($debug == 2);
338     print STDERR "plain/text body not found\n" if ($debug);
339     return;
340 dpavlin 25 }
341    
342 dpavlin 14 # reformat with Text::Autoformat
343     my $wrap = $self->{wrap_margin};
344     if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
345 dpavlin 21 $body = autoformat($body, {right=>$wrap, all=>1});
346 dpavlin 14 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
347     }
348    
349     return $body;
350 dpavlin 7 }
351    
352    
353 dpavlin 4 sub fetch_result_by_id {
354     my $self = shift;
355    
356     my $id = shift || return;
357    
358 dpavlin 13 my $row = $self->{cache}->{$id};
359 dpavlin 2
360 dpavlin 13 if (! $row) {
361 dpavlin 2
362 dpavlin 13 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
363 dpavlin 2
364 dpavlin 17 my $message = $self->fetch_message($id) || return;
365 dpavlin 13
366     $row->{'id'} = $id;
367 dpavlin 19
368 dpavlin 42 foreach my $p (@counters) {
369 dpavlin 22 foreach my $v ($self->unroll($message,$p,'phrase')) {
370 dpavlin 19 push @{$row->{$p}},$v;
371     $self->add_counter($p,$v);
372     }
373     }
374 dpavlin 42
375     $self->add_counter('folder', $1) if ($id =~ m/^(\S+)\s/);
376    
377 dpavlin 16 $row->{'subject'} = $self->decode_qp($message->subject);
378 dpavlin 13 $row->{'body'} = $self->plain_text_body($message);
379 dpavlin 20 my $utime = str2time($message->date);
380 dpavlin 13
381 dpavlin 20 $row->{'date_utime'} = $utime;
382    
383     $row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
384     $self->add_counter_calendar($utime);
385    
386 dpavlin 13 # XXX store in cache?
387     $self->{cache}->{$id} = $row;
388     print STDERR "$id stored in cache\n" if ($debug == 2);
389     } else {
390     print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
391 dpavlin 42 foreach my $p (@counters) {
392 dpavlin 23 foreach my $v (@{$row->{$p}}) {
393     $self->add_counter($p,$v);
394     }
395     }
396    
397 dpavlin 42 $self->add_counter('folder', $1) if ($id =~ m/^(\S+)\s/);
398    
399 dpavlin 23 $self->add_counter_calendar($row->{date_utime});
400 dpavlin 13 }
401    
402 dpavlin 2 return $row;
403    
404     }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26