/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 20 - (hide annotations)
Fri May 7 23:35:39 2004 UTC (20 years ago) by dpavlin
File size: 7502 byte(s)
working calendar

1 dpavlin 2 #!/usr/bin/perl -w
2    
3     package MWS;
4    
5     use strict;
6     use warnings;
7     use Carp;
8    
9 dpavlin 3 use Mail::Box::Manager;
10 dpavlin 4 use Config::IniFiles;
11 dpavlin 13 use POSIX qw(strftime);
12 dpavlin 14 use Text::Autoformat;
13 dpavlin 16 use Text::Iconv;
14 dpavlin 19 use Text::Unaccent;
15 dpavlin 20 use Date::Parse;
16     use POSIX qw(strftime);
17 dpavlin 14
18 dpavlin 12 #use MWS_plucene;
19     use MWS_swish;
20 dpavlin 3
21 dpavlin 2 require Exporter;
22    
23     our @ISA = qw(Exporter);
24    
25     our %EXPORT_TAGS = ();
26     our @EXPORT_OK;
27     our @EXPORT;
28    
29     our $VERSION = '1.00';
30    
31     my $folder; # placeholder for folders
32    
33 dpavlin 13 my $debug = 2;
34 dpavlin 2
35     sub new {
36     my $class = shift;
37     my $self = {};
38     bless($self, $class);
39    
40 dpavlin 4 my $config_file = shift || die "need index file";
41 dpavlin 2
42 dpavlin 4 $self->{config} = new Config::IniFiles( -file => $config_file );
43 dpavlin 2
44 dpavlin 4 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
45    
46 dpavlin 12 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
47     $self->{index_file} = $index_file;
48 dpavlin 4
49 dpavlin 2 # placeholder for opened folders
50     $self->{folder} = {};
51    
52 dpavlin 14 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
53 dpavlin 17 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
54 dpavlin 19 $self->reset_counters;
55 dpavlin 14
56 dpavlin 2 return $self;
57     }
58    
59 dpavlin 19 sub normalize_string {
60     my $self = shift;
61    
62     my $v = shift || return;
63    
64     $v = unac_string('ISO-8859-2', $v);
65     $v = join('',sort split(/\s+/,$v));
66     $v =~ s/\W+//g;
67    
68     return $v;
69     }
70    
71     # reset tables for search results
72     sub reset_counters {
73     my $self = shift;
74    
75     $self->{counter} = {};
76    
77     # foreach my $c (qw(thread from to cc bcc lists links att)) {
78     # $self->{counter}->{$c} = {};
79     # }
80    
81     }
82    
83     sub add_counter($$) {
84     my $self = shift;
85    
86     my ($c,$v) = @_;
87     my $k = $self->normalize_string($v);
88    
89     $self->{counter}->{$c}->{$k}->{name} = $v;
90     return $self->{counter}->{$c}->{$k}->{usage}++;
91     }
92    
93 dpavlin 20 sub yyyymmdd {
94     my $self = shift;
95    
96     my $t = shift || time;
97    
98     my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
99     $mm++;
100     $yyyy+=1900;
101     return ($yyyy,$mm,$dd);
102     }
103    
104     sub fmtdate {
105     my $self = shift;
106    
107     my @out;
108     my @formats = qw(%04d %02d %02d);
109     while (my $v = shift) {
110     my $f = shift @formats;
111     push @out, sprintf($f, $v);
112     }
113    
114     print STDERR "fmtdate: ",join('|',@out),"\n";
115    
116     return (wantarray ? @out : join("-",@out));
117     }
118    
119     sub add_counter_calendar($) {
120     my $self = shift;
121    
122     my $t = shift || croak "add_counter_calendar without argument!";
123    
124     my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
125    
126     return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
127     }
128    
129    
130 dpavlin 19 sub counter {
131     my $self = shift;
132    
133     my $c = shift || return;
134    
135     return if (! $self->{counter}->{$c});
136    
137     return $self->{counter}->{$c};
138     }
139    
140 dpavlin 14 sub mbox_name2path {
141     my $self = shift;
142    
143     my $mbox = shift || croak "folder_name2path needs mbox name";
144    
145     return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
146     }
147    
148 dpavlin 7 sub open_folder {
149 dpavlin 2 my $self = shift;
150    
151 dpavlin 7 my $mbox = shift || croak "open_folder needs mbox name";
152 dpavlin 2
153     if (! $self->{folder}->{$mbox}) {
154 dpavlin 14 my $mbox_path = $self->mbox_name2path($mbox);
155    
156 dpavlin 13 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
157 dpavlin 14
158 dpavlin 4 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
159 dpavlin 14
160     print STDERR "open_folder($mbox) ok\n" if ($debug);
161 dpavlin 2 }
162    
163 dpavlin 16 $self->{fetch_count} = 0;
164    
165 dpavlin 7 return $self->{folder}->{$mbox};
166    
167     }
168    
169 dpavlin 14 sub close_folder {
170     my $self = shift;
171    
172     my $mbox = shift || croak "open_folder needs mbox name";
173    
174 dpavlin 16 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
175    
176     # XXX this is rather agressive!!!
177     $self->{folder} = {};
178     return
179 dpavlin 14 }
180    
181 dpavlin 7 sub fetch_message {
182     my $self = shift;
183    
184     my $mbox_id = shift || die "need mbox_id!";
185     my ($mbox,$id) = split(/ /,$mbox_id);
186    
187     # return message with ID
188 dpavlin 13 print STDERR "fetch $id from $mbox\n" if ($debug);
189 dpavlin 16
190     if ($self->{fetch_count}++ > 100) {
191     $self->close_folder($mbox);
192     print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
193     }
194    
195 dpavlin 17 my $msg = $self->open_folder($mbox)->find($id);
196     if ($msg) {
197     return $msg;
198     } else {
199 dpavlin 2 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
200 dpavlin 17 return;
201     }
202 dpavlin 2 }
203    
204    
205     sub search {
206     my $self = shift;
207    
208     my $s = shift || carp "search called without argument!";
209    
210 dpavlin 19 $self->reset_counters;
211    
212 dpavlin 13 print STDERR "search_index($s)\n" if ($debug == 2);
213 dpavlin 9 my @index_ids = $self->search_index($s);
214 dpavlin 2
215     $self->{'index_ids'} = \@index_ids;
216    
217 dpavlin 17 #my $results = $#index_ids + 1;
218     #$self->{'results'} = $results;
219    
220     my $results = $self->{'total_hits'} || ($#index_ids + 1);
221 dpavlin 2
222     $self->{'curr_result'} = 0;
223    
224 dpavlin 13 print STDERR "$results results\n" if ($debug == 2);
225    
226 dpavlin 2 return $results || 'error';
227     }
228    
229 dpavlin 16 sub decode_qp($) {
230     my $self = shift;
231    
232     my $tmp = shift || return;
233    
234     sub decode($$) {
235     my ($cp,$qp) = @_;
236     print STDERR "decode($cp,$qp) -> " if ($debug == 2);
237     $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
238     $qp =~ s/_/ /g;
239 dpavlin 20 print STDERR "$qp -> " if ($debug == 2);
240     my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
241     return $iconv->convert($qp) || '';
242 dpavlin 16 }
243    
244 dpavlin 20 $tmp =~ s/=\?([^\?]+)\?Q\?(.+?)\?=/decode($1,$2)/ge;
245 dpavlin 17 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
246 dpavlin 20 #print STDERR "$tmp\n" if ($debug == 2);
247 dpavlin 16 return $tmp;
248     }
249    
250 dpavlin 2 sub unroll($$$) {
251 dpavlin 13 my $self = shift;
252    
253 dpavlin 2 my ($message,$part,$sub) = @_;
254    
255     my @arr;
256    
257 dpavlin 17 return if (! $message->$part);
258    
259 dpavlin 2 foreach my $from ($message->$part) {
260 dpavlin 16 my $tmp = $from->$sub || next;
261    
262     $tmp = $self->decode_qp($tmp);
263     push @arr, $tmp;
264 dpavlin 2 }
265 dpavlin 14
266     return @arr;
267 dpavlin 2 }
268 dpavlin 4
269     sub fetch_all_results {
270     my $self = shift;
271    
272     croak "results called before search!" if (! $self->{'index_ids'});
273    
274 dpavlin 14 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
275 dpavlin 13
276 dpavlin 4 my @arr;
277    
278     foreach my $id (@{$self->{'index_ids'}}) {
279     push @arr, $self->fetch_result_by_id($id);
280     }
281    
282 dpavlin 19
283 dpavlin 4 return @arr;
284     }
285    
286 dpavlin 2 sub fetch_result {
287     my $self = shift;
288    
289 dpavlin 3 my $args = {@_};
290    
291 dpavlin 2 croak "results called before search!" if (! $self->{'index_ids'});
292    
293     my $curr = $self->{'curr_result'}++;
294    
295     my $id = $self->{'index_ids'}->[$curr];
296 dpavlin 14
297     print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
298 dpavlin 2
299 dpavlin 4 return $self->fetch_result_by_id($id);
300     }
301 dpavlin 2
302 dpavlin 7 sub plain_text_body {
303     my $self = shift;
304     my $message = shift || croak "plain_text_body needs message!";
305    
306 dpavlin 14 my $body;
307    
308 dpavlin 7 if (! $message->isMultipart) {
309 dpavlin 14 $body = $message->decoded->string;
310 dpavlin 7 } else {
311     foreach my $part ($message->parts) {
312     if ($part->body->mimeType eq 'text/plain') {
313 dpavlin 14 $body = $part->decoded->string;
314     last;
315 dpavlin 7 }
316     }
317     }
318 dpavlin 14
319     # reformat with Text::Autoformat
320     my $wrap = $self->{wrap_margin};
321     if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
322 dpavlin 16 $body =~ s/[\r\n]/\n/gs;
323     $body = autoformat($body, {right=>$wrap});
324 dpavlin 14 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
325     }
326    
327     return $body;
328 dpavlin 7 }
329    
330    
331 dpavlin 4 sub fetch_result_by_id {
332     my $self = shift;
333    
334     my $id = shift || return;
335    
336 dpavlin 13 my $row = $self->{cache}->{$id};
337 dpavlin 2
338 dpavlin 13 if (! $row) {
339 dpavlin 2
340 dpavlin 13 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
341 dpavlin 2
342 dpavlin 17 my $message = $self->fetch_message($id) || return;
343 dpavlin 13
344     $row->{'id'} = $id;
345 dpavlin 19
346     foreach my $p (qw(from to cc bcc)) {
347     foreach my $v ($self->unroll($message,'from','phrase')) {
348     push @{$row->{$p}},$v;
349     $self->add_counter($p,$v);
350     }
351     }
352 dpavlin 16 $row->{'subject'} = $self->decode_qp($message->subject);
353 dpavlin 13 $row->{'body'} = $self->plain_text_body($message);
354 dpavlin 20 my $utime = str2time($message->date);
355 dpavlin 13
356 dpavlin 20 $row->{'date_utime'} = $utime;
357    
358     $row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
359     $self->add_counter_calendar($utime);
360    
361 dpavlin 13 # XXX store in cache?
362     $self->{cache}->{$id} = $row;
363     print STDERR "$id stored in cache\n" if ($debug == 2);
364     } else {
365     print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
366     }
367    
368 dpavlin 2 return $row;
369    
370     }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26