/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 20 - (show annotations)
Fri May 7 23:35:39 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 7502 byte(s)
working calendar

1 #!/usr/bin/perl -w
2
3 package MWS;
4
5 use strict;
6 use warnings;
7 use Carp;
8
9 use Mail::Box::Manager;
10 use Config::IniFiles;
11 use POSIX qw(strftime);
12 use Text::Autoformat;
13 use Text::Iconv;
14 use Text::Unaccent;
15 use Date::Parse;
16 use POSIX qw(strftime);
17
18 #use MWS_plucene;
19 use MWS_swish;
20
21 require Exporter;
22
23 our @ISA = qw(Exporter);
24
25 our %EXPORT_TAGS = ();
26 our @EXPORT_OK;
27 our @EXPORT;
28
29 our $VERSION = '1.00';
30
31 my $folder; # placeholder for folders
32
33 my $debug = 2;
34
35 sub new {
36 my $class = shift;
37 my $self = {};
38 bless($self, $class);
39
40 my $config_file = shift || die "need index file";
41
42 $self->{config} = new Config::IniFiles( -file => $config_file );
43
44 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
45
46 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
47 $self->{index_file} = $index_file;
48
49 # placeholder for opened folders
50 $self->{folder} = {};
51
52 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
53 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
54 $self->reset_counters;
55
56 return $self;
57 }
58
59 sub normalize_string {
60 my $self = shift;
61
62 my $v = shift || return;
63
64 $v = unac_string('ISO-8859-2', $v);
65 $v = join('',sort split(/\s+/,$v));
66 $v =~ s/\W+//g;
67
68 return $v;
69 }
70
71 # reset tables for search results
72 sub reset_counters {
73 my $self = shift;
74
75 $self->{counter} = {};
76
77 # foreach my $c (qw(thread from to cc bcc lists links att)) {
78 # $self->{counter}->{$c} = {};
79 # }
80
81 }
82
83 sub add_counter($$) {
84 my $self = shift;
85
86 my ($c,$v) = @_;
87 my $k = $self->normalize_string($v);
88
89 $self->{counter}->{$c}->{$k}->{name} = $v;
90 return $self->{counter}->{$c}->{$k}->{usage}++;
91 }
92
93 sub yyyymmdd {
94 my $self = shift;
95
96 my $t = shift || time;
97
98 my (undef,undef,undef,$dd,$mm,$yyyy) = localtime($t);
99 $mm++;
100 $yyyy+=1900;
101 return ($yyyy,$mm,$dd);
102 }
103
104 sub fmtdate {
105 my $self = shift;
106
107 my @out;
108 my @formats = qw(%04d %02d %02d);
109 while (my $v = shift) {
110 my $f = shift @formats;
111 push @out, sprintf($f, $v);
112 }
113
114 print STDERR "fmtdate: ",join('|',@out),"\n";
115
116 return (wantarray ? @out : join("-",@out));
117 }
118
119 sub add_counter_calendar($) {
120 my $self = shift;
121
122 my $t = shift || croak "add_counter_calendar without argument!";
123
124 my ($yyyy,$mm,$dd) = $self->fmtdate($self->yyyymmdd($t));
125
126 return $self->{counter}->{calendar}->{"$yyyy-$mm"}->{$dd}++;
127 }
128
129
130 sub counter {
131 my $self = shift;
132
133 my $c = shift || return;
134
135 return if (! $self->{counter}->{$c});
136
137 return $self->{counter}->{$c};
138 }
139
140 sub mbox_name2path {
141 my $self = shift;
142
143 my $mbox = shift || croak "folder_name2path needs mbox name";
144
145 return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
146 }
147
148 sub open_folder {
149 my $self = shift;
150
151 my $mbox = shift || croak "open_folder needs mbox name";
152
153 if (! $self->{folder}->{$mbox}) {
154 my $mbox_path = $self->mbox_name2path($mbox);
155
156 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
157
158 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
159
160 print STDERR "open_folder($mbox) ok\n" if ($debug);
161 }
162
163 $self->{fetch_count} = 0;
164
165 return $self->{folder}->{$mbox};
166
167 }
168
169 sub close_folder {
170 my $self = shift;
171
172 my $mbox = shift || croak "open_folder needs mbox name";
173
174 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
175
176 # XXX this is rather agressive!!!
177 $self->{folder} = {};
178 return
179 }
180
181 sub fetch_message {
182 my $self = shift;
183
184 my $mbox_id = shift || die "need mbox_id!";
185 my ($mbox,$id) = split(/ /,$mbox_id);
186
187 # return message with ID
188 print STDERR "fetch $id from $mbox\n" if ($debug);
189
190 if ($self->{fetch_count}++ > 100) {
191 $self->close_folder($mbox);
192 print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
193 }
194
195 my $msg = $self->open_folder($mbox)->find($id);
196 if ($msg) {
197 return $msg;
198 } else {
199 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
200 return;
201 }
202 }
203
204
205 sub search {
206 my $self = shift;
207
208 my $s = shift || carp "search called without argument!";
209
210 $self->reset_counters;
211
212 print STDERR "search_index($s)\n" if ($debug == 2);
213 my @index_ids = $self->search_index($s);
214
215 $self->{'index_ids'} = \@index_ids;
216
217 #my $results = $#index_ids + 1;
218 #$self->{'results'} = $results;
219
220 my $results = $self->{'total_hits'} || ($#index_ids + 1);
221
222 $self->{'curr_result'} = 0;
223
224 print STDERR "$results results\n" if ($debug == 2);
225
226 return $results || 'error';
227 }
228
229 sub decode_qp($) {
230 my $self = shift;
231
232 my $tmp = shift || return;
233
234 sub decode($$) {
235 my ($cp,$qp) = @_;
236 print STDERR "decode($cp,$qp) -> " if ($debug == 2);
237 $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
238 $qp =~ s/_/ /g;
239 print STDERR "$qp -> " if ($debug == 2);
240 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
241 return $iconv->convert($qp) || '';
242 }
243
244 $tmp =~ s/=\?([^\?]+)\?Q\?(.+?)\?=/decode($1,$2)/ge;
245 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
246 #print STDERR "$tmp\n" if ($debug == 2);
247 return $tmp;
248 }
249
250 sub unroll($$$) {
251 my $self = shift;
252
253 my ($message,$part,$sub) = @_;
254
255 my @arr;
256
257 return if (! $message->$part);
258
259 foreach my $from ($message->$part) {
260 my $tmp = $from->$sub || next;
261
262 $tmp = $self->decode_qp($tmp);
263 push @arr, $tmp;
264 }
265
266 return @arr;
267 }
268
269 sub fetch_all_results {
270 my $self = shift;
271
272 croak "results called before search!" if (! $self->{'index_ids'});
273
274 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
275
276 my @arr;
277
278 foreach my $id (@{$self->{'index_ids'}}) {
279 push @arr, $self->fetch_result_by_id($id);
280 }
281
282
283 return @arr;
284 }
285
286 sub fetch_result {
287 my $self = shift;
288
289 my $args = {@_};
290
291 croak "results called before search!" if (! $self->{'index_ids'});
292
293 my $curr = $self->{'curr_result'}++;
294
295 my $id = $self->{'index_ids'}->[$curr];
296
297 print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
298
299 return $self->fetch_result_by_id($id);
300 }
301
302 sub plain_text_body {
303 my $self = shift;
304 my $message = shift || croak "plain_text_body needs message!";
305
306 my $body;
307
308 if (! $message->isMultipart) {
309 $body = $message->decoded->string;
310 } else {
311 foreach my $part ($message->parts) {
312 if ($part->body->mimeType eq 'text/plain') {
313 $body = $part->decoded->string;
314 last;
315 }
316 }
317 }
318
319 # reformat with Text::Autoformat
320 my $wrap = $self->{wrap_margin};
321 if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
322 $body =~ s/[\r\n]/\n/gs;
323 $body = autoformat($body, {right=>$wrap});
324 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
325 }
326
327 return $body;
328 }
329
330
331 sub fetch_result_by_id {
332 my $self = shift;
333
334 my $id = shift || return;
335
336 my $row = $self->{cache}->{$id};
337
338 if (! $row) {
339
340 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
341
342 my $message = $self->fetch_message($id) || return;
343
344 $row->{'id'} = $id;
345
346 foreach my $p (qw(from to cc bcc)) {
347 foreach my $v ($self->unroll($message,'from','phrase')) {
348 push @{$row->{$p}},$v;
349 $self->add_counter($p,$v);
350 }
351 }
352 $row->{'subject'} = $self->decode_qp($message->subject);
353 $row->{'body'} = $self->plain_text_body($message);
354 my $utime = str2time($message->date);
355
356 $row->{'date_utime'} = $utime;
357
358 $row->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
359 $self->add_counter_calendar($utime);
360
361 # XXX store in cache?
362 $self->{cache}->{$id} = $row;
363 print STDERR "$id stored in cache\n" if ($debug == 2);
364 } else {
365 print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
366 }
367
368 return $row;
369
370 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26