/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 19 - (hide annotations)
Fri May 7 20:52:34 2004 UTC (20 years ago) by dpavlin
File size: 6602 byte(s)
added From, To, Cc lists with unique feature: it will count same names
in different order as, well, same name :-) It will also remove accented
characters when counting names, to detect false duplicates (using
Text::Unaccent)

1 dpavlin 2 #!/usr/bin/perl -w
2    
3     package MWS;
4    
5     use strict;
6     use warnings;
7     use Carp;
8    
9 dpavlin 3 use Mail::Box::Manager;
10 dpavlin 4 use Config::IniFiles;
11 dpavlin 13 use POSIX qw(strftime);
12 dpavlin 14 use Text::Autoformat;
13 dpavlin 16 use Text::Iconv;
14 dpavlin 19 use Text::Unaccent;
15 dpavlin 14
16 dpavlin 12 #use MWS_plucene;
17     use MWS_swish;
18 dpavlin 3
19 dpavlin 2 require Exporter;
20    
21     our @ISA = qw(Exporter);
22    
23     our %EXPORT_TAGS = ();
24     our @EXPORT_OK;
25     our @EXPORT;
26    
27     our $VERSION = '1.00';
28    
29     my $folder; # placeholder for folders
30    
31 dpavlin 13 my $debug = 2;
32 dpavlin 2
33     sub new {
34     my $class = shift;
35     my $self = {};
36     bless($self, $class);
37    
38 dpavlin 4 my $config_file = shift || die "need index file";
39 dpavlin 2
40 dpavlin 4 $self->{config} = new Config::IniFiles( -file => $config_file );
41 dpavlin 2
42 dpavlin 4 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
43    
44 dpavlin 12 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
45     $self->{index_file} = $index_file;
46 dpavlin 4
47 dpavlin 2 # placeholder for opened folders
48     $self->{folder} = {};
49    
50 dpavlin 14 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
51 dpavlin 17 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
52 dpavlin 19 $self->reset_counters;
53 dpavlin 14
54 dpavlin 2 return $self;
55     }
56    
57 dpavlin 19 sub normalize_string {
58     my $self = shift;
59    
60     my $v = shift || return;
61    
62     $v = unac_string('ISO-8859-2', $v);
63     $v = join('',sort split(/\s+/,$v));
64     $v =~ s/\W+//g;
65    
66     return $v;
67     }
68    
69     # reset tables for search results
70     sub reset_counters {
71     my $self = shift;
72    
73     $self->{counter} = {};
74    
75     # foreach my $c (qw(thread from to cc bcc lists links att)) {
76     # $self->{counter}->{$c} = {};
77     # }
78    
79     }
80    
81     sub add_counter($$) {
82     my $self = shift;
83    
84     my ($c,$v) = @_;
85     my $k = $self->normalize_string($v);
86    
87     $self->{counter}->{$c}->{$k}->{name} = $v;
88     return $self->{counter}->{$c}->{$k}->{usage}++;
89     }
90    
91     sub counter {
92     my $self = shift;
93    
94     my $c = shift || return;
95    
96     return if (! $self->{counter}->{$c});
97    
98     return $self->{counter}->{$c};
99     }
100    
101 dpavlin 14 sub mbox_name2path {
102     my $self = shift;
103    
104     my $mbox = shift || croak "folder_name2path needs mbox name";
105    
106     return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
107     }
108    
109 dpavlin 7 sub open_folder {
110 dpavlin 2 my $self = shift;
111    
112 dpavlin 7 my $mbox = shift || croak "open_folder needs mbox name";
113 dpavlin 2
114     if (! $self->{folder}->{$mbox}) {
115 dpavlin 14 my $mbox_path = $self->mbox_name2path($mbox);
116    
117 dpavlin 13 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
118 dpavlin 14
119 dpavlin 4 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
120 dpavlin 14
121     print STDERR "open_folder($mbox) ok\n" if ($debug);
122 dpavlin 2 }
123    
124 dpavlin 16 $self->{fetch_count} = 0;
125    
126 dpavlin 7 return $self->{folder}->{$mbox};
127    
128     }
129    
130 dpavlin 14 sub close_folder {
131     my $self = shift;
132    
133     my $mbox = shift || croak "open_folder needs mbox name";
134    
135 dpavlin 16 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
136    
137     # XXX this is rather agressive!!!
138     $self->{folder} = {};
139     return
140 dpavlin 14 }
141    
142 dpavlin 7 sub fetch_message {
143     my $self = shift;
144    
145     my $mbox_id = shift || die "need mbox_id!";
146     my ($mbox,$id) = split(/ /,$mbox_id);
147    
148     # return message with ID
149 dpavlin 13 print STDERR "fetch $id from $mbox\n" if ($debug);
150 dpavlin 16
151     if ($self->{fetch_count}++ > 100) {
152     $self->close_folder($mbox);
153     print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
154     }
155    
156 dpavlin 17 my $msg = $self->open_folder($mbox)->find($id);
157     if ($msg) {
158     return $msg;
159     } else {
160 dpavlin 2 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
161 dpavlin 17 return;
162     }
163 dpavlin 2 }
164    
165    
166     sub search {
167     my $self = shift;
168    
169     my $s = shift || carp "search called without argument!";
170    
171 dpavlin 19 $self->reset_counters;
172    
173 dpavlin 13 print STDERR "search_index($s)\n" if ($debug == 2);
174 dpavlin 9 my @index_ids = $self->search_index($s);
175 dpavlin 2
176     $self->{'index_ids'} = \@index_ids;
177    
178 dpavlin 17 #my $results = $#index_ids + 1;
179     #$self->{'results'} = $results;
180    
181     my $results = $self->{'total_hits'} || ($#index_ids + 1);
182 dpavlin 2
183     $self->{'curr_result'} = 0;
184    
185 dpavlin 13 print STDERR "$results results\n" if ($debug == 2);
186    
187 dpavlin 2 return $results || 'error';
188     }
189    
190 dpavlin 16 sub decode_qp($) {
191     my $self = shift;
192    
193     my $tmp = shift || return;
194    
195     sub decode($$) {
196     my ($cp,$qp) = @_;
197     my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
198     print STDERR "decode($cp,$qp) -> " if ($debug == 2);
199     $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
200     $qp =~ s/_/ /g;
201     print STDERR "$qp\n" if ($debug == 2);
202 dpavlin 17 return $iconv->convert($qp) || $qp;
203 dpavlin 16 }
204    
205 dpavlin 17 $tmp =~ s/=\?([^\?]+)\?Q\?(.+?)\?=/decode($1,$2)/ex;
206     $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
207 dpavlin 16 return $tmp;
208     }
209    
210 dpavlin 2 sub unroll($$$) {
211 dpavlin 13 my $self = shift;
212    
213 dpavlin 2 my ($message,$part,$sub) = @_;
214    
215     my @arr;
216    
217 dpavlin 17 return if (! $message->$part);
218    
219 dpavlin 2 foreach my $from ($message->$part) {
220 dpavlin 16 my $tmp = $from->$sub || next;
221    
222     $tmp = $self->decode_qp($tmp);
223     push @arr, $tmp;
224 dpavlin 2 }
225 dpavlin 14
226     return @arr;
227 dpavlin 2 }
228 dpavlin 4
229     sub fetch_all_results {
230     my $self = shift;
231    
232     croak "results called before search!" if (! $self->{'index_ids'});
233    
234 dpavlin 14 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
235 dpavlin 13
236 dpavlin 4 my @arr;
237    
238     foreach my $id (@{$self->{'index_ids'}}) {
239     push @arr, $self->fetch_result_by_id($id);
240     }
241    
242 dpavlin 19
243 dpavlin 4 return @arr;
244     }
245    
246 dpavlin 2 sub fetch_result {
247     my $self = shift;
248    
249 dpavlin 3 my $args = {@_};
250    
251 dpavlin 2 croak "results called before search!" if (! $self->{'index_ids'});
252    
253     my $curr = $self->{'curr_result'}++;
254    
255     my $id = $self->{'index_ids'}->[$curr];
256 dpavlin 14
257     print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
258 dpavlin 2
259 dpavlin 4 return $self->fetch_result_by_id($id);
260     }
261 dpavlin 2
262 dpavlin 7 sub plain_text_body {
263     my $self = shift;
264     my $message = shift || croak "plain_text_body needs message!";
265    
266 dpavlin 14 my $body;
267    
268 dpavlin 7 if (! $message->isMultipart) {
269 dpavlin 14 $body = $message->decoded->string;
270 dpavlin 7 } else {
271     foreach my $part ($message->parts) {
272     if ($part->body->mimeType eq 'text/plain') {
273 dpavlin 14 $body = $part->decoded->string;
274     last;
275 dpavlin 7 }
276     }
277     }
278 dpavlin 14
279     # reformat with Text::Autoformat
280     my $wrap = $self->{wrap_margin};
281     if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
282 dpavlin 16 $body =~ s/[\r\n]/\n/gs;
283     $body = autoformat($body, {right=>$wrap});
284 dpavlin 14 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
285     }
286    
287     return $body;
288 dpavlin 7 }
289    
290    
291 dpavlin 4 sub fetch_result_by_id {
292     my $self = shift;
293    
294     my $id = shift || return;
295    
296 dpavlin 13 my $row = $self->{cache}->{$id};
297 dpavlin 2
298 dpavlin 13 if (! $row) {
299 dpavlin 2
300 dpavlin 13 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
301 dpavlin 2
302 dpavlin 17 my $message = $self->fetch_message($id) || return;
303 dpavlin 13
304     $row->{'id'} = $id;
305 dpavlin 19
306     foreach my $p (qw(from to cc bcc)) {
307     foreach my $v ($self->unroll($message,'from','phrase')) {
308     push @{$row->{$p}},$v;
309     $self->add_counter($p,$v);
310     }
311     }
312 dpavlin 16 $row->{'subject'} = $self->decode_qp($message->subject);
313 dpavlin 13 $row->{'body'} = $self->plain_text_body($message);
314     $row->{'date'} = $message->date;
315    
316     # XXX store in cache?
317     $self->{cache}->{$id} = $row;
318     print STDERR "$id stored in cache\n" if ($debug == 2);
319     } else {
320     print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
321     }
322    
323 dpavlin 2 return $row;
324    
325     }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26