/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 17 - (show annotations)
Fri May 7 11:25:01 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 5835 byte(s)
max_results option in configuration, better decoding of strange data
from headers

1 #!/usr/bin/perl -w
2
3 package MWS;
4
5 use strict;
6 use warnings;
7 use Carp;
8
9 use Mail::Box::Manager;
10 use Config::IniFiles;
11 use POSIX qw(strftime);
12 use Text::Autoformat;
13 use Text::Iconv;
14
15 #use MWS_plucene;
16 use MWS_swish;
17
18 require Exporter;
19
20 our @ISA = qw(Exporter);
21
22 our %EXPORT_TAGS = ();
23 our @EXPORT_OK;
24 our @EXPORT;
25
26 our $VERSION = '1.00';
27
28 my $folder; # placeholder for folders
29
30 my $debug = 2;
31
32 sub new {
33 my $class = shift;
34 my $self = {};
35 bless($self, $class);
36
37 my $config_file = shift || die "need index file";
38
39 $self->{config} = new Config::IniFiles( -file => $config_file );
40
41 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
42
43 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
44 $self->{index_file} = $index_file;
45
46 # placeholder for opened folders
47 $self->{folder} = {};
48
49 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
50 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
51
52 return $self;
53 }
54
55 sub mbox_name2path {
56 my $self = shift;
57
58 my $mbox = shift || croak "folder_name2path needs mbox name";
59
60 return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
61 }
62
63 sub open_folder {
64 my $self = shift;
65
66 my $mbox = shift || croak "open_folder needs mbox name";
67
68 if (! $self->{folder}->{$mbox}) {
69 my $mbox_path = $self->mbox_name2path($mbox);
70
71 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
72
73 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
74
75 print STDERR "open_folder($mbox) ok\n" if ($debug);
76 }
77
78 $self->{fetch_count} = 0;
79
80 return $self->{folder}->{$mbox};
81
82 }
83
84 sub close_folder {
85 my $self = shift;
86
87 my $mbox = shift || croak "open_folder needs mbox name";
88
89 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
90
91 # XXX this is rather agressive!!!
92 $self->{folder} = {};
93 return
94 }
95
96 sub fetch_message {
97 my $self = shift;
98
99 my $mbox_id = shift || die "need mbox_id!";
100 my ($mbox,$id) = split(/ /,$mbox_id);
101
102 # return message with ID
103 print STDERR "fetch $id from $mbox\n" if ($debug);
104
105 if ($self->{fetch_count}++ > 100) {
106 $self->close_folder($mbox);
107 print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
108 }
109
110 my $msg = $self->open_folder($mbox)->find($id);
111 if ($msg) {
112 return $msg;
113 } else {
114 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
115 return;
116 }
117 }
118
119
120 sub search {
121 my $self = shift;
122
123 my $s = shift || carp "search called without argument!";
124
125 print STDERR "search_index($s)\n" if ($debug == 2);
126 my @index_ids = $self->search_index($s);
127
128 $self->{'index_ids'} = \@index_ids;
129
130 #my $results = $#index_ids + 1;
131 #$self->{'results'} = $results;
132
133 my $results = $self->{'total_hits'} || ($#index_ids + 1);
134
135 $self->{'curr_result'} = 0;
136
137 print STDERR "$results results\n" if ($debug == 2);
138
139 return $results || 'error';
140 }
141
142 sub decode_qp($) {
143 my $self = shift;
144
145 my $tmp = shift || return;
146
147 sub decode($$) {
148 my ($cp,$qp) = @_;
149 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
150 print STDERR "decode($cp,$qp) -> " if ($debug == 2);
151 $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
152 $qp =~ s/_/ /g;
153 print STDERR "$qp\n" if ($debug == 2);
154 return $iconv->convert($qp) || $qp;
155 }
156
157 $tmp =~ s/=\?([^\?]+)\?Q\?(.+?)\?=/decode($1,$2)/ex;
158 $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
159 return $tmp;
160 }
161
162 sub unroll($$$) {
163 my $self = shift;
164
165 my ($message,$part,$sub) = @_;
166
167 my @arr;
168
169 return if (! $message->$part);
170
171 foreach my $from ($message->$part) {
172 my $tmp = $from->$sub || next;
173
174 $tmp = $self->decode_qp($tmp);
175 push @arr, $tmp;
176 }
177
178 return @arr;
179 }
180
181 sub fetch_all_results {
182 my $self = shift;
183
184 croak "results called before search!" if (! $self->{'index_ids'});
185
186 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
187
188 my @arr;
189
190 foreach my $id (@{$self->{'index_ids'}}) {
191 push @arr, $self->fetch_result_by_id($id);
192 }
193
194 return @arr;
195 }
196
197 sub fetch_result {
198 my $self = shift;
199
200 my $args = {@_};
201
202 croak "results called before search!" if (! $self->{'index_ids'});
203
204 my $curr = $self->{'curr_result'}++;
205
206 my $id = $self->{'index_ids'}->[$curr];
207
208 print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
209
210 return $self->fetch_result_by_id($id);
211 }
212
213 sub plain_text_body {
214 my $self = shift;
215 my $message = shift || croak "plain_text_body needs message!";
216
217 my $body;
218
219 if (! $message->isMultipart) {
220 $body = $message->decoded->string;
221 } else {
222 foreach my $part ($message->parts) {
223 if ($part->body->mimeType eq 'text/plain') {
224 $body = $part->decoded->string;
225 last;
226 }
227 }
228 }
229
230 # reformat with Text::Autoformat
231 my $wrap = $self->{wrap_margin};
232 if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
233 $body =~ s/[\r\n]/\n/gs;
234 $body = autoformat($body, {right=>$wrap});
235 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
236 }
237
238 return $body;
239 }
240
241
242 sub fetch_result_by_id {
243 my $self = shift;
244
245 my $id = shift || return;
246
247 my $row = $self->{cache}->{$id};
248
249 if (! $row) {
250
251 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
252
253 my $message = $self->fetch_message($id) || return;
254
255 $row->{'id'} = $id;
256 @{$row->{'from'}} = $self->unroll($message,'from','phrase');
257 @{$row->{'to'}} = $self->unroll($message,'to','phrase');
258 @{$row->{'cc'}} = $self->unroll($message,'cc','phrase');
259 $row->{'subject'} = $self->decode_qp($message->subject);
260 $row->{'body'} = $self->plain_text_body($message);
261 $row->{'date'} = $message->date;
262
263 # XXX store in cache?
264 $self->{cache}->{$id} = $row;
265 print STDERR "$id stored in cache\n" if ($debug == 2);
266 } else {
267 print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
268 }
269
270 return $row;
271
272 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26