/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 16 - (show annotations)
Thu May 6 23:06:08 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 5632 byte(s)
first "all-clickable-things-are-working" version (0.1)

- primitive folder close/open hack after 100 accesses to keep memory
  usage down (primary in httpd.pl)
- correctly decode Subject/From/To lines
- indexing script is now mbox2index.pl


1 #!/usr/bin/perl -w
2
3 package MWS;
4
5 use strict;
6 use warnings;
7 use Carp;
8
9 use Mail::Box::Manager;
10 use Config::IniFiles;
11 use POSIX qw(strftime);
12 use Text::Autoformat;
13 use Text::Iconv;
14
15 #use MWS_plucene;
16 use MWS_swish;
17
18 require Exporter;
19
20 our @ISA = qw(Exporter);
21
22 our %EXPORT_TAGS = ();
23 our @EXPORT_OK;
24 our @EXPORT;
25
26 our $VERSION = '1.00';
27
28 my $folder; # placeholder for folders
29
30 my $debug = 2;
31
32 sub new {
33 my $class = shift;
34 my $self = {};
35 bless($self, $class);
36
37 my $config_file = shift || die "need index file";
38
39 $self->{config} = new Config::IniFiles( -file => $config_file );
40
41 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
42
43 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
44 $self->{index_file} = $index_file;
45
46 # placeholder for opened folders
47 $self->{folder} = {};
48
49 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
50
51 return $self;
52 }
53
54 sub mbox_name2path {
55 my $self = shift;
56
57 my $mbox = shift || croak "folder_name2path needs mbox name";
58
59 return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
60 }
61
62 sub open_folder {
63 my $self = shift;
64
65 my $mbox = shift || croak "open_folder needs mbox name";
66
67 if (! $self->{folder}->{$mbox}) {
68 my $mbox_path = $self->mbox_name2path($mbox);
69
70 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
71
72 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
73
74 print STDERR "open_folder($mbox) ok\n" if ($debug);
75 }
76
77 $self->{fetch_count} = 0;
78
79 return $self->{folder}->{$mbox};
80
81 }
82
83 sub close_folder {
84 my $self = shift;
85
86 my $mbox = shift || croak "open_folder needs mbox name";
87
88 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
89
90 # XXX this is rather agressive!!!
91 $self->{folder} = {};
92 return
93 }
94
95 sub fetch_message {
96 my $self = shift;
97
98 my $mbox_id = shift || die "need mbox_id!";
99 my ($mbox,$id) = split(/ /,$mbox_id);
100
101 # return message with ID
102 print STDERR "fetch $id from $mbox\n" if ($debug);
103
104 if ($self->{fetch_count}++ > 100) {
105 $self->close_folder($mbox);
106 print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
107 }
108
109 return $self->open_folder($mbox)->find($id) ||
110 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
111 }
112
113
114 sub search {
115 my $self = shift;
116
117 my $s = shift || carp "search called without argument!";
118
119 print STDERR "search_index($s)\n" if ($debug == 2);
120 my @index_ids = $self->search_index($s);
121
122 $self->{'index_ids'} = \@index_ids;
123
124 my $results = $#index_ids + 1;
125 $self->{'results'} = $results;
126
127 $self->{'curr_result'} = 0;
128
129 print STDERR "$results results\n" if ($debug == 2);
130
131 return $results || 'error';
132 }
133
134 sub decode_qp($) {
135 my $self = shift;
136
137 my $tmp = shift || return;
138
139 sub decode($$) {
140 my ($cp,$qp) = @_;
141 my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
142 print STDERR "decode($cp,$qp) -> " if ($debug == 2);
143 $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
144 $qp =~ s/_/ /g;
145 print STDERR "$qp\n" if ($debug == 2);
146 return $iconv->convert($qp);
147 }
148
149 $tmp =~ s/=\?([^\?]+)\?Q\?(.+)\?=/decode($1,$2)/ex;
150 return $tmp;
151 }
152
153 sub unroll($$$) {
154 my $self = shift;
155
156 my ($message,$part,$sub) = @_;
157
158 my @arr;
159
160 foreach my $from ($message->$part) {
161 my $tmp = $from->$sub || next;
162
163 $tmp = $self->decode_qp($tmp);
164 $tmp =~ s/^\s*["'](.*)["']\s*$/$1/;
165 push @arr, $tmp;
166 }
167
168 return @arr;
169 }
170
171 sub fetch_all_results {
172 my $self = shift;
173
174 croak "results called before search!" if (! $self->{'index_ids'});
175
176 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
177
178 my @arr;
179
180 foreach my $id (@{$self->{'index_ids'}}) {
181 push @arr, $self->fetch_result_by_id($id);
182 }
183
184 return @arr;
185 }
186
187 sub fetch_result {
188 my $self = shift;
189
190 my $args = {@_};
191
192 croak "results called before search!" if (! $self->{'index_ids'});
193
194 my $curr = $self->{'curr_result'}++;
195
196 my $id = $self->{'index_ids'}->[$curr];
197
198 print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
199
200 return $self->fetch_result_by_id($id);
201 }
202
203 sub plain_text_body {
204 my $self = shift;
205 my $message = shift || croak "plain_text_body needs message!";
206
207 my $body;
208
209 if (! $message->isMultipart) {
210 $body = $message->decoded->string;
211 } else {
212 foreach my $part ($message->parts) {
213 if ($part->body->mimeType eq 'text/plain') {
214 $body = $part->decoded->string;
215 last;
216 }
217 }
218 }
219
220 # reformat with Text::Autoformat
221 my $wrap = $self->{wrap_margin};
222 if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
223 $body =~ s/[\r\n]/\n/gs;
224 $body = autoformat($body, {right=>$wrap});
225 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
226 }
227
228 return $body;
229 }
230
231
232 sub fetch_result_by_id {
233 my $self = shift;
234
235 my $id = shift || return;
236
237 my $row = $self->{cache}->{$id};
238
239 if (! $row) {
240
241 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
242
243 my $message = $self->fetch_message($id) || print STDERR "can't fetch message '$id'";
244
245 $row->{'id'} = $id;
246 @{$row->{'from'}} = $self->unroll($message,'from','phrase');
247 @{$row->{'to'}} = $self->unroll($message,'to','phrase');
248 @{$row->{'cc'}} = $self->unroll($message,'cc','phrase');
249 $row->{'subject'} = $self->decode_qp($message->subject);
250 $row->{'body'} = $self->plain_text_body($message);
251 $row->{'date'} = $message->date;
252
253 # XXX store in cache?
254 $self->{cache}->{$id} = $row;
255 print STDERR "$id stored in cache\n" if ($debug == 2);
256 } else {
257 print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
258 }
259
260 return $row;
261
262 }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26