/[mws]/trunk/MWS.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/MWS.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 17 - (hide annotations)
Fri May 7 11:25:01 2004 UTC (20 years ago) by dpavlin
File size: 5835 byte(s)
max_results option in configuration, better decoding of strange data
from headers

1 dpavlin 2 #!/usr/bin/perl -w
2    
3     package MWS;
4    
5     use strict;
6     use warnings;
7     use Carp;
8    
9 dpavlin 3 use Mail::Box::Manager;
10 dpavlin 4 use Config::IniFiles;
11 dpavlin 13 use POSIX qw(strftime);
12 dpavlin 14 use Text::Autoformat;
13 dpavlin 16 use Text::Iconv;
14 dpavlin 14
15 dpavlin 12 #use MWS_plucene;
16     use MWS_swish;
17 dpavlin 3
18 dpavlin 2 require Exporter;
19    
20     our @ISA = qw(Exporter);
21    
22     our %EXPORT_TAGS = ();
23     our @EXPORT_OK;
24     our @EXPORT;
25    
26     our $VERSION = '1.00';
27    
28     my $folder; # placeholder for folders
29    
30 dpavlin 13 my $debug = 2;
31 dpavlin 2
32     sub new {
33     my $class = shift;
34     my $self = {};
35     bless($self, $class);
36    
37 dpavlin 4 my $config_file = shift || die "need index file";
38 dpavlin 2
39 dpavlin 4 $self->{config} = new Config::IniFiles( -file => $config_file );
40 dpavlin 2
41 dpavlin 4 my $index_file = $self->{config}->val('global', 'index') || croak "can't find [index] section in config file with path of index";
42    
43 dpavlin 12 $self->{mgr} = Mail::Box::Manager->new(access => 'r');
44     $self->{index_file} = $index_file;
45 dpavlin 4
46 dpavlin 2 # placeholder for opened folders
47     $self->{folder} = {};
48    
49 dpavlin 14 $self->{wrap_margin} = $self->{config}->val('global', 'wrap_margin');
50 dpavlin 17 $self->{max_results} = $self->{config}->val('global', 'max_results') || 100;
51 dpavlin 14
52 dpavlin 2 return $self;
53     }
54    
55 dpavlin 14 sub mbox_name2path {
56     my $self = shift;
57    
58     my $mbox = shift || croak "folder_name2path needs mbox name";
59    
60     return $self->{config}->val('folders', $mbox) || croak "comeone removed folder $mbox from config?";
61     }
62    
63 dpavlin 7 sub open_folder {
64 dpavlin 2 my $self = shift;
65    
66 dpavlin 7 my $mbox = shift || croak "open_folder needs mbox name";
67 dpavlin 2
68     if (! $self->{folder}->{$mbox}) {
69 dpavlin 14 my $mbox_path = $self->mbox_name2path($mbox);
70    
71 dpavlin 13 print STDERR "about to open_folder($mbox)\n" if ($debug == 2);
72 dpavlin 14
73 dpavlin 4 $self->{folder}->{$mbox} = $self->{mgr}->open($mbox_path) || croak "can't open folder $mbox at '$mbox_path': $!";
74 dpavlin 14
75     print STDERR "open_folder($mbox) ok\n" if ($debug);
76 dpavlin 2 }
77    
78 dpavlin 16 $self->{fetch_count} = 0;
79    
80 dpavlin 7 return $self->{folder}->{$mbox};
81    
82     }
83    
84 dpavlin 14 sub close_folder {
85     my $self = shift;
86    
87     my $mbox = shift || croak "open_folder needs mbox name";
88    
89 dpavlin 16 $self->{folder}->{$mbox}->close(write => 'NEVER') || croak "can't close folder $mbox";
90    
91     # XXX this is rather agressive!!!
92     $self->{folder} = {};
93     return
94 dpavlin 14 }
95    
96 dpavlin 7 sub fetch_message {
97     my $self = shift;
98    
99     my $mbox_id = shift || die "need mbox_id!";
100     my ($mbox,$id) = split(/ /,$mbox_id);
101    
102     # return message with ID
103 dpavlin 13 print STDERR "fetch $id from $mbox\n" if ($debug);
104 dpavlin 16
105     if ($self->{fetch_count}++ > 100) {
106     $self->close_folder($mbox);
107     print STDERR "close_folder($mbox) forced on ",$self->{fetch_count},"iteration\n";
108     }
109    
110 dpavlin 17 my $msg = $self->open_folder($mbox)->find($id);
111     if ($msg) {
112     return $msg;
113     } else {
114 dpavlin 2 print STDERR "can't find message $id in $mbox. Time to re-index?\n";
115 dpavlin 17 return;
116     }
117 dpavlin 2 }
118    
119    
120     sub search {
121     my $self = shift;
122    
123     my $s = shift || carp "search called without argument!";
124    
125 dpavlin 13 print STDERR "search_index($s)\n" if ($debug == 2);
126 dpavlin 9 my @index_ids = $self->search_index($s);
127 dpavlin 2
128     $self->{'index_ids'} = \@index_ids;
129    
130 dpavlin 17 #my $results = $#index_ids + 1;
131     #$self->{'results'} = $results;
132    
133     my $results = $self->{'total_hits'} || ($#index_ids + 1);
134 dpavlin 2
135     $self->{'curr_result'} = 0;
136    
137 dpavlin 13 print STDERR "$results results\n" if ($debug == 2);
138    
139 dpavlin 2 return $results || 'error';
140     }
141    
142 dpavlin 16 sub decode_qp($) {
143     my $self = shift;
144    
145     my $tmp = shift || return;
146    
147     sub decode($$) {
148     my ($cp,$qp) = @_;
149     my $iconv = Text::Iconv->new($cp,'ISO-8859-2');
150     print STDERR "decode($cp,$qp) -> " if ($debug == 2);
151     $qp =~ s/=([a-f0-9][a-f0-9])/chr(hex($1))/ieg;
152     $qp =~ s/_/ /g;
153     print STDERR "$qp\n" if ($debug == 2);
154 dpavlin 17 return $iconv->convert($qp) || $qp;
155 dpavlin 16 }
156    
157 dpavlin 17 $tmp =~ s/=\?([^\?]+)\?Q\?(.+?)\?=/decode($1,$2)/ex;
158     $tmp =~ s/^\s*["']+(.*?)["']+\s*$/$1/g;
159 dpavlin 16 return $tmp;
160     }
161    
162 dpavlin 2 sub unroll($$$) {
163 dpavlin 13 my $self = shift;
164    
165 dpavlin 2 my ($message,$part,$sub) = @_;
166    
167     my @arr;
168    
169 dpavlin 17 return if (! $message->$part);
170    
171 dpavlin 2 foreach my $from ($message->$part) {
172 dpavlin 16 my $tmp = $from->$sub || next;
173    
174     $tmp = $self->decode_qp($tmp);
175     push @arr, $tmp;
176 dpavlin 2 }
177 dpavlin 14
178     return @arr;
179 dpavlin 2 }
180 dpavlin 4
181     sub fetch_all_results {
182     my $self = shift;
183    
184     croak "results called before search!" if (! $self->{'index_ids'});
185    
186 dpavlin 14 print STDERR "fetch_all_results [",scalar @{$self->{'index_ids'}},"]\n" if ($debug == 2);
187 dpavlin 13
188 dpavlin 4 my @arr;
189    
190     foreach my $id (@{$self->{'index_ids'}}) {
191     push @arr, $self->fetch_result_by_id($id);
192     }
193    
194     return @arr;
195     }
196    
197 dpavlin 2 sub fetch_result {
198     my $self = shift;
199    
200 dpavlin 3 my $args = {@_};
201    
202 dpavlin 2 croak "results called before search!" if (! $self->{'index_ids'});
203    
204     my $curr = $self->{'curr_result'}++;
205    
206     my $id = $self->{'index_ids'}->[$curr];
207 dpavlin 14
208     print STDERR "fetch_result: $curr = $id\n" if ($debug == 2);
209 dpavlin 2
210 dpavlin 4 return $self->fetch_result_by_id($id);
211     }
212 dpavlin 2
213 dpavlin 7 sub plain_text_body {
214     my $self = shift;
215     my $message = shift || croak "plain_text_body needs message!";
216    
217 dpavlin 14 my $body;
218    
219 dpavlin 7 if (! $message->isMultipart) {
220 dpavlin 14 $body = $message->decoded->string;
221 dpavlin 7 } else {
222     foreach my $part ($message->parts) {
223     if ($part->body->mimeType eq 'text/plain') {
224 dpavlin 14 $body = $part->decoded->string;
225     last;
226 dpavlin 7 }
227     }
228     }
229 dpavlin 14
230     # reformat with Text::Autoformat
231     my $wrap = $self->{wrap_margin};
232     if ($wrap && $body && $body =~ m/^.{$wrap}..*$/m) {
233 dpavlin 16 $body =~ s/[\r\n]/\n/gs;
234     $body = autoformat($body, {right=>$wrap});
235 dpavlin 14 $body .="\n[reformated using autoformat, margin at $wrap]" if ($debug == 2);
236     }
237    
238     return $body;
239 dpavlin 7 }
240    
241    
242 dpavlin 4 sub fetch_result_by_id {
243     my $self = shift;
244    
245     my $id = shift || return;
246    
247 dpavlin 13 my $row = $self->{cache}->{$id};
248 dpavlin 2
249 dpavlin 13 if (! $row) {
250 dpavlin 2
251 dpavlin 13 print STDERR "fetch_result_by_id($id) not in cache, hitting disk\n" if ($debug == 2);
252 dpavlin 2
253 dpavlin 17 my $message = $self->fetch_message($id) || return;
254 dpavlin 13
255     $row->{'id'} = $id;
256 dpavlin 14 @{$row->{'from'}} = $self->unroll($message,'from','phrase');
257     @{$row->{'to'}} = $self->unroll($message,'to','phrase');
258     @{$row->{'cc'}} = $self->unroll($message,'cc','phrase');
259 dpavlin 16 $row->{'subject'} = $self->decode_qp($message->subject);
260 dpavlin 13 $row->{'body'} = $self->plain_text_body($message);
261     $row->{'date'} = $message->date;
262    
263     # XXX store in cache?
264     $self->{cache}->{$id} = $row;
265     print STDERR "$id stored in cache\n" if ($debug == 2);
266     } else {
267     print STDERR "fetch_result_by_id($id) in cache\n" if ($debug == 2);
268     }
269    
270 dpavlin 2 return $row;
271    
272     }

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26