/[mws]/trunk/mbox2index.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/mbox2index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 14 - (hide annotations)
Thu May 6 19:46:58 2004 UTC (20 years ago) by dpavlin
Original Path: trunk/mbox2swish.pl
File MIME type: text/plain
File size: 1420 byte(s)
Make mbox2swish universal for all indexers (moving index-specific
things into their own .pm) -- after MWS_plucene.pm update it should work with it.
Use Text::AutoFormat to re-format messages wider than wrap_margin
(from global.conf). Hard-coded ISO-8859-2 encoding now works correctly,
Subject searches now remove common subject prefixes, template updates.

#!/usr/bin/perl -w

# mbox2index.pl -- feed every message of every configured folder to the
# MWS indexer.
#
# The folder list is read from the 'folders' section of global.conf (via
# the MWS object's config). For each message a flat document hash is
# built (id, folder, address parts, subject, body, date) and handed to
# $mws->add_index; the index is closed once all folders are processed.

use strict;
use warnings;

use MWS;
use Data::Dumper;
use Date::Parse;
use POSIX qw(strftime);

my $mws = MWS->new('global.conf');

# Set to 0 to silence progress output on STDERR.
my $debug = 1;

foreach my $mbox ($mws->{config}->Parameters('folders')) {
    my $mbox_path = $mws->{config}->val('folders', $mbox);

    print STDERR "working on $mbox [$mbox_path]\n" if ($debug);

    # Fail with a readable message instead of "Can't call method ... on
    # an undefined value" further down.
    my $folder = $mws->open_folder($mbox);
    die "can't open folder $mbox [$mbox_path]\n" unless defined $folder;

    print STDERR $folder->size, " bytes\n" if ($debug);

    foreach my $message ($folder->messages) {

        my $id = $message->messageId;

        my $document = {
            id     => $id,
            folder => $mbox,
        };

        # One field per header/part combination (to_phrase, to_address,
        # ..., bcc_comment); multiple values are joined with "##".
        foreach my $direction (qw(to from cc bcc)) {
            foreach my $part (qw(phrase address comment)) {
                $document->{$direction.'_'.$part} = join("##", $mws->unroll($message,$direction,$part));
            }
        }

        $document->{'subject'} = $message->get('Subject') || 'no subject';

        $document->{'body'} = $mws->plain_text_body($message);

        # Force scalar context: str2time() accepts an optional second
        # (time zone) argument, so a list returned by ->date must not
        # leak into its argument list.
        my $date_header = scalar $message->date;

        # str2time() returns undef when it cannot parse its input. Make
        # the fallback explicit so that date_utime and date always agree
        # and no "uninitialized value" warnings are produced.
        my $utime = defined $date_header ? str2time($date_header) : undef;
        unless (defined $utime) {
            warn "message $id in $mbox has a missing or unparsable date, using epoch 0\n";
            $utime = 0;
        }

        $document->{'date_utime'} = $utime;
        $document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));

#       print Dumper($document);
        $mws->add_index("$mbox $id" => $document);

        # clear internal MWS cache to keep memory usage down
        # (this should be replaced by garbage collector in MWS,
        # but without it this is the best solution to keep machine
        # alive while indexing)
        $mws->{cache} = {};

        undef $message;

    }

    $mws->close_folder($mbox);
    #$folder->close;
}

$mws->close_index;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26