/[mws]/trunk/mbox2index.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/mbox2index.pl

Parent Directory | Revision Log


Revision 41 - (hide annotations)
Mon May 10 20:26:17 2004 UTC (20 years ago) by dpavlin
File MIME type: text/plain
File size: 2043 byte(s)
major code restructuring: separation of indexer code into target-independent
and target-dependent parts, documentation improvements

#!/usr/bin/perl -w

# mbox2index.pl - build a search index from the mail folders named in a
# configuration file.
#
# Usage: mbox2index.pl [--recursive] [/path/to/local.conf]
#
# For every parameter found in the 'folders' section of the configuration,
# the folder is opened through the MWS object, each message is flattened
# into a hash (id, folder, address fields, subject, body, date) and that
# hash is handed to add_index() under the key "<folder> <message-id>".
#
# With --recursive the create_index() step is skipped.

use strict;
use warnings;

use lib '.';

use MWS::SWISH;
#use MWS::Plucene;
use Data::Dumper;
use Date::Parse;
use POSIX qw(strftime);
use Getopt::Long;

# Print the usage text and terminate with exit status 1.
sub usage {
    print qq{Usage: $0 [/path/to/local.conf]

If local.conf is not specified, global.conf in current directory will
be used.
};
    exit 1;
}

# are we called from this script?
my $recursive = 0;

# GetOptions() reports unknown switches itself and returns false; stop
# there instead of indexing with options the user did not intend.
GetOptions("recursive" => \$recursive) or usage();

my $config_file = shift @ARGV || 'global.conf';

if (! -f $config_file) {
    # name the file that was looked for, so the usage text is not the
    # only hint about what went wrong
    print STDERR "config file '$config_file' not found\n";
    usage();
}

my $mws = MWS::SWISH->new(config_file => $config_file);
#my $mws = MWS::Plucene->new(config_file => $config_file);

# the index is only created on a non-recursive invocation
$mws->create_index if (! $recursive);

print STDERR "starting indexing...";

my $debug = 1;

foreach my $mbox ($mws->{config}->Parameters('folders')) {
    my $mbox_path = $mws->{config}->val('folders', $mbox);

    print STDERR "working on $mbox [$mbox_path]\n" if ($debug);

    my $folder = $mws->open_folder($mbox);

    print STDERR $folder->size," bytes\n" if ($debug);

    foreach my $message ($folder->messages) {

        my $id = $message->messageId;

        my $document = {
            id => $id,
            folder => $mbox,
        };

        # one field per header/part pair, e.g. to_address or from_phrase;
        # multiple values are joined with "##", empty results are left out
        foreach my $direction (qw(to from cc bcc)) {
            foreach my $part (qw(phrase address comment)) {
                my @data = $mws->unroll($message,$direction,$part);
                $document->{$direction.'_'.$part} = join("##", @data) if (@data);
            }
        }

        $document->{'subject'} = $mws->decode_qp($message->subject) || 'no subject';

        $document->{'body'} = $mws->plain_text_body($message);

        # Take the first value in list context, which is what str2time()
        # received when the method call was passed to it directly.
        my ($date_header) = $message->date;

        # str2time() returns undef for a date it cannot parse; without a
        # fallback localtime() would warn and date_utime would stay undef
        # while date silently became the epoch.
        my $utime = defined $date_header ? str2time($date_header) : undef;
        if (! defined $utime) {
            print STDERR "no usable date in message $id, using epoch\n" if ($debug);
            $utime = 0;
        }

        $document->{'date_utime'} = $utime;
        $document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));

#       print Dumper($document);
        $mws->add_index("$mbox $id" => $document);

        # clear internal MWS cache to keep memory usage down
        # (this should be replaced by garbage collector in MWS,
        # but without it this is the best solution to keep machine
        # alive while indexing)
        $mws->{cache} = {};

        # this is not complete solution. see mailbox-destruct.diff
        $message->destruct();

    }

    $mws->close_folder($mbox);

}

$mws->close_index;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26