/[mws]/trunk/mbox2index.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/mbox2index.pl

Parent Directory | Revision Log


Revision 43 - (hide annotations)
Tue May 11 15:36:51 2004 UTC (20 years ago) by dpavlin
File MIME type: text/plain
File size: 2134 byte(s)
don't lock mbox, commands now work when invoked as symlinks to installation
dir

#!/usr/bin/perl -w
#
# mbox2index.pl - index every mail folder named in an MWS configuration file.
#
# Usage: mbox2index.pl [--recursive] [/path/to/local.conf]
#
# For every parameter of the 'folders' section of the configuration the
# folder is opened through MWS, one document per message is built
# (addresses, subject, body, date) and handed to the index.
#
# Options:
#   --recursive   do not call create_index before indexing
#                 (see the "are we called from this script?" flag below)
#
# Exit status is 1 when the options are invalid or the configuration file
# does not exist.

use strict;
use warnings;

# Locate the installation directory even when this script is started through
# a symlink. FindBin follows the link chain (relative links included) and
# always yields a directory, which a bare readlink($0) does not.
use FindBin qw($RealBin);
use lib $RealBin;

use MWS::SWISH;
#use MWS::Plucene;
use Data::Dumper;
use Date::Parse;
use POSIX qw(strftime);
use Getopt::Long;

# print progress messages on STDERR
my $debug = 1;

# are we called from this script?
my $recursive = 0;

# Print the usage message on STDERR and exit with failure status.
sub usage {
    print STDERR qq{Usage: $0 [/path/to/local.conf]

If local.conf is not specified, global.conf in current directory will
be used.
};
    exit 1;
}

# Build the index document (a hash reference) for one message.
#
#   $mws     - MWS object used for header unrolling, decoding and body text
#   $mbox    - folder name as given in the configuration
#   $message - message object returned by $folder->messages
#
# Returns a hash reference with the keys id, folder, subject, body,
# date_utime, date and, for each non-empty combination,
# {to,from,cc,bcc}_{phrase,address,comment}.
sub _message_to_document {
    my ($mws, $mbox, $message) = @_;

    my $id = $message->messageId;

    my $document = {
        id     => $id,
        folder => $mbox,
    };

    # multiple values of one header part are stored as a single
    # "##"-separated string; empty combinations are left out
    foreach my $direction (qw(to from cc bcc)) {
        foreach my $part (qw(phrase address comment)) {
            my @data = $mws->unroll($message, $direction, $part);
            $document->{$direction.'_'.$part} = join("##", @data) if (@data);
        }
    }

    # test for defined and non-empty instead of truth, so that a subject
    # of "0" is not replaced by the placeholder
    my $subject = $mws->decode_qp($message->subject);
    $subject = 'no subject' unless (defined $subject && length $subject);
    $document->{'subject'} = $subject;

    $document->{'body'} = $mws->plain_text_body($message);

    # str2time() returns undef for a missing or unparseable date; fall back
    # to the epoch explicitly so that both date fields agree and localtime()
    # is never handed undef.
    # NOTE(review): the original stored undef in date_utime in that case -
    # confirm that the index backend is happy with 0.
    my $date_header = $message->date;
    my $utime = defined $date_header ? str2time($date_header) : undef;
    if (! defined $utime) {
        print STDERR "can't parse date of $mbox $id, using epoch\n" if ($debug);
        $utime = 0;
    }

    $document->{'date_utime'} = $utime;
    $document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));

    return $document;
}

# invalid options are an error, not something to ignore silently
GetOptions("recursive" => \$recursive) or usage();

my $config_file = shift(@ARGV) || 'global.conf';

usage() if (! -f $config_file);

my $mws = MWS::SWISH->new(config_file => $config_file);
#my $mws = MWS::Plucene->new(config_file => $config_file);

$mws->create_index if (! $recursive);

print STDERR "starting indexing...\n";

foreach my $mbox ($mws->{config}->Parameters('folders')) {
    my $mbox_path = $mws->{config}->val('folders', $mbox);

    print STDERR "working on $mbox [$mbox_path]\n" if ($debug);

    my $folder = $mws->open_folder($mbox);

    print STDERR $folder->size," bytes\n" if ($debug);

    foreach my $message ($folder->messages) {

        my $document = _message_to_document($mws, $mbox, $message);

#       print Dumper($document);
        $mws->add_index("$mbox $document->{id}" => $document);

        # clear internal MWS cache to keep memory usage down
        # (this should be replaced by garbage collector in MWS,
        # but without it this is the best solution to keep machine
        # alive while indexing)
        $mws->{cache} = {};

        # this is not complete solution. see mailbox-destruct.diff
        $message->destruct();

    }

    $mws->close_folder($mbox);

}

$mws->close_index;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26