#!/usr/bin/perl -w

# Index all mail folders named in the 'folders' section of the
# configuration file, using the MWS::SWISH backend.
#
# Options:
#   --recursive   skip the initial create_index call (see $recursive below)
#
# Arguments:
#   optional path to a configuration file; 'global.conf' in the current
#   directory is used when none is given.

use strict;
use warnings;

use lib '.';

use MWS::SWISH;
#use MWS::Plucene;
use Data::Dumper;
use Date::Parse;
use POSIX qw(strftime);
use Getopt::Long;

my $usage = qq{Usage: $0 [/path/to/local.conf]

If local.conf is not specified, global.conf in current directory will
be used.
};

# are we called from this script?
my $recursive = 0;

# GetOptions() returns false (after warning on STDERR) when it meets an
# unknown or malformed option; stop instead of silently ignoring it.
if (! GetOptions("recursive" => \$recursive)) {
    print STDERR $usage;
    exit 1;
}

# Fall back to the default only when no (or an empty) path was given, so
# that a file literally named "0" is still accepted.
my $config_file = shift @ARGV;
$config_file = 'global.conf'
    unless (defined $config_file && length $config_file);

if (! -f $config_file) {
    print STDERR $usage;
    exit 1;
}

my $mws = MWS::SWISH->new(config_file => $config_file);
#my $mws = MWS::Plucene->new(config_file => $config_file);

$mws->create_index if (! $recursive);

print STDERR "starting indexing...\n";

my $debug = 1;

foreach my $mbox ($mws->{config}->Parameters('folders')) {
    my $mbox_path = $mws->{config}->val('folders', $mbox);

    print STDERR "working on $mbox [$mbox_path]\n" if ($debug);

    my $folder = $mws->open_folder($mbox);

    print STDERR $folder->size, " bytes\n" if ($debug);

    foreach my $message ($folder->messages) {

        my $id = $message->messageId;

        my $document = {
            id     => $id,
            folder => $mbox,
        };

        # Flatten each address header into "<direction>_<part>" fields
        # (e.g. to_address, from_phrase); multiple values are joined
        # with "##". Fields with no data are left out of the document.
        foreach my $direction (qw(to from cc bcc)) {
            foreach my $part (qw(phrase address comment)) {
                my @data = $mws->unroll($message, $direction, $part);
                $document->{$direction.'_'.$part} = join("##", @data) if (@data);
            }
        }

        $document->{'subject'} = $mws->decode_qp($message->subject) || 'no subject';

        $document->{'body'} = $mws->plain_text_body($message);

        # str2time() returns undef when the date is missing or cannot be
        # parsed. Report which message is affected and fall back to the
        # epoch so that date_utime and date stay consistent with each
        # other instead of feeding undef to localtime().
        my $utime = str2time($message->date);
        if (! defined $utime) {
            print STDERR "can't parse date of message $id in $mbox, using epoch\n";
            $utime = 0;
        }

        $document->{'date_utime'} = $utime;
        $document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));

#       print Dumper($document);
        $mws->add_index("$mbox $id" => $document);

        # clear internal MWS cache to keep memory usage down
        # (this should be replaced by garbage collector in MWS,
        # but without it this is the best solution to keep machine
        # alive while indexing)
        $mws->{cache} = {};

        # this is not complete solution. see mailbox-destruct.diff
        $message->destruct();

    }

    $mws->close_folder($mbox);

}

$mws->close_index;