5 |
unshift(@INC, $basedir); |
unshift(@INC, $basedir); |
6 |
} |
} |
7 |
|
|
8 |
|
=head1 NAME |
9 |
|
|
10 |
|
mbox2index.pl - indexing script for Mail::Box Web Search |
11 |
|
|
12 |
|
=head1 SYNOPSIS |
13 |
|
|
14 |
|
mbox2index.pl [local.conf] |
15 |
|
|
16 |
|
=head1 DESCRIPTION |
17 |
|
|
18 |
|
This script will index mailboxes defined in C<global.conf> or in a local |
19 |
|
configuration file supplied on the command line. |
20 |
|
|
21 |
|
In normal operation with MWS::SWISH, it will exec swish-e, which will in |
22 |
|
turn call this script again, this time with the C<--recursive> option. |
23 |
|
|
24 |
|
=head1 SEE ALSO |
25 |
|
|
26 |
|
The C<MWS> Perl modules, which are part of this package. |
27 |
|
|
28 |
|
=cut |
29 |
|
|
30 |
use MWS::SWISH; |
use MWS::SWISH; |
31 |
#use MWS::Plucene; |
#use MWS::Plucene; |
32 |
use Data::Dumper; |
use Data::Dumper; |
66 |
|
|
67 |
my $folder = $mws->open_folder($mbox); |
my $folder = $mws->open_folder($mbox); |
68 |
|
|
69 |
print STDERR $folder->size," bytes\n" if ($debug); |
my $total = scalar $folder->messageIds; |
70 |
|
|
71 |
|
print STDERR "$total messages\n" if ($debug); |
72 |
|
|
73 |
|
my $count = 0; |
74 |
|
|
75 |
foreach my $message ($folder->messages) { |
foreach my $message ($folder->messages) { |
76 |
|
|
84 |
foreach my $direction (qw(to from cc bcc)) { |
foreach my $direction (qw(to from cc bcc)) { |
85 |
foreach my $part (qw(phrase address comment)) { |
foreach my $part (qw(phrase address comment)) { |
86 |
my @data = $mws->unroll($message,$direction,$part); |
my @data = $mws->unroll($message,$direction,$part); |
87 |
$document->{$direction.'_'.$part} = join("##", @data) if (@data); |
if (@data) { |
88 |
|
$document->{$direction.'_'.$part} = join("##", @data); |
89 |
|
$document->{$direction.'_'.$part} =~ s/\s*\(e\s*-\s*mail\)\s*//gi; |
90 |
|
} |
91 |
} |
} |
92 |
} |
} |
93 |
|
|
98 |
my $utime = str2time($message->date); |
my $utime = str2time($message->date); |
99 |
|
|
100 |
$document->{'date_utime'} = $utime; |
$document->{'date_utime'} = $utime; |
101 |
$document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime)); |
$document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime)) if ($utime); |
102 |
|
|
103 |
# print Dumper($document); |
# print Dumper($document); |
104 |
$mws->add_index("$mbox $id" => $document); |
$mws->add_index("$mbox $id" => $document); |
112 |
# this is not a complete solution; see mailbox-destruct.diff |
# this is not a complete solution; see mailbox-destruct.diff |
113 |
$message->destruct(); |
$message->destruct(); |
114 |
|
|
115 |
|
$count++; |
116 |
|
printf STDERR "%d messages in $mbox done [%d %%]\n",$count,($count * 100/$total) if ($count % 100 == 0); |
117 |
|
|
118 |
} |
} |
119 |
|
|
120 |
$mws->close_folder($mbox); |
$mws->close_folder($mbox); |