--- trunk/mbox2swish.pl 2004/05/06 16:53:40 13
+++ trunk/mbox2swish.pl 2004/05/06 19:46:58 14
@@ -2,13 +2,9 @@
use MWS;
use Data::Dumper;
-use XML::Simple;
-use Text::Iconv;
use Date::Parse;
use POSIX qw(strftime);
-my $iso2utf = Text::Iconv->new('ISO-8859-2','UTF-8');
-
my $mws = MWS->new('global.conf');
my $debug = 1;
@@ -20,6 +16,8 @@
my $folder = $mws->open_folder($mbox);
+ print STDERR $folder->size," bytes\n" if ($debug);
+
foreach my $message ($folder->messages) {
my $id = $message->messageId;
@@ -31,16 +29,13 @@
foreach my $direction (qw(to from cc bcc)) {
foreach my $part (qw(phrase address comment)) {
- my @mail_fullnames = $mws->unroll($message,$direction,$part);
- $document->{$direction.'_'.$part} = join("##",@mail_fullnames);
+ $document->{$direction.'_'.$part} = join("##", $mws->unroll($message,$direction,$part));
}
}
$document->{'subject'} = $message->get('Subject') || 'no subject';
- my $body = $mws->plain_text_body($message);
- $body =~ s,[\n\r],
,gs;
- $document->{'body'} = $body;
+ $document->{'body'} = $mws->plain_text_body($message);
my $utime = str2time($message->date);
@@ -48,25 +43,20 @@
$document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
# print Dumper($document);
-# $mws->add_index("$mbox $id" => $document);
+ $mws->add_index("$mbox $id" => $document);
-# my $xml = XMLout($document);
-
- my $xml = qq{};
- foreach my $tag (keys %$document) {
- $xml .= "<$tag><![CDATA[".$document->{$tag}."]]></$tag>\n";
- }
- $xml .= qq{};
+ # Clear the internal MWS cache to keep memory usage down.
+ # (This should eventually be replaced by proper garbage collection
+ # inside MWS, but until that exists this is the best way to keep
+ # the machine alive while indexing.)
+ $mws->{cache} = {};
- $xml = $iso2utf->convert($xml);
- use bytes; # as opposed to chars
- print "Path-Name: $mbox $id\n";
- print "Content-Length: ".(length($xml)+1)."\n";
- print "Document-Type: XML\n\n$xml\n";
+ undef $message;
}
- $folder->close;
+ $mws->close_folder($mbox);
+ #$folder->close;
}
$mws->close_index;