--- trunk/mbox2swish.pl	2004/05/06 16:53:40	13
+++ trunk/mbox2swish.pl	2004/05/06 19:46:58	14
@@ -2,13 +2,9 @@
 
 use MWS;
 use Data::Dumper;
-use XML::Simple;
-use Text::Iconv;
 use Date::Parse;
 use POSIX qw(strftime);
 
-my $iso2utf = Text::Iconv->new('ISO-8859-2','UTF-8');
-
 my $mws = MWS->new('global.conf');
 
 my $debug = 1;
@@ -20,6 +16,8 @@
 
 	my $folder = $mws->open_folder($mbox);
 
+	print STDERR $folder->size," bytes\n" if ($debug);
+
 	foreach my $message ($folder->messages) {
 
 		my $id = $message->messageId;
@@ -31,16 +29,13 @@
 
 		foreach my $direction (qw(to from cc bcc)) {
 			foreach my $part (qw(phrase address comment)) {
-				my @mail_fullnames = $mws->unroll($message,$direction,$part);
-				$document->{$direction.'_'.$part} = join("##",@mail_fullnames);
+				$document->{$direction.'_'.$part} = join("##", $mws->unroll($message,$direction,$part));
 			}
 		}
 
 		$document->{'subject'} = $message->get('Subject') || 'no subject';
 
-		my $body = $mws->plain_text_body($message);
-		$body =~ s,[\n\r],<br/>,gs;
-		$document->{'body'} = $body;
+		$document->{'body'} = $mws->plain_text_body($message);
 
 		my $utime = str2time($message->date);
 
@@ -48,25 +43,20 @@
 		$document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
 
 #		print Dumper($document);
-#		$mws->add_index("$mbox $id" => $document);
+		$mws->add_index("$mbox $id" => $document);
 
-#		my $xml = XMLout($document);
-
-		my $xml = qq{};
-		foreach my $tag (keys %$document) {
-			$xml .= "<$tag><![CDATA[".$document->{$tag}."]]></$tag>\n";
-		}
-		$xml .= qq{};
+		# clear internal MWS cache to keep memory usage down
+		# (this should be replaced by garbage collector in MWS,
+		# but without it this is the best solution to keep machine
+		# alive while indexing)
+		$mws->{cache} = {};
 
-		$xml = $iso2utf->convert($xml);
-		use bytes;	# as opposed to chars
-		print "Path-Name: $mbox $id\n";
-		print "Content-Length: ".(length($xml)+1)."\n";
-		print "Document-Type: XML\n\n$xml\n";
+		undef $message;
 
 	}
 
-	$folder->close;
+	$mws->close_folder($mbox);
+	#$folder->close;
 }
 
 $mws->close_index;