/[mws]/trunk/mbox2index.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/mbox2index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 13 - (hide annotations)
Thu May 6 16:53:40 2004 UTC (20 years ago) by dpavlin
Original Path: trunk/mbox2swish.pl
File MIME type: text/plain
File size: 1668 byte(s)
partial implementation for dates, more verbose debugging; the index should now
return all fields written to it (this will break the Plucene code, so it is
non-functional from now on)

#!/usr/bin/perl -w

# Walk every folder listed in the 'folders' section of global.conf and print
# one XML document per message on STDOUT, each preceded by Path-Name,
# Content-Length and Document-Type headers.
# NOTE(review): the header layout looks like the swish-e "prog" input format
# (the script was originally named mbox2swish.pl) - confirm against the
# indexer that consumes this output.

use strict;

use MWS;
use Data::Dumper;
use XML::Simple;
use Text::Iconv;
use Date::Parse;
use POSIX qw(strftime);

my $iso2utf = Text::Iconv->new('ISO-8859-2','UTF-8');

my $mws = MWS->new('global.conf');

my $debug = 1;

# Wrap a value in a CDATA section.
# A literal "]]>" inside the value would terminate the section early and
# produce malformed XML, so it is split across two adjacent sections.
# An undefined value is emitted as an empty section.
sub cdata {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/\]\]>/]]]]><![CDATA[>/g;
    return '<![CDATA[' . $value . ']]>';
}

foreach my $mbox ($mws->{config}->Parameters('folders')) {
    my $mbox_path = $mws->{config}->val('folders', $mbox);

    print STDERR "working on $mbox [$mbox_path]\n" if ($debug);

    my $folder = $mws->open_folder($mbox);

    foreach my $message ($folder->messages) {

        my $id = $message->messageId;

        my $document = {
            id     => $id,
            folder => $mbox,
        };

        # One field per address header and address component,
        # e.g. to_phrase, from_address, cc_comment; multiple values
        # are joined with "##".
        foreach my $direction (qw(to from cc bcc)) {
            foreach my $part (qw(phrase address comment)) {
                my @mail_fullnames = $mws->unroll($message,$direction,$part);
                $document->{$direction.'_'.$part} = join("##",@mail_fullnames);
            }
        }

        # Test definedness/length rather than truth so that a subject
        # of "0" is not replaced by the placeholder.
        my $subject = $message->get('Subject');
        $subject = 'no subject' unless (defined $subject && length $subject);
        $document->{'subject'} = $subject;

        my $body = $mws->plain_text_body($message);
        $body = '' unless defined $body;
        # Treat CRLF as a single line break so it yields one <br/>, not two.
        $body =~ s{\r\n|\r|\n}{<br/>}g;

        $document->{'body'} = $body;

        # str2time returns undef when the date cannot be parsed; feeding
        # that to localtime would silently produce a 1970 timestamp, so
        # emit empty date fields instead (the fields are still present).
        my $date  = $message->date;
        my $utime = defined $date ? str2time($date) : undef;

        if (defined $utime) {
            $document->{'date_utime'} = $utime;
            $document->{'date'} = strftime("%Y-%m-%d %H:%M:%S", localtime($utime));
        } else {
            print STDERR "no usable date in $mbox $id\n" if ($debug);
            $document->{'date_utime'} = '';
            $document->{'date'} = '';
        }

        # print Dumper($document);
        # $mws->add_index("$mbox $id" => $document);

        # my $xml = XMLout($document);

        # Sort the keys so the field order is stable between runs.
        my $xml = qq{<message>};
        foreach my $tag (sort keys %$document) {
            $xml .= "<$tag>" . cdata($document->{$tag}) . "</$tag>\n";
        }
        $xml .= qq{</message>};

        # convert() yields undef when the input cannot be converted;
        # skip the message rather than emit an empty document.
        my $utf8_xml = $iso2utf->convert($xml);
        unless (defined $utf8_xml) {
            print STDERR "skipping $mbox $id: ISO-8859-2 to UTF-8 conversion failed\n";
            next;
        }

        # Content-Length is counted in bytes (not characters); the +1
        # accounts for the newline printed after the document.
        # The bytes pragma is confined to this expression.
        my $content_length = do { use bytes; length($utf8_xml) } + 1;

        print "Path-Name: $mbox $id\n";
        print "Content-Length: $content_length\n";
        print "Document-Type: XML\n\n$utf8_xml\n";

    }

    $folder->close;
}

$mws->close_index;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26