/[mws]/trunk/MWS_swish.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/MWS_swish.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 27 - (show annotations)
Sat May 8 20:34:26 2004 UTC (19 years, 11 months ago) by dpavlin
File size: 4597 byte(s)
this is 0.9-rc1:
- scripts now accept configuration file as parametar, if none is specified,
  they will use global.conf in current directory
- more css design
- how to install using PAR
- mbox2index can now call swish-e by itself, swish-e configuration
  moved to MWS_swish.pm
- httpd server now shows 30 newest messages in this year when accessed
  through root URL

1 #!/usr/bin/perl -w
2
3 use strict;
4
5 #
6 # simple implementation to use SWISH-e with queryies like
7 # Lucene (subject:something)
8 #
9
10 use SWISH::API;
11 use Text::Iconv;
12 use Data::Dumper;
13 use File::Temp qw/ :mktemp /;
14
15 my $iso2utf = Text::Iconv->new('ISO-8859-2','UTF-8');
16 my $utf2iso = Text::Iconv->new('UTF-8','ISO-8859-2');
17
18 sub open_index {
19 my $self = shift;
20
21 my $swish = $self->{index};
22
23 if (! $swish) {
24
25 my $index_file = $self->{index_dir}."/".$self->{config_name};
26 print STDERR "opening index '$index_file'\n";
27 $swish = SWISH::API->new($index_file);
28 $swish->AbortLastError if $swish->Error;
29
30 $self->{index} = $swish;
31 }
32
33 return $swish;
34 }
35
36 sub search_index {
37 my $self = shift;
38
39 croak "search_index needs query" if (! @_);
40
41 my $index = $self->open_index;
42
43 my $sw;
44
45 my $sort_map = {
46 'date' => 'date_utime',
47 'from' => 'from_phrase',
48 'to' => 'to_phrase',
49 'rank' => 'swishrank',
50 };
51
52 my $sort_by;
53
54 foreach my $s (@_) {
55
56 if ($s =~ /^\s*(\w+):(.+)\s*$/) {
57 my ($f,$v) = ($1,$2);
58 if (lc($f) eq "sort") {
59 my ($sf,$sv) = split(/ /,$v,2);
60 $sort_by = $sort_map->{$sf} || croak "unsupported sort by field $v - fix sort_map";
61 $sort_by .= " $sv";
62 } else {
63 $sw .= "$f=($v)";
64 }
65 } else {
66 # and/or/not operators
67 $sw .= " $s ";
68 }
69 }
70
71 print STDERR "swish search: $sw\n";
72
73 # convert to UTF-8
74 $sw = $iso2utf->convert($sw) || $sw;
75
76 my $search = $index->New_Search_Object;
77 $search->SetSort( $sort_by );
78 my $results = $search->Execute($sw);
79
80 # store total number of hits
81 $self->{'total_hits'} = $results->Hits;
82
83 my @res_ids;
84
85 my $count = 1;
86
87 while ( my $r = $results->NextResult ) {
88
89 sub p($$) {
90 my ($r,$prop) = @_;
91 $prop = $r->Property($prop);
92 $prop =~ s/##lf##/\n/gs;
93 return $utf2iso->convert($prop);
94 }
95
96 my $id = p($r,"swishdocpath");
97 push @res_ids, $id;
98
99 foreach my $p (qw(from to cc bcc)) {
100 @{$self->{cache}->{$id}->{$p}} = ();
101 foreach my $v (split(/##/, p($r,$p.'_phrase'))) {
102 push @{$self->{cache}->{$id}->{$p}}, $v;
103 $self->add_counter($p,$v);
104 }
105 }
106
107 foreach my $p (qw(subject body date date_utime)) {
108 $self->{cache}->{$id}->{$p} = p($r,$p);
109 }
110
111 $self->add_counter_calendar(p($r,'date_utime'));
112
113 # this is redundant, but needed for templates later...
114 $self->{cache}->{$id}->{'id'} = $id;
115
116 last if (++$count > $self->{max_results});
117 }
118
119 return @res_ids;
120 }
121
122 # this function can be null for indexes which doesn't need special
123 # setup before add_index is called. however, swish-e support will
124 # fork swish binary to create index at this point
125 sub create_index {
126 my $self = shift;
127
128 my $index_prog = $0 || die "can't deduce my own name!";
129 my $config_file = $self->{config_file} || die "no self->config_file";
130 my $index_file = $self->{index_dir} || die "no self->index_dir";
131 $index_file .= "/";
132 $index_file .= $self->{config_name} || die "no self->config_name";
133
134 my ($tmp_fh, $swish_config_file) = mkstemp("/tmp/swishXXXXX");
135
136 print STDERR "creating swish-e configuration file $swish_config_file\n";
137
138 my $swish_config = qq{
139 # swish-e config file
140
141 IndexDir $index_prog
142 SwishProgParameters --recursive $config_file
143
144 # input file definition
145 DefaultContents XML2
146
147 # indexed metatags
148 MetaNames xml swishdocpath
149
150 # stored metatags
151 PropertyNames from_phrase from_address
152 PropertyNames to_phrase to_address
153 PropertyNames cc_phrase cc_address
154 PropertyNames subject body
155 #PropertyNamesDate date
156 PropertyNamesNumeric date_utime
157 PropertyNames date
158
159 #XMLClassAttributes type
160 UndefinedMetaTags auto
161 UndefinedXMLAttributes auto
162
163 IndexFile $index_file
164
165 # Croatian ISO-8859-2 characters to unaccented equivalents
166 TranslateCharacters ¹©ðÐèÈæƾ® ssddcccczz
167
168
169 # debug
170 ParserWarnLevel 3
171 IndexReport 1
172
173 };
174
175 print $tmp_fh $swish_config;
176 close($tmp_fh);
177
178 exec "swish-e -S prog -c $swish_config_file" || die "can't fork swish with $swish_config_file";
179 exit 0;
180
181 }
182
183 sub add_index {
184 my $self = shift;
185
186 my $mbox_id = shift || croak "add_index needs mbox_id";
187 my $document = shift || croak "add_index needs document";
188
189 my ($mbox,$id) = split(/\s/,$mbox_id,2);
190
191 my $xml = qq{<message>};
192 foreach my $tag (keys %$document) {
193 my $data = $document->{$tag};
194 next if (! $data || $data eq '');
195 # save [cr/]lf before conversion to XML
196 $data =~ s/\n\r/##lf##/gs;
197 $data =~ s/\n/##lf##/gs;
198 $xml .= "<$tag><![CDATA[".$data."]]></$tag>\n";
199 }
200 $xml .= qq{</message>};
201
202 $xml = $iso2utf->convert($xml);
203 use bytes; # as opposed to chars
204 print "Path-Name: $mbox $id\n";
205 print "Content-Length: ".(length($xml)+1)."\n";
206 print "Document-Type: XML\n\n$xml\n";
207
208 }
209
210 sub close_index {
211 my $self = shift;
212
213 }
214
215 1;

  ViewVC Help
Powered by ViewVC 1.1.26