/[mws]/trunk/MWS_swish.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/MWS_swish.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 18 - (hide annotations)
Fri May 7 12:41:16 2004 UTC (20 years ago) by dpavlin
File size: 2454 byte(s)
fixed maximum number of displayed results, search queries with 8-bit
characters

1 dpavlin 12 #!/usr/bin/perl -w
2    
3     use strict;
4    
5     #
6     # simple implementation to use SWISH-e with queries like
7     # Lucene (subject:something)
8     #
9    
10     use SWISH::API;
11 dpavlin 14 use Text::Iconv;
12     use Data::Dumper;
13 dpavlin 12
# Shared character-set converters used by the subs in this file:
# $iso2utf turns ISO-8859-2 text into UTF-8 (queries and documents sent to
# swish-e), $utf2iso turns UTF-8 back into ISO-8859-2 (properties read from
# search results).
my ($iso2utf, $utf2iso) = map { Text::Iconv->new(@{$_}) } (
    [ 'ISO-8859-2', 'UTF-8' ],
    [ 'UTF-8', 'ISO-8859-2' ],
);
16    
# Return the SWISH::API handle for this object, opening the index on first
# use.
#
# The handle is cached in $self->{index}; when a cached handle exists it is
# returned as-is and the index is not reopened.  Otherwise the index is
# opened from "$self->{index_file}/swish-e" and stored in $self->{index}.
#
# Croaks if no cached handle exists and $self->{index_file} is not set.
# If the freshly created handle reports an error, AbortLastError is called
# on it.
sub open_index {
    my $self = shift;

    # fast path: index was already opened earlier
    return $self->{index} if $self->{index};

    # This file does not import croak anywhere, so load Carp here and call
    # it fully qualified instead of relying on the including code.
    require Carp;

    my $index_file = $self->{index_file}
        || Carp::croak("open_index needs index filename");
    $index_file .= "/swish-e";
    print STDERR "opening index '$index_file'\n";

    my $swish = SWISH::API->new($index_file);
    $swish->AbortLastError if $swish->Error;

    $self->{index} = $swish;

    return $swish;
}
35    
# Run a query against the swish-e index and cache the matching messages.
#
# Arguments:
#   $s - query string (required).  A Lucene-style "field:value" query is
#        rewritten to the swish-e form "field=(value)"; only the first
#        colon is treated as the separator.
#
# Side effects:
#   $self->{total_hits}  - set to the hit count reported for the query
#   $self->{cache}{$id}  - filled for every returned result with the keys
#                          from, to, cc, bcc (array refs), subject, body,
#                          date and id
#
# Returns the list of result ids (the "swishdocpath" property of each
# result), in the order swish-e delivered them.  At most
# $self->{max_results} results are processed; when max_results is not
# defined, all results are returned.
#
# Croaks if no query is given.
sub search_index {
    my $self = shift;

    # This file does not import croak anywhere, so load Carp here and call
    # it fully qualified instead of relying on the including code.
    require Carp;

    my $s = shift || Carp::croak("search_index needs query");

    my $index = $self->open_index;

    if ($s =~ /:/) {
        my ($fld,$val) = split(/:/,$s,2);
        $s = "$fld=($val)";
    }

    print STDERR "swish search: $s\n";

    # convert to UTF-8, falling back to the unconverted query if the
    # conversion yields nothing
    $s = $iso2utf->convert($s) || $s;
    my $results = $index->Query($s);

    # store total number of hits
    $self->{'total_hits'} = $results->Hits;

    # Fetch one property of a result, restore the newlines that add_index
    # encoded as "##lf##" and convert the text back to ISO-8859-2.
    # A lexical coderef is used so that no global helper sub is installed.
    # Returns undef for a property that has no value.
    my $fetch_property = sub {
        my ($result, $name) = @_;
        my $value = $result->Property($name);
        return unless defined $value;
        $value =~ s/##lf##/\n/gs;
        return $utf2iso->convert($value);
    };

    my $max_results = $self->{max_results};
    my @res_ids;

    while ( my $r = $results->NextResult ) {

        my $id = $fetch_property->($r, "swishdocpath");
        push @res_ids, $id;

        my $entry = ($self->{cache}->{$id} ||= {});

        # address headers are stored as "##"-separated lists
        foreach my $p (qw(from to cc bcc)) {
            my $addresses = $fetch_property->($r, $p.'_phrase');
            @{$entry->{$p}} = defined $addresses ? split(/##/, $addresses) : ();
        }

        foreach my $p (qw(subject body date)) {
            my $text = $fetch_property->($r, $p);
            $entry->{$p} = $text;
        }

        # this is redundant, but needed for templates later...
        $entry->{'id'} = $id;

        # stop once the configured maximum is reached; an undefined
        # max_results means "no limit" instead of truncating to one hit
        last if defined $max_results && @res_ids >= $max_results;
    }

    return @res_ids;
}
89    
# Emit one document on STDOUT as a header block (Path-Name, Content-Length,
# Document-Type: XML) followed by an XML body, for consumption by the
# swish-e indexer.
#
# Arguments:
#   $mbox_id  - "<mbox> <id>" string (required); it is split on the first
#               whitespace character and re-emitted as the Path-Name
#   $document - hash ref of tag => text (required); every non-empty value
#               becomes a <tag><![CDATA[...]]></tag> element inside
#               <message>
#
# Line breaks in the values are stored as the marker "##lf##" (search_index
# turns the marker back into "\n").  The XML is converted from ISO-8859-2
# to UTF-8 before printing and Content-Length is counted in bytes,
# including the trailing newline.
#
# Croaks if either argument is missing.
sub add_index {
    my $self = shift;

    # This file does not import croak anywhere, so load Carp here and call
    # it fully qualified instead of relying on the including code.
    require Carp;

    my $mbox_id = shift || Carp::croak("add_index needs mbox_id");
    my $document = shift || Carp::croak("add_index needs document");

    my ($mbox,$id) = split(/\s/,$mbox_id,2);

    my $xml = qq{<message>};
    foreach my $tag (keys %$document) {
        my $data = $document->{$tag};
        # skip missing/empty values, but keep a literal "0"
        next unless defined $data && length $data;
        # save [cr/]lf before conversion to XML; handles \r\n as well as
        # the \n\r and bare \n forms
        $data =~ s/\r\n|\n\r?/##lf##/g;
        # a literal "]]>" would close the CDATA section early, so split it
        # across two adjacent CDATA sections
        $data =~ s/\]\]>/]]]]><![CDATA[>/g;
        $xml .= "<$tag><![CDATA[".$data."]]></$tag>\n";
    }
    $xml .= qq{</message>};

    $xml = $iso2utf->convert($xml);
    use bytes; # as opposed to chars
    print "Path-Name: $mbox $id\n";
    print "Content-Length: ".(length($xml)+1)."\n";
    print "Document-Type: XML\n\n$xml\n";

}
116    
# Counterpart of open_index; this implementation performs no work.
# Hands back the invocant it was called with.
sub close_index {
    my ($self) = @_;

    return $self;
}
121    
122     1;

  ViewVC Help
Powered by ViewVC 1.1.26