1 |
#!/usr/bin/perl |
2 |
# -*- Mode: Perl -*- |
3 |
# $Basename: Wais.pm $ |
4 |
# $Revision: 1.3 $ |
5 |
# Author : Ulrich Pfeifer |
6 |
# Created On : Mon Sep 16 11:08:04 1996 |
7 |
# Last Modified By: Ulrich Pfeifer |
8 |
# Last Modified On: Wed Nov 5 17:34:02 1997 |
9 |
# Language : CPerl |
10 |
# Update Count : 156 |
11 |
# Status : Unknown, Use with caution! |
12 |
# |
13 |
# (C) Copyright 1997, Ulrich Pfeifer, all rights reserved. |
14 |
# |
15 |
# |
16 |
|
17 |
package WAIT::Wais; |
18 |
|
19 |
require WAIT::Query::Wais; |
20 |
require WAIT::Database; |
21 |
use Fcntl; |
22 |
use strict; |
23 |
use vars qw(%DB %TB); |
24 |
|
25 |
my %FORMATTER; |
26 |
|
27 |
BEGIN { # check for available formatters |
28 |
%FORMATTER = qw(text WAIT::Format::Base); |
29 |
for my $inc (@INC) { |
30 |
if (-d "$inc/WAIT/Format") { |
31 |
for my $format ( <$inc/WAIT/Format/*.pm>) { |
32 |
my ($name) = ($format =~ /(\w+)\.pm$/); |
33 |
my $module = "WAIT::Format::$name"; |
34 |
$name = lc $name; |
35 |
$FORMATTER{$name} = $module; |
36 |
} |
37 |
} |
38 |
} |
39 |
} |
40 |
|
41 |
|
42 |
sub _database { |
43 |
my $path = shift; |
44 |
my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:); |
45 |
|
46 |
return $DB{"$dir/$dn"} if exists $DB{"$dir/$dn"}; |
47 |
$DB{"$dir/$dn"} = WAIT::Database->open(name => $dn, directory => $dir, |
48 |
mode => O_RDONLY); |
49 |
return $DB{"$dir/$dn"}; |
50 |
} |
51 |
|
52 |
sub _table { |
53 |
my $path = shift; |
54 |
|
55 |
return $TB{$path} if exists $TB{$path}; |
56 |
my $db = _database($path); |
57 |
my ($dir, $dn, $tn) = ($path =~ m:(.*)/([^/]+)/([^/]+)$:); |
58 |
$TB{$path} = $db->table(name => $tn); |
59 |
$TB{$path}; |
60 |
} |
61 |
|
62 |
sub Search { |
63 |
my (@requests) = @_; |
64 |
my $request; |
65 |
my $result = new WAIT::Wais::Result; |
66 |
for $request (@requests) { |
67 |
my $query = $request->{'query'}; |
68 |
my $database = $request->{'database'}; |
69 |
my $tag = $request->{'tag'} || $request->{'database'}; |
70 |
my ($dir, $dn, $tn) = ($database =~ m:(.*)/([^/]+)/([^/]+)$:); |
71 |
my $tb = _table($database); |
72 |
unless (defined $tb) { |
73 |
$result->add(Tag => $tag, Error => 'Could not open database'); |
74 |
return $result; |
75 |
} |
76 |
my $wquery; |
77 |
eval {$wquery = WAIT::Query::Wais::query($tb, $query)}; |
78 |
if ($@ ne '') { |
79 |
$result->add(Tag => $tag, Error => $@); |
80 |
return $result; |
81 |
} |
82 |
my %po = $wquery->execute(); |
83 |
$result->add(Tag => $tag, Database => $database, |
84 |
Table => $tb, Postings => \%po) |
85 |
} |
86 |
$result; |
87 |
} |
88 |
|
89 |
sub Retrieve { |
90 |
my %parm = @_; |
91 |
my $result = new WAIT::Wais::Result; |
92 |
my $tb = _table($parm{database}); |
93 |
|
94 |
unless (defined $tb) { |
95 |
$result->add(Tag => 'document', Error => 'Could not open database'); |
96 |
return $result; |
97 |
} |
98 |
|
99 |
my $did = ref($parm{docid})?$parm{docid}->did:$parm{docid}; |
100 |
|
101 |
my %rec = $tb->fetch($did); |
102 |
|
103 |
# another CPAN hack |
104 |
if ($rec{docid} =~ m(^data/)) { |
105 |
$rec{docid} = $tb->dir . '/' . $rec{docid}; |
106 |
} |
107 |
|
108 |
my $text = $tb->fetch_extern($rec{docid}); |
109 |
|
110 |
my @txt; |
111 |
$tb->open; |
112 |
if ($parm{query}) { |
113 |
@txt = WAIT::Query::Wais::query($tb,$parm{query})->hilight($text); |
114 |
} else { |
115 |
@txt = $tb->layout->tag($text); |
116 |
} |
117 |
|
118 |
if ($parm{lines}) { |
119 |
@txt = filter($parm{lines}, @txt); |
120 |
} |
121 |
|
122 |
my $type = lc $parm{type}; |
123 |
|
124 |
my $module = (exists $FORMATTER{$type})?$FORMATTER{$type}:$FORMATTER{text}; |
125 |
my $path = $module; |
126 |
$path =~ s(::)(/)g; |
127 |
|
128 |
require "$path.pm"; |
129 |
my $format = new $module; |
130 |
$text = $format->as_string(\@txt, sub {$tb->fetch($did)}); |
131 |
$result->add(Tag => 'document', Text => $text); |
132 |
} |
133 |
|
134 |
sub filter { |
135 |
my $filter = shift; |
136 |
my @result; |
137 |
my @context; |
138 |
my $lines = 0; |
139 |
my $clines = 0; |
140 |
my $elipsis = 0; |
141 |
|
142 |
while (@_) { |
143 |
my %tag = %{shift @_}; |
144 |
my $txt = shift @_; |
145 |
|
146 |
for (split /(\n)/, $txt) { |
147 |
if ($_ eq "\n") { |
148 |
if (exists $tag{_qt}) { |
149 |
#die "Weird!"; |
150 |
push @result, {_i=>1}, "[WEIRD]"; |
151 |
} elsif ($lines) { |
152 |
push @result, {}, $_; |
153 |
$lines--; |
154 |
} else { |
155 |
push @context, {}, $_; |
156 |
$clines++; |
157 |
} |
158 |
} else { |
159 |
if (exists $tag{_qt}) { |
160 |
push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis; |
161 |
push @result, @context, {%tag}, $_; |
162 |
delete $tag{_qt}; |
163 |
@context = (); $clines = 0; $elipsis=0; |
164 |
$lines = $filter+1; |
165 |
} elsif ($lines) { |
166 |
push @result, \%tag, $_; |
167 |
} else { |
168 |
push @context, \%tag, $_; |
169 |
} |
170 |
} |
171 |
if ($clines>$filter) { |
172 |
my (%tag, $txt); |
173 |
while ($clines>$filter) { |
174 |
%tag = %{shift @context}; |
175 |
$txt = shift @context; |
176 |
if ($txt =~ /\n/) { |
177 |
$clines--; |
178 |
$elipsis++; |
179 |
} |
180 |
} |
181 |
} |
182 |
} |
183 |
} |
184 |
@result; |
185 |
} |
186 |
|
187 |
package WAIT::Wais::Result; |
188 |
|
189 |
sub new { |
190 |
my $type = shift; |
191 |
my %par = @_; |
192 |
my $self = {'header' => [], 'diagnostics' => [], 'text' => ''}; |
193 |
|
194 |
bless $self, $type; |
195 |
} |
196 |
|
197 |
sub _header { |
198 |
my ($database, $did, $score) = @_; |
199 |
my $types; |
200 |
my $tb = WAIT::Wais::_table($database); |
201 |
my %rec = $tb->fetch($did); |
202 |
my $lines = $rec{'lines'} || 0; |
203 |
my $length = $rec{'size'} || 0; |
204 |
unless ($length) { |
205 |
($length) = ($rec{docid} =~ /(\d+)$/) |
206 |
} |
207 |
unless ($rec{docid} =~ m(^/)) { |
208 |
$rec{docid} = $tb->dir . '/' . $rec{docid}; |
209 |
} |
210 |
my $headline = $rec{headline} || ''; |
211 |
if (exists $rec{types}) { |
212 |
$types = [split ',', $rec{types}] |
213 |
} else { |
214 |
$types = [keys %FORMATTER]; |
215 |
} |
216 |
|
217 |
[$score, $lines, $length, $headline, $types, |
218 |
WAIT::Wais::Docid->new('wait',$database, $did)]; |
219 |
} |
220 |
|
221 |
sub add { |
222 |
my $self = shift; |
223 |
my %parm = @_; |
224 |
my $tag = $parm{Tag}; |
225 |
my $docid; |
226 |
|
227 |
if ($parm{Postings}) { |
228 |
my @result; |
229 |
my @left = @{$self->{'header'}}; |
230 |
my @right; |
231 |
for (keys %{$parm{Postings}}) { |
232 |
push @right, _header($parm{Database}, $_, $parm{Postings}->{$_}) |
233 |
} |
234 |
while (($#left >= $[) or ($#right >= $[)) { |
235 |
if ($#left < $[) { |
236 |
for (@right) { |
237 |
push @result, [$tag, @{$_}]; |
238 |
} |
239 |
last; |
240 |
} |
241 |
if ($#right < $[) { |
242 |
push @result, @left; |
243 |
last; |
244 |
} |
245 |
if ($left[0]->[1] > $right[0]->[0]) { |
246 |
push @result, shift @left; |
247 |
} else { |
248 |
push @result, [$tag, @{shift @right}]; |
249 |
} |
250 |
} |
251 |
$self->{'header'} = \@result; |
252 |
} |
253 |
if ($parm{Errors}) { |
254 |
my %diag = %{$parm{Errors}}; |
255 |
for (keys %diag) { |
256 |
push(@{$self->{'diagnostics'}}, [$tag, $_, $diag{$_}]); |
257 |
} |
258 |
} |
259 |
if ($parm{Text}) { |
260 |
$self->{'text'} .= $parm{Text}; |
261 |
} |
262 |
|
263 |
$self; |
264 |
} |
265 |
|
266 |
|
267 |
sub diagnostics { |
268 |
my $self = shift; |
269 |
|
270 |
@{$self->{'diagnostics'}}; |
271 |
} |
272 |
|
273 |
sub header { |
274 |
my $self = shift; |
275 |
|
276 |
@{$self->{'header'}}; |
277 |
} |
278 |
|
279 |
sub text { |
280 |
my $self = shift; |
281 |
|
282 |
$self->{'text'}; |
283 |
} |
284 |
|
285 |
package WAIT::Wais::Docid; |
286 |
|
287 |
sub new { |
288 |
my $type = shift; |
289 |
my ($server, $database, $dodid) = @_; |
290 |
my $self = join ';', $server, $database, $dodid; |
291 |
bless \$self, $type; |
292 |
} |
293 |
|
294 |
sub did { |
295 |
($_[0]->split)[2]; |
296 |
} |
297 |
|
298 |
sub split { |
299 |
my $self = shift; |
300 |
|
301 |
split /;/, $$self; |
302 |
} |
303 |
|
304 |
1; |