1 |
dpavlin |
90 |
#!/usr/bin/perl -w |
2 |
ulpfr |
19 |
# -*- Mode: Perl -*- |
3 |
|
|
# $Basename: sman $ |
4 |
|
|
# $Revision: 1.14 $ |
5 |
|
|
# Author : Ulrich Pfeifer |
6 |
|
|
# Created On : Fri Aug 30 15:52:25 1996 |
7 |
|
|
# Last Modified By: Ulrich Pfeifer |
8 |
|
|
# Last Modified On: Mon May 8 11:03:46 2000 |
9 |
|
|
# Language : CPerl |
10 |
|
|
# |
11 |
|
|
# (C) Copyright 1996-2000, Ulrich Pfeifer |
12 |
|
|
# |
13 |
ulpfr |
10 |
|
14 |
|
|
use strict; |
15 |
|
|
|
16 |
|
|
use Term::ReadLine; |
17 |
|
|
use Getopt::Long; |
18 |
laperla |
65 |
use Fcntl qw(O_RDONLY); |
19 |
ulpfr |
10 |
use Config; |
20 |
|
|
|
21 |
dpavlin |
90 |
use lib '/data/wait/lib'; |
22 |
dpavlin |
86 |
|
23 |
ulpfr |
10 |
require WAIT::Config; |
24 |
|
|
require WAIT::Database; |
25 |
|
|
require WAIT::Query::Base; |
26 |
|
|
require WAIT::Query::Wais; |
27 |
|
|
|
28 |
|
|
|
29 |
|
|
$SIG{PIPE} = 'IGNORE'; |
30 |
|
|
my %OPT = (database => 'DB', |
31 |
|
|
dir => $WAIT::Config->{WAIT_home} || '/tmp', |
32 |
|
|
table => 'man', |
33 |
|
|
pager => $WAIT::Config->{'pager'} || 'more', |
34 |
|
|
filter => 0, |
35 |
|
|
max => 15, |
36 |
|
|
); |
37 |
|
|
|
38 |
|
|
GetOptions(\%OPT, |
39 |
|
|
'database=s', |
40 |
|
|
'dir=s', |
41 |
|
|
'table=s', |
42 |
|
|
'filter=i', |
43 |
|
|
'max=i', |
44 |
laperla |
65 |
'pager:s') || die " |
45 |
|
|
Usage: $0 |
46 |
|
|
[--database database] |
47 |
|
|
[--dir dir ] |
48 |
|
|
[--table table ] |
49 |
|
|
[--filter integer ] |
50 |
|
|
[--max integer ] |
51 |
|
|
[--pager pager ] |
52 |
|
|
"; |
53 |
ulpfr |
10 |
|
54 |
|
|
my $db = WAIT::Database->open(name => $OPT{database}, |
55 |
|
|
mode => O_RDONLY, |
56 |
|
|
directory => $OPT{dir}) |
57 |
|
|
or die "Could not open database $OPT{database}: $@"; |
58 |
|
|
|
59 |
|
|
my $tb = $db->table(name => $OPT{table}) |
60 |
|
|
or die "Could not open table $OPT{table}: $@"; |
61 |
|
|
|
62 |
|
|
# not used: my $layout = $tb->layout; # a WAIT::Parse::Nroff object |
63 |
|
|
|
64 |
|
|
my $term = new Term::ReadLine 'Simple Query Interface'; |
65 |
|
|
|
66 |
|
|
require WAIT::Format::Term; |
67 |
|
|
my $format; |
68 |
|
|
if ($Config::Config{'archname'} eq 'i586-linux') { |
69 |
|
|
# for color xterm |
70 |
|
|
$format = new WAIT::Format::Term query_s => "[01;31m", query_e => "[00m"; |
71 |
|
|
} else { |
72 |
|
|
$format = new WAIT::Format::Term; |
73 |
|
|
} |
74 |
|
|
|
75 |
dpavlin |
98 |
my $pager = ($OPT{pager}) ? \&less : \&pager; |
76 |
ulpfr |
10 |
my $OUT = $term->OUT; |
77 |
|
|
|
78 |
|
|
my $st = 1; |
79 |
|
|
print $OUT "Enter 'h' for help.\n"; |
80 |
|
|
|
81 |
|
|
# sman is basically offering three services: find the hits and show |
82 |
|
|
# them (a query), show metadata for a hit (a view), show a hot (display) |
83 |
|
|
|
84 |
|
|
my($query, @did); |
85 |
|
|
|
86 |
|
|
while (defined ($_ = &myreadline("$st> "))) { |
87 |
|
|
chomp; $st++; |
88 |
|
|
|
89 |
|
|
my(%hits, $query_text); |
90 |
|
|
if (/^$/) { |
91 |
|
|
next; |
92 |
|
|
} elsif (/^m (\d+)$/) { |
93 |
|
|
$OPT{max} = $1; |
94 |
|
|
} elsif (/^f\s*(\d+)?$/) { |
95 |
|
|
$OPT{filter} = $1; |
96 |
|
|
next; |
97 |
|
|
} elsif (/^t$/i) { |
98 |
|
|
if ($pager eq \&less) { |
99 |
|
|
$pager = \&pager; |
100 |
|
|
} else { |
101 |
|
|
$pager = \&less; |
102 |
|
|
} |
103 |
|
|
next; |
104 |
|
|
} elsif (/^(\d+)$/) { |
105 |
|
|
if (defined $did[$1]) { |
106 |
|
|
display($did[$1]); # <----------- display (full doc) |
107 |
|
|
next; |
108 |
|
|
} |
109 |
|
|
} elsif (/^d\s*(\d+)/) { |
110 |
|
|
if (defined $did[$1]) { |
111 |
|
|
view($did[$1]); # <----------- view (metadata from WAIT) |
112 |
|
|
next; |
113 |
|
|
} |
114 |
|
|
} elsif (/^q$/i) { |
115 |
|
|
last; |
116 |
|
|
} elsif (/^l$/i) { |
117 |
|
|
# fall through |
118 |
|
|
} elsif (/^[h?]$/i) { |
119 |
|
|
help(); |
120 |
|
|
next; |
121 |
|
|
} elsif (/^hh$/i) { |
122 |
|
|
extended_help(); |
123 |
|
|
next; |
124 |
|
|
} else { # <----------- A query (Display a list) |
125 |
|
|
$query_text = $_; |
126 |
|
|
eval {$query = WAIT::Query::Wais::query($tb, $_)}; |
127 |
|
|
if ($@ ne '') { |
128 |
|
|
print $OUT "$_ => $query\n\$\@='$@'\n"; |
129 |
|
|
} elsif (ref($query)) { |
130 |
ulpfr |
19 |
%hits = $query->execute(top => $OPT{max}, picky => 1); |
131 |
ulpfr |
10 |
# the hash %hits has as keys document numbers and as values |
132 |
|
|
# quality figures. The doc numbers are not what we have as docid |
133 |
|
|
# to find the item in the access class, they are WAIT's private |
134 |
|
|
# numbers. |
135 |
|
|
} else { |
136 |
|
|
next; |
137 |
|
|
} |
138 |
|
|
} |
139 |
|
|
|
140 |
|
|
next unless %hits; |
141 |
|
|
my $no = 1; # numbering the hits for the result table that is |
142 |
|
|
# presented to the user |
143 |
|
|
|
144 |
|
|
@did = (); # store the internal numbers (keys of %hits). The user |
145 |
|
|
# will use $no in sman's interface to select a hit. |
146 |
|
|
|
147 |
|
|
# the following loop uses the values of %hits to sort the results |
148 |
|
|
# according to the quality and cut after a number of rows. After |
149 |
|
|
# that %hits isn't needed anymore. |
150 |
|
|
print "Query: $query_text\n"; |
151 |
|
|
for my $did (sort {$hits{$b} <=> $hits{$a}} keys %hits) { |
152 |
|
|
|
153 |
|
|
my %tattr = $tb->fetch($did); |
154 |
|
|
# the hash %tattr contains several attributes of the item we are |
155 |
|
|
# referring to, namely the attributes that we named in the "attr" |
156 |
|
|
# argument of the create_table statement in smakewhatis |
157 |
|
|
|
158 |
|
|
printf $OUT "%2d %6.3f %s\n", $no, $hits{$did}, |
159 |
|
|
substr($tattr{headline} ||'',0,68); |
160 |
|
|
$did[$no] = $did; |
161 |
|
|
last if $no++ >= $OPT{max}; |
162 |
|
|
|
163 |
|
|
} |
164 |
|
|
|
165 |
|
|
} continue { |
166 |
ulpfr |
19 |
# we don't do this since Andreas Koenig does not think of it as feature |
167 |
ulpfr |
10 |
# $term->SetHistory(grep length($_)>4, $term->GetHistory) |
168 |
|
|
} |
169 |
ulpfr |
19 |
warn "Thank you for using sman\n"; |
170 |
ulpfr |
10 |
|
171 |
ulpfr |
19 |
$tb->close; |
172 |
|
|
$db->close; |
173 |
|
|
|
174 |
ulpfr |
10 |
sub myreadline { |
175 |
|
|
if (@ARGV) { |
176 |
|
|
return shift @ARGV; |
177 |
|
|
} else { |
178 |
|
|
$term->readline(@_); |
179 |
|
|
} |
180 |
|
|
} |
181 |
|
|
sub help { |
182 |
|
|
my $idb = "\n\t'". join(q[', '], $tb->fields()) . "'"; |
183 |
|
|
print $OUT qq[Available commands: |
184 |
|
|
|
185 |
|
|
<num> Show the document <num> |
186 |
|
|
d <num> Show the db entry of document <num> |
187 |
|
|
f <num> Display only <num> lines context |
188 |
|
|
h,? Display this help message |
189 |
|
|
hh Display query examples |
190 |
|
|
m <num> Set maxhits to <num> |
191 |
|
|
t Toggle display mode (term/less) |
192 |
|
|
q Exit from $0 |
193 |
|
|
l redisplay last ranking |
194 |
|
|
Other input is tried as wais query. |
195 |
dpavlin |
91 |
The following fields for table '$OPT{table}' are known: $idb |
196 |
ulpfr |
10 |
] ; |
197 |
|
|
} |
198 |
|
|
|
199 |
|
|
sub extended_help { |
200 |
|
|
print q{ |
201 |
|
|
Here are some query examples: |
202 |
|
|
|
203 |
|
|
information retrieval free text query |
204 |
|
|
information or retrieval same as above |
205 |
|
|
des=information retrieval `information' must be in the description |
206 |
|
|
des=(information retrieval) one of them in description |
207 |
|
|
des=(information or retrieval) same as above |
208 |
|
|
des=(information and retrieval) both of them in description |
209 |
|
|
des=(information not retrieval) `information' in description and |
210 |
|
|
`retrieval' not in description |
211 |
|
|
des=(information system*) wild-card search |
212 |
|
|
au=ilia author names may be misspelled |
213 |
|
|
|
214 |
|
|
You can build arbitary boolean combination of the above examples. |
215 |
|
|
Field names may be abbreviated. |
216 |
|
|
} |
217 |
|
|
} |
218 |
|
|
|
219 |
|
|
sub view { |
220 |
|
|
my $did = shift; |
221 |
|
|
my %tattr = $tb->fetch($did); |
222 |
|
|
for (keys %tattr) { |
223 |
|
|
print $OUT "$_ $tattr{$_}\n"; |
224 |
|
|
} |
225 |
|
|
} |
226 |
|
|
|
227 |
|
|
sub display { |
228 |
|
|
my $did = shift; |
229 |
|
|
|
230 |
|
|
return unless defined $query and defined $did; |
231 |
|
|
|
232 |
|
|
print $OUT "Wais display document $did\n"; |
233 |
|
|
my %tattr = $tb->fetch($did); |
234 |
|
|
my $tdid = $tattr{docid}; |
235 |
|
|
# WHAT DOES HE DO HERE? ULI??? |
236 |
|
|
# Re: some indexing scripts did use pathnames relative to the table directory |
237 |
|
|
# especially the cpanwait script does this. uli |
238 |
dpavlin |
86 |
if ($tdid !~ m(^/)) { |
239 |
|
|
$tdid = $tb->dir . '/' . $tdid; |
240 |
|
|
} |
241 |
dpavlin |
96 |
# if original version doesn't exist maybe someone created gziped one? |
242 |
|
|
# (this is used if you decided to compress data files after indexing, |
243 |
|
|
# next indexing will pick names with gz extension anyway) |
244 |
|
|
if (! -e $tdid && -e $tdid.".gz") { |
245 |
|
|
$tdid .= ".gz"; |
246 |
|
|
} |
247 |
ulpfr |
10 |
|
248 |
|
|
# The main task of all that follows from here is highlighting. WAIT |
249 |
|
|
# is designed to make it possible to show the user why a certain |
250 |
|
|
# document was chosen by the indexer. |
251 |
|
|
|
252 |
|
|
my $buf = $tb->fetch_extern($tdid); |
253 |
|
|
# This $buf can be an object that can have enough information to do |
254 |
|
|
# highlighting without WAIT's help. If you prefer to implement your |
255 |
|
|
# own highlighting, you can do so now with e.g. print |
256 |
|
|
# $buf->highlight(query => $query) |
257 |
|
|
|
258 |
|
|
# All you need to know to implement highlighting is how a |
259 |
|
|
# WAIT::Query::Base object looks like (left as an exercise for the |
260 |
|
|
# reader). |
261 |
|
|
|
262 |
|
|
# The impatient reader may want to implement something without |
263 |
|
|
# highlighting, in which case he does not need any info about the |
264 |
|
|
# query object and can rightaway run e.g. |
265 |
|
|
# print $buf->as_string |
266 |
|
|
|
267 |
|
|
# Thus the impatient reader does not necessarily need the following |
268 |
|
|
# heavy wizardry. Just to give you an idea what's going on: every |
269 |
|
|
# word in the text must be compared to every word in the query if it |
270 |
|
|
# is worth highlighting, and which part of the word is worth |
271 |
|
|
# highlighting. This must be done differently for every field in the |
272 |
|
|
# table and for every index defined for that field. Try to run a |
273 |
|
|
# query with 100 words and you'll be amazed to see it really works. |
274 |
|
|
# Or maybe it doesn't. You should be aware that the hilighting code |
275 |
|
|
# is to be regarded as alpha. It is certainly the least tested part |
276 |
|
|
# of WAIT so far. |
277 |
|
|
|
278 |
|
|
if ($buf) { |
279 |
|
|
my @txt = $query->hilight($buf); |
280 |
|
|
# In this operation the following things melt into one piece: |
281 |
|
|
# $query: The query entered by the user (Class isa WAIT::Query::Base) |
282 |
|
|
# $tb: The table we queried (Class WAIT::Table) |
283 |
|
|
# $buf: The document to display (User defined class or string) |
284 |
|
|
# The steps taken are: |
285 |
|
|
# 1.) $query calls "hilight" on $tb and passes |
286 |
|
|
# filtered and raw search terms ($query->{Plain} and $query->{Raw}). |
287 |
|
|
# 2.) $tb asks the layout object to tag the object which results |
288 |
|
|
# in an array with alternating elements of tags (anon HASHes) and |
289 |
|
|
# strings. |
290 |
|
|
# 3.) $tb adds some markup on its own: {qt=>1} or some such |
291 |
|
|
|
292 |
|
|
# The result of that process can optionally be sent through a |
293 |
|
|
# filter, just to impress your friends with yet more heavy |
294 |
|
|
# wizardry |
295 |
|
|
if ($OPT{filter}) { |
296 |
|
|
@txt = &filter(@txt); |
297 |
|
|
} |
298 |
|
|
|
299 |
|
|
# And then a formatter (in our case a terminal formatter) turns |
300 |
|
|
# all the markup into escape sequences and strings that can in |
301 |
|
|
# turn be sent through a pager for instance |
302 |
|
|
&$pager($format->as_string(\@txt)); |
303 |
|
|
} |
304 |
|
|
|
305 |
|
|
# Hey, that's it. The user out there is deeply impressed now. You |
306 |
|
|
# can lean back again:-) He got a document that has some words |
307 |
|
|
# hilighted and will probably read and enjoy it. Maybe he'll send |
308 |
|
|
# you an email. |
309 |
|
|
} |
310 |
|
|
|
311 |
|
|
sub filter { |
312 |
|
|
my @result; |
313 |
|
|
my @context; |
314 |
|
|
my $lines = 0; |
315 |
|
|
my $clines = 0; |
316 |
|
|
my $elipsis = 0; |
317 |
|
|
|
318 |
|
|
print STDERR "Filter ..."; |
319 |
|
|
while (@_) { |
320 |
|
|
my %tag = %{shift @_}; |
321 |
|
|
my $txt = shift @_; |
322 |
|
|
|
323 |
|
|
for (split /(\n)/, $txt) { |
324 |
|
|
if ($_ eq "\n") { |
325 |
|
|
if (exists $tag{_qt}) { |
326 |
|
|
#die "Weird!"; |
327 |
|
|
push @result, {_i=>1}, "[WEIRD]"; |
328 |
|
|
} elsif ($lines) { |
329 |
|
|
push @result, {}, $_; |
330 |
|
|
$lines--; |
331 |
|
|
} else { |
332 |
|
|
push @context, {}, $_; |
333 |
|
|
$clines++; |
334 |
|
|
} |
335 |
|
|
} else { |
336 |
|
|
if (exists $tag{_qt}) { |
337 |
|
|
push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis; |
338 |
|
|
push @result, @context, {%tag}, $_; |
339 |
|
|
delete $tag{_qt}; |
340 |
|
|
@context = (); $clines = 0; $elipsis=0; |
341 |
|
|
$lines = $OPT{filter}+1; |
342 |
|
|
} elsif ($lines) { |
343 |
|
|
push @result, \%tag, $_; |
344 |
|
|
} else { |
345 |
|
|
push @context, \%tag, $_; |
346 |
|
|
} |
347 |
|
|
} |
348 |
|
|
if ($clines>$OPT{filter}) { |
349 |
|
|
my (%tag, $txt); |
350 |
|
|
while ($clines>$OPT{filter}) { |
351 |
|
|
%tag = %{shift @context}; |
352 |
|
|
$txt = shift @context; |
353 |
|
|
if ($txt =~ /\n/) { |
354 |
|
|
$clines--; |
355 |
|
|
$elipsis++; |
356 |
|
|
} |
357 |
|
|
} |
358 |
|
|
} |
359 |
|
|
} |
360 |
|
|
} |
361 |
|
|
print STDERR " done\n"; |
362 |
|
|
@result; |
363 |
|
|
} |
364 |
|
|
|
365 |
|
|
sub less { |
366 |
dpavlin |
96 |
my $flags = ''; |
367 |
dpavlin |
98 |
my $pager = $OPT{pager}; |
368 |
|
|
if ($pager =~ /less/) { |
369 |
ulpfr |
10 |
$flags = '-r'; |
370 |
dpavlin |
98 |
} elsif ($pager =~ /more/) { |
371 |
ulpfr |
10 |
$flags = '-c'; |
372 |
dpavlin |
98 |
} elsif ($pager =~ /lynx/) { |
373 |
|
|
$flags = '-stdin'; |
374 |
ulpfr |
10 |
} |
375 |
dpavlin |
98 |
open(PAGER, "| $pager $flags") or die; |
376 |
ulpfr |
10 |
print PAGER @_; |
377 |
|
|
close PAGER; |
378 |
|
|
} |
379 |
|
|
|
380 |
|
|
sub pager { |
381 |
|
|
my @lines = split /\n/, $_[0]; |
382 |
|
|
my $line = 0; |
383 |
|
|
for (@lines) { |
384 |
|
|
print "$_\n"; $line++; |
385 |
|
|
if ($line % 24 == 0) { |
386 |
|
|
my $key = $term->readline("[return]"); |
387 |
|
|
return if $key =~ /^q/i; |
388 |
|
|
} |
389 |
|
|
} |
390 |
|
|
} |
391 |
|
|
|
392 |
|
|
|
393 |
|
|
__END__ |
394 |
|
|
## ################################################################### |
395 |
|
|
## pod |
396 |
|
|
## ################################################################### |
397 |
|
|
|
398 |
|
|
=head1 NAME |
399 |
|
|
|
400 |
|
|
sman - Search and disply manuals interactive |
401 |
|
|
|
402 |
|
|
=head1 SYNOPSIS |
403 |
|
|
|
404 |
|
|
B<sman> |
405 |
|
|
[B<-database> I<database name>] |
406 |
|
|
[B<-dir> I<database directory>] |
407 |
|
|
[B<-table> I<name>] |
408 |
|
|
[B<-less>] |
409 |
|
|
[B<-filter> I<num>] |
410 |
|
|
[B<-max> I<num>] |
411 |
|
|
|
412 |
|
|
=head1 DESCRIPTION |
413 |
|
|
|
414 |
|
|
B<Sman> is an interactive search interface to your systems manual pages. |
415 |
|
|
|
416 |
|
|
=head2 OPTIONS |
417 |
|
|
|
418 |
|
|
=over 10 |
419 |
|
|
|
420 |
|
|
=item B<-database> I<database name> |
421 |
|
|
|
422 |
|
|
Change the default database name to I<database name>. |
423 |
|
|
|
424 |
|
|
=item B<-dir> I<database directory> |
425 |
|
|
|
426 |
|
|
Change the default database directory to I<database directory>. |
427 |
|
|
|
428 |
|
|
=item B<-table> I<name> |
429 |
|
|
|
430 |
|
|
Use I<name> instead of C<man> as table name. |
431 |
|
|
|
432 |
|
|
=item B<-pager> I<name> |
433 |
|
|
|
434 |
|
|
Use I<name> instead of the default pager. If no I<name> is supplied a |
435 |
|
|
buildin pager is used. |
436 |
|
|
|
437 |
|
|
=item B<-filter> I<num> |
438 |
|
|
|
439 |
|
|
Display only I<num> lines above and below an occurance of a search |
440 |
|
|
term in the manual. |
441 |
|
|
|
442 |
|
|
=item B<-max> I<num> |
443 |
|
|
|
444 |
|
|
Display only I<num> hits. Default is to 10. |
445 |
|
|
|
446 |
|
|
=head1 SEE ALSO |
447 |
|
|
|
448 |
|
|
L<smakewhatis>. |
449 |
|
|
|
450 |
|
|
=head1 AUTHOR |
451 |
|
|
|
452 |
|
|
Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt> |