/[wait]/trunk/script/sman
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/script/sman

Parent Directory Parent Directory | Revision Log Revision Log


Revision 8 - (show annotations)
Sun May 23 21:00:21 2004 UTC (20 years, 1 month ago) by dpavlin
File size: 12146 byte(s)
major milestone: indexing and searching of CPAN index generated by cpanwait
is now working again.

1 #!/usr/bin/perl -w
2 # -*- Mode: Perl -*-
3 # $Basename: sman $
4 # $Revision: 1.14 $
5 # Author : Ulrich Pfeifer
6 # Created On : Fri Aug 30 15:52:25 1996
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Mon May 8 11:03:46 2000
9 # Language : CPerl
10 #
11 # (C) Copyright 1996-2000, Ulrich Pfeifer
12 #
13
14 use strict;
15
16 use lib '/data/wait/lib';
17
18 use Term::ReadLine;
19 use Getopt::Long;
20 use Fcntl;
21 use Config;
22
23 require WAIT::Config;
24 require WAIT::Database;
25 require WAIT::Query::Base;
26 require WAIT::Query::Wais;
27
28
29 $SIG{PIPE} = 'IGNORE';
30 my %OPT = (database => 'DB',
31 dir => $WAIT::Config->{WAIT_home} || '/tmp',
32 table => 'man',
33 pager => $WAIT::Config->{'pager'} || 'more',
34 filter => 0,
35 max => 15,
36 );
37
38 GetOptions(\%OPT,
39 'database=s',
40 'dir=s',
41 'table=s',
42 'filter=i',
43 'max=i',
44 'pager:s') || die "Usage: ...\n";
45
46 my $db = WAIT::Database->open(name => $OPT{database},
47 mode => O_RDONLY,
48 directory => $OPT{dir})
49 or die "Could not open database $OPT{database}: $@";
50
51 my $tb = $db->table(name => $OPT{table})
52 or die "Could not open table $OPT{table}: $@";
53
54 # not used: my $layout = $tb->layout; # a WAIT::Parse::Nroff object
55
56 my $term = new Term::ReadLine 'Simple Query Interface';
57
58 require WAIT::Format::Term;
59 my $format;
60 if ($Config::Config{'archname'} eq 'i586-linux') {
61 # for color xterm
62 $format = new WAIT::Format::Term query_s => "", query_e => "";
63 } else {
64 $format = new WAIT::Format::Term;
65 }
66
67 my $pager = ($OPT{pager}) ? \&pager : \&less;
68 my $OUT = $term->OUT;
69
70 my $st = 1;
71 print $OUT "Enter 'h' for help.\n";
72
73 # sman is basically offering three services: find the hits and show
74 # them (a query), show metadata for a hit (a view), show a hot (display)
75
76 my($query, @did);
77
78 while (defined ($_ = &myreadline("$st> "))) {
79 chomp; $st++;
80
81 my(%hits, $query_text);
82 if (/^$/) {
83 next;
84 } elsif (/^m (\d+)$/) {
85 $OPT{max} = $1;
86 } elsif (/^f\s*(\d+)?$/) {
87 $OPT{filter} = $1;
88 next;
89 } elsif (/^t$/i) {
90 if ($pager eq \&less) {
91 $pager = \&pager;
92 } else {
93 $pager = \&less;
94 }
95 next;
96 } elsif (/^(\d+)$/) {
97 if (defined $did[$1]) {
98 display($did[$1]); # <----------- display (full doc)
99 next;
100 }
101 } elsif (/^d\s*(\d+)/) {
102 if (defined $did[$1]) {
103 view($did[$1]); # <----------- view (metadata from WAIT)
104 next;
105 }
106 } elsif (/^q$/i) {
107 last;
108 } elsif (/^l$/i) {
109 # fall through
110 } elsif (/^[h?]$/i) {
111 help();
112 next;
113 } elsif (/^hh$/i) {
114 extended_help();
115 next;
116 } else { # <----------- A query (Display a list)
117 $query_text = $_;
118 eval {$query = WAIT::Query::Wais::query($tb, $_)};
119 if ($@ ne '') {
120 print $OUT "$_ => $query\n\$\@='$@'\n";
121 } elsif (ref($query)) {
122 %hits = $query->execute(top => $OPT{max}, picky => 1);
123 # the hash %hits has as keys document numbers and as values
124 # quality figures. The doc numbers are not what we have as docid
125 # to find the item in the access class, they are WAIT's private
126 # numbers.
127 } else {
128 next;
129 }
130 }
131
132 next unless %hits;
133 my $no = 1; # numbering the hits for the result table that is
134 # presented to the user
135
136 @did = (); # store the internal numbers (keys of %hits). The user
137 # will use $no in sman's interface to select a hit.
138
139 # the following loop uses the values of %hits to sort the results
140 # according to the quality and cut after a number of rows. After
141 # that %hits isn't needed anymore.
142 print "Query: $query_text\n";
143 for my $did (sort {$hits{$b} <=> $hits{$a}} keys %hits) {
144
145 my %tattr = $tb->fetch($did);
146 # the hash %tattr contains several attributes of the item we are
147 # referring to, namely the attributes that we named in the "attr"
148 # argument of the create_table statement in smakewhatis
149
150 printf $OUT "%2d %6.3f %s\n", $no, $hits{$did},
151 substr($tattr{headline} ||'',0,68);
152 $did[$no] = $did;
153 last if $no++ >= $OPT{max};
154
155 }
156
157 } continue {
158 # we don't do this since Andreas Koenig does not think of it as feature
159 # $term->SetHistory(grep length($_)>4, $term->GetHistory)
160 }
161 warn "Thank you for using sman\n";
162
163 $tb->close;
164 $db->close;
165
166 sub myreadline {
167 if (@ARGV) {
168 return shift @ARGV;
169 } else {
170 $term->readline(@_);
171 }
172 }
173 sub help {
174 my $idb = "\n\t'". join(q[', '], $tb->fields()) . "'";
175 print $OUT qq[Available commands:
176
177 <num> Show the document <num>
178 d <num> Show the db entry of document <num>
179 f <num> Display only <num> lines context
180 h,? Display this help message
181 hh Display query examples
182 m <num> Set maxhits to <num>
183 t Toggle display mode (term/less)
184 q Exit from $0
185 l redisplay last ranking
186 Other input is tried as wais query.
187 The following fields are known: $idb
188 ] ;
189 }
190
191 sub extended_help {
192 print q{
193 Here are some query examples:
194
195 information retrieval free text query
196 information or retrieval same as above
197 des=information retrieval `information' must be in the description
198 des=(information retrieval) one of them in description
199 des=(information or retrieval) same as above
200 des=(information and retrieval) both of them in description
201 des=(information not retrieval) `information' in description and
202 `retrieval' not in description
203 des=(information system*) wild-card search
204 au=ilia author names may be misspelled
205
206 You can build arbitary boolean combination of the above examples.
207 Field names may be abbreviated.
208 }
209 }
210
211 sub view {
212 my $did = shift;
213 my %tattr = $tb->fetch($did);
214 for (keys %tattr) {
215 print $OUT "$_ $tattr{$_}\n";
216 }
217 }
218
219 sub display {
220 my $did = shift;
221
222 return unless defined $query and defined $did;
223
224 print $OUT "Wais display document $did\n";
225 my %tattr = $tb->fetch($did);
226 my $tdid = $tattr{docid};
227 # WHAT DOES HE DO HERE? ULI???
228 # Re: some indexing scripts did use pathnames relative to the table directory
229 # especially the cpanwait script does this. uli
230 if ($tdid !~ m(^/)) {
231 $tdid = $tb->dir . '/' . $tdid;
232 }
233
234 # The main task of all that follows from here is highlighting. WAIT
235 # is designed to make it possible to show the user why a certain
236 # document was chosen by the indexer.
237
238 my $buf = $tb->fetch_extern($tdid);
239 # This $buf can be an object that can have enough information to do
240 # highlighting without WAIT's help. If you prefer to implement your
241 # own highlighting, you can do so now with e.g. print
242 # $buf->highlight(query => $query)
243
244 # All you need to know to implement highlighting is how a
245 # WAIT::Query::Base object looks like (left as an exercise for the
246 # reader).
247
248 # The impatient reader may want to implement something without
249 # highlighting, in which case he does not need any info about the
250 # query object and can rightaway run e.g.
251 # print $buf->as_string
252
253 # Thus the impatient reader does not necessarily need the following
254 # heavy wizardry. Just to give you an idea what's going on: every
255 # word in the text must be compared to every word in the query if it
256 # is worth highlighting, and which part of the word is worth
257 # highlighting. This must be done differently for every field in the
258 # table and for every index defined for that field. Try to run a
259 # query with 100 words and you'll be amazed to see it really works.
260 # Or maybe it doesn't. You should be aware that the hilighting code
261 # is to be regarded as alpha. It is certainly the least tested part
262 # of WAIT so far.
263
264 if ($buf) {
265 my @txt = $query->hilight($buf);
266 # In this operation the following things melt into one piece:
267 # $query: The query entered by the user (Class isa WAIT::Query::Base)
268 # $tb: The table we queried (Class WAIT::Table)
269 # $buf: The document to display (User defined class or string)
270 # The steps taken are:
271 # 1.) $query calls "hilight" on $tb and passes
272 # filtered and raw search terms ($query->{Plain} and $query->{Raw}).
273 # 2.) $tb asks the layout object to tag the object which results
274 # in an array with alternating elements of tags (anon HASHes) and
275 # strings.
276 # 3.) $tb adds some markup on its own: {qt=>1} or some such
277
278 # The result of that process can optionally be sent through a
279 # filter, just to impress your friends with yet more heavy
280 # wizardry
281 if ($OPT{filter}) {
282 @txt = &filter(@txt);
283 }
284
285 # And then a formatter (in our case a terminal formatter) turns
286 # all the markup into escape sequences and strings that can in
287 # turn be sent through a pager for instance
288 &$pager($format->as_string(\@txt));
289 }
290
291 # Hey, that's it. The user out there is deeply impressed now. You
292 # can lean back again:-) He got a document that has some words
293 # hilighted and will probably read and enjoy it. Maybe he'll send
294 # you an email.
295 }
296
297 sub filter {
298 my @result;
299 my @context;
300 my $lines = 0;
301 my $clines = 0;
302 my $elipsis = 0;
303
304 print STDERR "Filter ...";
305 while (@_) {
306 my %tag = %{shift @_};
307 my $txt = shift @_;
308
309 for (split /(\n)/, $txt) {
310 if ($_ eq "\n") {
311 if (exists $tag{_qt}) {
312 #die "Weird!";
313 push @result, {_i=>1}, "[WEIRD]";
314 } elsif ($lines) {
315 push @result, {}, $_;
316 $lines--;
317 } else {
318 push @context, {}, $_;
319 $clines++;
320 }
321 } else {
322 if (exists $tag{_qt}) {
323 push @result, {_i=>1}, "\n[ $elipsis linesĀ ]\n" if $elipsis;
324 push @result, @context, {%tag}, $_;
325 delete $tag{_qt};
326 @context = (); $clines = 0; $elipsis=0;
327 $lines = $OPT{filter}+1;
328 } elsif ($lines) {
329 push @result, \%tag, $_;
330 } else {
331 push @context, \%tag, $_;
332 }
333 }
334 if ($clines>$OPT{filter}) {
335 my (%tag, $txt);
336 while ($clines>$OPT{filter}) {
337 %tag = %{shift @context};
338 $txt = shift @context;
339 if ($txt =~ /\n/) {
340 $clines--;
341 $elipsis++;
342 }
343 }
344 }
345 }
346 }
347 print STDERR " done\n";
348 @result;
349 }
350
351 sub less {
352 my $flags;
353 if ($WAIT::Config->{pager} =~ /less/) {
354 $flags = '-r';
355 } elsif ($WAIT::Config->{pager} =~ /more/) {
356 $flags = '-c';
357 }
358 open(PAGER, "|$WAIT::Config->{pager} $flags") or die;
359 print PAGER @_;
360 close PAGER;
361 }
362
363 sub pager {
364 my @lines = split /\n/, $_[0];
365 my $line = 0;
366 for (@lines) {
367 print "$_\n"; $line++;
368 if ($line % 24 == 0) {
369 my $key = $term->readline("[return]");
370 return if $key =~ /^q/i;
371 }
372 }
373 }
374
375
376 __END__
377 ## ###################################################################
378 ## pod
379 ## ###################################################################
380
381 =head1 NAME
382
383 sman - Search and disply manuals interactive
384
385 =head1 SYNOPSIS
386
387 B<sman>
388 [B<-database> I<database name>]
389 [B<-dir> I<database directory>]
390 [B<-table> I<name>]
391 [B<-less>]
392 [B<-filter> I<num>]
393 [B<-max> I<num>]
394
395 =head1 DESCRIPTION
396
397 B<Sman> is an interactive search interface to your systems manual pages.
398
399 =head2 OPTIONS
400
401 =over 10
402
403 =item B<-database> I<database name>
404
405 Change the default database name to I<database name>.
406
407 =item B<-dir> I<database directory>
408
409 Change the default database directory to I<database directory>.
410
411 =item B<-table> I<name>
412
413 Use I<name> instead of C<man> as table name.
414
415 =item B<-pager> I<name>
416
417 Use I<name> instead of the default pager. If no I<name> is supplied a
418 buildin pager is used.
419
420 =item B<-filter> I<num>
421
422 Display only I<num> lines above and below an occurance of a search
423 term in the manual.
424
425 =item B<-max> I<num>
426
427 Display only I<num> hits. Default is to 10.
428
429 =head1 SEE ALSO
430
431 L<smakewhatis>.
432
433 =head1 AUTHOR
434
435 Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26