1 |
#!/usr/bin/perl -w |
2 |
|
3 |
use strict; |
4 |
|
5 |
use CGI::Simple; |
6 |
use CGI::Carp qw(fatalsToBrowser warningsToBrowser); |
7 |
use Search::Estraier; |
8 |
use YAML::Syck; |
9 |
use Data::Dump qw/dump/; |
10 |
|
11 |
# Request object for this invocation; the rest of the script (including the
# named subs further down) reads parameters and escapes HTML through it.
my $q = CGI::Simple->new;

# Emit the response header. The header block must be terminated by an empty
# line, i.e. CRLF CRLF; the previous "\n\r\n\r" ordering left a stray
# carriage return as the first byte of the body.
print qq{Content-type: text/html\r\n\r\n};

# Site configuration, read from config.yml in the current working directory.
# Its "estraier" section is what the search branch passes to the node
# constructor and consults for depth / attr_regex settings.
my $config = LoadFile('config.yml');
15 |
|
16 |
#warn "config = ", dump($config); |
17 |
|
18 |
if ($q->path_info() eq '/snippet') {

    # Opening markup of the result fragment; the matching closing tags are
    # printed after the result loop.
    print qq{
<HTML>
<HEAD>
<META CONTENT="text/html; charset=utf-8" HTTP-EQUIV="Content-Type">
<META CONTENT="no-cache" HTTP-EQUIV="Pragma">
<META CONTENT="-1" HTTP-EQUIV="Expires">
</HEAD>

<div>
<div class="post">
<ul>
};

    my $node = Search::Estraier::Node->new(%{ $config->{estraier} });

    # Zero-based page number. Anything that is not a plain non-negative
    # integer falls back to the first page, so the skip value computed below
    # can never be negative or non-numeric.
    my $o = $q->param('index');
    $o = 0 unless defined $o && $o =~ m/\A\d+\z/;

    # A request without "q" is treated as an empty query instead of
    # operating on an undefined value.
    my $search = $q->param('q');
    $search = '' unless defined $search;

    # Unless the user already wrote an explicit expression (AND / OR /
    # brackets), require all words. split ' ' ignores leading whitespace, so
    # no empty first term is produced.
    $search = join(" AND ", split(' ', $search)) unless ($search =~ m/(?:AND|OR|\[|\])/);

    my $on_page = 30;
    my $skip = $o * $on_page;

    my $cond = Search::Estraier::Condition->new;
    $cond->set_phrase( $search );
    $cond->set_max( $on_page );
    $cond->set_skip( $skip );

    my $nres = $node->search($cond, ( $config->{estraier}->{depth} || 0 ) );

    my $max = 0;

    if (defined($nres)) {
        $max = $nres->hits;
        my $time = $nres->hint('TIME');

        # The phrase comes straight from the request, so it must be escaped
        # before being echoed into the page.
        my $search_html = $q->escapeHTML( $search );
        print qq{
<div id="status_update" style="display:none;">
Got <b>$max</b> results for <tt>$search_html</tt> in <em>$time s</em>
</div>
};
59 |
|
60 |
# html_snippet( $text )
#
# Convert a raw result snippet into an HTML fragment.
#
# The text is cut into segments wherever two consecutive newline / carriage
# return characters occur. Within a segment the first line is emitted as
# leading context, the second line as the highlighted hit (everything from
# the first tab onwards is dropped) and the remainder as trailing context.
# Every part is HTML-escaped through $q; segments are joined with ' ... '.
# NOTE(review): this layout presumably mirrors the Hyper Estraier snippet
# format -- confirm against the node API documentation.
#
# Returns the HTML string, or nothing when $text is undefined or empty.
sub html_snippet {
    my ($text) = @_;
    return unless defined $text && length $text;

    my @segments;
    foreach my $s (split(/[\n\r]{2}/, $text)) {
        my ($pre, $hit, $post) = split(/\n/, $s, 3);

        # A segment may have fewer than three lines; default the missing
        # parts so the substitution below never runs on undef, and so a
        # literal "0" is kept rather than treated as empty.
        $pre  = '' unless defined $pre;
        $hit  = '' unless defined $hit;
        $post = '' unless defined $post;

        # Keep only the text before the first tab of the hit line.
        $hit =~ s/\t.*$//;

        push @segments,
            $q->escapeHTML( $pre ) . '<b>' .
            $q->escapeHTML( $hit ) . '</b>' .
            $q->escapeHTML( $post );
    }
    return join(' ... ', @segments);
}
74 |
|
75 |
# attr_regex( $rdoc, $attr )
#
# Fetch attribute $attr from result document $rdoc and, when the
# configuration has an entry under estraier -> attr_regex for that attribute
# name, apply it to the value.
#
# The configured entry is the right-hand side of a binding expression: it is
# appended to '$text =~ ' and string-eval'ed, so it operates on the lexical
# $text by name (do not rename that variable). A failing eval is reported
# with warn and the value is returned as it stands.
# NOTE(review): string eval of configuration data -- config.yml must be
# trusted.
#
# Returns the (possibly rewritten) value, or nothing when the attribute is
# undefined.
sub attr_regex {
    my ($rdoc, $attr) = @_;

    my $text = $rdoc->attr( $attr );
    return unless defined $text;

    # No expression configured for this attribute: hand the value back as is.
    my $r = $config->{estraier}->{attr_regex}->{$attr};
    return $text unless $r;

    my $do = join('', '$text =~ ', $r, ';');
    eval $do;
    warn "eval $do failed: $@\n" if $@;

    return $text;
}
89 |
|
90 |
# Emit one <li> per document on the current result page.
for my $i ( 0 .. $nres->doc_num - 1 ) {

    my $rdoc = $nres->get_doc($i);

    print "<li>";

    # The title is taken from the indexed document, so it is escaped before
    # being placed into the markup; a missing title prints as empty.
    my $title = $rdoc->attr('@title');
    $title = '' unless defined $title;
    print "<h1>", $q->escapeHTML( $title ), "</h1>\n";

    # attr_regex() output is printed as returned.
    # NOTE(review): confirm whether 'source' and '@mdate' should be
    # HTML-escaped as well; a configured expression could emit markup on
    # purpose.
    print "<h2>", attr_regex( $rdoc, 'source' ),"</h2>\n";
    print "<p>", html_snippet( $rdoc->snippet ),"</p>\n";

    # The URI ends up inside an attribute value and as link text, so it is
    # escaped; an undefined URI becomes an empty link instead of a warning.
    my $uri = attr_regex( $rdoc, '@uri' );
    $uri = '' unless defined $uri;
    $uri = $q->escapeHTML( $uri );
    print qq{<a href="$uri"><tt>$uri</tt></a> },
        attr_regex( $rdoc, '@mdate' );

    # Running result number across pages (1-based).
    print " [", $skip + $i + 1, "]";
    print "</li>";
}
} else {
    # The search returned no result object; report the node status.
    die "error: ", $node->status,"\n";
}


# Close the markup opened at the start of the /snippet branch.
print qq{
</ul>
</div>

</div>

</html>
};

} else {
121 |
|
122 |
# page_id()
#
# Build an identifier by concatenating the current epoch time with a random
# number below 99, log it through warn, and return it.
sub page_id {
    my $page_id = join('', time(), rand(99));
    warn "page_id = $page_id\n";
    return $page_id;
}
127 |
|
128 |
# Map the request path to a template file: strip every non-word character
# (so no directory separators or dots survive), default to 'search', and
# append '.html'.
my $f = $q->path_info;
$f =~ s/\W+//g;
$f ||= 'search';
$f .= '.html';

# Three-argument open: the file is only ever opened for reading.
open(my $s, '<', $f) or die "$f: $!";
while(<$s>) {
    # Replace each <% ... %> tag with the result of evaluating its content
    # as Perl code.
    # NOTE(review): string eval of template content -- the .html templates
    # must be trusted files.
    s/<%(.+?)%>/eval "$1"/ge;
    print;
}
# Close the handle that was opened above (the original passed the file
# name $f to close instead of the handle $s).
close($s);

}