1 |
#!/usr/bin/perl -w |
2 |
|
3 |
use strict; |
4 |
|
5 |
use CGI::Simple; |
6 |
use CGI::Carp qw(fatalsToBrowser warningsToBrowser); |
7 |
use Search::Estraier; |
8 |
use YAML::Syck; |
9 |
use JSON::Syck; |
10 |
use Data::Dump qw/dump/; |
11 |
|
12 |
# CGI request object; the code below reads parameters, path info and
# escapes HTML through it.
my $q = CGI::Simple->new;

# Emit the response header before anything else.  The header line ends in
# CRLF and a second CRLF (an empty line) terminates the header block, so the
# body no longer starts with a stray carriage return.
print qq{Content-type: text/html\r\n\r\n};

# Configuration is read from config.yml in the current working directory.
my $config = LoadFile('config.yml');

# State shared with the browser: get_results() fills it in and json()
# serialises it into the response.
my $v = {
    search   => '',
    hits     => 0,
    page     => 0,
    max_page => 0,
    time     => '',
    # current time concatenated with a random number
    # (presumably a per-request identifier -- TODO confirm with the client code)
    id       => time() . rand(99),
};

# Never assigned in the visible code.  Kept because the page templates are
# evaluated in this file's scope and may refer to it.
my $json;

#warn "config = ", dump($config);
29 |
|
30 |
# Serialise the shared state hash $v as JSON, HTML-escape the JSON text and
# wrap it in a hidden <textarea id="json"> element.
# Takes no arguments; returns the markup as a single string.
sub json {
    my $escaped = $q->escapeHTML( JSON::Syck::Dump( $v ) );

    return join '',
        '<textarea id="json" style="display:none">',
        $escaped,
        '</textarea>';
}
36 |
|
37 |
# Wrap the given HTML fragments (joined with newlines), followed by the JSON
# state block produced by json(), in the <div id="next_page"> element that
# ends every response.
sub next_page {
    return '<div id="next_page">' .
        join("\n", @_) . json() . '</div>';
}

# Convert a result snippet into HTML.
#
# The snippet is cut into segments at two consecutive line-break characters.
# Within a segment the first line is the text before the hit, the second line
# is the hit itself (anything from the first tab onwards is dropped) and the
# remainder is the text after the hit.  Every part is HTML-escaped, the hit is
# wrapped in <b>, and segments are joined with ' ... '.
#
# Returns nothing when called without text (or with a false value).
sub html_snippet {
    my $text = shift || return;
    my $out = '';
    foreach my $s (split(/[\n\r]{2}/, $text)) {
        $out .= ' ... ' if ($out);
        my ($pre,$hit,$post) = split(/\n/,$s,3);
        # a segment consisting of a single line has no hit part
        $hit = '' unless defined $hit;
        $hit =~ s/\t.*$//;
        $out .=
            $q->escapeHTML( $pre || '' ) . '<b>' .
            $q->escapeHTML( $hit || '' ) . '</b>' .
            $q->escapeHTML( $post || '');
    }
    return $out;
}

# Fetch attribute $attr from result document $rdoc and, when the
# configuration has an entry estraier -> attr_regex -> $attr, apply that
# entry to the value as a Perl substitution.
#
# Returns the (possibly rewritten) value, or nothing if the document has no
# such attribute.
sub attr_regex {
    my ($rdoc,$attr) = @_;
    my $text = $rdoc->attr( $attr );
    return unless defined($text);

    if (my $r = $config->{estraier}->{attr_regex}->{$attr} ) {
        # NOTE(review): this string-evals code taken from config.yml, so the
        # configuration file must be as trusted as this script itself.
        my $do = '$text =~ ' . $r . ';';
        eval $do;
        if ($@) {
            warn "eval $do failed: $@\n";
        }
    }
    return $text;
}

# Run a search and return one page of results as an HTML fragment.
#
# Named arguments:
#   search - query string; terms are joined with AND unless the query
#            already contains AND, OR or square brackets
#   page   - 1-based page number (30 results per page)
#
# Side effects: updates the shared state hash $v (search, page, hits, time,
# max_page, status) and logs the call with warn.
#
# The returned fragment always ends with the next_page() footer, which
# carries the JSON state for the client.
sub get_results {
    my $p = {@_};

    my ($search,$page) = ( $p->{search} || '', $p->{page} || 0);

    warn "get_results( $search , $page )\n";

    # empty or whitespace-only query
    if ($search =~ m/^\s*$/) {
        $v->{status} = 'Enter search query';
        return next_page();
    }

    # The page must be a positive integer; anything else (including text
    # such as "abc") would otherwise turn into a negative skip offset.
    if ($page =~ m/\A\s*(\d+)\s*\z/ && $1 > 0) {
        $page = $1 + 0;
    } else {
        $v->{status} = 'Error: no page number?';
        return next_page();
    }

    # split ' ' ignores leading whitespace, so " foo bar" no longer
    # produces a leading empty term (" AND foo AND bar")
    $search = join(" AND ", split(' ', $search)) unless ($search =~ m/(?:AND|OR|\[|\])/);
    $v->{search} = $search;

    $v->{page} = $page;

    my $node = Search::Estraier::Node->new(%{ $config->{estraier} });

    my $on_page = 30;
    my $skip = ( $page - 1 ) * $on_page;

    my $cond = Search::Estraier::Condition->new;
    $cond->set_phrase( $search );
    $cond->set_max( $on_page );
    $cond->set_skip( $skip );

    my $nres = $node->search($cond, ( $config->{estraier}->{depth} || 0 ) );

    my $out = '';

    if (defined($nres)) {
        $v->{hits} = $nres->hits;
        $v->{time} = $nres->hint('TIME');
        # number of pages, rounded up
        $v->{max_page} = int( ($nres->hits + $on_page - 1) / $on_page );

        # status is HTML, so the user-supplied query has to be escaped
        my $search_html = $q->escapeHTML( $v->{search} );
        $v->{status} = qq{
            Got <b>$v->{hits}</b> results for <tt>$search_html</tt>
            in <em>$v->{time} s</em>
        };

        # for each document in results
        for my $i ( 0 .. $nres->doc_num - 1 ) {

            my $rdoc = $nres->get_doc($i);

            my $title   = $rdoc->attr('@title');
            my $snippet = html_snippet( $rdoc->snippet );
            my $source  = attr_regex( $rdoc, 'source' );
            my $uri     = attr_regex( $rdoc, '@uri' );
            my $mdate   = attr_regex( $rdoc, '@mdate' );

            # missing attributes are rendered as empty strings
            for my $field ($title, $snippet, $source, $uri, $mdate) {
                $field = '' unless defined $field;
            }

            # NOTE(review): values returned by attr_regex() are emitted
            # verbatim because the configured substitution may produce
            # markup -- confirm, and escape them here if it never does.
            $out .= '<div class="item">' .
                '<h1>' . $q->escapeHTML( $title ) . '</h1>' .
                '<p>' . $snippet . '</p>' .
                '<h2>' . $source . '</h2>' .
                qq{<a href="$uri"><tt>$uri</tt></a> } .
                $mdate .
                ' [' . ( $skip + $i + 1 ) . ']' .
                '</div>';
        }

    } else {
        $out .= 'error: ' . $node->status;
    }

    # >= rather than ==: with zero hits or a failed search max_page is 0,
    # and there is nothing further to load.
    if ($v->{page} >= $v->{max_page}) {
        $out .= next_page('<strong>All results shown</strong>');
    } else {
        $out .= next_page(
            '<strong>Loading results...</strong><br/>',
            'If you are using the scroll bar, release the mouse to see more results.'
        );
    }

    return $out;

}
150 |
|
151 |
# Request dispatch.
#
#   /snippet       - return only the HTML fragment for the requested page
#   anything else  - render a template with the first page of results
#
# param() is called in scalar context on purpose: in list context a missing
# or repeated parameter would change the number of elements in the
# named-argument list passed to get_results().
if ($q->path_info() eq '/snippet') {

    print get_results(
        search => scalar $q->param('search'),
        page   => scalar $q->param('page'),
    );

} else {

    # Not used directly below; templates are evaluated in this scope and
    # presumably print it -- TODO confirm against the .html templates.
    my $get_results = get_results(
        search => scalar $q->param('search'),
        page   => 1,
    );

    # Template name: path info with every non-word character removed
    # (so it cannot leave the current directory), defaulting to 'search'.
    my $f = $q->path_info;
    $f =~ s/\W+//g;
    $f ||= 'search';
    $f .= '.html';
    open(my $s, '<', $f) or die "$f: $!";
    while(<$s>) {
        # replace each <% ... %> with the result of evaluating its content
        # as Perl; templates are therefore as trusted as this script
        s/<%(.+?)%>/eval "$1"/ge;
        print;
    }
    # close the handle, not the file name
    close($s);

}