6 |
use CGI::Carp qw(fatalsToBrowser warningsToBrowser); |
use CGI::Carp qw(fatalsToBrowser warningsToBrowser); |
7 |
use Search::Estraier; |
use Search::Estraier; |
8 |
use YAML::Syck; |
use YAML::Syck; |
9 |
|
use JSON::Syck; |
10 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
11 |
|
|
12 |
my $q = new CGI::Simple; |
my $q = new CGI::Simple; |
14 |
|
|
15 |
my $config = LoadFile('config.yml'); |
my $config = LoadFile('config.yml'); |
16 |
|
|
17 |
#warn "config = ", dump($config); |
my $v = { |
18 |
|
search => '', |
19 |
|
hits => 0, |
20 |
|
page => 0, |
21 |
|
max_page => 0, |
22 |
|
time => '', |
23 |
|
id => time() . rand(99), |
24 |
|
}; |
25 |
|
|
26 |
if ($q->path_info() eq '/snippet') { |
my $json; |
27 |
|
|
28 |
print qq{ |
sub debug { |
29 |
<HTML> |
my ($text,$var) = @_; |
30 |
<HEAD> |
print "<pre>$text = ", dump($var), "</pre>"; |
31 |
<META CONTENT="text/html; charset=utf-8" HTTP-EQUIV="Content-Type"> |
} |
|
<META CONTENT="no-cache" HTTP-EQUIV="Pragma"> |
|
|
<META CONTENT="-1" HTTP-EQUIV="Expires"> |
|
|
</HEAD> |
|
|
|
|
|
<div> |
|
|
<div class="post"> |
|
|
<ul> |
|
|
}; |
|
32 |
|
|
33 |
my $node = new Search::Estraier::Node(%{ $config->{estraier} }); |
#debug('config', $config); |
34 |
|
|
35 |
my $o = $q->param('index') || 0; |
sub json { |
36 |
my $search = $q->param('q'); |
return |
37 |
|
'<textarea id="json" style="display:none">' . |
38 |
|
$q->escapeHTML( JSON::Syck::Dump( $v ) ) . |
39 |
|
'</textarea>'; |
40 |
|
} |
41 |
|
|
42 |
my $on_page = 30; |
sub sort_order { |
43 |
my $skip = $o * $on_page; |
my $out; |
44 |
|
|
45 |
my $cond = new Search::Estraier::Condition; |
my $sort = $q->param('sort'); |
46 |
$cond->set_phrase( $search ); |
|
47 |
$cond->set_max( $on_page ); |
$out .= '<select name="sort" id="sort">'; |
48 |
$cond->set_skip( $skip ); |
|
49 |
|
foreach my $s (@{ $config->{estraier}->{order} }) { |
50 |
my $nres = $node->search($cond, 0); |
my ($text,$value) = %{$s}; |
51 |
|
$out .= qq{<option value="$value"} . |
52 |
my $max = 0; |
( $sort eq $value ? ' selected' : '' ) . |
53 |
|
qq{>$text</option>}; |
|
if (defined($nres)) { |
|
|
$max = $nres->hits; |
|
|
print "Got ", $nres->hits, " results for $search\n"; |
|
|
|
|
|
sub html_snippet { |
|
|
my $text = shift || return; |
|
|
my $out = ''; |
|
|
foreach my $s (split(/[\n\r]{2}/, $text)) { |
|
|
$out .= ' ... ' if ($out); |
|
|
my ($pre,$hit,$post) = split(/\n/,$s,3); |
|
|
$hit =~ s/\t.*$//; |
|
|
$out .= |
|
|
$q->escapeHTML( $pre || '' ) . '<b>' . |
|
|
$q->escapeHTML( $hit || '' ) . '</b>' . |
|
|
$q->escapeHTML( $post || ''); |
|
|
} |
|
|
return $out; |
|
54 |
} |
} |
55 |
|
|
56 |
|
$out .= '</select>'; |
57 |
|
} |
58 |
|
|
59 |
|
sub get_results { |
60 |
|
my $p = {@_}; |
61 |
|
|
62 |
# for each document in results |
my ($search,$page) = ( $p->{search} , $p->{page}); |
|
for my $i ( 0 ... $nres->doc_num - 1 ) { |
|
63 |
|
|
64 |
my $rdoc = $nres->get_doc($i); |
sub next_page { |
65 |
|
return '<div id="next_page">' . |
66 |
|
join("\n", @_) . json() . '</div>'; |
67 |
|
} |
68 |
|
|
69 |
print "<li>"; |
if (! $search || $search =~ m/^\s*$/) { |
70 |
|
$v->{status} = 'Enter search query'; |
71 |
|
return next_page(); |
72 |
|
} |
73 |
|
|
74 |
print "<h1>", $rdoc->attr('@title'),"</h1>\n"; |
if (! $page) { |
75 |
print "<h2>", $rdoc->attr('source'),"</h2>\n"; |
$v->{status} = 'Error: no page number?'; |
76 |
print "", html_snippet( $rdoc->snippet ),"<br/>\n"; |
return next_page(); |
|
print "[", $skip + $i, "] "; |
|
|
print "<tt>", $rdoc->attr('@uri'),"</tt>"; |
|
|
print "</li>"; |
|
77 |
} |
} |
|
} else { |
|
|
die "error: ", $node->status,"\n"; |
|
|
} |
|
78 |
|
|
79 |
|
$search = join(" AND ", split(/\s+/, $search)) unless ($search =~ m/(?:AND|OR|\[|\])/); |
80 |
|
$v->{search} = $search; |
81 |
|
|
82 |
print qq{ |
$v->{page} = $page; |
|
</ul> |
|
|
</div> |
|
83 |
|
|
84 |
</div> |
my $node = new Search::Estraier::Node(%{ $config->{estraier} }); |
85 |
|
|
86 |
|
my $on_page = 30; |
87 |
|
my $skip = ( $page - 1 ) * $on_page; |
88 |
|
|
89 |
|
my $cond = new Search::Estraier::Condition; |
90 |
|
$cond->set_phrase( $search ); |
91 |
|
$cond->set_max( $on_page ); |
92 |
|
$cond->set_skip( $skip ); |
93 |
|
$cond->set_order( $p->{sort} ) if ($p->{sort}); |
94 |
|
|
95 |
|
my $nres = $node->search($cond, ( $config->{estraier}->{depth} || 0 ) ); |
96 |
|
|
97 |
|
my $out; |
98 |
|
|
99 |
|
if (defined($nres)) { |
100 |
|
$v->{hits} = $nres->hits; |
101 |
|
$v->{time} = $nres->hint('TIME'); |
102 |
|
$v->{max_page} = int( ($nres->hits + $on_page - 1) / $on_page ); |
103 |
|
|
104 |
|
$v->{status} = qq{ |
105 |
|
Got <b>$v->{hits}</b> results for <tt>$v->{search}</tt> |
106 |
|
in <em>$v->{time} s</em> |
107 |
|
}; |
108 |
|
|
109 |
|
# Render a search-result snippet as an HTML fragment.
#
# $text is split into segments wherever two newline / carriage-return
# characters occur in a row. Each segment is then split on "\n" into at most
# three parts (pre, hit, post); everything from the first tab onward is
# removed from the hit part. All three parts are HTML-escaped through
# $q->escapeHTML, the hit is wrapped in <b>...</b>, and the rendered
# segments are joined with ' ... '.
#
# NOTE(review): the pre/hit/post layout is what this code assumes about the
# snippet text; confirm against the Search::Estraier snippet format.
#
# Returns nothing when $text is false, otherwise the HTML string.
sub html_snippet {
	my $text = shift || return;

	my @rendered;
	foreach my $s (split(/[\n\r]{2}/, $text)) {
		my ($pre,$hit,$post) = split(/\n/,$s,3);

		# A segment with fewer than three lines leaves trailing parts
		# undefined; normalise them so the substitution and the escaping
		# below never operate on undef (and a literal "0" part is kept).
		foreach my $part ($pre, $hit, $post) {
			$part = '' unless defined($part);
		}

		# drop the tab and whatever follows it on the hit line
		$hit =~ s/\t.*$//;

		push @rendered,
			$q->escapeHTML( $pre ) . '<b>' .
			$q->escapeHTML( $hit ) . '</b>' .
			$q->escapeHTML( $post );
	}

	return join(' ... ', @rendered);
}
123 |
|
|
124 |
|
# Fetch attribute $attr from result document $rdoc and, when the
# configuration has an entry under estraier -> attr_regex for that attribute
# name, apply it to the value.
#
# The configured entry is pasted after '$text =~ ' and run through string
# eval, so it is expected to be a complete Perl match/substitution operator.
# NOTE(review): this executes text from config.yml as Perl code; keep that
# file writable only by trusted users.
#
# Returns nothing when the document has no such attribute, otherwise the
# (possibly rewritten) attribute value. A failing eval only emits a warning.
sub attr_regex {
	my ($rdoc, $attr) = @_;

	my $text = $rdoc->attr( $attr );
	return unless defined($text);

	my $rule = $config->{estraier}->{attr_regex}->{$attr};
	return $text unless $rule;

	# $text must keep its name: the evaluated string refers to it literally
	my $code = '$text =~ ' . $rule . ';';
	eval $code;
	warn "eval $code failed: $@\n" if ($@);

	return $text;
}
138 |
|
|
139 |
|
my @template; |
140 |
|
open(my $t, 'result.html') || die "result.html: $!"; |
141 |
|
while(<$t>) { |
142 |
|
push @template, $_; |
143 |
|
} |
144 |
|
close($t); |
145 |
|
|
146 |
|
# for each document in results |
147 |
|
for my $i ( 0 ... $nres->doc_num - 1 ) { |
148 |
|
|
149 |
|
my $rdoc = $nres->get_doc($i); |
150 |
|
my $uri = attr_regex( $rdoc, '@uri' ); |
151 |
|
my $nr = $skip + $i + 1; |
152 |
|
|
153 |
|
map { |
154 |
|
my $l = $_; |
155 |
|
$l =~ s/<%(.+?)%>/eval "$1"/ge; |
156 |
|
$out .= $l; |
157 |
|
} @template; |
158 |
|
|
159 |
|
} |
160 |
|
|
161 |
|
} else { |
162 |
|
$out .= 'error: ' . $node->status; |
163 |
|
} |
164 |
|
|
165 |
|
if ($v->{page} == $v->{max_page}) { |
166 |
|
$out .= next_page('<br/><strong>All results shown</strong>'); |
167 |
|
} else { |
168 |
|
$out .= next_page( |
169 |
|
'<br/><strong>Loading results...</strong><br/>', |
170 |
|
'If you are using the scroll bar, release the mouse to see more results.' |
171 |
|
); |
172 |
|
} |
173 |
|
|
174 |
|
return $out; |
175 |
|
|
176 |
|
} |
177 |
|
|
178 |
|
if ($q->path_info() eq '/snippet') { |
179 |
|
|
180 |
|
print get_results( |
181 |
|
search => $q->param('search') || '', |
182 |
|
page => $q->param('page') || 0, |
183 |
|
sort => $q->param('sort') || undef, |
184 |
|
); |
185 |
|
|
|
</html> |
|
|
}; |
|
186 |
|
|
187 |
} else { |
} else { |
188 |
|
|
189 |
sub page_id { |
my $get_results = get_results( |
190 |
my $page_id = time() . rand(99); |
search => $q->param('search') || '', |
191 |
warn "page_id = $page_id\n"; |
page => 1, |
192 |
return $page_id; |
sort => $q->param('sort') || undef, |
193 |
}; |
); |
194 |
|
|
195 |
# Serve a static page template selected by the request path.
#
# Every non-word character is stripped from path_info, so no directory
# separator or dot from the request survives, and '.html' is appended to
# form the file name.
my $f = $q->path_info;
$f =~ s/\W+//g;
$f .= '.html';

# three-argument open: the file name is never interpreted as an open mode
open(my $s, '<', $f) || die "$f: $!";
while(<$s>) {
	# template expressions may refer to variables by bare name
	no strict 'vars';
	# replace each <% ... %> marker with the result of evaluating its
	# contents as Perl; templates are therefore trusted code
	s/<%(.+?)%>/eval "$1"/ge;
	print;
}
# close the file handle itself (not $f, which is only the file name)
close($s);
205 |
|
|
206 |
} |
} |