6 |
use CGI::Carp qw(fatalsToBrowser warningsToBrowser); |
use CGI::Carp qw(fatalsToBrowser warningsToBrowser); |
7 |
use Search::Estraier; |
use Search::Estraier; |
8 |
use YAML::Syck; |
use YAML::Syck; |
9 |
|
use JSON::Syck; |
10 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
11 |
|
|
12 |
my $q = new CGI::Simple; |
my $q = new CGI::Simple; |
14 |
|
|
15 |
my $config = LoadFile('config.yml'); |
my $config = LoadFile('config.yml'); |
16 |
|
|
17 |
|
my $v = { |
18 |
|
search => '', |
19 |
|
hits => 0, |
20 |
|
page => 0, |
21 |
|
max_page => 0, |
22 |
|
time => '', |
23 |
|
id => time() . rand(99), |
24 |
|
}; |
25 |
|
|
26 |
|
my $json; |
27 |
|
|
28 |
#warn "config = ", dump($config); |
#warn "config = ", dump($config); |
29 |
|
|
30 |
if ($q->path_info() eq '/snippet') { |
sub get_results { |
31 |
|
my ($search, $page) = @_; |
32 |
|
|
33 |
print qq{ |
if (! $search) { |
34 |
<HTML> |
$v->{status} = 'Enter search query'; |
35 |
<HEAD> |
return; |
36 |
<META CONTENT="text/html; charset=utf-8" HTTP-EQUIV="Content-Type"> |
} |
|
<META CONTENT="no-cache" HTTP-EQUIV="Pragma"> |
|
|
<META CONTENT="-1" HTTP-EQUIV="Expires"> |
|
|
</HEAD> |
|
|
|
|
|
<div> |
|
|
<div class="post"> |
|
|
<ul> |
|
|
}; |
|
37 |
|
|
38 |
my $node = new Search::Estraier::Node(%{ $config->{estraier} }); |
if (! $page) { |
39 |
|
$v->{status} = 'Error: no page number?'; |
40 |
|
return; |
41 |
|
} |
42 |
|
|
43 |
|
$search = join(" AND ", split(/\s+/, $search)) unless ($search =~ m/(?:AND|OR|\[|\])/); |
44 |
|
$v->{search} = $search || ''; |
45 |
|
|
46 |
|
$v->{page} = $page; |
47 |
|
|
48 |
my $o = $q->param('index') || 0; |
my $node = new Search::Estraier::Node(%{ $config->{estraier} }); |
49 |
my $search = $q->param('q'); |
|
50 |
$search = join(" AND ", split(/\s+/, $search)) unless ($search =~ m/(?:AND|OR|\[|\])/); |
my $on_page = 30; |
51 |
|
my $skip = ( $page - 1 ) * $on_page; |
52 |
my $on_page = 30; |
|
53 |
my $skip = $o * $on_page; |
my $cond = new Search::Estraier::Condition; |
54 |
|
$cond->set_phrase( $search ); |
55 |
my $cond = new Search::Estraier::Condition; |
$cond->set_max( $on_page ); |
56 |
$cond->set_phrase( $search ); |
$cond->set_skip( $skip ); |
57 |
$cond->set_max( $on_page ); |
|
58 |
$cond->set_skip( $skip ); |
my $nres = $node->search($cond, ( $config->{estraier}->{depth} || 0 ) ); |
59 |
|
|
60 |
my $nres = $node->search($cond, ( $config->{estraier}->{depth} || 0 ) ); |
my $out; |
61 |
|
|
62 |
my $max = 0; |
if (defined($nres)) { |
63 |
|
$v->{hits} = $nres->hits; |
64 |
if (defined($nres)) { |
$v->{time} = $nres->hint('TIME'); |
65 |
$max = $nres->hits; |
$v->{max_page} = int( ($nres->hits + $on_page - 1) / $on_page ); |
66 |
my $time = $nres->hint('TIME'); |
|
67 |
print qq{ |
$v->{status} = qq{ |
68 |
<div id="status_update" style="display:none;"> |
Got <b>$v->{hits}</b> results for <tt>$v->{search}</tt> |
69 |
Got <b>$max</b> results for <tt>$search</tt> in <em>$time s</em> |
in <em>$v->{time} s</em> |
70 |
</div> |
}; |
71 |
}; |
|
72 |
|
sub html_snippet { |
73 |
sub html_snippet { |
my $text = shift || return; |
74 |
my $text = shift || return; |
my $out = ''; |
75 |
my $out = ''; |
foreach my $s (split(/[\n\r]{2}/, $text)) { |
76 |
foreach my $s (split(/[\n\r]{2}/, $text)) { |
$out .= ' ... ' if ($out); |
77 |
$out .= ' ... ' if ($out); |
my ($pre,$hit,$post) = split(/\n/,$s,3); |
78 |
my ($pre,$hit,$post) = split(/\n/,$s,3); |
$hit =~ s/\t.*$//; |
79 |
$hit =~ s/\t.*$//; |
$out .= |
80 |
$out .= |
$q->escapeHTML( $pre || '' ) . '<b>' . |
81 |
$q->escapeHTML( $pre || '' ) . '<b>' . |
$q->escapeHTML( $hit || '' ) . '</b>' . |
82 |
$q->escapeHTML( $hit || '' ) . '</b>' . |
$q->escapeHTML( $post || ''); |
83 |
$q->escapeHTML( $post || ''); |
} |
84 |
|
return $out; |
85 |
} |
} |
|
return $out; |
|
|
} |
|
86 |
|
|
87 |
sub attr_regex { |
sub attr_regex { |
88 |
my ($rdoc,$attr) = @_; |
my ($rdoc,$attr) = @_; |
89 |
my $text = $rdoc->attr( $attr ); |
my $text = $rdoc->attr( $attr ); |
90 |
return unless defined($text); |
return unless defined($text); |
91 |
|
|
92 |
if (my $r = $config->{estraier}->{attr_regex}->{$attr} ) { |
if (my $r = $config->{estraier}->{attr_regex}->{$attr} ) { |
93 |
my $do = '$text =~ ' . $r . ';'; |
my $do = '$text =~ ' . $r . ';'; |
94 |
eval $do; |
eval $do; |
95 |
if ($@) { |
if ($@) { |
96 |
warn "eval $do failed: $@\n"; |
warn "eval $do failed: $@\n"; |
97 |
|
} |
98 |
} |
} |
99 |
|
return $text; |
100 |
} |
} |
|
return $text; |
|
|
} |
|
101 |
|
|
102 |
# for each document in results |
# for each document in results |
103 |
for my $i ( 0 ... $nres->doc_num - 1 ) { |
for my $i ( 0 ... $nres->doc_num - 1 ) { |
104 |
|
|
105 |
my $rdoc = $nres->get_doc($i); |
my $rdoc = $nres->get_doc($i); |
106 |
|
|
107 |
print "<li>"; |
$out .= '<div class="item">' . |
108 |
|
'<h1>' . $rdoc->attr('@title') . '</h1>' . |
109 |
|
'<p>' . html_snippet( $rdoc->snippet ) . '</p>' . |
110 |
|
'<h2>' . attr_regex( $rdoc, 'source' ) . '</h2>'; |
111 |
|
my $uri = attr_regex( $rdoc, '@uri' ); |
112 |
|
$out .= |
113 |
|
qq{<a href="$uri"><tt>$uri</tt></a> } . |
114 |
|
attr_regex( $rdoc, '@mdate' ) . |
115 |
|
' [' . ( $skip + $i + 1 ) . ']'; |
116 |
|
} |
117 |
|
|
118 |
print "<h1>", $rdoc->attr('@title'),"</h1>\n"; |
} else { |
119 |
print "<h2>", attr_regex( $rdoc, 'source' ),"</h2>\n"; |
$out .= 'error: ' . $node->status; |
|
print "<p>", html_snippet( $rdoc->snippet ),"</p>\n"; |
|
|
my $uri = attr_regex( $rdoc, '@uri' ); |
|
|
print qq{<a href="$uri"><tt>$uri</tt></a> }, |
|
|
attr_regex( $rdoc, '@mdate' ); |
|
|
print " [", $skip + $i + 1, "]"; |
|
|
print "</li>"; |
|
120 |
} |
} |
|
} else { |
|
|
die "error: ", $node->status,"\n"; |
|
|
} |
|
121 |
|
|
122 |
|
$json = '<textarea id="json" style="display:none">' . |
123 |
|
$q->escapeHTML( JSON::Syck::Dump( $v ) ) . |
124 |
|
'</textarea>'; |
125 |
|
|
126 |
print qq{ |
return ($out,$json); |
|
</ul> |
|
|
</div> |
|
127 |
|
|
128 |
</div> |
} |
129 |
|
|
130 |
|
if ($q->path_info() eq '/snippet') { |
131 |
|
|
132 |
|
print join("\n<!-- json data -->", |
133 |
|
get_results( |
134 |
|
$q->param('search'), |
135 |
|
$q->param('page'), |
136 |
|
) |
137 |
|
); |
138 |
|
|
|
</html> |
|
|
}; |
|
139 |
|
|
140 |
} else { |
} else { |
141 |
|
|
142 |
sub page_id { |
my ($get_results, $json) = get_results( $q->param('search'), 1 ); |
|
my $page_id = time() . rand(99); |
|
|
warn "page_id = $page_id\n"; |
|
|
return $page_id; |
|
|
}; |
|
143 |
|
|
144 |
my $f = $q->path_info; |
my $f = $q->path_info; |
145 |
$f =~ s/\W+//g; |
$f =~ s/\W+//g; |