7 |
Catalyst::Model |
Catalyst::Model |
8 |
/; |
/; |
9 |
use WebPAC::Store 0.08; |
use WebPAC::Store 0.08; |
10 |
use WebPAC::Search::Estraier 0.05; |
use Search::Estraier 0.04; |
11 |
use File::Slurp; |
use File::Slurp; |
12 |
use Time::HiRes qw/time/; |
use Time::HiRes qw/time/; |
13 |
use Encode qw/encode decode from_to/; |
use Encode qw/encode decode from_to/; |
74 |
$est_cfg->{database} = $defaultnode; |
$est_cfg->{database} = $defaultnode; |
75 |
} |
} |
76 |
|
|
77 |
$self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } ); |
my $url = $est_cfg->{masterurl} . '/node/' . $est_cfg->{database}; |
78 |
|
|
79 |
|
$log->info("opening Hyper Estraier index $url as $est_cfg->{'user'}"); |
80 |
|
|
81 |
|
$self->{est_node} = Search::Estraier::Node->new( |
82 |
|
url => $url, |
83 |
|
user => $est_cfg->{user}, |
84 |
|
passwd => $est_cfg->{passwd}, |
85 |
|
); |
86 |
|
|
87 |
|
$log->fatal("can't create Search::Estraier::Node $url") unless ($self->{est_node}); |
88 |
|
|
89 |
# save config parameters in object |
# save config parameters in object |
90 |
foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) { |
foreach my $f (qw/ |
91 |
|
db_path template_path hits_on_page webpac_encoding defaultdepth |
92 |
|
masterurl |
93 |
|
/) { |
94 |
$self->{$f} = $c->config->{hyperestraier}->{$f} || |
$self->{$f} = $c->config->{hyperestraier}->{$f} || |
95 |
$c->config->{webpac}->{$f}; |
$c->config->{webpac}->{$f}; |
96 |
$log->debug("self->{$f} = " . $self->{$f}); |
$log->debug("self->{$f} = " . $self->{$f}); |
114 |
"'" |
"'" |
115 |
); |
); |
116 |
|
|
117 |
$self->{databases} = $c->config->{databases} || $log->error("can't find databases in config"); |
$self->{databases} = $c->config->{databases} || $log->fatal("can't find databases in config"); |
118 |
|
|
119 |
# create Template toolkit instance |
# create Template toolkit instance |
120 |
$self->{'tt'} = Template->new( |
$self->{'tt'} = Template->new( |
141 |
|
|
142 |
} |
} |
143 |
|
|
144 |
|
=head2 setup_site |
145 |
|
|
146 |
|
$self->setup_site('site_name'); |
147 |
|
|
148 |
|
Change node URL and database name according to site name (if available) |
149 |
|
|
150 |
|
=cut |
151 |
|
|
152 |
|
# Re-point the Hyper Estraier node object at the node of the given site.
#
#   $self->setup_site('site_name');
#
# Arguments:
#   $site - site name, used as the last component of the node URL
#           ("<masterurl>/node/<site>").
#
# Returns nothing when no (or a false) site name is given, or when no
# masterurl is available; otherwise returns whatever the logger's debug()
# call returns.
sub setup_site {
    my $self = shift;

    # no site name: keep using the node that is currently configured
    my $site = shift || return;

    # Without a master URL we would build a relative "/node/<site>" URL and
    # silently break every later search, so leave the node untouched.
    my $master_url = $self->{masterurl};
    if ( !defined $master_url || $master_url eq '' ) {
        $self->{log}->warn("setup_site $site: masterurl is not set, node URL not changed");
        return;
    }

    my $url = $master_url . '/node/' . $site;
    $self->{est_node}->set_url( $url );

    return $self->{log}->debug("setup_site $site");
}
161 |
|
|
162 |
=head2 search |
=head2 search |
163 |
|
|
189 |
|
|
190 |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
191 |
|
|
|
$log->debug("search model query: '$query'"); |
|
|
if ($args->{add_attr}) { |
|
|
$log->debug(" + add_attr: " . |
|
|
join("','", @{ $args->{add_attr} }) |
|
|
); |
|
|
} |
|
|
|
|
192 |
my $template_filename = $args->{template} || $self->{template}; |
my $template_filename = $args->{template} || $self->{template}; |
193 |
|
|
194 |
$args->{max} ||= $self->{'hits_for_pager'}; |
$args->{max} ||= $self->{'hits_for_pager'}; |
207 |
$args->{depth} = $default; |
$args->{depth} = $default; |
208 |
$log->warn("using default search depth $default"); |
$log->warn("using default search depth $default"); |
209 |
} |
} |
210 |
|
$args->{depth} ||= 0; |
211 |
|
|
212 |
my @results = $self->{est}->search( %{ $args } ); |
$log->debug("searching for maximum $args->{max} results using depth $args->{depth} phrase: ", $query || '[none]'); |
213 |
|
|
214 |
$times->{est} += time() - $t; |
# |
215 |
|
# construct condition for Hyper Estraier |
216 |
|
# |
217 |
|
my $cond = Search::Estraier::Condition->new(); |
218 |
|
if ( ref($args->{add_attr}) eq 'ARRAY' ) { |
219 |
|
$log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) ); |
220 |
|
map { |
221 |
|
$cond->add_attr( $_ ); |
222 |
|
$log->debug(" + $_"); |
223 |
|
} @{ $args->{add_attr} }; |
224 |
|
}; |
225 |
|
|
226 |
|
$cond->set_phrase( $query ) if ($query); |
227 |
|
$cond->set_options( $args->{options} ) if ($args->{options}); |
228 |
|
$cond->set_order( $args->{order} ) if ($args->{order}); |
229 |
|
|
230 |
|
my $max = $args->{max} || 7; |
231 |
|
my $page = $args->{page} || 1; |
232 |
|
if ($page < 1) { |
233 |
|
$log->warn("page number $page < 1"); |
234 |
|
$page = 1; |
235 |
|
} |
236 |
|
|
237 |
|
$cond->set_max( $page * $max ); |
238 |
|
|
239 |
my $hits = $#results + 1; |
my $result = $self->{est_node}->search($cond, $args->{depth}); |
240 |
|
my $hits = $result->doc_num; |
241 |
|
|
242 |
|
$times->{est} += time() - $t; |
243 |
|
|
244 |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
245 |
|
|
246 |
# just return results? |
$log->debug( "hints: " . Dumper($result->{hints}) ); |
|
return @results unless ($args->{'template'}); |
|
247 |
|
|
248 |
# |
# |
249 |
# construct HTML results |
# fetch results |
250 |
# |
# |
251 |
|
|
252 |
my @html_results; |
my @results; |
253 |
|
|
254 |
for my $i ( 0 .. $#results ) { |
for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) { |
255 |
|
|
256 |
my ($database, $prefix, $id); |
$t = time(); |
257 |
if ( $results[$i]->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { |
|
258 |
($database, $prefix,$id) = ($1,$2,$3); |
#$log->debug("get_doc($i)"); |
259 |
} else { |
my $doc = $result->get_doc( $i ); |
260 |
$log->warn("can't decode database/prefix/id from " . $results[$i]->{'@uri'}); |
if (! $doc) { |
261 |
|
$log->warn("can't find result $i"); |
262 |
next; |
next; |
263 |
} |
} |
264 |
|
|
265 |
#$log->debug("load_ds( id => $id, prefix => '$prefix' )"); |
my $hash; |
266 |
|
|
267 |
$t = time(); |
foreach my $attr (@{ $args->{get_attr} }) { |
268 |
|
my $val = $doc->attr( $attr ); |
269 |
my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); |
#$log->debug("attr $attr = ", $val || 'undef'); |
270 |
if (! $ds) { |
$hash->{$attr} = $val if (defined($val)); |
|
$log->error("can't load_ds( ${database}/${prefix}/${id} )"); |
|
|
next; |
|
271 |
} |
} |
272 |
|
|
273 |
$times->{db} += time() - $t; |
$times->{hash} += time() - $t; |
274 |
|
|
275 |
#$log->debug( "ds = " . Dumper( \@html_results ) ); |
next unless ($hash); |
276 |
|
|
277 |
$t = time(); |
if (! $args->{'template'}) { |
278 |
|
push @results, $hash; |
279 |
|
} else { |
280 |
|
my ($database, $prefix, $id); |
281 |
|
|
282 |
my $html = $self->apply( |
if ( $hash->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { |
283 |
template => $template_filename, |
($database, $prefix,$id) = ($1,$2,$3); |
284 |
data => $ds, |
} else { |
285 |
record_uri => "${database}/${prefix}/${id}", |
$log->warn("can't decode database/prefix/id from " . $hash->{'@uri'}); |
286 |
config => $self->{databases}->{$database}, |
next; |
287 |
); |
} |
288 |
|
|
289 |
$times->{out} += time() - $t; |
#$log->debug("load_ds( id => $id, prefix => '$prefix' )"); |
290 |
|
|
291 |
$t = time(); |
$t = time(); |
292 |
|
|
293 |
$html = decode($self->{webpac_encoding}, $html); |
my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); |
294 |
|
if (! $ds) { |
295 |
|
$log->error("can't load_ds( ${database}/${prefix}/${id} )"); |
296 |
|
next; |
297 |
|
} |
298 |
|
|
299 |
|
$times->{db} += time() - $t; |
300 |
|
|
301 |
|
#$log->debug( "ds = " . Dumper( \@html_results ) ); |
302 |
|
|
303 |
|
$t = time(); |
304 |
|
|
305 |
|
my $html = $self->apply( |
306 |
|
template => $template_filename, |
307 |
|
data => $ds, |
308 |
|
record_uri => "${database}/${prefix}/${id}", |
309 |
|
config => $self->{databases}->{$database}, |
310 |
|
); |
311 |
|
|
312 |
push @html_results, $html; |
$times->{apply} += time() - $t; |
313 |
|
|
314 |
|
$t = time(); |
315 |
|
|
316 |
|
$html = decode($self->{webpac_encoding}, $html); |
317 |
|
|
318 |
|
$times->{decode} += time() - $t; |
319 |
|
|
320 |
|
push @results, $html; |
321 |
|
} |
322 |
|
|
323 |
} |
} |
324 |
|
|
325 |
#$log->debug( '@html_results = ' . Dumper( \@html_results ) ); |
#$log->debug( '@results = ' . Dumper( \@results ) ); |
326 |
|
|
327 |
$log->debug( sprintf( |
$log->debug( sprintf( |
328 |
"duration breakdown: store %.6fs, apply %.6fs, total: %.6fs", |
"duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", |
329 |
$times->{db}, $times->{out}, time() - $search_start_t, |
$times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, |
330 |
) ); |
) ); |
331 |
|
|
332 |
return \@html_results; |
return \@results; |
333 |
} |
} |
334 |
|
|
335 |
=head2 record |
=head2 record |
462 |
my $log = $self->{log} || die "no log?"; |
my $log = $self->{log} || die "no log?"; |
463 |
|
|
464 |
foreach my $a (qw/template data/) { |
foreach my $a (qw/template data/) { |
465 |
$log->logconfess("need $a") unless ($args->{$a}); |
$log->fatal("need $a") unless ($args->{$a}); |
466 |
} |
} |
467 |
|
|
468 |
=head3 tt_filter_type |
=head3 tt_filter_type |
571 |
|
|
572 |
my $s; |
my $s; |
573 |
if ($s_el > 0) { |
if ($s_el > 0) { |
574 |
$s = $item->{'search'}->[$i] || die "can't find value $i for type search in field $search"; |
$s = $item->{'search'}->[$i] or warn "can't find value $i for type search in field $search"; |
575 |
} else { |
} else { |
576 |
$s = $item->{'search'}->[0]; |
$s = $item->{'search'}->[0]; |
577 |
} |
} |
578 |
#$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg; |
#$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg; |
579 |
$s = __quotemeta( $s ); |
$s = __quotemeta( $s ); |
580 |
|
|
581 |
my $d = $item->{'display'}->[$i] || die "can't find value $i for type display in field $display"; |
my $d = $item->{'display'}->[$i] or warn "can't find value $i for type display in field $display"; |
582 |
|
|
583 |
my $template_arg = ''; |
my $template_arg = ''; |
584 |
$template_arg = qq{,'$template'} if ($template); |
$template_arg = qq{,'$template'} if ($template); |