7 |
Catalyst::Model |
Catalyst::Model |
8 |
/; |
/; |
9 |
use WebPAC::Store 0.08; |
use WebPAC::Store 0.08; |
10 |
use WebPAC::Search::Estraier 0.05; |
use Search::Estraier 0.04; |
11 |
use File::Slurp; |
use File::Slurp; |
12 |
use Time::HiRes qw/time/; |
use Time::HiRes qw/time/; |
13 |
use Encode qw/encode decode from_to/; |
use Encode qw/encode decode from_to/; |
14 |
use Template; |
use Template; |
|
use Data::Dumper; |
|
15 |
|
|
16 |
=head1 NAME |
=head1 NAME |
17 |
|
|
65 |
|
|
66 |
$est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding"); |
$est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding"); |
67 |
|
|
68 |
$log->debug("using config:" . Dumper($est_cfg) ); |
$log->dumper($est_cfg, 'est_cfg'); |
69 |
|
|
70 |
if (! $est_cfg->{database}) { |
if (! $est_cfg->{database}) { |
71 |
my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration"); |
my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration"); |
73 |
$est_cfg->{database} = $defaultnode; |
$est_cfg->{database} = $defaultnode; |
74 |
} |
} |
75 |
|
|
76 |
$self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } ); |
my $url = $est_cfg->{masterurl} . '/node/' . $est_cfg->{database}; |
77 |
|
|
78 |
|
$log->info("opening Hyper Estraier index $url as $est_cfg->{'user'}"); |
79 |
|
|
80 |
|
$self->{est_node} = Search::Estraier::Node->new( |
81 |
|
url => $url, |
82 |
|
user => $est_cfg->{user}, |
83 |
|
passwd => $est_cfg->{passwd}, |
84 |
|
); |
85 |
|
|
86 |
|
$log->fatal("can't create Search::Estraier::Node $url") unless ($self->{est_node}); |
87 |
|
|
88 |
# save config parameters in object |
# save config parameters in object |
89 |
foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) { |
foreach my $f (qw/ |
90 |
|
db_path template_path hits_on_page webpac_encoding defaultdepth |
91 |
|
masterurl defaultnode |
92 |
|
/) { |
93 |
$self->{$f} = $c->config->{hyperestraier}->{$f} || |
$self->{$f} = $c->config->{hyperestraier}->{$f} || |
94 |
$c->config->{webpac}->{$f}; |
$c->config->{webpac}->{$f}; |
95 |
$log->debug("self->{$f} = " . $self->{$f}); |
$log->debug("self->{$f} = " . $self->{$f}); |
113 |
"'" |
"'" |
114 |
); |
); |
115 |
|
|
116 |
$self->{databases} = $c->config->{databases} || $log->error("can't find databases in config"); |
$self->{databases} = $c->config->{databases} || $log->fatal("can't find databases in config"); |
117 |
|
|
118 |
# create Template toolkit instance |
# create Template toolkit instance |
119 |
$self->{'tt'} = Template->new( |
$self->{'tt'} = Template->new( |
140 |
|
|
141 |
} |
} |
142 |
|
|
143 |
|
=head2 setup_site

  $self->setup_site('site_name');

Point the Hyper Estraier node at C<masterurl>/node/C<site_name>. When no
site name is given, C<defaultnode> from the configuration is used instead.

If neither a site name nor C<defaultnode> is available, the problem is
logged as fatal and the node URL is left unchanged.

=cut

sub setup_site {
	my $self = shift;

	my $site = shift || $self->{defaultnode};

	if (! $site) {
		# fatal() is not relied upon to die here, so stop explicitly instead
		# of pointing the node at a broken ".../node/" URL
		# NOTE(review): confirm that the logger's fatal() only logs
		$self->{log}->fatal("setup_site can't find site or defaultnode");
		return;
	}

	my $url = $self->{masterurl} . '/node/' . $site;
	$self->{est_node}->set_url( $url );
	$self->{log}->debug("setup_site $site using $url");
}
163 |
|
|
164 |
=head2 search |
=head2 search |
165 |
|
|
187 |
|
|
188 |
my $log = $self->{log}; |
my $log = $self->{log}; |
189 |
|
|
190 |
$log->debug("search args: " . Dumper( $args )); |
$log->dumper($args, 'args'); |
191 |
|
|
192 |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
193 |
|
|
|
$log->debug("search model query: '$query'"); |
|
|
if ($args->{add_attr}) { |
|
|
$log->debug(" + add_attr: " . |
|
|
join("','", @{ $args->{add_attr} }) |
|
|
); |
|
|
} |
|
|
|
|
194 |
my $template_filename = $args->{template} || $self->{template}; |
my $template_filename = $args->{template} || $self->{template}; |
195 |
|
|
196 |
$args->{max} ||= $self->{'hits_for_pager'}; |
$args->{max} ||= $self->{'hits_for_pager'}; |
209 |
$args->{depth} = $default; |
$args->{depth} = $default; |
210 |
$log->warn("using default search depth $default"); |
$log->warn("using default search depth $default"); |
211 |
} |
} |
212 |
|
$args->{depth} ||= 0; |
213 |
|
|
214 |
my @results = $self->{est}->search( %{ $args } ); |
$log->debug("searching for maximum $args->{max} results using depth $args->{depth} phrase: ", $query || '[none]'); |
215 |
|
|
216 |
$times->{est} += time() - $t; |
# |
217 |
|
# construct condition for Hyper Estraier |
218 |
|
# |
219 |
|
my $cond = Search::Estraier::Condition->new(); |
220 |
|
if ( ref($args->{add_attr}) eq 'ARRAY' ) { |
221 |
|
$log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) ); |
222 |
|
map { |
223 |
|
$cond->add_attr( $_ ); |
224 |
|
$log->debug(" + $_"); |
225 |
|
} @{ $args->{add_attr} }; |
226 |
|
}; |
227 |
|
|
228 |
|
$cond->set_phrase( $query ) if ($query); |
229 |
|
$cond->set_options( $args->{options} ) if ($args->{options}); |
230 |
|
$cond->set_order( $args->{order} ) if ($args->{order}); |
231 |
|
|
232 |
|
my $max = $args->{max} || 7; |
233 |
|
my $page = $args->{page} || 1; |
234 |
|
if ($page < 1) { |
235 |
|
$log->warn("page number $page < 1"); |
236 |
|
$page = 1; |
237 |
|
} |
238 |
|
|
239 |
|
$cond->set_max( $page * $max ); |
240 |
|
|
241 |
my $hits = $#results + 1; |
my $result = $self->{est_node}->search($cond, $args->{depth}); |
242 |
|
my $hits = $result->doc_num; |
243 |
|
|
244 |
|
$times->{est} += time() - $t; |
245 |
|
|
246 |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
247 |
|
|
248 |
# just return results? |
$log->dumper($result->{hints}, 'result->hints' ); |
|
return @results unless ($args->{'template'}); |
|
249 |
|
|
250 |
# |
# |
251 |
# construct HTML results |
# fetch results |
252 |
# |
# |
253 |
|
|
254 |
my @html_results; |
my @results; |
255 |
|
|
256 |
for my $i ( 0 .. $#results ) { |
for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) { |
257 |
|
|
258 |
my ($database, $prefix, $id); |
$t = time(); |
259 |
if ( $results[$i]->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { |
|
260 |
($database, $prefix,$id) = ($1,$2,$3); |
#$log->debug("get_doc($i)"); |
261 |
} else { |
my $doc = $result->get_doc( $i ); |
262 |
$log->warn("can't decode database/prefix/id from " . $results[$i]->{'@uri'}); |
if (! $doc) { |
263 |
|
$log->warn("can't find result $i"); |
264 |
next; |
next; |
265 |
} |
} |
266 |
|
|
267 |
#$log->debug("load_ds( id => $id, prefix => '$prefix' )"); |
my $hash; |
|
|
|
|
$t = time(); |
|
268 |
|
|
269 |
my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); |
foreach my $attr (@{ $args->{get_attr} }) { |
270 |
if (! $ds) { |
my $val = $doc->attr( $attr ); |
271 |
$log->error("can't load_ds( ${database}/${prefix}/${id} )"); |
#$log->debug("attr $attr = ", $val || 'undef'); |
272 |
next; |
$hash->{$attr} = $val if (defined($val)); |
273 |
} |
} |
274 |
|
|
275 |
$times->{db} += time() - $t; |
$times->{hash} += time() - $t; |
276 |
|
|
277 |
#$log->debug( "ds = " . Dumper( \@html_results ) ); |
next unless ($hash); |
278 |
|
|
279 |
$t = time(); |
if (! $args->{'template'}) { |
280 |
|
push @results, $hash; |
281 |
|
} else { |
282 |
|
my ($database, $prefix, $id); |
283 |
|
|
284 |
my $html = $self->apply( |
if ( $hash->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { |
285 |
template => $template_filename, |
($database, $prefix,$id) = ($1,$2,$3); |
286 |
data => $ds, |
} else { |
287 |
record_uri => "${database}/${prefix}/${id}", |
$log->warn("can't decode database/prefix/id from " . $hash->{'@uri'}); |
288 |
config => $self->{databases}->{$database}, |
next; |
289 |
); |
} |
290 |
|
|
291 |
$times->{out} += time() - $t; |
#$log->debug("load_ds( id => $id, prefix => '$prefix' )"); |
292 |
|
|
293 |
$t = time(); |
$t = time(); |
294 |
|
|
295 |
|
my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); |
296 |
|
if (! $ds) { |
297 |
|
$log->error("can't load_ds( ${database}/${prefix}/${id} )"); |
298 |
|
next; |
299 |
|
} |
300 |
|
|
301 |
$html = decode($self->{webpac_encoding}, $html); |
$times->{db} += time() - $t; |
302 |
|
|
303 |
push @html_results, $html; |
$t = time(); |
304 |
|
|
305 |
} |
my $html = $self->apply( |
306 |
|
template => $template_filename, |
307 |
|
data => $ds, |
308 |
|
record_uri => "${database}/${prefix}/${id}", |
309 |
|
config => $self->{databases}->{$database}, |
310 |
|
); |
311 |
|
|
312 |
#$log->debug( '@html_results = ' . Dumper( \@html_results ) ); |
$times->{apply} += time() - $t; |
313 |
|
|
314 |
|
$t = time(); |
315 |
|
|
316 |
|
$html = decode($self->{webpac_encoding}, $html); |
317 |
|
|
318 |
|
$times->{decode} += time() - $t; |
319 |
|
|
320 |
|
push @results, $html; |
321 |
|
} |
322 |
|
|
323 |
|
} |
324 |
|
|
325 |
$log->debug( sprintf( |
$log->debug( sprintf( |
326 |
"duration breakdown: store %.6fs, apply %.6fs, total: %.6fs", |
"duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", |
327 |
$times->{db}, $times->{out}, time() - $search_start_t, |
$times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, |
328 |
) ); |
) ); |
329 |
|
|
330 |
return \@html_results; |
return \@results; |
331 |
} |
} |
332 |
|
|
333 |
=head2 record |
=head2 record |
347 |
|
|
348 |
my $args = {@_}; |
my $args = {@_}; |
349 |
my $log = $self->{log}; |
my $log = $self->{log}; |
350 |
$log->debug("record args: " . Dumper( $args )); |
$log->dumper( $args, 'args' ); |
351 |
|
|
352 |
foreach my $f (qw/record_uri template/) { |
foreach my $f (qw/record_uri template/) { |
353 |
$log->fatal("need $f") unless ($args->{$f}); |
$log->fatal("need $f") unless ($args->{$f}); |
381 |
} |
} |
382 |
|
|
383 |
|
|
384 |
|
=head2 list_nodes

  my @nodes = $m->list_nodes( 'site' );

Return the node for the selected site (only when it holds at least one
document) followed by every node it links to. Each element of the returned
array is a hash with elements C<name>, C<label> and C<doc_num>.

If C<site> is omitted, C<setup_site> falls back to C<defaultnode>.

While collecting linked nodes the Estraier node is pointed at each of them
in turn; it is pointed back at the requested site before returning.

=cut

sub list_nodes {
	my $self = shift;

	my $site = shift;

	my $log = $self->{log};
	my $est_node = $self->{est_node};

	# site is optional, so don't interpolate undef into the message
	$log->debug("list_nodes use site " . (defined($site) ? $site : '[default]'));

	$self->setup_site( $site );

	# node URL may have just changed: refresh node info before reading it,
	# in the same order (setup_site, _set_info, doc_num) used for links below
	# NOTE(review): assumes the node keeps info from the previous URL until
	# _set_info is called -- confirm against Search::Estraier::Node
	$est_node->_set_info;

	my @nodes;

	if ($est_node->doc_num > 0) {
		push @nodes, {
			name => $est_node->name,
			label => $est_node->label,
			doc_num => $est_node->doc_num,
		};
	}

	my $links = $est_node->links;
	return @nodes unless ($links);

	$log->dumper( $links, 'links' );

	foreach my $link (@{ $links }) {
		# link is tab separated: url, label, credit (credit isn't used here)
		my ($url, $label) = split(/\t/, $link, 3);
		if (defined($url) && $url =~ m#/node/(.+)$#) {
			my $node = $1;
			$self->setup_site( $node );
			$est_node->_set_info;
			push @nodes, {
				name => $node,
				label => $label,
				doc_num => $est_node->doc_num,
			};
		} else {
			$log->warn("can't find node name in link $link");
		}
	}

	# point node back at requested site
	$self->setup_site( $site );
	$est_node->_set_info;

	$log->dumper( \@nodes, 'nodes' );

	return @nodes;
}
442 |
|
|
443 |
|
=cut |
444 |
|
|
445 |
|
|
446 |
=head2 save_html |
=head2 save_html |
447 |
|
|
448 |
$m->save_html( '/full/path/to/file', $content ); |
$m->save_html( '/full/path/to/file', $content ); |
522 |
my $log = $self->{log} || die "no log?"; |
my $log = $self->{log} || die "no log?"; |
523 |
|
|
524 |
foreach my $a (qw/template data/) { |
foreach my $a (qw/template data/) { |
525 |
$log->logconfess("need $a") unless ($args->{$a}); |
$log->fatal("need $a") unless ($args->{$a}); |
526 |
} |
} |
527 |
|
|
528 |
=head3 tt_filter_type |
=head3 tt_filter_type |
631 |
|
|
632 |
my $s; |
my $s; |
633 |
if ($s_el > 0) { |
if ($s_el > 0) { |
634 |
$s = $item->{'search'}->[$i] || die "can't find value $i for type search in field $search"; |
$s = $item->{'search'}->[$i] or warn "can't find value $i for type search in field $search"; |
635 |
} else { |
} else { |
636 |
$s = $item->{'search'}->[0]; |
$s = $item->{'search'}->[0]; |
637 |
} |
} |
638 |
#$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg; |
#$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg; |
639 |
$s = __quotemeta( $s ); |
$s = __quotemeta( $s ); |
640 |
|
|
641 |
my $d = $item->{'display'}->[$i] || die "can't find value $i for type display in field $display"; |
my $d = $item->{'display'}->[$i] or warn "can't find value $i for type display in field $display"; |
642 |
|
|
643 |
my $template_arg = ''; |
my $template_arg = ''; |
644 |
$template_arg = qq{,'$template'} if ($template); |
$template_arg = qq{,'$template'} if ($template); |