12 |
use Time::HiRes qw/time/; |
use Time::HiRes qw/time/; |
13 |
use Encode qw/encode decode from_to/; |
use Encode qw/encode decode from_to/; |
14 |
use Template; |
use Template; |
|
use Data::Dumper; |
|
15 |
|
|
16 |
=head1 NAME |
=head1 NAME |
17 |
|
|
65 |
|
|
66 |
$est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding"); |
$est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding"); |
67 |
|
|
68 |
$log->debug("using config:" . Dumper($est_cfg) ); |
$log->dumper($est_cfg, 'est_cfg'); |
69 |
|
|
70 |
if (! $est_cfg->{database}) { |
if (! $est_cfg->{database}) { |
71 |
my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration"); |
my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration"); |
88 |
# save config parametars in object |
# save config parametars in object |
89 |
foreach my $f (qw/ |
foreach my $f (qw/ |
90 |
db_path template_path hits_on_page webpac_encoding defaultdepth |
db_path template_path hits_on_page webpac_encoding defaultdepth |
91 |
masterurl |
masterurl defaultnode |
92 |
/) { |
/) { |
93 |
$self->{$f} = $c->config->{hyperestraier}->{$f} || |
$self->{$f} = $c->config->{hyperestraier}->{$f} || |
94 |
$c->config->{webpac}->{$f}; |
$c->config->{webpac}->{$f}; |
144 |
|
|
145 |
$self->setup_site('site_name'); |
$self->setup_site('site_name'); |
146 |
|
|
147 |
Change node URL and database name according to site name (if available), or
fall back to C<defaultnode> from configuration.
149 |
|
|
150 |
=cut |
=cut |
151 |
|
|
152 |
# Point the Hyper Estraier node object at the node for the given site.
#
#   $self->setup_site( $site );
#
# $site is the node name; when it is false, $self->{defaultnode} is used
# instead and a warning is logged. The node URL is built as
# "<masterurl>/node/<site>" and handed to $self->{est_node}->set_url.
#
# When neither a site nor a default node is available the problem is logged
# with fatal() and the sub returns without touching the node URL. The logger's
# fatal() is only followed by an explicit return elsewhere in this file, so it
# is treated here as "log, don't die"; falling through would set a bogus URL
# ending in "/node/".
sub setup_site {
	my $self = shift;

	my $site = shift;

	if (! $site) {
		$site = $self->{defaultnode};

		if (! $site) {
			$self->{log}->fatal("setup_site can't find site or defaultnode");
			return;
		}

		$self->{log}->warn("using default site $site");
	}

	my $url = $self->{masterurl} . '/node/' . $site;
	$self->{est_node}->set_url( $url );
	$self->{log}->debug("setup_site '$site' using $url");
}
167 |
|
|
168 |
=head2 search |
=head2 search |
191 |
|
|
192 |
my $log = $self->{log}; |
my $log = $self->{log}; |
193 |
|
|
194 |
$log->debug("search args: " . Dumper( $args )); |
$log->dumper($args, 'args'); |
195 |
|
|
196 |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
197 |
|
|
215 |
} |
} |
216 |
$args->{depth} ||= 0; |
$args->{depth} ||= 0; |
217 |
|
|
218 |
$log->debug("searching for maximum $args->{max} results using depth $args->{depth} phrase: ", $query || '[none]'); |
$log->debug("searching " . $self->{est_node}->{url} . " max: $args->{max} depth: $args->{depth} phrase: " . ($query || '[none]') ); |
219 |
|
|
220 |
# |
# |
221 |
# construct condition for Hyper Estraier |
# construct condition for Hyper Estraier |
243 |
$cond->set_max( $page * $max ); |
$cond->set_max( $page * $max ); |
244 |
|
|
245 |
my $result = $self->{est_node}->search($cond, $args->{depth}); |
my $result = $self->{est_node}->search($cond, $args->{depth}); |
246 |
|
if (! $result) { |
247 |
|
$self->{log}->fatal("search didn't return result"); |
248 |
|
return; |
249 |
|
} |
250 |
my $hits = $result->doc_num; |
my $hits = $result->doc_num; |
251 |
|
|
252 |
$times->{est} += time() - $t; |
$times->{est} += time() - $t; |
253 |
|
|
254 |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
255 |
|
|
256 |
$log->debug( "hints: " . Dumper($result->{hints}) ); |
$self->{hints} = $result->{hints}; |
257 |
|
#$log->dumper($self->{hints}, 'original hints' ); |
258 |
|
|
259 |
# |
# |
260 |
# fetch results |
# fetch results |
309 |
|
|
310 |
$times->{db} += time() - $t; |
$times->{db} += time() - $t; |
311 |
|
|
|
#$log->debug( "ds = " . Dumper( \@html_results ) ); |
|
|
|
|
312 |
$t = time(); |
$t = time(); |
313 |
|
|
314 |
my $html = $self->apply( |
my $html = $self->apply( |
331 |
|
|
332 |
} |
} |
333 |
|
|
|
#$log->debug( '@results = ' . Dumper( \@results ) ); |
|
|
|
|
334 |
$log->debug( sprintf( |
$log->debug( sprintf( |
335 |
"duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", |
"duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", |
336 |
$times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, |
$times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, |
339 |
return \@results; |
return \@results; |
340 |
} |
} |
341 |
|
|
342 |
|
=head2 hints |
343 |
|
|
344 |
|
my $hints = $m->hints; |
345 |
|
|
346 |
|
Return various useful hints about result |
347 |
|
|
348 |
|
=cut |
349 |
|
|
350 |
|
# Convert the raw hint table saved in $self->{hints} by the last search
# into a nested hash reference:
#
#   - keys matching HIT/HITS, TIME, DOCNUM or WORDNUM are copied under their
#     lower-cased name
#   - HINT#n values ("word<TAB>count") are collected under {words}
#   - LINK#n values (tab separated, URL first, seventh field stored as the
#     result count) are collected under {node}, keyed by the part of the
#     URL after "/node/"
#
# Anything else is reported with a debug message and skipped. Logs a fatal
# message and returns nothing when no hints were stored; returns undef when
# the stored table is empty.
sub hints {
	my $self = shift;

	if (! $self->{hints}) {
		$self->{log}->fatal("no hints found!");
		return;
	}

	my $hints;

	while (my ($key, $val) = each %{ $self->{hints} }) {

		# plain counters and timings
		if ($key =~ m/^(?:HITS*|TIME|DOCNUM|WORDNUM)$/) {
			$hints->{ lc($key) } = $val;
			next;
		}

		# per-word hit counts
		if ($key =~ m/^HINT#/) {
			my ($word, $count) = split(/\t/, $val, 2);
			$hints->{words}->{$word} = $count;
			next;
		}

		# per-node result counts; only the first and seventh fields are used
		if ($key =~ m/^LINK#/) {
			my @fields = split(/\t/, $val, 7);
			my ($url, $results) = @fields[0, 6];

			if ($url =~ m#/node/(.+)$#) {
				$hints->{node}->{$1} = $results;
			} else {
				$self->{log}->debug("url $url doesn't have /node/ in it!");
			}
			next;
		}

		$self->{log}->debug("unknown hint $key = $val");
	}

	$self->{log}->dumper($hints, 'model hints' );

	return $hints;
}
386 |
|
|
387 |
|
|
388 |
=head2 record |
=head2 record |
389 |
|
|
390 |
my $html = $m->record( |
my $html = $m->record( |
402 |
|
|
403 |
my $args = {@_}; |
my $args = {@_}; |
404 |
my $log = $self->{log}; |
my $log = $self->{log}; |
405 |
$log->debug("record args: " . Dumper( $args )); |
$log->dumper( $args, 'args' ); |
406 |
|
|
407 |
foreach my $f (qw/record_uri template/) { |
foreach my $f (qw/record_uri template/) { |
408 |
$log->fatal("need $f") unless ($args->{$f}); |
$log->fatal("need $f") unless ($args->{$f}); |
436 |
} |
} |
437 |
|
|
438 |
|
|
439 |
|
=head2 list_nodes |
440 |
|
|
441 |
|
my @nodes = $m->list_nodes( 'site' ); |
442 |
|
|
443 |
|
Return all nodes (databases) which have records for the selected site. Each
element of the returned array is a hash reference with the keys C<name>,
C<label> and C<doc_num>.
445 |
|
|
446 |
|
=cut |
447 |
|
|
448 |
|
# Per-process cache of node lists, keyed by site name. Entries are never
# invalidated, so a cached list stays in use for the lifetime of the process.
my $nodes_list;

# Return the list of nodes that hold records for $site.
#
#   my @nodes = $self->list_nodes( $site );
#
# Each element is a hash reference with the keys name, label and doc_num.
# The first element describes the site's own node (only when its doc_num is
# greater than zero); the remaining elements come from that node's links,
# for every link whose URL contains "/node/<name>".
#
# The node object in $self->{est_node} is re-pointed at each linked node
# while collecting counts and is pointed back at $site before returning.
# Results are cached per site in $nodes_list.
sub list_nodes {
	my $self = shift;

	my $site = shift;

	$self->{log}->debug("list_nodes use site $site");

	$self->setup_site( $site );

	# An undefined site would stringify to '' as a hash key anyway; make
	# that explicit so the cache lookup doesn't warn.
	my $cache_key = defined($site) ? $site : '';

	# Serve from cache. ref() has to be applied to the cached value itself;
	# wrapping the whole "eq" comparison in ref() is always false and
	# disabled the cache completely.
	my $cached = $nodes_list->{$cache_key};
	return @{ $cached } if (ref($cached) eq 'ARRAY');

	my @nodes;

	if ($self->{est_node}->doc_num > 0) {
		push @nodes, {
			name => $self->{est_node}->name,
			label => $self->{est_node}->label,
			doc_num => $self->{est_node}->doc_num,
		}
	}

	# refresh set info
	$self->{est_node}->_set_info;

	my $links = $self->{est_node}->links || return @nodes;

	$self->{log}->dumper( $links, 'links' );

	foreach my $link (@{ $links }) {
		# link format: URL, label and credit separated by tabs
		my ($url, $label, $credit) = split(/\t/, $link, 3);
		if ($url =~ m#/node/(.+)$#) {
			my $node = $1;
			# switch to the linked node to read its document count
			$self->setup_site( $node );
			$self->{est_node}->_set_info;
			$label = decode('UTF-8', $label);
			push @nodes, {
				name => $node,
				label => $label,
				doc_num => $self->{est_node}->doc_num,
			}
		} else {
			$self->{log}->warn("can't find node name in link $link");
		}
	}

	# switch back to the site we were asked about
	$self->setup_site( $site );
	$self->{est_node}->_set_info;

	$self->{log}->dumper( \@nodes, 'nodes' );

	$nodes_list->{$cache_key} = \@nodes;

	return @nodes;
}
505 |
|
|
506 |
=head2 save_html |
=head2 save_html |
507 |
|
|
508 |
$m->save_html( '/full/path/to/file', $content ); |
$m->save_html( '/full/path/to/file', $content ); |
574 |
|
|
575 |
=cut |
=cut |
576 |
|
|
577 |
|
# Escape <, >, & and " to produce valid XML.
# Each special character maps to its entity reference; mapping a character
# to itself would make the substitution a no-op and let raw markup through.
my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
# alternation of all characters to escape, used as s/($escape_re)/$escape{$1}/g
my $escape_re = join '|' => keys %escape;
580 |
|
|
581 |
sub apply { |
sub apply { |
582 |
my $self = shift; |
my $self = shift; |
583 |
|
|
625 |
if (ref($v) eq 'ARRAY') { |
if (ref($v) eq 'ARRAY') { |
626 |
if ($#{$v} == 0) { |
if ($#{$v} == 0) { |
627 |
$v = $v->[0]; |
$v = $v->[0]; |
628 |
|
$v =~ s/($escape_re)/$escape{$1}/g; |
629 |
} else { |
} else { |
630 |
$join = $default_delimiter->{$type} unless defined($join); |
$join = $default_delimiter->{$type} unless defined($join); |
631 |
$v = join($join, @{$v}); |
$v = join($join, map { |
632 |
|
s/($escape_re)/$escape{$1}/g; |
633 |
|
} @{$v}); |
634 |
} |
} |
635 |
} else { |
} else { |
636 |
warn("TT filter $type(): field $name values aren't ARRAY, ignoring"); |
warn("TT filter $type(): field $name values aren't ARRAY, ignoring"); |