7 |
Catalyst::Model |
Catalyst::Model |
8 |
/; |
/; |
9 |
use WebPAC::Store 0.08; |
use WebPAC::Store 0.08; |
10 |
use WebPAC::Search::Estraier 0.05; |
use Search::Estraier 0.04; |
11 |
use File::Slurp; |
use File::Slurp; |
12 |
use Time::HiRes qw/time/; |
use Time::HiRes qw/time/; |
13 |
use Encode qw/encode decode from_to/; |
use Encode qw/encode decode from_to/; |
14 |
|
use Template; |
15 |
use Data::Dumper; |
use Data::Dumper; |
16 |
|
|
17 |
=head1 NAME |
=head1 NAME |
74 |
$est_cfg->{database} = $defaultnode; |
$est_cfg->{database} = $defaultnode; |
75 |
} |
} |
76 |
|
|
77 |
$self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } ); |
my $url = $est_cfg->{masterurl} . '/node/' . $est_cfg->{database}; |
78 |
|
|
79 |
|
$log->info("opening Hyper Estraier index $url as $est_cfg->{'user'}"); |
80 |
|
|
81 |
|
$self->{est_node} = Search::Estraier::Node->new( |
82 |
|
url => $url, |
83 |
|
user => $est_cfg->{user}, |
84 |
|
passwd => $est_cfg->{passwd}, |
85 |
|
); |
86 |
|
|
87 |
|
$log->fatal("can't create Search::Estraier::Node $url") unless ($self->{est_node}); |
88 |
|
|
89 |
# save config parameters in object |
# save config parameters in object |
90 |
foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) { |
foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) { |
111 |
"'" |
"'" |
112 |
); |
); |
113 |
|
|
114 |
$self->{databases} = $c->config->{databases} || $log->error("can't find databases in config"); |
$self->{databases} = $c->config->{databases} || $log->fatal("can't find databases in config"); |
115 |
|
|
116 |
$self->{model_record} = $c->comp('Model::Record') or $log->error("can't find Model::Record"); |
# create Template toolkit instance |
117 |
|
$self->{'tt'} = Template->new( |
118 |
|
INCLUDE_PATH => $template_path, |
119 |
|
FILTERS => { |
120 |
|
dump_html => sub { |
121 |
|
return unless (@_); |
122 |
|
my $out; |
123 |
|
my $i = 1; |
124 |
|
foreach my $v (@_) { |
125 |
|
$out .= qq{<div id="dump_$i">} . |
126 |
|
Data::HTMLDumper->Dump([ $v ],[ "v$i" ]) . |
127 |
|
qq{</div>}; |
128 |
|
$i++; |
129 |
|
} |
130 |
|
$out =~ s!<table[^>/]*>!<table class="dump">!gis if ($out); |
131 |
|
return $out; |
132 |
|
} |
133 |
|
}, |
134 |
|
EVAL_PERL => 1, |
135 |
|
); |
136 |
|
|
137 |
return $self; |
return $self; |
138 |
|
|
169 |
|
|
170 |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
my $query = $args->{phrase} || $log->warn("no query phrase") && return; |
171 |
|
|
|
$log->debug("search model query: '$query'"); |
|
|
if ($args->{add_attr}) { |
|
|
$log->debug(" + add_attr: " . |
|
|
join("','", @{ $args->{add_attr} }) |
|
|
); |
|
|
} |
|
|
|
|
172 |
my $template_filename = $args->{template} || $self->{template}; |
my $template_filename = $args->{template} || $self->{template}; |
173 |
|
|
174 |
$args->{max} ||= $self->{'hits_for_pager'}; |
$args->{max} ||= $self->{'hits_for_pager'}; |
187 |
$args->{depth} = $default; |
$args->{depth} = $default; |
188 |
$log->warn("using default search depth $default"); |
$log->warn("using default search depth $default"); |
189 |
} |
} |
190 |
|
$args->{depth} ||= 0; |
191 |
|
|
192 |
my @results = $self->{est}->search( %{ $args } ); |
$log->debug("searching for maximum $args->{max} results using depth $args->{depth} phrase: ", $query || '[none]'); |
193 |
|
|
194 |
$times->{est} += time() - $t; |
# |
195 |
|
# construct condition for Hyper Estraier |
196 |
|
# |
197 |
|
my $cond = Search::Estraier::Condition->new(); |
198 |
|
if ( ref($args->{add_attr}) eq 'ARRAY' ) { |
199 |
|
$log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) ); |
200 |
|
map { |
201 |
|
$cond->add_attr( $_ ); |
202 |
|
$log->debug(" + $_"); |
203 |
|
} @{ $args->{add_attr} }; |
204 |
|
}; |
205 |
|
|
206 |
|
$cond->set_phrase( $query ) if ($query); |
207 |
|
$cond->set_options( $args->{options} ) if ($args->{options}); |
208 |
|
$cond->set_order( $args->{order} ) if ($args->{order}); |
209 |
|
|
210 |
|
my $max = $args->{max} || 7; |
211 |
|
my $page = $args->{page} || 1; |
212 |
|
if ($page < 1) { |
213 |
|
$log->warn("page number $page < 1"); |
214 |
|
$page = 1; |
215 |
|
} |
216 |
|
|
217 |
|
$cond->set_max( $page * $max ); |
218 |
|
|
219 |
|
my $result = $self->{est_node}->search($cond, $args->{depth}); |
220 |
|
my $hits = $result->doc_num; |
221 |
|
|
222 |
my $hits = $#results + 1; |
$times->{est} += time() - $t; |
223 |
|
|
224 |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
$log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); |
225 |
|
|
226 |
# just return results? |
$log->debug( "hints: " . Dumper($result->{hints}) ); |
|
return @results unless ($args->{'template'}); |
|
227 |
|
|
228 |
# |
# |
229 |
# construct HTML results |
# fetch results |
230 |
# |
# |
231 |
|
|
232 |
my @html_results; |
my @results; |
233 |
|
|
234 |
for my $i ( 0 .. $#results ) { |
for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) { |
235 |
|
|
236 |
my ($database, $prefix, $id); |
$t = time(); |
237 |
if ( $results[$i]->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { |
|
238 |
($database, $prefix,$id) = ($1,$2,$3); |
#$log->debug("get_doc($i)"); |
239 |
} else { |
my $doc = $result->get_doc( $i ); |
240 |
$log->warn("can't decode database/prefix/id from " . $results[$i]->{'@uri'}); |
if (! $doc) { |
241 |
|
$log->warn("can't find result $i"); |
242 |
next; |
next; |
243 |
} |
} |
244 |
|
|
245 |
#$log->debug("load_ds( id => $id, prefix => '$prefix' )"); |
my $hash; |
246 |
|
|
247 |
$t = time(); |
foreach my $attr (@{ $args->{get_attr} }) { |
248 |
|
my $val = $doc->attr( $attr ); |
249 |
my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); |
#$log->debug("attr $attr = ", $val || 'undef'); |
250 |
if (! $ds) { |
$hash->{$attr} = $val if (defined($val)); |
|
$log->error("can't load_ds( ${database}/${prefix}/${id} )"); |
|
|
next; |
|
251 |
} |
} |
252 |
|
|
253 |
$times->{db} += time() - $t; |
$times->{hash} += time() - $t; |
254 |
|
|
255 |
#$log->debug( "ds = " . Dumper( \@html_results ) ); |
next unless ($hash); |
256 |
|
|
257 |
$t = time(); |
if (! $args->{'template'}) { |
258 |
|
push @results, $hash; |
259 |
|
} else { |
260 |
|
my ($database, $prefix, $id); |
261 |
|
|
262 |
|
if ( $hash->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { |
263 |
|
($database, $prefix,$id) = ($1,$2,$3); |
264 |
|
} else { |
265 |
|
$log->warn("can't decode database/prefix/id from " . $hash->{'@uri'}); |
266 |
|
next; |
267 |
|
} |
268 |
|
|
269 |
|
#$log->debug("load_ds( id => $id, prefix => '$prefix' )"); |
270 |
|
|
271 |
|
$t = time(); |
272 |
|
|
273 |
|
my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); |
274 |
|
if (! $ds) { |
275 |
|
$log->error("can't load_ds( ${database}/${prefix}/${id} )"); |
276 |
|
next; |
277 |
|
} |
278 |
|
|
279 |
my $html = '[no output]'; |
$times->{db} += time() - $t; |
280 |
|
|
281 |
if ($self->{model_record}) { |
#$log->debug( "ds = " . Dumper( \@html_results ) ); |
282 |
$html = $self->{model_record}->apply( |
|
283 |
|
$t = time(); |
284 |
|
|
285 |
|
my $html = $self->apply( |
286 |
template => $template_filename, |
template => $template_filename, |
287 |
data => $ds, |
data => $ds, |
288 |
record_uri => "${database}/${prefix}/${id}", |
record_uri => "${database}/${prefix}/${id}", |
289 |
config => $self->{databases}->{$database}, |
config => $self->{databases}->{$database}, |
290 |
); |
); |
|
} else { |
|
|
$log->warn("skipped apply"); |
|
|
} |
|
291 |
|
|
292 |
$times->{out} += time() - $t; |
$times->{apply} += time() - $t; |
293 |
|
|
294 |
$t = time(); |
$t = time(); |
295 |
|
|
296 |
|
$html = decode($self->{webpac_encoding}, $html); |
297 |
|
|
298 |
$html = decode($self->{webpac_encoding}, $html); |
$times->{decode} += time() - $t; |
299 |
|
|
300 |
push @html_results, $html; |
push @results, $html; |
301 |
|
} |
302 |
|
|
303 |
} |
} |
304 |
|
|
305 |
#$log->debug( '@html_results = ' . Dumper( \@html_results ) ); |
#$log->debug( '@results = ' . Dumper( \@results ) ); |
306 |
|
|
307 |
$log->debug( sprintf( |
$log->debug( sprintf( |
308 |
"duration breakdown: store %.6fs, apply %.6fs, total: %.6fs", |
"duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", |
309 |
$times->{db}, $times->{out}, time() - $search_start_t, |
$times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, |
310 |
) ); |
) ); |
311 |
|
|
312 |
return \@html_results; |
return \@results; |
313 |
} |
} |
314 |
|
|
315 |
=head2 record |
=head2 record |
350 |
return; |
return; |
351 |
} |
} |
352 |
|
|
353 |
my $html = $self->{model_record}->apply( |
my $html = $self->apply( |
354 |
template => $args->{template}, |
template => $args->{template}, |
355 |
data => $ds, |
data => $ds, |
356 |
record_uri => $args->{record_uri}, |
record_uri => $args->{record_uri}, |
419 |
return decode($self->{webpac_encoding}, $content); |
return decode($self->{webpac_encoding}, $content); |
420 |
} |
} |
421 |
|
|
422 |
|
|
423 |
|
=head2 apply |
424 |
|
|
425 |
|
Create output from in-memory data structure using Template Toolkit template. |
426 |
|
|
427 |
|
my $text = $tt->apply( |
428 |
|
template => 'text.tt', |
429 |
|
data => $ds, |
430 |
|
record_uri => 'database/prefix/mfn', |
431 |
|
); |
432 |
|
|
433 |
|
It also has the following Template Toolkit filter routines defined: |
434 |
|
|
435 |
|
=cut |
436 |
|
|
437 |
|
sub apply {
	# Render the in-memory data structure through a Template Toolkit template.
	#
	# Named arguments:
	#   template   - template file name handed to $self->{tt}->process (required)
	#   data       - data structure hash (required)
	#   any other pairs are passed to the template unchanged
	#
	# Before processing, the helpers d, display, search, load_rec and
	# load_template are added to the template variables.
	#
	# Returns the generated output; it is undefined when processing failed
	# (the Template error is reported through $log->error).
	my $self = shift;

	my $args = {@_};

	my $log = $self->{log} || die "no log?";

	# not named $a, which would shadow the sort variable
	foreach my $required (qw/template data/) {
		$log->fatal("need $required") unless ($args->{$required});
	}

	# NOTE: the named subs below are compiled as ordinary package-level subs
	# even though they are written inside apply; they only use their own
	# arguments and never apply's lexicals.

=head3 tt_filter_type

Filter to return values of the specified type from $ds. Usage from a TT
template is in the form C<d('FieldName','delimiter')>, where C<delimiter>
is optional, like this:

  [% d('Title') %]
  [% d('Author',', ') %]

=cut

	sub tt_filter_type {
		my ($data,$type) = @_;

		die "no data?" unless ($data);
		$type ||= 'display';

		# delimiter used to join multiple values when the template gives none
		# NOTE(review): 'index' is single-quoted, so it is a literal
		# backslash followed by "n", not a newline -- confirm this is intended
		my $default_delimiter = {
			'display' => '¶<br/>',
			'index' => '\n',
		};

		return sub {

			my ($name,$join) = @_;

			die "no data hash" unless ($data->{'data'} && ref($data->{'data'}) eq 'HASH');
			# Hm? Should we die here?
			return unless ($name);

			my $item = $data->{'data'}->{$name} || return;

			my $v = $item->{$type} || return;

			if (ref($v) eq 'ARRAY') {
				if ($#{$v} == 0) {
					# single value: no delimiter needed
					$v = $v->[0];
				} else {
					$join = $default_delimiter->{$type} unless defined($join);
					$v = join($join, @{$v});
				}
			} else {
				# the value is still returned as-is after the warning
				warn("TT filter $type(): field $name values aren't ARRAY, ignoring");
			}

			return $v;
		}
	}

	$args->{'d'} = tt_filter_type($args, 'display');
	$args->{'display'} = tt_filter_type($args, 'display');

=head3 tt_filter_search

Filter to return links to search. Usage in TT:

  [% search('FieldToDisplay','FieldToSearch','optional delimiter', 'optional_template.tt') %]

=cut

	sub tt_filter_search {

		my ($data) = @_;

		die "no data?" unless ($data);

		return sub {

			my ($display,$search,$delimiter,$template) = @_;

			# default delimiter
			# (this statement must end with a semicolon: with a trailing comma
			# it fuses with the following "die ... unless" and the default is
			# never applied on the normal path)
			$delimiter ||= '¶<br/>';

			die "no data hash" unless ($data->{'data'} && ref($data->{'data'}) eq 'HASH');
			# Hm? Should we die here?
			return unless ($display);

			my $item = $data->{'data'}->{$display} || return;

			return unless($item->{'display'});
			if (! $item->{'search'}) {
				warn "error in TT template: field $display didn't insert anything into search, use d('$display') and not search('$display'...)";
				return;
			}

			# both value lists have to be array references
			my @warn;
			foreach my $type (qw/display search/) {
				push @warn, "field $display type $type values aren't ARRAY" unless (ref($item->{$type}) eq 'ARRAY');
			}

			if (@warn) {
				warn("TT filter search(): " . join(",", @warn) . ", skipping");
				return;
			}
			my @html;

			# index of last element in each list
			my $d_el = $#{ $item->{'display'} };
			my $s_el = $#{ $item->{'search'} };

			# easy, both fields have same number of elements or there is just
			# one search and multiple display
			if ( $d_el == $s_el || $s_el == 0 ) {

				foreach my $i ( 0 .. $d_el ) {

					my $s;
					if ($s_el > 0) {
						$s = $item->{'search'}->[$i] or warn "can't find value $i for type search in field $search";
					} else {
						# single search value shared by every display value
						$s = $item->{'search'}->[0];
					}
					#$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg;
					$s = __quotemeta( $s );

					my $d = $item->{'display'}->[$i] or warn "can't find value $i for type display in field $display";

					my $template_arg = '';
					$template_arg = qq{,'$template'} if ($template);

					push @html, qq{<a href="#" onclick="return search_via_link('$search','$s'${template_arg})">$d</a>};
				}

				return join($delimiter, @html);
			} else {
				my $html = qq{<div class="notice">WARNING: we should really support if there is $d_el display elements and $s_el search elements, but currently there is no nice way to do so, so we will just display values</div>};
				my $v = $item->{'display'};

				if ($#{$v} == 0) {
					$html .= $v->[0];
				} else {
					$html .= join($delimiter, @{$v});
				}
				return $html;
			}
		}
	}

	$args->{'search'} = tt_filter_search($args);

=head3 load_rec

Used mostly for onClick events like this:

  <a href="#" onClick="[% load_rec( record_uri, 'template_name.tt') %]>foo</a>

It will automatically do sanity checking and create correct JavaScript code.

=cut

	$args->{'load_rec'} = sub {
		my @errors;

		my $record_uri = shift or push @errors, "record_uri missing";
		my $template = shift or push @errors, "template missing";

		# validate the format only when there is a value to validate, so a
		# missing record_uri is reported once and without undef warnings
		if ($record_uri && $record_uri !~ m#^[^/]+/[^/]+/[^/]+$#) {
			push @errors, "invalid format of record_uri: $record_uri";
		}

		if (@errors) {
			return "Logger.error('errors in load_rec: " . join(", ", @errors) . "'); return false;";
		} else {
			return "load_rec('$record_uri','$template'); return false;";
		}
	};

=head3 load_template

Used to re-submit search request and load results in different template

  <a href="#" onClick="[% load_template( 'template_name.tt' ) %]">bar</a>

=cut

	$args->{'load_template'} = sub {
		my $template = shift or return "Logger.error('load_template missing template name!'); return false;";
		# the template name has to be a quoted JavaScript string literal,
		# the same way load_rec quotes its arguments
		return "load_template('$template'); return false;";
	};

	my $out;

	$self->{'tt'}->process(
		$args->{'template'},
		$args,
		\$out
	) || $log->error( "apply can't process template: ", $self->{'tt'}->error() );

	return $out;
}
636 |
|
|
637 |
|
|
638 |
|
=head2 __quotemeta |
639 |
|
|
640 |
|
Helper to quote JavaScript-friendly characters |
641 |
|
|
642 |
|
=cut |
643 |
|
|
644 |
|
# __quotemeta( $octets )
#
# Escape a string so that it can be embedded inside a quoted JavaScript
# string literal. Input is decoded as ISO-8859-2; the result contains only
# printable ASCII:
#   - characters U+0080..U+FFFD become \uXXXX
#   - other non-printable bytes and backslashes become \xXX, with tab,
#     newline, carriage return and backslash then shortened to
#     \t, \n, \r and \\
#   - double and single quotes are backslash-escaped, so the result is safe
#     in both "..." and '...' JavaScript literals
# An undefined argument is returned unchanged.
sub __quotemeta {
	# a lexical is used instead of $_ so callers' $_ is never touched
	my $text = shift;

	return $text unless defined $text;

	$text = decode('iso-8859-2', $text);

	# non-ASCII characters: one \uXXXX per character (vector sprintf flag)
	$text =~ s<([\x{0080}-\x{fffd}]+)>{sprintf '\u%0*v4X', '\u', $1}ge if ( Encode::is_utf8($text) );
	{
		use bytes;
		# anything outside space..~ plus every backslash that does not
		# start one of the \u escapes generated above
		$text =~ s<((?:[^ \x21-\x7E]|(?:\\(?!u)))+)>{sprintf '\x%0*v2X', '\x', $1}ge;
	}

	# prefer the short escape forms
	$text =~ s/\\x09/\\t/g;
	$text =~ s/\\x0A/\\n/g;
	$text =~ s/\\x0D/\\r/g;
	$text =~ s/"/\\"/g;
	# apostrophes would otherwise terminate single-quoted JavaScript strings
	$text =~ s/'/\\'/g;
	$text =~ s/\\x5C/\\\\/g;

	return $text;
}
662 |
|
|
663 |
|
|
664 |
|
|
665 |
=head1 AUTHOR |
=head1 AUTHOR |
666 |
|
|
667 |
Dobrica Pavlinusic C<< <dpavlin@rot13.org> >> |
Dobrica Pavlinusic C<< <dpavlin@rot13.org> >> |