--- Webpacus/lib/Webpacus/Model/WebPAC.pm 2005/11/26 01:54:42 155 +++ Webpacus/lib/Webpacus/Model/WebPAC.pm 2006/05/07 20:32:49 452 @@ -6,12 +6,12 @@ use base qw/ Catalyst::Model /; -use Data::Dumper; -use WebPAC::DB; -use WebPAC::Output::TT; -use WebPAC::Search::Estraier 0.02; +use WebPAC::Store 0.08; +use Search::Estraier 0.04; use File::Slurp; -use Time::HiRes; +use Time::HiRes qw/time/; +use Encode qw/encode decode from_to/; +use Template; =head1 NAME @@ -34,10 +34,13 @@ # configuration for hyper estraier full text search engine hyperestraier: - url: 'http://localhost:1978/node/webpac2' + masterurl: 'http://localhost:1978/node/webpac2' + defaultnode: 'webpac2' + defaultdepth: 1 user: 'admin' passwd: 'admin' hits_on_page: 100 + hits_for_pager: 1000 webpac: db_path: '/data/webpac2/db' @@ -45,8 +48,6 @@ template: 'html_ffzg_results_short.tt' # encoding comming from webpac webpac_encoding: 'iso-8859-2' - # encoding expected by Catalyst - out_encoding: 'UTF-8' =cut @@ -62,72 +63,107 @@ my $est_cfg = $c->config->{hyperestraier}; $est_cfg->{'log'} = $log; - $est_cfg->{encoding} = $est_cfg->{catalyst_encoding}; + $est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding"); - $log->debug("using config:" . Dumper($est_cfg) ); + $log->dumper($est_cfg, 'est_cfg'); - $self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } ); + if (! $est_cfg->{database}) { + my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration"); + $log->info("using default node $defaultnode"); + $est_cfg->{database} = $defaultnode; + } + + my $url = $est_cfg->{masterurl} . '/node/' . $est_cfg->{database}; + + $log->info("opening Hyper Estraier index $url as $est_cfg->{'user'}"); - my $db_path = $c->config->{webpac}->{db_path}; - my $template_path = $c->config->{webpac}->{template_path}; - $self->{template_path} = $template_path; + $self->{est_node} = Search::Estraier::Node->new( + url => $url, + user => $est_cfg->{user}, + passwd => $est_cfg->{passwd}, + ); + + $log->fatal("can't create Search::Estraier::Node $url") unless ($self->{est_node}); + + # save config parametars in object + foreach my $f (qw/ + db_path template_path hits_on_page webpac_encoding defaultdepth + masterurl defaultnode + /) { + $self->{$f} = $c->config->{hyperestraier}->{$f} || + $c->config->{webpac}->{$f}; + $log->debug("self->{$f} = " . $self->{$f}); + } + my $db_path = $self->{db_path}; + my $template_path = $self->{template_path}; $log->debug("using db path '$db_path', template path '$template_path'"); - $self->{db} = new WebPAC::DB( + $self->{db} = new WebPAC::Store( path => $db_path, read_only => 1, - ); - - $self->{out} = new WebPAC::Output::TT( - include_path => $template_path, - filters => { foo => sub { shift } }, + database => $est_cfg->{database}, ); # default template from config.yaml $self->{template} ||= $c->config->{webpac}->{template}; - $self->{iconv} = new Text::Iconv( - $c->config->{webpac}->{webpac_encoding}, - $c->config->{webpac}->{out_encoding} - ); - $log->debug("converting encoding from webpac_encoding '" . $c->config->{webpac}->{webpac_encoding} . - "' to '" . - $c->config->{webpac}->{out_encoding} . "'" ); - # save config parametars in object - foreach my $f (qw/hits_on_page/) { - $self->{$_} = $c->config->{$_}; - $log->debug("self->{$_} = " . $c->config->{$_}); - } + $self->{databases} = $c->config->{databases} || $log->fatal("can't find databases in config"); + + # create Template toolkit instance + $self->{'tt'} = Template->new( + INCLUDE_PATH => $template_path, + FILTERS => { + dump_html => sub { + return unless (@_); + my $out; + my $i = 1; + foreach my $v (@_) { + $out .= qq{
} . + Data::HTMLDumper->Dump([ $v ],[ "v$i" ]) . + qq{
}; + $i++; + } + $out =~ s!/]*>!!gis if ($out); + return $out; + } + }, + EVAL_PERL => 1, + ); return $self; } -=head2 iconv_on_save +=head2 setup_site - my $out = $m->iconv_on_save( $content ); + $self->setup_site('site_name'); -Convert data saved to disk in Webpac encoding. +Change node URL and database name according to site name (if available) or fallback +to C from configuration. =cut -sub iconv_on_save { +sub setup_site { my $self = shift; - $self->{iconv_save} ||= new Text::Iconv( - $self->config->{webpac}->{out_encoding}, - $self->config->{webpac}->{webpac_encoding}, - ); + my $site = shift || $self->{defaultnode}; + if (! $site) { + $self->{log}->warn("not changing site from " . $self->{est_node}->{url}); + return; + } - $self->{iconv_save}->convert( @_ ); -} + $self->{log}->fatal("setup_site can't find site or defaultnode") unless ($site); + my $url = $self->{masterurl} . '/node/' . $site; + $self->{est_node}->set_url( $url ); + $self->{log}->debug("setup_site '$site' using $url"); +} =head2 search @@ -137,6 +173,7 @@ get_attr => [ '@uri' ], max => 42, template => 'result_template.tt', + depth => 1, ); All fields are standard C parametars except @@ -148,98 +185,336 @@ sub search { my $self = shift; + my $search_start_t = time(); + my $args = {@_}; my $log = $self->{log}; - $log->debug("args: " . Dumper( $args )); + $log->dumper($args, 'args'); my $query = $args->{phrase} || $log->warn("no query phrase") && return; - $log->debug("search model query: '$query'"); - if ($args->{add_attr}) { - $log->debug(" + add_attr: " . - join("','", @{ $args->{add_attr} }) - ); - } - my $template_filename = $args->{template} || $self->{template}; - $args->{max} ||= $self->{'hits_on_page'}; - if (! $args->{max}) { - $args->{max} = 10; - $log->warn("max not set when calling model. Using default of 10"); + $args->{hits_on_page} ||= $self->{'hits_for_pager'}; + if (! $args->{hits_on_page}) { + $args->{hits_on_page} = 100; + $log->warn("max not set when calling model. Using default of $args->{hits_on_page}"); } my $times; # store some times for benchmarking my $t = time(); - my @results = $self->{est}->search( %{ $args } ); + # transfer depth of search + if (! $args->{depth}) { + my $default = $self->{defaultdepth} || $log->logdie("can't find defaultdepth in estraier configuration"); + $args->{depth} = $default; + $log->warn("using default search depth $default"); + } + $args->{depth} ||= 0; - $times->{est} += time() - $t; + $log->debug("searching " . $self->{est_node}->{url} . " hits on page: $args->{hits_on_page} depth: $args->{depth} phrase: " . ($query || '[none]') ); + + # + # construct condition for Hyper Estraier + # + my $cond = Search::Estraier::Condition->new(); + if ( ref($args->{add_attr}) eq 'ARRAY' ) { + $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) ); + map { + $cond->add_attr( $_ ); + $log->debug(" + $_"); + } @{ $args->{add_attr} }; + }; + + $cond->set_phrase( $query ) if ($query); + $cond->set_options( $args->{options} ) if ($args->{options}); + $cond->set_order( $args->{order} ) if ($args->{order}); + + my $hits_on_page = $args->{hits_on_page} || 7; + my $page = $args->{page} || 1; + if ($page < 1) { + $log->warn("page number $page < 1"); + $page = 1; + } + + $cond->set_max( my $max = $page * $hits_on_page ); + $cond->set_skip( my $skip = ( $page - 1 ) * $hits_on_page ); + + $log->debug("search max: $max, skip: $skip"); + + my $result = $self->{est_node}->search($cond, $args->{depth}); + if (! $result) { + $self->{log}->fatal("search didn't return result"); + return; + } + my $hits = $result->doc_num; - my $hits = $#results + 1; + $times->{est} += time() - $t; - $log->debug( sprintf("search took %.2fs and returned $hits hits.", $times->{est}) ); + $log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); - # just return results? - return @results unless ($args->{'template'}); + $self->{hints} = $result->{hints}; + #$log->dumper($self->{hints}, 'original hints' ); # - # construct HTML results + # fetch results # - my @html_results; - - for my $i ( 0 .. $#results ) { + my @results; - my $mfn = $1 if ( $results[$i]->{'@uri'} =~ m#/(\d+)$#); + - #$log->debug("load_ds( $mfn )"); + for my $i ( 0 .. ( $hits < $max ? ($hits-1) : ($max-1) ) ) { $t = time(); - my $ds = $self->{db}->load_ds( $mfn ) || $log->error("can't load_ds( $mfn )") && next; + #$log->debug("get_doc($i)"); + my $doc = $result->get_doc( $i ); + if (! $doc) { + $log->warn("can't find result $i"); + next; + } - $times->{db} += time() - $t; + my $hash; - #$log->debug( "ds = " . Dumper( \@html_results ) ); + foreach my $attr (@{ $args->{get_attr} }) { + my $val = $doc->attr( $attr ); + #$log->debug("attr $attr = ", $val || 'undef'); + $hash->{$attr} = $val if (defined($val)); + } - $t = time(); + $times->{hash} += time() - $t; - my $html = $self->{out}->apply( - template => $template_filename, - data => $ds, - ); + next unless ($hash); - $times->{out} += time() - $t; + if (! $args->{'template'}) { + push @results, $hash; + } else { + my ($database, $prefix, $id); - $t = time(); + if ( $hash->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { + ($database, $prefix,$id) = ($1,$2,$3); + } else { + $log->warn("can't decode database/prefix/id from " . $hash->{'@uri'}); + next; + } - $html = $self->{iconv}->convert( $html ) || $log->error("can't convert: $html"); + #$log->debug("load_ds( id => $id, prefix => '$prefix' )"); - $times->{iconv} += time() - $t; + $t = time(); - push @html_results, $html; + my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); + if (! $ds) { + $log->error("can't load_ds( ${database}/${prefix}/${id} )"); + next; + } - } + $times->{db} += time() - $t; + + $t = time(); + + my $html = $self->apply( + template => $template_filename, + data => $ds, + record_uri => "${database}/${prefix}/${id}", + config => $self->{databases}->{$database}, + ); + + $times->{apply} += time() - $t; + + $t = time(); + + $html = decode($self->{webpac_encoding}, $html); + + $times->{decode} += time() - $t; - #$log->debug( '@html_results = ' . Dumper( \@html_results ) ); + push @results, $html; + } + + } $log->debug( sprintf( - "time spent: db = %.2f, out = %.2f, iconv = %.2f", - $times->{db}, $times->{out}, $times->{iconv}, + "duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", + $times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, ) ); - return \@html_results; + return \@results; +} + +=head2 hints + + my $hints = $m->hints; + +Return various useful hints about result + +=cut + +sub hints { + my $self = shift; + + unless ($self->{hints}) { + $self->{log}->fatal("no hints found!"); + return; + } + + my $hints; + + while (my ($key,$val) = each %{ $self->{hints} }) { + + #$self->{log}->debug("current hint $key = $val"); + + if ($key =~ m/^(?:HITS*|TIME|DOCNUM|WORDNUM)$/) { + $hints->{ lc($key) } = $val; + } elsif ($key =~ m/^HINT#/) { + my ($word,$count) = split(/\t/,$val,2); + $hints->{words}->{$word} = $count; + } elsif ($key =~ m/^LINK#/) { + my ($url,undef,undef,undef,undef,undef,$results) = split(/\t/,$val,7); + if ($url =~ m#/node/(.+)$#) { + $hints->{node}->{$1} = $results; + } else { + $self->{log}->debug("url $url doesn't have /node/ in it!"); + } + } else { + $self->{log}->debug("unknown hint $key = $val"); + } + + } + + $self->{log}->dumper($hints, 'model hints' ); + + return $hints; +} + + +=head2 record + + my $html = $m->record( + mfn => 42, + template => 'foo.tt', + ); + +This will load one record, convert it to html using C