--- Webpacus/lib/Webpacus/Model/WebPAC.pm 2005/12/17 03:19:58 271 +++ Webpacus/lib/Webpacus/Model/WebPAC.pm 2006/01/22 02:52:24 382 @@ -6,12 +6,13 @@ use base qw/ Catalyst::Model /; -use Data::Dumper; use WebPAC::Store 0.08; -use WebPAC::Output::TT 0.04; -use WebPAC::Search::Estraier 0.05; +use Search::Estraier 0.04; use File::Slurp; -use Time::HiRes; +use Time::HiRes qw/time/; +use Encode qw/encode decode from_to/; +use Template; +use Data::Dumper; =head1 NAME @@ -40,6 +41,7 @@ user: 'admin' passwd: 'admin' hits_on_page: 100 + hits_for_pager: 1000 webpac: db_path: '/data/webpac2/db' @@ -47,8 +49,6 @@ template: 'html_ffzg_results_short.tt' # encoding comming from webpac webpac_encoding: 'iso-8859-2' - # encoding expected by Catalyst - out_encoding: 'UTF-8' =cut @@ -74,10 +74,20 @@ $est_cfg->{database} = $defaultnode; } - $self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } ); + my $url = $est_cfg->{masterurl} . '/node/' . $est_cfg->{database}; + + $log->info("opening Hyper Estraier index $url as $est_cfg->{'user'}"); + + $self->{est_node} = Search::Estraier::Node->new( + url => $url, + user => $est_cfg->{user}, + passwd => $est_cfg->{passwd}, + ); + + $log->fatal("can't create Search::Estraier::Node $url") unless ($self->{est_node}); # save config parametars in object - foreach my $f (qw/db_path template_path hits_on_page webpac_encoding out_encoding defaultdepth/) { + foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) { $self->{$f} = $c->config->{hyperestraier}->{$f} || $c->config->{webpac}->{$f}; $log->debug("self->{$f} = " . $self->{$f}); @@ -93,26 +103,36 @@ database => $est_cfg->{database}, ); - $self->{out} = new WebPAC::Output::TT( - include_path => $template_path, - filters => { foo => sub { shift } }, - ); - # default template from config.yaml $self->{template} ||= $c->config->{webpac}->{template}; - $self->{iconv} = new Text::Iconv( - $c->config->{webpac}->{webpac_encoding}, - $c->config->{webpac}->{out_encoding} - ); - $log->debug("converting encoding from webpac_encoding '" . $c->config->{webpac}->{webpac_encoding} . - "' to '" . - $c->config->{webpac}->{out_encoding} . "'" ); + $self->{databases} = $c->config->{databases} || $log->fatal("can't find databases in config"); + + # create Template toolkit instance + $self->{'tt'} = Template->new( + INCLUDE_PATH => $template_path, + FILTERS => { + dump_html => sub { + return unless (@_); + my $out; + my $i = 1; + foreach my $v (@_) { + $out .= qq{
} . + Data::HTMLDumper->Dump([ $v ],[ "v$i" ]) . + qq{
}; + $i++; + } + $out =~ s!/]*>!!gis if ($out); + return $out; + } + }, + EVAL_PERL => 1, + ); return $self; @@ -139,11 +159,13 @@ sub search { my $self = shift; + my $search_start_t = time(); + my $args = {@_}; my $log = $self->{log}; - $log->debug("args: " . Dumper( $args )); + $log->debug("search args: " . Dumper( $args )); my $query = $args->{phrase} || $log->warn("no query phrase") && return; @@ -156,10 +178,10 @@ my $template_filename = $args->{template} || $self->{template}; - $args->{max} ||= $self->{'hits_on_page'}; + $args->{max} ||= $self->{'hits_for_pager'}; if (! $args->{max}) { - $args->{max} = 10; - $log->warn("max not set when calling model. Using default of 10"); + $args->{max} = 100; + $log->warn("max not set when calling model. Using default of $args->{max}"); } my $times; # store some times for benchmarking @@ -173,75 +195,125 @@ $log->warn("using default search depth $default"); } - my @results = $self->{est}->search( %{ $args } ); + $log->debug("searching for maximum $args->{max} results using depth $args->{depth}"); + + # + # construct condition for Hyper Estraier + # + my $cond = Search::Estraier::Condition->new(); + if ( ref($args->{add_attr}) eq 'ARRAY' ) { + $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) ); + map { + $cond->add_attr( _convert( $_ ) ); + $log->debug(" + $_"); + } @{ $args->{add_attr} }; + }; + + $cond->set_phrase( $query ) if ($query); + $cond->set_options( $args->{options} ) if ($args->{options}); + $cond->set_order( $args->{order} ) if ($args->{order}); + + my $max = $args->{max} || 7; + my $page = $args->{page} || 1; + if ($page < 1) { + $log->warn("page number $page < 1"); + $page = 1; + } $times->{est} += time() - $t; - my $hits = $#results + 1; + $cond->set_max( $page * $max ); - $log->debug( sprintf("search took %.2fs and returned $hits hits.", $times->{est}) ); + my $result = $self->{est_node}->search($cond, ( $args->{depth} || 0 )); + my $hits = $result->doc_num; - # just return results? - return @results unless ($args->{'template'}); + $log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) ); # - # construct HTML results + # fetch results # - my @html_results; + my @results; - for my $i ( 0 .. $#results ) { + for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) { - my ($database, $prefix, $id); - if ( $results[$i]->{'@uri'} =~ m!/([^/]+)/([^/]+)#(\d+)$!) { - ($database, $prefix,$id) = ($1,$2,$3); - } else { - $log->warn("can't decode prefix#id from " . $results[$i]->{'@uri'}); + $t = time(); + + #$log->debug("get_doc($i)"); + my $doc = $result->get_doc( $i ); + if (! $doc) { + $log->warn("can't find result $i"); next; } - #$log->debug("load_ds( id => $id, prefix => '$prefix' )"); - - $t = time(); + my $hash; - my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); - if (! $ds) { - $log->error("can't load_ds( ${database}/${prefix}#${id} )"); - next; + foreach my $attr (@{ $args->{get_attr} }) { + my $val = $doc->attr( $attr ); + #$log->debug("attr $attr = ", $val || 'undef'); + $hash->{$attr} = $val if (defined($val)); } - $times->{db} += time() - $t; + $times->{hash} += time() - $t; - #$log->debug( "ds = " . Dumper( \@html_results ) ); + next unless ($hash); - $t = time(); + if (! $args->{'template'}) { + push @results, $hash; + } else { + my ($database, $prefix, $id); - my $html = $self->{out}->apply( - template => $template_filename, - data => $ds, - record_uri => "${database}/${prefix}/${id}", - ); + if ( $hash->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) { + ($database, $prefix,$id) = ($1,$2,$3); + } else { + $log->warn("can't decode database/prefix/id from " . $hash->{'@uri'}); + next; + } - $times->{out} += time() - $t; + #$log->debug("load_ds( id => $id, prefix => '$prefix' )"); - $t = time(); + $t = time(); + + my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id ); + if (! $ds) { + $log->error("can't load_ds( ${database}/${prefix}/${id} )"); + next; + } - $html = $self->{iconv}->convert( $html ) || $log->error("can't convert: $html"); + $times->{db} += time() - $t; - $times->{iconv} += time() - $t; + #$log->debug( "ds = " . Dumper( \@html_results ) ); - push @html_results, $html; + $t = time(); + + my $html = $self->apply( + template => $template_filename, + data => $ds, + record_uri => "${database}/${prefix}/${id}", + config => $self->{databases}->{$database}, + ); + + $times->{apply} += time() - $t; + + $t = time(); + + $html = decode($self->{webpac_encoding}, $html); + + $times->{decode} += time() - $t; + + push @results, $html; + } } - #$log->debug( '@html_results = ' . Dumper( \@html_results ) ); + $log->debug( '@results = ' . Dumper( \@results ) ); $log->debug( sprintf( - "time spent: db = %.2f, out = %.2f, iconv = %.2f", - $times->{db}, $times->{out}, $times->{iconv}, + "duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs", + $times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t, ) ); - return \@html_results; + return \@results; } =head2 record @@ -261,7 +333,7 @@ my $args = {@_}; my $log = $self->{log}; - $log->debug("args: " . Dumper( $args )); + $log->debug("record args: " . Dumper( $args )); foreach my $f (qw/record_uri template/) { $log->fatal("need $f") unless ($args->{$f}); @@ -282,22 +354,24 @@ return; } - my $html = $self->{out}->apply( + my $html = $self->apply( template => $args->{template}, data => $ds, record_uri => $args->{record_uri}, + config => $self->{databases}->{$database}, ); - $html = $self->{iconv}->convert( $html ) || $log->error("can't convert: $html"); + $html = decode($self->{webpac_encoding}, $html); return $html; } + =head2 save_html $m->save_html( '/full/path/to/file', $content ); -It will use C to convert content encoding back to +It will use C to convert content encoding back to Webpac codepage, recode JavaScript Unicode entities (%u1234), strip extra newlines at beginning and end, and save to C and if that succeeds, just rename @@ -308,25 +382,22 @@ sub save_html { my ($self, $path, $content) = @_; + # FIXME Should this be UTF-8 or someting? + my $js_encoding = $self->{webpac_encoding}; + $js_encoding = 'UTF-16'; + sub _conv_js { - my $t = shift || return; - return $self->{iconv}->convert(chr(hex($t))); + return '0x' . $_[1]; + return encode($_[0], chr(hex($_[1]))); } - $content =~ s/%u([a-fA-F0-9]{4})/_conv_js($1)/gex; + #$content =~ s/%u([a-fA-F0-9]{4})/_conv_js($js_encoding,$1)/gex; $content =~ s/^[\n\r]+//s; $content =~ s/[\n\r]+$/\n/s; + $content =~ s/\n\r/\n/gs; - my ($from, $to) = ( - $self->{out_encoding}, - $self->{webpac_encoding}, - ); - - $self->{log}->debug("using iconv to convert from $from to $to encoding"); - - my $iconv_on_save = new Text::Iconv($from, $to) - || $self->{log}->fatal("can't create iconv for saving"); - - $content = $iconv_on_save->convert( $content ) || die "no content?"; + my $disk_encoding = $self->{webpac_encoding} || 'utf-8'; + $self->{log}->debug("convert encoding to $disk_encoding"); + from_to($content, 'utf-8', $disk_encoding) || $self->{log}->warn("encoding from utf-8 to $disk_encoding failed for: $content"); write_file($path . '.new', {binmode => ':raw' }, $content) || die "can't save ${path}.new $!"; rename $path . '.new', $path || die "can't rename to $path: $!"; @@ -348,15 +419,256 @@ die "no path?" unless ($path); my $content = read_file($path, {binmode => ':raw' }) || die "can't read $path: $!"; - #$content = $q->escapeHTML($iconv_utf8->convert($content)); - $content = $self->{iconv}->convert($content); - return $content; + return decode($self->{webpac_encoding}, $content); } + +=head2 apply + +Create output from in-memory data structure using Template Toolkit template. + + my $text = $tt->apply( + template => 'text.tt', + data => $ds, + record_uri => 'database/prefix/mfn', + ); + +It also has follwing template toolikit filter routies defined: + +=cut + +sub apply { + my $self = shift; + + my $args = {@_}; + + my $log = $self->{log} || die "no log?"; + + foreach my $a (qw/template data/) { + $log->fatal("need $a") unless ($args->{$a}); + } + +=head3 tt_filter_type + +filter to return values of specified from $ds, usage from TT template is in form +C, where C is optional, like this: + + [% d('Title') %] + [% d('Author',', ' %] + +=cut + + sub tt_filter_type { + my ($data,$type) = @_; + + die "no data?" unless ($data); + $type ||= 'display'; + + my $default_delimiter = { + 'display' => '¶
', + 'index' => '\n', + }; + + return sub { + + my ($name,$join) = @_; + + die "no data hash" unless ($data->{'data'} && ref($data->{'data'}) eq 'HASH'); + # Hm? Should we die here? + return unless ($name); + + my $item = $data->{'data'}->{$name} || return; + + my $v = $item->{$type} || return; + + if (ref($v) eq 'ARRAY') { + if ($#{$v} == 0) { + $v = $v->[0]; + } else { + $join = $default_delimiter->{$type} unless defined($join); + $v = join($join, @{$v}); + } + } else { + warn("TT filter $type(): field $name values aren't ARRAY, ignoring"); + } + + return $v; + } + } + + $args->{'d'} = tt_filter_type($args, 'display'); + $args->{'display'} = tt_filter_type($args, 'display'); + +=head3 tt_filter_search + +filter to return links to search, usage in TT: + + [% search('FieldToDisplay','FieldToSearch','optional delimiter', 'optional_template.tt') %] + +=cut + + sub tt_filter_search { + + my ($data) = @_; + + die "no data?" unless ($data); + + return sub { + + my ($display,$search,$delimiter,$template) = @_; + + # default delimiter + $delimiter ||= '¶
', + + die "no data hash" unless ($data->{'data'} && ref($data->{'data'}) eq 'HASH'); + # Hm? Should we die here? + return unless ($display); + + my $item = $data->{'data'}->{$display} || return; + + return unless($item->{'display'}); + if (! $item->{'search'}) { + warn "error in TT template: field $display didn't insert anything into search, use d('$display') and not search('$display'...)"; + return; + } + + my @warn; + foreach my $type (qw/display search/) { + push @warn, "field $display type $type values aren't ARRAY" unless (ref($item->{$type}) eq 'ARRAY'); + } + + if (@warn) { + warn("TT filter search(): " . join(",", @warn) . ", skipping"); + return; + } + my @html; + + my $d_el = $#{ $item->{'display'} }; + my $s_el = $#{ $item->{'search'} }; + + # easy, both fields have same number of elements or there is just + # one search and multiple display + if ( $d_el == $s_el || $s_el == 0 ) { + + foreach my $i ( 0 .. $d_el ) { + + my $s; + if ($s_el > 0) { + $s = $item->{'search'}->[$i] || die "can't find value $i for type search in field $search"; + } else { + $s = $item->{'search'}->[0]; + } + #$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg; + $s = __quotemeta( $s ); + + my $d = $item->{'display'}->[$i] || die "can't find value $i for type display in field $display"; + + my $template_arg = ''; + $template_arg = qq{,'$template'} if ($template); + + push @html, qq{$d}; + } + + return join($delimiter, @html); + } else { + my $html = qq{
WARNING: we should really support if there is $d_el display elements and $s_el search elements, but currently there is no nice way to do so, so we will just display values
}; + my $v = $item->{'display'}; + + if ($#{$v} == 0) { + $html .= $v->[0]; + } else { + $html .= join($delimiter, @{$v}); + } + return $html; + } + } + } + + $args->{'search'} = tt_filter_search($args); + +=head3 load_rec + +Used mostly for onClick events like this: + + bar + +=cut + + $args->{'load_template'} = sub { + my $template = shift or return "Logger.error('load_template missing template name!'); return false;"; + return "load_template($template); return false;"; + }; + + my $out; + + $self->{'tt'}->process( + $args->{'template'}, + $args, + \$out + ) || $log->error( "apply can't process template: ", $self->{'tt'}->error() ); + + return $out; +} + + +=head2 __quotemeta + +Helper to quote JavaScript-friendly characters + +=cut + +sub __quotemeta { + local $_ = shift; + $_ = decode('iso-8859-2', $_); + + s<([\x{0080}-\x{fffd}]+)>{sprintf '\u%0*v4X', '\u', $1}ge if ( Encode::is_utf8($_) ); + { + use bytes; + s<((?:[^ \x21-\x7E]|(?:\\(?!u)))+)>{sprintf '\x%0*v2X', '\x', $1}ge; + } + + s/\\x09/\\t/g; + s/\\x0A/\\n/g; + s/\\x0D/\\r/g; + s/"/\\"/g; + s/\\x5C/\\\\/g; + + return $_; +} + + + =head1 AUTHOR -Dobrica Pavlinusic +Dobrica Pavlinusic C<< >> =head1 LICENSE