--- Webpacus/lib/Webpacus/Model/WebPAC.pm 2005/11/22 12:57:20 94 +++ Webpacus/lib/Webpacus/Model/WebPAC.pm 2005/11/25 00:23:33 142 @@ -5,9 +5,12 @@ use lib '/data/webpac2/lib'; use base qw/ Catalyst::Model - WebPAC::Search::Estraier /; use Data::Dumper; +use WebPAC::DB; +use WebPAC::Output::TT; +use WebPAC::Search::Estraier 0.02; +use File::Slurp; =head1 NAME @@ -30,9 +33,18 @@ # configuration for hyper estraier full text search engine hyperestraier: - url: 'http://localhost:1978/node/webpac2' - user: 'admin' - passwd: 'admin' + url: 'http://localhost:1978/node/webpac2' + user: 'admin' + passwd: 'admin' + + webpac: + db_path: '/data/webpac2/db' + template_path: '/data/webpac2/conf/output/tt' + template: 'html_ffzg_results_short.tt' + # encoding comming from webpac + webpac_encoding: 'iso-8859-2' + # encoding expected by Catalyst + out_encoding: 'UTF-8' =cut @@ -48,30 +60,169 @@ my $est_cfg = $c->config->{hyperestraier}; $est_cfg->{'log'} = $log; + $est_cfg->{encoding} = $est_cfg->{catalyst_encoding}; + $log->debug("using config:" . Dumper($est_cfg) ); $self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } ); + my $db_path = $c->config->{webpac}->{db_path}; + my $template_path = $c->config->{webpac}->{template_path}; + $self->{template_path} = $template_path; + + $log->debug("using db path '$db_path', template path '$template_path'"); + + $self->{db} = new WebPAC::DB( + path => $db_path, + read_only => 1, + ); + + $self->{out} = new WebPAC::Output::TT( + include_path => $template_path, + filters => { foo => sub { shift } }, + ); + + # default template from config.yaml + $self->{template} ||= $c->config->{webpac}->{template}; + + $self->{iconv} = new Text::Iconv( + $c->config->{webpac}->{webpac_encoding}, + $c->config->{webpac}->{out_encoding} + ); + + $log->debug("converting encoding from webpac_encoding '" . + $c->config->{webpac}->{webpac_encoding} . + "' to '" . + $c->config->{webpac}->{out_encoding} . 
+ "'" + ); + return $self; } +=head2 iconv_on_save + + my $out = $m->iconv_on_save( $content ); + +Convert data saved to disk in Webpac encoding. + +=cut + +sub iconv_on_save { + my $self = shift; + + $self->{iconv_save} ||= new Text::Iconv( + $self->config->{webpac}->{out_encoding}, + $self->config->{webpac}->{webpac_encoding}, + ); + + $self->{iconv_save}->convert( @_ ); +} + + +=head2 search + + my $html_results = $m->search( 'query phrase', 'result_template.tt', \@add_attr ); + +=cut + sub search { - my ( $self, $query ) = @_; + my ( $self, $query, $template, $add_attr ) = @_; + + my $log = $self->{log}; - $self->{log}->debug("search got query: $query<--"); + $log->debug("search model query: '$query', add_attr: '" . join("','", @{ $add_attr || [] }) . "'"); + + my $template_filename = $template || $self->{template}; my @results = $self->{est}->search( - query => $query, - attr => [ '@uri' ], + phrase => $query, + get_attr => [ '@uri' ], max => 100, + add_attr => $add_attr, ); - return @results; -} + $log->debug("loading " . ($#results + 1) . " results"); + + my @html_results; + for my $i ( 0 .. $#results ) { + my ($mfn) = $results[$i]->{'@uri'} =~ m#/(\d+)$#; + + #$log->debug("load_ds( $mfn )"); + + my $ds = $self->{db}->load_ds( $mfn ) || $log->error("can't load_ds( $mfn )") && next; + #$log->debug( "ds = " . Dumper( \@html_results ) ); + + my $html = $self->{out}->apply( + template => $template_filename, + data => $ds, + ); + + $html = $self->{iconv}->convert( $html ) || $log->error("can't convert: $html"); + + push @html_results, $html; + + } + + #$log->debug( '@html_results = ' . 
Dumper( \@html_results ) ); + + return \@html_results; +} + +=head2 save_html + + $m->save_html( '/full/path/to/file', $content ); + +It will use C<iconv_on_save> to convert content encoding back to +Webpac codepage, recode JavaScript Unicode entities (%u1234), +strip extra newlines at beginning and end, and save to +C</full/path/to/file.new> and if that succeeds, just rename +it over original file which should be atomic on filesystem level. + +=cut + +sub save_html { + my ($self, $path, $content) = @_; + + $content = $self->iconv_on_save( $content ) || die "no content?"; + + my $conv_js = sub { + my $t = shift || return; + return $self->{iconv}->convert(chr(hex($t))); + }; + $content =~ s/%u([a-fA-F0-9]{4})/$conv_js->($1)/gex; + $content =~ s/^[\n\r]+//s; + $content =~ s/[\n\r]+$/\n/s; + + write_file($path . '.new', $content) || die "can't save ${path}.new $!"; + rename( $path . '.new', $path ) || die "can't rename to $path: $!"; +} + +=head2 load_html + + my $html = $m->load_html('/full/path/to/file'); + +This will convert file from Webpac encoding to Catalyst and +convert that data to escaped HTML (for sending into +C<<