--- Webpacus/lib/Webpacus/Model/WebPAC.pm 2005/11/22 12:57:09 92
+++ Webpacus/lib/Webpacus/Model/WebPAC.pm 2005/11/25 00:23:33 142
@@ -3,9 +3,14 @@
 use strict;
 use warnings;
 use lib '/data/webpac2/lib';
-use base qw/Catalyst::Model/;
-use WebPAC::Search::Estraier;
+use base qw/
+    Catalyst::Model
+/;
 use Data::Dumper;
+use WebPAC::DB;
+use WebPAC::Output::TT;
+use WebPAC::Search::Estraier 0.02;
+use File::Slurp;
 
 =head1 NAME
 
@@ -28,9 +33,18 @@
 
  # configuration for hyper estraier full text search engine
  hyperestraier:
-   url: 'http://localhost:1978/node/webpac2'
-   user: 'admin'
-   passwd: 'admin'
+     url: 'http://localhost:1978/node/webpac2'
+     user: 'admin'
+     passwd: 'admin'
+
+ webpac:
+     db_path: '/data/webpac2/db'
+     template_path: '/data/webpac2/conf/output/tt'
+     template: 'html_ffzg_results_short.tt'
+     # encoding coming from webpac
+     webpac_encoding: 'iso-8859-2'
+     # encoding expected by Catalyst
+     out_encoding: 'UTF-8'
 
 =cut
 
@@ -41,28 +55,204 @@
     $self->config($config);
 
     my $log = $c->log;
+    $self->{log} = $log;
 
-#    if (! $c->stash->{est}) {
+    my $est_cfg = $c->config->{hyperestraier};
+    $est_cfg->{'log'} = $log;
 
-        my $est_cfg = $c->config->{hyperestraier};
-        $est_cfg->{'log'} = $log;
+    $est_cfg->{encoding} = $est_cfg->{catalyst_encoding};
 
-        $log->debug("using config:" . Dumper($est_cfg) );
+    $log->debug("using config:" . Dumper($est_cfg) );
 
-#        $c->stash->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } );
-#    }
+    $self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } );
 
-#    $log->debug("param: " . Dumper($c->req->params));
+    my $db_path = $c->config->{webpac}->{db_path};
+    my $template_path = $c->config->{webpac}->{template_path};
+    $self->{template_path} = $template_path;
 
-#    $c->stash->{est}->search(
-#        query => $c->req->params->{Title},
-#        max => 100,
-#    );
+    $log->debug("using db path '$db_path', template path '$template_path'");
+
+    $self->{db} = new WebPAC::DB(
+        path => $db_path,
+        read_only => 1,
+    );
+
+    $self->{out} = new WebPAC::Output::TT(
+        include_path => $template_path,
+        filters => { foo => sub { shift } },
+    );
+
+    # default template from config.yaml
+    $self->{template} ||= $c->config->{webpac}->{template};
+
+    $self->{iconv} = new Text::Iconv(
+        $c->config->{webpac}->{webpac_encoding},
+        $c->config->{webpac}->{out_encoding}
+    );
+
+    $log->debug("converting encoding from webpac_encoding '" .
+        $c->config->{webpac}->{webpac_encoding} .
+        "' to '" .
+        $c->config->{webpac}->{out_encoding} .
+        "'"
+    );
 
     return $self;
 }
 
+=head2 iconv_on_save
+
+  my $out = $m->iconv_on_save( $content );
+
+Convert data saved to disk in Webpac encoding.
+
+=cut
+
+sub iconv_on_save {
+    my $self = shift;
+
+    $self->{iconv_save} ||= new Text::Iconv(
+        $self->config->{webpac}->{out_encoding},
+        $self->config->{webpac}->{webpac_encoding},
+    );
+
+    $self->{iconv_save}->convert( @_ );
+}
+
+
+=head2 search
+
+  my $results = $m->search( 'query phrase', 'result_template.tt', \@add_attr );
+
+Returns array reference with one rendered HTML fragment (converted to
+output encoding) for each hit. Template defaults to one from configuration
+and C<\@add_attr> is optional. Hits which can't be loaded or converted are
+logged and skipped.
+
+=cut
+
+sub search {
+    my ( $self, $query, $template, $add_attr ) = @_;
+
+    my $log = $self->{log};
+
+    # add_attr is optional: don't die on @{ undef } under strict
+    $log->debug("search model query: '$query', add_attr: '" . join("','", @{ $add_attr || [] }) . "'");
+
+    my $template_filename = $template || $self->{template};
+
+    my @results = $self->{est}->search(
+        phrase => $query,
+        get_attr => [ '@uri' ],
+        max => 100,
+        add_attr => $add_attr,
+    );
+
+    $log->debug("loading " . ($#results + 1) . " results");
+
+    my @html_results;
+
+    for my $i ( 0 .. $#results ) {
+
+        # list assignment instead of "my $mfn = $1 if ...", which can
+        # keep value from previous iteration when match fails
+        my ($mfn) = $results[$i]->{'@uri'} =~ m#/(\d+)$#;
+        unless ( defined $mfn ) {
+            $log->error("no mfn in uri '" . $results[$i]->{'@uri'} . "'");
+            next;
+        }
+
+        #$log->debug("load_ds( $mfn )");
+
+        # explicit branch: "|| $log->error(...) && next" skipped only
+        # if error() happened to return true value
+        my $ds = $self->{db}->load_ds( $mfn );
+        unless ( $ds ) {
+            $log->error("can't load_ds( $mfn )");
+            next;
+        }
+
+        #$log->debug( "ds = " . Dumper( \@html_results ) );
+
+        my $html = $self->{out}->apply(
+            template => $template_filename,
+            data => $ds,
+        );
+
+        # keep failed conversion out of results instead of pushing
+        # return value of error()
+        my $converted = $self->{iconv}->convert( $html );
+        unless ( defined $converted ) {
+            $log->error("can't convert: $html");
+            next;
+        }
+
+        push @html_results, $converted;
+
+    }
+
+    #$log->debug( '@html_results = ' . Dumper( \@html_results ) );
+
+    return \@html_results;
+}
+
+=head2 save_html
+
+  $m->save_html( '/full/path/to/file', $content );
+
+It will use C<iconv_on_save> to convert content encoding back to
+Webpac codepage, recode JavaScript Unicode entities (%u1234),
+strip extra newlines at beginning and end, and save to
+C<$path.new> and if that succeeds, just rename
+it over original file which should be atomic on filesystem level.
+
+It will die if there is no content after conversion or if file
+can't be written or renamed.
+
+=cut
+
+sub save_html {
+    my ($self, $path, $content) = @_;
+
+    $content = $self->iconv_on_save( $content ) || die "no content?";
+
+    # Recode %uXXXX inline: named sub nested inside another sub captures
+    # $self of first call only ("Variable will not stay shared").
+    # NOTE(review): this uses webpac->output converter on content which
+    # is already in webpac encoding; direction looks reversed - confirm.
+    $content =~ s/%u([a-fA-F0-9]{4})/$self->{iconv}->convert(chr(hex($1)))/gex;
+    $content =~ s/^[\n\r]+//s;
+    $content =~ s/[\n\r]+$/\n/s;
+
+    write_file($path . '.new', $content) || die "can't save ${path}.new $!";
+    # "or" instead of "||", which bound die to $path so that failed
+    # rename was never reported
+    rename( $path . '.new', $path ) or die "can't rename to $path: $!";
+}
+
+=head2 load_html
+
+  my $html = $m->load_html('/full/path/to/file');
+
+This will convert file from Webpac encoding to Catalyst and
+convert that data to escaped HTML (for sending into
+C<<