/[webpac2]/Webpacus/lib/Webpacus/Model/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /Webpacus/lib/Webpacus/Model/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 351 - (show annotations)
Sat Jan 7 18:18:07 2006 UTC (18 years, 4 months ago) by dpavlin
File size: 8503 byte(s)
 r369@llin:  dpavlin | 2006-01-07 19:18:09 +0100
 added f.dump_html(something) filter using Data::HTMLDumper

1 package Webpacus::Model::WebPAC;
2
3 use strict;
4 use warnings;
5 use lib '/data/webpac2/lib';
6 use base qw/
7 Catalyst::Model
8 /;
9 use Data::Dumper;
10 use WebPAC::Store 0.08;
11 use WebPAC::Output::TT 0.07;
12 use WebPAC::Search::Estraier 0.05;
13 use File::Slurp;
14 use Time::HiRes;
15 use Encode qw/encode decode from_to/;
16 use Data::HTMLDumper;
17
18 =head1 NAME
19
20 Webpacus::Model::WebPAC - Catalyst Model
21
22 =head1 SYNOPSIS
23
24 See L<Webpacus> and L<WebPAC>.
25
26 =head1 DESCRIPTION
27
28 Catalyst Model for access to WebPAC data.
29
30 =head2 new
31
32 Configuration for hyperestraier in C<config.yaml> like this:
33
34 --- #YAML:1.0
35 # DO NOT USE TABS FOR INDENTATION OR label/value SEPARATION!!!
36
37 # configuration for hyper estraier full text search engine
38 hyperestraier:
39 masterurl: 'http://localhost:1978/node/webpac2'
40 defaultnode: 'webpac2'
41 defaultdepth: 1
42 user: 'admin'
43 passwd: 'admin'
44 hits_on_page: 100
45 hits_for_pager: 1000
46
47 webpac:
48 db_path: '/data/webpac2/db'
49 template_path: '/data/webpac2/conf/output/tt'
50 template: 'html_ffzg_results_short.tt'
51 # encoding comming from webpac
52 webpac_encoding: 'iso-8859-2'
53
54 =cut
55
56 sub new {
57 my ( $self, $c, $config ) = @_;
58
59 $self = $self->NEXT::new($c, $config);
60 $self->config($config);
61
62 my $log = $c->log;
63 $self->{log} = $log;
64
65 my $est_cfg = $c->config->{hyperestraier};
66 $est_cfg->{'log'} = $log;
67
68 $est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding");
69
70 $log->debug("using config:" . Dumper($est_cfg) );
71
72 if (! $est_cfg->{database}) {
73 my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration");
74 $log->info("using default node $defaultnode");
75 $est_cfg->{database} = $defaultnode;
76 }
77
78 $self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } );
79
80 # save config parametars in object
81 foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) {
82 $self->{$f} = $c->config->{hyperestraier}->{$f} ||
83 $c->config->{webpac}->{$f};
84 $log->debug("self->{$f} = " . $self->{$f});
85 }
86 my $db_path = $self->{db_path};
87 my $template_path = $self->{template_path};
88
89 $log->debug("using db path '$db_path', template path '$template_path'");
90
91 $self->{db} = new WebPAC::Store(
92 path => $db_path,
93 read_only => 1,
94 database => $est_cfg->{database},
95 );
96
97 $self->{out} = new WebPAC::Output::TT(
98 include_path => $template_path,
99 filters => {
100 dump_html => sub {
101 my $t = shift || return;
102 #return Data::HTMLDumper->Dumper( $t );
103 return Data::HTMLDumper->Dump([$t],[qw/dump/]);
104 }
105 },
106 );
107
108 # default template from config.yaml
109 $self->{template} ||= $c->config->{webpac}->{template};
110
111 $log->debug("converting encoding from webpac_encoding '" .
112 $c->config->{webpac}->{webpac_encoding} .
113 "'"
114 );
115
116 $self->{databases} = $c->config->{databases} || $log->error("can't find databases in config");
117
118 return $self;
119
120 }
121
122
123 =head2 search
124
125 my $m->search(
126 phrase => 'query phrase',
127 add_attr => \@add_attr
128 get_attr => [ '@uri' ],
129 max => 42,
130 template => 'result_template.tt',
131 depth => 1,
132 );
133
134 All fields are standard C<WebPAC::Search::Estraier> parametars except
135 C<template> which will (if specified) return results in HTML using
136 selected template.
137
138 =cut
139
140 sub search {
141 my $self = shift;
142
143 my $args = {@_};
144
145 my $log = $self->{log};
146
147 $log->debug("search args: " . Dumper( $args ));
148
149 my $query = $args->{phrase} || $log->warn("no query phrase") && return;
150
151 $log->debug("search model query: '$query'");
152 if ($args->{add_attr}) {
153 $log->debug(" + add_attr: " .
154 join("','", @{ $args->{add_attr} })
155 );
156 }
157
158 my $template_filename = $args->{template} || $self->{template};
159
160 $args->{max} ||= $self->{'hits_for_pager'};
161 if (! $args->{max}) {
162 $args->{max} = 100;
163 $log->warn("max not set when calling model. Using default of $args->{max}");
164 }
165
166 my $times; # store some times for benchmarking
167
168 my $t = time();
169
170 # transfer depth of search
171 if (! $args->{depth}) {
172 my $default = $self->{defaultdepth} || $log->logdie("can't find defaultdepth in estraier configuration");
173 $args->{depth} = $default;
174 $log->warn("using default search depth $default");
175 }
176
177 my @results = $self->{est}->search( %{ $args } );
178
179 $times->{est} += time() - $t;
180
181 my $hits = $#results + 1;
182
183 $log->debug( sprintf("search took %.2fs and returned $hits hits.", $times->{est}) );
184
185 # just return results?
186 return @results unless ($args->{'template'});
187
188 #
189 # construct HTML results
190 #
191
192 my @html_results;
193
194 for my $i ( 0 .. $#results ) {
195
196 my ($database, $prefix, $id);
197 if ( $results[$i]->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) {
198 ($database, $prefix,$id) = ($1,$2,$3);
199 } else {
200 $log->warn("can't decode database/prefix/id from " . $results[$i]->{'@uri'});
201 next;
202 }
203
204 #$log->debug("load_ds( id => $id, prefix => '$prefix' )");
205
206 $t = time();
207
208 my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id );
209 if (! $ds) {
210 $log->error("can't load_ds( ${database}/${prefix}/${id} )");
211 next;
212 }
213
214 $times->{db} += time() - $t;
215
216 #$log->debug( "ds = " . Dumper( \@html_results ) );
217
218 $t = time();
219
220 my $html = $self->{out}->apply(
221 template => $template_filename,
222 data => $ds,
223 record_uri => "${database}/${prefix}/${id}",
224 config => $self->{databases}->{$database},
225 );
226
227 $times->{out} += time() - $t;
228
229 $t = time();
230
231 $html = decode($self->{webpac_encoding}, $html);
232
233 push @html_results, $html;
234
235 }
236
237 #$log->debug( '@html_results = ' . Dumper( \@html_results ) );
238
239 $log->debug( sprintf(
240 "time spent: db = %.2f, out = %.2f",
241 $times->{db}, $times->{out},
242 ) );
243
244 return \@html_results;
245 }
246
247 =head2 record
248
249 my $html = $m->record(
250 mfn => 42,
251 template => 'foo.tt',
252 );
253
254 This will load one record, convert it to html using C<template> and return
255 it.
256
257 =cut
258
259 sub record {
260 my $self = shift;
261
262 my $args = {@_};
263 my $log = $self->{log};
264 $log->debug("record args: " . Dumper( $args ));
265
266 foreach my $f (qw/record_uri template/) {
267 $log->fatal("need $f") unless ($args->{$f});
268 }
269
270 my ($database, $prefix, $id);
271
272 if ($args->{record_uri} =~ m#^([^/]+)/([^/]+)/([^/]+)$#) {
273 ($database, $prefix, $id) = ($1,$2,$3);
274 } else {
275 $log->error("can't parse $args->{record_uri} into prefix, database and uri");
276 return;
277 }
278
279 my $ds = $self->{db}->load_ds( id => $id, prefix => $prefix, database => $database );
280 if (! $ds) {
281 $log->error("can't load_ds( $database/$prefix/$id )");
282 return;
283 }
284
285 my $html = $self->{out}->apply(
286 template => $args->{template},
287 data => $ds,
288 record_uri => $args->{record_uri},
289 config => $self->{databases}->{$database},
290 );
291
292 $html = decode($self->{webpac_encoding}, $html);
293
294 return $html;
295 }
296
297
298 =head2 save_html
299
300 $m->save_html( '/full/path/to/file', $content );
301
302 It will use C<Encode> to convert content encoding back to
303 Webpac codepage, recode JavaScript Unicode entities (%u1234),
304 strip extra newlines at beginning and end, and save to
305 C</full/path/to/file.new> and if that succeeds, just rename
306 it over original file which should be atomic on filesystem level.
307
308 =cut
309
310 sub save_html {
311 my ($self, $path, $content) = @_;
312
313 # FIXME Should this be UTF-8 or someting?
314 my $js_encoding = $self->{webpac_encoding};
315 $js_encoding = 'UTF-16';
316
317 sub _conv_js {
318 return '0x' . $_[1];
319 return encode($_[0], chr(hex($_[1])));
320 }
321 #$content =~ s/%u([a-fA-F0-9]{4})/_conv_js($js_encoding,$1)/gex;
322 $content =~ s/^[\n\r]+//s;
323 $content =~ s/[\n\r]+$/\n/s;
324 $content =~ s/\n\r/\n/gs;
325
326 my $disk_encoding = $self->{webpac_encoding} || 'utf-8';
327 $self->{log}->debug("convert encoding to $disk_encoding");
328 from_to($content, 'utf-8', $disk_encoding) || $self->{log}->warn("encoding from utf-8 to $disk_encoding failed for: $content");
329
330 write_file($path . '.new', {binmode => ':raw' }, $content) || die "can't save ${path}.new $!";
331 rename $path . '.new', $path || die "can't rename to $path: $!";
332 }
333
334 =head2 load_html
335
336 my $html = $m->load_html('/full/path/to/file');
337
338 This will convert file from Webpac encoding to Catalyst and
339 convert that data to escaped HTML (for sending into
340 C<< <textarea/> >> tags in html.
341
342 =cut
343
344 sub load_html {
345 my ($self, $path) = @_;
346
347 die "no path?" unless ($path);
348
349 my $content = read_file($path, {binmode => ':raw' }) || die "can't read $path: $!";
350
351 return decode($self->{webpac_encoding}, $content);
352 }
353
354 =head1 AUTHOR
355
356 Dobrica Pavlinusic C<< <dpavlin@rot13.org> >>
357
358 =head1 LICENSE
359
360 This library is free software, you can redistribute it and/or modify
361 it under the same terms as Perl itself.
362
363 =cut
364
365 1;

  ViewVC Help
Powered by ViewVC 1.1.26