/[webpac2]/Webpacus/lib/Webpacus/Model/WebPAC.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /Webpacus/lib/Webpacus/Model/WebPAC.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 355 - (show annotations)
Sat Jan 7 22:45:16 2006 UTC (18 years, 4 months ago) by dpavlin
File size: 8658 byte(s)
 r377@llin:  dpavlin | 2006-01-07 23:45:14 +0100
 support for multiple arguments in dump_html like f.dump_html( var1, var2, ... ),
 a little regexp to mark dump class "dump" and a sample css to make it look o.k.

1 package Webpacus::Model::WebPAC;
2
3 use strict;
4 use warnings;
5 use lib '/data/webpac2/lib';
6 use base qw/
7 Catalyst::Model
8 /;
9 use Data::Dumper;
10 use WebPAC::Store 0.08;
11 use WebPAC::Output::TT 0.07;
12 use WebPAC::Search::Estraier 0.05;
13 use File::Slurp;
14 use Time::HiRes;
15 use Encode qw/encode decode from_to/;
16 use Data::HTMLDumper;
17
18 =head1 NAME
19
20 Webpacus::Model::WebPAC - Catalyst Model
21
22 =head1 SYNOPSIS
23
24 See L<Webpacus> and L<WebPAC>.
25
26 =head1 DESCRIPTION
27
28 Catalyst Model for access to WebPAC data.
29
30 =head2 new
31
32 Configuration for hyperestraier in C<config.yaml> like this:
33
34 --- #YAML:1.0
35 # DO NOT USE TABS FOR INDENTATION OR label/value SEPARATION!!!
36
37 # configuration for hyper estraier full text search engine
38 hyperestraier:
39 masterurl: 'http://localhost:1978/node/webpac2'
40 defaultnode: 'webpac2'
41 defaultdepth: 1
42 user: 'admin'
43 passwd: 'admin'
44 hits_on_page: 100
45 hits_for_pager: 1000
46
47 webpac:
48 db_path: '/data/webpac2/db'
49 template_path: '/data/webpac2/conf/output/tt'
50 template: 'html_ffzg_results_short.tt'
51 # encoding comming from webpac
52 webpac_encoding: 'iso-8859-2'
53
54 =cut
55
56 sub new {
57 my ( $self, $c, $config ) = @_;
58
59 $self = $self->NEXT::new($c, $config);
60 $self->config($config);
61
62 my $log = $c->log;
63 $self->{log} = $log;
64
65 my $est_cfg = $c->config->{hyperestraier};
66 $est_cfg->{'log'} = $log;
67
68 $est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding");
69
70 $log->debug("using config:" . Dumper($est_cfg) );
71
72 if (! $est_cfg->{database}) {
73 my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration");
74 $log->info("using default node $defaultnode");
75 $est_cfg->{database} = $defaultnode;
76 }
77
78 $self->{est} = new WebPAC::Search::Estraier( %{ $est_cfg } );
79
80 # save config parametars in object
81 foreach my $f (qw/db_path template_path hits_on_page webpac_encoding defaultdepth/) {
82 $self->{$f} = $c->config->{hyperestraier}->{$f} ||
83 $c->config->{webpac}->{$f};
84 $log->debug("self->{$f} = " . $self->{$f});
85 }
86 my $db_path = $self->{db_path};
87 my $template_path = $self->{template_path};
88
89 $log->debug("using db path '$db_path', template path '$template_path'");
90
91 $self->{db} = new WebPAC::Store(
92 path => $db_path,
93 read_only => 1,
94 database => $est_cfg->{database},
95 );
96
97 $self->{out} = new WebPAC::Output::TT(
98 include_path => $template_path,
99 filters => {
100 dump_html => sub {
101 return unless (@_);
102 my $out;
103 my $i = 1;
104 foreach my $v (@_) {
105 $out .= qq{<div id="dump_$i">} .
106 Data::HTMLDumper->Dump([ $v ],[ "v$i" ]) .
107 qq{</div>};
108 $i++;
109 }
110 $out =~ s!<table[^>/]*>!<table class="dump">!gis if ($out);
111 return $out;
112 }
113 },
114 );
115
116 # default template from config.yaml
117 $self->{template} ||= $c->config->{webpac}->{template};
118
119 $log->debug("converting encoding from webpac_encoding '" .
120 $c->config->{webpac}->{webpac_encoding} .
121 "'"
122 );
123
124 $self->{databases} = $c->config->{databases} || $log->error("can't find databases in config");
125
126 return $self;
127
128 }
129
130
131 =head2 search
132
133 my $m->search(
134 phrase => 'query phrase',
135 add_attr => \@add_attr
136 get_attr => [ '@uri' ],
137 max => 42,
138 template => 'result_template.tt',
139 depth => 1,
140 );
141
142 All fields are standard C<WebPAC::Search::Estraier> parametars except
143 C<template> which will (if specified) return results in HTML using
144 selected template.
145
146 =cut
147
148 sub search {
149 my $self = shift;
150
151 my $args = {@_};
152
153 my $log = $self->{log};
154
155 $log->debug("search args: " . Dumper( $args ));
156
157 my $query = $args->{phrase} || $log->warn("no query phrase") && return;
158
159 $log->debug("search model query: '$query'");
160 if ($args->{add_attr}) {
161 $log->debug(" + add_attr: " .
162 join("','", @{ $args->{add_attr} })
163 );
164 }
165
166 my $template_filename = $args->{template} || $self->{template};
167
168 $args->{max} ||= $self->{'hits_for_pager'};
169 if (! $args->{max}) {
170 $args->{max} = 100;
171 $log->warn("max not set when calling model. Using default of $args->{max}");
172 }
173
174 my $times; # store some times for benchmarking
175
176 my $t = time();
177
178 # transfer depth of search
179 if (! $args->{depth}) {
180 my $default = $self->{defaultdepth} || $log->logdie("can't find defaultdepth in estraier configuration");
181 $args->{depth} = $default;
182 $log->warn("using default search depth $default");
183 }
184
185 my @results = $self->{est}->search( %{ $args } );
186
187 $times->{est} += time() - $t;
188
189 my $hits = $#results + 1;
190
191 $log->debug( sprintf("search took %.2fs and returned $hits hits.", $times->{est}) );
192
193 # just return results?
194 return @results unless ($args->{'template'});
195
196 #
197 # construct HTML results
198 #
199
200 my @html_results;
201
202 for my $i ( 0 .. $#results ) {
203
204 my ($database, $prefix, $id);
205 if ( $results[$i]->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) {
206 ($database, $prefix,$id) = ($1,$2,$3);
207 } else {
208 $log->warn("can't decode database/prefix/id from " . $results[$i]->{'@uri'});
209 next;
210 }
211
212 #$log->debug("load_ds( id => $id, prefix => '$prefix' )");
213
214 $t = time();
215
216 my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id );
217 if (! $ds) {
218 $log->error("can't load_ds( ${database}/${prefix}/${id} )");
219 next;
220 }
221
222 $times->{db} += time() - $t;
223
224 #$log->debug( "ds = " . Dumper( \@html_results ) );
225
226 $t = time();
227
228 my $html = $self->{out}->apply(
229 template => $template_filename,
230 data => $ds,
231 record_uri => "${database}/${prefix}/${id}",
232 config => $self->{databases}->{$database},
233 );
234
235 $times->{out} += time() - $t;
236
237 $t = time();
238
239 $html = decode($self->{webpac_encoding}, $html);
240
241 push @html_results, $html;
242
243 }
244
245 #$log->debug( '@html_results = ' . Dumper( \@html_results ) );
246
247 $log->debug( sprintf(
248 "time spent: db = %.2f, out = %.2f",
249 $times->{db}, $times->{out},
250 ) );
251
252 return \@html_results;
253 }
254
255 =head2 record
256
257 my $html = $m->record(
258 mfn => 42,
259 template => 'foo.tt',
260 );
261
262 This will load one record, convert it to html using C<template> and return
263 it.
264
265 =cut
266
267 sub record {
268 my $self = shift;
269
270 my $args = {@_};
271 my $log = $self->{log};
272 $log->debug("record args: " . Dumper( $args ));
273
274 foreach my $f (qw/record_uri template/) {
275 $log->fatal("need $f") unless ($args->{$f});
276 }
277
278 my ($database, $prefix, $id);
279
280 if ($args->{record_uri} =~ m#^([^/]+)/([^/]+)/([^/]+)$#) {
281 ($database, $prefix, $id) = ($1,$2,$3);
282 } else {
283 $log->error("can't parse $args->{record_uri} into prefix, database and uri");
284 return;
285 }
286
287 my $ds = $self->{db}->load_ds( id => $id, prefix => $prefix, database => $database );
288 if (! $ds) {
289 $log->error("can't load_ds( $database/$prefix/$id )");
290 return;
291 }
292
293 my $html = $self->{out}->apply(
294 template => $args->{template},
295 data => $ds,
296 record_uri => $args->{record_uri},
297 config => $self->{databases}->{$database},
298 );
299
300 $html = decode($self->{webpac_encoding}, $html);
301
302 return $html;
303 }
304
305
306 =head2 save_html
307
308 $m->save_html( '/full/path/to/file', $content );
309
310 It will use C<Encode> to convert content encoding back to
311 Webpac codepage, recode JavaScript Unicode entities (%u1234),
312 strip extra newlines at beginning and end, and save to
313 C</full/path/to/file.new> and if that succeeds, just rename
314 it over original file which should be atomic on filesystem level.
315
316 =cut
317
318 sub save_html {
319 my ($self, $path, $content) = @_;
320
321 # FIXME Should this be UTF-8 or someting?
322 my $js_encoding = $self->{webpac_encoding};
323 $js_encoding = 'UTF-16';
324
325 sub _conv_js {
326 return '0x' . $_[1];
327 return encode($_[0], chr(hex($_[1])));
328 }
329 #$content =~ s/%u([a-fA-F0-9]{4})/_conv_js($js_encoding,$1)/gex;
330 $content =~ s/^[\n\r]+//s;
331 $content =~ s/[\n\r]+$/\n/s;
332 $content =~ s/\n\r/\n/gs;
333
334 my $disk_encoding = $self->{webpac_encoding} || 'utf-8';
335 $self->{log}->debug("convert encoding to $disk_encoding");
336 from_to($content, 'utf-8', $disk_encoding) || $self->{log}->warn("encoding from utf-8 to $disk_encoding failed for: $content");
337
338 write_file($path . '.new', {binmode => ':raw' }, $content) || die "can't save ${path}.new $!";
339 rename $path . '.new', $path || die "can't rename to $path: $!";
340 }
341
342 =head2 load_html
343
344 my $html = $m->load_html('/full/path/to/file');
345
346 This will convert file from Webpac encoding to Catalyst and
347 convert that data to escaped HTML (for sending into
348 C<< <textarea/> >> tags in html.
349
350 =cut
351
352 sub load_html {
353 my ($self, $path) = @_;
354
355 die "no path?" unless ($path);
356
357 my $content = read_file($path, {binmode => ':raw' }) || die "can't read $path: $!";
358
359 return decode($self->{webpac_encoding}, $content);
360 }
361
362 =head1 AUTHOR
363
364 Dobrica Pavlinusic C<< <dpavlin@rot13.org> >>
365
366 =head1 LICENSE
367
368 This library is free software, you can redistribute it and/or modify
369 it under the same terms as Perl itself.
370
371 =cut
372
373 1;

  ViewVC Help
Powered by ViewVC 1.1.26