1 |
dpavlin |
92 |
package Webpacus::Model::WebPAC; |
2 |
|
|
|
3 |
|
|
use strict; |
4 |
|
|
use warnings; |
5 |
|
|
use lib '/data/webpac2/lib'; |
6 |
dpavlin |
93 |
use base qw/ |
7 |
|
|
Catalyst::Model |
8 |
|
|
/; |
9 |
dpavlin |
237 |
use WebPAC::Store 0.08; |
10 |
dpavlin |
382 |
use Search::Estraier 0.04; |
11 |
dpavlin |
135 |
use File::Slurp; |
12 |
dpavlin |
378 |
use Time::HiRes qw/time/; |
13 |
dpavlin |
348 |
use Encode qw/encode decode from_to/; |
14 |
dpavlin |
379 |
use Template; |
15 |
dpavlin |
92 |
|
16 |
|
|
=head1 NAME |
17 |
|
|
|
18 |
|
|
Webpacus::Model::WebPAC - Catalyst Model |
19 |
|
|
|
20 |
|
|
=head1 SYNOPSIS |
21 |
|
|
|
22 |
|
|
See L<Webpacus> and L<WebPAC>. |
23 |
|
|
|
24 |
|
|
=head1 DESCRIPTION |
25 |
|
|
|
26 |
|
|
Catalyst Model for access to WebPAC data. |
27 |
|
|
|
28 |
|
|
=head2 new |
29 |
|
|
|
30 |
|
|
Configuration for hyperestraier in C<config.yaml> like this: |
31 |
|
|
|
32 |
|
|
--- #YAML:1.0 |
33 |
|
|
# DO NOT USE TABS FOR INDENTATION OR label/value SEPARATION!!! |
34 |
|
|
|
35 |
|
|
# configuration for hyper estraier full text search engine |
36 |
|
|
hyperestraier: |
37 |
dpavlin |
222 |
masterurl: 'http://localhost:1978' |
38 |
|
|
defaultnode: 'webpac2' |
39 |
|
|
defaultdepth: 1 |
40 |
dpavlin |
96 |
user: 'admin' |
41 |
|
|
passwd: 'admin' |
42 |
dpavlin |
143 |
hits_on_page: 100 |
43 |
dpavlin |
305 |
hits_for_pager: 1000 |
44 |
dpavlin |
92 |
|
45 |
dpavlin |
96 |
webpac: |
46 |
|
|
db_path: '/data/webpac2/db' |
47 |
|
|
template_path: '/data/webpac2/conf/output/tt' |
48 |
|
|
template: 'html_ffzg_results_short.tt' |
49 |
|
|
# encoding coming from webpac |
50 |
|
|
webpac_encoding: 'iso-8859-2' |
51 |
|
|
|
52 |
dpavlin |
92 |
=cut |
53 |
|
|
|
54 |
|
|
# Constructor called by Catalyst with the application context and the
# model configuration.
#
# Reads the C<hyperestraier> and C<webpac> sections of the application
# config, creates the Hyper Estraier node handle, the read-only
# WebPAC::Store and the Template Toolkit instance, and keeps them in the
# object under C<est_node>, C<db> and C<tt>.
#
# Returns the constructed model object.
sub new {
    my ( $self, $c, $config ) = @_;

    $self = $self->NEXT::new($c, $config);
    $self->config($config);

    my $log = $c->log;
    $self->{log} = $log;

    my $est_cfg = $c->config->{hyperestraier};
    $est_cfg->{'log'} = $log;

    # encoding comes from the hyperestraier section first, then from the
    # top-level application config
    $est_cfg->{encoding} = $est_cfg->{catalyst_encoding} || $c->config->{catalyst_encoding} or $c->log->fatal("can't find catalyst_encoding");

    $log->dumper($est_cfg, 'est_cfg');

    if (! $est_cfg->{database}) {
        my $defaultnode = $est_cfg->{defaultnode} || $log->logdie("can't find defaultnode in estraier configuration");
        $log->info("using default node $defaultnode");
        $est_cfg->{database} = $defaultnode;
    }

    my $url = $est_cfg->{masterurl} . '/node/' . $est_cfg->{database};

    $log->info("opening Hyper Estraier index $url as $est_cfg->{'user'}");

    $self->{est_node} = Search::Estraier::Node->new(
        url    => $url,
        user   => $est_cfg->{user},
        passwd => $est_cfg->{passwd},
    );

    $log->fatal("can't create Search::Estraier::Node $url") unless ($self->{est_node});

    # save config parameters in object; each key is looked up in the
    # hyperestraier section first and in the webpac section second.
    # hits_for_pager is included because search() reads it from the object.
    foreach my $f (qw/
        db_path template_path hits_on_page hits_for_pager webpac_encoding
        defaultdepth masterurl defaultnode
    /) {
        $self->{$f} = $c->config->{hyperestraier}->{$f} ||
            $c->config->{webpac}->{$f};
        $log->debug("self->{$f} = " . $self->{$f});
    }
    my $db_path = $self->{db_path};
    my $template_path = $self->{template_path};

    $log->debug("using db path '$db_path', template path '$template_path'");

    # direct method call instead of indirect object syntax
    $self->{db} = WebPAC::Store->new(
        path      => $db_path,
        read_only => 1,
        database  => $est_cfg->{database},
    );

    # default template from config.yaml
    $self->{template} ||= $c->config->{webpac}->{template};

    $log->debug("converting encoding from webpac_encoding '" .
        $c->config->{webpac}->{webpac_encoding} .
        "'"
    );

    $self->{databases} = $c->config->{databases} || $log->fatal("can't find databases in config");

    # create Template toolkit instance
    $self->{'tt'} = Template->new(
        INCLUDE_PATH => $template_path,
        FILTERS => {
            # dump every argument as an HTML table wrapped in <div id="dump_N">
            # NOTE(review): Data::HTMLDumper is not loaded by any "use" line
            # visible in this file -- confirm it is loaded elsewhere.
            dump_html => sub {
                return unless (@_);
                my $out;
                my $i = 1;
                foreach my $v (@_) {
                    $out .= qq{<div id="dump_$i">} .
                        Data::HTMLDumper->Dump([ $v ],[ "v$i" ]) .
                        qq{</div>};
                    $i++;
                }
                # give every generated table the same CSS class
                $out =~ s!<table[^>/]*>!<table class="dump">!gis if ($out);
                return $out;
            }
        },
        EVAL_PERL => 1,
    );

    return $self;

}
142 |
|
|
|
143 |
dpavlin |
399 |
=head2 setup_site |
144 |
dpavlin |
135 |
|
145 |
dpavlin |
399 |
$self->setup_site('site_name'); |
146 |
|
|
|
147 |
dpavlin |
400 |
Change node URL and database name according to site name (if available) or fallback |
148 |
|
|
to C<defaultnode> from configuration. |
149 |
dpavlin |
399 |
|
150 |
|
|
=cut |
151 |
|
|
|
152 |
|
|
# Point the Hyper Estraier node at "<masterurl>/node/<site>".
# When no (true) site name is passed, the configured defaultnode is used.
# A missing site is reported through the logger's fatal() before the URL
# is built.
sub setup_site {
    my ($self, $requested_site) = @_;

    my $site = $requested_site || $self->{defaultnode};

    if (! $site) {
        $self->{log}->fatal("setup_site can't find site or defaultnode");
    }

    my $url = $self->{masterurl} . '/node/' . $site;
    $self->{est_node}->set_url( $url );

    $self->{log}->debug("setup_site $site using $url");
}
163 |
|
|
|
164 |
dpavlin |
135 |
=head2 search |
165 |
|
|
|
166 |
dpavlin |
150 |
$m->search( |
167 |
|
|
phrase => 'query phrase', |
168 |
dpavlin |
155 |
add_attr => \@add_attr |
169 |
|
|
get_attr => [ '@uri' ], |
170 |
|
|
max => 42, |
171 |
dpavlin |
150 |
template => 'result_template.tt', |
172 |
dpavlin |
222 |
depth => 1, |
173 |
dpavlin |
150 |
); |
174 |
dpavlin |
135 |
|
175 |
dpavlin |
155 |
All fields are standard C<WebPAC::Search::Estraier> parameters except |
176 |
|
|
C<template> which will (if specified) return results in HTML using |
177 |
|
|
selected template. |
178 |
|
|
|
179 |
dpavlin |
135 |
=cut |
180 |
|
|
|
181 |
dpavlin |
93 |
# Run a Hyper Estraier search and return an array reference of results.
#
# Named arguments read here: phrase, template, max, depth, add_attr,
# get_attr, options, order, page.
#
# Each result is either a hash of the requested attributes (when no
# C<template> argument was passed) or the record rendered through apply()
# and decoded from C<webpac_encoding>.  The raw hints of the result are
# stored in $self->{hints} for the hints() method.
#
# Returns nothing when the node search yields no result object.
sub search {
    my $self = shift;

    my $search_start_t = time();

    my $args = {@_};

    my $log = $self->{log};

    $log->dumper($args, 'args');

    my $query = $args->{phrase} || $log->warn("no query phrase") && return;

    my $template_filename = $args->{template} || $self->{template};

    $args->{max} ||= $self->{'hits_for_pager'};
    if (! $args->{max}) {
        $args->{max} = 100;
        $log->warn("max not set when calling model. Using default of $args->{max}");
    }

    # timing buckets for benchmarking; all start at zero so the final
    # breakdown never formats an undefined value
    my $times = { est => 0, hash => 0, db => 0, apply => 0, decode => 0 };

    my $t = time();

    # transfer depth of search
    if (! $args->{depth}) {
        my $default = $self->{defaultdepth} || $log->logdie("can't find defaultdepth in estraier configuration");
        $args->{depth} = $default;
        $log->warn("using default search depth $default");
    }
    $args->{depth} ||= 0;

    $log->debug("searching for maximum $args->{max} results using depth $args->{depth} phrase: ", $query || '[none]');

    #
    # construct condition for Hyper Estraier
    #
    my $cond = Search::Estraier::Condition->new();
    if ( ref($args->{add_attr}) eq 'ARRAY' ) {
        $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
        foreach my $attr_expr (@{ $args->{add_attr} }) {
            $cond->add_attr( $attr_expr );
            $log->debug(" + $attr_expr");
        }
    }

    $cond->set_phrase( $query ) if ($query);
    $cond->set_options( $args->{options} ) if ($args->{options});
    $cond->set_order( $args->{order} ) if ($args->{order});

    my $max = $args->{max} || 7;
    my $page = $args->{page} || 1;
    if ($page < 1) {
        $log->warn("page number $page < 1");
        $page = 1;
    }

    # ask for everything up to the end of the requested page; earlier
    # pages are skipped when the results are fetched below
    $cond->set_max( $page * $max );

    my $result = $self->{est_node}->search($cond, $args->{depth});
    if (! $result) {
        $self->{log}->fatal("search didn't return result");
        return;
    }
    my $hits = $result->doc_num;

    $times->{est} += time() - $t;

    $log->debug( sprintf("search took %.6fs and returned $hits hits.", $times->{est}) );

    $self->{hints} = $result->{hints};
    #$log->dumper($self->{hints}, 'original hints' );

    #
    # fetch results
    #

    my @results;

    for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {

        $t = time();

        #$log->debug("get_doc($i)");
        my $doc = $result->get_doc( $i );
        if (! $doc) {
            $log->warn("can't find result $i");
            next;
        }

        my $hash;

        foreach my $attr (@{ $args->{get_attr} }) {
            my $val = $doc->attr( $attr );
            #$log->debug("attr $attr = ", $val || 'undef');
            $hash->{$attr} = $val if (defined($val));
        }

        $times->{hash} += time() - $t;

        # nothing was requested or found for this document
        next unless ($hash);

        if (! $args->{'template'}) {
            push @results, $hash;
        } else {
            my ($database, $prefix, $id);

            if ( $hash->{'@uri'} =~ m!/([^/]+)/([^/]+)/(\d+)$!) {
                ($database, $prefix,$id) = ($1,$2,$3);
            } else {
                $log->warn("can't decode database/prefix/id from " . $hash->{'@uri'});
                next;
            }

            #$log->debug("load_ds( id => $id, prefix => '$prefix' )");

            $t = time();

            my $ds = $self->{db}->load_ds( database => $database, prefix => $prefix, id => $id );
            if (! $ds) {
                $log->error("can't load_ds( ${database}/${prefix}/${id} )");
                next;
            }

            $times->{db} += time() - $t;

            $t = time();

            my $html = $self->apply(
                template   => $template_filename,
                data       => $ds,
                record_uri => "${database}/${prefix}/${id}",
                config     => $self->{databases}->{$database},
            );

            $times->{apply} += time() - $t;

            $t = time();

            $html = decode($self->{webpac_encoding}, $html);

            $times->{decode} += time() - $t;

            push @results, $html;
        }

    }

    $log->debug( sprintf(
        "duration breakdown: estraier %.6fs, hash %.6fs, store %.6fs, apply %.6fs, decode %.06f, total: %.6fs",
        $times->{est}, $times->{hash}, $times->{db}, $times->{apply}, $times->{decode}, time() - $search_start_t,
    ) );

    return \@results;
}
337 |
|
|
|
338 |
dpavlin |
405 |
=head2 hints |
339 |
|
|
|
340 |
|
|
my $hints = $m->hints; |
341 |
|
|
|
342 |
|
|
Return various useful hints about result |
343 |
|
|
|
344 |
|
|
=cut |
345 |
|
|
|
346 |
|
|
# Convert the raw hints stored by the last search() into a nested hash:
#
#   HIT.. / TIME / DOCNUM / WORDNUM  => $hints->{ lowercased key }
#   HINT#n  ("word<TAB>count")       => $hints->{words}->{word}
#   LINK#n  (tab separated, 7 cols)  => $hints->{node}->{node name}
#
# Anything else is only reported through the debug log.  Returns nothing
# (after a fatal log entry) when no hints are stored in the object.
sub hints {
    my ($self) = @_;

    my $raw = $self->{hints};
    if (! $raw) {
        $self->{log}->fatal("no hints found!");
        return;
    }

    my $hints;

    foreach my $key (keys %{ $raw }) {
        my $val = $raw->{$key};

        #$self->{log}->debug("current hint $key = $val");

        if ($key =~ m/^HINT#/) {
            my ($word, $count) = split(/\t/, $val, 2);
            $hints->{words}->{$word} = $count;
            next;
        }

        if ($key =~ m/^LINK#/) {
            # first column is the node URL, seventh the number of results
            my @cols = split(/\t/, $val, 7);
            my ($url, $results) = @cols[0, 6];
            my ($node) = $url =~ m#/node/(.+)$#;
            if (defined $node) {
                $hints->{node}->{$node} = $results;
            } else {
                $self->{log}->debug("url $url doesn't have /node/ in it!");
            }
            next;
        }

        if ($key =~ m/^(?:HITS*|TIME|DOCNUM|WORDNUM)$/) {
            $hints->{ lc($key) } = $val;
            next;
        }

        $self->{log}->debug("unknown hint $key = $val");
    }

    $self->{log}->dumper($hints, 'model hints' );

    return $hints;
}
382 |
|
|
|
383 |
|
|
|
384 |
dpavlin |
165 |
=head2 record |
385 |
|
|
|
386 |
|
|
my $html = $m->record( |
387 |
|
|
mfn => 42, |
388 |
|
|
template => 'foo.tt', |
389 |
|
|
); |
390 |
|
|
|
391 |
|
|
This will load one record, convert it to html using C<template> and return |
392 |
|
|
it. |
393 |
|
|
|
394 |
|
|
=cut |
395 |
|
|
|
396 |
|
|
# Load a single record and render it to HTML.
#
# Named arguments: record_uri ("database/prefix/id") and template.
# The record is fetched with load_ds() from the store, rendered through
# apply() and decoded from webpac_encoding.  Returns nothing when the URI
# cannot be parsed or the record cannot be loaded.
sub record {
    my $self = shift;

    my $args = {@_};
    my $log  = $self->{log};
    $log->dumper( $args, 'args' );

    for my $required ('record_uri', 'template') {
        next if $args->{$required};
        $log->fatal("need $required");
    }

    my ($database, $prefix, $id) =
        $args->{record_uri} =~ m#^([^/]+)/([^/]+)/([^/]+)$#;

    unless (defined $database) {
        $log->error("can't parse $args->{record_uri} into prefix, database and uri");
        return;
    }

    my $ds = $self->{db}->load_ds( id => $id, prefix => $prefix, database => $database );
    unless ($ds) {
        $log->error("can't load_ds( $database/$prefix/$id )");
        return;
    }

    my $rendered = $self->apply(
        template   => $args->{template},
        data       => $ds,
        record_uri => $args->{record_uri},
        config     => $self->{databases}->{$database},
    );

    return decode($self->{webpac_encoding}, $rendered);
}
433 |
|
|
|
434 |
dpavlin |
305 |
|
435 |
dpavlin |
403 |
=head2 list_nodes |
436 |
|
|
|
437 |
|
|
my @nodes = $m->list_nodes( 'site' ); |
438 |
|
|
|
439 |
|
|
Return all databases which have records for selected site. Returned array of |
440 |
|
|
hashes has elements C<name> and C<label>. |
441 |
|
|
|
442 |
|
|
=cut |
443 |
|
|
|
444 |
|
|
# Return a list of hashes (name, label, doc_num) describing the node for
# the given site and every node it links to.
#
# The site's own node is listed only when it holds documents.  Linked
# nodes are visited one by one through setup_site(); afterwards the node
# handle is switched back to the requested site.
sub list_nodes {
    my ($self, $site) = @_;

    my $log = $self->{log};

    $log->debug("list_nodes use site $site");

    $self->setup_site( $site );

    my @nodes;

    if ($self->{est_node}->doc_num > 0) {
        push @nodes, {
            name    => $self->{est_node}->name,
            label   => $self->{est_node}->label,
            doc_num => $self->{est_node}->doc_num,
        };
    }

    # refresh set info
    $self->{est_node}->_set_info;

    my $links = $self->{est_node}->links;
    return @nodes unless ($links);

    $log->dumper( $links, 'links' );

    for my $link (@{ $links }) {
        # link format: url <TAB> label <TAB> rest
        my ($url, $label) = split(/\t/, $link, 3);

        my ($node) = $url =~ m#/node/(.+)$#;
        if (! defined $node) {
            $log->warn("can't find node name in link $link");
            next;
        }

        # switch to the linked node to read its document count
        $self->setup_site( $node );
        $self->{est_node}->_set_info;

        push @nodes, {
            name    => $node,
            label   => decode('UTF-8', $label),
            doc_num => $self->{est_node}->doc_num,
        };
    }

    # back to the site we were asked about
    $self->setup_site( $site );
    $self->{est_node}->_set_info;

    $log->dumper( \@nodes, 'nodes' );

    return @nodes;
}
494 |
|
|
|
495 |
dpavlin |
135 |
=head2 save_html |
496 |
dpavlin |
93 |
|
497 |
dpavlin |
135 |
$m->save_html( '/full/path/to/file', $content ); |
498 |
dpavlin |
93 |
|
499 |
dpavlin |
348 |
It will use C<Encode> to convert content encoding back to |
500 |
dpavlin |
135 |
Webpac codepage, recode JavaScript Unicode entities (%u1234), |
501 |
|
|
strip extra newlines at beginning and end, and save to |
502 |
|
|
C</full/path/to/file.new> and if that succeeds, just rename |
503 |
|
|
it over original file which should be atomic on filesystem level. |
504 |
|
|
|
505 |
|
|
=cut |
506 |
|
|
|
507 |
|
|
# Save $content to $path.
#
# Leading newlines are stripped, trailing newlines are collapsed to one,
# the content is converted in place from utf-8 to the webpac encoding
# (utf-8 when none is configured), written to "$path.new" and then renamed
# over $path.  Dies when the write or the rename fails.
sub save_html {
    my ($self, $path, $content) = @_;

    # FIXME Should this be UTF-8 or something?
    # Recoding of JavaScript %uXXXX entities is currently disabled:
    #$content =~ s/%u([a-fA-F0-9]{4})/_conv_js('UTF-16',$1)/gex;
    $content =~ s/^[\n\r]+//s;
    $content =~ s/[\n\r]+$/\n/s;
    $content =~ s/\n\r/\n/gs;

    my $disk_encoding = $self->{webpac_encoding} || 'utf-8';
    $self->{log}->debug("convert encoding to $disk_encoding");

    # from_to() returns undef on failure and the new length on success, so
    # test definedness: empty content legitimately converts to length 0
    defined( from_to($content, 'utf-8', $disk_encoding) )
        or $self->{log}->warn("encoding from utf-8 to $disk_encoding failed for: $content");

    write_file($path . '.new', {binmode => ':raw' }, $content) || die "can't save ${path}.new $!";

    # low-precedence "or" so that the check applies to rename() itself and
    # not to its last argument
    rename($path . '.new', $path) or die "can't rename to $path: $!";
}

# Helper for the (disabled) %uXXXX conversion in save_html.
# Arguments: encoding name, four hex digits.  Currently returns the digits
# prefixed with '0x'; the encode() call below is unreachable.
sub _conv_js {
    return '0x' . $_[1];
    return encode($_[0], chr(hex($_[1])));
}
530 |
|
|
|
531 |
|
|
=head2 load_html |
532 |
|
|
|
533 |
|
|
my $html = $m->load_html('/full/path/to/file'); |
534 |
|
|
|
535 |
|
|
This will convert file from Webpac encoding to Catalyst and |
536 |
|
|
convert that data to escaped HTML (for sending into |
537 |
|
|
C<< <textarea/> >> tags in html). |
538 |
|
|
|
539 |
|
|
=cut |
540 |
|
|
|
541 |
|
|
# Read the file at $path in raw mode and return its content decoded from
# the webpac encoding.  Dies when no path is given or the file cannot be
# read; an empty file is returned as an empty string.
sub load_html {
    my ($self, $path) = @_;

    die "no path?" unless ($path);

    my $content = read_file($path, {binmode => ':raw' });

    # test definedness, not truth: an empty file (or one containing just
    # "0") is valid content, not a read error
    die "can't read $path: $!" unless (defined $content);

    return decode($self->{webpac_encoding}, $content);
}
550 |
|
|
|
551 |
dpavlin |
379 |
|
552 |
|
|
=head2 apply |
553 |
|
|
|
554 |
|
|
Create output from in-memory data structure using Template Toolkit template. |
555 |
|
|
|
556 |
|
|
my $text = $tt->apply( |
557 |
|
|
template => 'text.tt', |
558 |
|
|
data => $ds, |
559 |
|
|
record_uri => 'database/prefix/mfn', |
560 |
|
|
); |
561 |
|
|
|
562 |
|
|
It also has the following Template Toolkit filter routines defined: |
563 |
|
|
|
564 |
|
|
=cut |
565 |
|
|
|
566 |
dpavlin |
413 |
# Escape <, >, & and " to produce valid XML
my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
my $escape_re = join '|' => keys %escape;

=head3 tt_filter_type

filter to return values of specified type from $ds, usage from TT template is in form
C<d('FieldName','delimiter')>, where C<delimiter> is optional, like this:

  [% d('Title') %]
  [% d('Author',', ') %]

=cut

# Build the closure behind d() / display().
#   $data - hash reference whose 'data' key holds the record structure
#   $type - which variant of a field to return (defaults to 'display')
# The returned closure takes (field name, optional delimiter) and returns
# the XML-escaped value(s) of that field joined by the delimiter.
sub tt_filter_type {
    my ($data,$type) = @_;

    die "no data?" unless ($data);
    $type ||= 'display';

    my $default_delimiter = {
        'display' => '¶<br/>',
        'index' => '\n',
    };

    return sub {

        my ($name,$join) = @_;

        die "no data hash" unless ($data->{'data'} && ref($data->{'data'}) eq 'HASH');
        # Hm? Should we die here?
        return unless ($name);

        my $item = $data->{'data'}->{$name} || return;

        my $v = $item->{$type} || return;

        if (ref($v) eq 'ARRAY') {
            if ($#{$v} == 0) {
                $v = $v->[0];
                $v =~ s/($escape_re)/$escape{$1}/g;
            } else {
                $join = $default_delimiter->{$type} unless defined($join);
                # escape a copy of every value: s/// returns a count, not
                # the string, and $_ aliases the caller's data
                $v = join($join, map {
                    (my $escaped = $_) =~ s/($escape_re)/$escape{$1}/g;
                    $escaped;
                } @{$v});
            }
        } else {
            warn("TT filter $type(): field $name values aren't ARRAY, ignoring");
        }

        return $v;
    }
}

=head3 tt_filter_search

filter to return links to search, usage in TT:

  [% search('FieldToDisplay','FieldToSearch','optional delimiter', 'optional_template.tt') %]

=cut

# Build the closure behind search().
#   $data - hash reference whose 'data' key holds the record structure
# The returned closure renders every display value of a field as a link
# calling the JavaScript function search_via_link() with the matching
# search value.
sub tt_filter_search {

    my ($data) = @_;

    die "no data?" unless ($data);

    return sub {

        my ($display,$search,$delimiter,$template) = @_;

        # default delimiter
        $delimiter ||= '¶<br/>';

        die "no data hash" unless ($data->{'data'} && ref($data->{'data'}) eq 'HASH');
        # Hm? Should we die here?
        return unless ($display);

        my $item = $data->{'data'}->{$display} || return;

        return unless($item->{'display'});
        if (! $item->{'search'}) {
            warn "error in TT template: field $display didn't insert anything into search, use d('$display') and not search('$display'...)";
            return;
        }

        my @warn;
        foreach my $type (qw/display search/) {
            push @warn, "field $display type $type values aren't ARRAY" unless (ref($item->{$type}) eq 'ARRAY');
        }

        if (@warn) {
            warn("TT filter search(): " . join(",", @warn) . ", skipping");
            return;
        }
        my @html;

        my $d_el = $#{ $item->{'display'} };
        my $s_el = $#{ $item->{'search'} };

        # easy, both fields have same number of elements or there is just
        # one search and multiple display
        if ( $d_el == $s_el || $s_el == 0 ) {

            foreach my $i ( 0 .. $d_el ) {

                my $s;
                if ($s_el > 0) {
                    $s = $item->{'search'}->[$i] or warn "can't find value $i for type search in field $search";
                } else {
                    $s = $item->{'search'}->[0];
                }
                #$s =~ s/([^\w.-])/sprintf("%%%02X",ord($1))/eg;
                $s = __quotemeta( $s );

                my $d = $item->{'display'}->[$i] or warn "can't find value $i for type display in field $display";

                my $template_arg = '';
                $template_arg = qq{,'$template'} if ($template);

                push @html, qq{<a href="#" onclick="return search_via_link('$search','$s'${template_arg})">$d</a>};
            }

            return join($delimiter, @html);
        } else {
            my $html = qq{<div class="notice">WARNING: we should really support if there is $d_el display elements and $s_el search elements, but currently there is no nice way to do so, so we will just display values</div>};
            my $v = $item->{'display'};

            if ($#{$v} == 0) {
                $html .= $v->[0];
            } else {
                $html .= join($delimiter, @{$v});
            }
            return $html;
        }
    }
}

# Render the template named by the 'template' argument with Template
# Toolkit and return the produced text.
#
# The named arguments are passed to the template as its variables, with
# the helper closures d, display, search, load_rec and load_template added.
# Template errors are reported through the logger's error() method.
sub apply {
    my $self = shift;

    my $args = {@_};

    my $log = $self->{log} || die "no log?";

    foreach my $required (qw/template data/) {
        $log->fatal("need $required") unless ($args->{$required});
    }

    $args->{'d'} = tt_filter_type($args, 'display');
    $args->{'display'} = tt_filter_type($args, 'display');

    $args->{'search'} = tt_filter_search($args);

=head3 load_rec

Used mostly for onClick events like this:

  <a href="#" onClick="[% load_rec( record_uri, 'template_name.tt') %]">foo</a>

It will automatically do sanity checking and create correct JavaScript code.

=cut

    $args->{'load_rec'} = sub {
        my @errors;

        my $record_uri = shift or push @errors, "record_uri missing";
        my $template = shift or push @errors, "template missing";

        if ($record_uri !~ m#^[^/]+/[^/]+/[^/]+$#) {
            push @errors, "invalid format of record_uri: $record_uri";
        }

        if (@errors) {
            return "Logger.error('errors in load_rec: " . join(", ", @errors) . "'); return false;";
        } else {
            return "load_rec('$record_uri','$template'); return false;";
        }
    };

=head3 load_template

Used to re-submit search request and load results in different template

  <a href="#" onClick="[% load_template( 'template_name.tt' ) %]">bar</a>

=cut

    # NOTE(review): unlike load_rec, the template name is emitted without
    # quotes -- confirm whether callers are expected to pass it pre-quoted.
    $args->{'load_template'} = sub {
        my $template = shift or return "Logger.error('load_template missing template name!'); return false;";
        return "load_template($template); return false;";
    };

    my $out;

    $self->{'tt'}->process(
        $args->{'template'},
        $args,
        \$out
    ) || $log->error( "apply can't process template: ", $self->{'tt'}->error() );

    # the d/display/search closures capture $args, which in turn holds
    # them; drop the helpers so this reference cycle is freed on return
    delete @{$args}{qw/d display search load_rec load_template/};

    return $out;
}
772 |
|
|
|
773 |
|
|
|
774 |
|
|
=head2 __quotemeta |
775 |
|
|
|
776 |
|
|
Helper to quote JavaScript-friendly characters |
777 |
|
|
|
778 |
|
|
=cut |
779 |
|
|
|
780 |
|
|
# Quote a string for embedding in a JavaScript string literal.
#
# The argument is decoded from iso-8859-2.  Characters from U+0080 up to
# U+FFFD become \uXXXX, other bytes outside space and printable ASCII
# become \xXX (with \t, \n and \r written symbolically), a backslash not
# followed by "u" is doubled, and both quote characters are
# backslash-escaped.
sub __quotemeta {
    local $_ = shift;
    $_ = decode('iso-8859-2', $_);

    # non-ASCII characters -> \uXXXX ('\u' is also the join string of the
    # sprintf vector flag, so runs of characters expand correctly)
    s<([\x{0080}-\x{fffd}]+)>{sprintf '\u%0*v4X', '\u', $1}ge if ( Encode::is_utf8($_) );
    {
        use bytes;
        # everything that is not a space or printable ASCII, plus any
        # backslash that does not start a \u escape -> \xXX
        s<((?:[^ \x21-\x7E]|(?:\\(?!u)))+)>{sprintf '\x%0*v2X', '\x', $1}ge;
    }

    s/\\x09/\\t/g;
    s/\\x0A/\\n/g;
    s/\\x0D/\\r/g;
    s/"/\\"/g;
    # callers embed the result in single-quoted JavaScript strings, so the
    # apostrophe has to be escaped as well
    s/'/\\'/g;
    s/\\x5C/\\\\/g;

    return $_;
}
798 |
|
|
|
799 |
|
|
|
800 |
|
|
|
801 |
dpavlin |
92 |
=head1 AUTHOR |
802 |
|
|
|
803 |
dpavlin |
348 |
Dobrica Pavlinusic C<< <dpavlin@rot13.org> >> |
804 |
dpavlin |
92 |
|
805 |
|
|
=head1 LICENSE |
806 |
|
|
|
807 |
|
|
This library is free software, you can redistribute it and/or modify |
808 |
|
|
it under the same terms as Perl itself. |
809 |
|
|
|
810 |
|
|
=cut |
811 |
|
|
|
812 |
|
|
1; |