/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 301 - (show annotations)
Mon Dec 19 21:26:04 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 8412 byte(s)
 r322@athlon:  dpavlin | 2005-12-19 22:27:06 +0100
 make run.pl moderatly chatty (along with other modules), added command line options
 (try perldoc run.pl) new target index (to reindex all) and run (to index
 first 100 records of each database)

1 package WebPAC::Output::Estraier;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common/;
7
8 use HyperEstraier;
9 use Text::Iconv;
10 use Data::Dumper;
11 use LWP;
12 use URI::Escape;
13
14 =head1 NAME
15
16 WebPAC::Output::Estraier - Create Hyper Estraier full text index
17
18 =head1 VERSION
19
20 Version 0.07
21
22 =cut
23
24 our $VERSION = '0.07';
25
26 =head1 SYNOPSIS
27
28 Create full text index using Hyper Estraier index from data with
29 type C<search>.
30
31 =head1 FUNCTIONS
32
33 =head2 new
34
35 Connect to Hyper Estraier index using HTTP
36
37 my $est = new WebPAC::Output::Estraier(
38 masterurl => 'http://localhost:1978/',
39 user => 'admin',
40 passwd => 'admin',
41 database => 'demo',
42 encoding => 'iso-8859-2',
43 );
44
45 Options are:
46
47 =over 4
48
49 =item masterurl
50
51 URI to C<estmaster> node
52
53 =item user
54
55 C<estmaster> user with sufficient rights
56
57 =item passwd
58
59 password for user
60
61 =item database
62
63 name of database from which data comes
64
65 =item encoding
66
67 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
68 (and it probably is). This encoding will be converted to C<UTF-8> for
69 Hyper Estraier.
70
71 =back
72
73 Name of database will be used to form URI of documents in index.
74
75 =cut
76
77 sub new {
78 my $class = shift;
79 my $self = {@_};
80 bless($self, $class);
81
82 my $log = $self->_get_logger;
83
84 #$log->debug("self: ", sub { Dumper($self) });
85
86 foreach my $p (qw/masterurl user passwd database/) {
87 $log->logdie("need $p") unless ($self->{$p});
88 }
89
90 my $url = $self->{masterurl} . '/node/' . $self->{database};
91 $self->{url} = $url;
92
93 $log->debug("opening index $self->{url}");
94
95 my $nodes = $self->master( action => 'nodelist' );
96
97 $log->debug("nodes found: $nodes");
98
99 if ($nodes !~ m/^$self->{database}\t/sm) {
100 $log->warn("creating index $url");
101 $self->master(
102 action => 'nodeadd',
103 name => $self->{database},
104 label => "WebPAC $self->{database}",
105 ) || $log->logdie("can't create Hyper Estraier node $self->{database}");
106 }
107
108 $self->{'db'} = HyperEstraier::Node->new($self->{url});
109 $self->{'db'}->set_auth($self->{'user'}, $self->{passwd});
110
111 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
112 $log->info("using index $self->{url} with encoding $encoding");
113
114 $self->{'iconv'} = new Text::Iconv($encoding, 'UTF-8') or
115 $log->logdie("can't create conversion from $encoding to UTF-8");
116
117 $self ? return $self : return undef;
118 }
119
120
121 =head2 add
122
123 Adds one entry to database.
124
125 $est->add(
126 id => 42,
127 ds => $ds,
128 type => 'display',
129 text => 'optional text from which snippet is created',
130 );
131
132 This function will create entries in index using following URI format:
133
134 C<file:///type/database%20name/000>
135
136 Each tag in C<data_structure> with specified C<type> will create one
137 attribute and corresponding hidden text (used for search).
138
139 =cut
140
141 sub add {
142 my $self = shift;
143
144 my $args = {@_};
145
146 my $log = $self->_get_logger;
147
148 my $database = $self->{'database'} || $log->logconfess('no database in $self');
149 $log->logconfess('need db in object') unless ($self->{'db'});
150
151 foreach my $p (qw/id ds type/) {
152 $log->logdie("need $p") unless ($args->{$p});
153 }
154
155 my $type = $args->{'type'};
156 my $id = $args->{'id'};
157
158 my $uri = "file:///$type/$database/$id";
159 $log->debug("creating $uri");
160
161 my $doc = HyperEstraier::Document->new;
162 $doc->add_attr('@uri', $self->{'iconv'}->convert($uri) );
163
164 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
165
166 # filter all tags which have type defined
167 my @tags = grep {
168 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
169 } keys %{ $args->{'ds'} };
170
171 $log->debug("tags = ", join(",", @tags));
172
173 return unless (@tags);
174
175 foreach my $tag (@tags) {
176
177 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
178
179 $log->logconfess("no values for $tag/$type") unless ($vals);
180
181 $vals = $self->{'iconv'}->convert( $vals ) or
182 $log->logdie("can't convert '$vals' to UTF-8");
183
184 $doc->add_attr( $tag, $vals );
185 $doc->add_hidden_text( $vals );
186 }
187
188 my $text = $args->{'text'};
189 if ( $text ) {
190 $text = $self->{'iconv'}->convert( $text ) or
191 $log->logdie("can't convert '$text' to UTF-8");
192 $doc->add_text( $text );
193 }
194
195 $log->debug("adding ", sub { $doc->dump_draft } );
196 $self->{'db'}->put_doc($doc) || $log->logdie("can't add document $uri to node " . $self->{url} . " status: " . $self->{db}->status());
197
198 return 1;
199 }
200
201 #
202 # REST parametars validation data
203 #
204
205 my $estraier_rest = {
206 master => {
207 userdel => [ qw/name/ ],
208 nodelist => [],
209 nodeadd => [ qw/name label/ ],
210 nodedel => [ qw/name/ ],
211 },
212 node => {
213 _set_link => [ qw/url label credit/ ],
214 },
215 };
216
217 =head2 master
218
219 Issue administrative commands to C<estmaster> process and receive response
220 as array of lines
221
222 my $nodelist = $est->master( action => 'nodelist' );
223
224 =cut
225
226 sub master {
227 my $self = shift;
228
229 my $args = {@_};
230 my $log = $self->_get_logger;
231
232 my $action = $args->{action} || $log->logconfess("no action specified");
233
234 $log->logdie("action '$action' isn't supported") unless ($estraier_rest->{master}->{$action});
235
236 $log->debug("master action: $action");
237
238 return $self->estcall(
239 validate => 'master',
240 rest_url => $self->{masterurl} . '/master?action=' . $action ,
241 action => $action,
242 %{ $args },
243 );
244 }
245
246 =head2 add_link
247
248 $est->add_link(
249 from => 'ps',
250 to => 'webpac2',
251 credit => 10000,
252 );
253
254 =cut
255
256 sub add_link {
257 my $self = shift;
258
259 my $args = {@_};
260 my $log = $self->_get_logger;
261
262 my @labels = $self->master( action => 'nodelist' );
263
264 $log->debug("got labels: ", join("|", @labels));
265
266 @labels = grep(/^$args->{to}/, @labels);
267
268 my (undef,$label) = split(/\t/, shift @labels);
269
270 if (! $label) {
271 $log->warn("can't find label for $args->{to}, skipping link creaton");
272 return;
273 }
274
275 $log->debug("using label $label for $args->{to}");
276
277 return $self->estcall(
278 validate => 'node',
279 action => '_set_link',
280 rest_url => $self->{masterurl} . '/node/' . $args->{from} . '/_set_link' ,
281 url => $self->{masterurl} . '/node/' . $args->{to},
282 label => $label,
283 credit => $args->{credit},
284 );
285 }
286
287 =head2 estcall
288
289 Workhourse which does actual calls to Hyper Estraier
290
291 $self->estcall(
292 rest_url => '/master?action=' . $action,
293 validate => 'master',
294 # ...
295 );
296
297 C<rest_url> is relative URL to C<estmaster> and C<validate> is entry into
298 internal hash which will check if all parametars are available before
299 calling function.
300
301 =cut
302
303 sub estcall {
304 my $self = shift;
305 my $args = {@_};
306 my $log = $self->_get_logger;
307
308 $log->debug("estcall: ",Dumper($args));
309
310 foreach my $p (qw/rest_url validate action/) {
311 $log->die("ectcall needs $p parametar") unless ($args->{$p});
312 }
313
314 my $url = $args->{rest_url};
315 my $del = '?';
316 $del = '&' if ($url =~ m#\?#);
317
318 my $url_args;
319
320 foreach my $arg (@{ $estraier_rest->{ $args->{validate} }->{ $args->{action} } }) {
321 $log->logdie("missing parametar $arg for action $args->{action}") unless ($args->{$arg});
322 $url_args .= $del . $arg . '=' . uri_escape( $args->{$arg} );
323 $del = '&';
324 }
325
326 $url .= $url_args if ($url_args);
327
328 $log->debug("calling $url");
329
330 my $res = $self->est_ua()->get($url);
331
332 if ($res->is_success) {
333 #$log->debug( $res->content );
334 return split(/\n/, $res->content) if wantarray;
335 return $res->content || 0E0;
336 } else {
337 $log->warn("unable to call $url: " . $res->status_line);
338 return;
339 }
340
341 }
342
343 =head2 est_ua
344
345 This is helper function to create C<LWP::UserAgent> object with Super User
346 priviledges
347
348 my $ua = $self->est_ua( user => 'admin', passwd => 'admin' );
349
350 =cut
351
352
353
354 sub est_ua {
355 my $self = shift;
356
357 return $self->{_master_ua} if ($self->{_master_ua});
358
359 {
360 package AdminUserAgent;
361 use base qw/LWP::UserAgent/;
362 sub new {
363 my $self = LWP::UserAgent::new(@_);
364 $self->agent("webpac/$VERSION");
365 $self;
366 }
367 sub get_basic_credentials {
368 my($self, $realm, $uri) = @_;
369 return ($self->{user}, $self->{passwd});
370 }
371 sub set_basic_credentials {
372 my ($self, $user, $passwd) = @_;
373 $self->{user} = $user;
374 $self->{passwd} = $passwd;
375 }
376 };
377
378 $self->{_master_ua} = AdminUserAgent->new( ) || sub {
379 my $log = $self->_get_logger;
380 $log->logdie("can't create LWP::UserAgent: $!");
381 };
382
383 $self->{_master_ua}->set_basic_credentials($self->{user}, $self->{passwd});
384
385 return $self->{_master_ua};
386 }
387
388 =head1 AUTHOR
389
390 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
391
392 =head1 COPYRIGHT & LICENSE
393
394 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
395
396 This program is free software; you can redistribute it and/or modify it
397 under the same terms as Perl itself.
398
399 =cut
400
401 1; # End of WebPAC::Output::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26