/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory | Revision Log | View Patch

revision 75 by dpavlin, Sun Nov 20 20:32:41 2005 UTC revision 401 by dpavlin, Sun Feb 19 16:36:42 2006 UTC
# Line 5  use strict; Line 5  use strict;
5    
6  use base qw/WebPAC::Common/;  use base qw/WebPAC::Common/;
7    
8  use HyperEstraier;  use Search::Estraier;
9  use Text::Iconv;  use Encode qw/from_to/;
10  use Data::Dumper;  use Data::Dumper;
11    use LWP;
12    use URI::Escape;
13    
14  =head1 NAME  =head1 NAME
15    
# Line 15  WebPAC::Output::Estraier - Create Hyper Line 17  WebPAC::Output::Estraier - Create Hyper
17    
18  =head1 VERSION  =head1 VERSION
19    
20  Version 0.01  Version 0.10
21    
22  =cut  =cut
23    
24  our $VERSION = '0.01';  our $VERSION = '0.10';
25    
26  =head1 SYNOPSIS  =head1 SYNOPSIS
27    
# Line 33  type C<search>. Line 35  type C<search>.
35  Connect to Hyper Estraier index using HTTP  Connect to Hyper Estraier index using HTTP
36    
37   my $est = new WebPAC::Output::Estraier(   my $est = new WebPAC::Output::Estraier(
38          url => 'http://localhost:1978/node/webpac2',          masterurl => 'http://localhost:1978/',
39          user => 'admin',          user => 'admin',
40          passwd => 'admin',          passwd => 'admin',
41          database => 'demo',          database => 'demo',
42            label => 'node label',
43          encoding => 'iso-8859-2',          encoding => 'iso-8859-2',
44            clean => 1,
45   );   );
46    
47  Options are:  Options are:
48    
49  =over 4  =over 4
50    
51  =item url  =item masterurl
52    
53  URI to C<estmaster> node  URI to C<estmaster> node
54    
# Line 60  password for user Line 64  password for user
64    
65  name of database from which data comes  name of database from which data comes
66    
67    =item label
68    
69    label for node (optional)
70    
71  =item encoding  =item encoding
72    
73  character encoding of C<data_structure> if it's different than C<ISO-8859-2>  character encoding of C<data_structure> if it's different than C<ISO-8859-2>
# Line 74  Name of database will be used to form UR Line 82  Name of database will be used to form UR
82    
83  sub new {  sub new {
84          my $class = shift;          my $class = shift;
85          my $self = {@_};          my $self = {@_};
86          bless($self, $class);          bless($self, $class);
87    
88          my $log = $self->_get_logger;          my $log = $self->_get_logger;
89    
90          foreach my $p (qw/url user passwd/) {          #$log->debug("self: ", sub { Dumper($self) });
91    
92            foreach my $p (qw/masterurl user passwd database/) {
93                  $log->logdie("need $p") unless ($self->{$p});                  $log->logdie("need $p") unless ($self->{$p});
94          }          }
95    
96          $log->info("opening Hyper Estraier index $self->{'url'}");          $self->{encoding} ||= 'ISO-8859-2';
97    
98          $self->{'db'} = HyperEstraier::Node->new($self->{'url'});          my $url = $self->{masterurl} . '/node/' . $self->{database};
99          $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});          $self->{url} = $url;
100    
101          my $encoding = $self->{'encoding'} || 'ISO-8859-2';          if ($self->{clean}) {
102          $log->info("using encoding $encoding");                  $log->debug("nodedel $self->{database}");
103                    $self->master( action => 'nodedel', name => $self->{database} );
104            } else {
105                    $log->debug("opening index $self->{url}");
106            }
107    
108            my $nodes = $self->master( action => 'nodelist' );
109    
110          $self->{'iconv'} = new Text::Iconv($encoding, 'UTF-8') or          $log->debug("nodes found: $nodes");
111                  $log->die("can't create conversion from $encoding to UTF-8");  
112            if ($nodes !~ m/^$self->{database}\t/sm) {
113                    my $label = $self->{label} || 'WebPAC ' . $self->{database};
114                    $log->warn("creating index $url ($label)");
115                    $self->master(
116                            action => 'nodeadd',
117                            name => $self->{database},
118                            label => $self->convert( $label ),
119                    ) || $log->logdie("can't create Hyper Estraier node $self->{database}");
120            }
121    
122            $self->{db} = Search::Estraier::Node->new( debug => $self->{debug} );
123            $self->{db}->set_url($self->{url});
124            $self->{db}->set_auth($self->{user}, $self->{passwd});
125    
126            $log->info("using index $self->{url} with encoding $self->{encoding}");
127    
128          $self ? return $self : return undef;          $self ? return $self : return undef;
129  }  }
# Line 106  Adds one entry to database. Line 137  Adds one entry to database.
137          id => 42,          id => 42,
138          ds => $ds,          ds => $ds,
139          type => 'display',          type => 'display',
         url_prefix => 'database name',  
140          text => 'optional text from which snippet is created',          text => 'optional text from which snippet is created',
141    );    );
142    
143  This function will create  entries in index using following URI format:  This function will create  entries in index using following URI format:
144    
145    C<file:///database%20name/000>    C<file:///type/database%20name/000>
146    
147  Each tag in C<data_structure> with specified C<type> will create one  Each tag in C<data_structure> with specified C<type> will create one
148  attribute and corresponding hidden text (used for search).  attribute and corresponding hidden text (used for search).
# Line 134  sub add { Line 164  sub add {
164          }          }
165    
166          my $type = $args->{'type'};          my $type = $args->{'type'};
167          my $mfn = $args->{'id'};          my $id = $args->{'id'};
168    
169          my $uri = "file:///$type/$database/$mfn";          my $uri = "file:///$type/$database/$id";
170          $log->debug("creating $uri");          $log->debug("creating $uri");
171    
172          my $doc = HyperEstraier::Document->new;          my $doc = Search::Estraier::Document->new;
173          $doc->add_attr('@uri', $self->{'iconv'}->convert($uri) );          $doc->add_attr('@uri', $self->convert($uri) );
174    
175          $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );          $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
176    
177          # filter all tags which have type defined          # filter all tags which have type defined
178          my @tags = grep {          my @tags = grep {
179                  defined( $args->{'ds'}->{$_}->{$type} )                  ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
180          } keys %{ $args->{'ds'} };          } keys %{ $args->{'ds'} };
181    
182          $log->debug("tags = ", join(",", @tags));          $log->debug("tags = ", join(",", @tags));
# Line 157  sub add { Line 187  sub add {
187    
188                  my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });                  my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
189    
190                  $log->logconfess("no values for $tag/$type") unless ($vals);                  next if (! $vals);
191    
192                  $vals = $self->{'iconv'}->convert( $vals ) or                  $vals = $self->convert( $vals ) or
193                          $log->die("can't convert '$vals' to UTF-8");                          $log->logdie("can't convert '$vals' to UTF-8");
194    
195                  $doc->add_attr( $tag, $vals );                  $doc->add_attr( $tag, $vals );
196                  $doc->add_hidden_text( $vals );                  $doc->add_hidden_text( $vals );
# Line 168  sub add { Line 198  sub add {
198    
199          my $text = $args->{'text'};          my $text = $args->{'text'};
200          if ( $text ) {          if ( $text ) {
201                  $text = $self->{'iconv'}->convert( $text ) or                  $text = $self->convert( $text ) or
202                          $log->die("can't convert '$text' to UTF-8");                          $log->logdie("can't convert '$text' to UTF-8");
203                  $doc->add_text( $text );                  $doc->add_text( $text );
204          }          }
205    
206          $log->debug("adding ", sub { $doc->dump_draft } );          $log->debug("adding ", sub { $doc->dump_draft } );
207          $self->{'db'}->put_doc($doc) || $log->die("can't add document $uri to index");          $self->{'db'}->put_doc($doc) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{url} . " status: " . $self->{db}->status());
208    
209          return 1;          return 1;
210  }  }
211    
212    #
213    # REST parameters validation data
214    #
215    
216    my $estraier_rest = {
217            master => {
218                    userdel => [ qw/name/ ],
219                    nodelist => [],
220                    nodeadd => [ qw/name label/ ],
221                    nodedel => [ qw/name/ ],
222            },
223            node => {
224                    _set_link => [ qw/url label credit/ ],
225            },
226    };
227    
228    =head2 master
229    
230    Issue administrative commands to C<estmaster> process and receive response
231    as array of lines
232    
233      my $nodelist = $est->master( action => 'nodelist' );
234    
235    =cut
236    
237    sub master {
238            my $self = shift;
239    
240            my $args = {@_};
241            my $log = $self->_get_logger;
242    
243            my $action = $args->{action} || $log->logconfess("no action specified");
244    
245            $log->logdie("action '$action' isn't supported") unless ($estraier_rest->{master}->{$action});
246    
247            $log->debug("master action: $action");
248    
249            return $self->estcall(
250                    validate => 'master',
251                    rest_url => $self->{masterurl} . '/master?action=' . $action ,
252                    action => $action,
253                    %{ $args },
254            );
255    }
256    
257    =head2 add_link
258    
259      $est->add_link(
260            from => 'ps',
261            to => 'webpac2',
262            credit => 10000,
263      );
264    
265    =cut
266    
267    sub add_link {
268            my $self = shift;
269    
270            my $args = {@_};
271            my $log = $self->_get_logger;
272    
273            my @labels = $self->master( action => 'nodelist' );
274    
275            $log->debug("got labels: ", join("|", @labels));
276    
277            @labels = grep(/^$args->{to}\t/, @labels);
278            my $label = shift @labels;
279            (undef,$label) = split(/\t/, $label) if ($label);
280    
281            if (! $label) {
282                    $log->warn("can't find label for $args->{to}, skipping link creaton");
283                    return;
284            }
285    
286            $log->debug("using label $label for $args->{to}");
287    
288            return $self->estcall(
289                    validate => 'node',
290                    action => '_set_link',
291                    rest_url => $self->{masterurl} . '/node/' . $args->{from} . '/_set_link' ,
292                    url => $self->{masterurl} . '/node/' . $args->{to},
293                    label => $label,
294                    credit => $args->{credit},
295            );
296    }
297    
298    =head2 estcall
299    
300    Workhorse which does actual calls to Hyper Estraier
301    
302      $self->estcall(
303            rest_url => '/master?action=' . $action,
304            validate => 'master',
305            # ...
306      );
307    
308    C<rest_url> is the relative URL to C<estmaster> and C<validate> is the entry in the
309    internal hash which is used to check that all parameters are available before
310    calling the function.
311    
312    =cut
313    
314    sub estcall {
315            my $self = shift;
316            my $args = {@_};
317            my $log = $self->_get_logger;
318    
319            $log->debug("estcall: ",Dumper($args));
320    
321            foreach my $p (qw/rest_url validate action/) {
322                    $log->die("ectcall needs $p parametar") unless ($args->{$p});
323            }
324    
325            my $url = $args->{rest_url};
326            my $del = '?';
327            $del = '&' if ($url =~ m#\?#);
328    
329            my $url_args;
330    
331            foreach my $arg (@{ $estraier_rest->{ $args->{validate} }->{ $args->{action} } }) {
332                    $log->logdie("missing parametar $arg for action $args->{action}") unless ($args->{$arg});
333                    $url_args .= $del . $arg . '=' . uri_escape( $args->{$arg} );
334                    $del = '&';
335            }
336    
337            $url .= $url_args if ($url_args);
338    
339            $log->debug("calling $url");
340    
341            my $res = $self->est_ua()->get($url);
342    
343            if ($res->is_success) {
344                    #$log->debug( $res->content );
345                    return split(/\n/, $res->content) if wantarray;
346                    return $res->content || 0E0;
347            } else {
348                    $log->warn("unable to call $url: " . $res->status_line);
349                    return;
350            }
351    
352    }
353    
354    =head2 est_ua
355    
356    This is a helper function to create a C<LWP::UserAgent> object with Super User
357    privileges
358    
359      my $ua = $self->est_ua( user => 'admin', passwd => 'admin' );
360    
361    =cut
362    
363                                                
364    
365    sub est_ua {
366            my $self = shift;
367    
368            return $self->{_master_ua} if ($self->{_master_ua});
369    
370            {
371                    package AdminUserAgent;
372                    use base qw/LWP::UserAgent/;
373                    sub new {
374                            my $self = LWP::UserAgent::new(@_);
375                            $self->agent("webpac/$VERSION");
376                            $self;
377                    }
378                    sub get_basic_credentials {
379                            my($self, $realm, $uri) = @_;
380                            return ($self->{user}, $self->{passwd});
381                    }
382                    sub set_basic_credentials {
383                            my ($self, $user, $passwd) = @_;
384                            $self->{user} = $user;
385                            $self->{passwd} = $passwd;
386                    }
387            };
388    
389            $self->{_master_ua} = AdminUserAgent->new( ) || sub {
390                    my $log = $self->_get_logger;
391                    $log->logdie("can't create LWP::UserAgent: $!");
392            };
393    
394            $self->{_master_ua}->set_basic_credentials($self->{user}, $self->{passwd});
395    
396            return $self->{_master_ua};
397    }
398    
399    =head2 convert
400    
401     my $utf8_string = $self->convert('string in codepage');
402    
403    =cut
404    
405    sub convert {
406            my $self = shift;
407    
408            my $text = shift || return;
409            from_to($text, $self->{encoding}, 'UTF-8');
410            return $text;
411    }
412    
413  =head1 AUTHOR  =head1 AUTHOR
414    
415  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>  Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>

Legend:
Removed from v.75  
changed lines
  Added in v.401

  ViewVC Help
Powered by ViewVC 1.1.26