/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 343 - (hide annotations)
Sat Jan 7 01:40:01 2006 UTC (18 years, 4 months ago) by dpavlin
File size: 5402 byte(s)
 r354@llin:  dpavlin | 2006-01-07 00:42:38 +0100
 update to use Search::Estraier [2.09]

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6 dpavlin 343 use Search::Estraier;
7 dpavlin 80 use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 343 Version 0.06
17 dpavlin 80
18     =cut
19    
20 dpavlin 343 our $VERSION = '0.06';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34 dpavlin 211 masterurl => 'http://localhost:1978/',
35     database => 'webpac2',
36 dpavlin 80 user => 'admin',
37     passwd => 'admin',
38     encoding => 'iso-8859-2',
39 dpavlin 81 log => $Log::Log4perl->log_object,
40 dpavlin 80 );
41    
42     Options are:
43    
44     =over 4
45    
46 dpavlin 211 =item maseterurl
47 dpavlin 80
48     URI to C<estmaster> node
49    
50 dpavlin 211 =item database
51    
52     name of C<estmaster> node
53    
54 dpavlin 80 =item user
55    
56     C<estmaster> user with read rights
57    
58     =item passwd
59    
60     password for user
61    
62     =item encoding
63    
64     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65     (and it probably is). This encoding will be converted to C<UTF-8> for
66     Hyper Estraier.
67    
68 dpavlin 81 =item log
69    
70     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71     L<Catalyst> and there is support for it).
72    
73 dpavlin 80 =back
74    
75     =cut
76    
77     sub new {
78     my $class = shift;
79     my $self = {@_};
80     bless($self, $class);
81    
82     my $log = $self->_get_logger;
83    
84 dpavlin 211 foreach my $p (qw/masterurl user passwd/) {
85 dpavlin 80 $log->logdie("need $p") unless ($self->{$p});
86     }
87    
88 dpavlin 211 my $url = $self->{masterurl} . '/node/' . $self->{database};
89     $self->{url} = $url;
90    
91 dpavlin 80 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92    
93 dpavlin 343 $self->{db} = Search::Estraier::Node->new;
94     $self->{db}->set_url($self->{'url'});
95     $self->{db}->set_auth($self->{'user'}, $self->{'passwd'});
96 dpavlin 80
97     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
98     $log->info("using encoding $encoding");
99    
100 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
101     $log->die("can't create conversion from UTF-8 to $encoding");
102 dpavlin 80
103     $self ? return $self : return undef;
104     }
105    
106    
107     =head2 search
108    
109     Locate items in index
110    
111 dpavlin 81 my @results = $est->search(
112 dpavlin 122 phrase => 'name of book or novel',
113     add_attr => [
114     "filepath ISTRINC $q",
115     "size NUMGT 100",
116     ],
117     get_attr => qw/PersonalName TitleProper/,
118     order => 'NUMD',
119     max => 100,
120     options => $HyperEstraier::Condition::SURE,
121 dpavlin 156 page => 42,
122 dpavlin 216 depth => 0,
123 dpavlin 80 );
124    
125 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
126     attributes which will be returned in hash for each record.
127    
128 dpavlin 81 Results are returned as hash array with keys named by attributes
129    
130 dpavlin 156 Pages are numbered C< 1 ... hits/max >.
131    
132 dpavlin 80 =cut
133    
134     sub search {
135     my $self = shift;
136    
137     my $args = {@_};
138    
139     my $log = $self->_get_logger;
140    
141 dpavlin 152 #$log->debug( 'search args: ' . Dumper($args) );
142    
143 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
144     $self->confess('need get_attr') unless ($args->{get_attr});
145 dpavlin 81
146 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
147 dpavlin 80
148 dpavlin 122 my $q = $args->{phrase};
149 dpavlin 86
150 dpavlin 123 $log->debug("args: " . Dumper( $args ));
151    
152 dpavlin 343 my $cond = Search::Estraier::Condition->new();
153 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
154     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
155     map {
156 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
157 dpavlin 122 $log->debug(" + $_");
158 dpavlin 123 } @{ $args->{add_attr} };
159     };
160 dpavlin 80
161 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
162     $cond->set_options( $args->{options} ) if ($args->{options});
163     $cond->set_order( $args->{order} ) if ($args->{order});
164 dpavlin 80
165 dpavlin 156 my $max = $args->{max} || 7;
166     my $page = $args->{page} || 1;
167 dpavlin 177 if ($page < 1) {
168     $log->warn("page number $page < 1");
169     $page = 1;
170     }
171 dpavlin 156
172     $cond->set_max( $page * $max );
173    
174 dpavlin 216 my $result = $self->{db}->search($cond, ( $args->{depth} || 0 )) ||
175 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
176    
177     my $hits = $result->doc_num;
178 dpavlin 86 $log->debug("found $hits hits for '$q'");
179 dpavlin 80
180     my @results;
181    
182 dpavlin 156 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
183 dpavlin 80
184 dpavlin 86 #$log->debug("get_doc($i)");
185 dpavlin 80 my $doc = $result->get_doc( $i );
186     if (! $doc) {
187     $log->warn("can't find result $i");
188     next;
189     }
190    
191     my $hash;
192    
193 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
194 dpavlin 80 my $val = $doc->attr( $attr );
195 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
196 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
197 dpavlin 80 }
198    
199     if ($hash) {
200     push @results, $hash;
201     }
202    
203     }
204    
205 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
206 dpavlin 80
207 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
208 dpavlin 80
209     return @results;
210     }
211    
212     =head2 confess
213    
214     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
215     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
216     C<die>.
217    
218     =cut
219    
220     sub confess {
221     my $self = shift;
222     if (my $log = $self->{'log'}) {
223 dpavlin 87 if ($log->can('logconfess')) {
224     $log->logconfess(@_);
225 dpavlin 80 } elsif ($log->can('fatal')) {
226     $log->fatal(@_);
227 dpavlin 87 die @_;
228 dpavlin 80 } elsif ($log->can('error')) {
229     $log->error(@_);
230     } else {
231     die @_;
232     }
233     } else {
234     die @_;
235     }
236     }
237    
238     =head2 _get_logger
239    
240     For compatibility with same method from L<WebPAC::Common>, but without
241     need for it.
242    
243     =cut
244    
245     sub _get_logger {
246     my $self = shift;
247    
248     return $self->{'log'} || die "really need log!";
249     }
250    
251     =head1 AUTHOR
252    
253     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
254    
255     =head1 COPYRIGHT & LICENSE
256    
257     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
258    
259     This program is free software; you can redistribute it and/or modify it
260     under the same terms as Perl itself.
261    
262     =cut
263    
264     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26