/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 212 - (hide annotations)
Mon Dec 5 17:47:16 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 5339 byte(s)
 r11525@llin:  dpavlin | 2005-12-05 01:45:38 +0100
 minor tweaks, tests now pass

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 211 Version 0.04
17 dpavlin 80
18     =cut
19    
20 dpavlin 211 our $VERSION = '0.04';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34 dpavlin 211 masterurl => 'http://localhost:1978/',
35     database => 'webpac2',
36 dpavlin 80 user => 'admin',
37     passwd => 'admin',
38     encoding => 'iso-8859-2',
39 dpavlin 81 log => $Log::Log4perl->log_object,
40 dpavlin 80 );
41    
42     Options are:
43    
44     =over 4
45    
46 dpavlin 211 =item maseterurl
47 dpavlin 80
48     URI to C<estmaster> node
49    
50 dpavlin 211 =item database
51    
52     name of C<estmaster> node
53    
54 dpavlin 80 =item user
55    
56     C<estmaster> user with read rights
57    
58     =item passwd
59    
60     password for user
61    
62     =item encoding
63    
64     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65     (and it probably is). This encoding will be converted to C<UTF-8> for
66     Hyper Estraier.
67    
68 dpavlin 81 =item log
69    
70     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71     L<Catalyst> and there is support for it).
72    
73 dpavlin 80 =back
74    
75     =cut
76    
77     sub new {
78     my $class = shift;
79     my $self = {@_};
80     bless($self, $class);
81    
82     my $log = $self->_get_logger;
83    
84 dpavlin 211 foreach my $p (qw/masterurl user passwd/) {
85 dpavlin 80 $log->logdie("need $p") unless ($self->{$p});
86     }
87    
88 dpavlin 211 my $url = $self->{masterurl} . '/node/' . $self->{database};
89     $self->{url} = $url;
90    
91 dpavlin 80 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92    
93     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
94     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
95    
96     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
97     $log->info("using encoding $encoding");
98    
99 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
100     $log->die("can't create conversion from UTF-8 to $encoding");
101 dpavlin 80
102     $self ? return $self : return undef;
103     }
104    
105    
106     =head2 search
107    
108     Locate items in index
109    
110 dpavlin 81 my @results = $est->search(
111 dpavlin 122 phrase => 'name of book or novel',
112     add_attr => [
113     "filepath ISTRINC $q",
114     "size NUMGT 100",
115     ],
116     get_attr => qw/PersonalName TitleProper/,
117     order => 'NUMD',
118     max => 100,
119     options => $HyperEstraier::Condition::SURE,
120 dpavlin 156 page => 42,
121 dpavlin 80 );
122    
123 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
124     attributes which will be returned in hash for each record.
125    
126 dpavlin 81 Results are returned as hash array with keys named by attributes
127    
128 dpavlin 156 Pages are numbered C< 1 ... hits/max >.
129    
130 dpavlin 80 =cut
131    
132     sub search {
133     my $self = shift;
134    
135     my $args = {@_};
136    
137     my $log = $self->_get_logger;
138    
139 dpavlin 152 #$log->debug( 'search args: ' . Dumper($args) );
140    
141 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
142     $self->confess('need get_attr') unless ($args->{get_attr});
143 dpavlin 81
144 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
145 dpavlin 80
146 dpavlin 122 my $q = $args->{phrase};
147 dpavlin 86
148 dpavlin 123 $log->debug("args: " . Dumper( $args ));
149    
150 dpavlin 80 my $cond = HyperEstraier::Condition->new();
151 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
152     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
153     map {
154 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
155 dpavlin 122 $log->debug(" + $_");
156 dpavlin 123 } @{ $args->{add_attr} };
157     };
158 dpavlin 80
159 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
160     $cond->set_options( $args->{options} ) if ($args->{options});
161     $cond->set_order( $args->{order} ) if ($args->{order});
162 dpavlin 80
163 dpavlin 156 my $max = $args->{max} || 7;
164     my $page = $args->{page} || 1;
165 dpavlin 177 if ($page < 1) {
166     $log->warn("page number $page < 1");
167     $page = 1;
168     }
169 dpavlin 156
170     $cond->set_max( $page * $max );
171    
172 dpavlin 122 my $result = $self->{db}->search($cond, 0) ||
173 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
174    
175     my $hits = $result->doc_num;
176 dpavlin 86 $log->debug("found $hits hits for '$q'");
177 dpavlin 80
178     my @results;
179    
180 dpavlin 156 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
181 dpavlin 80
182 dpavlin 86 #$log->debug("get_doc($i)");
183 dpavlin 80 my $doc = $result->get_doc( $i );
184     if (! $doc) {
185     $log->warn("can't find result $i");
186     next;
187     }
188    
189     my $hash;
190    
191 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
192 dpavlin 80 my $val = $doc->attr( $attr );
193 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
194 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
195 dpavlin 80 }
196    
197     if ($hash) {
198     push @results, $hash;
199     }
200    
201     }
202    
203 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
204 dpavlin 80
205 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
206 dpavlin 80
207     return @results;
208     }
209    
210     =head2 confess
211    
212     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
213     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
214     C<die>.
215    
216     =cut
217    
218     sub confess {
219     my $self = shift;
220     if (my $log = $self->{'log'}) {
221 dpavlin 87 if ($log->can('logconfess')) {
222     $log->logconfess(@_);
223 dpavlin 80 } elsif ($log->can('fatal')) {
224     $log->fatal(@_);
225 dpavlin 87 die @_;
226 dpavlin 80 } elsif ($log->can('error')) {
227     $log->error(@_);
228     } else {
229     die @_;
230     }
231     } else {
232     die @_;
233     }
234     }
235    
236     =head2 _get_logger
237    
238     For compatibility with same method from L<WebPAC::Common>, but without
239     need for it.
240    
241     =cut
242    
243     sub _get_logger {
244     my $self = shift;
245    
246     return $self->{'log'} || die "really need log!";
247     }
248    
249     =head1 AUTHOR
250    
251     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
252    
253     =head1 COPYRIGHT & LICENSE
254    
255     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
256    
257     This program is free software; you can redistribute it and/or modify it
258     under the same terms as Perl itself.
259    
260     =cut
261    
262     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26