/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 343 - (show annotations)
Sat Jan 7 01:40:01 2006 UTC (18 years, 3 months ago) by dpavlin
File size: 5402 byte(s)
 r354@llin:  dpavlin | 2006-01-07 00:42:38 +0100
 update to use Search::Estraier [2.09]

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use Search::Estraier;
7 use Text::Iconv;
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.06
17
18 =cut
19
20 our $VERSION = '0.06';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 masterurl => 'http://localhost:1978/',
35 database => 'webpac2',
36 user => 'admin',
37 passwd => 'admin',
38 encoding => 'iso-8859-2',
39 log => $Log::Log4perl->log_object,
40 );
41
42 Options are:
43
44 =over 4
45
46 =item maseterurl
47
48 URI to C<estmaster> node
49
50 =item database
51
52 name of C<estmaster> node
53
54 =item user
55
56 C<estmaster> user with read rights
57
58 =item passwd
59
60 password for user
61
62 =item encoding
63
64 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65 (and it probably is). This encoding will be converted to C<UTF-8> for
66 Hyper Estraier.
67
68 =item log
69
70 L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71 L<Catalyst> and there is support for it).
72
73 =back
74
75 =cut
76
77 sub new {
78 my $class = shift;
79 my $self = {@_};
80 bless($self, $class);
81
82 my $log = $self->_get_logger;
83
84 foreach my $p (qw/masterurl user passwd/) {
85 $log->logdie("need $p") unless ($self->{$p});
86 }
87
88 my $url = $self->{masterurl} . '/node/' . $self->{database};
89 $self->{url} = $url;
90
91 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92
93 $self->{db} = Search::Estraier::Node->new;
94 $self->{db}->set_url($self->{'url'});
95 $self->{db}->set_auth($self->{'user'}, $self->{'passwd'});
96
97 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
98 $log->info("using encoding $encoding");
99
100 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
101 $log->die("can't create conversion from UTF-8 to $encoding");
102
103 $self ? return $self : return undef;
104 }
105
106
107 =head2 search
108
109 Locate items in index
110
111 my @results = $est->search(
112 phrase => 'name of book or novel',
113 add_attr => [
114 "filepath ISTRINC $q",
115 "size NUMGT 100",
116 ],
117 get_attr => qw/PersonalName TitleProper/,
118 order => 'NUMD',
119 max => 100,
120 options => $HyperEstraier::Condition::SURE,
121 page => 42,
122 depth => 0,
123 );
124
125 Options are close match to Hyper Estraier API, except C<get_attr> which defines
126 attributes which will be returned in hash for each record.
127
128 Results are returned as hash array with keys named by attributes
129
130 Pages are numbered C< 1 ... hits/max >.
131
132 =cut
133
134 sub search {
135 my $self = shift;
136
137 my $args = {@_};
138
139 my $log = $self->_get_logger;
140
141 #$log->debug( 'search args: ' . Dumper($args) );
142
143 $self->confess('need db in object') unless ($self->{db});
144 $self->confess('need get_attr') unless ($args->{get_attr});
145
146 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
147
148 my $q = $args->{phrase};
149
150 $log->debug("args: " . Dumper( $args ));
151
152 my $cond = Search::Estraier::Condition->new();
153 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
154 $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
155 map {
156 $cond->add_attr( $self->{iconv}->convert( $_ ) );
157 $log->debug(" + $_");
158 } @{ $args->{add_attr} };
159 };
160
161 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
162 $cond->set_options( $args->{options} ) if ($args->{options});
163 $cond->set_order( $args->{order} ) if ($args->{order});
164
165 my $max = $args->{max} || 7;
166 my $page = $args->{page} || 1;
167 if ($page < 1) {
168 $log->warn("page number $page < 1");
169 $page = 1;
170 }
171
172 $cond->set_max( $page * $max );
173
174 my $result = $self->{db}->search($cond, ( $args->{depth} || 0 )) ||
175 $log->die("can't search for ", sub { Dumper( $args ) });
176
177 my $hits = $result->doc_num;
178 $log->debug("found $hits hits for '$q'");
179
180 my @results;
181
182 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
183
184 #$log->debug("get_doc($i)");
185 my $doc = $result->get_doc( $i );
186 if (! $doc) {
187 $log->warn("can't find result $i");
188 next;
189 }
190
191 my $hash;
192
193 foreach my $attr (@{ $args->{get_attr} }) {
194 my $val = $doc->attr( $attr );
195 #$log->debug("attr $attr = ", $val || 'undef');
196 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
197 }
198
199 if ($hash) {
200 push @results, $hash;
201 }
202
203 }
204
205 # $log->debug("results " . Dumper( \@results ));
206
207 $self->confess("expected to return array") unless (wantarray);
208
209 return @results;
210 }
211
212 =head2 confess
213
214 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
215 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
216 C<die>.
217
218 =cut
219
220 sub confess {
221 my $self = shift;
222 if (my $log = $self->{'log'}) {
223 if ($log->can('logconfess')) {
224 $log->logconfess(@_);
225 } elsif ($log->can('fatal')) {
226 $log->fatal(@_);
227 die @_;
228 } elsif ($log->can('error')) {
229 $log->error(@_);
230 } else {
231 die @_;
232 }
233 } else {
234 die @_;
235 }
236 }
237
238 =head2 _get_logger
239
240 For compatibility with same method from L<WebPAC::Common>, but without
241 need for it.
242
243 =cut
244
245 sub _get_logger {
246 my $self = shift;
247
248 return $self->{'log'} || die "really need log!";
249 }
250
251 =head1 AUTHOR
252
253 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
254
255 =head1 COPYRIGHT & LICENSE
256
257 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
258
259 This program is free software; you can redistribute it and/or modify it
260 under the same terms as Perl itself.
261
262 =cut
263
264 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26