/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 212 - (show annotations)
Mon Dec 5 17:47:16 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 5339 byte(s)
 r11525@llin:  dpavlin | 2005-12-05 01:45:38 +0100
 minor tweaks, tests now pass

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use HyperEstraier;
7 use Text::Iconv;
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.04
17
18 =cut
19
20 our $VERSION = '0.04';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 masterurl => 'http://localhost:1978/',
35 database => 'webpac2',
36 user => 'admin',
37 passwd => 'admin',
38 encoding => 'iso-8859-2',
39 log => $Log::Log4perl->log_object,
40 );
41
42 Options are:
43
44 =over 4
45
46 =item maseterurl
47
48 URI to C<estmaster> node
49
50 =item database
51
52 name of C<estmaster> node
53
54 =item user
55
56 C<estmaster> user with read rights
57
58 =item passwd
59
60 password for user
61
62 =item encoding
63
64 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65 (and it probably is). This encoding will be converted to C<UTF-8> for
66 Hyper Estraier.
67
68 =item log
69
70 L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71 L<Catalyst> and there is support for it).
72
73 =back
74
75 =cut
76
77 sub new {
78 my $class = shift;
79 my $self = {@_};
80 bless($self, $class);
81
82 my $log = $self->_get_logger;
83
84 foreach my $p (qw/masterurl user passwd/) {
85 $log->logdie("need $p") unless ($self->{$p});
86 }
87
88 my $url = $self->{masterurl} . '/node/' . $self->{database};
89 $self->{url} = $url;
90
91 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92
93 $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
94 $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
95
96 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
97 $log->info("using encoding $encoding");
98
99 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
100 $log->die("can't create conversion from UTF-8 to $encoding");
101
102 $self ? return $self : return undef;
103 }
104
105
106 =head2 search
107
108 Locate items in index
109
110 my @results = $est->search(
111 phrase => 'name of book or novel',
112 add_attr => [
113 "filepath ISTRINC $q",
114 "size NUMGT 100",
115 ],
116 get_attr => qw/PersonalName TitleProper/,
117 order => 'NUMD',
118 max => 100,
119 options => $HyperEstraier::Condition::SURE,
120 page => 42,
121 );
122
123 Options are close match to Hyper Estraier API, except C<get_attr> which defines
124 attributes which will be returned in hash for each record.
125
126 Results are returned as hash array with keys named by attributes
127
128 Pages are numbered C< 1 ... hits/max >.
129
130 =cut
131
132 sub search {
133 my $self = shift;
134
135 my $args = {@_};
136
137 my $log = $self->_get_logger;
138
139 #$log->debug( 'search args: ' . Dumper($args) );
140
141 $self->confess('need db in object') unless ($self->{db});
142 $self->confess('need get_attr') unless ($args->{get_attr});
143
144 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
145
146 my $q = $args->{phrase};
147
148 $log->debug("args: " . Dumper( $args ));
149
150 my $cond = HyperEstraier::Condition->new();
151 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
152 $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
153 map {
154 $cond->add_attr( $self->{iconv}->convert( $_ ) );
155 $log->debug(" + $_");
156 } @{ $args->{add_attr} };
157 };
158
159 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
160 $cond->set_options( $args->{options} ) if ($args->{options});
161 $cond->set_order( $args->{order} ) if ($args->{order});
162
163 my $max = $args->{max} || 7;
164 my $page = $args->{page} || 1;
165 if ($page < 1) {
166 $log->warn("page number $page < 1");
167 $page = 1;
168 }
169
170 $cond->set_max( $page * $max );
171
172 my $result = $self->{db}->search($cond, 0) ||
173 $log->die("can't search for ", sub { Dumper( $args ) });
174
175 my $hits = $result->doc_num;
176 $log->debug("found $hits hits for '$q'");
177
178 my @results;
179
180 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
181
182 #$log->debug("get_doc($i)");
183 my $doc = $result->get_doc( $i );
184 if (! $doc) {
185 $log->warn("can't find result $i");
186 next;
187 }
188
189 my $hash;
190
191 foreach my $attr (@{ $args->{get_attr} }) {
192 my $val = $doc->attr( $attr );
193 #$log->debug("attr $attr = ", $val || 'undef');
194 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
195 }
196
197 if ($hash) {
198 push @results, $hash;
199 }
200
201 }
202
203 # $log->debug("results " . Dumper( \@results ));
204
205 $self->confess("expected to return array") unless (wantarray);
206
207 return @results;
208 }
209
210 =head2 confess
211
212 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
213 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
214 C<die>.
215
216 =cut
217
218 sub confess {
219 my $self = shift;
220 if (my $log = $self->{'log'}) {
221 if ($log->can('logconfess')) {
222 $log->logconfess(@_);
223 } elsif ($log->can('fatal')) {
224 $log->fatal(@_);
225 die @_;
226 } elsif ($log->can('error')) {
227 $log->error(@_);
228 } else {
229 die @_;
230 }
231 } else {
232 die @_;
233 }
234 }
235
236 =head2 _get_logger
237
238 For compatibility with same method from L<WebPAC::Common>, but without
239 need for it.
240
241 =cut
242
243 sub _get_logger {
244 my $self = shift;
245
246 return $self->{'log'} || die "really need log!";
247 }
248
249 =head1 AUTHOR
250
251 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
252
253 =head1 COPYRIGHT & LICENSE
254
255 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
256
257 This program is free software; you can redistribute it and/or modify it
258 under the same terms as Perl itself.
259
260 =cut
261
262 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26