/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 887 - (show annotations)
Mon Sep 3 15:26:46 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 5478 byte(s)
 r1322@llin:  dpavlin | 2007-09-03 16:44:01 +0200
 - replace Data::Dumper usage with Data::Dump
 - rewrite WebPAC::Store to use Class::Accessor

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use Search::Estraier;
7 use Encode qw/from_to/;
8 use Data::Dump qw/dump/;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.07
17
18 =cut
19
20 our $VERSION = '0.07';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 masterurl => 'http://localhost:1978/',
35 database => 'webpac2',
36 user => 'admin',
37 passwd => 'admin',
38 encoding => 'iso-8859-2',
39 log => $Log::Log4perl->log_object,
40 );
41
42 Options are:
43
44 =over 4
45
46 =item maseterurl
47
48 URI to C<estmaster> node
49
50 =item database
51
52 name of C<estmaster> node
53
54 =item user
55
56 C<estmaster> user with read rights
57
58 =item passwd
59
60 password for user
61
62 =item encoding
63
64 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65 (and it probably is). This encoding will be converted to C<UTF-8> for
66 Hyper Estraier.
67
68 =item log
69
70 L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71 L<Catalyst> and there is support for it).
72
73 =back
74
75 =cut
76
77 sub new {
78 my $class = shift;
79 my $self = {@_};
80 bless($self, $class);
81
82 my $log = $self->_get_logger;
83
84 foreach my $p (qw/masterurl user passwd/) {
85 $log->logdie("need $p") unless ($self->{$p});
86 }
87
88 my $url = $self->{masterurl} . '/node/' . $self->{database};
89 $self->{url} = $url;
90
91 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92
93 $self->{db} = Search::Estraier::Node->new;
94 $self->{db}->set_url($self->{'url'});
95 $self->{db}->set_auth($self->{'user'}, $self->{'passwd'});
96
97 $self->{'encoding'} ||= 'ISO-8859-2';
98 $log->info("using encoding ",$self->{encoding});
99
100 $self ? return $self : return undef;
101 }
102
103
104 =head2 search
105
106 Locate items in index
107
108 my @results = $est->search(
109 phrase => 'name of book or novel',
110 add_attr => [
111 "filepath ISTRINC $q",
112 "size NUMGT 100",
113 ],
114 get_attr => qw/PersonalName TitleProper/,
115 order => 'NUMD',
116 max => 100,
117 options => $HyperEstraier::Condition::SURE,
118 page => 42,
119 depth => 0,
120 );
121
122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
123 attributes which will be returned in hash for each record.
124
125 Results are returned as hash array with keys named by attributes
126
127 Pages are numbered C< 1 ... hits/max >.
128
129 =cut
130
131 sub search {
132 my $self = shift;
133
134 my $args = {@_};
135
136 my $log = $self->_get_logger;
137
138 #$log->debug( 'search args: ', dump($args) );
139
140 $self->confess('need db in object') unless ($self->{db});
141 $self->confess('need get_attr') unless ($args->{get_attr});
142
143 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
144
145 my $q = $args->{phrase};
146
147 $log->debug("args: ", dump( $args ));
148
149 my $cond = Search::Estraier::Condition->new();
150 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
151 $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
152 map {
153 $cond->add_attr( $self->convert( $_ ) );
154 $log->debug(" + $_");
155 } @{ $args->{add_attr} };
156 };
157
158 $cond->set_phrase( $self->convert($q) ) if ($q);
159 $cond->set_options( $args->{options} ) if ($args->{options});
160 $cond->set_order( $args->{order} ) if ($args->{order});
161
162 my $max = $args->{max} || 7;
163 my $page = $args->{page} || 1;
164 if ($page < 1) {
165 $log->warn("page number $page < 1");
166 $page = 1;
167 }
168
169 $cond->set_max( $page * $max );
170
171 my $result = $self->{db}->search($cond, ( $args->{depth} || 0 )) ||
172 $log->logdie("can't search for ", sub { dump( $args ) });
173
174 my $hits = $result->doc_num;
175 $log->debug("found $hits hits for '$q'");
176
177 my @results;
178
179 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
180
181 #$log->debug("get_doc($i)");
182 my $doc = $result->get_doc( $i );
183 if (! $doc) {
184 $log->warn("can't find result $i");
185 next;
186 }
187
188 my $hash;
189
190 foreach my $attr (@{ $args->{get_attr} }) {
191 my $val = $doc->attr( $attr );
192 #$log->debug("attr $attr = ", $val || 'undef');
193 $hash->{$attr} = $self->convert( $val ) if (defined($val));
194 }
195
196 if ($hash) {
197 push @results, $hash;
198 }
199
200 }
201
202 # $log->debug("results " . dump( \@results ));
203
204 $self->confess("expected to return array") unless (wantarray);
205
206 return @results;
207 }
208
209 =head2 confess
210
211 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
212 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
213 C<die>.
214
215 =cut
216
217 sub confess {
218 my $self = shift;
219 if (my $log = $self->{'log'}) {
220 if ($log->can('logconfess')) {
221 $log->logconfess(@_);
222 } elsif ($log->can('fatal')) {
223 $log->fatal(@_);
224 die @_;
225 } elsif ($log->can('error')) {
226 $log->error(@_);
227 } else {
228 die @_;
229 }
230 } else {
231 die @_;
232 }
233 }
234
235 =head2 convert
236
237 convert internal encoding to UTF-8
238
239 my $utf8 = $self->convert( $text );
240
241 =cut
242
243 sub convert {
244 my $self = shift;
245
246 my $text = shift || return;
247
248 from_to($text, $self->{encoding}, 'UTF-8');
249 return $text;
250 }
251
252
253 =head2 _get_logger
254
255 For compatibility with same method from L<WebPAC::Common>, but without
256 need for it.
257
258 =cut
259
260 sub _get_logger {
261 my $self = shift;
262
263 return $self->{'log'} || die "really need log!";
264 }
265
266 =head1 AUTHOR
267
268 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
269
270 =head1 COPYRIGHT & LICENSE
271
272 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
273
274 This program is free software; you can redistribute it and/or modify it
275 under the same terms as Perl itself.
276
277 =cut
278
279 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26