/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 216 - (show annotations)
Mon Dec 5 17:47:45 2005 UTC (18 years, 4 months ago) by dpavlin
File size: 5374 byte(s)
 r11535@llin:  dpavlin | 2005-12-05 15:01:53 +0100
 added depth [0.05]

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use HyperEstraier;
7 use Text::Iconv;
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.05
17
18 =cut
19
20 our $VERSION = '0.05';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 masterurl => 'http://localhost:1978/',
35 database => 'webpac2',
36 user => 'admin',
37 passwd => 'admin',
38 encoding => 'iso-8859-2',
39 log => $Log::Log4perl->log_object,
40 );
41
42 Options are:
43
44 =over 4
45
46 =item maseterurl
47
48 URI to C<estmaster> node
49
50 =item database
51
52 name of C<estmaster> node
53
54 =item user
55
56 C<estmaster> user with read rights
57
58 =item passwd
59
60 password for user
61
62 =item encoding
63
64 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65 (and it probably is). This encoding will be converted to C<UTF-8> for
66 Hyper Estraier.
67
68 =item log
69
70 L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71 L<Catalyst> and there is support for it).
72
73 =back
74
75 =cut
76
77 sub new {
78 my $class = shift;
79 my $self = {@_};
80 bless($self, $class);
81
82 my $log = $self->_get_logger;
83
84 foreach my $p (qw/masterurl user passwd/) {
85 $log->logdie("need $p") unless ($self->{$p});
86 }
87
88 my $url = $self->{masterurl} . '/node/' . $self->{database};
89 $self->{url} = $url;
90
91 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92
93 $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
94 $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
95
96 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
97 $log->info("using encoding $encoding");
98
99 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
100 $log->die("can't create conversion from UTF-8 to $encoding");
101
102 $self ? return $self : return undef;
103 }
104
105
106 =head2 search
107
108 Locate items in index
109
110 my @results = $est->search(
111 phrase => 'name of book or novel',
112 add_attr => [
113 "filepath ISTRINC $q",
114 "size NUMGT 100",
115 ],
116 get_attr => qw/PersonalName TitleProper/,
117 order => 'NUMD',
118 max => 100,
119 options => $HyperEstraier::Condition::SURE,
120 page => 42,
121 depth => 0,
122 );
123
124 Options are close match to Hyper Estraier API, except C<get_attr> which defines
125 attributes which will be returned in hash for each record.
126
127 Results are returned as hash array with keys named by attributes
128
129 Pages are numbered C< 1 ... hits/max >.
130
131 =cut
132
133 sub search {
134 my $self = shift;
135
136 my $args = {@_};
137
138 my $log = $self->_get_logger;
139
140 #$log->debug( 'search args: ' . Dumper($args) );
141
142 $self->confess('need db in object') unless ($self->{db});
143 $self->confess('need get_attr') unless ($args->{get_attr});
144
145 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
146
147 my $q = $args->{phrase};
148
149 $log->debug("args: " . Dumper( $args ));
150
151 my $cond = HyperEstraier::Condition->new();
152 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
153 $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
154 map {
155 $cond->add_attr( $self->{iconv}->convert( $_ ) );
156 $log->debug(" + $_");
157 } @{ $args->{add_attr} };
158 };
159
160 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
161 $cond->set_options( $args->{options} ) if ($args->{options});
162 $cond->set_order( $args->{order} ) if ($args->{order});
163
164 my $max = $args->{max} || 7;
165 my $page = $args->{page} || 1;
166 if ($page < 1) {
167 $log->warn("page number $page < 1");
168 $page = 1;
169 }
170
171 $cond->set_max( $page * $max );
172
173 my $result = $self->{db}->search($cond, ( $args->{depth} || 0 )) ||
174 $log->die("can't search for ", sub { Dumper( $args ) });
175
176 my $hits = $result->doc_num;
177 $log->debug("found $hits hits for '$q'");
178
179 my @results;
180
181 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
182
183 #$log->debug("get_doc($i)");
184 my $doc = $result->get_doc( $i );
185 if (! $doc) {
186 $log->warn("can't find result $i");
187 next;
188 }
189
190 my $hash;
191
192 foreach my $attr (@{ $args->{get_attr} }) {
193 my $val = $doc->attr( $attr );
194 #$log->debug("attr $attr = ", $val || 'undef');
195 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
196 }
197
198 if ($hash) {
199 push @results, $hash;
200 }
201
202 }
203
204 # $log->debug("results " . Dumper( \@results ));
205
206 $self->confess("expected to return array") unless (wantarray);
207
208 return @results;
209 }
210
211 =head2 confess
212
213 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
214 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
215 C<die>.
216
217 =cut
218
219 sub confess {
220 my $self = shift;
221 if (my $log = $self->{'log'}) {
222 if ($log->can('logconfess')) {
223 $log->logconfess(@_);
224 } elsif ($log->can('fatal')) {
225 $log->fatal(@_);
226 die @_;
227 } elsif ($log->can('error')) {
228 $log->error(@_);
229 } else {
230 die @_;
231 }
232 } else {
233 die @_;
234 }
235 }
236
237 =head2 _get_logger
238
239 For compatibility with same method from L<WebPAC::Common>, but without
240 need for it.
241
242 =cut
243
244 sub _get_logger {
245 my $self = shift;
246
247 return $self->{'log'} || die "really need log!";
248 }
249
250 =head1 AUTHOR
251
252 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
253
254 =head1 COPYRIGHT & LICENSE
255
256 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
257
258 This program is free software; you can redistribute it and/or modify it
259 under the same terms as Perl itself.
260
261 =cut
262
263 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26