/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 177 - (show annotations)
Sun Nov 27 05:02:53 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5180 byte(s)
 r11190@llin:  dpavlin | 2005-11-27 06:05:22 +0100
 reset pages < 1 to 1

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use HyperEstraier;
7 use Text::Iconv;
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.03
17
18 =cut
19
20 our $VERSION = '0.03';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 url => 'http://localhost:1978/node/webpac2',
35 user => 'admin',
36 passwd => 'admin',
37 encoding => 'iso-8859-2',
38 log => $Log::Log4perl->log_object,
39 );
40
41 Options are:
42
43 =over 4
44
45 =item url
46
47 URI to C<estmaster> node
48
49 =item user
50
51 C<estmaster> user with read rights
52
53 =item passwd
54
55 password for user
56
57 =item encoding
58
59 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60 (and it probably is). This encoding will be converted to C<UTF-8> for
61 Hyper Estraier.
62
63 =item log
64
65 L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66 L<Catalyst> and there is support for it).
67
68 =back
69
70 =cut
71
72 sub new {
73 my $class = shift;
74 my $self = {@_};
75 bless($self, $class);
76
77 my $log = $self->_get_logger;
78
79 foreach my $p (qw/url user passwd/) {
80 $log->logdie("need $p") unless ($self->{$p});
81 }
82
83 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84
85 $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86 $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87
88 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89 $log->info("using encoding $encoding");
90
91 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92 $log->die("can't create conversion from UTF-8 to $encoding");
93
94 $self ? return $self : return undef;
95 }
96
97
98 =head2 search
99
100 Locate items in index
101
102 my @results = $est->search(
103 phrase => 'name of book or novel',
104 add_attr => [
105 "filepath ISTRINC $q",
106 "size NUMGT 100",
107 ],
108 get_attr => qw/PersonalName TitleProper/,
109 order => 'NUMD',
110 max => 100,
111 options => $HyperEstraier::Condition::SURE,
112 page => 42,
113 );
114
115 Options are close match to Hyper Estraier API, except C<get_attr> which defines
116 attributes which will be returned in hash for each record.
117
118 Results are returned as hash array with keys named by attributes
119
120 Pages are numbered C< 1 ... hits/max >.
121
122 =cut
123
124 sub search {
125 my $self = shift;
126
127 my $args = {@_};
128
129 my $log = $self->_get_logger;
130
131 #$log->debug( 'search args: ' . Dumper($args) );
132
133 $self->confess('need db in object') unless ($self->{db});
134 $self->confess('need get_attr') unless ($args->{get_attr});
135
136 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
137
138 my $q = $args->{phrase};
139
140 $log->debug("args: " . Dumper( $args ));
141
142 my $cond = HyperEstraier::Condition->new();
143 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
144 $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
145 map {
146 $cond->add_attr( $self->{iconv}->convert( $_ ) );
147 $log->debug(" + $_");
148 } @{ $args->{add_attr} };
149 };
150
151 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
152 $cond->set_options( $args->{options} ) if ($args->{options});
153 $cond->set_order( $args->{order} ) if ($args->{order});
154
155 my $max = $args->{max} || 7;
156 my $page = $args->{page} || 1;
157 if ($page < 1) {
158 $log->warn("page number $page < 1");
159 $page = 1;
160 }
161
162 $cond->set_max( $page * $max );
163
164 my $result = $self->{db}->search($cond, 0) ||
165 $log->die("can't search for ", sub { Dumper( $args ) });
166
167 my $hits = $result->doc_num;
168 $log->debug("found $hits hits for '$q'");
169
170 my @results;
171
172 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
173
174 #$log->debug("get_doc($i)");
175 my $doc = $result->get_doc( $i );
176 if (! $doc) {
177 $log->warn("can't find result $i");
178 next;
179 }
180
181 my $hash;
182
183 foreach my $attr (@{ $args->{get_attr} }) {
184 my $val = $doc->attr( $attr );
185 #$log->debug("attr $attr = ", $val || 'undef');
186 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
187 }
188
189 if ($hash) {
190 push @results, $hash;
191 }
192
193 }
194
195 # $log->debug("results " . Dumper( \@results ));
196
197 $self->confess("expected to return array") unless (wantarray);
198
199 return @results;
200 }
201
202 =head2 confess
203
204 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
205 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
206 C<die>.
207
208 =cut
209
210 sub confess {
211 my $self = shift;
212 if (my $log = $self->{'log'}) {
213 if ($log->can('logconfess')) {
214 $log->logconfess(@_);
215 } elsif ($log->can('fatal')) {
216 $log->fatal(@_);
217 die @_;
218 } elsif ($log->can('error')) {
219 $log->error(@_);
220 } else {
221 die @_;
222 }
223 } else {
224 die @_;
225 }
226 }
227
228 =head2 _get_logger
229
230 For compatibility with same method from L<WebPAC::Common>, but without
231 need for it.
232
233 =cut
234
235 sub _get_logger {
236 my $self = shift;
237
238 return $self->{'log'} || die "really need log!";
239 }
240
241 =head1 AUTHOR
242
243 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
244
245 =head1 COPYRIGHT & LICENSE
246
247 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
248
249 This program is free software; you can redistribute it and/or modify it
250 under the same terms as Perl itself.
251
252 =cut
253
254 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26