/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 156 - (show annotations)
Sat Nov 26 14:37:33 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5107 byte(s)
 r11150@llin:  dpavlin | 2005-11-26 15:39:52 +0100
 0.03 added support for page parametar to search

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use HyperEstraier;
7 use Text::Iconv;
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.03
17
18 =cut
19
20 our $VERSION = '0.03';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 url => 'http://localhost:1978/node/webpac2',
35 user => 'admin',
36 passwd => 'admin',
37 encoding => 'iso-8859-2',
38 log => $Log::Log4perl->log_object,
39 );
40
41 Options are:
42
43 =over 4
44
45 =item url
46
47 URI to C<estmaster> node
48
49 =item user
50
51 C<estmaster> user with read rights
52
53 =item passwd
54
55 password for user
56
57 =item encoding
58
59 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60 (and it probably is). This encoding will be converted to C<UTF-8> for
61 Hyper Estraier.
62
63 =item log
64
65 L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66 L<Catalyst> and there is support for it).
67
68 =back
69
70 =cut
71
72 sub new {
73 my $class = shift;
74 my $self = {@_};
75 bless($self, $class);
76
77 my $log = $self->_get_logger;
78
79 foreach my $p (qw/url user passwd/) {
80 $log->logdie("need $p") unless ($self->{$p});
81 }
82
83 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84
85 $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86 $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87
88 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89 $log->info("using encoding $encoding");
90
91 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92 $log->die("can't create conversion from UTF-8 to $encoding");
93
94 $self ? return $self : return undef;
95 }
96
97
98 =head2 search
99
100 Locate items in index
101
102 my @results = $est->search(
103 phrase => 'name of book or novel',
104 add_attr => [
105 "filepath ISTRINC $q",
106 "size NUMGT 100",
107 ],
108 get_attr => qw/PersonalName TitleProper/,
109 order => 'NUMD',
110 max => 100,
111 options => $HyperEstraier::Condition::SURE,
112 page => 42,
113 );
114
115 Options are close match to Hyper Estraier API, except C<get_attr> which defines
116 attributes which will be returned in hash for each record.
117
118 Results are returned as hash array with keys named by attributes
119
120 Pages are numbered C< 1 ... hits/max >.
121
122 =cut
123
124 sub search {
125 my $self = shift;
126
127 my $args = {@_};
128
129 my $log = $self->_get_logger;
130
131 #$log->debug( 'search args: ' . Dumper($args) );
132
133 $self->confess('need db in object') unless ($self->{db});
134 $self->confess('need get_attr') unless ($args->{get_attr});
135
136 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
137
138 my $q = $args->{phrase};
139
140 $log->debug("args: " . Dumper( $args ));
141
142 my $cond = HyperEstraier::Condition->new();
143 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
144 $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
145 map {
146 $cond->add_attr( $self->{iconv}->convert( $_ ) );
147 $log->debug(" + $_");
148 } @{ $args->{add_attr} };
149 };
150
151 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
152 $cond->set_options( $args->{options} ) if ($args->{options});
153 $cond->set_order( $args->{order} ) if ($args->{order});
154
155 my $max = $args->{max} || 7;
156 my $page = $args->{page} || 1;
157
158 $cond->set_max( $page * $max );
159
160 my $result = $self->{db}->search($cond, 0) ||
161 $log->die("can't search for ", sub { Dumper( $args ) });
162
163 my $hits = $result->doc_num;
164 $log->debug("found $hits hits for '$q'");
165
166 my @results;
167
168 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
169
170 #$log->debug("get_doc($i)");
171 my $doc = $result->get_doc( $i );
172 if (! $doc) {
173 $log->warn("can't find result $i");
174 next;
175 }
176
177 my $hash;
178
179 foreach my $attr (@{ $args->{get_attr} }) {
180 my $val = $doc->attr( $attr );
181 #$log->debug("attr $attr = ", $val || 'undef');
182 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
183 }
184
185 if ($hash) {
186 push @results, $hash;
187 }
188
189 }
190
191 # $log->debug("results " . Dumper( \@results ));
192
193 $self->confess("expected to return array") unless (wantarray);
194
195 return @results;
196 }
197
198 =head2 confess
199
200 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
201 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
202 C<die>.
203
204 =cut
205
206 sub confess {
207 my $self = shift;
208 if (my $log = $self->{'log'}) {
209 if ($log->can('logconfess')) {
210 $log->logconfess(@_);
211 } elsif ($log->can('fatal')) {
212 $log->fatal(@_);
213 die @_;
214 } elsif ($log->can('error')) {
215 $log->error(@_);
216 } else {
217 die @_;
218 }
219 } else {
220 die @_;
221 }
222 }
223
224 =head2 _get_logger
225
226 For compatibility with same method from L<WebPAC::Common>, but without
227 need for it.
228
229 =cut
230
231 sub _get_logger {
232 my $self = shift;
233
234 return $self->{'log'} || die "really need log!";
235 }
236
237 =head1 AUTHOR
238
239 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
240
241 =head1 COPYRIGHT & LICENSE
242
243 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
244
245 This program is free software; you can redistribute it and/or modify it
246 under the same terms as Perl itself.
247
248 =cut
249
250 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26