/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 216 - (hide annotations)
Mon Dec 5 17:47:45 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5374 byte(s)
 r11535@llin:  dpavlin | 2005-12-05 15:01:53 +0100
 added depth [0.05]

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 216 Version 0.05
17 dpavlin 80
18     =cut
19    
20 dpavlin 216 our $VERSION = '0.05';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34 dpavlin 211 masterurl => 'http://localhost:1978/',
35     database => 'webpac2',
36 dpavlin 80 user => 'admin',
37     passwd => 'admin',
38     encoding => 'iso-8859-2',
39 dpavlin 81 log => $Log::Log4perl->log_object,
40 dpavlin 80 );
41    
42     Options are:
43    
44     =over 4
45    
46 dpavlin 211 =item maseterurl
47 dpavlin 80
48     URI to C<estmaster> node
49    
50 dpavlin 211 =item database
51    
52     name of C<estmaster> node
53    
54 dpavlin 80 =item user
55    
56     C<estmaster> user with read rights
57    
58     =item passwd
59    
60     password for user
61    
62     =item encoding
63    
64     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65     (and it probably is). This encoding will be converted to C<UTF-8> for
66     Hyper Estraier.
67    
68 dpavlin 81 =item log
69    
70     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71     L<Catalyst> and there is support for it).
72    
73 dpavlin 80 =back
74    
75     =cut
76    
77     sub new {
78     my $class = shift;
79     my $self = {@_};
80     bless($self, $class);
81    
82     my $log = $self->_get_logger;
83    
84 dpavlin 211 foreach my $p (qw/masterurl user passwd/) {
85 dpavlin 80 $log->logdie("need $p") unless ($self->{$p});
86     }
87    
88 dpavlin 211 my $url = $self->{masterurl} . '/node/' . $self->{database};
89     $self->{url} = $url;
90    
91 dpavlin 80 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92    
93     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
94     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
95    
96     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
97     $log->info("using encoding $encoding");
98    
99 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
100     $log->die("can't create conversion from UTF-8 to $encoding");
101 dpavlin 80
102     $self ? return $self : return undef;
103     }
104    
105    
106     =head2 search
107    
108     Locate items in index
109    
110 dpavlin 81 my @results = $est->search(
111 dpavlin 122 phrase => 'name of book or novel',
112     add_attr => [
113     "filepath ISTRINC $q",
114     "size NUMGT 100",
115     ],
116     get_attr => qw/PersonalName TitleProper/,
117     order => 'NUMD',
118     max => 100,
119     options => $HyperEstraier::Condition::SURE,
120 dpavlin 156 page => 42,
121 dpavlin 216 depth => 0,
122 dpavlin 80 );
123    
124 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
125     attributes which will be returned in hash for each record.
126    
127 dpavlin 81 Results are returned as hash array with keys named by attributes
128    
129 dpavlin 156 Pages are numbered C< 1 ... hits/max >.
130    
131 dpavlin 80 =cut
132    
133     sub search {
134     my $self = shift;
135    
136     my $args = {@_};
137    
138     my $log = $self->_get_logger;
139    
140 dpavlin 152 #$log->debug( 'search args: ' . Dumper($args) );
141    
142 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
143     $self->confess('need get_attr') unless ($args->{get_attr});
144 dpavlin 81
145 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
146 dpavlin 80
147 dpavlin 122 my $q = $args->{phrase};
148 dpavlin 86
149 dpavlin 123 $log->debug("args: " . Dumper( $args ));
150    
151 dpavlin 80 my $cond = HyperEstraier::Condition->new();
152 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
153     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
154     map {
155 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
156 dpavlin 122 $log->debug(" + $_");
157 dpavlin 123 } @{ $args->{add_attr} };
158     };
159 dpavlin 80
160 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
161     $cond->set_options( $args->{options} ) if ($args->{options});
162     $cond->set_order( $args->{order} ) if ($args->{order});
163 dpavlin 80
164 dpavlin 156 my $max = $args->{max} || 7;
165     my $page = $args->{page} || 1;
166 dpavlin 177 if ($page < 1) {
167     $log->warn("page number $page < 1");
168     $page = 1;
169     }
170 dpavlin 156
171     $cond->set_max( $page * $max );
172    
173 dpavlin 216 my $result = $self->{db}->search($cond, ( $args->{depth} || 0 )) ||
174 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
175    
176     my $hits = $result->doc_num;
177 dpavlin 86 $log->debug("found $hits hits for '$q'");
178 dpavlin 80
179     my @results;
180    
181 dpavlin 156 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
182 dpavlin 80
183 dpavlin 86 #$log->debug("get_doc($i)");
184 dpavlin 80 my $doc = $result->get_doc( $i );
185     if (! $doc) {
186     $log->warn("can't find result $i");
187     next;
188     }
189    
190     my $hash;
191    
192 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
193 dpavlin 80 my $val = $doc->attr( $attr );
194 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
195 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
196 dpavlin 80 }
197    
198     if ($hash) {
199     push @results, $hash;
200     }
201    
202     }
203    
204 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
205 dpavlin 80
206 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
207 dpavlin 80
208     return @results;
209     }
210    
211     =head2 confess
212    
213     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
214     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
215     C<die>.
216    
217     =cut
218    
219     sub confess {
220     my $self = shift;
221     if (my $log = $self->{'log'}) {
222 dpavlin 87 if ($log->can('logconfess')) {
223     $log->logconfess(@_);
224 dpavlin 80 } elsif ($log->can('fatal')) {
225     $log->fatal(@_);
226 dpavlin 87 die @_;
227 dpavlin 80 } elsif ($log->can('error')) {
228     $log->error(@_);
229     } else {
230     die @_;
231     }
232     } else {
233     die @_;
234     }
235     }
236    
237     =head2 _get_logger
238    
239     For compatibility with same method from L<WebPAC::Common>, but without
240     need for it.
241    
242     =cut
243    
244     sub _get_logger {
245     my $self = shift;
246    
247     return $self->{'log'} || die "really need log!";
248     }
249    
250     =head1 AUTHOR
251    
252     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
253    
254     =head1 COPYRIGHT & LICENSE
255    
256     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
257    
258     This program is free software; you can redistribute it and/or modify it
259     under the same terms as Perl itself.
260    
261     =cut
262    
263     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26