/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 887 - (hide annotations)
Mon Sep 3 15:26:46 2007 UTC (16 years, 7 months ago) by dpavlin
File size: 5478 byte(s)
 r1322@llin:  dpavlin | 2007-09-03 16:44:01 +0200
 - replace Data::Dumper usage with Data::Dump
 - rewrite WebPAC::Store to use Class::Accessor

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6 dpavlin 343 use Search::Estraier;
7 dpavlin 535 use Encode qw/from_to/;
8 dpavlin 887 use Data::Dump qw/dump/;
9 dpavlin 80
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 535 Version 0.07
17 dpavlin 80
18     =cut
19    
20 dpavlin 535 our $VERSION = '0.07';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34 dpavlin 211 masterurl => 'http://localhost:1978/',
35     database => 'webpac2',
36 dpavlin 80 user => 'admin',
37     passwd => 'admin',
38     encoding => 'iso-8859-2',
39 dpavlin 81 log => $Log::Log4perl->log_object,
40 dpavlin 80 );
41    
42     Options are:
43    
44     =over 4
45    
46 dpavlin 211 =item maseterurl
47 dpavlin 80
48     URI to C<estmaster> node
49    
50 dpavlin 211 =item database
51    
52     name of C<estmaster> node
53    
54 dpavlin 80 =item user
55    
56     C<estmaster> user with read rights
57    
58     =item passwd
59    
60     password for user
61    
62     =item encoding
63    
64     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
65     (and it probably is). This encoding will be converted to C<UTF-8> for
66     Hyper Estraier.
67    
68 dpavlin 81 =item log
69    
70     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
71     L<Catalyst> and there is support for it).
72    
73 dpavlin 80 =back
74    
75     =cut
76    
77     sub new {
78     my $class = shift;
79     my $self = {@_};
80     bless($self, $class);
81    
82     my $log = $self->_get_logger;
83    
84 dpavlin 211 foreach my $p (qw/masterurl user passwd/) {
85 dpavlin 80 $log->logdie("need $p") unless ($self->{$p});
86     }
87    
88 dpavlin 211 my $url = $self->{masterurl} . '/node/' . $self->{database};
89     $self->{url} = $url;
90    
91 dpavlin 80 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
92    
93 dpavlin 343 $self->{db} = Search::Estraier::Node->new;
94     $self->{db}->set_url($self->{'url'});
95     $self->{db}->set_auth($self->{'user'}, $self->{'passwd'});
96 dpavlin 80
97 dpavlin 535 $self->{'encoding'} ||= 'ISO-8859-2';
98     $log->info("using encoding ",$self->{encoding});
99 dpavlin 80
100     $self ? return $self : return undef;
101     }
102    
103    
104     =head2 search
105    
106     Locate items in index
107    
108 dpavlin 81 my @results = $est->search(
109 dpavlin 122 phrase => 'name of book or novel',
110     add_attr => [
111     "filepath ISTRINC $q",
112     "size NUMGT 100",
113     ],
114     get_attr => qw/PersonalName TitleProper/,
115     order => 'NUMD',
116     max => 100,
117     options => $HyperEstraier::Condition::SURE,
118 dpavlin 156 page => 42,
119 dpavlin 216 depth => 0,
120 dpavlin 80 );
121    
122 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
123     attributes which will be returned in hash for each record.
124    
125 dpavlin 81 Results are returned as hash array with keys named by attributes
126    
127 dpavlin 156 Pages are numbered C< 1 ... hits/max >.
128    
129 dpavlin 80 =cut
130    
131     sub search {
132     my $self = shift;
133    
134     my $args = {@_};
135    
136     my $log = $self->_get_logger;
137    
138 dpavlin 887 #$log->debug( 'search args: ', dump($args) );
139 dpavlin 152
140 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
141     $self->confess('need get_attr') unless ($args->{get_attr});
142 dpavlin 81
143 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
144 dpavlin 80
145 dpavlin 122 my $q = $args->{phrase};
146 dpavlin 86
147 dpavlin 887 $log->debug("args: ", dump( $args ));
148 dpavlin 123
149 dpavlin 343 my $cond = Search::Estraier::Condition->new();
150 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
151     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
152     map {
153 dpavlin 535 $cond->add_attr( $self->convert( $_ ) );
154 dpavlin 122 $log->debug(" + $_");
155 dpavlin 123 } @{ $args->{add_attr} };
156     };
157 dpavlin 80
158 dpavlin 535 $cond->set_phrase( $self->convert($q) ) if ($q);
159 dpavlin 122 $cond->set_options( $args->{options} ) if ($args->{options});
160     $cond->set_order( $args->{order} ) if ($args->{order});
161 dpavlin 80
162 dpavlin 156 my $max = $args->{max} || 7;
163     my $page = $args->{page} || 1;
164 dpavlin 177 if ($page < 1) {
165     $log->warn("page number $page < 1");
166     $page = 1;
167     }
168 dpavlin 156
169     $cond->set_max( $page * $max );
170    
171 dpavlin 216 my $result = $self->{db}->search($cond, ( $args->{depth} || 0 )) ||
172 dpavlin 887 $log->logdie("can't search for ", sub { dump( $args ) });
173 dpavlin 80
174     my $hits = $result->doc_num;
175 dpavlin 86 $log->debug("found $hits hits for '$q'");
176 dpavlin 80
177     my @results;
178    
179 dpavlin 156 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
180 dpavlin 80
181 dpavlin 86 #$log->debug("get_doc($i)");
182 dpavlin 80 my $doc = $result->get_doc( $i );
183     if (! $doc) {
184     $log->warn("can't find result $i");
185     next;
186     }
187    
188     my $hash;
189    
190 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
191 dpavlin 80 my $val = $doc->attr( $attr );
192 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
193 dpavlin 535 $hash->{$attr} = $self->convert( $val ) if (defined($val));
194 dpavlin 80 }
195    
196     if ($hash) {
197     push @results, $hash;
198     }
199    
200     }
201    
202 dpavlin 887 # $log->debug("results " . dump( \@results ));
203 dpavlin 80
204 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
205 dpavlin 80
206     return @results;
207     }
208    
209     =head2 confess
210    
211     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
212     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
213     C<die>.
214    
215     =cut
216    
217     sub confess {
218     my $self = shift;
219     if (my $log = $self->{'log'}) {
220 dpavlin 87 if ($log->can('logconfess')) {
221     $log->logconfess(@_);
222 dpavlin 80 } elsif ($log->can('fatal')) {
223     $log->fatal(@_);
224 dpavlin 87 die @_;
225 dpavlin 80 } elsif ($log->can('error')) {
226     $log->error(@_);
227     } else {
228     die @_;
229     }
230     } else {
231     die @_;
232     }
233     }
234    
235 dpavlin 535 =head2 convert
236    
237     convert internal encoding to UTF-8
238    
239     my $utf8 = $self->convert( $text );
240    
241     =cut
242    
243     sub convert {
244     my $self = shift;
245    
246     my $text = shift || return;
247    
248     from_to($text, $self->{encoding}, 'UTF-8');
249     return $text;
250     }
251    
252    
253 dpavlin 80 =head2 _get_logger
254    
255     For compatibility with same method from L<WebPAC::Common>, but without
256     need for it.
257    
258     =cut
259    
260     sub _get_logger {
261     my $self = shift;
262    
263     return $self->{'log'} || die "really need log!";
264     }
265    
266     =head1 AUTHOR
267    
268     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
269    
270     =head1 COPYRIGHT & LICENSE
271    
272     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
273    
274     This program is free software; you can redistribute it and/or modify it
275     under the same terms as Perl itself.
276    
277     =cut
278    
279     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26