/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 177 - (hide annotations)
Sun Nov 27 05:02:53 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5180 byte(s)
 r11190@llin:  dpavlin | 2005-11-27 06:05:22 +0100
 reset pages < 1 to 1

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 156 Version 0.03
17 dpavlin 80
18     =cut
19    
20 dpavlin 156 our $VERSION = '0.03';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 122 phrase => 'name of book or novel',
104     add_attr => [
105     "filepath ISTRINC $q",
106     "size NUMGT 100",
107     ],
108     get_attr => qw/PersonalName TitleProper/,
109     order => 'NUMD',
110     max => 100,
111     options => $HyperEstraier::Condition::SURE,
112 dpavlin 156 page => 42,
113 dpavlin 80 );
114    
115 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
116     attributes which will be returned in hash for each record.
117    
118 dpavlin 81 Results are returned as hash array with keys named by attributes
119    
120 dpavlin 156 Pages are numbered C< 1 ... hits/max >.
121    
122 dpavlin 80 =cut
123    
124     sub search {
125     my $self = shift;
126    
127     my $args = {@_};
128    
129     my $log = $self->_get_logger;
130    
131 dpavlin 152 #$log->debug( 'search args: ' . Dumper($args) );
132    
133 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
134     $self->confess('need get_attr') unless ($args->{get_attr});
135 dpavlin 81
136 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
137 dpavlin 80
138 dpavlin 122 my $q = $args->{phrase};
139 dpavlin 86
140 dpavlin 123 $log->debug("args: " . Dumper( $args ));
141    
142 dpavlin 80 my $cond = HyperEstraier::Condition->new();
143 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
144     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
145     map {
146 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
147 dpavlin 122 $log->debug(" + $_");
148 dpavlin 123 } @{ $args->{add_attr} };
149     };
150 dpavlin 80
151 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
152     $cond->set_options( $args->{options} ) if ($args->{options});
153     $cond->set_order( $args->{order} ) if ($args->{order});
154 dpavlin 80
155 dpavlin 156 my $max = $args->{max} || 7;
156     my $page = $args->{page} || 1;
157 dpavlin 177 if ($page < 1) {
158     $log->warn("page number $page < 1");
159     $page = 1;
160     }
161 dpavlin 156
162     $cond->set_max( $page * $max );
163    
164 dpavlin 122 my $result = $self->{db}->search($cond, 0) ||
165 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
166    
167     my $hits = $result->doc_num;
168 dpavlin 86 $log->debug("found $hits hits for '$q'");
169 dpavlin 80
170     my @results;
171    
172 dpavlin 156 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
173 dpavlin 80
174 dpavlin 86 #$log->debug("get_doc($i)");
175 dpavlin 80 my $doc = $result->get_doc( $i );
176     if (! $doc) {
177     $log->warn("can't find result $i");
178     next;
179     }
180    
181     my $hash;
182    
183 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
184 dpavlin 80 my $val = $doc->attr( $attr );
185 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
186 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
187 dpavlin 80 }
188    
189     if ($hash) {
190     push @results, $hash;
191     }
192    
193     }
194    
195 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
196 dpavlin 80
197 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
198 dpavlin 80
199     return @results;
200     }
201    
202     =head2 confess
203    
204     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
205     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
206     C<die>.
207    
208     =cut
209    
210     sub confess {
211     my $self = shift;
212     if (my $log = $self->{'log'}) {
213 dpavlin 87 if ($log->can('logconfess')) {
214     $log->logconfess(@_);
215 dpavlin 80 } elsif ($log->can('fatal')) {
216     $log->fatal(@_);
217 dpavlin 87 die @_;
218 dpavlin 80 } elsif ($log->can('error')) {
219     $log->error(@_);
220     } else {
221     die @_;
222     }
223     } else {
224     die @_;
225     }
226     }
227    
228     =head2 _get_logger
229    
230     For compatibility with same method from L<WebPAC::Common>, but without
231     need for it.
232    
233     =cut
234    
235     sub _get_logger {
236     my $self = shift;
237    
238     return $self->{'log'} || die "really need log!";
239     }
240    
241     =head1 AUTHOR
242    
243     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
244    
245     =head1 COPYRIGHT & LICENSE
246    
247     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
248    
249     This program is free software; you can redistribute it and/or modify it
250     under the same terms as Perl itself.
251    
252     =cut
253    
254     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26