/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 156 - (hide annotations)
Sat Nov 26 14:37:33 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 5107 byte(s)
 r11150@llin:  dpavlin | 2005-11-26 15:39:52 +0100
 0.03 added support for page parametar to search

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 156 Version 0.03
17 dpavlin 80
18     =cut
19    
20 dpavlin 156 our $VERSION = '0.03';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 122 phrase => 'name of book or novel',
104     add_attr => [
105     "filepath ISTRINC $q",
106     "size NUMGT 100",
107     ],
108     get_attr => qw/PersonalName TitleProper/,
109     order => 'NUMD',
110     max => 100,
111     options => $HyperEstraier::Condition::SURE,
112 dpavlin 156 page => 42,
113 dpavlin 80 );
114    
115 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
116     attributes which will be returned in hash for each record.
117    
118 dpavlin 81 Results are returned as hash array with keys named by attributes
119    
120 dpavlin 156 Pages are numbered C< 1 ... hits/max >.
121    
122 dpavlin 80 =cut
123    
124     sub search {
125     my $self = shift;
126    
127     my $args = {@_};
128    
129     my $log = $self->_get_logger;
130    
131 dpavlin 152 #$log->debug( 'search args: ' . Dumper($args) );
132    
133 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
134     $self->confess('need get_attr') unless ($args->{get_attr});
135 dpavlin 81
136 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
137 dpavlin 80
138 dpavlin 122 my $q = $args->{phrase};
139 dpavlin 86
140 dpavlin 123 $log->debug("args: " . Dumper( $args ));
141    
142 dpavlin 80 my $cond = HyperEstraier::Condition->new();
143 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
144     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
145     map {
146 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
147 dpavlin 122 $log->debug(" + $_");
148 dpavlin 123 } @{ $args->{add_attr} };
149     };
150 dpavlin 80
151 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
152     $cond->set_options( $args->{options} ) if ($args->{options});
153     $cond->set_order( $args->{order} ) if ($args->{order});
154 dpavlin 80
155 dpavlin 156 my $max = $args->{max} || 7;
156     my $page = $args->{page} || 1;
157    
158     $cond->set_max( $page * $max );
159    
160 dpavlin 122 my $result = $self->{db}->search($cond, 0) ||
161 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
162    
163     my $hits = $result->doc_num;
164 dpavlin 86 $log->debug("found $hits hits for '$q'");
165 dpavlin 80
166     my @results;
167    
168 dpavlin 156 for my $i ( (($page - 1) * $max) .. ( $hits - 1 ) ) {
169 dpavlin 80
170 dpavlin 86 #$log->debug("get_doc($i)");
171 dpavlin 80 my $doc = $result->get_doc( $i );
172     if (! $doc) {
173     $log->warn("can't find result $i");
174     next;
175     }
176    
177     my $hash;
178    
179 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
180 dpavlin 80 my $val = $doc->attr( $attr );
181 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
182 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
183 dpavlin 80 }
184    
185     if ($hash) {
186     push @results, $hash;
187     }
188    
189     }
190    
191 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
192 dpavlin 80
193 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
194 dpavlin 80
195     return @results;
196     }
197    
198     =head2 confess
199    
200     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
201     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
202     C<die>.
203    
204     =cut
205    
206     sub confess {
207     my $self = shift;
208     if (my $log = $self->{'log'}) {
209 dpavlin 87 if ($log->can('logconfess')) {
210     $log->logconfess(@_);
211 dpavlin 80 } elsif ($log->can('fatal')) {
212     $log->fatal(@_);
213 dpavlin 87 die @_;
214 dpavlin 80 } elsif ($log->can('error')) {
215     $log->error(@_);
216     } else {
217     die @_;
218     }
219     } else {
220     die @_;
221     }
222     }
223    
224     =head2 _get_logger
225    
226     For compatibility with same method from L<WebPAC::Common>, but without
227     need for it.
228    
229     =cut
230    
231     sub _get_logger {
232     my $self = shift;
233    
234     return $self->{'log'} || die "really need log!";
235     }
236    
237     =head1 AUTHOR
238    
239     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
240    
241     =head1 COPYRIGHT & LICENSE
242    
243     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
244    
245     This program is free software; you can redistribute it and/or modify it
246     under the same terms as Perl itself.
247    
248     =cut
249    
250     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26