/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 123 - (hide annotations)
Wed Nov 23 21:53:01 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 4937 byte(s)
 r9074@llin:  dpavlin | 2005-11-23 22:21:48 +0100
 fixes

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 122 Version 0.02
17 dpavlin 80
18     =cut
19    
20 dpavlin 122 our $VERSION = '0.02';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 122 phrase => 'name of book or novel',
104     add_attr => [
105     "filepath ISTRINC $q",
106     "size NUMGT 100",
107     ],
108     get_attr => qw/PersonalName TitleProper/,
109     order => 'NUMD',
110     max => 100,
111     options => $HyperEstraier::Condition::SURE,
112 dpavlin 80 );
113    
114 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
115     attributes which will be returned in hash for each record.
116    
117 dpavlin 81 Results are returned as hash array with keys named by attributes
118    
119 dpavlin 80 =cut
120    
121     sub search {
122     my $self = shift;
123    
124     my $args = {@_};
125    
126     my $log = $self->_get_logger;
127    
128 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
129     $self->confess('need get_attr') unless ($args->{get_attr});
130 dpavlin 81
131 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
132 dpavlin 80
133 dpavlin 122 my $q = $args->{phrase};
134 dpavlin 86
135 dpavlin 123 $log->debug("args: " . Dumper( $args ));
136    
137 dpavlin 80 my $cond = HyperEstraier::Condition->new();
138 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
139     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
140     map {
141 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
142 dpavlin 122 $log->debug(" + $_");
143 dpavlin 123 } @{ $args->{add_attr} };
144     };
145 dpavlin 80
146 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
147     $cond->set_max( $args->{max} ) if ($args->{max});
148     $cond->set_options( $args->{options} ) if ($args->{options});
149     $cond->set_order( $args->{order} ) if ($args->{order});
150 dpavlin 80
151 dpavlin 122 my $result = $self->{db}->search($cond, 0) ||
152 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
153    
154     my $hits = $result->doc_num;
155 dpavlin 86 $log->debug("found $hits hits for '$q'");
156 dpavlin 80
157     my @results;
158    
159 dpavlin 81 for my $i ( 0 .. ( $hits - 1 ) ) {
160 dpavlin 80
161 dpavlin 86 #$log->debug("get_doc($i)");
162 dpavlin 80 my $doc = $result->get_doc( $i );
163     if (! $doc) {
164     $log->warn("can't find result $i");
165     next;
166     }
167    
168     my $hash;
169    
170 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
171 dpavlin 80 my $val = $doc->attr( $attr );
172 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
173 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
174 dpavlin 80 }
175    
176     if ($hash) {
177     push @results, $hash;
178     }
179    
180     }
181    
182 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
183 dpavlin 80
184 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
185 dpavlin 80
186     return @results;
187     }
188    
189     =head2 confess
190    
191     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
192     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
193     C<die>.
194    
195     =cut
196    
197     sub confess {
198     my $self = shift;
199     if (my $log = $self->{'log'}) {
200 dpavlin 87 if ($log->can('logconfess')) {
201     $log->logconfess(@_);
202 dpavlin 80 } elsif ($log->can('fatal')) {
203     $log->fatal(@_);
204 dpavlin 87 die @_;
205 dpavlin 80 } elsif ($log->can('error')) {
206     $log->error(@_);
207     } else {
208     die @_;
209     }
210     } else {
211     die @_;
212     }
213     }
214    
215     =head2 _get_logger
216    
217     For compatibility with same method from L<WebPAC::Common>, but without
218     need for it.
219    
220     =cut
221    
222     sub _get_logger {
223     my $self = shift;
224    
225     return $self->{'log'} || die "really need log!";
226     }
227    
228     =head1 AUTHOR
229    
230     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
231    
232     =head1 COPYRIGHT & LICENSE
233    
234     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
235    
236     This program is free software; you can redistribute it and/or modify it
237     under the same terms as Perl itself.
238    
239     =cut
240    
241     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26