/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 122 - (hide annotations)
Wed Nov 23 21:52:55 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 4887 byte(s)
 r9072@llin:  dpavlin | 2005-11-23 21:31:14 +0100
 API 0.02: changed and added options to match Hyper Estraier's API

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 122 Version 0.02
17 dpavlin 80
18     =cut
19    
20 dpavlin 122 our $VERSION = '0.02';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 122 phrase => 'name of book or novel',
104     add_attr => [
105     "filepath ISTRINC $q",
106     "size NUMGT 100",
107     ],
108     get_attr => qw/PersonalName TitleProper/,
109     order => 'NUMD',
110     max => 100,
111     options => $HyperEstraier::Condition::SURE,
112 dpavlin 80 );
113    
114 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
115     attributes which will be returned in hash for each record.
116    
117 dpavlin 81 Results are returned as hash array with keys named by attributes
118    
119 dpavlin 80 =cut
120    
121     sub search {
122     my $self = shift;
123    
124     my $args = {@_};
125    
126     my $log = $self->_get_logger;
127    
128 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
129     $self->confess('need get_attr') unless ($args->{get_attr});
130 dpavlin 81
131 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
132 dpavlin 80
133 dpavlin 122 my $q = $args->{phrase};
134 dpavlin 86
135 dpavlin 80 my $cond = HyperEstraier::Condition->new();
136 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
137     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
138     map {
139     $cond->add_attr( $self->{iconv}->conver( $_ ) );
140     $log->debug(" + $_");
141     } $args->{add_attr};
142     }
143 dpavlin 80
144 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
145     $cond->set_max( $args->{max} ) if ($args->{max});
146     $cond->set_options( $args->{options} ) if ($args->{options});
147     $cond->set_order( $args->{order} ) if ($args->{order});
148 dpavlin 80
149 dpavlin 122 my $result = $self->{db}->search($cond, 0) ||
150 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
151    
152     my $hits = $result->doc_num;
153 dpavlin 86 $log->debug("found $hits hits for '$q'");
154 dpavlin 80
155     my @results;
156    
157 dpavlin 81 for my $i ( 0 .. ( $hits - 1 ) ) {
158 dpavlin 80
159 dpavlin 86 #$log->debug("get_doc($i)");
160 dpavlin 80 my $doc = $result->get_doc( $i );
161     if (! $doc) {
162     $log->warn("can't find result $i");
163     next;
164     }
165    
166     my $hash;
167    
168 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
169 dpavlin 80 my $val = $doc->attr( $attr );
170 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
171 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
172 dpavlin 80 }
173    
174     if ($hash) {
175     push @results, $hash;
176     }
177    
178     }
179    
180 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
181 dpavlin 80
182 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
183 dpavlin 80
184     return @results;
185     }
186    
187     =head2 confess
188    
189     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
190     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
191     C<die>.
192    
193     =cut
194    
195     sub confess {
196     my $self = shift;
197     if (my $log = $self->{'log'}) {
198 dpavlin 87 if ($log->can('logconfess')) {
199     $log->logconfess(@_);
200 dpavlin 80 } elsif ($log->can('fatal')) {
201     $log->fatal(@_);
202 dpavlin 87 die @_;
203 dpavlin 80 } elsif ($log->can('error')) {
204     $log->error(@_);
205     } else {
206     die @_;
207     }
208     } else {
209     die @_;
210     }
211     }
212    
213     =head2 _get_logger
214    
215     For compatibility with same method from L<WebPAC::Common>, but without
216     need for it.
217    
218     =cut
219    
220     sub _get_logger {
221     my $self = shift;
222    
223     return $self->{'log'} || die "really need log!";
224     }
225    
226     =head1 AUTHOR
227    
228     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
229    
230     =head1 COPYRIGHT & LICENSE
231    
232     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
233    
234     This program is free software; you can redistribute it and/or modify it
235     under the same terms as Perl itself.
236    
237     =cut
238    
239     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26