/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 152 - (hide annotations)
Sat Nov 26 01:38:28 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 4988 byte(s)
 r11141@llin:  dpavlin | 2005-11-26 02:40:29 +0100
 one more debug message (but commented)

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16 dpavlin 122 Version 0.02
17 dpavlin 80
18     =cut
19    
20 dpavlin 122 our $VERSION = '0.02';
21 dpavlin 80
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 122 phrase => 'name of book or novel',
104     add_attr => [
105     "filepath ISTRINC $q",
106     "size NUMGT 100",
107     ],
108     get_attr => qw/PersonalName TitleProper/,
109     order => 'NUMD',
110     max => 100,
111     options => $HyperEstraier::Condition::SURE,
112 dpavlin 80 );
113    
114 dpavlin 122 Options are close match to Hyper Estraier API, except C<get_attr> which defines
115     attributes which will be returned in hash for each record.
116    
117 dpavlin 81 Results are returned as hash array with keys named by attributes
118    
119 dpavlin 80 =cut
120    
121     sub search {
122     my $self = shift;
123    
124     my $args = {@_};
125    
126     my $log = $self->_get_logger;
127    
128 dpavlin 152 #$log->debug( 'search args: ' . Dumper($args) );
129    
130 dpavlin 122 $self->confess('need db in object') unless ($self->{db});
131     $self->confess('need get_attr') unless ($args->{get_attr});
132 dpavlin 81
133 dpavlin 122 $self->confess("need get_attr as array not " . ref($args->{get_attr}) ) unless (ref($args->{get_attr}) eq 'ARRAY');
134 dpavlin 80
135 dpavlin 122 my $q = $args->{phrase};
136 dpavlin 86
137 dpavlin 123 $log->debug("args: " . Dumper( $args ));
138    
139 dpavlin 80 my $cond = HyperEstraier::Condition->new();
140 dpavlin 122 if ( ref($args->{add_attr}) eq 'ARRAY' ) {
141     $log->debug("adding search attributes: " . join(", ", @{ $args->{add_attr} }) );
142     map {
143 dpavlin 123 $cond->add_attr( $self->{iconv}->convert( $_ ) );
144 dpavlin 122 $log->debug(" + $_");
145 dpavlin 123 } @{ $args->{add_attr} };
146     };
147 dpavlin 80
148 dpavlin 122 $cond->set_phrase( $self->{iconv}->convert($q) ) if ($q);
149     $cond->set_max( $args->{max} ) if ($args->{max});
150     $cond->set_options( $args->{options} ) if ($args->{options});
151     $cond->set_order( $args->{order} ) if ($args->{order});
152 dpavlin 80
153 dpavlin 122 my $result = $self->{db}->search($cond, 0) ||
154 dpavlin 80 $log->die("can't search for ", sub { Dumper( $args ) });
155    
156     my $hits = $result->doc_num;
157 dpavlin 86 $log->debug("found $hits hits for '$q'");
158 dpavlin 80
159     my @results;
160    
161 dpavlin 81 for my $i ( 0 .. ( $hits - 1 ) ) {
162 dpavlin 80
163 dpavlin 86 #$log->debug("get_doc($i)");
164 dpavlin 80 my $doc = $result->get_doc( $i );
165     if (! $doc) {
166     $log->warn("can't find result $i");
167     next;
168     }
169    
170     my $hash;
171    
172 dpavlin 122 foreach my $attr (@{ $args->{get_attr} }) {
173 dpavlin 80 my $val = $doc->attr( $attr );
174 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
175 dpavlin 122 $hash->{$attr} = $self->{iconv}->convert( $val ) if (defined($val));
176 dpavlin 80 }
177    
178     if ($hash) {
179     push @results, $hash;
180     }
181    
182     }
183    
184 dpavlin 103 # $log->debug("results " . Dumper( \@results ));
185 dpavlin 80
186 dpavlin 87 $self->confess("expected to return array") unless (wantarray);
187 dpavlin 80
188     return @results;
189     }
190    
191     =head2 confess
192    
193     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
194     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
195     C<die>.
196    
197     =cut
198    
199     sub confess {
200     my $self = shift;
201     if (my $log = $self->{'log'}) {
202 dpavlin 87 if ($log->can('logconfess')) {
203     $log->logconfess(@_);
204 dpavlin 80 } elsif ($log->can('fatal')) {
205     $log->fatal(@_);
206 dpavlin 87 die @_;
207 dpavlin 80 } elsif ($log->can('error')) {
208     $log->error(@_);
209     } else {
210     die @_;
211     }
212     } else {
213     die @_;
214     }
215     }
216    
217     =head2 _get_logger
218    
219     For compatibility with same method from L<WebPAC::Common>, but without
220     need for it.
221    
222     =cut
223    
224     sub _get_logger {
225     my $self = shift;
226    
227     return $self->{'log'} || die "really need log!";
228     }
229    
230     =head1 AUTHOR
231    
232     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
233    
234     =head1 COPYRIGHT & LICENSE
235    
236     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
237    
238     This program is free software; you can redistribute it and/or modify it
239     under the same terms as Perl itself.
240    
241     =cut
242    
243     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26