/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 81 - (hide annotations)
Mon Nov 21 14:42:22 2005 UTC (18 years, 6 months ago) by dpavlin
File size: 4322 byte(s)
 r9002@llin:  dpavlin | 2005-11-21 15:43:39 +0100
 Text::Iconv working, fixed tests, minor tweaks

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16     Version 0.01
17    
18     =cut
19    
20     our $VERSION = '0.01';
21    
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 80 query => 'name of book or novel',
104 dpavlin 81 attr => qw/PersonalName TitleProper/,
105 dpavlin 80 );
106    
107 dpavlin 81 Results are returned as hash array with keys named by attributes
108    
109 dpavlin 80 =cut
110    
111     sub search {
112     my $self = shift;
113    
114     my $args = {@_};
115    
116     my $log = $self->_get_logger;
117    
118     $log->logconfess('need db in object') unless ($self->{'db'});
119     $log->logconfess('need attr') unless ($args->{'attr'});
120 dpavlin 81
121 dpavlin 80 $log->logconfess("need attr as array not " . ref($args->{'attr'}) ) unless (ref($args->{'attr'}) eq 'ARRAY');
122    
123     my $cond = HyperEstraier::Condition->new();
124     # $cond->add_attr("filepath ISTRINC $q");
125    
126     $cond->set_phrase( $args->{'query'} ) if ($args->{'query'});
127     $cond->set_max( $args->{'max'} ) if ($args->{'max'});
128     # $cond->set_options( $HyperEstraier::Condition::SURE );
129     # $cond->set_order( 'NUMD' );
130    
131     my $result = $self->{'db'}->search($cond, 0) ||
132     $log->die("can't search for ", sub { Dumper( $args ) });
133    
134     my $hits = $result->doc_num;
135     $log->debug("found $hits hits");
136    
137     my @results;
138    
139 dpavlin 81 for my $i ( 0 .. ( $hits - 1 ) ) {
140 dpavlin 80
141     $log->debug("get_doc($i)");
142     my $doc = $result->get_doc( $i );
143     if (! $doc) {
144     $log->warn("can't find result $i");
145     next;
146     }
147    
148     my $hash;
149    
150 dpavlin 81 foreach my $attr (@{ $args->{'attr'} }) {
151 dpavlin 80 my $val = $doc->attr( $attr );
152     $log->debug("attr $attr = ", $val || 'undef');
153 dpavlin 81 $hash->{$attr} = $self->{'iconv'}->convert( $val ) if (defined($val));
154 dpavlin 80 }
155    
156     if ($hash) {
157     push @results, $hash;
158     }
159    
160     }
161    
162     $log->debug("results " . Dumper( \@results ));
163    
164     $log->logconfess("expected to return array") unless (wantarray);
165    
166     return @results;
167     }
168    
169     =head2 confess
170    
171     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
172     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
173     C<die>.
174    
175     =cut
176    
177     sub confess {
178     my $self = shift;
179     if (my $log = $self->{'log'}) {
180     if ($log->can('confess')) {
181     $log->confess(@_);
182     } elsif ($log->can('fatal')) {
183     $log->fatal(@_);
184     } elsif ($log->can('error')) {
185     $log->error(@_);
186     } else {
187     die @_;
188     }
189     } else {
190     die @_;
191     }
192     }
193    
194     =head2 _get_logger
195    
196     For compatibility with same method from L<WebPAC::Common>, but without
197     need for it.
198    
199     =cut
200    
201     sub _get_logger {
202     my $self = shift;
203    
204     return $self->{'log'} || die "really need log!";
205     }
206    
207     =head1 AUTHOR
208    
209     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
210    
211     =head1 COPYRIGHT & LICENSE
212    
213     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
214    
215     This program is free software; you can redistribute it and/or modify it
216     under the same terms as Perl itself.
217    
218     =cut
219    
220     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26