/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 86 - (hide annotations)
Tue Nov 22 08:37:40 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 4362 byte(s)
 r9010@llin:  dpavlin | 2005-11-21 20:48:59 +0100
 make it less chatty

1 dpavlin 80 package WebPAC::Search::Estraier;
2    
3     use warnings;
4     use strict;
5    
6     use HyperEstraier;
7     use Text::Iconv;
8     use Data::Dumper;
9    
10     =head1 NAME
11    
12     WebPAC::Search::Estraier - search Hyper Estraier full text index
13    
14     =head1 VERSION
15    
16     Version 0.01
17    
18     =cut
19    
20     our $VERSION = '0.01';
21    
22     =head1 SYNOPSIS
23    
24     Search WebPAC data using Hyper Estraier full text index created with
25     L<WebPAC::Output::Estraier>.
26    
27     =head1 FUNCTIONS
28    
29     =head2 new
30    
31     Connect to Hyper Estraier index using HTTP
32    
33     my $est = new WebPAC::Search::Estraier(
34     url => 'http://localhost:1978/node/webpac2',
35     user => 'admin',
36     passwd => 'admin',
37     encoding => 'iso-8859-2',
38 dpavlin 81 log => $Log::Log4perl->log_object,
39 dpavlin 80 );
40    
41     Options are:
42    
43     =over 4
44    
45     =item url
46    
47     URI to C<estmaster> node
48    
49     =item user
50    
51     C<estmaster> user with read rights
52    
53     =item passwd
54    
55     password for user
56    
57     =item encoding
58    
59     character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60     (and it probably is). This encoding will be converted to C<UTF-8> for
61     Hyper Estraier.
62    
63 dpavlin 81 =item log
64    
65     L<Log::Log4perl> object or equivalent (C<< $c->log >> can be used in
66     L<Catalyst> and there is support for it).
67    
68 dpavlin 80 =back
69    
70     =cut
71    
72     sub new {
73     my $class = shift;
74     my $self = {@_};
75     bless($self, $class);
76    
77     my $log = $self->_get_logger;
78    
79     foreach my $p (qw/url user passwd/) {
80     $log->logdie("need $p") unless ($self->{$p});
81     }
82    
83     $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
84    
85     $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
86     $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
87    
88     my $encoding = $self->{'encoding'} || 'ISO-8859-2';
89     $log->info("using encoding $encoding");
90    
91 dpavlin 81 $self->{'iconv'} = new Text::Iconv('UTF-8', $encoding) or
92     $log->die("can't create conversion from UTF-8 to $encoding");
93 dpavlin 80
94     $self ? return $self : return undef;
95     }
96    
97    
98     =head2 search
99    
100     Locate items in index
101    
102 dpavlin 81 my @results = $est->search(
103 dpavlin 80 query => 'name of book or novel',
104 dpavlin 81 attr => qw/PersonalName TitleProper/,
105 dpavlin 80 );
106    
107 dpavlin 81 Results are returned as hash array with keys named by attributes
108    
109 dpavlin 80 =cut
110    
111     sub search {
112     my $self = shift;
113    
114     my $args = {@_};
115    
116     my $log = $self->_get_logger;
117    
118     $log->logconfess('need db in object') unless ($self->{'db'});
119     $log->logconfess('need attr') unless ($args->{'attr'});
120 dpavlin 81
121 dpavlin 80 $log->logconfess("need attr as array not " . ref($args->{'attr'}) ) unless (ref($args->{'attr'}) eq 'ARRAY');
122    
123 dpavlin 86 my $q = $args->{'query'};
124    
125 dpavlin 80 my $cond = HyperEstraier::Condition->new();
126     # $cond->add_attr("filepath ISTRINC $q");
127    
128 dpavlin 86 $cond->set_phrase( $self->{'iconv'}->convert( $q ) ) if ($q);
129 dpavlin 80 $cond->set_max( $args->{'max'} ) if ($args->{'max'});
130     # $cond->set_options( $HyperEstraier::Condition::SURE );
131     # $cond->set_order( 'NUMD' );
132    
133     my $result = $self->{'db'}->search($cond, 0) ||
134     $log->die("can't search for ", sub { Dumper( $args ) });
135    
136     my $hits = $result->doc_num;
137 dpavlin 86 $log->debug("found $hits hits for '$q'");
138 dpavlin 80
139     my @results;
140    
141 dpavlin 81 for my $i ( 0 .. ( $hits - 1 ) ) {
142 dpavlin 80
143 dpavlin 86 #$log->debug("get_doc($i)");
144 dpavlin 80 my $doc = $result->get_doc( $i );
145     if (! $doc) {
146     $log->warn("can't find result $i");
147     next;
148     }
149    
150     my $hash;
151    
152 dpavlin 81 foreach my $attr (@{ $args->{'attr'} }) {
153 dpavlin 80 my $val = $doc->attr( $attr );
154 dpavlin 86 #$log->debug("attr $attr = ", $val || 'undef');
155 dpavlin 81 $hash->{$attr} = $self->{'iconv'}->convert( $val ) if (defined($val));
156 dpavlin 80 }
157    
158     if ($hash) {
159     push @results, $hash;
160     }
161    
162     }
163    
164     $log->debug("results " . Dumper( \@results ));
165    
166     $log->logconfess("expected to return array") unless (wantarray);
167    
168     return @results;
169     }
170    
171     =head2 confess
172    
173     wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
174     C<< $log->error >> if they exists (like in L<Catalyst>), else plain
175     C<die>.
176    
177     =cut
178    
179     sub confess {
180     my $self = shift;
181     if (my $log = $self->{'log'}) {
182     if ($log->can('confess')) {
183     $log->confess(@_);
184     } elsif ($log->can('fatal')) {
185     $log->fatal(@_);
186     } elsif ($log->can('error')) {
187     $log->error(@_);
188     } else {
189     die @_;
190     }
191     } else {
192     die @_;
193     }
194     }
195    
196     =head2 _get_logger
197    
198     For compatibility with same method from L<WebPAC::Common>, but without
199     need for it.
200    
201     =cut
202    
203     sub _get_logger {
204     my $self = shift;
205    
206     return $self->{'log'} || die "really need log!";
207     }
208    
209     =head1 AUTHOR
210    
211     Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
212    
213     =head1 COPYRIGHT & LICENSE
214    
215     Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
216    
217     This program is free software; you can redistribute it and/or modify it
218     under the same terms as Perl itself.
219    
220     =cut
221    
222     1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26