/[webpac2]/trunk/lib/WebPAC/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 80 - (show annotations)
Mon Nov 21 14:42:16 2005 UTC (18 years, 5 months ago) by dpavlin
File size: 4302 byte(s)
 r9001@llin:  dpavlin | 2005-11-21 15:29:13 +0100
 added WebPAC::Search::Estraier which should be callable from outside
 WebPAC (e.g. Catalyst)

1 package WebPAC::Search::Estraier;
2
3 use warnings;
4 use strict;
5
6 use HyperEstraier;
7 use Text::Iconv;
8 use Data::Dumper;
9
10 =head1 NAME
11
12 WebPAC::Search::Estraier - search Hyper Estraier full text index
13
14 =head1 VERSION
15
16 Version 0.01
17
18 =cut
19
20 our $VERSION = '0.01';
21
22 =head1 SYNOPSIS
23
24 Search WebPAC data using Hyper Estraier full text index created with
25 L<WebPAC::Output::Estraier>.
26
27 =head1 FUNCTIONS
28
29 =head2 new
30
31 Connect to Hyper Estraier index using HTTP
32
33 my $est = new WebPAC::Search::Estraier(
34 url => 'http://localhost:1978/node/webpac2',
35 user => 'admin',
36 passwd => 'admin',
37 encoding => 'iso-8859-2',
38 log => $Log::Log4Perl->log_object,
39 );
40
41 Options are:
42
43 =over 4
44
45 =item url
46
47 URI to C<estmaster> node
48
49 =item user
50
51 C<estmaster> user with read rights
52
53 =item passwd
54
55 password for user
56
57 =item encoding
58
59 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
60 (and it probably is). This encoding will be converted to C<UTF-8> for
61 Hyper Estraier.
62
63 =back
64
65 =cut
66
67 sub new {
68 my $class = shift;
69 my $self = {@_};
70 bless($self, $class);
71
72 my $log = $self->_get_logger;
73
74 foreach my $p (qw/url user passwd/) {
75 $log->logdie("need $p") unless ($self->{$p});
76 }
77
78 $log->info("opening Hyper Estraier index $self->{'url'} as $self->{'user'}");
79
80 $self->{'db'} = HyperEstraier::Node->new($self->{'url'});
81 $self->{'db'}->set_auth($self->{'user'}, $self->{'passwd'});
82
83 my $encoding = $self->{'encoding'} || 'ISO-8859-2';
84 $log->info("using encoding $encoding");
85
86 $self->{'iconv'} = new Text::Iconv($encoding, 'UTF-8') or
87 $log->die("can't create conversion from $encoding to UTF-8");
88
89 $self ? return $self : return undef;
90 }
91
92
93 =head2 search
94
95 Locate items in index
96
97 $est->search(
98 query => 'name of book or novel',
99 attr => [ qw/PersonalName Title/ ],
100 );
101
102 =cut
103
104 sub search {
105 my $self = shift;
106
107 my $args = {@_};
108
109 my $log = $self->_get_logger;
110
111 $log->logconfess('need db in object') unless ($self->{'db'});
112 $log->logconfess('need attr') unless ($args->{'attr'});
113 $log->logconfess("need attr as array not " . ref($args->{'attr'}) ) unless (ref($args->{'attr'}) eq 'ARRAY');
114
115 # my $database = $self->{'database'} || $log->logconfess('no database in $self');
116 # foreach my $p (qw/id ds type/) {
117 # $log->logconfess("need $p") unless ($args->{$p});
118 # }
119
120 my $cond = HyperEstraier::Condition->new();
121 # $cond->add_attr("filepath ISTRINC $q");
122
123 $cond->set_phrase( $args->{'query'} ) if ($args->{'query'});
124 $cond->set_max( $args->{'max'} ) if ($args->{'max'});
125 # $cond->set_options( $HyperEstraier::Condition::SURE );
126 # $cond->set_order( 'NUMD' );
127
128 my $result = $self->{'db'}->search($cond, 0) ||
129 $log->die("can't search for ", sub { Dumper( $args ) });
130
131 my $hits = $result->doc_num;
132 $log->debug("found $hits hits");
133
134 my @attrs = $args->{'attr'} || $self->confess("need attr");
135
136 my @results;
137
138 for my $i ( 0 .. $hits ) {
139
140 $log->debug("get_doc($i)");
141 my $doc = $result->get_doc( $i );
142 if (! $doc) {
143 $log->warn("can't find result $i");
144 next;
145 }
146
147 my $hash;
148
149 foreach my $attr (@attrs) {
150 my $val = $doc->attr( $attr );
151 $log->debug("attr $attr = ", $val || 'undef');
152 $hash->{$attr} = $val if (defined($val));
153 }
154
155 if ($hash) {
156 push @results, $hash;
157 }
158
159 }
160
161 $log->debug("results " . Dumper( \@results ));
162
163 $log->logconfess("expected to return array") unless (wantarray);
164
165 return @results;
166 }
167
168 =head2 confess
169
170 wrapper around L<Log::Log4perl> C<confess> or C<< $log->fatal >> or
171 C<< $log->error >> if they exists (like in L<Catalyst>), else plain
172 C<die>.
173
174 =cut
175
176 sub confess {
177 my $self = shift;
178 if (my $log = $self->{'log'}) {
179 if ($log->can('confess')) {
180 $log->confess(@_);
181 } elsif ($log->can('fatal')) {
182 $log->fatal(@_);
183 } elsif ($log->can('error')) {
184 $log->error(@_);
185 } else {
186 die @_;
187 }
188 } else {
189 die @_;
190 }
191 }
192
193 =head2 _get_logger
194
195 For compatibility with same method from L<WebPAC::Common>, but without
196 need for it.
197
198 =cut
199
200 sub _get_logger {
201 my $self = shift;
202
203 return $self->{'log'} || die "really need log!";
204 }
205
206 =head1 AUTHOR
207
208 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
209
210 =head1 COPYRIGHT & LICENSE
211
212 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
213
214 This program is free software; you can redistribute it and/or modify it
215 under the same terms as Perl itself.
216
217 =cut
218
219 1; # End of WebPAC::Search::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26