/[webpac2]/trunk/lib/WebPAC/Output/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Output/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 458 - (show annotations)
Wed May 10 14:08:15 2006 UTC (17 years, 11 months ago) by dpavlin
File size: 5623 byte(s)
 r575@llin:  dpavlin | 2006-05-10 16:10:56 +0200
 use Search::Estraier 0.06 new master API to create nodes, so code here is siplified

1 package WebPAC::Output::Estraier;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common/;
7
8 use Search::Estraier 0.06;
9 use Encode qw/from_to/;
10 use Data::Dumper;
11 use LWP;
12 use URI::Escape;
13
14 =head1 NAME
15
16 WebPAC::Output::Estraier - Create Hyper Estraier full text index
17
18 =head1 VERSION
19
20 Version 0.11
21
22 =cut
23
24 our $VERSION = '0.11';
25
26 =head1 SYNOPSIS
27
28 Create full text index using Hyper Estraier index from data with
29 type C<search>.
30
31 =head1 FUNCTIONS
32
33 =head2 new
34
35 Connect to Hyper Estraier index using HTTP
36
37 my $est = new WebPAC::Output::Estraier(
38 masterurl => 'http://localhost:1978/',
39 user => 'admin',
40 passwd => 'admin',
41 database => 'demo',
42 label => 'node label',
43 encoding => 'iso-8859-2',
44 clean => 1,
45 );
46
47 Options are:
48
49 =over 4
50
51 =item masterurl
52
53 URI to C<estmaster> node
54
55 =item user
56
57 C<estmaster> user with sufficient rights
58
59 =item passwd
60
61 password for user
62
63 =item database
64
65 name of database from which data comes
66
67 =item label
68
69 label for node (optional)
70
71 =item encoding
72
73 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
74 (and it probably is). This encoding will be converted to C<UTF-8> for
75 Hyper Estraier.
76
77 =back
78
79 Name of database will be used to form URI of documents in index.
80
81 =cut
82
83 sub new {
84 my $class = shift;
85 my $self = {@_};
86 bless($self, $class);
87
88 my $log = $self->_get_logger;
89
90 #$log->debug("self: ", sub { Dumper($self) });
91
92 foreach my $p (qw/masterurl user passwd database/) {
93 $log->logdie("need $p") unless ($self->{$p});
94 }
95
96 $self->{encoding} ||= 'ISO-8859-2';
97
98 my $url = $self->{masterurl} . '/node/' . $self->{database};
99 $self->{url} = $url;
100
101 $self->{db} = Search::Estraier::Node->new(
102 url => $url,
103 user => $self->{user},
104 passwd => $self->{passwd},
105 debug => $self->{debug},
106 create => 1,
107 label => "WebPAC $self->{database}",
108 );
109
110 $log->info("using index $self->{url} with encoding $self->{encoding}");
111
112 if ($self->{clean}) {
113 $log->debug("clean $self->{database}");
114 $self->master( action => 'nodeclr', name => $self->{database} );
115 } else {
116 $log->debug("opening index $self->{url}");
117 }
118
119 $self ? return $self : return undef;
120 }
121
122
123 =head2 add
124
125 Adds one entry to database.
126
127 $est->add(
128 id => 42,
129 ds => $ds,
130 type => 'display',
131 text => 'optional text from which snippet is created',
132 );
133
134 This function will create entries in index using following URI format:
135
136 C<file:///type/database%20name/000>
137
138 Each tag in C<data_structure> with specified C<type> will create one
139 attribute and corresponding hidden text (used for search).
140
141 =cut
142
143 sub add {
144 my $self = shift;
145
146 my $args = {@_};
147
148 my $log = $self->_get_logger;
149
150 my $database = $self->{'database'} || $log->logconfess('no database in $self');
151 $log->logconfess('need db in object') unless ($self->{'db'});
152
153 foreach my $p (qw/id ds type/) {
154 $log->logdie("need $p") unless ($args->{$p});
155 }
156
157 my $type = $args->{'type'};
158 my $id = $args->{'id'};
159
160 my $uri = "file:///$type/$database/$id";
161 $log->debug("creating $uri");
162
163 my $doc = Search::Estraier::Document->new;
164 $doc->add_attr('@uri', $self->convert($uri) );
165
166 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
167
168 # filter all tags which have type defined
169 my @tags = grep {
170 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
171 } keys %{ $args->{'ds'} };
172
173 $log->debug("tags = ", join(",", @tags));
174
175 return unless (@tags);
176
177 foreach my $tag (@tags) {
178
179 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
180
181 next if (! $vals);
182
183 $vals = $self->convert( $vals ) or
184 $log->logdie("can't convert '$vals' to UTF-8");
185
186 $doc->add_attr( $tag, $vals );
187 $doc->add_hidden_text( $vals );
188 }
189
190 my $text = $args->{'text'};
191 if ( $text ) {
192 $text = $self->convert( $text ) or
193 $log->logdie("can't convert '$text' to UTF-8");
194 $doc->add_text( $text );
195 }
196
197 $log->debug("adding ", sub { $doc->dump_draft } );
198 $self->{'db'}->put_doc($doc) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{url} . " status: " . $self->{db}->status());
199
200 return 1;
201 }
202
203 =head2 add_link
204
205 $est->add_link(
206 from => 'ps',
207 to => 'webpac2',
208 credit => 10000,
209 );
210
211 =cut
212
213 sub add_link {
214 my $self = shift;
215
216 my $args = {@_};
217 my $log = $self->_get_logger;
218
219 my @labels = $self->master( action => 'nodelist' );
220
221 $log->debug("got labels: ", join("|", @labels));
222
223 @labels = grep(/^$args->{to}\t/, @labels);
224 my $label = shift @labels;
225 (undef,$label) = split(/\t/, $label) if ($label);
226
227 if (! $label) {
228 $log->warn("can't find label for $args->{to}, skipping link creaton");
229 return;
230 }
231
232 $log->debug("using label $label for $args->{to}");
233
234 return $self->estcall(
235 validate => 'node',
236 action => '_set_link',
237 rest_url => $self->{masterurl} . '/node/' . $args->{from} . '/_set_link' ,
238 url => $self->{masterurl} . '/node/' . $args->{to},
239 label => $label,
240 credit => $args->{credit},
241 );
242 }
243
244
245 =head2 master
246
247 Issue administrative commands to C<estmaster> process. See documentation for
248 C<master> in L<Search::Estraier>::Node.
249
250 $self->master(
251 action => 'nodeclr',
252 name => 'foobar',
253 );
254
255 =cut
256
257 sub master {
258 my $self = shift;
259 $self->{db}->master( @_ );
260 }
261
262
263 =head2 convert
264
265 my $utf8_string = $self->convert('string in codepage');
266
267 =cut
268
269 sub convert {
270 my $self = shift;
271
272 my $text = shift || return;
273 from_to($text, $self->{encoding}, 'UTF-8');
274 return $text;
275 }
276
277 =head1 AUTHOR
278
279 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
280
281 =head1 COPYRIGHT & LICENSE
282
283 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
284
285 This program is free software; you can redistribute it and/or modify it
286 under the same terms as Perl itself.
287
288 =cut
289
290 1; # End of WebPAC::Output::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26