/[webpac2]/trunk/lib/WebPAC/Output/EstraierNative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WebPAC/Output/EstraierNative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 673 - (show annotations)
Mon Sep 11 20:49:37 2006 UTC (17 years, 8 months ago) by dpavlin
File size: 6186 byte(s)
index to temporary path and rename at at end [0.02]

1 package WebPAC::Output::EstraierNative;
2
3 use warnings;
4 use strict;
5
6 use base qw/WebPAC::Common/;
7
8 use Estraier;
9 use Encode qw/from_to/;
10 use Data::Dumper;
11 use LWP;
12 use URI::Escape;
13 use List::Util qw/first/;
14 use File::Path;
15
16 $Estraier::DEBUG = 1;
17
18
19 =head1 NAME
20
21 WebPAC::Output::EstraierNative - Create Hyper Estraier full text index using native bindings
22
23 =head1 VERSION
24
25 Version 0.2
26
27 =cut
28
29 our $VERSION = '0.2';
30
31 =head1 SYNOPSIS
32
33 Create full text index using Hyper Estraier index from data with
34 type C<search>.
35
36 =head1 FUNCTIONS
37
38 =head2 new
39
40 Connect to Hyper Estraier index using HTTP
41
42 my $est = new WebPAC::Output::Estraier(
43 path => 'casket/',
44 database => 'demo',
45 label => 'node label',
46 encoding => 'iso-8859-2',
47 clean => 1,
48 );
49
50 Options are:
51
52 =over 4
53
54 =item path
55
56 full or relative path to Hyper Estraier database
57
58 =item database
59
60 name of database from which data comes
61
62 =item label
63
64 label for node (optional)
65
66 =item encoding
67
68 character encoding of C<data_structure> if it's differenet than C<ISO-8859-2>
69 (and it probably is). This encoding will be converted to C<UTF-8> for
70 Hyper Estraier.
71
72 =back
73
74 Name of database will be used to form URI of documents in index.
75
76 =cut
77
78 sub new {
79 my $class = shift;
80 my $self = {@_};
81 bless($self, $class);
82
83 my $log = $self->_get_logger;
84
85 #$log->debug("self: ", sub { Dumper($self) });
86
87 foreach my $p (qw/path database/) {
88 $log->logdie("need $p") unless ($self->{$p});
89 }
90
91 $self->{encoding} ||= 'ISO-8859-2';
92
93 $self->{label} ||= "WebPAC $self->{database}";
94
95 my $path = 'casket';
96 $path =~ s!/+$!!;
97
98 $self->{_casket_path} = $path;
99
100 $path .= '.tmp';
101 if (-e $path) {
102 rmtree($path) || $log->logdie("can't remove old temporary directory $path: $!");
103 }
104 mkpath($path) || $log->logdie("can't create new temporary directory $path: $!");
105
106 my $db = new Database();
107 unless($db->open($path, Database::DBWRITER | Database::DBCREAT)) {
108 $log->logdie("can't open $path: ", $db->err_msg($db->error()) );
109 }
110
111 $self->{db} = $db;
112
113 $log->info("using ", $self->{clean} ? "new " : "", "index $self->{path} '$self->{label}' with encoding $self->{encoding}");
114
115 $self ? return $self : return undef;
116 }
117
118
119 =head2 add
120
121 Adds one entry to database.
122
123 $est->add(
124 id => 42,
125 ds => $ds,
126 type => 'display',
127 text => 'optional text from which snippet is created',
128 );
129
130 This function will create entries in index using following URI format:
131
132 C<file:///type/database%20name/000>
133
134 Each tag in C<data_structure> with specified C<type> will create one
135 attribute and corresponding hidden text (used for search).
136
137 =cut
138
139 sub add {
140 my $self = shift;
141
142 my $args = {@_};
143
144 my $log = $self->_get_logger;
145
146 my $database = $self->{'database'} || $log->logconfess('no database in $self');
147 $log->logconfess('need db in object') unless ($self->{'db'});
148
149 foreach my $p (qw/id ds type/) {
150 $log->logdie("need $p") unless ($args->{$p});
151 }
152
153 my $type = $args->{'type'};
154 my $id = $args->{'id'};
155
156 my $uri = "file:///$type/$database/$id";
157 $log->debug("creating $uri");
158
159 my $doc = new Document();
160 $doc->add_attr('@uri', $self->convert($uri) );
161
162 # store type and database name
163 $doc->add_attr('_database', $database );
164 $doc->add_attr('_type', $type );
165
166 $log->debug("ds = ", sub { Dumper($args->{'ds'}) } );
167
168 # filter all tags which have type defined
169 my @tags = grep {
170 ref($args->{'ds'}->{$_}) eq 'HASH' && defined( $args->{'ds'}->{$_}->{$type} )
171 } keys %{ $args->{'ds'} };
172
173 $log->debug("tags = ", join(",", @tags));
174
175 return unless (@tags);
176
177 foreach my $tag (@tags) {
178
179 $log->debug("$tag :: $type == ",Dumper( $args->{'ds'}->{$tag}->{$type} ) );
180
181 my $vals = join(" ", @{ $args->{'ds'}->{$tag}->{$type} });
182
183 next if (! $vals);
184
185 $vals = join(" ") if (ref($vals) eq 'ARRAY');
186
187 $vals = $self->convert( $vals ) or
188 $log->logdie("can't convert '$vals' to UTF-8");
189
190 $doc->add_attr( $tag, $vals );
191 $doc->add_hidden_text( $vals );
192 }
193
194 my $text = $args->{'text'};
195 if ( $text ) {
196 $text = $self->convert( $text ) or
197 $log->logdie("can't convert '$text' to UTF-8");
198 $doc->add_text( $text );
199 }
200
201 $log->debug("adding ", sub { $doc->dump_draft } );
202 $self->{'db'}->put_doc($doc, Database::PDCLEAN) || $log->warn("can't add document $uri with draft " . $doc->dump_draft . " to node " . $self->{path} . " status: " . $self->{db}->status());
203
204 return 1;
205 }
206
207 =head2 add_link
208
209 $est->add_link(
210 from => 'ps',
211 to => 'webpac2',
212 credit => 10000,
213 );
214
215 =cut
216
217 sub add_link {
218 my $self = shift;
219
220 my $args = {@_};
221 my $log = $self->_get_logger;
222
223 $log->warn("add_link is not implemented");
224 return;
225
226 foreach my $p (qw/from to credit/) {
227 $log->logdie("need $p") unless ($args->{$p});
228 }
229
230 my $node = first { $_->{name} eq $args->{to} } $self->master( action => 'nodelist' );
231
232 if (! $node) {
233 $log->warn("can't find node $args->{to}, skipping link creaton");
234 return;
235 }
236
237 my $label = $node->{label};
238
239 if (! $label) {
240 $log->warn("can't find label for $args->{to}, skipping link creaton");
241 return;
242 }
243
244 $log->debug("using label $label for $args->{to}");
245
246 return $self->{db}->set_link(
247 $self->{masterurl} . '/node/' . $args->{to},
248 $label,
249 $args->{credit},
250 );
251 }
252
253
254 =head2 finish
255
256 Close index and rename of to final path
257
258 $est->finish;
259
260 =cut
261
262 sub finish {
263 my $self = shift;
264
265 my $log = $self->_get_logger;
266 $log->info("closing Hyper Estraier index make it current...");
267
268 $self->{db}->close || $log->logdie("can't close index");
269
270 my $path = $self->{_casket_path} || $log->logdie("no _casket_path?");
271
272 if (-e $path) {
273 $log->warn("removing old $path");
274 rmtree($path) || $log->logdie("can't remove old temporary directory $path: $!");
275 }
276
277 rename $path . '.tmp', $path || $log->logdie("can't rename ${path}.tmp -> $path: $!");
278
279 }
280
281
282 =head2 convert
283
284 my $utf8_string = $self->convert('string in codepage');
285
286 =cut
287
288 sub convert {
289 my $self = shift;
290
291 my $text = shift || return;
292 from_to($text, $self->{encoding}, 'UTF-8');
293 return $text;
294 }
295
296 =head1 AUTHOR
297
298 Dobrica Pavlinusic, C<< <dpavlin@rot13.org> >>
299
300 =head1 COPYRIGHT & LICENSE
301
302 Copyright 2005 Dobrica Pavlinusic, All Rights Reserved.
303
304 This program is free software; you can redistribute it and/or modify it
305 under the same terms as Perl itself.
306
307 =cut
308
309 1; # End of WebPAC::Output::Estraier

  ViewVC Help
Powered by ViewVC 1.1.26