/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 18 - (show annotations)
Wed Jan 4 22:48:29 2006 UTC (18 years, 3 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 8068 byte(s)
phrase
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.00';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 use Search::Estraier;
16 my $est = new Search::Estraier();
17
18 =head1 DESCRIPTION
19
20 This module is implementation of node API of Hyper Estraier. Since it's
21 perl-only module with dependencies only on standard perl modules, it will
22 run on all platforms on which perl runs. It doesn't require compilation
23 or Hyper Estraier development files on target machine.
24
25 It is implemented as multiple packages which closly resamble Ruby
26 implementation. It also includes methods to manage nodes.
27
28 =cut
29
30 =head2 _s
31
32 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
33
34 my $text = $self->_s(" this is a text ");
35 $text = 'this is a text';
36
37 =cut
38
39 sub _s {
40 my $text = $_[1] || return;
41 $text =~ s/\s\s+/ /gs;
42 $text =~ s/^\s+//;
43 $text =~ s/\s+$//;
44 return $text;
45 }
46
47 package Search::Estraier::Document;
48
49 use Carp qw/croak confess/;
50
51 use Search::Estraier;
52 our @ISA = qw/Search::Estraier/;
53
54 =head1 Search::Estraier::Document
55
56 This class implements Document which is collection of attributes
57 (key=value), vectors (also key value) display text and hidden text.
58
59 =head2 new
60
61 Create new document, empty or from draft.
62
63 my $doc = new Search::HyperEstraier::Document;
64 my $doc2 = new Search::HyperEstraier::Document( $draft );
65
66 =cut
67
68 sub new {
69 my $class = shift;
70 my $self = {};
71 bless($self, $class);
72
73 $self->{id} = -1;
74
75 my $draft = shift;
76
77 if ($draft) {
78 my $in_text = 0;
79 foreach my $line (split(/\n/, $draft)) {
80
81 if ($in_text) {
82 if ($line =~ /^\t/) {
83 push @{ $self->{htexts} }, substr($line, 1);
84 } else {
85 push @{ $self->{dtexts} }, $line;
86 }
87 next;
88 }
89
90 if ($line =~ m/^%VECTOR\t(.+)$/) {
91 my @fields = split(/\t/, $1);
92 for my $i ( 0 .. ($#fields - 1) ) {
93 $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
94 $i++;
95 }
96 next;
97 } elsif ($line =~ m/^%/) {
98 # What is this? comment?
99 #warn "$line\n";
100 next;
101 } elsif ($line =~ m/^$/) {
102 $in_text = 1;
103 next;
104 } elsif ($line =~ m/^(.+)=(.+)$/) {
105 $self->{attrs}->{ $1 } = $2;
106 next;
107 }
108
109 warn "draft ignored: $line\n";
110 }
111 }
112
113 $self ? return $self : return undef;
114 }
115
116
117 =head2 add_attr
118
119 Add an attribute.
120
121 $doc->add_attr( name => 'value' );
122
123 Delete attribute using
124
125 $doc->add_attr( name => undef );
126
127 =cut
128
129 sub add_attr {
130 my $self = shift;
131 my $attrs = {@_};
132
133 while (my ($name, $value) = each %{ $attrs }) {
134 if (! defined($value)) {
135 delete( $self->{attrs}->{ $self->_s($name) } );
136 } else {
137 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
138 }
139 }
140
141 return 1;
142 }
143
144
145 =head2 add_text
146
147 Add a sentence of text.
148
149 $doc->add_text('this is example text to display');
150
151 =cut
152
153 sub add_text {
154 my $self = shift;
155 my $text = shift;
156 return unless defined($text);
157
158 push @{ $self->{dtexts} }, $self->_s($text);
159 }
160
161
162 =head2 add_hidden_text
163
164 Add a hidden sentence.
165
166 $doc->add_hidden_text('this is example text just for search');
167
168 =cut
169
170 sub add_hidden_text {
171 my $self = shift;
172 my $text = shift;
173 return unless defined($text);
174
175 push @{ $self->{htexts} }, $self->_s($text);
176 }
177
178 =head2 id
179
180 Get the ID number of document. If the object has never been registred, C<-1> is returned.
181
182 print $doc->id;
183
184 =cut
185
186 sub id {
187 my $self = shift;
188 return $self->{id};
189 }
190
191 =head2 attr_names
192
193 Returns array with attribute names from document object.
194
195 my @attrs = $doc->attr_names;
196
197 =cut
198
199 sub attr_names {
200 my $self = shift;
201 croak "attr_names return array, not scalar" if (! wantarray);
202 return sort keys %{ $self->{attrs} };
203 }
204
205
206 =head2 attr
207
208 Returns value of an attribute.
209
210 my $value = $doc->attr( 'attribute' );
211
212 =cut
213
214 sub attr {
215 my $self = shift;
216 my $name = shift;
217
218 return $self->{'attrs'}->{ $name };
219 }
220
221
222 =head2 texts
223
224 Returns array with text sentences.
225
226 my @texts = $doc->texts;
227
228 =cut
229
230 sub texts {
231 my $self = shift;
232 confess "texts return array, not scalar" if (! wantarray);
233 return @{ $self->{dtexts} };
234 }
235
236 =head2 cat_texts
237
238 Return whole text as single scalar.
239
240 my $text = $doc->cat_texts;
241
242 =cut
243
244 sub cat_texts {
245 my $self = shift;
246 return join(' ',@{ $self->{dtexts} });
247 }
248
249 =head2 dump_draft
250
251 Dump draft data from document object.
252
253 print $doc->dump_draft;
254
255 =cut
256
257 sub dump_draft {
258 my $self = shift;
259 my $draft;
260
261 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
262 $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
263 }
264
265 if ($self->{kwords}) {
266 $draft .= '%%VECTOR';
267 while (my ($key, $value) = each %{ $self->{kwords} }) {
268 $draft .= "\t$key\t$value";
269 }
270 $draft .= "\n";
271 }
272
273 $draft .= "\n";
274
275 $draft .= join("\n", @{ $self->{dtexts} }) . "\n";
276 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n";
277
278 return $draft;
279 }
280
281 =head2 delete
282
283 Empty document object
284
285 $doc->delete;
286
287 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
288 convinience. Document objects which go out of scope will be destroyed
289 automatically.
290
291 =cut
292
293 sub delete {
294 my $self = shift;
295
296 foreach my $data (qw/attrs dtexts stexts kwords/) {
297 delete($self->{$data});
298 }
299
300 $self->{id} = -1;
301
302 return 1;
303 }
304
305
306
307 package Search::Estraier::Condition;
308
309 use Carp qw/confess croak/;
310
311 use Search::Estraier;
312 our @ISA = qw/Search::Estraier/;
313
314 =head1 Search::Estraier::Condition
315
316 =head2 new
317
318 my $cond = new Search::HyperEstraier::Condition;
319
320 =cut
321
322 sub new {
323 my $class = shift;
324 my $self = {};
325 bless($self, $class);
326
327 $self ? return $self : return undef;
328 }
329
330 =head2 set_phrase
331
332 $cond->set_phrase('search phrase');
333
334 =cut
335
336 sub set_phrase {
337 my $self = shift;
338 $self->{phrase} = $self->_s( shift );
339 }
340
341 =head2 add_attr
342
343 $cond->add_attr('@URI STRINC /~dpavlin/');
344
345 =cut
346
347 sub add_attr {
348 my $self = shift;
349 my $attr = shift || return;
350 push @{ $self->{attrs} }, $self->_s( $attr );
351 }
352
353 =head2 set_order
354
355 $cond->set_order('@mdate NUMD');
356
357 =cut
358
359 sub set_order {
360 my $self = shift;
361 $self->{order} = shift;
362 }
363
364 =head2 set_max
365
366 $cond->set_max(42);
367
368 =cut
369
370 sub set_max {
371 my $self = shift;
372 my $max = shift;
373 croak "set_max needs number" unless ($max =~ m/^\d+$/);
374 $self->{max} = $max;
375 }
376
377 =head2 set_options
378
379 $cond->set_options( SURE => 1 );
380
381 =cut
382
383 my $options = {
384 # check N-gram keys skipping by three
385 SURE => 1 << 0,
386 # check N-gram keys skipping by two
387 USUAL => 1 << 1,
388 # without TF-IDF tuning
389 FAST => 1 << 2,
390 # with the simplified phrase
391 AGITO => 1 << 3,
392 # check every N-gram key
393 NOIDF => 1 << 4,
394 # check N-gram keys skipping by one
395 SIMPLE => 1 << 10,
396 };
397
398 sub set_options {
399 my $self = shift;
400 my $option = shift;
401 confess "unknown option" unless ($options->{$option});
402 $self->{options} ||= $options->{$option};
403 }
404
405 =head2 phrase
406
407 Return search phrase.
408
409 print $cond->phrase;
410
411 =cut
412
413 sub phrase {
414 my $self = shift;
415 return $self->{phrase};
416 }
417
418
419 package Search::Estraier::Master;
420
421 use Carp;
422
423 =head1 Search::Estraier::Master
424
425 Controll node master. This requires user with administration priviledges.
426
427 =cut
428
429 {
430 package RequestAgent;
431 our @ISA = qw(LWP::UserAgent);
432
433 sub new {
434 my $self = LWP::UserAgent::new(@_);
435 $self->agent("Search-Estraier/$Search::Estraer::VERSION");
436 $self;
437 }
438
439 sub get_basic_credentials {
440 my($self, $realm, $uri) = @_;
441 # return ($user, $password);
442 }
443 }
444
445
446
447 =head2 new
448
449 Create new connection to node master.
450
451 my $master = new Search::Estraier::Master(
452 url => 'http://localhost:1978',
453 user => 'admin',
454 passwd => 'admin',
455 );
456
457 =cut
458
459 sub new {
460 my $class = shift;
461 my $self = {@_};
462 bless($self, $class);
463
464 foreach my $p (qw/url user passwd/) {
465 croak "need $p" unless ($self->{$p});
466 }
467
468 $self ? return $self : return undef;
469 }
470
471
472
473 ###
474
475 =head1 EXPORT
476
477 Nothing.
478
479 =head1 SEE ALSO
480
481 L<http://hyperestraier.sourceforge.net/>
482
483 Hyper Estraier Ruby interface on which this module is based.
484
485 =head1 AUTHOR
486
487 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
488
489
490 =head1 COPYRIGHT AND LICENSE
491
492 Copyright (C) 2005-2006 by Dobrica Pavlinusic
493
494 This library is free software; you can redistribute it and/or modify
495 it under the GPL v2 or later.
496
497 =cut
498
499 1;

  ViewVC Help
Powered by ViewVC 1.1.26