/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 18 - (hide annotations)
Wed Jan 4 22:48:29 2006 UTC (18 years, 3 months ago) by dpavlin
File size: 8068 byte(s)
phrase
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7     our $VERSION = '0.00';
8    
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15     use Search::Estraier;
16     my $est = new Search::Estraier();
17    
18     =head1 DESCRIPTION
19    
20     This module is implementation of node API of Hyper Estraier. Since it's
21     perl-only module with dependencies only on standard perl modules, it will
22     run on all platforms on which perl runs. It doesn't require compilation
23     or Hyper Estraier development files on target machine.
24    
25     It is implemented as multiple packages which closly resamble Ruby
26     implementation. It also includes methods to manage nodes.
27    
28     =cut
29    
30 dpavlin 15 =head2 _s
31    
32     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
33    
34     my $text = $self->_s(" this is a text ");
35     $text = 'this is a text';
36    
37     =cut
38    
39     sub _s {
40     my $text = $_[1] || return;
41     $text =~ s/\s\s+/ /gs;
42     $text =~ s/^\s+//;
43     $text =~ s/\s+$//;
44     return $text;
45     }
46    
47 dpavlin 2 package Search::Estraier::Document;
48    
49 dpavlin 9 use Carp qw/croak confess/;
50 dpavlin 7
51 dpavlin 15 use Search::Estraier;
52     our @ISA = qw/Search::Estraier/;
53    
54 dpavlin 2 =head1 Search::Estraier::Document
55    
56 dpavlin 14 This class implements Document which is collection of attributes
57     (key=value), vectors (also key value) display text and hidden text.
58    
59 dpavlin 2 =head2 new
60    
61 dpavlin 14 Create new document, empty or from draft.
62    
63 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
64 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
65 dpavlin 2
66     =cut
67    
68     sub new {
69     my $class = shift;
70 dpavlin 14 my $self = {};
71 dpavlin 2 bless($self, $class);
72    
73 dpavlin 6 $self->{id} = -1;
74    
75 dpavlin 14 my $draft = shift;
76    
77     if ($draft) {
78     my $in_text = 0;
79     foreach my $line (split(/\n/, $draft)) {
80    
81     if ($in_text) {
82     if ($line =~ /^\t/) {
83     push @{ $self->{htexts} }, substr($line, 1);
84     } else {
85     push @{ $self->{dtexts} }, $line;
86     }
87     next;
88     }
89    
90     if ($line =~ m/^%VECTOR\t(.+)$/) {
91     my @fields = split(/\t/, $1);
92     for my $i ( 0 .. ($#fields - 1) ) {
93     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
94     $i++;
95     }
96     next;
97     } elsif ($line =~ m/^%/) {
98     # What is this? comment?
99     #warn "$line\n";
100     next;
101     } elsif ($line =~ m/^$/) {
102     $in_text = 1;
103     next;
104     } elsif ($line =~ m/^(.+)=(.+)$/) {
105     $self->{attrs}->{ $1 } = $2;
106     next;
107     }
108    
109     warn "draft ignored: $line\n";
110     }
111     }
112    
113 dpavlin 2 $self ? return $self : return undef;
114     }
115    
116 dpavlin 4
117 dpavlin 2 =head2 add_attr
118    
119 dpavlin 6 Add an attribute.
120    
121 dpavlin 2 $doc->add_attr( name => 'value' );
122    
123 dpavlin 9 Delete attribute using
124 dpavlin 5
125     $doc->add_attr( name => undef );
126    
127 dpavlin 2 =cut
128    
129     sub add_attr {
130     my $self = shift;
131     my $attrs = {@_};
132    
133     while (my ($name, $value) = each %{ $attrs }) {
134 dpavlin 9 if (! defined($value)) {
135 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
136 dpavlin 9 } else {
137 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
138 dpavlin 9 }
139 dpavlin 2 }
140 dpavlin 8
141     return 1;
142 dpavlin 2 }
143    
144 dpavlin 5
145     =head2 add_text
146    
147 dpavlin 6 Add a sentence of text.
148    
149 dpavlin 5 $doc->add_text('this is example text to display');
150    
151     =cut
152    
153     sub add_text {
154     my $self = shift;
155     my $text = shift;
156     return unless defined($text);
157    
158 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
159 dpavlin 5 }
160    
161    
162     =head2 add_hidden_text
163    
164 dpavlin 6 Add a hidden sentence.
165    
166 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
167    
168     =cut
169    
170     sub add_hidden_text {
171     my $self = shift;
172     my $text = shift;
173     return unless defined($text);
174    
175 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
176 dpavlin 5 }
177    
178 dpavlin 6 =head2 id
179    
180     Get the ID number of document. If the object has never been registred, C<-1> is returned.
181    
182     print $doc->id;
183    
184     =cut
185    
186     sub id {
187     my $self = shift;
188     return $self->{id};
189     }
190    
191 dpavlin 7 =head2 attr_names
192    
193 dpavlin 9 Returns array with attribute names from document object.
194 dpavlin 7
195     my @attrs = $doc->attr_names;
196    
197     =cut
198    
199     sub attr_names {
200     my $self = shift;
201 dpavlin 9 croak "attr_names return array, not scalar" if (! wantarray);
202 dpavlin 7 return sort keys %{ $self->{attrs} };
203     }
204    
205 dpavlin 8
206     =head2 attr
207    
208 dpavlin 9 Returns value of an attribute.
209 dpavlin 8
210     my $value = $doc->attr( 'attribute' );
211    
212     =cut
213    
214     sub attr {
215     my $self = shift;
216     my $name = shift;
217    
218     return $self->{'attrs'}->{ $name };
219     }
220    
221 dpavlin 9
222     =head2 texts
223    
224     Returns array with text sentences.
225    
226     my @texts = $doc->texts;
227    
228     =cut
229    
230     sub texts {
231     my $self = shift;
232 dpavlin 12 confess "texts return array, not scalar" if (! wantarray);
233 dpavlin 11 return @{ $self->{dtexts} };
234 dpavlin 9 }
235    
236 dpavlin 12 =head2 cat_texts
237    
238     Return whole text as single scalar.
239    
240     my $text = $doc->cat_texts;
241    
242     =cut
243    
244     sub cat_texts {
245     my $self = shift;
246     return join(' ',@{ $self->{dtexts} });
247     }
248    
249 dpavlin 5 =head2 dump_draft
250    
251 dpavlin 13 Dump draft data from document object.
252    
253 dpavlin 5 print $doc->dump_draft;
254    
255     =cut
256    
257     sub dump_draft {
258 dpavlin 13 my $self = shift;
259     my $draft;
260    
261     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
262     $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
263     }
264    
265     if ($self->{kwords}) {
266     $draft .= '%%VECTOR';
267     while (my ($key, $value) = each %{ $self->{kwords} }) {
268     $draft .= "\t$key\t$value";
269     }
270     $draft .= "\n";
271     }
272    
273     $draft .= "\n";
274    
275     $draft .= join("\n", @{ $self->{dtexts} }) . "\n";
276     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n";
277    
278     return $draft;
279 dpavlin 5 }
280    
281 dpavlin 4 =head2 delete
282 dpavlin 2
283 dpavlin 4 Empty document object
284 dpavlin 2
285 dpavlin 4 $doc->delete;
286    
287 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
288     convinience. Document objects which go out of scope will be destroyed
289     automatically.
290    
291 dpavlin 4 =cut
292    
293     sub delete {
294     my $self = shift;
295    
296 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
297 dpavlin 5 delete($self->{$data});
298     }
299 dpavlin 4
300 dpavlin 10 $self->{id} = -1;
301    
302 dpavlin 4 return 1;
303     }
304    
305    
306    
307 dpavlin 15 package Search::Estraier::Condition;
308 dpavlin 4
309 dpavlin 16 use Carp qw/confess croak/;
310    
311 dpavlin 15 use Search::Estraier;
312     our @ISA = qw/Search::Estraier/;
313 dpavlin 4
314 dpavlin 16 =head1 Search::Estraier::Condition
315    
316     =head2 new
317    
318     my $cond = new Search::HyperEstraier::Condition;
319    
320     =cut
321    
322     sub new {
323     my $class = shift;
324     my $self = {};
325     bless($self, $class);
326    
327     $self ? return $self : return undef;
328     }
329    
330     =head2 set_phrase
331    
332     $cond->set_phrase('search phrase');
333    
334     =cut
335    
336     sub set_phrase {
337     my $self = shift;
338     $self->{phrase} = $self->_s( shift );
339     }
340    
341     =head2 add_attr
342    
343     $cond->add_attr('@URI STRINC /~dpavlin/');
344    
345     =cut
346    
347     sub add_attr {
348     my $self = shift;
349     my $attr = shift || return;
350     push @{ $self->{attrs} }, $self->_s( $attr );
351     }
352    
353     =head2 set_order
354    
355     $cond->set_order('@mdate NUMD');
356    
357     =cut
358    
359     sub set_order {
360     my $self = shift;
361     $self->{order} = shift;
362     }
363    
364     =head2 set_max
365    
366     $cond->set_max(42);
367    
368     =cut
369    
370     sub set_max {
371     my $self = shift;
372     my $max = shift;
373     croak "set_max needs number" unless ($max =~ m/^\d+$/);
374     $self->{max} = $max;
375     }
376    
377     =head2 set_options
378    
379     $cond->set_options( SURE => 1 );
380    
381     =cut
382    
383 dpavlin 15 my $options = {
384     # check N-gram keys skipping by three
385     SURE => 1 << 0,
386     # check N-gram keys skipping by two
387     USUAL => 1 << 1,
388     # without TF-IDF tuning
389     FAST => 1 << 2,
390     # with the simplified phrase
391     AGITO => 1 << 3,
392     # check every N-gram key
393     NOIDF => 1 << 4,
394     # check N-gram keys skipping by one
395     SIMPLE => 1 << 10,
396     };
397    
398 dpavlin 16 sub set_options {
399     my $self = shift;
400     my $option = shift;
401     confess "unknown option" unless ($options->{$option});
402     $self->{options} ||= $options->{$option};
403 dpavlin 4 }
404    
405 dpavlin 18 =head2 phrase
406    
407     Return search phrase.
408    
409     print $cond->phrase;
410    
411     =cut
412    
413     sub phrase {
414     my $self = shift;
415     return $self->{phrase};
416     }
417    
418    
419 dpavlin 2 package Search::Estraier::Master;
420    
421     use Carp;
422    
423     =head1 Search::Estraier::Master
424    
425     Controll node master. This requires user with administration priviledges.
426    
427     =cut
428    
429     {
430     package RequestAgent;
431 dpavlin 15 our @ISA = qw(LWP::UserAgent);
432 dpavlin 2
433     sub new {
434     my $self = LWP::UserAgent::new(@_);
435     $self->agent("Search-Estraier/$Search::Estraer::VERSION");
436     $self;
437     }
438    
439     sub get_basic_credentials {
440     my($self, $realm, $uri) = @_;
441     # return ($user, $password);
442     }
443     }
444    
445    
446    
447     =head2 new
448    
449     Create new connection to node master.
450    
451     my $master = new Search::Estraier::Master(
452     url => 'http://localhost:1978',
453     user => 'admin',
454     passwd => 'admin',
455     );
456    
457     =cut
458    
459     sub new {
460     my $class = shift;
461     my $self = {@_};
462     bless($self, $class);
463    
464     foreach my $p (qw/url user passwd/) {
465     croak "need $p" unless ($self->{$p});
466     }
467    
468     $self ? return $self : return undef;
469     }
470    
471    
472    
473     ###
474    
475     =head1 EXPORT
476    
477     Nothing.
478    
479     =head1 SEE ALSO
480    
481     L<http://hyperestraier.sourceforge.net/>
482    
483     Hyper Estraier Ruby interface on which this module is based.
484    
485     =head1 AUTHOR
486    
487     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
488    
489    
490     =head1 COPYRIGHT AND LICENSE
491    
492 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
493 dpavlin 2
494     This library is free software; you can redistribute it and/or modify
495     it under the GPL v2 or later.
496    
497     =cut
498    
499     1;

  ViewVC Help
Powered by ViewVC 1.1.26