/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 15 - (hide annotations)
Wed Jan 4 22:24:57 2006 UTC (18 years, 3 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 7064 byte(s)
begin work on Search::Estraier::Condition, _s moved to Search::Estraier which other modules inherit
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7     our $VERSION = '0.00';
8    
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15     use Search::Estraier;
16     my $est = new Search::Estraier();
17    
18     =head1 DESCRIPTION
19    
20     This module is implementation of node API of Hyper Estraier. Since it's
21     perl-only module with dependencies only on standard perl modules, it will
22     run on all platforms on which perl runs. It doesn't require compilation
23     or Hyper Estraier development files on target machine.
24    
25     It is implemented as multiple packages which closly resamble Ruby
26     implementation. It also includes methods to manage nodes.
27    
28     =cut
29    
30 dpavlin 15 =head2 _s
31    
32     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
33    
34     my $text = $self->_s(" this is a text ");
35     $text = 'this is a text';
36    
37     =cut
38    
39     sub _s {
40     my $text = $_[1] || return;
41     $text =~ s/\s\s+/ /gs;
42     $text =~ s/^\s+//;
43     $text =~ s/\s+$//;
44     return $text;
45     }
46    
47 dpavlin 2 package Search::Estraier::Document;
48    
49 dpavlin 9 use Carp qw/croak confess/;
50 dpavlin 7
51 dpavlin 15 use Search::Estraier;
52     our @ISA = qw/Search::Estraier/;
53    
54 dpavlin 2 =head1 Search::Estraier::Document
55    
56 dpavlin 14 This class implements Document which is collection of attributes
57     (key=value), vectors (also key value) display text and hidden text.
58    
59 dpavlin 2 =head2 new
60    
61 dpavlin 14 Create new document, empty or from draft.
62    
63 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
64 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
65 dpavlin 2
66     =cut
67    
68     sub new {
69     my $class = shift;
70 dpavlin 14 my $self = {};
71 dpavlin 2 bless($self, $class);
72    
73 dpavlin 6 $self->{id} = -1;
74    
75 dpavlin 14 my $draft = shift;
76    
77     if ($draft) {
78     my $in_text = 0;
79     foreach my $line (split(/\n/, $draft)) {
80    
81     if ($in_text) {
82     if ($line =~ /^\t/) {
83     push @{ $self->{htexts} }, substr($line, 1);
84     } else {
85     push @{ $self->{dtexts} }, $line;
86     }
87     next;
88     }
89    
90     if ($line =~ m/^%VECTOR\t(.+)$/) {
91     my @fields = split(/\t/, $1);
92     for my $i ( 0 .. ($#fields - 1) ) {
93     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
94     $i++;
95     }
96     next;
97     } elsif ($line =~ m/^%/) {
98     # What is this? comment?
99     #warn "$line\n";
100     next;
101     } elsif ($line =~ m/^$/) {
102     $in_text = 1;
103     next;
104     } elsif ($line =~ m/^(.+)=(.+)$/) {
105     $self->{attrs}->{ $1 } = $2;
106     next;
107     }
108    
109     warn "draft ignored: $line\n";
110     }
111     }
112    
113 dpavlin 2 $self ? return $self : return undef;
114     }
115    
116 dpavlin 4
117 dpavlin 2 =head2 add_attr
118    
119 dpavlin 6 Add an attribute.
120    
121 dpavlin 2 $doc->add_attr( name => 'value' );
122    
123 dpavlin 9 Delete attribute using
124 dpavlin 5
125     $doc->add_attr( name => undef );
126    
127 dpavlin 2 =cut
128    
129     sub add_attr {
130     my $self = shift;
131     my $attrs = {@_};
132    
133     while (my ($name, $value) = each %{ $attrs }) {
134 dpavlin 9 if (! defined($value)) {
135 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
136 dpavlin 9 } else {
137 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
138 dpavlin 9 }
139 dpavlin 2 }
140 dpavlin 8
141     return 1;
142 dpavlin 2 }
143    
144 dpavlin 5
145     =head2 add_text
146    
147 dpavlin 6 Add a sentence of text.
148    
149 dpavlin 5 $doc->add_text('this is example text to display');
150    
151     =cut
152    
153     sub add_text {
154     my $self = shift;
155     my $text = shift;
156     return unless defined($text);
157    
158 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
159 dpavlin 5 }
160    
161    
162     =head2 add_hidden_text
163    
164 dpavlin 6 Add a hidden sentence.
165    
166 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
167    
168     =cut
169    
170     sub add_hidden_text {
171     my $self = shift;
172     my $text = shift;
173     return unless defined($text);
174    
175 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
176 dpavlin 5 }
177    
178 dpavlin 6 =head2 id
179    
180     Get the ID number of document. If the object has never been registred, C<-1> is returned.
181    
182     print $doc->id;
183    
184     =cut
185    
186     sub id {
187     my $self = shift;
188     return $self->{id};
189     }
190    
191 dpavlin 7 =head2 attr_names
192    
193 dpavlin 9 Returns array with attribute names from document object.
194 dpavlin 7
195     my @attrs = $doc->attr_names;
196    
197     =cut
198    
199     sub attr_names {
200     my $self = shift;
201 dpavlin 9 croak "attr_names return array, not scalar" if (! wantarray);
202 dpavlin 7 return sort keys %{ $self->{attrs} };
203     }
204    
205 dpavlin 8
206     =head2 attr
207    
208 dpavlin 9 Returns value of an attribute.
209 dpavlin 8
210     my $value = $doc->attr( 'attribute' );
211    
212     =cut
213    
214     sub attr {
215     my $self = shift;
216     my $name = shift;
217    
218     return $self->{'attrs'}->{ $name };
219     }
220    
221 dpavlin 9
222     =head2 texts
223    
224     Returns array with text sentences.
225    
226     my @texts = $doc->texts;
227    
228     =cut
229    
230     sub texts {
231     my $self = shift;
232 dpavlin 12 confess "texts return array, not scalar" if (! wantarray);
233 dpavlin 11 return @{ $self->{dtexts} };
234 dpavlin 9 }
235    
236 dpavlin 12 =head2 cat_texts
237    
238     Return whole text as single scalar.
239    
240     my $text = $doc->cat_texts;
241    
242     =cut
243    
244     sub cat_texts {
245     my $self = shift;
246     return join(' ',@{ $self->{dtexts} });
247     }
248    
249 dpavlin 5 =head2 dump_draft
250    
251 dpavlin 13 Dump draft data from document object.
252    
253 dpavlin 5 print $doc->dump_draft;
254    
255     =cut
256    
257     sub dump_draft {
258 dpavlin 13 my $self = shift;
259     my $draft;
260    
261     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
262     $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
263     }
264    
265     if ($self->{kwords}) {
266     $draft .= '%%VECTOR';
267     while (my ($key, $value) = each %{ $self->{kwords} }) {
268     $draft .= "\t$key\t$value";
269     }
270     $draft .= "\n";
271     }
272    
273     $draft .= "\n";
274    
275     $draft .= join("\n", @{ $self->{dtexts} }) . "\n";
276     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n";
277    
278     return $draft;
279 dpavlin 5 }
280    
281 dpavlin 4 =head2 delete
282 dpavlin 2
283 dpavlin 4 Empty document object
284 dpavlin 2
285 dpavlin 4 $doc->delete;
286    
287 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
288     convinience. Document objects which go out of scope will be destroyed
289     automatically.
290    
291 dpavlin 4 =cut
292    
293     sub delete {
294     my $self = shift;
295    
296 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
297 dpavlin 5 delete($self->{$data});
298     }
299 dpavlin 4
300 dpavlin 10 $self->{id} = -1;
301    
302 dpavlin 4 return 1;
303     }
304    
305    
306    
307 dpavlin 15 package Search::Estraier::Condition;
308 dpavlin 4
309 dpavlin 15 use Search::Estraier;
310     our @ISA = qw/Search::Estraier/;
311 dpavlin 4
312 dpavlin 15 my $options = {
313     # check N-gram keys skipping by three
314     SURE => 1 << 0,
315     # check N-gram keys skipping by two
316     USUAL => 1 << 1,
317     # without TF-IDF tuning
318     FAST => 1 << 2,
319     # with the simplified phrase
320     AGITO => 1 << 3,
321     # check every N-gram key
322     NOIDF => 1 << 4,
323     # check N-gram keys skipping by one
324     SIMPLE => 1 << 10,
325     };
326    
327     =head1 Search::Estraier::Condition
328    
329     =head2 new
330    
331     my $cond = new Search::HyperEstraier::Condition;
332    
333 dpavlin 4 =cut
334    
335 dpavlin 15 sub new {
336     my $class = shift;
337     my $self = {};
338     bless($self, $class);
339    
340     $self ? return $self : return undef;
341 dpavlin 4 }
342    
343    
344 dpavlin 2 package Search::Estraier::Master;
345    
346     use Carp;
347    
348     =head1 Search::Estraier::Master
349    
350     Controll node master. This requires user with administration priviledges.
351    
352     =cut
353    
354     {
355     package RequestAgent;
356 dpavlin 15 our @ISA = qw(LWP::UserAgent);
357 dpavlin 2
358     sub new {
359     my $self = LWP::UserAgent::new(@_);
360     $self->agent("Search-Estraier/$Search::Estraer::VERSION");
361     $self;
362     }
363    
364     sub get_basic_credentials {
365     my($self, $realm, $uri) = @_;
366     # return ($user, $password);
367     }
368     }
369    
370    
371    
372     =head2 new
373    
374     Create new connection to node master.
375    
376     my $master = new Search::Estraier::Master(
377     url => 'http://localhost:1978',
378     user => 'admin',
379     passwd => 'admin',
380     );
381    
382     =cut
383    
384     sub new {
385     my $class = shift;
386     my $self = {@_};
387     bless($self, $class);
388    
389     foreach my $p (qw/url user passwd/) {
390     croak "need $p" unless ($self->{$p});
391     }
392    
393     $self ? return $self : return undef;
394     }
395    
396    
397    
398     ###
399    
400     =head1 EXPORT
401    
402     Nothing.
403    
404     =head1 SEE ALSO
405    
406     L<http://hyperestraier.sourceforge.net/>
407    
408     Hyper Estraier Ruby interface on which this module is based.
409    
410     =head1 AUTHOR
411    
412     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
413    
414    
415     =head1 COPYRIGHT AND LICENSE
416    
417 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
418 dpavlin 2
419     This library is free software; you can redistribute it and/or modify
420     it under the GPL v2 or later.
421    
422     =cut
423    
424     1;

  ViewVC Help
Powered by ViewVC 1.1.26