/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 16 - (hide annotations)
Wed Jan 4 22:43:24 2006 UTC (18 years, 3 months ago) by dpavlin
File size: 7939 byte(s)
Search::Estraier::Condition, new, set_phrase, set_order, set_max
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7     our $VERSION = '0.00';
8    
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15     use Search::Estraier;
16     my $est = new Search::Estraier();
17    
18     =head1 DESCRIPTION
19    
20     This module is implementation of node API of Hyper Estraier. Since it's
21     perl-only module with dependencies only on standard perl modules, it will
22     run on all platforms on which perl runs. It doesn't require compilation
23     or Hyper Estraier development files on target machine.
24    
25     It is implemented as multiple packages which closly resamble Ruby
26     implementation. It also includes methods to manage nodes.
27    
28     =cut
29    
30 dpavlin 15 =head2 _s
31    
32     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
33    
34     my $text = $self->_s(" this is a text ");
35     $text = 'this is a text';
36    
37     =cut
38    
39     sub _s {
40     my $text = $_[1] || return;
41     $text =~ s/\s\s+/ /gs;
42     $text =~ s/^\s+//;
43     $text =~ s/\s+$//;
44     return $text;
45     }
46    
47 dpavlin 2 package Search::Estraier::Document;
48    
49 dpavlin 9 use Carp qw/croak confess/;
50 dpavlin 7
51 dpavlin 15 use Search::Estraier;
52     our @ISA = qw/Search::Estraier/;
53    
54 dpavlin 2 =head1 Search::Estraier::Document
55    
56 dpavlin 14 This class implements Document which is collection of attributes
57     (key=value), vectors (also key value) display text and hidden text.
58    
59 dpavlin 2 =head2 new
60    
61 dpavlin 14 Create new document, empty or from draft.
62    
63 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
64 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
65 dpavlin 2
66     =cut
67    
68     sub new {
69     my $class = shift;
70 dpavlin 14 my $self = {};
71 dpavlin 2 bless($self, $class);
72    
73 dpavlin 6 $self->{id} = -1;
74    
75 dpavlin 14 my $draft = shift;
76    
77     if ($draft) {
78     my $in_text = 0;
79     foreach my $line (split(/\n/, $draft)) {
80    
81     if ($in_text) {
82     if ($line =~ /^\t/) {
83     push @{ $self->{htexts} }, substr($line, 1);
84     } else {
85     push @{ $self->{dtexts} }, $line;
86     }
87     next;
88     }
89    
90     if ($line =~ m/^%VECTOR\t(.+)$/) {
91     my @fields = split(/\t/, $1);
92     for my $i ( 0 .. ($#fields - 1) ) {
93     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
94     $i++;
95     }
96     next;
97     } elsif ($line =~ m/^%/) {
98     # What is this? comment?
99     #warn "$line\n";
100     next;
101     } elsif ($line =~ m/^$/) {
102     $in_text = 1;
103     next;
104     } elsif ($line =~ m/^(.+)=(.+)$/) {
105     $self->{attrs}->{ $1 } = $2;
106     next;
107     }
108    
109     warn "draft ignored: $line\n";
110     }
111     }
112    
113 dpavlin 2 $self ? return $self : return undef;
114     }
115    
116 dpavlin 4
117 dpavlin 2 =head2 add_attr
118    
119 dpavlin 6 Add an attribute.
120    
121 dpavlin 2 $doc->add_attr( name => 'value' );
122    
123 dpavlin 9 Delete attribute using
124 dpavlin 5
125     $doc->add_attr( name => undef );
126    
127 dpavlin 2 =cut
128    
129     sub add_attr {
130     my $self = shift;
131     my $attrs = {@_};
132    
133     while (my ($name, $value) = each %{ $attrs }) {
134 dpavlin 9 if (! defined($value)) {
135 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
136 dpavlin 9 } else {
137 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
138 dpavlin 9 }
139 dpavlin 2 }
140 dpavlin 8
141     return 1;
142 dpavlin 2 }
143    
144 dpavlin 5
145     =head2 add_text
146    
147 dpavlin 6 Add a sentence of text.
148    
149 dpavlin 5 $doc->add_text('this is example text to display');
150    
151     =cut
152    
153     sub add_text {
154     my $self = shift;
155     my $text = shift;
156     return unless defined($text);
157    
158 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
159 dpavlin 5 }
160    
161    
162     =head2 add_hidden_text
163    
164 dpavlin 6 Add a hidden sentence.
165    
166 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
167    
168     =cut
169    
170     sub add_hidden_text {
171     my $self = shift;
172     my $text = shift;
173     return unless defined($text);
174    
175 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
176 dpavlin 5 }
177    
178 dpavlin 6 =head2 id
179    
180     Get the ID number of document. If the object has never been registred, C<-1> is returned.
181    
182     print $doc->id;
183    
184     =cut
185    
186     sub id {
187     my $self = shift;
188     return $self->{id};
189     }
190    
191 dpavlin 7 =head2 attr_names
192    
193 dpavlin 9 Returns array with attribute names from document object.
194 dpavlin 7
195     my @attrs = $doc->attr_names;
196    
197     =cut
198    
199     sub attr_names {
200     my $self = shift;
201 dpavlin 9 croak "attr_names return array, not scalar" if (! wantarray);
202 dpavlin 7 return sort keys %{ $self->{attrs} };
203     }
204    
205 dpavlin 8
206     =head2 attr
207    
208 dpavlin 9 Returns value of an attribute.
209 dpavlin 8
210     my $value = $doc->attr( 'attribute' );
211    
212     =cut
213    
214     sub attr {
215     my $self = shift;
216     my $name = shift;
217    
218     return $self->{'attrs'}->{ $name };
219     }
220    
221 dpavlin 9
222     =head2 texts
223    
224     Returns array with text sentences.
225    
226     my @texts = $doc->texts;
227    
228     =cut
229    
230     sub texts {
231     my $self = shift;
232 dpavlin 12 confess "texts return array, not scalar" if (! wantarray);
233 dpavlin 11 return @{ $self->{dtexts} };
234 dpavlin 9 }
235    
236 dpavlin 12 =head2 cat_texts
237    
238     Return whole text as single scalar.
239    
240     my $text = $doc->cat_texts;
241    
242     =cut
243    
244     sub cat_texts {
245     my $self = shift;
246     return join(' ',@{ $self->{dtexts} });
247     }
248    
249 dpavlin 5 =head2 dump_draft
250    
251 dpavlin 13 Dump draft data from document object.
252    
253 dpavlin 5 print $doc->dump_draft;
254    
255     =cut
256    
257     sub dump_draft {
258 dpavlin 13 my $self = shift;
259     my $draft;
260    
261     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
262     $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
263     }
264    
265     if ($self->{kwords}) {
266     $draft .= '%%VECTOR';
267     while (my ($key, $value) = each %{ $self->{kwords} }) {
268     $draft .= "\t$key\t$value";
269     }
270     $draft .= "\n";
271     }
272    
273     $draft .= "\n";
274    
275     $draft .= join("\n", @{ $self->{dtexts} }) . "\n";
276     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n";
277    
278     return $draft;
279 dpavlin 5 }
280    
281 dpavlin 4 =head2 delete
282 dpavlin 2
283 dpavlin 4 Empty document object
284 dpavlin 2
285 dpavlin 4 $doc->delete;
286    
287 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
288     convinience. Document objects which go out of scope will be destroyed
289     automatically.
290    
291 dpavlin 4 =cut
292    
293     sub delete {
294     my $self = shift;
295    
296 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
297 dpavlin 5 delete($self->{$data});
298     }
299 dpavlin 4
300 dpavlin 10 $self->{id} = -1;
301    
302 dpavlin 4 return 1;
303     }
304    
305    
306    
307 dpavlin 15 package Search::Estraier::Condition;
308 dpavlin 4
309 dpavlin 16 use Carp qw/confess croak/;
310    
311 dpavlin 15 use Search::Estraier;
312     our @ISA = qw/Search::Estraier/;
313 dpavlin 4
314 dpavlin 16 =head1 Search::Estraier::Condition
315    
316     =head2 new
317    
318     my $cond = new Search::HyperEstraier::Condition;
319    
320     =cut
321    
322     sub new {
323     my $class = shift;
324     my $self = {};
325     bless($self, $class);
326    
327     $self ? return $self : return undef;
328     }
329    
330     =head2 set_phrase
331    
332     $cond->set_phrase('search phrase');
333    
334     =cut
335    
336     sub set_phrase {
337     my $self = shift;
338     $self->{phrase} = $self->_s( shift );
339     }
340    
341     =head2 add_attr
342    
343     $cond->add_attr('@URI STRINC /~dpavlin/');
344    
345     =cut
346    
347     sub add_attr {
348     my $self = shift;
349     my $attr = shift || return;
350     push @{ $self->{attrs} }, $self->_s( $attr );
351     }
352    
353     =head2 set_order
354    
355     $cond->set_order('@mdate NUMD');
356    
357     =cut
358    
359     sub set_order {
360     my $self = shift;
361     $self->{order} = shift;
362     }
363    
364     =head2 set_max
365    
366     $cond->set_max(42);
367    
368     =cut
369    
370     sub set_max {
371     my $self = shift;
372     my $max = shift;
373     croak "set_max needs number" unless ($max =~ m/^\d+$/);
374     $self->{max} = $max;
375     }
376    
377     =head2 set_options
378    
379     $cond->set_options( SURE => 1 );
380    
381     =cut
382    
383 dpavlin 15 my $options = {
384     # check N-gram keys skipping by three
385     SURE => 1 << 0,
386     # check N-gram keys skipping by two
387     USUAL => 1 << 1,
388     # without TF-IDF tuning
389     FAST => 1 << 2,
390     # with the simplified phrase
391     AGITO => 1 << 3,
392     # check every N-gram key
393     NOIDF => 1 << 4,
394     # check N-gram keys skipping by one
395     SIMPLE => 1 << 10,
396     };
397    
398 dpavlin 16 sub set_options {
399     my $self = shift;
400     my $option = shift;
401     confess "unknown option" unless ($options->{$option});
402     $self->{options} ||= $options->{$option};
403 dpavlin 4 }
404    
405 dpavlin 2 package Search::Estraier::Master;
406    
407     use Carp;
408    
409     =head1 Search::Estraier::Master
410    
411     Controll node master. This requires user with administration priviledges.
412    
413     =cut
414    
415     {
416     package RequestAgent;
417 dpavlin 15 our @ISA = qw(LWP::UserAgent);
418 dpavlin 2
419     sub new {
420     my $self = LWP::UserAgent::new(@_);
421     $self->agent("Search-Estraier/$Search::Estraer::VERSION");
422     $self;
423     }
424    
425     sub get_basic_credentials {
426     my($self, $realm, $uri) = @_;
427     # return ($user, $password);
428     }
429     }
430    
431    
432    
433     =head2 new
434    
435     Create new connection to node master.
436    
437     my $master = new Search::Estraier::Master(
438     url => 'http://localhost:1978',
439     user => 'admin',
440     passwd => 'admin',
441     );
442    
443     =cut
444    
445     sub new {
446     my $class = shift;
447     my $self = {@_};
448     bless($self, $class);
449    
450     foreach my $p (qw/url user passwd/) {
451     croak "need $p" unless ($self->{$p});
452     }
453    
454     $self ? return $self : return undef;
455     }
456    
457    
458    
459     ###
460    
461     =head1 EXPORT
462    
463     Nothing.
464    
465     =head1 SEE ALSO
466    
467     L<http://hyperestraier.sourceforge.net/>
468    
469     Hyper Estraier Ruby interface on which this module is based.
470    
471     =head1 AUTHOR
472    
473     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
474    
475    
476     =head1 COPYRIGHT AND LICENSE
477    
478 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
479 dpavlin 2
480     This library is free software; you can redistribute it and/or modify
481     it under the GPL v2 or later.
482    
483     =cut
484    
485     1;

  ViewVC Help
Powered by ViewVC 1.1.26