/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 20 - (hide annotations)
Thu Jan 5 13:55:06 2006 UTC (18 years, 2 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 9469 byte(s)
begin work on Search::HyperEstraier::ResultDocument
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7     our $VERSION = '0.00';
8    
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15     use Search::Estraier;
16     my $est = new Search::Estraier();
17    
18     =head1 DESCRIPTION
19    
20     This module is implementation of node API of Hyper Estraier. Since it's
21     perl-only module with dependencies only on standard perl modules, it will
22     run on all platforms on which perl runs. It doesn't require compilation
23     or Hyper Estraier development files on target machine.
24    
25     It is implemented as multiple packages which closely resemble the Ruby
26     implementation. It also includes methods to manage nodes.
27    
28     =cut
29    
30 dpavlin 15 =head2 _s
31    
32     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
33    
34     my $text = $self->_s(" this is a text ");
35     $text = 'this is a text';
36    
37     =cut
38    
# Normalize whitespace in a string: collapse every run of two or more
# whitespace characters into a single space and trim both ends.
#
# Called as a method ($self->_s($text)); only the first argument after the
# invocant is used. Returns nothing (undef in scalar context, empty list in
# list context) when the text is undefined or empty.
sub _s {
    my $text = $_[1];

    # Test definedness/length rather than truth, so that the perfectly
    # valid string "0" is not silently discarded.
    return unless defined($text) && length($text);

    $text =~ s/\s\s+/ /gs;
    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    return $text;
}
46    
47 dpavlin 2 package Search::Estraier::Document;
48    
49 dpavlin 9 use Carp qw/croak confess/;
50 dpavlin 7
51 dpavlin 15 use Search::Estraier;
52     our @ISA = qw/Search::Estraier/;
53    
54 dpavlin 2 =head1 Search::Estraier::Document
55    
56 dpavlin 14 This class implements Document which is collection of attributes
57     (key=value), vectors (also key value) display text and hidden text.
58    
59 dpavlin 2 =head2 new
60    
61 dpavlin 14 Create new document, empty or from draft.
62    
63 dpavlin 2 my $doc = new Search::Estraier::Document;
64 dpavlin 14 my $doc2 = new Search::Estraier::Document( $draft );
65 dpavlin 2
66     =cut
67    
# Construct a document, optionally populated from a draft string.
#
# Draft layout as parsed here:
#   name=value            attribute lines
#   %VECTOR\tk\tv\tk\tv   keyword vector (tab separated key/value pairs)
#   %...                  any other %-line is skipped
#   <empty line>          switches to the text section
#   text                  display text line
#   \ttext                hidden text line (leading tab)
#
# The id is initialised to -1. Always returns the new object.
sub new {
    my $class = shift;
    my $self = {};
    bless($self, $class);

    $self->{id} = -1;

    my $draft = shift;
    return $self unless $draft;

    my $in_text = 0;
    foreach my $line (split(/\n/, $draft)) {

        if ($in_text) {
            if ($line =~ /^\t/) {
                push @{ $self->{htexts} }, substr($line, 1);
            } else {
                push @{ $self->{dtexts} }, $line;
            }
            next;
        }

        if ($line =~ m/^%VECTOR\t(.+)$/) {
            my @fields = split(/\t/, $1);
            # Walk the fields two at a time (key, value). Incrementing the
            # variable of a range-foreach does not skip an iteration, so an
            # explicit step of 2 is required. A dangling key without a value
            # is dropped.
            for (my $i = 0; $i < $#fields; $i += 2) {
                $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
            }
        } elsif ($line =~ m/^%/) {
            # other %-directives are not interpreted
        } elsif ($line =~ m/^$/) {
            $in_text = 1;
        } elsif ($line =~ m/^(.+?)=(.+)$/) {
            # Non-greedy name: split at the FIRST '=' so that values which
            # contain '=' themselves (e.g. URIs with query strings) survive.
            $self->{attrs}->{ $1 } = $2;
        } else {
            warn "draft ignored: $line\n";
        }
    }

    return $self;
}
115    
116 dpavlin 4
117 dpavlin 2 =head2 add_attr
118    
119 dpavlin 6 Add an attribute.
120    
121 dpavlin 2 $doc->add_attr( name => 'value' );
122    
123 dpavlin 9 Delete attribute using
124 dpavlin 5
125     $doc->add_attr( name => undef );
126    
127 dpavlin 2 =cut
128    
# Set or remove document attributes.
#
# Takes a list of name => value pairs. Names and values are passed through
# _s before being stored; a pair whose value is undef removes that
# attribute instead. Always returns 1.
sub add_attr {
    my $self = shift;
    my $pairs = {@_};

    for my $name (keys %{$pairs}) {
        my $value = $pairs->{$name};
        my $key = $self->_s($name);

        if (defined $value) {
            $self->{attrs}->{$key} = $self->_s($value);
        }
        else {
            delete $self->{attrs}->{$key};
        }
    }

    return 1;
}
143    
144 dpavlin 5
145     =head2 add_text
146    
147 dpavlin 6 Add a sentence of text.
148    
149 dpavlin 5 $doc->add_text('this is example text to display');
150    
151     =cut
152    
# Append one sentence of display text (normalized through _s).
# Does nothing when the text is undefined.
sub add_text {
    my ($self, $text) = @_;
    defined($text) or return;

    return push @{ $self->{dtexts} }, $self->_s($text);
}
160    
161    
162     =head2 add_hidden_text
163    
164 dpavlin 6 Add a hidden sentence.
165    
166 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
167    
168     =cut
169    
# Append one hidden sentence (normalized through _s).
# Does nothing when the text is undefined.
sub add_hidden_text {
    my ($self, $text) = @_;
    defined($text) or return;

    return push @{ $self->{htexts} }, $self->_s($text);
}
177    
178 dpavlin 6 =head2 id
179    
180     Get the ID number of document. If the object has never been registered, C<-1> is returned.
181    
182     print $doc->id;
183    
184     =cut
185    
# Accessor: the value stored under the object's {id} key.
sub id {
    my ($self) = @_;
    return $self->{id};
}
190    
191 dpavlin 7 =head2 attr_names
192    
193 dpavlin 9 Returns array with attribute names from document object.
194 dpavlin 7
195     my @attrs = $doc->attr_names;
196    
197     =cut
198    
# Return the sorted list of attribute names of the document.
# Croaks unless called in list context.
sub attr_names {
    my ($self) = @_;
    wantarray or croak "attr_names return array, not scalar";

    my @names = sort keys %{ $self->{attrs} };
    return @names;
}
204    
205 dpavlin 8
206     =head2 attr
207    
208 dpavlin 9 Returns value of an attribute.
209 dpavlin 8
210     my $value = $doc->attr( 'attribute' );
211    
212     =cut
213    
# Look up a single attribute value by name (undef when it is not set).
sub attr {
    my ($self, $name) = @_;
    return $self->{attrs}{$name};
}
220    
221 dpavlin 9
222     =head2 texts
223    
224     Returns array with text sentences.
225    
226     my @texts = $doc->texts;
227    
228     =cut
229    
# Return the list of display text sentences.
# Confesses unless called in list context. A document without any text
# yields an empty list.
sub texts {
    my $self = shift;
    confess "texts return array, not scalar" if (! wantarray);

    # Fall back to an empty array: dereferencing an undefined {dtexts}
    # is a fatal error under strict refs.
    return @{ $self->{dtexts} || [] };
}
235    
236 dpavlin 12 =head2 cat_texts
237    
238     Return whole text as single scalar.
239    
240     my $text = $doc->cat_texts;
241    
242     =cut
243    
# Return all display text sentences joined with single spaces.
# A document without any text yields the empty string.
sub cat_texts {
    my $self = shift;

    # Fall back to an empty array: dereferencing an undefined {dtexts}
    # is a fatal error under strict refs.
    return join(' ', @{ $self->{dtexts} || [] });
}
248    
249 dpavlin 5 =head2 dump_draft
250    
251 dpavlin 13 Dump draft data from document object.
252    
253 dpavlin 5 print $doc->dump_draft;
254    
255     =cut
256    
# Serialize the document into draft format:
#   name=value lines (sorted by name),
#   an optional %VECTOR line with tab separated keyword/value pairs,
#   an empty line,
#   display text lines, then hidden text lines prefixed with a tab.
#
# The output uses the same layout that new() parses.
sub dump_draft {
    my $self = shift;
    my $draft = '';

    my $attrs = $self->{attrs} || {};
    foreach my $attr_name (sort keys %{ $attrs }) {
        $draft .= $attr_name . '=' . $attrs->{$attr_name} . "\n";
    }

    if (my $kwords = $self->{kwords}) {
        # Single '%': new() only recognises "%VECTOR" and would skip
        # "%%VECTOR" as an unknown directive.
        $draft .= '%VECTOR';
        # Sorted so the same document always yields the same draft.
        foreach my $key (sort keys %{ $kwords }) {
            $draft .= "\t$key\t$kwords->{$key}";
        }
        $draft .= "\n";
    }

    # an empty line separates the header from the text section
    $draft .= "\n";

    # Missing text arrays are treated as empty, so no stray blank or
    # tab-only lines are emitted and nothing dies under strict refs.
    $draft .= "$_\n"   foreach @{ $self->{dtexts} || [] };
    $draft .= "\t$_\n" foreach @{ $self->{htexts} || [] };

    return $draft;
}
280    
281 dpavlin 4 =head2 delete
282 dpavlin 2
283 dpavlin 4 Empty document object
284 dpavlin 2
285 dpavlin 4 $doc->delete;
286    
287 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
288     convenience. Document objects which go out of scope will be destroyed
289     automatically.
290    
291 dpavlin 4 =cut
292    
# Empty the document: drop attributes, display texts, hidden texts and
# keywords, and reset the id to -1. Always returns 1.
sub delete {
    my $self = shift;

    # Hidden texts are stored under {htexts} (see add_hidden_text), so that
    # is the key which has to be cleared.
    foreach my $data (qw/attrs dtexts htexts kwords/) {
        delete($self->{$data});
    }

    $self->{id} = -1;

    return 1;
}
304    
305    
306    
307 dpavlin 15 package Search::Estraier::Condition;
308 dpavlin 4
309 dpavlin 16 use Carp qw/confess croak/;
310    
311 dpavlin 15 use Search::Estraier;
312     our @ISA = qw/Search::Estraier/;
313 dpavlin 4
314 dpavlin 16 =head1 Search::Estraier::Condition
315    
316     =head2 new
317    
318     my $cond = new Search::Estraier::Condition;
319    
320     =cut
321    
# Construct an empty search condition.
# Starts with max = -1 and options = 0.
sub new {
    my ($class) = @_;

    my $self = bless {}, $class;
    $self->{max}     = -1;
    $self->{options} = 0;

    return $self;
}
332    
333     =head2 set_phrase
334    
335     $cond->set_phrase('search phrase');
336    
337     =cut
338    
# Store the search phrase, normalized through _s.
sub set_phrase {
    my ($self, $phrase) = @_;
    return $self->{phrase} = $self->_s($phrase);
}
343    
344     =head2 add_attr
345    
346     $cond->add_attr('@URI STRINC /~dpavlin/');
347    
348     =cut
349    
# Append one attribute search expression (normalized through _s) to the
# condition. A false argument is ignored.
sub add_attr {
    my ($self, $attr) = @_;
    return unless $attr;

    return push @{ $self->{attrs} }, $self->_s($attr);
}
355    
356     =head2 set_order
357    
358     $cond->set_order('@mdate NUMD');
359    
360     =cut
361    
# Store the result ordering expression as given (no normalization).
sub set_order {
    my ($self, $order) = @_;
    return $self->{order} = $order;
}
366    
367     =head2 set_max
368    
369     $cond->set_max(42);
370    
371     =cut
372    
# Store the maximum number of results.
# Croaks unless the argument consists of digits only.
sub set_max {
    my ($self, $max) = @_;

    if ($max !~ m/^\d+$/) {
        croak "set_max needs number";
    }

    return $self->{max} = $max;
}
379    
380     =head2 set_options
381    
382     $cond->set_options( SURE => 1 );
383    
384     =cut
385    
# Search option bit flags, keyed by option name.
my $options = {
    # check every N-gram key
    SURE => 1 << 0,
    # check N-gram keys skipping by one
    USUAL => 1 << 1,
    # check N-gram keys skipping by two
    FAST => 1 << 2,
    # check N-gram keys skipping by three
    AGITO => 1 << 3,
    # without TF-IDF tuning
    NOIDF => 1 << 4,
    # with the simplified phrase
    SIMPLE => 1 << 10,
};

# Turn on one search option by name (first argument; any further
# arguments are ignored). Confesses on an unknown option name.
# Options accumulate across calls.
sub set_options {
    my $self = shift;
    my $option = shift;
    confess "unknown option" unless (defined($option) && $options->{$option});

    # Bitwise OR so every call adds its flag; a logical ||= would keep
    # only the first option ever set.
    $self->{options} |= $options->{$option};
}
407    
408 dpavlin 18 =head2 phrase
409    
410     Return search phrase.
411    
412     print $cond->phrase;
413    
414     =cut
415    
# Accessor: the stored search phrase.
sub phrase {
    my ($self) = @_;
    return $self->{phrase};
}
420    
421 dpavlin 19 =head2 order
422 dpavlin 18
423 dpavlin 19 Return search result order.
424    
425     print $cond->order;
426    
427     =cut
428    
# Accessor: the stored result ordering expression.
sub order {
    my ($self) = @_;
    return $self->{order};
}
433    
434     =head2 attrs
435    
436     Return search result attrs.
437    
438     my @cond_attrs = $cond->attrs;
439    
440     =cut
441    
# Return the list of attribute search expressions added with add_attr.
# A condition without any attribute expression yields an empty list.
sub attrs {
    my $self = shift;
    #croak "attrs return array, not scalar" if (! wantarray);

    # Fall back to an empty array: dereferencing an undefined {attrs}
    # is a fatal error under strict refs.
    return @{ $self->{attrs} || [] };
}
447    
448     =head2 max
449    
450     Return maximum number of results.
451    
452     print $cond->max;
453    
454     C<-1> is returned for uninitialized value, C<0> is unlimited.
455    
456     =cut
457    
# Accessor: the stored maximum number of results.
sub max {
    my ($self) = @_;
    return $self->{max};
}
462    
463     =head2 options
464    
465     Return options for this condition.
466    
467     print $cond->options;
468    
469     Options are returned in numerical form.
470    
471     =cut
472    
# Accessor: the stored option flags, in numerical form.
sub options {
    my ($self) = @_;
    return $self->{options};
}
477    
478    
479 dpavlin 20 package Search::Estraier::ResultDocument;
480    
481     use Carp qw/confess croak/;
482    
483     use Search::Estraier;
484     our @ISA = qw/Search::Estraier/;
485    
486     =head1 Search::Estraier::ResultDocument
487    
488     =head2 new
489    
490     my $doc = new Search::Estraier::ResultDocument(
491     uri => 'http://localhost/document/uri/42',
492     attrs => {
493     foo => 1,
494     bar => 2,
495     },
496     snippet => 'this is a text of snippet',
497     keywords => 'this\tare\tkeywords'
498     );
499    
500     =cut
501    
# Construct a result document from name => value arguments.
# Croaks when any of uri, attrs, snippet or keywords is missing (undefined).
sub new {
    my ($class, @args) = @_;
    my $self = bless({@args}, $class);

    for my $required (qw/uri attrs snippet keywords/) {
        next if defined $self->{$required};
        croak "missing $required for ResultDocument";
    }

    return $self;
}
513    
514    
515    
516 dpavlin 2 package Search::Estraier::Master;
517    
518     use Carp;
519    
520     =head1 Search::Estraier::Master
521    
522     Controls the node master. This requires a user with administration privileges.
523    
524     =cut
525    
{
    # HTTP user agent for talking to the node master; a thin subclass of
    # LWP::UserAgent which sets its own agent string.
    # NOTE(review): LWP::UserAgent is not loaded anywhere in the visible
    # source - confirm that it is loaded before RequestAgent->new is called.
    package RequestAgent;
    our @ISA = qw(LWP::UserAgent);

    # Construct the agent; arguments are passed straight through to
    # LWP::UserAgent::new.
    sub new {
        my $self = LWP::UserAgent::new(@_);
        # Package name spelled in full (was "Search::Estraer"), so the
        # module version actually ends up in the agent string.
        $self->agent("Search-Estraier/$Search::Estraier::VERSION");
        return $self;
    }

    # Credential hook. Not implemented yet: returns an empty list.
    sub get_basic_credentials {
        my ($self, $realm, $uri) = @_;
        # return ($user, $password);

        # Explicit empty return: without it the sub would return the value
        # of the argument unpacking above, i.e. ($self, $realm, $uri).
        return;
    }
}
541    
542    
543    
544     =head2 new
545    
546     Create new connection to node master.
547    
548     my $master = new Search::Estraier::Master(
549     url => 'http://localhost:1978',
550     user => 'admin',
551     passwd => 'admin',
552     );
553    
554     =cut
555    
# Construct a node master connection object from name => value arguments.
# Croaks when url, user or passwd is missing or false.
sub new {
    my ($class, @args) = @_;
    my $self = bless({@args}, $class);

    for my $p (qw/url user passwd/) {
        next if $self->{$p};
        croak "need $p";
    }

    return $self;
}
567    
568    
569    
570     ###
571    
572     =head1 EXPORT
573    
574     Nothing.
575    
576     =head1 SEE ALSO
577    
578     L<http://hyperestraier.sourceforge.net/>
579    
580     Hyper Estraier Ruby interface on which this module is based.
581    
582     =head1 AUTHOR
583    
584     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
585    
586    
587     =head1 COPYRIGHT AND LICENSE
588    
589 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
590 dpavlin 2
591     This library is free software; you can redistribute it and/or modify
592     it under the GPL v2 or later.
593    
594     =cut
595    
596     1;

  ViewVC Help
Powered by ViewVC 1.1.26