/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 191 - (hide annotations)
Sun Nov 5 16:26:57 2006 UTC (17 years, 4 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 38054 byte(s)
bump version to 0.08
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 191 our $VERSION = '0.08';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 164 This class implements Document which is single item in Hyper Estraier.
124 dpavlin 14
125 dpavlin 164 It's is collection of:
126 dpavlin 42
127 dpavlin 164 =over 4
128    
129     =item attributes
130    
131     C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132    
133     You can add common filters to C<attrindex> in estmaster's C<_conf>
134     file for better performance. See C<attrindex> in
135     L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136    
137     =item vectors
138    
139     also C<< 'key' => 'value' >> pairs
140    
141     =item display text
142    
143     Text which will be used to create searchable corpus of your index and
144     included in snippet output.
145    
146     =item hidden text
147    
148     Text which will be searchable, but will not be included in snippet.
149    
150     =back
151    
152 dpavlin 2 =head2 new
153    
154 dpavlin 14 Create new document, empty or from draft.
155    
156 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
157 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
158 dpavlin 2
159     =cut
160    
161     sub new {
162     my $class = shift;
163 dpavlin 14 my $self = {};
164 dpavlin 2 bless($self, $class);
165    
166 dpavlin 6 $self->{id} = -1;
167    
168 dpavlin 14 my $draft = shift;
169    
170     if ($draft) {
171     my $in_text = 0;
172     foreach my $line (split(/\n/, $draft)) {
173    
174     if ($in_text) {
175     if ($line =~ /^\t/) {
176     push @{ $self->{htexts} }, substr($line, 1);
177     } else {
178     push @{ $self->{dtexts} }, $line;
179     }
180     next;
181     }
182    
183     if ($line =~ m/^%VECTOR\t(.+)$/) {
184     my @fields = split(/\t/, $1);
185 dpavlin 166 if ($#fields % 2 == 1) {
186     $self->{kwords} = { @fields };
187     } else {
188     warn "can't decode $line\n";
189 dpavlin 14 }
190     next;
191 dpavlin 184 } elsif ($line =~ m/^%SCORE\t(.+)$/) {
192     $self->{score} = $1;
193     next;
194 dpavlin 14 } elsif ($line =~ m/^%/) {
195     # What is this? comment?
196     #warn "$line\n";
197     next;
198     } elsif ($line =~ m/^$/) {
199     $in_text = 1;
200     next;
201 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
202 dpavlin 14 $self->{attrs}->{ $1 } = $2;
203     next;
204     }
205    
206 dpavlin 81 warn "draft ignored: '$line'\n";
207 dpavlin 14 }
208     }
209    
210 dpavlin 2 $self ? return $self : return undef;
211     }
212    
213 dpavlin 4
214 dpavlin 2 =head2 add_attr
215    
216 dpavlin 6 Add an attribute.
217    
218 dpavlin 2 $doc->add_attr( name => 'value' );
219    
220 dpavlin 9 Delete attribute using
221 dpavlin 5
222     $doc->add_attr( name => undef );
223    
224 dpavlin 2 =cut
225    
226     sub add_attr {
227     my $self = shift;
228     my $attrs = {@_};
229    
230     while (my ($name, $value) = each %{ $attrs }) {
231 dpavlin 9 if (! defined($value)) {
232 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
233 dpavlin 9 } else {
234 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
235 dpavlin 9 }
236 dpavlin 2 }
237 dpavlin 8
238     return 1;
239 dpavlin 2 }
240    
241 dpavlin 5
242     =head2 add_text
243    
244 dpavlin 6 Add a sentence of text.
245    
246 dpavlin 5 $doc->add_text('this is example text to display');
247    
248     =cut
249    
250     sub add_text {
251     my $self = shift;
252     my $text = shift;
253     return unless defined($text);
254    
255 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
256 dpavlin 5 }
257    
258    
259     =head2 add_hidden_text
260    
261 dpavlin 6 Add a hidden sentence.
262    
263 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
264    
265     =cut
266    
267     sub add_hidden_text {
268     my $self = shift;
269     my $text = shift;
270     return unless defined($text);
271    
272 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
273 dpavlin 5 }
274    
275 dpavlin 166 =head2 add_vectors
276 dpavlin 42
277 dpavlin 166 Add a vectors
278    
279     $doc->add_vector(
280     'vector_name' => 42,
281     'another' => 12345,
282     );
283    
284     =cut
285    
286     sub add_vectors {
287     my $self = shift;
288     return unless (@_);
289    
290     # this is ugly, but works
291     die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);
292    
293     $self->{kwords} = {@_};
294     }
295    
296 dpavlin 184 =head2 set_score
297 dpavlin 166
298 dpavlin 184 Set the substitute score
299    
300     $doc->set_score(12345);
301    
302     =cut
303    
304     sub set_score {
305     my $self = shift;
306     my $score = shift;
307     return unless (defined($score));
308     $self->{score} = $score;
309     }
310    
311     =head2 score
312    
313     Get the substitute score
314    
315     =cut
316    
317     sub score {
318     my $self = shift;
319     return -1 unless (defined($self->{score}));
320     return $self->{score};
321     }
322    
323 dpavlin 6 =head2 id
324    
325     Get the ID number of document. If the object has never been registred, C<-1> is returned.
326    
327     print $doc->id;
328    
329     =cut
330    
331     sub id {
332     my $self = shift;
333     return $self->{id};
334     }
335    
336 dpavlin 42
337 dpavlin 7 =head2 attr_names
338    
339 dpavlin 9 Returns array with attribute names from document object.
340 dpavlin 7
341     my @attrs = $doc->attr_names;
342    
343     =cut
344    
345     sub attr_names {
346     my $self = shift;
347 dpavlin 60 return unless ($self->{attrs});
348     #croak "attr_names return array, not scalar" if (! wantarray);
349 dpavlin 7 return sort keys %{ $self->{attrs} };
350     }
351    
352 dpavlin 8
353     =head2 attr
354    
355 dpavlin 9 Returns value of an attribute.
356 dpavlin 8
357     my $value = $doc->attr( 'attribute' );
358    
359     =cut
360    
361     sub attr {
362     my $self = shift;
363     my $name = shift;
364 dpavlin 60 return unless (defined($name) && $self->{attrs});
365     return $self->{attrs}->{ $name };
366 dpavlin 8 }
367    
368 dpavlin 9
369     =head2 texts
370    
371     Returns array with text sentences.
372    
373     my @texts = $doc->texts;
374    
375     =cut
376    
377     sub texts {
378     my $self = shift;
379 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
380     return @{ $self->{dtexts} } if ($self->{dtexts});
381 dpavlin 9 }
382    
383 dpavlin 42
384 dpavlin 12 =head2 cat_texts
385    
386     Return whole text as single scalar.
387    
388     my $text = $doc->cat_texts;
389    
390     =cut
391    
392     sub cat_texts {
393     my $self = shift;
394 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
395 dpavlin 12 }
396    
397 dpavlin 42
398 dpavlin 5 =head2 dump_draft
399    
400 dpavlin 13 Dump draft data from document object.
401    
402 dpavlin 5 print $doc->dump_draft;
403    
404     =cut
405    
406     sub dump_draft {
407 dpavlin 13 my $self = shift;
408     my $draft;
409    
410     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
411 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
412 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
413 dpavlin 13 }
414    
415     if ($self->{kwords}) {
416 dpavlin 166 $draft .= '%VECTOR';
417 dpavlin 13 while (my ($key, $value) = each %{ $self->{kwords} }) {
418     $draft .= "\t$key\t$value";
419     }
420     $draft .= "\n";
421     }
422    
423 dpavlin 184 if (defined($self->{score}) && $self->{score} >= 0) {
424     $draft .= "%SCORE\t" . $self->{score} . "\n";
425     }
426    
427 dpavlin 13 $draft .= "\n";
428    
429 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
430     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
431 dpavlin 13
432     return $draft;
433 dpavlin 5 }
434    
435 dpavlin 42
436 dpavlin 4 =head2 delete
437 dpavlin 2
438 dpavlin 4 Empty document object
439 dpavlin 2
440 dpavlin 4 $doc->delete;
441    
442 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
443     convinience. Document objects which go out of scope will be destroyed
444     automatically.
445    
446 dpavlin 4 =cut
447    
448     sub delete {
449     my $self = shift;
450    
451 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
452 dpavlin 5 delete($self->{$data});
453     }
454 dpavlin 4
455 dpavlin 10 $self->{id} = -1;
456    
457 dpavlin 4 return 1;
458     }
459    
460    
461    
462 dpavlin 15 package Search::Estraier::Condition;
463 dpavlin 4
464 dpavlin 98 use Carp qw/carp confess croak/;
465 dpavlin 16
466 dpavlin 15 use Search::Estraier;
467     our @ISA = qw/Search::Estraier/;
468 dpavlin 4
469 dpavlin 16 =head1 Search::Estraier::Condition
470    
471     =head2 new
472    
473     my $cond = new Search::HyperEstraier::Condition;
474    
475     =cut
476    
477     sub new {
478     my $class = shift;
479     my $self = {};
480     bless($self, $class);
481    
482 dpavlin 19 $self->{max} = -1;
483     $self->{options} = 0;
484    
485 dpavlin 16 $self ? return $self : return undef;
486     }
487    
488 dpavlin 42
489 dpavlin 16 =head2 set_phrase
490    
491     $cond->set_phrase('search phrase');
492    
493     =cut
494    
495     sub set_phrase {
496     my $self = shift;
497     $self->{phrase} = $self->_s( shift );
498     }
499    
500 dpavlin 42
501 dpavlin 16 =head2 add_attr
502    
503     $cond->add_attr('@URI STRINC /~dpavlin/');
504    
505     =cut
506    
507     sub add_attr {
508     my $self = shift;
509     my $attr = shift || return;
510     push @{ $self->{attrs} }, $self->_s( $attr );
511     }
512    
513 dpavlin 42
514 dpavlin 16 =head2 set_order
515    
516     $cond->set_order('@mdate NUMD');
517    
518     =cut
519    
520     sub set_order {
521     my $self = shift;
522     $self->{order} = shift;
523     }
524    
525 dpavlin 42
526 dpavlin 16 =head2 set_max
527    
528     $cond->set_max(42);
529    
530     =cut
531    
532     sub set_max {
533     my $self = shift;
534     my $max = shift;
535 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
536 dpavlin 16 $self->{max} = $max;
537     }
538    
539 dpavlin 42
540 dpavlin 16 =head2 set_options
541    
542 dpavlin 98 $cond->set_options( 'SURE' );
543 dpavlin 16
544 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
545    
546     Possible options are:
547    
548     =over 8
549    
550     =item SURE
551    
552     check every N-gram
553    
554     =item USUAL
555    
556     check every second N-gram
557    
558     =item FAST
559    
560     check every third N-gram
561    
562     =item AGITO
563    
564     check every fourth N-gram
565    
566     =item NOIDF
567    
568     don't perform TF-IDF tuning
569    
570     =item SIMPLE
571    
572     use simplified query phrase
573    
574     =back
575    
576     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
577     options;
578    
579     This option changed in version C<0.04> of this module. It's backwards compatibile.
580    
581 dpavlin 16 =cut
582    
583 dpavlin 15 my $options = {
584     SURE => 1 << 0,
585     USUAL => 1 << 1,
586     FAST => 1 << 2,
587     AGITO => 1 << 3,
588     NOIDF => 1 << 4,
589     SIMPLE => 1 << 10,
590     };
591    
592 dpavlin 16 sub set_options {
593     my $self = shift;
594 dpavlin 98 my $opt = 0;
595     foreach my $option (@_) {
596     my $mask;
597     unless ($mask = $options->{$option}) {
598     if ($option eq '1') {
599     next;
600     } else {
601     croak "unknown option $option";
602     }
603     }
604     $opt += $mask;
605     }
606     $self->{options} = $opt;
607 dpavlin 4 }
608    
609 dpavlin 42
610 dpavlin 18 =head2 phrase
611    
612     Return search phrase.
613    
614     print $cond->phrase;
615    
616     =cut
617    
618     sub phrase {
619     my $self = shift;
620     return $self->{phrase};
621     }
622    
623 dpavlin 42
624 dpavlin 19 =head2 order
625 dpavlin 18
626 dpavlin 19 Return search result order.
627    
628     print $cond->order;
629    
630     =cut
631    
632     sub order {
633     my $self = shift;
634     return $self->{order};
635     }
636    
637 dpavlin 42
638 dpavlin 19 =head2 attrs
639    
640     Return search result attrs.
641    
642     my @cond_attrs = $cond->attrs;
643    
644     =cut
645    
646     sub attrs {
647     my $self = shift;
648     #croak "attrs return array, not scalar" if (! wantarray);
649 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
650 dpavlin 19 }
651    
652 dpavlin 42
653 dpavlin 19 =head2 max
654    
655     Return maximum number of results.
656    
657     print $cond->max;
658    
659     C<-1> is returned for unitialized value, C<0> is unlimited.
660    
661     =cut
662    
663     sub max {
664     my $self = shift;
665     return $self->{max};
666     }
667    
668 dpavlin 42
669 dpavlin 19 =head2 options
670    
671     Return options for this condition.
672    
673     print $cond->options;
674    
675     Options are returned in numerical form.
676    
677     =cut
678    
679     sub options {
680     my $self = shift;
681     return $self->{options};
682     }
683    
684    
685 dpavlin 116 =head2 set_skip
686    
687     Set number of skipped documents from beginning of results
688    
689     $cond->set_skip(42);
690    
691     Similar to C<offset> in RDBMS.
692    
693     =cut
694    
695     sub set_skip {
696     my $self = shift;
697     $self->{skip} = shift;
698     }
699    
700     =head2 skip
701    
702     Return skip for this condition.
703    
704     print $cond->skip;
705    
706     =cut
707    
708     sub skip {
709     my $self = shift;
710     return $self->{skip};
711     }
712    
713 dpavlin 184
714     =head2 set_distinct
715    
716     $cond->set_distinct('@author');
717    
718     =cut
719    
720     sub set_distinct {
721     my $self = shift;
722     $self->{distinct} = shift;
723     }
724    
725     =head2 distinct
726    
727     Return distinct attribute
728    
729     print $cond->distinct;
730    
731     =cut
732    
733     sub distinct {
734     my $self = shift;
735     return $self->{distinct};
736     }
737    
738 dpavlin 173 =head2 set_mask
739 dpavlin 116
740 dpavlin 174 Filter out some links when searching.
741 dpavlin 173
742 dpavlin 174 Argument array of link numbers, starting with 0 (current node).
743 dpavlin 173
744     $cond->set_mask(qw/0 1 4/);
745    
746     =cut
747    
748     sub set_mask {
749     my $self = shift;
750     return unless (@_);
751     $self->{mask} = \@_;
752     }
753    
754    
755 dpavlin 20 package Search::Estraier::ResultDocument;
756    
757 dpavlin 24 use Carp qw/croak/;
758 dpavlin 20
759 dpavlin 24 #use Search::Estraier;
760     #our @ISA = qw/Search::Estraier/;
761 dpavlin 20
762     =head1 Search::Estraier::ResultDocument
763    
764     =head2 new
765    
766 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
767 dpavlin 20 uri => 'http://localhost/document/uri/42',
768     attrs => {
769     foo => 1,
770     bar => 2,
771     },
772     snippet => 'this is a text of snippet'
773     keywords => 'this\tare\tkeywords'
774     );
775    
776     =cut
777    
778     sub new {
779     my $class = shift;
780     my $self = {@_};
781     bless($self, $class);
782    
783 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
784 dpavlin 20
785     $self ? return $self : return undef;
786     }
787    
788 dpavlin 42
789 dpavlin 23 =head2 uri
790 dpavlin 20
791 dpavlin 23 Return URI of result document
792 dpavlin 20
793 dpavlin 23 print $rdoc->uri;
794    
795     =cut
796    
797     sub uri {
798     my $self = shift;
799     return $self->{uri};
800     }
801    
802    
803     =head2 attr_names
804    
805     Returns array with attribute names from result document object.
806    
807     my @attrs = $rdoc->attr_names;
808    
809     =cut
810    
811     sub attr_names {
812     my $self = shift;
813     croak "attr_names return array, not scalar" if (! wantarray);
814     return sort keys %{ $self->{attrs} };
815     }
816    
817 dpavlin 42
818 dpavlin 23 =head2 attr
819    
820     Returns value of an attribute.
821    
822     my $value = $rdoc->attr( 'attribute' );
823    
824     =cut
825    
826     sub attr {
827     my $self = shift;
828     my $name = shift || return;
829     return $self->{attrs}->{ $name };
830     }
831    
832 dpavlin 42
833 dpavlin 23 =head2 snippet
834    
835     Return snippet from result document
836    
837     print $rdoc->snippet;
838    
839     =cut
840    
841     sub snippet {
842     my $self = shift;
843     return $self->{snippet};
844     }
845    
846 dpavlin 42
847 dpavlin 23 =head2 keywords
848    
849     Return keywords from result document
850    
851     print $rdoc->keywords;
852    
853     =cut
854    
855     sub keywords {
856     my $self = shift;
857     return $self->{keywords};
858     }
859    
860    
861 dpavlin 25 package Search::Estraier::NodeResult;
862    
863     use Carp qw/croak/;
864    
865     #use Search::Estraier;
866     #our @ISA = qw/Search::Estraier/;
867    
868     =head1 Search::Estraier::NodeResult
869    
870     =head2 new
871    
872     my $res = new Search::HyperEstraier::NodeResult(
873     docs => @array_of_rdocs,
874     hits => %hash_with_hints,
875     );
876    
877     =cut
878    
879     sub new {
880     my $class = shift;
881     my $self = {@_};
882     bless($self, $class);
883    
884     foreach my $f (qw/docs hints/) {
885     croak "missing $f for ResultDocument" unless defined($self->{$f});
886     }
887    
888     $self ? return $self : return undef;
889     }
890    
891 dpavlin 42
892 dpavlin 25 =head2 doc_num
893    
894     Return number of documents
895    
896     print $res->doc_num;
897    
898 dpavlin 100 This will return real number of documents (limited by C<max>).
899     If you want to get total number of hits, see C<hits>.
900    
901 dpavlin 25 =cut
902    
903     sub doc_num {
904     my $self = shift;
905 dpavlin 53 return $#{$self->{docs}} + 1;
906 dpavlin 25 }
907    
908 dpavlin 42
909 dpavlin 25 =head2 get_doc
910    
911     Return single document
912    
913     my $doc = $res->get_doc( 42 );
914    
915     Returns undef if document doesn't exist.
916    
917     =cut
918    
919     sub get_doc {
920     my $self = shift;
921     my $num = shift;
922 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
923 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
924     return $self->{docs}->[$num];
925     }
926    
927 dpavlin 42
928 dpavlin 25 =head2 hint
929    
930     Return specific hint from results.
931    
932 dpavlin 100 print $res->hint( 'VERSION' );
933 dpavlin 25
934     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
935     C<TIME>, C<LINK#n>, C<VIEW>.
936    
937     =cut
938    
939     sub hint {
940     my $self = shift;
941     my $key = shift || return;
942     return $self->{hints}->{$key};
943     }
944    
945 dpavlin 108 =head2 hints
946 dpavlin 25
947 dpavlin 91 More perlish version of C<hint>. This one returns hash.
948    
949 dpavlin 100 my %hints = $res->hints;
950 dpavlin 91
951     =cut
952    
953     sub hints {
954     my $self = shift;
955     return $self->{hints};
956     }
957    
958 dpavlin 100 =head2 hits
959    
960     Syntaxtic sugar for total number of hits for this query
961    
962     print $res->hits;
963    
964     It's same as
965    
966     print $res->hint('HIT');
967    
968     but shorter.
969    
970     =cut
971    
972     sub hits {
973     my $self = shift;
974     return $self->{hints}->{'HIT'} || 0;
975     }
976    
977 dpavlin 27 package Search::Estraier::Node;
978    
979 dpavlin 44 use Carp qw/carp croak confess/;
980 dpavlin 33 use URI;
981 dpavlin 36 use MIME::Base64;
982 dpavlin 33 use IO::Socket::INET;
983 dpavlin 49 use URI::Escape qw/uri_escape/;
984 dpavlin 29
985 dpavlin 27 =head1 Search::Estraier::Node
986    
987     =head2 new
988    
989     my $node = new Search::HyperEstraier::Node;
990    
991 dpavlin 65 or optionally with C<url> as parametar
992    
993     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
994    
995 dpavlin 78 or in more verbose form
996    
997     my $node = new Search::HyperEstraier::Node(
998     url => 'http://localhost:1978/node/test',
999 dpavlin 132 user => 'admin',
1000     passwd => 'admin'
1001 dpavlin 139 create => 1,
1002     label => 'optional node label',
1003 dpavlin 78 debug => 1,
1004     croak_on_error => 1
1005     );
1006    
1007     with following arguments:
1008    
1009     =over 4
1010    
1011     =item url
1012    
1013     URL to node
1014    
1015 dpavlin 132 =item user
1016    
1017     specify username for node server authentication
1018    
1019     =item passwd
1020    
1021     password for authentication
1022    
1023 dpavlin 139 =item create
1024    
1025     create node if it doesn't exists
1026    
1027     =item label
1028    
1029     optional label for new node if C<create> is used
1030    
1031 dpavlin 78 =item debug
1032    
1033     dumps a B<lot> of debugging output
1034    
1035     =item croak_on_error
1036    
1037     very helpful during development. It will croak on all errors instead of
1038     silently returning C<-1> (which is convention of Hyper Estraier API in other
1039     languages).
1040    
1041     =back
1042    
1043 dpavlin 27 =cut
1044    
1045     sub new {
1046     my $class = shift;
1047     my $self = {
1048     pxport => -1,
1049 dpavlin 33 timeout => 0, # this used to be -1
1050 dpavlin 27 wwidth => 480,
1051     hwidth => 96,
1052     awidth => 96,
1053     status => -1,
1054     };
1055 dpavlin 111
1056 dpavlin 27 bless($self, $class);
1057    
1058 dpavlin 65 if ($#_ == 0) {
1059     $self->{url} = shift;
1060     } else {
1061 dpavlin 78 %$self = ( %$self, @_ );
1062    
1063 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
1064    
1065 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
1066     }
1067 dpavlin 57
1068 dpavlin 111 $self->{inform} = {
1069     dnum => -1,
1070     wnum => -1,
1071     size => -1.0,
1072     };
1073    
1074 dpavlin 139 if ($self->{create}) {
1075 dpavlin 140 if (! eval { $self->name } || $@) {
1076 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
1077     croak "can't find node name in '$self->{url}'" unless ($name);
1078     my $label = $self->{label} || $name;
1079     $self->master(
1080     action => 'nodeadd',
1081     name => $name,
1082     label => $label,
1083     ) || croak "can't create node $name ($label)";
1084     }
1085     }
1086    
1087 dpavlin 27 $self ? return $self : return undef;
1088     }
1089    
1090 dpavlin 42
1091 dpavlin 29 =head2 set_url
1092    
1093     Specify URL to node server
1094    
1095     $node->set_url('http://localhost:1978');
1096    
1097     =cut
1098    
1099     sub set_url {
1100     my $self = shift;
1101     $self->{url} = shift;
1102     }
1103    
1104 dpavlin 42
1105 dpavlin 29 =head2 set_proxy
1106    
1107     Specify proxy server to connect to node server
1108    
1109     $node->set_proxy('proxy.example.com', 8080);
1110    
1111     =cut
1112    
1113     sub set_proxy {
1114     my $self = shift;
1115     my ($host,$port) = @_;
1116 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1117 dpavlin 29 $self->{pxhost} = $host;
1118     $self->{pxport} = $port;
1119     }
1120    
1121 dpavlin 42
1122 dpavlin 30 =head2 set_timeout
1123    
1124     Specify timeout of connection in seconds
1125    
1126     $node->set_timeout( 15 );
1127    
1128     =cut
1129    
1130     sub set_timeout {
1131     my $self = shift;
1132     my $sec = shift;
1133 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1134 dpavlin 30 $self->{timeout} = $sec;
1135     }
1136    
1137 dpavlin 42
1138 dpavlin 31 =head2 set_auth
1139    
1140     Specify name and password for authentication to node server.
1141    
1142     $node->set_auth('clint','eastwood');
1143    
1144     =cut
1145    
1146     sub set_auth {
1147     my $self = shift;
1148     my ($login,$passwd) = @_;
1149 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1150     chomp($basic_auth);
1151     $self->{auth} = $basic_auth;
1152 dpavlin 31 }
1153    
1154 dpavlin 42
1155 dpavlin 32 =head2 status
1156    
1157     Return status code of last request.
1158    
1159 dpavlin 40 print $node->status;
1160 dpavlin 32
1161     C<-1> means connection failure.
1162    
1163     =cut
1164    
1165     sub status {
1166     my $self = shift;
1167     return $self->{status};
1168     }
1169    
1170 dpavlin 42
1171 dpavlin 40 =head2 put_doc
1172    
1173 dpavlin 41 Add a document
1174 dpavlin 40
1175 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1176    
1177 dpavlin 151 Return true on success or false on failure.
1178 dpavlin 41
1179 dpavlin 40 =cut
1180    
1181     sub put_doc {
1182     my $self = shift;
1183     my $doc = shift || return;
1184 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1185 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/put_doc',
1186 dpavlin 41 'text/x-estraier-draft',
1187     $doc->dump_draft,
1188     undef
1189 dpavlin 151 ) == 200) {
1190     $self->_clear_info;
1191     return 1;
1192     }
1193     return undef;
1194 dpavlin 40 }
1195    
1196 dpavlin 41
1197     =head2 out_doc
1198    
1199     Remove a document
1200    
1201     $node->out_doc( document_id ) or "can't remove document";
1202    
1203     Return true on success or false on failture.
1204    
1205     =cut
1206    
1207     sub out_doc {
1208     my $self = shift;
1209     my $id = shift || return;
1210     return unless ($self->{url});
1211 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1212 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1213 dpavlin 41 'application/x-www-form-urlencoded',
1214     "id=$id",
1215     undef
1216 dpavlin 150 ) == 200) {
1217 dpavlin 151 $self->_clear_info;
1218     return 1;
1219 dpavlin 150 }
1220     return undef;
1221 dpavlin 41 }
1222    
1223    
1224     =head2 out_doc_by_uri
1225    
1226     Remove a registrated document using it's uri
1227    
1228 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1229 dpavlin 41
1230     Return true on success or false on failture.
1231    
1232     =cut
1233    
1234     sub out_doc_by_uri {
1235     my $self = shift;
1236     my $uri = shift || return;
1237     return unless ($self->{url});
1238 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1239 dpavlin 41 'application/x-www-form-urlencoded',
1240 dpavlin 50 "uri=" . uri_escape($uri),
1241 dpavlin 41 undef
1242 dpavlin 150 ) == 200) {
1243 dpavlin 151 $self->_clear_info;
1244     return 1;
1245 dpavlin 150 }
1246     return undef;
1247 dpavlin 41 }
1248    
1249 dpavlin 42
1250     =head2 edit_doc
1251    
1252     Edit attributes of a document
1253    
1254     $node->edit_doc( $document_draft ) or die "can't edit document";
1255    
1256     Return true on success or false on failture.
1257    
1258     =cut
1259    
1260     sub edit_doc {
1261     my $self = shift;
1262     my $doc = shift || return;
1263 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1264 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1265 dpavlin 42 'text/x-estraier-draft',
1266     $doc->dump_draft,
1267     undef
1268 dpavlin 151 ) == 200) {
1269     $self->_clear_info;
1270     return 1;
1271     }
1272     return undef;
1273 dpavlin 42 }
1274    
1275    
1276 dpavlin 43 =head2 get_doc
1277    
1278     Retreive document
1279    
1280     my $doc = $node->get_doc( document_id ) or die "can't get document";
1281    
1282     Return true on success or false on failture.
1283    
1284     =cut
1285    
1286     sub get_doc {
1287     my $self = shift;
1288     my $id = shift || return;
1289     return $self->_fetch_doc( id => $id );
1290     }
1291    
1292 dpavlin 44
1293 dpavlin 43 =head2 get_doc_by_uri
1294    
1295     Retreive document
1296    
1297 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1298 dpavlin 43
1299     Return true on success or false on failture.
1300    
1301     =cut
1302    
1303     sub get_doc_by_uri {
1304     my $self = shift;
1305     my $uri = shift || return;
1306     return $self->_fetch_doc( uri => $uri );
1307     }
1308    
1309 dpavlin 44
1310 dpavlin 49 =head2 get_doc_attr
1311    
1312     Retrieve the value of an atribute from object
1313    
1314     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1315     die "can't get document attribute";
1316    
1317     =cut
1318    
1319     sub get_doc_attr {
1320     my $self = shift;
1321     my ($id,$name) = @_;
1322     return unless ($id && $name);
1323     return $self->_fetch_doc( id => $id, attr => $name );
1324     }
1325    
1326    
1327     =head2 get_doc_attr_by_uri
1328    
1329     Retrieve the value of an atribute from object
1330    
1331     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1332     die "can't get document attribute";
1333    
1334     =cut
1335    
1336     sub get_doc_attr_by_uri {
1337     my $self = shift;
1338     my ($uri,$name) = @_;
1339     return unless ($uri && $name);
1340     return $self->_fetch_doc( uri => $uri, attr => $name );
1341     }
1342    
1343    
1344 dpavlin 44 =head2 etch_doc
1345    
1346     Exctract document keywords
1347    
1348     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1349    
1350     =cut
1351    
1352 dpavlin 49 sub etch_doc {
1353 dpavlin 44 my $self = shift;
1354     my $id = shift || return;
1355     return $self->_fetch_doc( id => $id, etch => 1 );
1356     }
1357    
1358     =head2 etch_doc_by_uri
1359    
1360     Retreive document
1361    
1362 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1363 dpavlin 44
1364     Return true on success or false on failture.
1365    
1366     =cut
1367    
1368     sub etch_doc_by_uri {
1369     my $self = shift;
1370     my $uri = shift || return;
1371     return $self->_fetch_doc( uri => $uri, etch => 1 );
1372     }
1373    
1374    
1375 dpavlin 45 =head2 uri_to_id
1376    
1377     Get ID of document specified by URI
1378    
1379     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1380    
1381 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1382    
1383 dpavlin 45 =cut
1384    
1385     sub uri_to_id {
1386     my $self = shift;
1387     my $uri = shift || return;
1388 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1389 dpavlin 45 }
1390    
1391    
1392 dpavlin 43 =head2 _fetch_doc
1393    
1394 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1395     C<etch_doc>, C<etch_doc_by_uri>.
1396 dpavlin 43
1397 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1398     my $doc = $node->_fetch_doc( id => 42 );
1399     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1400 dpavlin 43
1401 dpavlin 45 # to extract keywords, add etch
1402     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1403     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1404    
1405 dpavlin 49 # to get document attrubute add attr
1406     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1407     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1408    
1409 dpavlin 45 # more general form which allows implementation of
1410     # uri_to_id
1411     my $id = $node->_fetch_doc(
1412     uri => 'file:///document/uri/42',
1413     path => '/uri_to_id',
1414     chomp_resbody => 1
1415     );
1416    
1417 dpavlin 43 =cut
1418    
1419     sub _fetch_doc {
1420     my $self = shift;
1421 dpavlin 44 my $a = {@_};
1422     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1423    
1424     my ($arg, $resbody);
1425    
1426 dpavlin 45 my $path = $a->{path} || '/get_doc';
1427 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1428    
1429     if ($a->{id}) {
1430 dpavlin 168 croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1431 dpavlin 44 $arg = 'id=' . $a->{id};
1432     } elsif ($a->{uri}) {
1433 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1434 dpavlin 44 } else {
1435     confess "unhandled argument. Need id or uri.";
1436 dpavlin 43 }
1437 dpavlin 44
1438 dpavlin 49 if ($a->{attr}) {
1439     $path = '/get_doc_attr';
1440     $arg .= '&attr=' . uri_escape($a->{attr});
1441     $a->{chomp_resbody} = 1;
1442     }
1443    
1444 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1445 dpavlin 43 'application/x-www-form-urlencoded',
1446 dpavlin 44 $arg,
1447 dpavlin 45 \$resbody,
1448 dpavlin 103 $a->{croak_on_error},
1449 dpavlin 43 );
1450 dpavlin 44
1451 dpavlin 43 return if ($rv != 200);
1452 dpavlin 44
1453     if ($a->{etch}) {
1454     $self->{kwords} = {};
1455     return +{} unless ($resbody);
1456     foreach my $l (split(/\n/, $resbody)) {
1457     my ($k,$v) = split(/\t/, $l, 2);
1458     $self->{kwords}->{$k} = $v if ($v);
1459     }
1460     return $self->{kwords};
1461 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1462     return unless (defined($resbody));
1463     chomp($resbody);
1464     return $resbody;
1465 dpavlin 44 } else {
1466     return new Search::Estraier::Document($resbody);
1467     }
1468 dpavlin 43 }
1469    
1470    
1471 dpavlin 48 =head2 name
1472 dpavlin 43
1473 dpavlin 48 my $node_name = $node->name;
1474 dpavlin 43
1475 dpavlin 48 =cut
1476    
1477     sub name {
1478     my $self = shift;
1479 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1480     return $self->{inform}->{name};
1481 dpavlin 48 }
1482    
1483    
1484     =head2 label
1485    
1486     my $node_label = $node->label;
1487    
1488     =cut
1489    
1490     sub label {
1491     my $self = shift;
1492 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1493     return $self->{inform}->{label};
1494 dpavlin 48 }
1495    
1496    
1497     =head2 doc_num
1498    
1499     my $documents_in_node = $node->doc_num;
1500    
1501     =cut
1502    
1503     sub doc_num {
1504     my $self = shift;
1505 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1506     return $self->{inform}->{dnum};
1507 dpavlin 48 }
1508    
1509    
1510     =head2 word_num
1511    
1512     my $words_in_node = $node->word_num;
1513    
1514     =cut
1515    
1516     sub word_num {
1517     my $self = shift;
1518 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1519     return $self->{inform}->{wnum};
1520 dpavlin 48 }
1521    
1522    
1523     =head2 size
1524    
1525     my $node_size = $node->size;
1526    
1527     =cut
1528    
1529     sub size {
1530     my $self = shift;
1531 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1532     return $self->{inform}->{size};
1533 dpavlin 48 }
1534    
1535    
1536 dpavlin 51 =head2 search
1537 dpavlin 48
1538 dpavlin 51 Search documents which match condition
1539    
1540     my $nres = $node->search( $cond, $depth );
1541    
1542     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1543     depth for meta search.
1544    
1545     Function results C<Search::Estraier::NodeResult> object.
1546    
1547     =cut
1548    
1549     sub search {
1550     my $self = shift;
1551     my ($cond, $depth) = @_;
1552     return unless ($cond && defined($depth) && $self->{url});
1553     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1554     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1555    
1556 dpavlin 52 my $resbody;
1557 dpavlin 51
1558 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1559 dpavlin 53 'application/x-www-form-urlencoded',
1560 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1561 dpavlin 52 \$resbody,
1562     );
1563     return if ($rv != 200);
1564    
1565 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1566     my $hintsText = splice @records, 0, 2; # starts with empty record
1567     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1568    
1569     # process records
1570 dpavlin 128 my $docs = [];
1571 dpavlin 126 foreach my $record (@records)
1572     {
1573     # split into keys and snippets
1574     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1575    
1576     # create document hash
1577     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1578     $doc->{'@keywords'} = $doc->{keywords};
1579     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1580     $doc->{snippet} = $snippet;
1581    
1582     push @$docs, new Search::Estraier::ResultDocument(
1583     attrs => $doc,
1584     uri => $doc->{'@uri'},
1585     snippet => $snippet,
1586     keywords => $doc->{'keywords'},
1587     );
1588     }
1589    
1590     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1591     }
1592    
1593    
1594 dpavlin 51 =head2 cond_to_query
1595    
1596 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1597    
1598 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1599 dpavlin 51
1600     =cut
1601    
1602     sub cond_to_query {
1603     my $self = shift;
1604    
1605     my $cond = shift || return;
1606     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1607 dpavlin 61 my $depth = shift;
1608 dpavlin 51
1609     my @args;
1610    
1611     if (my $phrase = $cond->phrase) {
1612     push @args, 'phrase=' . uri_escape($phrase);
1613     }
1614    
1615     if (my @attrs = $cond->attrs) {
1616     for my $i ( 0 .. $#attrs ) {
1617 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1618 dpavlin 51 }
1619     }
1620    
1621     if (my $order = $cond->order) {
1622     push @args, 'order=' . uri_escape($order);
1623     }
1624    
1625     if (my $max = $cond->max) {
1626     push @args, 'max=' . $max;
1627     } else {
1628     push @args, 'max=' . (1 << 30);
1629     }
1630    
1631     if (my $options = $cond->options) {
1632     push @args, 'options=' . $options;
1633     }
1634    
1635 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1636 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1637     push @args, 'hwidth=' . $self->{hwidth};
1638     push @args, 'awidth=' . $self->{awidth};
1639 dpavlin 155 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1640 dpavlin 51
1641 dpavlin 184 if (my $distinct = $cond->distinct) {
1642     push @args, 'distinct=' . uri_escape($distinct);
1643     }
1644    
1645 dpavlin 173 if ($cond->{mask}) {
1646     my $mask = 0;
1647     map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };
1648    
1649     push @args, 'mask=' . $mask if ($mask);
1650     }
1651    
1652 dpavlin 51 return join('&', @args);
1653     }
1654    
1655    
1656 dpavlin 33 =head2 shuttle_url
1657 dpavlin 32
1658 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1659 dpavlin 33 master.
1660 dpavlin 2
1661 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1662 dpavlin 2
1663 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1664     body will be saved within object.
1665 dpavlin 2
1666     =cut
1667    
1668 dpavlin 59 use LWP::UserAgent;
1669    
1670 dpavlin 33 sub shuttle_url {
1671     my $self = shift;
1672 dpavlin 2
1673 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1674 dpavlin 2
1675 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1676    
1677 dpavlin 40 $self->{status} = -1;
1678 dpavlin 33
1679 dpavlin 41 warn "## $url\n" if ($self->{debug});
1680 dpavlin 36
1681 dpavlin 33 $url = new URI($url);
1682 dpavlin 37 if (
1683     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1684     !$url->host || !$url->port || $url->port < 1
1685     ) {
1686     carp "can't parse $url\n";
1687     return -1;
1688     }
1689 dpavlin 33
1690 dpavlin 59 my $ua = LWP::UserAgent->new;
1691     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1692 dpavlin 33
1693 dpavlin 59 my $req;
1694 dpavlin 37 if ($reqbody) {
1695 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1696 dpavlin 37 } else {
1697 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1698 dpavlin 37 }
1699    
1700 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1701     $req->headers->header( 'Connection', 'close' );
1702 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1703 dpavlin 59 $req->content_type( $content_type );
1704 dpavlin 37
1705 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1706 dpavlin 2
1707 dpavlin 37 if ($reqbody) {
1708 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1709 dpavlin 59 $req->content( $reqbody );
1710 dpavlin 33 }
1711 dpavlin 2
1712 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1713 dpavlin 2
1714 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1715 dpavlin 2
1716 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1717    
1718 dpavlin 78 if (! $res->is_success) {
1719 dpavlin 103 if ($croak_on_error) {
1720 dpavlin 78 croak("can't get $url: ",$res->status_line);
1721     } else {
1722     return -1;
1723     }
1724     }
1725 dpavlin 2
1726 dpavlin 59 $$resbody .= $res->content;
1727    
1728 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1729 dpavlin 39
1730 dpavlin 40 return $self->{status};
1731 dpavlin 2 }
1732    
1733 dpavlin 48
1734 dpavlin 55 =head2 set_snippet_width
1735 dpavlin 48
1736 dpavlin 55 Set width of snippets in results
1737    
1738     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1739    
1740     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1741     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1742    
1743     C<$hwidth> specified width of strings from beginning of string. Default
1744     value is C<96>. Negative or zero value keep previous value.
1745    
1746     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1747     If negative of zero value is provided previous value is kept unchanged.
1748    
1749     =cut
1750    
1751     sub set_snippet_width {
1752     my $self = shift;
1753    
1754     my ($wwidth, $hwidth, $awidth) = @_;
1755     $self->{wwidth} = $wwidth;
1756     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1757     $self->{awidth} = $awidth if ($awidth >= 0);
1758     }
1759    
1760    
1761 dpavlin 56 =head2 set_user
1762 dpavlin 55
1763 dpavlin 56 Manage users of node
1764    
1765     $node->set_user( 'name', $mode );
1766    
1767     C<$mode> can be one of:
1768    
1769     =over 4
1770    
1771     =item 0
1772    
1773     delete account
1774    
1775     =item 1
1776    
1777     set administrative right for user
1778    
1779     =item 2
1780    
1781     set user account as guest
1782    
1783     =back
1784    
1785     Return true on success, otherwise false.
1786    
1787     =cut
1788    
1789     sub set_user {
1790     my $self = shift;
1791     my ($name, $mode) = @_;
1792    
1793     return unless ($self->{url});
1794     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1795    
1796     $self->shuttle_url( $self->{url} . '/_set_user',
1797 dpavlin 154 'application/x-www-form-urlencoded',
1798 dpavlin 56 'name=' . uri_escape($name) . '&mode=' . $mode,
1799     undef
1800     ) == 200;
1801     }
1802    
1803    
1804 dpavlin 57 =head2 set_link
1805    
1806     Manage node links
1807    
1808     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1809    
1810     If C<$credit> is negative, link is removed.
1811    
1812     =cut
1813    
1814     sub set_link {
1815     my $self = shift;
1816     my ($url, $label, $credit) = @_;
1817    
1818     return unless ($self->{url});
1819     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1820    
1821     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1822     $reqbody .= '&credit=' . $credit if ($credit > 0);
1823    
1824 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1825 dpavlin 71 'application/x-www-form-urlencoded',
1826 dpavlin 57 $reqbody,
1827     undef
1828 dpavlin 107 ) == 200) {
1829     # refresh node info after adding link
1830 dpavlin 151 $self->_clear_info;
1831 dpavlin 107 return 1;
1832     }
1833 dpavlin 150 return undef;
1834 dpavlin 57 }
1835    
1836 dpavlin 107 =head2 admins
1837 dpavlin 57
1838 dpavlin 107 my @admins = @{ $node->admins };
1839    
1840     Return array of users with admin rights on node
1841    
1842     =cut
1843    
1844     sub admins {
1845     my $self = shift;
1846 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1847     return $self->{inform}->{admins};
1848 dpavlin 107 }
1849    
1850     =head2 guests
1851    
1852     my @guests = @{ $node->guests };
1853    
1854     Return array of users with guest rights on node
1855    
1856     =cut
1857    
1858     sub guests {
1859     my $self = shift;
1860 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1861     return $self->{inform}->{guests};
1862 dpavlin 107 }
1863    
1864     =head2 links
1865    
1866     my $links = @{ $node->links };
1867    
1868     Return array of links for this node
1869    
1870     =cut
1871    
1872     sub links {
1873     my $self = shift;
1874 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1875     return $self->{inform}->{links};
1876 dpavlin 107 }
1877    
1878 dpavlin 160 =head2 cacheusage
1879    
1880     Return cache usage for a node
1881    
1882     my $cache = $node->cacheusage;
1883    
1884     =cut
1885    
1886     sub cacheusage {
1887     my $self = shift;
1888    
1889     return unless ($self->{url});
1890    
1891     my $resbody;
1892     my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1893     'text/plain',
1894     undef,
1895     \$resbody,
1896     );
1897    
1898     return if ($rv != 200 || !$resbody);
1899    
1900     return $resbody;
1901     }
1902    
1903 dpavlin 134 =head2 master
1904 dpavlin 107
1905 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1906    
1907     $node->master(
1908     action => 'sync'
1909     );
1910    
1911     All available actions are documented in
1912     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1913    
1914     =cut
1915    
1916     my $estmaster_rest = {
1917     shutdown => {
1918     status => 202,
1919     },
1920     sync => {
1921     status => 202,
1922     },
1923     backup => {
1924     status => 202,
1925     },
1926     userlist => {
1927     status => 200,
1928 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1929 dpavlin 134 },
1930     useradd => {
1931 dpavlin 135 required => [ qw/name passwd flags/ ],
1932     optional => [ qw/fname misc/ ],
1933 dpavlin 134 status => 200,
1934     },
1935     userdel => {
1936 dpavlin 135 required => [ qw/name/ ],
1937 dpavlin 134 status => 200,
1938     },
1939     nodelist => {
1940     status => 200,
1941 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1942 dpavlin 134 },
1943     nodeadd => {
1944 dpavlin 135 required => [ qw/name/ ],
1945     optional => [ qw/label/ ],
1946 dpavlin 134 status => 200,
1947     },
1948     nodedel => {
1949 dpavlin 135 required => [ qw/name/ ],
1950 dpavlin 134 status => 200,
1951     },
1952     nodeclr => {
1953 dpavlin 135 required => [ qw/name/ ],
1954 dpavlin 134 status => 200,
1955     },
1956     nodertt => {
1957     status => 200,
1958     },
1959     };
1960    
1961     sub master {
1962     my $self = shift;
1963    
1964     my $args = {@_};
1965    
1966     # have action?
1967     my $action = $args->{action} || croak "need action, available: ",
1968     join(", ",keys %{ $estmaster_rest });
1969    
1970     # check if action is valid
1971     my $rest = $estmaster_rest->{$action};
1972     croak "action '$action' is not supported, available actions: ",
1973     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1974    
1975     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1976    
1977     my @args;
1978    
1979     if ($rest->{required} || $rest->{optional}) {
1980    
1981     map {
1982     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1983     push @args, $_ . '=' . uri_escape( $args->{$_} );
1984 dpavlin 136 } ( @{ $rest->{required} } );
1985 dpavlin 134
1986     map {
1987     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1988 dpavlin 136 } ( @{ $rest->{optional} } );
1989 dpavlin 134
1990     }
1991    
1992     my $uri = new URI( $self->{url} );
1993    
1994     my $resbody;
1995    
1996 dpavlin 135 my $status = $self->shuttle_url(
1997 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1998     'application/x-www-form-urlencoded',
1999     join('&', @args),
2000     \$resbody,
2001     1,
2002 dpavlin 135 ) or confess "shuttle_url failed";
2003 dpavlin 134
2004 dpavlin 135 if ($status == $rest->{status}) {
2005 dpavlin 149
2006     # refresh node info after sync
2007 dpavlin 151 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
2008 dpavlin 149
2009 dpavlin 135 if ($rest->{returns} && wantarray) {
2010 dpavlin 134
2011     my @results;
2012 dpavlin 135 my $fields = $#{$rest->{returns}};
2013 dpavlin 134
2014     foreach my $line ( split(/[\r\n]/,$resbody) ) {
2015 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
2016 dpavlin 134 my $row;
2017 dpavlin 135 foreach my $i ( 0 .. $fields) {
2018     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
2019     }
2020 dpavlin 134 push @results, $row;
2021     }
2022    
2023     return @results;
2024    
2025 dpavlin 135 } elsif ($resbody) {
2026 dpavlin 136 chomp $resbody;
2027 dpavlin 134 return $resbody;
2028 dpavlin 135 } else {
2029     return 0E0;
2030 dpavlin 134 }
2031     }
2032 dpavlin 135
2033     carp "expected status $rest->{status}, but got $status";
2034     return undef;
2035 dpavlin 134 }
2036    
2037 dpavlin 55 =head1 PRIVATE METHODS
2038    
2039     You could call those directly, but you don't have to. I hope.
2040    
2041     =head2 _set_info
2042    
2043 dpavlin 48 Set information for node
2044    
2045 dpavlin 55 $node->_set_info;
2046 dpavlin 48
2047     =cut
2048    
2049 dpavlin 55 sub _set_info {
2050 dpavlin 48 my $self = shift;
2051    
2052     $self->{status} = -1;
2053     return unless ($self->{url});
2054    
2055     my $resbody;
2056     my $rv = $self->shuttle_url( $self->{url} . '/inform',
2057     'text/plain',
2058     undef,
2059     \$resbody,
2060     );
2061    
2062     return if ($rv != 200 || !$resbody);
2063    
2064 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
2065 dpavlin 48
2066 dpavlin 151 $self->_clear_info;
2067 dpavlin 111
2068     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
2069     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
2070    
2071 dpavlin 107 return $resbody unless (@lines);
2072    
2073     shift @lines;
2074    
2075     while(my $admin = shift @lines) {
2076 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
2077 dpavlin 107 }
2078 dpavlin 111
2079 dpavlin 107 while(my $guest = shift @lines) {
2080 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
2081 dpavlin 107 }
2082    
2083     while(my $link = shift @lines) {
2084 dpavlin 111 push @{$self->{inform}->{links}}, $link;
2085 dpavlin 107 }
2086    
2087     return $resbody;
2088    
2089 dpavlin 48 }
2090    
2091 dpavlin 151 =head2 _clear_info
2092    
2093     Clear information for node
2094    
2095     $node->_clear_info;
2096    
2097     On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
2098     info will be fetch again from Hyper Estraier.
2099    
2100     =cut
2101     sub _clear_info {
2102     my $self = shift;
2103     $self->{inform} = {
2104     dnum => -1,
2105     wnum => -1,
2106     size => -1.0,
2107     };
2108     }
2109    
2110 dpavlin 2 ###
2111    
2112     =head1 EXPORT
2113    
2114     Nothing.
2115    
2116     =head1 SEE ALSO
2117    
2118     L<http://hyperestraier.sourceforge.net/>
2119    
2120     Hyper Estraier Ruby interface on which this module is based.
2121    
2122 dpavlin 164 Hyper Estraier now also has pure-perl binding included in distribution. It's
2123     a faster way to access databases directly if you are not running
2124     C<estmaster> P2P server.
2125    
2126 dpavlin 2 =head1 AUTHOR
2127    
2128     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2129    
2130 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2131 dpavlin 2
2132     =head1 COPYRIGHT AND LICENSE
2133    
2134 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2135 dpavlin 2
2136     This library is free software; you can redistribute it and/or modify
2137     it under the GPL v2 or later.
2138    
2139     =cut
2140    
2141     1;

  ViewVC Help
Powered by ViewVC 1.1.26