/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 166 - (hide annotations)
Sun Aug 6 12:48:02 2006 UTC (17 years, 7 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 36760 byte(s)
add_vectors added [0.07_3] and fixed vector handling which was broken
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 166 our $VERSION = '0.07_3';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 164 This class implements Document which is single item in Hyper Estraier.
124 dpavlin 14
125 dpavlin 164 It's is collection of:
126 dpavlin 42
127 dpavlin 164 =over 4
128    
129     =item attributes
130    
131     C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132    
133     You can add common filters to C<attrindex> in estmaster's C<_conf>
134     file for better performance. See C<attrindex> in
135     L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136    
137     =item vectors
138    
139     also C<< 'key' => 'value' >> pairs
140    
141     =item display text
142    
143     Text which will be used to create searchable corpus of your index and
144     included in snippet output.
145    
146     =item hidden text
147    
148     Text which will be searchable, but will not be included in snippet.
149    
150     =back
151    
152 dpavlin 2 =head2 new
153    
154 dpavlin 14 Create new document, empty or from draft.
155    
156 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
157 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
158 dpavlin 2
159     =cut
160    
161     sub new {
162     my $class = shift;
163 dpavlin 14 my $self = {};
164 dpavlin 2 bless($self, $class);
165    
166 dpavlin 6 $self->{id} = -1;
167    
168 dpavlin 14 my $draft = shift;
169    
170     if ($draft) {
171     my $in_text = 0;
172     foreach my $line (split(/\n/, $draft)) {
173    
174     if ($in_text) {
175     if ($line =~ /^\t/) {
176     push @{ $self->{htexts} }, substr($line, 1);
177     } else {
178     push @{ $self->{dtexts} }, $line;
179     }
180     next;
181     }
182    
183     if ($line =~ m/^%VECTOR\t(.+)$/) {
184     my @fields = split(/\t/, $1);
185 dpavlin 166 if ($#fields % 2 == 1) {
186     $self->{kwords} = { @fields };
187     } else {
188     warn "can't decode $line\n";
189 dpavlin 14 }
190     next;
191     } elsif ($line =~ m/^%/) {
192     # What is this? comment?
193     #warn "$line\n";
194     next;
195     } elsif ($line =~ m/^$/) {
196     $in_text = 1;
197     next;
198 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
199 dpavlin 14 $self->{attrs}->{ $1 } = $2;
200     next;
201     }
202    
203 dpavlin 81 warn "draft ignored: '$line'\n";
204 dpavlin 14 }
205     }
206    
207 dpavlin 2 $self ? return $self : return undef;
208     }
209    
210 dpavlin 4
211 dpavlin 2 =head2 add_attr
212    
213 dpavlin 6 Add an attribute.
214    
215 dpavlin 2 $doc->add_attr( name => 'value' );
216    
217 dpavlin 9 Delete attribute using
218 dpavlin 5
219     $doc->add_attr( name => undef );
220    
221 dpavlin 2 =cut
222    
223     sub add_attr {
224     my $self = shift;
225     my $attrs = {@_};
226    
227     while (my ($name, $value) = each %{ $attrs }) {
228 dpavlin 9 if (! defined($value)) {
229 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
230 dpavlin 9 } else {
231 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
232 dpavlin 9 }
233 dpavlin 2 }
234 dpavlin 8
235     return 1;
236 dpavlin 2 }
237    
238 dpavlin 5
239     =head2 add_text
240    
241 dpavlin 6 Add a sentence of text.
242    
243 dpavlin 5 $doc->add_text('this is example text to display');
244    
245     =cut
246    
247     sub add_text {
248     my $self = shift;
249     my $text = shift;
250     return unless defined($text);
251    
252 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
253 dpavlin 5 }
254    
255    
256     =head2 add_hidden_text
257    
258 dpavlin 6 Add a hidden sentence.
259    
260 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
261    
262     =cut
263    
264     sub add_hidden_text {
265     my $self = shift;
266     my $text = shift;
267     return unless defined($text);
268    
269 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
270 dpavlin 5 }
271    
272 dpavlin 166 =head2 add_vectors
273 dpavlin 42
274 dpavlin 166 Add a vectors
275    
276     $doc->add_vector(
277     'vector_name' => 42,
278     'another' => 12345,
279     );
280    
281     =cut
282    
283     sub add_vectors {
284     my $self = shift;
285     return unless (@_);
286    
287     # this is ugly, but works
288     die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);
289    
290     $self->{kwords} = {@_};
291     }
292    
293    
294 dpavlin 6 =head2 id
295    
296     Get the ID number of document. If the object has never been registred, C<-1> is returned.
297    
298     print $doc->id;
299    
300     =cut
301    
302     sub id {
303     my $self = shift;
304     return $self->{id};
305     }
306    
307 dpavlin 42
308 dpavlin 7 =head2 attr_names
309    
310 dpavlin 9 Returns array with attribute names from document object.
311 dpavlin 7
312     my @attrs = $doc->attr_names;
313    
314     =cut
315    
316     sub attr_names {
317     my $self = shift;
318 dpavlin 60 return unless ($self->{attrs});
319     #croak "attr_names return array, not scalar" if (! wantarray);
320 dpavlin 7 return sort keys %{ $self->{attrs} };
321     }
322    
323 dpavlin 8
324     =head2 attr
325    
326 dpavlin 9 Returns value of an attribute.
327 dpavlin 8
328     my $value = $doc->attr( 'attribute' );
329    
330     =cut
331    
332     sub attr {
333     my $self = shift;
334     my $name = shift;
335 dpavlin 60 return unless (defined($name) && $self->{attrs});
336     return $self->{attrs}->{ $name };
337 dpavlin 8 }
338    
339 dpavlin 9
340     =head2 texts
341    
342     Returns array with text sentences.
343    
344     my @texts = $doc->texts;
345    
346     =cut
347    
348     sub texts {
349     my $self = shift;
350 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
351     return @{ $self->{dtexts} } if ($self->{dtexts});
352 dpavlin 9 }
353    
354 dpavlin 42
355 dpavlin 12 =head2 cat_texts
356    
357     Return whole text as single scalar.
358    
359     my $text = $doc->cat_texts;
360    
361     =cut
362    
363     sub cat_texts {
364     my $self = shift;
365 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
366 dpavlin 12 }
367    
368 dpavlin 42
369 dpavlin 5 =head2 dump_draft
370    
371 dpavlin 13 Dump draft data from document object.
372    
373 dpavlin 5 print $doc->dump_draft;
374    
375     =cut
376    
377     sub dump_draft {
378 dpavlin 13 my $self = shift;
379     my $draft;
380    
381     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
382 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
383 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
384 dpavlin 13 }
385    
386     if ($self->{kwords}) {
387 dpavlin 166 $draft .= '%VECTOR';
388 dpavlin 13 while (my ($key, $value) = each %{ $self->{kwords} }) {
389     $draft .= "\t$key\t$value";
390     }
391     $draft .= "\n";
392     }
393    
394     $draft .= "\n";
395    
396 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
397     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
398 dpavlin 13
399     return $draft;
400 dpavlin 5 }
401    
402 dpavlin 42
403 dpavlin 4 =head2 delete
404 dpavlin 2
405 dpavlin 4 Empty document object
406 dpavlin 2
407 dpavlin 4 $doc->delete;
408    
409 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
410     convinience. Document objects which go out of scope will be destroyed
411     automatically.
412    
413 dpavlin 4 =cut
414    
415     sub delete {
416     my $self = shift;
417    
418 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
419 dpavlin 5 delete($self->{$data});
420     }
421 dpavlin 4
422 dpavlin 10 $self->{id} = -1;
423    
424 dpavlin 4 return 1;
425     }
426    
427    
428    
429 dpavlin 15 package Search::Estraier::Condition;
430 dpavlin 4
431 dpavlin 98 use Carp qw/carp confess croak/;
432 dpavlin 16
433 dpavlin 15 use Search::Estraier;
434     our @ISA = qw/Search::Estraier/;
435 dpavlin 4
436 dpavlin 16 =head1 Search::Estraier::Condition
437    
438     =head2 new
439    
440     my $cond = new Search::HyperEstraier::Condition;
441    
442     =cut
443    
444     sub new {
445     my $class = shift;
446     my $self = {};
447     bless($self, $class);
448    
449 dpavlin 19 $self->{max} = -1;
450     $self->{options} = 0;
451    
452 dpavlin 16 $self ? return $self : return undef;
453     }
454    
455 dpavlin 42
456 dpavlin 16 =head2 set_phrase
457    
458     $cond->set_phrase('search phrase');
459    
460     =cut
461    
462     sub set_phrase {
463     my $self = shift;
464     $self->{phrase} = $self->_s( shift );
465     }
466    
467 dpavlin 42
468 dpavlin 16 =head2 add_attr
469    
470     $cond->add_attr('@URI STRINC /~dpavlin/');
471    
472     =cut
473    
474     sub add_attr {
475     my $self = shift;
476     my $attr = shift || return;
477     push @{ $self->{attrs} }, $self->_s( $attr );
478     }
479    
480 dpavlin 42
481 dpavlin 16 =head2 set_order
482    
483     $cond->set_order('@mdate NUMD');
484    
485     =cut
486    
487     sub set_order {
488     my $self = shift;
489     $self->{order} = shift;
490     }
491    
492 dpavlin 42
493 dpavlin 16 =head2 set_max
494    
495     $cond->set_max(42);
496    
497     =cut
498    
499     sub set_max {
500     my $self = shift;
501     my $max = shift;
502 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
503 dpavlin 16 $self->{max} = $max;
504     }
505    
506 dpavlin 42
507 dpavlin 16 =head2 set_options
508    
509 dpavlin 98 $cond->set_options( 'SURE' );
510 dpavlin 16
511 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
512    
513     Possible options are:
514    
515     =over 8
516    
517     =item SURE
518    
519     check every N-gram
520    
521     =item USUAL
522    
523     check every second N-gram
524    
525     =item FAST
526    
527     check every third N-gram
528    
529     =item AGITO
530    
531     check every fourth N-gram
532    
533     =item NOIDF
534    
535     don't perform TF-IDF tuning
536    
537     =item SIMPLE
538    
539     use simplified query phrase
540    
541     =back
542    
543     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
544     options;
545    
546     This option changed in version C<0.04> of this module. It's backwards compatibile.
547    
548 dpavlin 16 =cut
549    
550 dpavlin 15 my $options = {
551     SURE => 1 << 0,
552     USUAL => 1 << 1,
553     FAST => 1 << 2,
554     AGITO => 1 << 3,
555     NOIDF => 1 << 4,
556     SIMPLE => 1 << 10,
557     };
558    
559 dpavlin 16 sub set_options {
560     my $self = shift;
561 dpavlin 98 my $opt = 0;
562     foreach my $option (@_) {
563     my $mask;
564     unless ($mask = $options->{$option}) {
565     if ($option eq '1') {
566     next;
567     } else {
568     croak "unknown option $option";
569     }
570     }
571     $opt += $mask;
572     }
573     $self->{options} = $opt;
574 dpavlin 4 }
575    
576 dpavlin 42
577 dpavlin 18 =head2 phrase
578    
579     Return search phrase.
580    
581     print $cond->phrase;
582    
583     =cut
584    
585     sub phrase {
586     my $self = shift;
587     return $self->{phrase};
588     }
589    
590 dpavlin 42
591 dpavlin 19 =head2 order
592 dpavlin 18
593 dpavlin 19 Return search result order.
594    
595     print $cond->order;
596    
597     =cut
598    
599     sub order {
600     my $self = shift;
601     return $self->{order};
602     }
603    
604 dpavlin 42
605 dpavlin 19 =head2 attrs
606    
607     Return search result attrs.
608    
609     my @cond_attrs = $cond->attrs;
610    
611     =cut
612    
613     sub attrs {
614     my $self = shift;
615     #croak "attrs return array, not scalar" if (! wantarray);
616 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
617 dpavlin 19 }
618    
619 dpavlin 42
620 dpavlin 19 =head2 max
621    
622     Return maximum number of results.
623    
624     print $cond->max;
625    
626     C<-1> is returned for unitialized value, C<0> is unlimited.
627    
628     =cut
629    
630     sub max {
631     my $self = shift;
632     return $self->{max};
633     }
634    
635 dpavlin 42
636 dpavlin 19 =head2 options
637    
638     Return options for this condition.
639    
640     print $cond->options;
641    
642     Options are returned in numerical form.
643    
644     =cut
645    
646     sub options {
647     my $self = shift;
648     return $self->{options};
649     }
650    
651    
652 dpavlin 116 =head2 set_skip
653    
654     Set number of skipped documents from beginning of results
655    
656     $cond->set_skip(42);
657    
658     Similar to C<offset> in RDBMS.
659    
660     =cut
661    
662     sub set_skip {
663     my $self = shift;
664     $self->{skip} = shift;
665     }
666    
667     =head2 skip
668    
669     Return skip for this condition.
670    
671     print $cond->skip;
672    
673     =cut
674    
675     sub skip {
676     my $self = shift;
677     return $self->{skip};
678     }
679    
680    
681 dpavlin 20 package Search::Estraier::ResultDocument;
682    
683 dpavlin 24 use Carp qw/croak/;
684 dpavlin 20
685 dpavlin 24 #use Search::Estraier;
686     #our @ISA = qw/Search::Estraier/;
687 dpavlin 20
688     =head1 Search::Estraier::ResultDocument
689    
690     =head2 new
691    
692 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
693 dpavlin 20 uri => 'http://localhost/document/uri/42',
694     attrs => {
695     foo => 1,
696     bar => 2,
697     },
698     snippet => 'this is a text of snippet'
699     keywords => 'this\tare\tkeywords'
700     );
701    
702     =cut
703    
704     sub new {
705     my $class = shift;
706     my $self = {@_};
707     bless($self, $class);
708    
709 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
710 dpavlin 20
711     $self ? return $self : return undef;
712     }
713    
714 dpavlin 42
715 dpavlin 23 =head2 uri
716 dpavlin 20
717 dpavlin 23 Return URI of result document
718 dpavlin 20
719 dpavlin 23 print $rdoc->uri;
720    
721     =cut
722    
723     sub uri {
724     my $self = shift;
725     return $self->{uri};
726     }
727    
728    
729     =head2 attr_names
730    
731     Returns array with attribute names from result document object.
732    
733     my @attrs = $rdoc->attr_names;
734    
735     =cut
736    
737     sub attr_names {
738     my $self = shift;
739     croak "attr_names return array, not scalar" if (! wantarray);
740     return sort keys %{ $self->{attrs} };
741     }
742    
743 dpavlin 42
744 dpavlin 23 =head2 attr
745    
746     Returns value of an attribute.
747    
748     my $value = $rdoc->attr( 'attribute' );
749    
750     =cut
751    
752     sub attr {
753     my $self = shift;
754     my $name = shift || return;
755     return $self->{attrs}->{ $name };
756     }
757    
758 dpavlin 42
759 dpavlin 23 =head2 snippet
760    
761     Return snippet from result document
762    
763     print $rdoc->snippet;
764    
765     =cut
766    
767     sub snippet {
768     my $self = shift;
769     return $self->{snippet};
770     }
771    
772 dpavlin 42
773 dpavlin 23 =head2 keywords
774    
775     Return keywords from result document
776    
777     print $rdoc->keywords;
778    
779     =cut
780    
781     sub keywords {
782     my $self = shift;
783     return $self->{keywords};
784     }
785    
786    
787 dpavlin 25 package Search::Estraier::NodeResult;
788    
789     use Carp qw/croak/;
790    
791     #use Search::Estraier;
792     #our @ISA = qw/Search::Estraier/;
793    
794     =head1 Search::Estraier::NodeResult
795    
796     =head2 new
797    
798     my $res = new Search::HyperEstraier::NodeResult(
799     docs => @array_of_rdocs,
800     hits => %hash_with_hints,
801     );
802    
803     =cut
804    
805     sub new {
806     my $class = shift;
807     my $self = {@_};
808     bless($self, $class);
809    
810     foreach my $f (qw/docs hints/) {
811     croak "missing $f for ResultDocument" unless defined($self->{$f});
812     }
813    
814     $self ? return $self : return undef;
815     }
816    
817 dpavlin 42
818 dpavlin 25 =head2 doc_num
819    
820     Return number of documents
821    
822     print $res->doc_num;
823    
824 dpavlin 100 This will return real number of documents (limited by C<max>).
825     If you want to get total number of hits, see C<hits>.
826    
827 dpavlin 25 =cut
828    
829     sub doc_num {
830     my $self = shift;
831 dpavlin 53 return $#{$self->{docs}} + 1;
832 dpavlin 25 }
833    
834 dpavlin 42
835 dpavlin 25 =head2 get_doc
836    
837     Return single document
838    
839     my $doc = $res->get_doc( 42 );
840    
841     Returns undef if document doesn't exist.
842    
843     =cut
844    
845     sub get_doc {
846     my $self = shift;
847     my $num = shift;
848 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
849 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
850     return $self->{docs}->[$num];
851     }
852    
853 dpavlin 42
854 dpavlin 25 =head2 hint
855    
856     Return specific hint from results.
857    
858 dpavlin 100 print $res->hint( 'VERSION' );
859 dpavlin 25
860     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
861     C<TIME>, C<LINK#n>, C<VIEW>.
862    
863     =cut
864    
865     sub hint {
866     my $self = shift;
867     my $key = shift || return;
868     return $self->{hints}->{$key};
869     }
870    
871 dpavlin 108 =head2 hints
872 dpavlin 25
873 dpavlin 91 More perlish version of C<hint>. This one returns hash.
874    
875 dpavlin 100 my %hints = $res->hints;
876 dpavlin 91
877     =cut
878    
879     sub hints {
880     my $self = shift;
881     return $self->{hints};
882     }
883    
884 dpavlin 100 =head2 hits
885    
886     Syntaxtic sugar for total number of hits for this query
887    
888     print $res->hits;
889    
890     It's same as
891    
892     print $res->hint('HIT');
893    
894     but shorter.
895    
896     =cut
897    
898     sub hits {
899     my $self = shift;
900     return $self->{hints}->{'HIT'} || 0;
901     }
902    
903 dpavlin 27 package Search::Estraier::Node;
904    
905 dpavlin 44 use Carp qw/carp croak confess/;
906 dpavlin 33 use URI;
907 dpavlin 36 use MIME::Base64;
908 dpavlin 33 use IO::Socket::INET;
909 dpavlin 49 use URI::Escape qw/uri_escape/;
910 dpavlin 29
911 dpavlin 27 =head1 Search::Estraier::Node
912    
913     =head2 new
914    
915     my $node = new Search::HyperEstraier::Node;
916    
917 dpavlin 65 or optionally with C<url> as parametar
918    
919     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
920    
921 dpavlin 78 or in more verbose form
922    
923     my $node = new Search::HyperEstraier::Node(
924     url => 'http://localhost:1978/node/test',
925 dpavlin 132 user => 'admin',
926     passwd => 'admin'
927 dpavlin 139 create => 1,
928     label => 'optional node label',
929 dpavlin 78 debug => 1,
930     croak_on_error => 1
931     );
932    
933     with following arguments:
934    
935     =over 4
936    
937     =item url
938    
939     URL to node
940    
941 dpavlin 132 =item user
942    
943     specify username for node server authentication
944    
945     =item passwd
946    
947     password for authentication
948    
949 dpavlin 139 =item create
950    
951     create node if it doesn't exists
952    
953     =item label
954    
955     optional label for new node if C<create> is used
956    
957 dpavlin 78 =item debug
958    
959     dumps a B<lot> of debugging output
960    
961     =item croak_on_error
962    
963     very helpful during development. It will croak on all errors instead of
964     silently returning C<-1> (which is convention of Hyper Estraier API in other
965     languages).
966    
967     =back
968    
969 dpavlin 27 =cut
970    
971     sub new {
972     my $class = shift;
973     my $self = {
974     pxport => -1,
975 dpavlin 33 timeout => 0, # this used to be -1
976 dpavlin 27 wwidth => 480,
977     hwidth => 96,
978     awidth => 96,
979     status => -1,
980     };
981 dpavlin 111
982 dpavlin 27 bless($self, $class);
983    
984 dpavlin 65 if ($#_ == 0) {
985     $self->{url} = shift;
986     } else {
987 dpavlin 78 %$self = ( %$self, @_ );
988    
989 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
990    
991 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
992     }
993 dpavlin 57
994 dpavlin 111 $self->{inform} = {
995     dnum => -1,
996     wnum => -1,
997     size => -1.0,
998     };
999    
1000 dpavlin 139 if ($self->{create}) {
1001 dpavlin 140 if (! eval { $self->name } || $@) {
1002 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
1003     croak "can't find node name in '$self->{url}'" unless ($name);
1004     my $label = $self->{label} || $name;
1005     $self->master(
1006     action => 'nodeadd',
1007     name => $name,
1008     label => $label,
1009     ) || croak "can't create node $name ($label)";
1010     }
1011     }
1012    
1013 dpavlin 27 $self ? return $self : return undef;
1014     }
1015    
1016 dpavlin 42
1017 dpavlin 29 =head2 set_url
1018    
1019     Specify URL to node server
1020    
1021     $node->set_url('http://localhost:1978');
1022    
1023     =cut
1024    
1025     sub set_url {
1026     my $self = shift;
1027     $self->{url} = shift;
1028     }
1029    
1030 dpavlin 42
1031 dpavlin 29 =head2 set_proxy
1032    
1033     Specify proxy server to connect to node server
1034    
1035     $node->set_proxy('proxy.example.com', 8080);
1036    
1037     =cut
1038    
1039     sub set_proxy {
1040     my $self = shift;
1041     my ($host,$port) = @_;
1042 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1043 dpavlin 29 $self->{pxhost} = $host;
1044     $self->{pxport} = $port;
1045     }
1046    
1047 dpavlin 42
1048 dpavlin 30 =head2 set_timeout
1049    
1050     Specify timeout of connection in seconds
1051    
1052     $node->set_timeout( 15 );
1053    
1054     =cut
1055    
1056     sub set_timeout {
1057     my $self = shift;
1058     my $sec = shift;
1059 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1060 dpavlin 30 $self->{timeout} = $sec;
1061     }
1062    
1063 dpavlin 42
1064 dpavlin 31 =head2 set_auth
1065    
1066     Specify name and password for authentication to node server.
1067    
1068     $node->set_auth('clint','eastwood');
1069    
1070     =cut
1071    
1072     sub set_auth {
1073     my $self = shift;
1074     my ($login,$passwd) = @_;
1075 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1076     chomp($basic_auth);
1077     $self->{auth} = $basic_auth;
1078 dpavlin 31 }
1079    
1080 dpavlin 42
1081 dpavlin 32 =head2 status
1082    
1083     Return status code of last request.
1084    
1085 dpavlin 40 print $node->status;
1086 dpavlin 32
1087     C<-1> means connection failure.
1088    
1089     =cut
1090    
1091     sub status {
1092     my $self = shift;
1093     return $self->{status};
1094     }
1095    
1096 dpavlin 42
1097 dpavlin 40 =head2 put_doc
1098    
1099 dpavlin 41 Add a document
1100 dpavlin 40
1101 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1102    
1103 dpavlin 151 Return true on success or false on failure.
1104 dpavlin 41
1105 dpavlin 40 =cut
1106    
1107     sub put_doc {
1108     my $self = shift;
1109     my $doc = shift || return;
1110 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1111 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/put_doc',
1112 dpavlin 41 'text/x-estraier-draft',
1113     $doc->dump_draft,
1114     undef
1115 dpavlin 151 ) == 200) {
1116     $self->_clear_info;
1117     return 1;
1118     }
1119     return undef;
1120 dpavlin 40 }
1121    
1122 dpavlin 41
1123     =head2 out_doc
1124    
1125     Remove a document
1126    
1127     $node->out_doc( document_id ) or "can't remove document";
1128    
1129     Return true on success or false on failture.
1130    
1131     =cut
1132    
1133     sub out_doc {
1134     my $self = shift;
1135     my $id = shift || return;
1136     return unless ($self->{url});
1137 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1138 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1139 dpavlin 41 'application/x-www-form-urlencoded',
1140     "id=$id",
1141     undef
1142 dpavlin 150 ) == 200) {
1143 dpavlin 151 $self->_clear_info;
1144     return 1;
1145 dpavlin 150 }
1146     return undef;
1147 dpavlin 41 }
1148    
1149    
1150     =head2 out_doc_by_uri
1151    
1152     Remove a registrated document using it's uri
1153    
1154 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1155 dpavlin 41
1156     Return true on success or false on failture.
1157    
1158     =cut
1159    
1160     sub out_doc_by_uri {
1161     my $self = shift;
1162     my $uri = shift || return;
1163     return unless ($self->{url});
1164 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1165 dpavlin 41 'application/x-www-form-urlencoded',
1166 dpavlin 50 "uri=" . uri_escape($uri),
1167 dpavlin 41 undef
1168 dpavlin 150 ) == 200) {
1169 dpavlin 151 $self->_clear_info;
1170     return 1;
1171 dpavlin 150 }
1172     return undef;
1173 dpavlin 41 }
1174    
1175 dpavlin 42
1176     =head2 edit_doc
1177    
1178     Edit attributes of a document
1179    
1180     $node->edit_doc( $document_draft ) or die "can't edit document";
1181    
1182     Return true on success or false on failture.
1183    
1184     =cut
1185    
1186     sub edit_doc {
1187     my $self = shift;
1188     my $doc = shift || return;
1189 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1190 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1191 dpavlin 42 'text/x-estraier-draft',
1192     $doc->dump_draft,
1193     undef
1194 dpavlin 151 ) == 200) {
1195     $self->_clear_info;
1196     return 1;
1197     }
1198     return undef;
1199 dpavlin 42 }
1200    
1201    
1202 dpavlin 43 =head2 get_doc
1203    
1204     Retreive document
1205    
1206     my $doc = $node->get_doc( document_id ) or die "can't get document";
1207    
1208     Return true on success or false on failture.
1209    
1210     =cut
1211    
1212     sub get_doc {
1213     my $self = shift;
1214     my $id = shift || return;
1215     return $self->_fetch_doc( id => $id );
1216     }
1217    
1218 dpavlin 44
1219 dpavlin 43 =head2 get_doc_by_uri
1220    
1221     Retreive document
1222    
1223 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1224 dpavlin 43
1225     Return true on success or false on failture.
1226    
1227     =cut
1228    
1229     sub get_doc_by_uri {
1230     my $self = shift;
1231     my $uri = shift || return;
1232     return $self->_fetch_doc( uri => $uri );
1233     }
1234    
1235 dpavlin 44
1236 dpavlin 49 =head2 get_doc_attr
1237    
1238     Retrieve the value of an atribute from object
1239    
1240     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1241     die "can't get document attribute";
1242    
1243     =cut
1244    
1245     sub get_doc_attr {
1246     my $self = shift;
1247     my ($id,$name) = @_;
1248     return unless ($id && $name);
1249     return $self->_fetch_doc( id => $id, attr => $name );
1250     }
1251    
1252    
1253     =head2 get_doc_attr_by_uri
1254    
1255     Retrieve the value of an atribute from object
1256    
1257     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1258     die "can't get document attribute";
1259    
1260     =cut
1261    
1262     sub get_doc_attr_by_uri {
1263     my $self = shift;
1264     my ($uri,$name) = @_;
1265     return unless ($uri && $name);
1266     return $self->_fetch_doc( uri => $uri, attr => $name );
1267     }
1268    
1269    
1270 dpavlin 44 =head2 etch_doc
1271    
1272     Exctract document keywords
1273    
1274     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1275    
1276     =cut
1277    
1278 dpavlin 49 sub etch_doc {
1279 dpavlin 44 my $self = shift;
1280     my $id = shift || return;
1281     return $self->_fetch_doc( id => $id, etch => 1 );
1282     }
1283    
1284     =head2 etch_doc_by_uri
1285    
1286     Retreive document
1287    
1288 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1289 dpavlin 44
1290     Return true on success or false on failture.
1291    
1292     =cut
1293    
1294     sub etch_doc_by_uri {
1295     my $self = shift;
1296     my $uri = shift || return;
1297     return $self->_fetch_doc( uri => $uri, etch => 1 );
1298     }
1299    
1300    
1301 dpavlin 45 =head2 uri_to_id
1302    
1303     Get ID of document specified by URI
1304    
1305     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1306    
1307 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1308    
1309 dpavlin 45 =cut
1310    
1311     sub uri_to_id {
1312     my $self = shift;
1313     my $uri = shift || return;
1314 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1315 dpavlin 45 }
1316    
1317    
1318 dpavlin 43 =head2 _fetch_doc
1319    
1320 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1321     C<etch_doc>, C<etch_doc_by_uri>.
1322 dpavlin 43
1323 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1324     my $doc = $node->_fetch_doc( id => 42 );
1325     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1326 dpavlin 43
1327 dpavlin 45 # to extract keywords, add etch
1328     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1329     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1330    
1331 dpavlin 49 # to get document attrubute add attr
1332     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1333     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1334    
1335 dpavlin 45 # more general form which allows implementation of
1336     # uri_to_id
1337     my $id = $node->_fetch_doc(
1338     uri => 'file:///document/uri/42',
1339     path => '/uri_to_id',
1340     chomp_resbody => 1
1341     );
1342    
1343 dpavlin 43 =cut
1344    
1345     sub _fetch_doc {
1346     my $self = shift;
1347 dpavlin 44 my $a = {@_};
1348     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1349    
1350     my ($arg, $resbody);
1351    
1352 dpavlin 45 my $path = $a->{path} || '/get_doc';
1353 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1354    
1355     if ($a->{id}) {
1356     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1357     $arg = 'id=' . $a->{id};
1358     } elsif ($a->{uri}) {
1359 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1360 dpavlin 44 } else {
1361     confess "unhandled argument. Need id or uri.";
1362 dpavlin 43 }
1363 dpavlin 44
1364 dpavlin 49 if ($a->{attr}) {
1365     $path = '/get_doc_attr';
1366     $arg .= '&attr=' . uri_escape($a->{attr});
1367     $a->{chomp_resbody} = 1;
1368     }
1369    
1370 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1371 dpavlin 43 'application/x-www-form-urlencoded',
1372 dpavlin 44 $arg,
1373 dpavlin 45 \$resbody,
1374 dpavlin 103 $a->{croak_on_error},
1375 dpavlin 43 );
1376 dpavlin 44
1377 dpavlin 43 return if ($rv != 200);
1378 dpavlin 44
1379     if ($a->{etch}) {
1380     $self->{kwords} = {};
1381     return +{} unless ($resbody);
1382     foreach my $l (split(/\n/, $resbody)) {
1383     my ($k,$v) = split(/\t/, $l, 2);
1384     $self->{kwords}->{$k} = $v if ($v);
1385     }
1386     return $self->{kwords};
1387 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1388     return unless (defined($resbody));
1389     chomp($resbody);
1390     return $resbody;
1391 dpavlin 44 } else {
1392     return new Search::Estraier::Document($resbody);
1393     }
1394 dpavlin 43 }
1395    
1396    
1397 dpavlin 48 =head2 name
1398 dpavlin 43
1399 dpavlin 48 my $node_name = $node->name;
1400 dpavlin 43
1401 dpavlin 48 =cut
1402    
1403     sub name {
1404     my $self = shift;
1405 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1406     return $self->{inform}->{name};
1407 dpavlin 48 }
1408    
1409    
1410     =head2 label
1411    
1412     my $node_label = $node->label;
1413    
1414     =cut
1415    
1416     sub label {
1417     my $self = shift;
1418 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1419     return $self->{inform}->{label};
1420 dpavlin 48 }
1421    
1422    
1423     =head2 doc_num
1424    
1425     my $documents_in_node = $node->doc_num;
1426    
1427     =cut
1428    
1429     sub doc_num {
1430     my $self = shift;
1431 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1432     return $self->{inform}->{dnum};
1433 dpavlin 48 }
1434    
1435    
1436     =head2 word_num
1437    
1438     my $words_in_node = $node->word_num;
1439    
1440     =cut
1441    
1442     sub word_num {
1443     my $self = shift;
1444 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1445     return $self->{inform}->{wnum};
1446 dpavlin 48 }
1447    
1448    
1449     =head2 size
1450    
1451     my $node_size = $node->size;
1452    
1453     =cut
1454    
1455     sub size {
1456     my $self = shift;
1457 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1458     return $self->{inform}->{size};
1459 dpavlin 48 }
1460    
1461    
1462 dpavlin 51 =head2 search
1463 dpavlin 48
1464 dpavlin 51 Search documents which match condition
1465    
1466     my $nres = $node->search( $cond, $depth );
1467    
1468     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1469     depth for meta search.
1470    
1471     Function results C<Search::Estraier::NodeResult> object.
1472    
1473     =cut
1474    
1475     sub search {
1476     my $self = shift;
1477     my ($cond, $depth) = @_;
1478     return unless ($cond && defined($depth) && $self->{url});
1479     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1480     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1481    
1482 dpavlin 52 my $resbody;
1483 dpavlin 51
1484 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1485 dpavlin 53 'application/x-www-form-urlencoded',
1486 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1487 dpavlin 52 \$resbody,
1488     );
1489     return if ($rv != 200);
1490    
1491 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1492     my $hintsText = splice @records, 0, 2; # starts with empty record
1493     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1494    
1495     # process records
1496 dpavlin 128 my $docs = [];
1497 dpavlin 126 foreach my $record (@records)
1498     {
1499     # split into keys and snippets
1500     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1501    
1502     # create document hash
1503     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1504     $doc->{'@keywords'} = $doc->{keywords};
1505     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1506     $doc->{snippet} = $snippet;
1507    
1508     push @$docs, new Search::Estraier::ResultDocument(
1509     attrs => $doc,
1510     uri => $doc->{'@uri'},
1511     snippet => $snippet,
1512     keywords => $doc->{'keywords'},
1513     );
1514     }
1515    
1516     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1517     }
1518    
1519    
1520 dpavlin 51 =head2 cond_to_query
1521    
1522 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1523    
1524 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1525 dpavlin 51
1526     =cut
1527    
1528     sub cond_to_query {
1529     my $self = shift;
1530    
1531     my $cond = shift || return;
1532     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1533 dpavlin 61 my $depth = shift;
1534 dpavlin 51
1535     my @args;
1536    
1537     if (my $phrase = $cond->phrase) {
1538     push @args, 'phrase=' . uri_escape($phrase);
1539     }
1540    
1541     if (my @attrs = $cond->attrs) {
1542     for my $i ( 0 .. $#attrs ) {
1543 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1544 dpavlin 51 }
1545     }
1546    
1547     if (my $order = $cond->order) {
1548     push @args, 'order=' . uri_escape($order);
1549     }
1550    
1551     if (my $max = $cond->max) {
1552     push @args, 'max=' . $max;
1553     } else {
1554     push @args, 'max=' . (1 << 30);
1555     }
1556    
1557     if (my $options = $cond->options) {
1558     push @args, 'options=' . $options;
1559     }
1560    
1561 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1562 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1563     push @args, 'hwidth=' . $self->{hwidth};
1564     push @args, 'awidth=' . $self->{awidth};
1565 dpavlin 155 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1566 dpavlin 51
1567     return join('&', @args);
1568     }
1569    
1570    
1571 dpavlin 33 =head2 shuttle_url
1572 dpavlin 32
1573 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1574 dpavlin 33 master.
1575 dpavlin 2
1576 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1577 dpavlin 2
1578 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1579     body will be saved within object.
1580 dpavlin 2
1581     =cut
1582    
1583 dpavlin 59 use LWP::UserAgent;
1584    
1585 dpavlin 33 sub shuttle_url {
1586     my $self = shift;
1587 dpavlin 2
1588 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1589 dpavlin 2
1590 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1591    
1592 dpavlin 40 $self->{status} = -1;
1593 dpavlin 33
1594 dpavlin 41 warn "## $url\n" if ($self->{debug});
1595 dpavlin 36
1596 dpavlin 33 $url = new URI($url);
1597 dpavlin 37 if (
1598     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1599     !$url->host || !$url->port || $url->port < 1
1600     ) {
1601     carp "can't parse $url\n";
1602     return -1;
1603     }
1604 dpavlin 33
1605 dpavlin 59 my $ua = LWP::UserAgent->new;
1606     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1607 dpavlin 33
1608 dpavlin 59 my $req;
1609 dpavlin 37 if ($reqbody) {
1610 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1611 dpavlin 37 } else {
1612 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1613 dpavlin 37 }
1614    
1615 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1616     $req->headers->header( 'Connection', 'close' );
1617 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1618 dpavlin 59 $req->content_type( $content_type );
1619 dpavlin 37
1620 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1621 dpavlin 2
1622 dpavlin 37 if ($reqbody) {
1623 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1624 dpavlin 59 $req->content( $reqbody );
1625 dpavlin 33 }
1626 dpavlin 2
1627 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1628 dpavlin 2
1629 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1630 dpavlin 2
1631 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1632    
1633 dpavlin 78 if (! $res->is_success) {
1634 dpavlin 103 if ($croak_on_error) {
1635 dpavlin 78 croak("can't get $url: ",$res->status_line);
1636     } else {
1637     return -1;
1638     }
1639     }
1640 dpavlin 2
1641 dpavlin 59 $$resbody .= $res->content;
1642    
1643 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1644 dpavlin 39
1645 dpavlin 40 return $self->{status};
1646 dpavlin 2 }
1647    
1648 dpavlin 48
1649 dpavlin 55 =head2 set_snippet_width
1650 dpavlin 48
1651 dpavlin 55 Set width of snippets in results
1652    
1653     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1654    
1655     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1656     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1657    
1658     C<$hwidth> specified width of strings from beginning of string. Default
1659     value is C<96>. Negative or zero value keep previous value.
1660    
1661     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1662     If negative of zero value is provided previous value is kept unchanged.
1663    
1664     =cut
1665    
1666     sub set_snippet_width {
1667     my $self = shift;
1668    
1669     my ($wwidth, $hwidth, $awidth) = @_;
1670     $self->{wwidth} = $wwidth;
1671     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1672     $self->{awidth} = $awidth if ($awidth >= 0);
1673     }
1674    
1675    
1676 dpavlin 56 =head2 set_user
1677 dpavlin 55
1678 dpavlin 56 Manage users of node
1679    
1680     $node->set_user( 'name', $mode );
1681    
1682     C<$mode> can be one of:
1683    
1684     =over 4
1685    
1686     =item 0
1687    
1688     delete account
1689    
1690     =item 1
1691    
1692     set administrative right for user
1693    
1694     =item 2
1695    
1696     set user account as guest
1697    
1698     =back
1699    
1700     Return true on success, otherwise false.
1701    
1702     =cut
1703    
1704     sub set_user {
1705     my $self = shift;
1706     my ($name, $mode) = @_;
1707    
1708     return unless ($self->{url});
1709     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1710    
1711     $self->shuttle_url( $self->{url} . '/_set_user',
1712 dpavlin 154 'application/x-www-form-urlencoded',
1713 dpavlin 56 'name=' . uri_escape($name) . '&mode=' . $mode,
1714     undef
1715     ) == 200;
1716     }
1717    
1718    
1719 dpavlin 57 =head2 set_link
1720    
1721     Manage node links
1722    
1723     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1724    
1725     If C<$credit> is negative, link is removed.
1726    
1727     =cut
1728    
1729     sub set_link {
1730     my $self = shift;
1731     my ($url, $label, $credit) = @_;
1732    
1733     return unless ($self->{url});
1734     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1735    
1736     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1737     $reqbody .= '&credit=' . $credit if ($credit > 0);
1738    
1739 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1740 dpavlin 71 'application/x-www-form-urlencoded',
1741 dpavlin 57 $reqbody,
1742     undef
1743 dpavlin 107 ) == 200) {
1744     # refresh node info after adding link
1745 dpavlin 151 $self->_clear_info;
1746 dpavlin 107 return 1;
1747     }
1748 dpavlin 150 return undef;
1749 dpavlin 57 }
1750    
1751 dpavlin 107 =head2 admins
1752 dpavlin 57
1753 dpavlin 107 my @admins = @{ $node->admins };
1754    
1755     Return array of users with admin rights on node
1756    
1757     =cut
1758    
1759     sub admins {
1760     my $self = shift;
1761 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1762     return $self->{inform}->{admins};
1763 dpavlin 107 }
1764    
1765     =head2 guests
1766    
1767     my @guests = @{ $node->guests };
1768    
1769     Return array of users with guest rights on node
1770    
1771     =cut
1772    
1773     sub guests {
1774     my $self = shift;
1775 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1776     return $self->{inform}->{guests};
1777 dpavlin 107 }
1778    
1779     =head2 links
1780    
1781     my $links = @{ $node->links };
1782    
1783     Return array of links for this node
1784    
1785     =cut
1786    
1787     sub links {
1788     my $self = shift;
1789 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1790     return $self->{inform}->{links};
1791 dpavlin 107 }
1792    
1793 dpavlin 160 =head2 cacheusage
1794    
1795     Return cache usage for a node
1796    
1797     my $cache = $node->cacheusage;
1798    
1799     =cut
1800    
1801     sub cacheusage {
1802     my $self = shift;
1803    
1804     return unless ($self->{url});
1805    
1806     my $resbody;
1807     my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1808     'text/plain',
1809     undef,
1810     \$resbody,
1811     );
1812    
1813     return if ($rv != 200 || !$resbody);
1814    
1815     return $resbody;
1816     }
1817    
1818 dpavlin 134 =head2 master
1819 dpavlin 107
1820 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1821    
1822     $node->master(
1823     action => 'sync'
1824     );
1825    
1826     All available actions are documented in
1827     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1828    
1829     =cut
1830    
1831     my $estmaster_rest = {
1832     shutdown => {
1833     status => 202,
1834     },
1835     sync => {
1836     status => 202,
1837     },
1838     backup => {
1839     status => 202,
1840     },
1841     userlist => {
1842     status => 200,
1843 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1844 dpavlin 134 },
1845     useradd => {
1846 dpavlin 135 required => [ qw/name passwd flags/ ],
1847     optional => [ qw/fname misc/ ],
1848 dpavlin 134 status => 200,
1849     },
1850     userdel => {
1851 dpavlin 135 required => [ qw/name/ ],
1852 dpavlin 134 status => 200,
1853     },
1854     nodelist => {
1855     status => 200,
1856 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1857 dpavlin 134 },
1858     nodeadd => {
1859 dpavlin 135 required => [ qw/name/ ],
1860     optional => [ qw/label/ ],
1861 dpavlin 134 status => 200,
1862     },
1863     nodedel => {
1864 dpavlin 135 required => [ qw/name/ ],
1865 dpavlin 134 status => 200,
1866     },
1867     nodeclr => {
1868 dpavlin 135 required => [ qw/name/ ],
1869 dpavlin 134 status => 200,
1870     },
1871     nodertt => {
1872     status => 200,
1873     },
1874     };
1875    
1876     sub master {
1877     my $self = shift;
1878    
1879     my $args = {@_};
1880    
1881     # have action?
1882     my $action = $args->{action} || croak "need action, available: ",
1883     join(", ",keys %{ $estmaster_rest });
1884    
1885     # check if action is valid
1886     my $rest = $estmaster_rest->{$action};
1887     croak "action '$action' is not supported, available actions: ",
1888     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1889    
1890     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1891    
1892     my @args;
1893    
1894     if ($rest->{required} || $rest->{optional}) {
1895    
1896     map {
1897     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1898     push @args, $_ . '=' . uri_escape( $args->{$_} );
1899 dpavlin 136 } ( @{ $rest->{required} } );
1900 dpavlin 134
1901     map {
1902     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1903 dpavlin 136 } ( @{ $rest->{optional} } );
1904 dpavlin 134
1905     }
1906    
1907     my $uri = new URI( $self->{url} );
1908    
1909     my $resbody;
1910    
1911 dpavlin 135 my $status = $self->shuttle_url(
1912 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1913     'application/x-www-form-urlencoded',
1914     join('&', @args),
1915     \$resbody,
1916     1,
1917 dpavlin 135 ) or confess "shuttle_url failed";
1918 dpavlin 134
1919 dpavlin 135 if ($status == $rest->{status}) {
1920 dpavlin 149
1921     # refresh node info after sync
1922 dpavlin 151 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
1923 dpavlin 149
1924 dpavlin 135 if ($rest->{returns} && wantarray) {
1925 dpavlin 134
1926     my @results;
1927 dpavlin 135 my $fields = $#{$rest->{returns}};
1928 dpavlin 134
1929     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1930 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1931 dpavlin 134 my $row;
1932 dpavlin 135 foreach my $i ( 0 .. $fields) {
1933     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1934     }
1935 dpavlin 134 push @results, $row;
1936     }
1937    
1938     return @results;
1939    
1940 dpavlin 135 } elsif ($resbody) {
1941 dpavlin 136 chomp $resbody;
1942 dpavlin 134 return $resbody;
1943 dpavlin 135 } else {
1944     return 0E0;
1945 dpavlin 134 }
1946     }
1947 dpavlin 135
1948     carp "expected status $rest->{status}, but got $status";
1949     return undef;
1950 dpavlin 134 }
1951    
1952 dpavlin 55 =head1 PRIVATE METHODS
1953    
1954     You could call those directly, but you don't have to. I hope.
1955    
1956     =head2 _set_info
1957    
1958 dpavlin 48 Set information for node
1959    
1960 dpavlin 55 $node->_set_info;
1961 dpavlin 48
1962     =cut
1963    
1964 dpavlin 55 sub _set_info {
1965 dpavlin 48 my $self = shift;
1966    
1967     $self->{status} = -1;
1968     return unless ($self->{url});
1969    
1970     my $resbody;
1971     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1972     'text/plain',
1973     undef,
1974     \$resbody,
1975     );
1976    
1977     return if ($rv != 200 || !$resbody);
1978    
1979 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1980 dpavlin 48
1981 dpavlin 151 $self->_clear_info;
1982 dpavlin 111
1983     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1984     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1985    
1986 dpavlin 107 return $resbody unless (@lines);
1987    
1988     shift @lines;
1989    
1990     while(my $admin = shift @lines) {
1991 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1992 dpavlin 107 }
1993 dpavlin 111
1994 dpavlin 107 while(my $guest = shift @lines) {
1995 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1996 dpavlin 107 }
1997    
1998     while(my $link = shift @lines) {
1999 dpavlin 111 push @{$self->{inform}->{links}}, $link;
2000 dpavlin 107 }
2001    
2002     return $resbody;
2003    
2004 dpavlin 48 }
2005    
2006 dpavlin 151 =head2 _clear_info
2007    
2008     Clear information for node
2009    
2010     $node->_clear_info;
2011    
2012     On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
2013     info will be fetch again from Hyper Estraier.
2014    
2015     =cut
2016     sub _clear_info {
2017     my $self = shift;
2018     $self->{inform} = {
2019     dnum => -1,
2020     wnum => -1,
2021     size => -1.0,
2022     };
2023     }
2024    
2025 dpavlin 2 ###
2026    
2027     =head1 EXPORT
2028    
2029     Nothing.
2030    
2031     =head1 SEE ALSO
2032    
2033     L<http://hyperestraier.sourceforge.net/>
2034    
2035     Hyper Estraier Ruby interface on which this module is based.
2036    
2037 dpavlin 164 Hyper Estraier now also has pure-perl binding included in distribution. It's
2038     a faster way to access databases directly if you are not running
2039     C<estmaster> P2P server.
2040    
2041 dpavlin 2 =head1 AUTHOR
2042    
2043     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2044    
2045 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2046 dpavlin 2
2047     =head1 COPYRIGHT AND LICENSE
2048    
2049 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2050 dpavlin 2
2051     This library is free software; you can redistribute it and/or modify
2052     it under the GPL v2 or later.
2053    
2054     =cut
2055    
2056     1;

  ViewVC Help
Powered by ViewVC 1.1.26