/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 173 - (hide annotations)
Sun Aug 6 18:15:11 2006 UTC (17 years, 7 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 37131 byte(s)
added $cond->set_mask
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 166 our $VERSION = '0.07_3';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 164 This class implements Document which is single item in Hyper Estraier.
124 dpavlin 14
125 dpavlin 164 It's is collection of:
126 dpavlin 42
127 dpavlin 164 =over 4
128    
129     =item attributes
130    
131     C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132    
133     You can add common filters to C<attrindex> in estmaster's C<_conf>
134     file for better performance. See C<attrindex> in
135     L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136    
137     =item vectors
138    
139     also C<< 'key' => 'value' >> pairs
140    
141     =item display text
142    
143     Text which will be used to create searchable corpus of your index and
144     included in snippet output.
145    
146     =item hidden text
147    
148     Text which will be searchable, but will not be included in snippet.
149    
150     =back
151    
152 dpavlin 2 =head2 new
153    
154 dpavlin 14 Create new document, empty or from draft.
155    
156 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
157 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
158 dpavlin 2
159     =cut
160    
161     sub new {
162     my $class = shift;
163 dpavlin 14 my $self = {};
164 dpavlin 2 bless($self, $class);
165    
166 dpavlin 6 $self->{id} = -1;
167    
168 dpavlin 14 my $draft = shift;
169    
170     if ($draft) {
171     my $in_text = 0;
172     foreach my $line (split(/\n/, $draft)) {
173    
174     if ($in_text) {
175     if ($line =~ /^\t/) {
176     push @{ $self->{htexts} }, substr($line, 1);
177     } else {
178     push @{ $self->{dtexts} }, $line;
179     }
180     next;
181     }
182    
183     if ($line =~ m/^%VECTOR\t(.+)$/) {
184     my @fields = split(/\t/, $1);
185 dpavlin 166 if ($#fields % 2 == 1) {
186     $self->{kwords} = { @fields };
187     } else {
188     warn "can't decode $line\n";
189 dpavlin 14 }
190     next;
191     } elsif ($line =~ m/^%/) {
192     # What is this? comment?
193     #warn "$line\n";
194     next;
195     } elsif ($line =~ m/^$/) {
196     $in_text = 1;
197     next;
198 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
199 dpavlin 14 $self->{attrs}->{ $1 } = $2;
200     next;
201     }
202    
203 dpavlin 81 warn "draft ignored: '$line'\n";
204 dpavlin 14 }
205     }
206    
207 dpavlin 2 $self ? return $self : return undef;
208     }
209    
210 dpavlin 4
211 dpavlin 2 =head2 add_attr
212    
213 dpavlin 6 Add an attribute.
214    
215 dpavlin 2 $doc->add_attr( name => 'value' );
216    
217 dpavlin 9 Delete attribute using
218 dpavlin 5
219     $doc->add_attr( name => undef );
220    
221 dpavlin 2 =cut
222    
223     sub add_attr {
224     my $self = shift;
225     my $attrs = {@_};
226    
227     while (my ($name, $value) = each %{ $attrs }) {
228 dpavlin 9 if (! defined($value)) {
229 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
230 dpavlin 9 } else {
231 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
232 dpavlin 9 }
233 dpavlin 2 }
234 dpavlin 8
235     return 1;
236 dpavlin 2 }
237    
238 dpavlin 5
239     =head2 add_text
240    
241 dpavlin 6 Add a sentence of text.
242    
243 dpavlin 5 $doc->add_text('this is example text to display');
244    
245     =cut
246    
247     sub add_text {
248     my $self = shift;
249     my $text = shift;
250     return unless defined($text);
251    
252 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
253 dpavlin 5 }
254    
255    
256     =head2 add_hidden_text
257    
258 dpavlin 6 Add a hidden sentence.
259    
260 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
261    
262     =cut
263    
264     sub add_hidden_text {
265     my $self = shift;
266     my $text = shift;
267     return unless defined($text);
268    
269 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
270 dpavlin 5 }
271    
272 dpavlin 166 =head2 add_vectors
273 dpavlin 42
274 dpavlin 166 Add a vectors
275    
276     $doc->add_vector(
277     'vector_name' => 42,
278     'another' => 12345,
279     );
280    
281     =cut
282    
283     sub add_vectors {
284     my $self = shift;
285     return unless (@_);
286    
287     # this is ugly, but works
288     die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);
289    
290     $self->{kwords} = {@_};
291     }
292    
293    
294 dpavlin 6 =head2 id
295    
296     Get the ID number of document. If the object has never been registred, C<-1> is returned.
297    
298     print $doc->id;
299    
300     =cut
301    
302     sub id {
303     my $self = shift;
304     return $self->{id};
305     }
306    
307 dpavlin 42
308 dpavlin 7 =head2 attr_names
309    
310 dpavlin 9 Returns array with attribute names from document object.
311 dpavlin 7
312     my @attrs = $doc->attr_names;
313    
314     =cut
315    
316     sub attr_names {
317     my $self = shift;
318 dpavlin 60 return unless ($self->{attrs});
319     #croak "attr_names return array, not scalar" if (! wantarray);
320 dpavlin 7 return sort keys %{ $self->{attrs} };
321     }
322    
323 dpavlin 8
324     =head2 attr
325    
326 dpavlin 9 Returns value of an attribute.
327 dpavlin 8
328     my $value = $doc->attr( 'attribute' );
329    
330     =cut
331    
332     sub attr {
333     my $self = shift;
334     my $name = shift;
335 dpavlin 60 return unless (defined($name) && $self->{attrs});
336     return $self->{attrs}->{ $name };
337 dpavlin 8 }
338    
339 dpavlin 9
340     =head2 texts
341    
342     Returns array with text sentences.
343    
344     my @texts = $doc->texts;
345    
346     =cut
347    
348     sub texts {
349     my $self = shift;
350 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
351     return @{ $self->{dtexts} } if ($self->{dtexts});
352 dpavlin 9 }
353    
354 dpavlin 42
355 dpavlin 12 =head2 cat_texts
356    
357     Return whole text as single scalar.
358    
359     my $text = $doc->cat_texts;
360    
361     =cut
362    
363     sub cat_texts {
364     my $self = shift;
365 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
366 dpavlin 12 }
367    
368 dpavlin 42
369 dpavlin 5 =head2 dump_draft
370    
371 dpavlin 13 Dump draft data from document object.
372    
373 dpavlin 5 print $doc->dump_draft;
374    
375     =cut
376    
377     sub dump_draft {
378 dpavlin 13 my $self = shift;
379     my $draft;
380    
381     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
382 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
383 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
384 dpavlin 13 }
385    
386     if ($self->{kwords}) {
387 dpavlin 166 $draft .= '%VECTOR';
388 dpavlin 13 while (my ($key, $value) = each %{ $self->{kwords} }) {
389     $draft .= "\t$key\t$value";
390     }
391     $draft .= "\n";
392     }
393    
394     $draft .= "\n";
395    
396 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
397     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
398 dpavlin 13
399     return $draft;
400 dpavlin 5 }
401    
402 dpavlin 42
403 dpavlin 4 =head2 delete
404 dpavlin 2
405 dpavlin 4 Empty document object
406 dpavlin 2
407 dpavlin 4 $doc->delete;
408    
409 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
410     convinience. Document objects which go out of scope will be destroyed
411     automatically.
412    
413 dpavlin 4 =cut
414    
415     sub delete {
416     my $self = shift;
417    
418 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
419 dpavlin 5 delete($self->{$data});
420     }
421 dpavlin 4
422 dpavlin 10 $self->{id} = -1;
423    
424 dpavlin 4 return 1;
425     }
426    
427    
428    
429 dpavlin 15 package Search::Estraier::Condition;
430 dpavlin 4
431 dpavlin 98 use Carp qw/carp confess croak/;
432 dpavlin 16
433 dpavlin 15 use Search::Estraier;
434     our @ISA = qw/Search::Estraier/;
435 dpavlin 4
436 dpavlin 16 =head1 Search::Estraier::Condition
437    
438     =head2 new
439    
440     my $cond = new Search::HyperEstraier::Condition;
441    
442     =cut
443    
444     sub new {
445     my $class = shift;
446     my $self = {};
447     bless($self, $class);
448    
449 dpavlin 19 $self->{max} = -1;
450     $self->{options} = 0;
451    
452 dpavlin 16 $self ? return $self : return undef;
453     }
454    
455 dpavlin 42
456 dpavlin 16 =head2 set_phrase
457    
458     $cond->set_phrase('search phrase');
459    
460     =cut
461    
462     sub set_phrase {
463     my $self = shift;
464     $self->{phrase} = $self->_s( shift );
465     }
466    
467 dpavlin 42
468 dpavlin 16 =head2 add_attr
469    
470     $cond->add_attr('@URI STRINC /~dpavlin/');
471    
472     =cut
473    
474     sub add_attr {
475     my $self = shift;
476     my $attr = shift || return;
477     push @{ $self->{attrs} }, $self->_s( $attr );
478     }
479    
480 dpavlin 42
481 dpavlin 16 =head2 set_order
482    
483     $cond->set_order('@mdate NUMD');
484    
485     =cut
486    
487     sub set_order {
488     my $self = shift;
489     $self->{order} = shift;
490     }
491    
492 dpavlin 42
493 dpavlin 16 =head2 set_max
494    
495     $cond->set_max(42);
496    
497     =cut
498    
499     sub set_max {
500     my $self = shift;
501     my $max = shift;
502 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
503 dpavlin 16 $self->{max} = $max;
504     }
505    
506 dpavlin 42
507 dpavlin 16 =head2 set_options
508    
509 dpavlin 98 $cond->set_options( 'SURE' );
510 dpavlin 16
511 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
512    
513     Possible options are:
514    
515     =over 8
516    
517     =item SURE
518    
519     check every N-gram
520    
521     =item USUAL
522    
523     check every second N-gram
524    
525     =item FAST
526    
527     check every third N-gram
528    
529     =item AGITO
530    
531     check every fourth N-gram
532    
533     =item NOIDF
534    
535     don't perform TF-IDF tuning
536    
537     =item SIMPLE
538    
539     use simplified query phrase
540    
541     =back
542    
543     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
544     options;
545    
546     This option changed in version C<0.04> of this module. It's backwards compatibile.
547    
548 dpavlin 16 =cut
549    
550 dpavlin 15 my $options = {
551     SURE => 1 << 0,
552     USUAL => 1 << 1,
553     FAST => 1 << 2,
554     AGITO => 1 << 3,
555     NOIDF => 1 << 4,
556     SIMPLE => 1 << 10,
557     };
558    
559 dpavlin 16 sub set_options {
560     my $self = shift;
561 dpavlin 98 my $opt = 0;
562     foreach my $option (@_) {
563     my $mask;
564     unless ($mask = $options->{$option}) {
565     if ($option eq '1') {
566     next;
567     } else {
568     croak "unknown option $option";
569     }
570     }
571     $opt += $mask;
572     }
573     $self->{options} = $opt;
574 dpavlin 4 }
575    
576 dpavlin 42
577 dpavlin 18 =head2 phrase
578    
579     Return search phrase.
580    
581     print $cond->phrase;
582    
583     =cut
584    
585     sub phrase {
586     my $self = shift;
587     return $self->{phrase};
588     }
589    
590 dpavlin 42
591 dpavlin 19 =head2 order
592 dpavlin 18
593 dpavlin 19 Return search result order.
594    
595     print $cond->order;
596    
597     =cut
598    
599     sub order {
600     my $self = shift;
601     return $self->{order};
602     }
603    
604 dpavlin 42
605 dpavlin 19 =head2 attrs
606    
607     Return search result attrs.
608    
609     my @cond_attrs = $cond->attrs;
610    
611     =cut
612    
613     sub attrs {
614     my $self = shift;
615     #croak "attrs return array, not scalar" if (! wantarray);
616 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
617 dpavlin 19 }
618    
619 dpavlin 42
620 dpavlin 19 =head2 max
621    
622     Return maximum number of results.
623    
624     print $cond->max;
625    
626     C<-1> is returned for unitialized value, C<0> is unlimited.
627    
628     =cut
629    
630     sub max {
631     my $self = shift;
632     return $self->{max};
633     }
634    
635 dpavlin 42
636 dpavlin 19 =head2 options
637    
638     Return options for this condition.
639    
640     print $cond->options;
641    
642     Options are returned in numerical form.
643    
644     =cut
645    
646     sub options {
647     my $self = shift;
648     return $self->{options};
649     }
650    
651    
652 dpavlin 116 =head2 set_skip
653    
654     Set number of skipped documents from beginning of results
655    
656     $cond->set_skip(42);
657    
658     Similar to C<offset> in RDBMS.
659    
660     =cut
661    
662     sub set_skip {
663     my $self = shift;
664     $self->{skip} = shift;
665     }
666    
667     =head2 skip
668    
669     Return skip for this condition.
670    
671     print $cond->skip;
672    
673     =cut
674    
675     sub skip {
676     my $self = shift;
677     return $self->{skip};
678     }
679    
680 dpavlin 173 =head2 set_mask
681 dpavlin 116
682 dpavlin 173 Select just some links when searching and not all.
683    
684     Argument array of link numbers, starting with 0.
685    
686     $cond->set_mask(qw/0 1 4/);
687    
688     =cut
689    
690     sub set_mask {
691     my $self = shift;
692     return unless (@_);
693     $self->{mask} = \@_;
694     }
695    
696    
697 dpavlin 20 package Search::Estraier::ResultDocument;
698    
699 dpavlin 24 use Carp qw/croak/;
700 dpavlin 20
701 dpavlin 24 #use Search::Estraier;
702     #our @ISA = qw/Search::Estraier/;
703 dpavlin 20
704     =head1 Search::Estraier::ResultDocument
705    
706     =head2 new
707    
708 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
709 dpavlin 20 uri => 'http://localhost/document/uri/42',
710     attrs => {
711     foo => 1,
712     bar => 2,
713     },
714     snippet => 'this is a text of snippet'
715     keywords => 'this\tare\tkeywords'
716     );
717    
718     =cut
719    
720     sub new {
721     my $class = shift;
722     my $self = {@_};
723     bless($self, $class);
724    
725 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
726 dpavlin 20
727     $self ? return $self : return undef;
728     }
729    
730 dpavlin 42
731 dpavlin 23 =head2 uri
732 dpavlin 20
733 dpavlin 23 Return URI of result document
734 dpavlin 20
735 dpavlin 23 print $rdoc->uri;
736    
737     =cut
738    
739     sub uri {
740     my $self = shift;
741     return $self->{uri};
742     }
743    
744    
745     =head2 attr_names
746    
747     Returns array with attribute names from result document object.
748    
749     my @attrs = $rdoc->attr_names;
750    
751     =cut
752    
753     sub attr_names {
754     my $self = shift;
755     croak "attr_names return array, not scalar" if (! wantarray);
756     return sort keys %{ $self->{attrs} };
757     }
758    
759 dpavlin 42
760 dpavlin 23 =head2 attr
761    
762     Returns value of an attribute.
763    
764     my $value = $rdoc->attr( 'attribute' );
765    
766     =cut
767    
768     sub attr {
769     my $self = shift;
770     my $name = shift || return;
771     return $self->{attrs}->{ $name };
772     }
773    
774 dpavlin 42
775 dpavlin 23 =head2 snippet
776    
777     Return snippet from result document
778    
779     print $rdoc->snippet;
780    
781     =cut
782    
783     sub snippet {
784     my $self = shift;
785     return $self->{snippet};
786     }
787    
788 dpavlin 42
789 dpavlin 23 =head2 keywords
790    
791     Return keywords from result document
792    
793     print $rdoc->keywords;
794    
795     =cut
796    
797     sub keywords {
798     my $self = shift;
799     return $self->{keywords};
800     }
801    
802    
803 dpavlin 25 package Search::Estraier::NodeResult;
804    
805     use Carp qw/croak/;
806    
807     #use Search::Estraier;
808     #our @ISA = qw/Search::Estraier/;
809    
810     =head1 Search::Estraier::NodeResult
811    
812     =head2 new
813    
814     my $res = new Search::HyperEstraier::NodeResult(
815     docs => @array_of_rdocs,
816     hits => %hash_with_hints,
817     );
818    
819     =cut
820    
821     sub new {
822     my $class = shift;
823     my $self = {@_};
824     bless($self, $class);
825    
826     foreach my $f (qw/docs hints/) {
827     croak "missing $f for ResultDocument" unless defined($self->{$f});
828     }
829    
830     $self ? return $self : return undef;
831     }
832    
833 dpavlin 42
834 dpavlin 25 =head2 doc_num
835    
836     Return number of documents
837    
838     print $res->doc_num;
839    
840 dpavlin 100 This will return real number of documents (limited by C<max>).
841     If you want to get total number of hits, see C<hits>.
842    
843 dpavlin 25 =cut
844    
845     sub doc_num {
846     my $self = shift;
847 dpavlin 53 return $#{$self->{docs}} + 1;
848 dpavlin 25 }
849    
850 dpavlin 42
851 dpavlin 25 =head2 get_doc
852    
853     Return single document
854    
855     my $doc = $res->get_doc( 42 );
856    
857     Returns undef if document doesn't exist.
858    
859     =cut
860    
861     sub get_doc {
862     my $self = shift;
863     my $num = shift;
864 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
865 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
866     return $self->{docs}->[$num];
867     }
868    
869 dpavlin 42
870 dpavlin 25 =head2 hint
871    
872     Return specific hint from results.
873    
874 dpavlin 100 print $res->hint( 'VERSION' );
875 dpavlin 25
876     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
877     C<TIME>, C<LINK#n>, C<VIEW>.
878    
879     =cut
880    
881     sub hint {
882     my $self = shift;
883     my $key = shift || return;
884     return $self->{hints}->{$key};
885     }
886    
887 dpavlin 108 =head2 hints
888 dpavlin 25
889 dpavlin 91 More perlish version of C<hint>. This one returns hash.
890    
891 dpavlin 100 my %hints = $res->hints;
892 dpavlin 91
893     =cut
894    
895     sub hints {
896     my $self = shift;
897     return $self->{hints};
898     }
899    
900 dpavlin 100 =head2 hits
901    
902     Syntaxtic sugar for total number of hits for this query
903    
904     print $res->hits;
905    
906     It's same as
907    
908     print $res->hint('HIT');
909    
910     but shorter.
911    
912     =cut
913    
914     sub hits {
915     my $self = shift;
916     return $self->{hints}->{'HIT'} || 0;
917     }
918    
919 dpavlin 27 package Search::Estraier::Node;
920    
921 dpavlin 44 use Carp qw/carp croak confess/;
922 dpavlin 33 use URI;
923 dpavlin 36 use MIME::Base64;
924 dpavlin 33 use IO::Socket::INET;
925 dpavlin 49 use URI::Escape qw/uri_escape/;
926 dpavlin 29
927 dpavlin 27 =head1 Search::Estraier::Node
928    
929     =head2 new
930    
931     my $node = new Search::HyperEstraier::Node;
932    
933 dpavlin 65 or optionally with C<url> as parametar
934    
935     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
936    
937 dpavlin 78 or in more verbose form
938    
939     my $node = new Search::HyperEstraier::Node(
940     url => 'http://localhost:1978/node/test',
941 dpavlin 132 user => 'admin',
942     passwd => 'admin'
943 dpavlin 139 create => 1,
944     label => 'optional node label',
945 dpavlin 78 debug => 1,
946     croak_on_error => 1
947     );
948    
949     with following arguments:
950    
951     =over 4
952    
953     =item url
954    
955     URL to node
956    
957 dpavlin 132 =item user
958    
959     specify username for node server authentication
960    
961     =item passwd
962    
963     password for authentication
964    
965 dpavlin 139 =item create
966    
967     create node if it doesn't exists
968    
969     =item label
970    
971     optional label for new node if C<create> is used
972    
973 dpavlin 78 =item debug
974    
975     dumps a B<lot> of debugging output
976    
977     =item croak_on_error
978    
979     very helpful during development. It will croak on all errors instead of
980     silently returning C<-1> (which is convention of Hyper Estraier API in other
981     languages).
982    
983     =back
984    
985 dpavlin 27 =cut
986    
987     sub new {
988     my $class = shift;
989     my $self = {
990     pxport => -1,
991 dpavlin 33 timeout => 0, # this used to be -1
992 dpavlin 27 wwidth => 480,
993     hwidth => 96,
994     awidth => 96,
995     status => -1,
996     };
997 dpavlin 111
998 dpavlin 27 bless($self, $class);
999    
1000 dpavlin 65 if ($#_ == 0) {
1001     $self->{url} = shift;
1002     } else {
1003 dpavlin 78 %$self = ( %$self, @_ );
1004    
1005 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
1006    
1007 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
1008     }
1009 dpavlin 57
1010 dpavlin 111 $self->{inform} = {
1011     dnum => -1,
1012     wnum => -1,
1013     size => -1.0,
1014     };
1015    
1016 dpavlin 139 if ($self->{create}) {
1017 dpavlin 140 if (! eval { $self->name } || $@) {
1018 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
1019     croak "can't find node name in '$self->{url}'" unless ($name);
1020     my $label = $self->{label} || $name;
1021     $self->master(
1022     action => 'nodeadd',
1023     name => $name,
1024     label => $label,
1025     ) || croak "can't create node $name ($label)";
1026     }
1027     }
1028    
1029 dpavlin 27 $self ? return $self : return undef;
1030     }
1031    
1032 dpavlin 42
1033 dpavlin 29 =head2 set_url
1034    
1035     Specify URL to node server
1036    
1037     $node->set_url('http://localhost:1978');
1038    
1039     =cut
1040    
1041     sub set_url {
1042     my $self = shift;
1043     $self->{url} = shift;
1044     }
1045    
1046 dpavlin 42
1047 dpavlin 29 =head2 set_proxy
1048    
1049     Specify proxy server to connect to node server
1050    
1051     $node->set_proxy('proxy.example.com', 8080);
1052    
1053     =cut
1054    
1055     sub set_proxy {
1056     my $self = shift;
1057     my ($host,$port) = @_;
1058 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1059 dpavlin 29 $self->{pxhost} = $host;
1060     $self->{pxport} = $port;
1061     }
1062    
1063 dpavlin 42
1064 dpavlin 30 =head2 set_timeout
1065    
1066     Specify timeout of connection in seconds
1067    
1068     $node->set_timeout( 15 );
1069    
1070     =cut
1071    
1072     sub set_timeout {
1073     my $self = shift;
1074     my $sec = shift;
1075 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1076 dpavlin 30 $self->{timeout} = $sec;
1077     }
1078    
1079 dpavlin 42
1080 dpavlin 31 =head2 set_auth
1081    
1082     Specify name and password for authentication to node server.
1083    
1084     $node->set_auth('clint','eastwood');
1085    
1086     =cut
1087    
1088     sub set_auth {
1089     my $self = shift;
1090     my ($login,$passwd) = @_;
1091 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1092     chomp($basic_auth);
1093     $self->{auth} = $basic_auth;
1094 dpavlin 31 }
1095    
1096 dpavlin 42
1097 dpavlin 32 =head2 status
1098    
1099     Return status code of last request.
1100    
1101 dpavlin 40 print $node->status;
1102 dpavlin 32
1103     C<-1> means connection failure.
1104    
1105     =cut
1106    
1107     sub status {
1108     my $self = shift;
1109     return $self->{status};
1110     }
1111    
1112 dpavlin 42
1113 dpavlin 40 =head2 put_doc
1114    
1115 dpavlin 41 Add a document
1116 dpavlin 40
1117 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1118    
1119 dpavlin 151 Return true on success or false on failure.
1120 dpavlin 41
1121 dpavlin 40 =cut
1122    
1123     sub put_doc {
1124     my $self = shift;
1125     my $doc = shift || return;
1126 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1127 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/put_doc',
1128 dpavlin 41 'text/x-estraier-draft',
1129     $doc->dump_draft,
1130     undef
1131 dpavlin 151 ) == 200) {
1132     $self->_clear_info;
1133     return 1;
1134     }
1135     return undef;
1136 dpavlin 40 }
1137    
1138 dpavlin 41
1139     =head2 out_doc
1140    
1141     Remove a document
1142    
1143     $node->out_doc( document_id ) or "can't remove document";
1144    
1145     Return true on success or false on failture.
1146    
1147     =cut
1148    
1149     sub out_doc {
1150     my $self = shift;
1151     my $id = shift || return;
1152     return unless ($self->{url});
1153 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1154 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1155 dpavlin 41 'application/x-www-form-urlencoded',
1156     "id=$id",
1157     undef
1158 dpavlin 150 ) == 200) {
1159 dpavlin 151 $self->_clear_info;
1160     return 1;
1161 dpavlin 150 }
1162     return undef;
1163 dpavlin 41 }
1164    
1165    
1166     =head2 out_doc_by_uri
1167    
1168     Remove a registrated document using it's uri
1169    
1170 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1171 dpavlin 41
1172     Return true on success or false on failture.
1173    
1174     =cut
1175    
1176     sub out_doc_by_uri {
1177     my $self = shift;
1178     my $uri = shift || return;
1179     return unless ($self->{url});
1180 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1181 dpavlin 41 'application/x-www-form-urlencoded',
1182 dpavlin 50 "uri=" . uri_escape($uri),
1183 dpavlin 41 undef
1184 dpavlin 150 ) == 200) {
1185 dpavlin 151 $self->_clear_info;
1186     return 1;
1187 dpavlin 150 }
1188     return undef;
1189 dpavlin 41 }
1190    
1191 dpavlin 42
1192     =head2 edit_doc
1193    
1194     Edit attributes of a document
1195    
1196     $node->edit_doc( $document_draft ) or die "can't edit document";
1197    
1198     Return true on success or false on failture.
1199    
1200     =cut
1201    
1202     sub edit_doc {
1203     my $self = shift;
1204     my $doc = shift || return;
1205 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1206 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1207 dpavlin 42 'text/x-estraier-draft',
1208     $doc->dump_draft,
1209     undef
1210 dpavlin 151 ) == 200) {
1211     $self->_clear_info;
1212     return 1;
1213     }
1214     return undef;
1215 dpavlin 42 }
1216    
1217    
1218 dpavlin 43 =head2 get_doc
1219    
1220     Retreive document
1221    
1222     my $doc = $node->get_doc( document_id ) or die "can't get document";
1223    
1224     Return true on success or false on failture.
1225    
1226     =cut
1227    
1228     sub get_doc {
1229     my $self = shift;
1230     my $id = shift || return;
1231     return $self->_fetch_doc( id => $id );
1232     }
1233    
1234 dpavlin 44
1235 dpavlin 43 =head2 get_doc_by_uri
1236    
1237     Retreive document
1238    
1239 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1240 dpavlin 43
1241     Return true on success or false on failture.
1242    
1243     =cut
1244    
1245     sub get_doc_by_uri {
1246     my $self = shift;
1247     my $uri = shift || return;
1248     return $self->_fetch_doc( uri => $uri );
1249     }
1250    
1251 dpavlin 44
1252 dpavlin 49 =head2 get_doc_attr
1253    
1254     Retrieve the value of an atribute from object
1255    
1256     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1257     die "can't get document attribute";
1258    
1259     =cut
1260    
1261     sub get_doc_attr {
1262     my $self = shift;
1263     my ($id,$name) = @_;
1264     return unless ($id && $name);
1265     return $self->_fetch_doc( id => $id, attr => $name );
1266     }
1267    
1268    
1269     =head2 get_doc_attr_by_uri
1270    
1271     Retrieve the value of an atribute from object
1272    
1273     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1274     die "can't get document attribute";
1275    
1276     =cut
1277    
1278     sub get_doc_attr_by_uri {
1279     my $self = shift;
1280     my ($uri,$name) = @_;
1281     return unless ($uri && $name);
1282     return $self->_fetch_doc( uri => $uri, attr => $name );
1283     }
1284    
1285    
1286 dpavlin 44 =head2 etch_doc
1287    
1288     Exctract document keywords
1289    
1290     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1291    
1292     =cut
1293    
1294 dpavlin 49 sub etch_doc {
1295 dpavlin 44 my $self = shift;
1296     my $id = shift || return;
1297     return $self->_fetch_doc( id => $id, etch => 1 );
1298     }
1299    
1300     =head2 etch_doc_by_uri
1301    
1302     Retreive document
1303    
1304 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1305 dpavlin 44
1306     Return true on success or false on failture.
1307    
1308     =cut
1309    
1310     sub etch_doc_by_uri {
1311     my $self = shift;
1312     my $uri = shift || return;
1313     return $self->_fetch_doc( uri => $uri, etch => 1 );
1314     }
1315    
1316    
1317 dpavlin 45 =head2 uri_to_id
1318    
1319     Get ID of document specified by URI
1320    
1321     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1322    
1323 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1324    
1325 dpavlin 45 =cut
1326    
1327     sub uri_to_id {
1328     my $self = shift;
1329     my $uri = shift || return;
1330 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1331 dpavlin 45 }
1332    
1333    
1334 dpavlin 43 =head2 _fetch_doc
1335    
1336 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1337     C<etch_doc>, C<etch_doc_by_uri>.
1338 dpavlin 43
1339 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1340     my $doc = $node->_fetch_doc( id => 42 );
1341     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1342 dpavlin 43
1343 dpavlin 45 # to extract keywords, add etch
1344     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1345     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1346    
1347 dpavlin 49 # to get document attrubute add attr
1348     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1349     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1350    
1351 dpavlin 45 # more general form which allows implementation of
1352     # uri_to_id
1353     my $id = $node->_fetch_doc(
1354     uri => 'file:///document/uri/42',
1355     path => '/uri_to_id',
1356     chomp_resbody => 1
1357     );
1358    
1359 dpavlin 43 =cut
1360    
1361     sub _fetch_doc {
1362     my $self = shift;
1363 dpavlin 44 my $a = {@_};
1364     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1365    
1366     my ($arg, $resbody);
1367    
1368 dpavlin 45 my $path = $a->{path} || '/get_doc';
1369 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1370    
1371     if ($a->{id}) {
1372 dpavlin 168 croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1373 dpavlin 44 $arg = 'id=' . $a->{id};
1374     } elsif ($a->{uri}) {
1375 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1376 dpavlin 44 } else {
1377     confess "unhandled argument. Need id or uri.";
1378 dpavlin 43 }
1379 dpavlin 44
1380 dpavlin 49 if ($a->{attr}) {
1381     $path = '/get_doc_attr';
1382     $arg .= '&attr=' . uri_escape($a->{attr});
1383     $a->{chomp_resbody} = 1;
1384     }
1385    
1386 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1387 dpavlin 43 'application/x-www-form-urlencoded',
1388 dpavlin 44 $arg,
1389 dpavlin 45 \$resbody,
1390 dpavlin 103 $a->{croak_on_error},
1391 dpavlin 43 );
1392 dpavlin 44
1393 dpavlin 43 return if ($rv != 200);
1394 dpavlin 44
1395     if ($a->{etch}) {
1396     $self->{kwords} = {};
1397     return +{} unless ($resbody);
1398     foreach my $l (split(/\n/, $resbody)) {
1399     my ($k,$v) = split(/\t/, $l, 2);
1400     $self->{kwords}->{$k} = $v if ($v);
1401     }
1402     return $self->{kwords};
1403 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1404     return unless (defined($resbody));
1405     chomp($resbody);
1406     return $resbody;
1407 dpavlin 44 } else {
1408     return new Search::Estraier::Document($resbody);
1409     }
1410 dpavlin 43 }
1411    
1412    
1413 dpavlin 48 =head2 name
1414 dpavlin 43
1415 dpavlin 48 my $node_name = $node->name;
1416 dpavlin 43
1417 dpavlin 48 =cut
1418    
1419     sub name {
1420     my $self = shift;
1421 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1422     return $self->{inform}->{name};
1423 dpavlin 48 }
1424    
1425    
1426     =head2 label
1427    
1428     my $node_label = $node->label;
1429    
1430     =cut
1431    
1432     sub label {
1433     my $self = shift;
1434 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1435     return $self->{inform}->{label};
1436 dpavlin 48 }
1437    
1438    
1439     =head2 doc_num
1440    
1441     my $documents_in_node = $node->doc_num;
1442    
1443     =cut
1444    
1445     sub doc_num {
1446     my $self = shift;
1447 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1448     return $self->{inform}->{dnum};
1449 dpavlin 48 }
1450    
1451    
1452     =head2 word_num
1453    
1454     my $words_in_node = $node->word_num;
1455    
1456     =cut
1457    
1458     sub word_num {
1459     my $self = shift;
1460 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1461     return $self->{inform}->{wnum};
1462 dpavlin 48 }
1463    
1464    
1465     =head2 size
1466    
1467     my $node_size = $node->size;
1468    
1469     =cut
1470    
1471     sub size {
1472     my $self = shift;
1473 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1474     return $self->{inform}->{size};
1475 dpavlin 48 }
1476    
1477    
1478 dpavlin 51 =head2 search
1479 dpavlin 48
1480 dpavlin 51 Search documents which match condition
1481    
1482     my $nres = $node->search( $cond, $depth );
1483    
1484     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1485     depth for meta search.
1486    
1487     Function results C<Search::Estraier::NodeResult> object.
1488    
1489     =cut
1490    
1491     sub search {
1492     my $self = shift;
1493     my ($cond, $depth) = @_;
1494     return unless ($cond && defined($depth) && $self->{url});
1495     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1496     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1497    
1498 dpavlin 52 my $resbody;
1499 dpavlin 51
1500 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1501 dpavlin 53 'application/x-www-form-urlencoded',
1502 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1503 dpavlin 52 \$resbody,
1504     );
1505     return if ($rv != 200);
1506    
1507 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1508     my $hintsText = splice @records, 0, 2; # starts with empty record
1509     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1510    
1511     # process records
1512 dpavlin 128 my $docs = [];
1513 dpavlin 126 foreach my $record (@records)
1514     {
1515     # split into keys and snippets
1516     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1517    
1518     # create document hash
1519     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1520     $doc->{'@keywords'} = $doc->{keywords};
1521     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1522     $doc->{snippet} = $snippet;
1523    
1524     push @$docs, new Search::Estraier::ResultDocument(
1525     attrs => $doc,
1526     uri => $doc->{'@uri'},
1527     snippet => $snippet,
1528     keywords => $doc->{'keywords'},
1529     );
1530     }
1531    
1532     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1533     }
1534    
1535    
1536 dpavlin 51 =head2 cond_to_query
1537    
1538 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1539    
1540 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1541 dpavlin 51
1542     =cut
1543    
1544     sub cond_to_query {
1545     my $self = shift;
1546    
1547     my $cond = shift || return;
1548     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1549 dpavlin 61 my $depth = shift;
1550 dpavlin 51
1551     my @args;
1552    
1553     if (my $phrase = $cond->phrase) {
1554     push @args, 'phrase=' . uri_escape($phrase);
1555     }
1556    
1557     if (my @attrs = $cond->attrs) {
1558     for my $i ( 0 .. $#attrs ) {
1559 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1560 dpavlin 51 }
1561     }
1562    
1563     if (my $order = $cond->order) {
1564     push @args, 'order=' . uri_escape($order);
1565     }
1566    
1567     if (my $max = $cond->max) {
1568     push @args, 'max=' . $max;
1569     } else {
1570     push @args, 'max=' . (1 << 30);
1571     }
1572    
1573     if (my $options = $cond->options) {
1574     push @args, 'options=' . $options;
1575     }
1576    
1577 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1578 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1579     push @args, 'hwidth=' . $self->{hwidth};
1580     push @args, 'awidth=' . $self->{awidth};
1581 dpavlin 155 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1582 dpavlin 51
1583 dpavlin 173 if ($cond->{mask}) {
1584     my $mask = 0;
1585     map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };
1586    
1587     push @args, 'mask=' . $mask if ($mask);
1588     }
1589    
1590 dpavlin 51 return join('&', @args);
1591     }
1592    
1593    
1594 dpavlin 33 =head2 shuttle_url
1595 dpavlin 32
1596 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1597 dpavlin 33 master.
1598 dpavlin 2
1599 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1600 dpavlin 2
1601 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1602     body will be saved within object.
1603 dpavlin 2
1604     =cut
1605    
1606 dpavlin 59 use LWP::UserAgent;
1607    
1608 dpavlin 33 sub shuttle_url {
1609     my $self = shift;
1610 dpavlin 2
1611 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1612 dpavlin 2
1613 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1614    
1615 dpavlin 40 $self->{status} = -1;
1616 dpavlin 33
1617 dpavlin 41 warn "## $url\n" if ($self->{debug});
1618 dpavlin 36
1619 dpavlin 33 $url = new URI($url);
1620 dpavlin 37 if (
1621     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1622     !$url->host || !$url->port || $url->port < 1
1623     ) {
1624     carp "can't parse $url\n";
1625     return -1;
1626     }
1627 dpavlin 33
1628 dpavlin 59 my $ua = LWP::UserAgent->new;
1629     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1630 dpavlin 33
1631 dpavlin 59 my $req;
1632 dpavlin 37 if ($reqbody) {
1633 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1634 dpavlin 37 } else {
1635 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1636 dpavlin 37 }
1637    
1638 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1639     $req->headers->header( 'Connection', 'close' );
1640 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1641 dpavlin 59 $req->content_type( $content_type );
1642 dpavlin 37
1643 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1644 dpavlin 2
1645 dpavlin 37 if ($reqbody) {
1646 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1647 dpavlin 59 $req->content( $reqbody );
1648 dpavlin 33 }
1649 dpavlin 2
1650 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1651 dpavlin 2
1652 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1653 dpavlin 2
1654 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1655    
1656 dpavlin 78 if (! $res->is_success) {
1657 dpavlin 103 if ($croak_on_error) {
1658 dpavlin 78 croak("can't get $url: ",$res->status_line);
1659     } else {
1660     return -1;
1661     }
1662     }
1663 dpavlin 2
1664 dpavlin 59 $$resbody .= $res->content;
1665    
1666 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1667 dpavlin 39
1668 dpavlin 40 return $self->{status};
1669 dpavlin 2 }
1670    
1671 dpavlin 48
1672 dpavlin 55 =head2 set_snippet_width
1673 dpavlin 48
1674 dpavlin 55 Set width of snippets in results
1675    
1676     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1677    
1678     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1679     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1680    
1681     C<$hwidth> specified width of strings from beginning of string. Default
1682     value is C<96>. Negative or zero value keep previous value.
1683    
1684     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1685     If negative of zero value is provided previous value is kept unchanged.
1686    
1687     =cut
1688    
1689     sub set_snippet_width {
1690     my $self = shift;
1691    
1692     my ($wwidth, $hwidth, $awidth) = @_;
1693     $self->{wwidth} = $wwidth;
1694     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1695     $self->{awidth} = $awidth if ($awidth >= 0);
1696     }
1697    
1698    
1699 dpavlin 56 =head2 set_user
1700 dpavlin 55
1701 dpavlin 56 Manage users of node
1702    
1703     $node->set_user( 'name', $mode );
1704    
1705     C<$mode> can be one of:
1706    
1707     =over 4
1708    
1709     =item 0
1710    
1711     delete account
1712    
1713     =item 1
1714    
1715     set administrative right for user
1716    
1717     =item 2
1718    
1719     set user account as guest
1720    
1721     =back
1722    
1723     Return true on success, otherwise false.
1724    
1725     =cut
1726    
1727     sub set_user {
1728     my $self = shift;
1729     my ($name, $mode) = @_;
1730    
1731     return unless ($self->{url});
1732     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1733    
1734     $self->shuttle_url( $self->{url} . '/_set_user',
1735 dpavlin 154 'application/x-www-form-urlencoded',
1736 dpavlin 56 'name=' . uri_escape($name) . '&mode=' . $mode,
1737     undef
1738     ) == 200;
1739     }
1740    
1741    
1742 dpavlin 57 =head2 set_link
1743    
1744     Manage node links
1745    
1746     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1747    
1748     If C<$credit> is negative, link is removed.
1749    
1750     =cut
1751    
1752     sub set_link {
1753     my $self = shift;
1754     my ($url, $label, $credit) = @_;
1755    
1756     return unless ($self->{url});
1757     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1758    
1759     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1760     $reqbody .= '&credit=' . $credit if ($credit > 0);
1761    
1762 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1763 dpavlin 71 'application/x-www-form-urlencoded',
1764 dpavlin 57 $reqbody,
1765     undef
1766 dpavlin 107 ) == 200) {
1767     # refresh node info after adding link
1768 dpavlin 151 $self->_clear_info;
1769 dpavlin 107 return 1;
1770     }
1771 dpavlin 150 return undef;
1772 dpavlin 57 }
1773    
1774 dpavlin 107 =head2 admins
1775 dpavlin 57
1776 dpavlin 107 my @admins = @{ $node->admins };
1777    
1778     Return array of users with admin rights on node
1779    
1780     =cut
1781    
1782     sub admins {
1783     my $self = shift;
1784 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1785     return $self->{inform}->{admins};
1786 dpavlin 107 }
1787    
1788     =head2 guests
1789    
1790     my @guests = @{ $node->guests };
1791    
1792     Return array of users with guest rights on node
1793    
1794     =cut
1795    
1796     sub guests {
1797     my $self = shift;
1798 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1799     return $self->{inform}->{guests};
1800 dpavlin 107 }
1801    
1802     =head2 links
1803    
1804     my $links = @{ $node->links };
1805    
1806     Return array of links for this node
1807    
1808     =cut
1809    
1810     sub links {
1811     my $self = shift;
1812 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1813     return $self->{inform}->{links};
1814 dpavlin 107 }
1815    
1816 dpavlin 160 =head2 cacheusage
1817    
1818     Return cache usage for a node
1819    
1820     my $cache = $node->cacheusage;
1821    
1822     =cut
1823    
1824     sub cacheusage {
1825     my $self = shift;
1826    
1827     return unless ($self->{url});
1828    
1829     my $resbody;
1830     my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1831     'text/plain',
1832     undef,
1833     \$resbody,
1834     );
1835    
1836     return if ($rv != 200 || !$resbody);
1837    
1838     return $resbody;
1839     }
1840    
1841 dpavlin 134 =head2 master
1842 dpavlin 107
1843 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1844    
1845     $node->master(
1846     action => 'sync'
1847     );
1848    
1849     All available actions are documented in
1850     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1851    
1852     =cut
1853    
1854     my $estmaster_rest = {
1855     shutdown => {
1856     status => 202,
1857     },
1858     sync => {
1859     status => 202,
1860     },
1861     backup => {
1862     status => 202,
1863     },
1864     userlist => {
1865     status => 200,
1866 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1867 dpavlin 134 },
1868     useradd => {
1869 dpavlin 135 required => [ qw/name passwd flags/ ],
1870     optional => [ qw/fname misc/ ],
1871 dpavlin 134 status => 200,
1872     },
1873     userdel => {
1874 dpavlin 135 required => [ qw/name/ ],
1875 dpavlin 134 status => 200,
1876     },
1877     nodelist => {
1878     status => 200,
1879 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1880 dpavlin 134 },
1881     nodeadd => {
1882 dpavlin 135 required => [ qw/name/ ],
1883     optional => [ qw/label/ ],
1884 dpavlin 134 status => 200,
1885     },
1886     nodedel => {
1887 dpavlin 135 required => [ qw/name/ ],
1888 dpavlin 134 status => 200,
1889     },
1890     nodeclr => {
1891 dpavlin 135 required => [ qw/name/ ],
1892 dpavlin 134 status => 200,
1893     },
1894     nodertt => {
1895     status => 200,
1896     },
1897     };
1898    
1899     sub master {
1900     my $self = shift;
1901    
1902     my $args = {@_};
1903    
1904     # have action?
1905     my $action = $args->{action} || croak "need action, available: ",
1906     join(", ",keys %{ $estmaster_rest });
1907    
1908     # check if action is valid
1909     my $rest = $estmaster_rest->{$action};
1910     croak "action '$action' is not supported, available actions: ",
1911     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1912    
1913     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1914    
1915     my @args;
1916    
1917     if ($rest->{required} || $rest->{optional}) {
1918    
1919     map {
1920     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1921     push @args, $_ . '=' . uri_escape( $args->{$_} );
1922 dpavlin 136 } ( @{ $rest->{required} } );
1923 dpavlin 134
1924     map {
1925     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1926 dpavlin 136 } ( @{ $rest->{optional} } );
1927 dpavlin 134
1928     }
1929    
1930     my $uri = new URI( $self->{url} );
1931    
1932     my $resbody;
1933    
1934 dpavlin 135 my $status = $self->shuttle_url(
1935 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1936     'application/x-www-form-urlencoded',
1937     join('&', @args),
1938     \$resbody,
1939     1,
1940 dpavlin 135 ) or confess "shuttle_url failed";
1941 dpavlin 134
1942 dpavlin 135 if ($status == $rest->{status}) {
1943 dpavlin 149
1944     # refresh node info after sync
1945 dpavlin 151 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
1946 dpavlin 149
1947 dpavlin 135 if ($rest->{returns} && wantarray) {
1948 dpavlin 134
1949     my @results;
1950 dpavlin 135 my $fields = $#{$rest->{returns}};
1951 dpavlin 134
1952     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1953 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1954 dpavlin 134 my $row;
1955 dpavlin 135 foreach my $i ( 0 .. $fields) {
1956     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1957     }
1958 dpavlin 134 push @results, $row;
1959     }
1960    
1961     return @results;
1962    
1963 dpavlin 135 } elsif ($resbody) {
1964 dpavlin 136 chomp $resbody;
1965 dpavlin 134 return $resbody;
1966 dpavlin 135 } else {
1967     return 0E0;
1968 dpavlin 134 }
1969     }
1970 dpavlin 135
1971     carp "expected status $rest->{status}, but got $status";
1972     return undef;
1973 dpavlin 134 }
1974    
1975 dpavlin 55 =head1 PRIVATE METHODS
1976    
1977     You could call those directly, but you don't have to. I hope.
1978    
1979     =head2 _set_info
1980    
1981 dpavlin 48 Set information for node
1982    
1983 dpavlin 55 $node->_set_info;
1984 dpavlin 48
1985     =cut
1986    
1987 dpavlin 55 sub _set_info {
1988 dpavlin 48 my $self = shift;
1989    
1990     $self->{status} = -1;
1991     return unless ($self->{url});
1992    
1993     my $resbody;
1994     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1995     'text/plain',
1996     undef,
1997     \$resbody,
1998     );
1999    
2000     return if ($rv != 200 || !$resbody);
2001    
2002 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
2003 dpavlin 48
2004 dpavlin 151 $self->_clear_info;
2005 dpavlin 111
2006     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
2007     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
2008    
2009 dpavlin 107 return $resbody unless (@lines);
2010    
2011     shift @lines;
2012    
2013     while(my $admin = shift @lines) {
2014 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
2015 dpavlin 107 }
2016 dpavlin 111
2017 dpavlin 107 while(my $guest = shift @lines) {
2018 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
2019 dpavlin 107 }
2020    
2021     while(my $link = shift @lines) {
2022 dpavlin 111 push @{$self->{inform}->{links}}, $link;
2023 dpavlin 107 }
2024    
2025     return $resbody;
2026    
2027 dpavlin 48 }
2028    
2029 dpavlin 151 =head2 _clear_info
2030    
2031     Clear information for node
2032    
2033     $node->_clear_info;
2034    
2035     On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
2036     info will be fetch again from Hyper Estraier.
2037    
2038     =cut
2039     sub _clear_info {
2040     my $self = shift;
2041     $self->{inform} = {
2042     dnum => -1,
2043     wnum => -1,
2044     size => -1.0,
2045     };
2046     }
2047    
2048 dpavlin 2 ###
2049    
2050     =head1 EXPORT
2051    
2052     Nothing.
2053    
2054     =head1 SEE ALSO
2055    
2056     L<http://hyperestraier.sourceforge.net/>
2057    
2058     Hyper Estraier Ruby interface on which this module is based.
2059    
2060 dpavlin 164 Hyper Estraier now also has pure-perl binding included in distribution. It's
2061     a faster way to access databases directly if you are not running
2062     C<estmaster> P2P server.
2063    
2064 dpavlin 2 =head1 AUTHOR
2065    
2066     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2067    
2068 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2069 dpavlin 2
2070     =head1 COPYRIGHT AND LICENSE
2071    
2072 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2073 dpavlin 2
2074     This library is free software; you can redistribute it and/or modify
2075     it under the GPL v2 or later.
2076    
2077     =cut
2078    
2079     1;

  ViewVC Help
Powered by ViewVC 1.1.26