/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 164 - (hide annotations)
Sun Aug 6 12:19:19 2006 UTC (17 years, 7 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 36470 byte(s)
documentation improvements
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 160 our $VERSION = '0.07_2';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 164 This class implements Document which is single item in Hyper Estraier.
124 dpavlin 14
125 dpavlin 164 It's is collection of:
126 dpavlin 42
127 dpavlin 164 =over 4
128    
129     =item attributes
130    
131     C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132    
133     You can add common filters to C<attrindex> in estmaster's C<_conf>
134     file for better performance. See C<attrindex> in
135     L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136    
137     =item vectors
138    
139     also C<< 'key' => 'value' >> pairs
140    
141     =item display text
142    
143     Text which will be used to create searchable corpus of your index and
144     included in snippet output.
145    
146     =item hidden text
147    
148     Text which will be searchable, but will not be included in snippet.
149    
150     =back
151    
152 dpavlin 2 =head2 new
153    
154 dpavlin 14 Create new document, empty or from draft.
155    
156 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
157 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
158 dpavlin 2
159     =cut
160    
161     sub new {
162     my $class = shift;
163 dpavlin 14 my $self = {};
164 dpavlin 2 bless($self, $class);
165    
166 dpavlin 6 $self->{id} = -1;
167    
168 dpavlin 14 my $draft = shift;
169    
170     if ($draft) {
171     my $in_text = 0;
172     foreach my $line (split(/\n/, $draft)) {
173    
174     if ($in_text) {
175     if ($line =~ /^\t/) {
176     push @{ $self->{htexts} }, substr($line, 1);
177     } else {
178     push @{ $self->{dtexts} }, $line;
179     }
180     next;
181     }
182    
183     if ($line =~ m/^%VECTOR\t(.+)$/) {
184     my @fields = split(/\t/, $1);
185     for my $i ( 0 .. ($#fields - 1) ) {
186     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
187     $i++;
188     }
189     next;
190     } elsif ($line =~ m/^%/) {
191     # What is this? comment?
192     #warn "$line\n";
193     next;
194     } elsif ($line =~ m/^$/) {
195     $in_text = 1;
196     next;
197 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
198 dpavlin 14 $self->{attrs}->{ $1 } = $2;
199     next;
200     }
201    
202 dpavlin 81 warn "draft ignored: '$line'\n";
203 dpavlin 14 }
204     }
205    
206 dpavlin 2 $self ? return $self : return undef;
207     }
208    
209 dpavlin 4
210 dpavlin 2 =head2 add_attr
211    
212 dpavlin 6 Add an attribute.
213    
214 dpavlin 2 $doc->add_attr( name => 'value' );
215    
216 dpavlin 9 Delete attribute using
217 dpavlin 5
218     $doc->add_attr( name => undef );
219    
220 dpavlin 2 =cut
221    
222     sub add_attr {
223     my $self = shift;
224     my $attrs = {@_};
225    
226     while (my ($name, $value) = each %{ $attrs }) {
227 dpavlin 9 if (! defined($value)) {
228 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
229 dpavlin 9 } else {
230 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
231 dpavlin 9 }
232 dpavlin 2 }
233 dpavlin 8
234     return 1;
235 dpavlin 2 }
236    
237 dpavlin 5
238     =head2 add_text
239    
240 dpavlin 6 Add a sentence of text.
241    
242 dpavlin 5 $doc->add_text('this is example text to display');
243    
244     =cut
245    
246     sub add_text {
247     my $self = shift;
248     my $text = shift;
249     return unless defined($text);
250    
251 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
252 dpavlin 5 }
253    
254    
255     =head2 add_hidden_text
256    
257 dpavlin 6 Add a hidden sentence.
258    
259 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
260    
261     =cut
262    
263     sub add_hidden_text {
264     my $self = shift;
265     my $text = shift;
266     return unless defined($text);
267    
268 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
269 dpavlin 5 }
270    
271 dpavlin 42
272 dpavlin 6 =head2 id
273    
274     Get the ID number of document. If the object has never been registred, C<-1> is returned.
275    
276     print $doc->id;
277    
278     =cut
279    
280     sub id {
281     my $self = shift;
282     return $self->{id};
283     }
284    
285 dpavlin 42
286 dpavlin 7 =head2 attr_names
287    
288 dpavlin 9 Returns array with attribute names from document object.
289 dpavlin 7
290     my @attrs = $doc->attr_names;
291    
292     =cut
293    
294     sub attr_names {
295     my $self = shift;
296 dpavlin 60 return unless ($self->{attrs});
297     #croak "attr_names return array, not scalar" if (! wantarray);
298 dpavlin 7 return sort keys %{ $self->{attrs} };
299     }
300    
301 dpavlin 8
302     =head2 attr
303    
304 dpavlin 9 Returns value of an attribute.
305 dpavlin 8
306     my $value = $doc->attr( 'attribute' );
307    
308     =cut
309    
310     sub attr {
311     my $self = shift;
312     my $name = shift;
313 dpavlin 60 return unless (defined($name) && $self->{attrs});
314     return $self->{attrs}->{ $name };
315 dpavlin 8 }
316    
317 dpavlin 9
318     =head2 texts
319    
320     Returns array with text sentences.
321    
322     my @texts = $doc->texts;
323    
324     =cut
325    
326     sub texts {
327     my $self = shift;
328 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
329     return @{ $self->{dtexts} } if ($self->{dtexts});
330 dpavlin 9 }
331    
332 dpavlin 42
333 dpavlin 12 =head2 cat_texts
334    
335     Return whole text as single scalar.
336    
337     my $text = $doc->cat_texts;
338    
339     =cut
340    
341     sub cat_texts {
342     my $self = shift;
343 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
344 dpavlin 12 }
345    
346 dpavlin 42
347 dpavlin 5 =head2 dump_draft
348    
349 dpavlin 13 Dump draft data from document object.
350    
351 dpavlin 5 print $doc->dump_draft;
352    
353     =cut
354    
355     sub dump_draft {
356 dpavlin 13 my $self = shift;
357     my $draft;
358    
359     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
360 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
361 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
362 dpavlin 13 }
363    
364     if ($self->{kwords}) {
365     $draft .= '%%VECTOR';
366     while (my ($key, $value) = each %{ $self->{kwords} }) {
367     $draft .= "\t$key\t$value";
368     }
369     $draft .= "\n";
370     }
371    
372     $draft .= "\n";
373    
374 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
375     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
376 dpavlin 13
377     return $draft;
378 dpavlin 5 }
379    
380 dpavlin 42
381 dpavlin 4 =head2 delete
382 dpavlin 2
383 dpavlin 4 Empty document object
384 dpavlin 2
385 dpavlin 4 $doc->delete;
386    
387 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
388     convinience. Document objects which go out of scope will be destroyed
389     automatically.
390    
391 dpavlin 4 =cut
392    
393     sub delete {
394     my $self = shift;
395    
396 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
397 dpavlin 5 delete($self->{$data});
398     }
399 dpavlin 4
400 dpavlin 10 $self->{id} = -1;
401    
402 dpavlin 4 return 1;
403     }
404    
405    
406    
407 dpavlin 15 package Search::Estraier::Condition;
408 dpavlin 4
409 dpavlin 98 use Carp qw/carp confess croak/;
410 dpavlin 16
411 dpavlin 15 use Search::Estraier;
412     our @ISA = qw/Search::Estraier/;
413 dpavlin 4
414 dpavlin 16 =head1 Search::Estraier::Condition
415    
416     =head2 new
417    
418     my $cond = new Search::HyperEstraier::Condition;
419    
420     =cut
421    
422     sub new {
423     my $class = shift;
424     my $self = {};
425     bless($self, $class);
426    
427 dpavlin 19 $self->{max} = -1;
428     $self->{options} = 0;
429    
430 dpavlin 16 $self ? return $self : return undef;
431     }
432    
433 dpavlin 42
434 dpavlin 16 =head2 set_phrase
435    
436     $cond->set_phrase('search phrase');
437    
438     =cut
439    
440     sub set_phrase {
441     my $self = shift;
442     $self->{phrase} = $self->_s( shift );
443     }
444    
445 dpavlin 42
446 dpavlin 16 =head2 add_attr
447    
448     $cond->add_attr('@URI STRINC /~dpavlin/');
449    
450     =cut
451    
452     sub add_attr {
453     my $self = shift;
454     my $attr = shift || return;
455     push @{ $self->{attrs} }, $self->_s( $attr );
456     }
457    
458 dpavlin 42
459 dpavlin 16 =head2 set_order
460    
461     $cond->set_order('@mdate NUMD');
462    
463     =cut
464    
465     sub set_order {
466     my $self = shift;
467     $self->{order} = shift;
468     }
469    
470 dpavlin 42
471 dpavlin 16 =head2 set_max
472    
473     $cond->set_max(42);
474    
475     =cut
476    
477     sub set_max {
478     my $self = shift;
479     my $max = shift;
480 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
481 dpavlin 16 $self->{max} = $max;
482     }
483    
484 dpavlin 42
485 dpavlin 16 =head2 set_options
486    
487 dpavlin 98 $cond->set_options( 'SURE' );
488 dpavlin 16
489 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
490    
491     Possible options are:
492    
493     =over 8
494    
495     =item SURE
496    
497     check every N-gram
498    
499     =item USUAL
500    
501     check every second N-gram
502    
503     =item FAST
504    
505     check every third N-gram
506    
507     =item AGITO
508    
509     check every fourth N-gram
510    
511     =item NOIDF
512    
513     don't perform TF-IDF tuning
514    
515     =item SIMPLE
516    
517     use simplified query phrase
518    
519     =back
520    
521     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
522     options;
523    
524     This option changed in version C<0.04> of this module. It's backwards compatibile.
525    
526 dpavlin 16 =cut
527    
528 dpavlin 15 my $options = {
529     SURE => 1 << 0,
530     USUAL => 1 << 1,
531     FAST => 1 << 2,
532     AGITO => 1 << 3,
533     NOIDF => 1 << 4,
534     SIMPLE => 1 << 10,
535     };
536    
537 dpavlin 16 sub set_options {
538     my $self = shift;
539 dpavlin 98 my $opt = 0;
540     foreach my $option (@_) {
541     my $mask;
542     unless ($mask = $options->{$option}) {
543     if ($option eq '1') {
544     next;
545     } else {
546     croak "unknown option $option";
547     }
548     }
549     $opt += $mask;
550     }
551     $self->{options} = $opt;
552 dpavlin 4 }
553    
554 dpavlin 42
555 dpavlin 18 =head2 phrase
556    
557     Return search phrase.
558    
559     print $cond->phrase;
560    
561     =cut
562    
563     sub phrase {
564     my $self = shift;
565     return $self->{phrase};
566     }
567    
568 dpavlin 42
569 dpavlin 19 =head2 order
570 dpavlin 18
571 dpavlin 19 Return search result order.
572    
573     print $cond->order;
574    
575     =cut
576    
577     sub order {
578     my $self = shift;
579     return $self->{order};
580     }
581    
582 dpavlin 42
583 dpavlin 19 =head2 attrs
584    
585     Return search result attrs.
586    
587     my @cond_attrs = $cond->attrs;
588    
589     =cut
590    
591     sub attrs {
592     my $self = shift;
593     #croak "attrs return array, not scalar" if (! wantarray);
594 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
595 dpavlin 19 }
596    
597 dpavlin 42
598 dpavlin 19 =head2 max
599    
600     Return maximum number of results.
601    
602     print $cond->max;
603    
604     C<-1> is returned for unitialized value, C<0> is unlimited.
605    
606     =cut
607    
608     sub max {
609     my $self = shift;
610     return $self->{max};
611     }
612    
613 dpavlin 42
614 dpavlin 19 =head2 options
615    
616     Return options for this condition.
617    
618     print $cond->options;
619    
620     Options are returned in numerical form.
621    
622     =cut
623    
624     sub options {
625     my $self = shift;
626     return $self->{options};
627     }
628    
629    
630 dpavlin 116 =head2 set_skip
631    
632     Set number of skipped documents from beginning of results
633    
634     $cond->set_skip(42);
635    
636     Similar to C<offset> in RDBMS.
637    
638     =cut
639    
640     sub set_skip {
641     my $self = shift;
642     $self->{skip} = shift;
643     }
644    
645     =head2 skip
646    
647     Return skip for this condition.
648    
649     print $cond->skip;
650    
651     =cut
652    
653     sub skip {
654     my $self = shift;
655     return $self->{skip};
656     }
657    
658    
659 dpavlin 20 package Search::Estraier::ResultDocument;
660    
661 dpavlin 24 use Carp qw/croak/;
662 dpavlin 20
663 dpavlin 24 #use Search::Estraier;
664     #our @ISA = qw/Search::Estraier/;
665 dpavlin 20
666     =head1 Search::Estraier::ResultDocument
667    
668     =head2 new
669    
670 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
671 dpavlin 20 uri => 'http://localhost/document/uri/42',
672     attrs => {
673     foo => 1,
674     bar => 2,
675     },
676     snippet => 'this is a text of snippet'
677     keywords => 'this\tare\tkeywords'
678     );
679    
680     =cut
681    
682     sub new {
683     my $class = shift;
684     my $self = {@_};
685     bless($self, $class);
686    
687 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
688 dpavlin 20
689     $self ? return $self : return undef;
690     }
691    
692 dpavlin 42
693 dpavlin 23 =head2 uri
694 dpavlin 20
695 dpavlin 23 Return URI of result document
696 dpavlin 20
697 dpavlin 23 print $rdoc->uri;
698    
699     =cut
700    
701     sub uri {
702     my $self = shift;
703     return $self->{uri};
704     }
705    
706    
707     =head2 attr_names
708    
709     Returns array with attribute names from result document object.
710    
711     my @attrs = $rdoc->attr_names;
712    
713     =cut
714    
715     sub attr_names {
716     my $self = shift;
717     croak "attr_names return array, not scalar" if (! wantarray);
718     return sort keys %{ $self->{attrs} };
719     }
720    
721 dpavlin 42
722 dpavlin 23 =head2 attr
723    
724     Returns value of an attribute.
725    
726     my $value = $rdoc->attr( 'attribute' );
727    
728     =cut
729    
730     sub attr {
731     my $self = shift;
732     my $name = shift || return;
733     return $self->{attrs}->{ $name };
734     }
735    
736 dpavlin 42
737 dpavlin 23 =head2 snippet
738    
739     Return snippet from result document
740    
741     print $rdoc->snippet;
742    
743     =cut
744    
745     sub snippet {
746     my $self = shift;
747     return $self->{snippet};
748     }
749    
750 dpavlin 42
751 dpavlin 23 =head2 keywords
752    
753     Return keywords from result document
754    
755     print $rdoc->keywords;
756    
757     =cut
758    
759     sub keywords {
760     my $self = shift;
761     return $self->{keywords};
762     }
763    
764    
765 dpavlin 25 package Search::Estraier::NodeResult;
766    
767     use Carp qw/croak/;
768    
769     #use Search::Estraier;
770     #our @ISA = qw/Search::Estraier/;
771    
772     =head1 Search::Estraier::NodeResult
773    
774     =head2 new
775    
776     my $res = new Search::HyperEstraier::NodeResult(
777     docs => @array_of_rdocs,
778     hits => %hash_with_hints,
779     );
780    
781     =cut
782    
783     sub new {
784     my $class = shift;
785     my $self = {@_};
786     bless($self, $class);
787    
788     foreach my $f (qw/docs hints/) {
789     croak "missing $f for ResultDocument" unless defined($self->{$f});
790     }
791    
792     $self ? return $self : return undef;
793     }
794    
795 dpavlin 42
796 dpavlin 25 =head2 doc_num
797    
798     Return number of documents
799    
800     print $res->doc_num;
801    
802 dpavlin 100 This will return real number of documents (limited by C<max>).
803     If you want to get total number of hits, see C<hits>.
804    
805 dpavlin 25 =cut
806    
807     sub doc_num {
808     my $self = shift;
809 dpavlin 53 return $#{$self->{docs}} + 1;
810 dpavlin 25 }
811    
812 dpavlin 42
813 dpavlin 25 =head2 get_doc
814    
815     Return single document
816    
817     my $doc = $res->get_doc( 42 );
818    
819     Returns undef if document doesn't exist.
820    
821     =cut
822    
823     sub get_doc {
824     my $self = shift;
825     my $num = shift;
826 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
827 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
828     return $self->{docs}->[$num];
829     }
830    
831 dpavlin 42
832 dpavlin 25 =head2 hint
833    
834     Return specific hint from results.
835    
836 dpavlin 100 print $res->hint( 'VERSION' );
837 dpavlin 25
838     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
839     C<TIME>, C<LINK#n>, C<VIEW>.
840    
841     =cut
842    
843     sub hint {
844     my $self = shift;
845     my $key = shift || return;
846     return $self->{hints}->{$key};
847     }
848    
849 dpavlin 108 =head2 hints
850 dpavlin 25
851 dpavlin 91 More perlish version of C<hint>. This one returns hash.
852    
853 dpavlin 100 my %hints = $res->hints;
854 dpavlin 91
855     =cut
856    
857     sub hints {
858     my $self = shift;
859     return $self->{hints};
860     }
861    
862 dpavlin 100 =head2 hits
863    
864     Syntaxtic sugar for total number of hits for this query
865    
866     print $res->hits;
867    
868     It's same as
869    
870     print $res->hint('HIT');
871    
872     but shorter.
873    
874     =cut
875    
876     sub hits {
877     my $self = shift;
878     return $self->{hints}->{'HIT'} || 0;
879     }
880    
881 dpavlin 27 package Search::Estraier::Node;
882    
883 dpavlin 44 use Carp qw/carp croak confess/;
884 dpavlin 33 use URI;
885 dpavlin 36 use MIME::Base64;
886 dpavlin 33 use IO::Socket::INET;
887 dpavlin 49 use URI::Escape qw/uri_escape/;
888 dpavlin 29
889 dpavlin 27 =head1 Search::Estraier::Node
890    
891     =head2 new
892    
893     my $node = new Search::HyperEstraier::Node;
894    
895 dpavlin 65 or optionally with C<url> as parametar
896    
897     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
898    
899 dpavlin 78 or in more verbose form
900    
901     my $node = new Search::HyperEstraier::Node(
902     url => 'http://localhost:1978/node/test',
903 dpavlin 132 user => 'admin',
904     passwd => 'admin'
905 dpavlin 139 create => 1,
906     label => 'optional node label',
907 dpavlin 78 debug => 1,
908     croak_on_error => 1
909     );
910    
911     with following arguments:
912    
913     =over 4
914    
915     =item url
916    
917     URL to node
918    
919 dpavlin 132 =item user
920    
921     specify username for node server authentication
922    
923     =item passwd
924    
925     password for authentication
926    
927 dpavlin 139 =item create
928    
929     create node if it doesn't exists
930    
931     =item label
932    
933     optional label for new node if C<create> is used
934    
935 dpavlin 78 =item debug
936    
937     dumps a B<lot> of debugging output
938    
939     =item croak_on_error
940    
941     very helpful during development. It will croak on all errors instead of
942     silently returning C<-1> (which is convention of Hyper Estraier API in other
943     languages).
944    
945     =back
946    
947 dpavlin 27 =cut
948    
949     sub new {
950     my $class = shift;
951     my $self = {
952     pxport => -1,
953 dpavlin 33 timeout => 0, # this used to be -1
954 dpavlin 27 wwidth => 480,
955     hwidth => 96,
956     awidth => 96,
957     status => -1,
958     };
959 dpavlin 111
960 dpavlin 27 bless($self, $class);
961    
962 dpavlin 65 if ($#_ == 0) {
963     $self->{url} = shift;
964     } else {
965 dpavlin 78 %$self = ( %$self, @_ );
966    
967 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
968    
969 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
970     }
971 dpavlin 57
972 dpavlin 111 $self->{inform} = {
973     dnum => -1,
974     wnum => -1,
975     size => -1.0,
976     };
977    
978 dpavlin 139 if ($self->{create}) {
979 dpavlin 140 if (! eval { $self->name } || $@) {
980 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
981     croak "can't find node name in '$self->{url}'" unless ($name);
982     my $label = $self->{label} || $name;
983     $self->master(
984     action => 'nodeadd',
985     name => $name,
986     label => $label,
987     ) || croak "can't create node $name ($label)";
988     }
989     }
990    
991 dpavlin 27 $self ? return $self : return undef;
992     }
993    
994 dpavlin 42
995 dpavlin 29 =head2 set_url
996    
997     Specify URL to node server
998    
999     $node->set_url('http://localhost:1978');
1000    
1001     =cut
1002    
1003     sub set_url {
1004     my $self = shift;
1005     $self->{url} = shift;
1006     }
1007    
1008 dpavlin 42
1009 dpavlin 29 =head2 set_proxy
1010    
1011     Specify proxy server to connect to node server
1012    
1013     $node->set_proxy('proxy.example.com', 8080);
1014    
1015     =cut
1016    
1017     sub set_proxy {
1018     my $self = shift;
1019     my ($host,$port) = @_;
1020 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1021 dpavlin 29 $self->{pxhost} = $host;
1022     $self->{pxport} = $port;
1023     }
1024    
1025 dpavlin 42
1026 dpavlin 30 =head2 set_timeout
1027    
1028     Specify timeout of connection in seconds
1029    
1030     $node->set_timeout( 15 );
1031    
1032     =cut
1033    
1034     sub set_timeout {
1035     my $self = shift;
1036     my $sec = shift;
1037 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1038 dpavlin 30 $self->{timeout} = $sec;
1039     }
1040    
1041 dpavlin 42
1042 dpavlin 31 =head2 set_auth
1043    
1044     Specify name and password for authentication to node server.
1045    
1046     $node->set_auth('clint','eastwood');
1047    
1048     =cut
1049    
1050     sub set_auth {
1051     my $self = shift;
1052     my ($login,$passwd) = @_;
1053 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1054     chomp($basic_auth);
1055     $self->{auth} = $basic_auth;
1056 dpavlin 31 }
1057    
1058 dpavlin 42
1059 dpavlin 32 =head2 status
1060    
1061     Return status code of last request.
1062    
1063 dpavlin 40 print $node->status;
1064 dpavlin 32
1065     C<-1> means connection failure.
1066    
1067     =cut
1068    
1069     sub status {
1070     my $self = shift;
1071     return $self->{status};
1072     }
1073    
1074 dpavlin 42
1075 dpavlin 40 =head2 put_doc
1076    
1077 dpavlin 41 Add a document
1078 dpavlin 40
1079 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1080    
1081 dpavlin 151 Return true on success or false on failure.
1082 dpavlin 41
1083 dpavlin 40 =cut
1084    
1085     sub put_doc {
1086     my $self = shift;
1087     my $doc = shift || return;
1088 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1089 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/put_doc',
1090 dpavlin 41 'text/x-estraier-draft',
1091     $doc->dump_draft,
1092     undef
1093 dpavlin 151 ) == 200) {
1094     $self->_clear_info;
1095     return 1;
1096     }
1097     return undef;
1098 dpavlin 40 }
1099    
1100 dpavlin 41
1101     =head2 out_doc
1102    
1103     Remove a document
1104    
1105     $node->out_doc( document_id ) or "can't remove document";
1106    
1107     Return true on success or false on failture.
1108    
1109     =cut
1110    
1111     sub out_doc {
1112     my $self = shift;
1113     my $id = shift || return;
1114     return unless ($self->{url});
1115 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1116 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1117 dpavlin 41 'application/x-www-form-urlencoded',
1118     "id=$id",
1119     undef
1120 dpavlin 150 ) == 200) {
1121 dpavlin 151 $self->_clear_info;
1122     return 1;
1123 dpavlin 150 }
1124     return undef;
1125 dpavlin 41 }
1126    
1127    
1128     =head2 out_doc_by_uri
1129    
1130     Remove a registrated document using it's uri
1131    
1132 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1133 dpavlin 41
1134     Return true on success or false on failture.
1135    
1136     =cut
1137    
1138     sub out_doc_by_uri {
1139     my $self = shift;
1140     my $uri = shift || return;
1141     return unless ($self->{url});
1142 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1143 dpavlin 41 'application/x-www-form-urlencoded',
1144 dpavlin 50 "uri=" . uri_escape($uri),
1145 dpavlin 41 undef
1146 dpavlin 150 ) == 200) {
1147 dpavlin 151 $self->_clear_info;
1148     return 1;
1149 dpavlin 150 }
1150     return undef;
1151 dpavlin 41 }
1152    
1153 dpavlin 42
1154     =head2 edit_doc
1155    
1156     Edit attributes of a document
1157    
1158     $node->edit_doc( $document_draft ) or die "can't edit document";
1159    
1160     Return true on success or false on failture.
1161    
1162     =cut
1163    
1164     sub edit_doc {
1165     my $self = shift;
1166     my $doc = shift || return;
1167 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1168 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1169 dpavlin 42 'text/x-estraier-draft',
1170     $doc->dump_draft,
1171     undef
1172 dpavlin 151 ) == 200) {
1173     $self->_clear_info;
1174     return 1;
1175     }
1176     return undef;
1177 dpavlin 42 }
1178    
1179    
1180 dpavlin 43 =head2 get_doc
1181    
1182     Retreive document
1183    
1184     my $doc = $node->get_doc( document_id ) or die "can't get document";
1185    
1186     Return true on success or false on failture.
1187    
1188     =cut
1189    
1190     sub get_doc {
1191     my $self = shift;
1192     my $id = shift || return;
1193     return $self->_fetch_doc( id => $id );
1194     }
1195    
1196 dpavlin 44
1197 dpavlin 43 =head2 get_doc_by_uri
1198    
1199     Retreive document
1200    
1201 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1202 dpavlin 43
1203     Return true on success or false on failture.
1204    
1205     =cut
1206    
1207     sub get_doc_by_uri {
1208     my $self = shift;
1209     my $uri = shift || return;
1210     return $self->_fetch_doc( uri => $uri );
1211     }
1212    
1213 dpavlin 44
1214 dpavlin 49 =head2 get_doc_attr
1215    
1216     Retrieve the value of an atribute from object
1217    
1218     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1219     die "can't get document attribute";
1220    
1221     =cut
1222    
1223     sub get_doc_attr {
1224     my $self = shift;
1225     my ($id,$name) = @_;
1226     return unless ($id && $name);
1227     return $self->_fetch_doc( id => $id, attr => $name );
1228     }
1229    
1230    
1231     =head2 get_doc_attr_by_uri
1232    
1233     Retrieve the value of an atribute from object
1234    
1235     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1236     die "can't get document attribute";
1237    
1238     =cut
1239    
1240     sub get_doc_attr_by_uri {
1241     my $self = shift;
1242     my ($uri,$name) = @_;
1243     return unless ($uri && $name);
1244     return $self->_fetch_doc( uri => $uri, attr => $name );
1245     }
1246    
1247    
1248 dpavlin 44 =head2 etch_doc
1249    
1250     Exctract document keywords
1251    
1252     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1253    
1254     =cut
1255    
1256 dpavlin 49 sub etch_doc {
1257 dpavlin 44 my $self = shift;
1258     my $id = shift || return;
1259     return $self->_fetch_doc( id => $id, etch => 1 );
1260     }
1261    
1262     =head2 etch_doc_by_uri
1263    
1264     Retreive document
1265    
1266 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1267 dpavlin 44
1268     Return true on success or false on failture.
1269    
1270     =cut
1271    
1272     sub etch_doc_by_uri {
1273     my $self = shift;
1274     my $uri = shift || return;
1275     return $self->_fetch_doc( uri => $uri, etch => 1 );
1276     }
1277    
1278    
1279 dpavlin 45 =head2 uri_to_id
1280    
1281     Get ID of document specified by URI
1282    
1283     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1284    
1285 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1286    
1287 dpavlin 45 =cut
1288    
1289     sub uri_to_id {
1290     my $self = shift;
1291     my $uri = shift || return;
1292 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1293 dpavlin 45 }
1294    
1295    
1296 dpavlin 43 =head2 _fetch_doc
1297    
1298 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1299     C<etch_doc>, C<etch_doc_by_uri>.
1300 dpavlin 43
1301 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1302     my $doc = $node->_fetch_doc( id => 42 );
1303     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1304 dpavlin 43
1305 dpavlin 45 # to extract keywords, add etch
1306     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1307     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1308    
1309 dpavlin 49 # to get document attrubute add attr
1310     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1311     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1312    
1313 dpavlin 45 # more general form which allows implementation of
1314     # uri_to_id
1315     my $id = $node->_fetch_doc(
1316     uri => 'file:///document/uri/42',
1317     path => '/uri_to_id',
1318     chomp_resbody => 1
1319     );
1320    
1321 dpavlin 43 =cut
1322    
1323     sub _fetch_doc {
1324     my $self = shift;
1325 dpavlin 44 my $a = {@_};
1326     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1327    
1328     my ($arg, $resbody);
1329    
1330 dpavlin 45 my $path = $a->{path} || '/get_doc';
1331 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1332    
1333     if ($a->{id}) {
1334     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1335     $arg = 'id=' . $a->{id};
1336     } elsif ($a->{uri}) {
1337 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1338 dpavlin 44 } else {
1339     confess "unhandled argument. Need id or uri.";
1340 dpavlin 43 }
1341 dpavlin 44
1342 dpavlin 49 if ($a->{attr}) {
1343     $path = '/get_doc_attr';
1344     $arg .= '&attr=' . uri_escape($a->{attr});
1345     $a->{chomp_resbody} = 1;
1346     }
1347    
1348 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1349 dpavlin 43 'application/x-www-form-urlencoded',
1350 dpavlin 44 $arg,
1351 dpavlin 45 \$resbody,
1352 dpavlin 103 $a->{croak_on_error},
1353 dpavlin 43 );
1354 dpavlin 44
1355 dpavlin 43 return if ($rv != 200);
1356 dpavlin 44
1357     if ($a->{etch}) {
1358     $self->{kwords} = {};
1359     return +{} unless ($resbody);
1360     foreach my $l (split(/\n/, $resbody)) {
1361     my ($k,$v) = split(/\t/, $l, 2);
1362     $self->{kwords}->{$k} = $v if ($v);
1363     }
1364     return $self->{kwords};
1365 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1366     return unless (defined($resbody));
1367     chomp($resbody);
1368     return $resbody;
1369 dpavlin 44 } else {
1370     return new Search::Estraier::Document($resbody);
1371     }
1372 dpavlin 43 }
1373    
1374    
1375 dpavlin 48 =head2 name
1376 dpavlin 43
1377 dpavlin 48 my $node_name = $node->name;
1378 dpavlin 43
1379 dpavlin 48 =cut
1380    
1381     sub name {
1382     my $self = shift;
1383 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1384     return $self->{inform}->{name};
1385 dpavlin 48 }
1386    
1387    
1388     =head2 label
1389    
1390     my $node_label = $node->label;
1391    
1392     =cut
1393    
1394     sub label {
1395     my $self = shift;
1396 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1397     return $self->{inform}->{label};
1398 dpavlin 48 }
1399    
1400    
1401     =head2 doc_num
1402    
1403     my $documents_in_node = $node->doc_num;
1404    
1405     =cut
1406    
1407     sub doc_num {
1408     my $self = shift;
1409 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1410     return $self->{inform}->{dnum};
1411 dpavlin 48 }
1412    
1413    
1414     =head2 word_num
1415    
1416     my $words_in_node = $node->word_num;
1417    
1418     =cut
1419    
1420     sub word_num {
1421     my $self = shift;
1422 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1423     return $self->{inform}->{wnum};
1424 dpavlin 48 }
1425    
1426    
1427     =head2 size
1428    
1429     my $node_size = $node->size;
1430    
1431     =cut
1432    
1433     sub size {
1434     my $self = shift;
1435 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1436     return $self->{inform}->{size};
1437 dpavlin 48 }
1438    
1439    
1440 dpavlin 51 =head2 search
1441 dpavlin 48
1442 dpavlin 51 Search documents which match condition
1443    
1444     my $nres = $node->search( $cond, $depth );
1445    
1446     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1447     depth for meta search.
1448    
1449     Function results C<Search::Estraier::NodeResult> object.
1450    
1451     =cut
1452    
1453     sub search {
1454     my $self = shift;
1455     my ($cond, $depth) = @_;
1456     return unless ($cond && defined($depth) && $self->{url});
1457     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1458     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1459    
1460 dpavlin 52 my $resbody;
1461 dpavlin 51
1462 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1463 dpavlin 53 'application/x-www-form-urlencoded',
1464 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1465 dpavlin 52 \$resbody,
1466     );
1467     return if ($rv != 200);
1468    
1469 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1470     my $hintsText = splice @records, 0, 2; # starts with empty record
1471     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1472    
1473     # process records
1474 dpavlin 128 my $docs = [];
1475 dpavlin 126 foreach my $record (@records)
1476     {
1477     # split into keys and snippets
1478     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1479    
1480     # create document hash
1481     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1482     $doc->{'@keywords'} = $doc->{keywords};
1483     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1484     $doc->{snippet} = $snippet;
1485    
1486     push @$docs, new Search::Estraier::ResultDocument(
1487     attrs => $doc,
1488     uri => $doc->{'@uri'},
1489     snippet => $snippet,
1490     keywords => $doc->{'keywords'},
1491     );
1492     }
1493    
1494     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1495     }
1496    
1497    
1498 dpavlin 51 =head2 cond_to_query
1499    
1500 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1501    
1502 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1503 dpavlin 51
1504     =cut
1505    
1506     sub cond_to_query {
1507     my $self = shift;
1508    
1509     my $cond = shift || return;
1510     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1511 dpavlin 61 my $depth = shift;
1512 dpavlin 51
1513     my @args;
1514    
1515     if (my $phrase = $cond->phrase) {
1516     push @args, 'phrase=' . uri_escape($phrase);
1517     }
1518    
1519     if (my @attrs = $cond->attrs) {
1520     for my $i ( 0 .. $#attrs ) {
1521 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1522 dpavlin 51 }
1523     }
1524    
1525     if (my $order = $cond->order) {
1526     push @args, 'order=' . uri_escape($order);
1527     }
1528    
1529     if (my $max = $cond->max) {
1530     push @args, 'max=' . $max;
1531     } else {
1532     push @args, 'max=' . (1 << 30);
1533     }
1534    
1535     if (my $options = $cond->options) {
1536     push @args, 'options=' . $options;
1537     }
1538    
1539 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1540 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1541     push @args, 'hwidth=' . $self->{hwidth};
1542     push @args, 'awidth=' . $self->{awidth};
1543 dpavlin 155 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1544 dpavlin 51
1545     return join('&', @args);
1546     }
1547    
1548    
1549 dpavlin 33 =head2 shuttle_url
1550 dpavlin 32
1551 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1552 dpavlin 33 master.
1553 dpavlin 2
1554 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1555 dpavlin 2
1556 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1557     body will be saved within object.
1558 dpavlin 2
1559     =cut
1560    
1561 dpavlin 59 use LWP::UserAgent;
1562    
1563 dpavlin 33 sub shuttle_url {
1564     my $self = shift;
1565 dpavlin 2
1566 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1567 dpavlin 2
1568 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1569    
1570 dpavlin 40 $self->{status} = -1;
1571 dpavlin 33
1572 dpavlin 41 warn "## $url\n" if ($self->{debug});
1573 dpavlin 36
1574 dpavlin 33 $url = new URI($url);
1575 dpavlin 37 if (
1576     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1577     !$url->host || !$url->port || $url->port < 1
1578     ) {
1579     carp "can't parse $url\n";
1580     return -1;
1581     }
1582 dpavlin 33
1583 dpavlin 59 my $ua = LWP::UserAgent->new;
1584     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1585 dpavlin 33
1586 dpavlin 59 my $req;
1587 dpavlin 37 if ($reqbody) {
1588 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1589 dpavlin 37 } else {
1590 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1591 dpavlin 37 }
1592    
1593 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1594     $req->headers->header( 'Connection', 'close' );
1595 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1596 dpavlin 59 $req->content_type( $content_type );
1597 dpavlin 37
1598 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1599 dpavlin 2
1600 dpavlin 37 if ($reqbody) {
1601 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1602 dpavlin 59 $req->content( $reqbody );
1603 dpavlin 33 }
1604 dpavlin 2
1605 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1606 dpavlin 2
1607 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1608 dpavlin 2
1609 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1610    
1611 dpavlin 78 if (! $res->is_success) {
1612 dpavlin 103 if ($croak_on_error) {
1613 dpavlin 78 croak("can't get $url: ",$res->status_line);
1614     } else {
1615     return -1;
1616     }
1617     }
1618 dpavlin 2
1619 dpavlin 59 $$resbody .= $res->content;
1620    
1621 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1622 dpavlin 39
1623 dpavlin 40 return $self->{status};
1624 dpavlin 2 }
1625    
1626 dpavlin 48
1627 dpavlin 55 =head2 set_snippet_width
1628 dpavlin 48
1629 dpavlin 55 Set width of snippets in results
1630    
1631     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1632    
1633     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1634     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1635    
1636     C<$hwidth> specified width of strings from beginning of string. Default
1637     value is C<96>. Negative or zero value keep previous value.
1638    
1639     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1640     If negative of zero value is provided previous value is kept unchanged.
1641    
1642     =cut
1643    
1644     sub set_snippet_width {
1645     my $self = shift;
1646    
1647     my ($wwidth, $hwidth, $awidth) = @_;
1648     $self->{wwidth} = $wwidth;
1649     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1650     $self->{awidth} = $awidth if ($awidth >= 0);
1651     }
1652    
1653    
1654 dpavlin 56 =head2 set_user
1655 dpavlin 55
1656 dpavlin 56 Manage users of node
1657    
1658     $node->set_user( 'name', $mode );
1659    
1660     C<$mode> can be one of:
1661    
1662     =over 4
1663    
1664     =item 0
1665    
1666     delete account
1667    
1668     =item 1
1669    
1670     set administrative right for user
1671    
1672     =item 2
1673    
1674     set user account as guest
1675    
1676     =back
1677    
1678     Return true on success, otherwise false.
1679    
1680     =cut
1681    
1682     sub set_user {
1683     my $self = shift;
1684     my ($name, $mode) = @_;
1685    
1686     return unless ($self->{url});
1687     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1688    
1689     $self->shuttle_url( $self->{url} . '/_set_user',
1690 dpavlin 154 'application/x-www-form-urlencoded',
1691 dpavlin 56 'name=' . uri_escape($name) . '&mode=' . $mode,
1692     undef
1693     ) == 200;
1694     }
1695    
1696    
1697 dpavlin 57 =head2 set_link
1698    
1699     Manage node links
1700    
1701     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1702    
1703     If C<$credit> is negative, link is removed.
1704    
1705     =cut
1706    
1707     sub set_link {
1708     my $self = shift;
1709     my ($url, $label, $credit) = @_;
1710    
1711     return unless ($self->{url});
1712     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1713    
1714     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1715     $reqbody .= '&credit=' . $credit if ($credit > 0);
1716    
1717 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1718 dpavlin 71 'application/x-www-form-urlencoded',
1719 dpavlin 57 $reqbody,
1720     undef
1721 dpavlin 107 ) == 200) {
1722     # refresh node info after adding link
1723 dpavlin 151 $self->_clear_info;
1724 dpavlin 107 return 1;
1725     }
1726 dpavlin 150 return undef;
1727 dpavlin 57 }
1728    
1729 dpavlin 107 =head2 admins
1730 dpavlin 57
1731 dpavlin 107 my @admins = @{ $node->admins };
1732    
1733     Return array of users with admin rights on node
1734    
1735     =cut
1736    
1737     sub admins {
1738     my $self = shift;
1739 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1740     return $self->{inform}->{admins};
1741 dpavlin 107 }
1742    
1743     =head2 guests
1744    
1745     my @guests = @{ $node->guests };
1746    
1747     Return array of users with guest rights on node
1748    
1749     =cut
1750    
1751     sub guests {
1752     my $self = shift;
1753 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1754     return $self->{inform}->{guests};
1755 dpavlin 107 }
1756    
1757     =head2 links
1758    
1759     my $links = @{ $node->links };
1760    
1761     Return array of links for this node
1762    
1763     =cut
1764    
1765     sub links {
1766     my $self = shift;
1767 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1768     return $self->{inform}->{links};
1769 dpavlin 107 }
1770    
1771 dpavlin 160 =head2 cacheusage
1772    
1773     Return cache usage for a node
1774    
1775     my $cache = $node->cacheusage;
1776    
1777     =cut
1778    
1779     sub cacheusage {
1780     my $self = shift;
1781    
1782     return unless ($self->{url});
1783    
1784     my $resbody;
1785     my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1786     'text/plain',
1787     undef,
1788     \$resbody,
1789     );
1790    
1791     return if ($rv != 200 || !$resbody);
1792    
1793     return $resbody;
1794     }
1795    
1796 dpavlin 134 =head2 master
1797 dpavlin 107
1798 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1799    
1800     $node->master(
1801     action => 'sync'
1802     );
1803    
1804     All available actions are documented in
1805     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1806    
1807     =cut
1808    
1809     my $estmaster_rest = {
1810     shutdown => {
1811     status => 202,
1812     },
1813     sync => {
1814     status => 202,
1815     },
1816     backup => {
1817     status => 202,
1818     },
1819     userlist => {
1820     status => 200,
1821 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1822 dpavlin 134 },
1823     useradd => {
1824 dpavlin 135 required => [ qw/name passwd flags/ ],
1825     optional => [ qw/fname misc/ ],
1826 dpavlin 134 status => 200,
1827     },
1828     userdel => {
1829 dpavlin 135 required => [ qw/name/ ],
1830 dpavlin 134 status => 200,
1831     },
1832     nodelist => {
1833     status => 200,
1834 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1835 dpavlin 134 },
1836     nodeadd => {
1837 dpavlin 135 required => [ qw/name/ ],
1838     optional => [ qw/label/ ],
1839 dpavlin 134 status => 200,
1840     },
1841     nodedel => {
1842 dpavlin 135 required => [ qw/name/ ],
1843 dpavlin 134 status => 200,
1844     },
1845     nodeclr => {
1846 dpavlin 135 required => [ qw/name/ ],
1847 dpavlin 134 status => 200,
1848     },
1849     nodertt => {
1850     status => 200,
1851     },
1852     };
1853    
1854     sub master {
1855     my $self = shift;
1856    
1857     my $args = {@_};
1858    
1859     # have action?
1860     my $action = $args->{action} || croak "need action, available: ",
1861     join(", ",keys %{ $estmaster_rest });
1862    
1863     # check if action is valid
1864     my $rest = $estmaster_rest->{$action};
1865     croak "action '$action' is not supported, available actions: ",
1866     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1867    
1868     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1869    
1870     my @args;
1871    
1872     if ($rest->{required} || $rest->{optional}) {
1873    
1874     map {
1875     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1876     push @args, $_ . '=' . uri_escape( $args->{$_} );
1877 dpavlin 136 } ( @{ $rest->{required} } );
1878 dpavlin 134
1879     map {
1880     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1881 dpavlin 136 } ( @{ $rest->{optional} } );
1882 dpavlin 134
1883     }
1884    
1885     my $uri = new URI( $self->{url} );
1886    
1887     my $resbody;
1888    
1889 dpavlin 135 my $status = $self->shuttle_url(
1890 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1891     'application/x-www-form-urlencoded',
1892     join('&', @args),
1893     \$resbody,
1894     1,
1895 dpavlin 135 ) or confess "shuttle_url failed";
1896 dpavlin 134
1897 dpavlin 135 if ($status == $rest->{status}) {
1898 dpavlin 149
1899     # refresh node info after sync
1900 dpavlin 151 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
1901 dpavlin 149
1902 dpavlin 135 if ($rest->{returns} && wantarray) {
1903 dpavlin 134
1904     my @results;
1905 dpavlin 135 my $fields = $#{$rest->{returns}};
1906 dpavlin 134
1907     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1908 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1909 dpavlin 134 my $row;
1910 dpavlin 135 foreach my $i ( 0 .. $fields) {
1911     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1912     }
1913 dpavlin 134 push @results, $row;
1914     }
1915    
1916     return @results;
1917    
1918 dpavlin 135 } elsif ($resbody) {
1919 dpavlin 136 chomp $resbody;
1920 dpavlin 134 return $resbody;
1921 dpavlin 135 } else {
1922     return 0E0;
1923 dpavlin 134 }
1924     }
1925 dpavlin 135
1926     carp "expected status $rest->{status}, but got $status";
1927     return undef;
1928 dpavlin 134 }
1929    
1930 dpavlin 55 =head1 PRIVATE METHODS
1931    
1932     You could call those directly, but you don't have to. I hope.
1933    
1934     =head2 _set_info
1935    
1936 dpavlin 48 Set information for node
1937    
1938 dpavlin 55 $node->_set_info;
1939 dpavlin 48
1940     =cut
1941    
1942 dpavlin 55 sub _set_info {
1943 dpavlin 48 my $self = shift;
1944    
1945     $self->{status} = -1;
1946     return unless ($self->{url});
1947    
1948     my $resbody;
1949     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1950     'text/plain',
1951     undef,
1952     \$resbody,
1953     );
1954    
1955     return if ($rv != 200 || !$resbody);
1956    
1957 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1958 dpavlin 48
1959 dpavlin 151 $self->_clear_info;
1960 dpavlin 111
1961     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1962     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1963    
1964 dpavlin 107 return $resbody unless (@lines);
1965    
1966     shift @lines;
1967    
1968     while(my $admin = shift @lines) {
1969 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1970 dpavlin 107 }
1971 dpavlin 111
1972 dpavlin 107 while(my $guest = shift @lines) {
1973 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1974 dpavlin 107 }
1975    
1976     while(my $link = shift @lines) {
1977 dpavlin 111 push @{$self->{inform}->{links}}, $link;
1978 dpavlin 107 }
1979    
1980     return $resbody;
1981    
1982 dpavlin 48 }
1983    
1984 dpavlin 151 =head2 _clear_info
1985    
1986     Clear information for node
1987    
1988     $node->_clear_info;
1989    
1990     On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
1991     info will be fetch again from Hyper Estraier.
1992    
1993     =cut
1994     sub _clear_info {
1995     my $self = shift;
1996     $self->{inform} = {
1997     dnum => -1,
1998     wnum => -1,
1999     size => -1.0,
2000     };
2001     }
2002    
2003 dpavlin 2 ###
2004    
2005     =head1 EXPORT
2006    
2007     Nothing.
2008    
2009     =head1 SEE ALSO
2010    
2011     L<http://hyperestraier.sourceforge.net/>
2012    
2013     Hyper Estraier Ruby interface on which this module is based.
2014    
2015 dpavlin 164 Hyper Estraier now also has pure-perl binding included in distribution. It's
2016     a faster way to access databases directly if you are not running
2017     C<estmaster> P2P server.
2018    
2019 dpavlin 2 =head1 AUTHOR
2020    
2021     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2022    
2023 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2024 dpavlin 2
2025     =head1 COPYRIGHT AND LICENSE
2026    
2027 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2028 dpavlin 2
2029     This library is free software; you can redistribute it and/or modify
2030     it under the GPL v2 or later.
2031    
2032     =cut
2033    
2034     1;

  ViewVC Help
Powered by ViewVC 1.1.26