/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 160 - (hide annotations)
Sat Jun 24 15:34:42 2006 UTC (17 years, 9 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 35771 byte(s)
added cacheusage, version bumped to 0.07_2
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 160 our $VERSION = '0.07_2';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 14 This class implements Document which is collection of attributes
124     (key=value), vectors (also key value) display text and hidden text.
125    
126 dpavlin 42
127 dpavlin 2 =head2 new
128    
129 dpavlin 14 Create new document, empty or from draft.
130    
131 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
132 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
133 dpavlin 2
134     =cut
135    
136     sub new {
137     my $class = shift;
138 dpavlin 14 my $self = {};
139 dpavlin 2 bless($self, $class);
140    
141 dpavlin 6 $self->{id} = -1;
142    
143 dpavlin 14 my $draft = shift;
144    
145     if ($draft) {
146     my $in_text = 0;
147     foreach my $line (split(/\n/, $draft)) {
148    
149     if ($in_text) {
150     if ($line =~ /^\t/) {
151     push @{ $self->{htexts} }, substr($line, 1);
152     } else {
153     push @{ $self->{dtexts} }, $line;
154     }
155     next;
156     }
157    
158     if ($line =~ m/^%VECTOR\t(.+)$/) {
159     my @fields = split(/\t/, $1);
160     for my $i ( 0 .. ($#fields - 1) ) {
161     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
162     $i++;
163     }
164     next;
165     } elsif ($line =~ m/^%/) {
166     # What is this? comment?
167     #warn "$line\n";
168     next;
169     } elsif ($line =~ m/^$/) {
170     $in_text = 1;
171     next;
172 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
173 dpavlin 14 $self->{attrs}->{ $1 } = $2;
174     next;
175     }
176    
177 dpavlin 81 warn "draft ignored: '$line'\n";
178 dpavlin 14 }
179     }
180    
181 dpavlin 2 $self ? return $self : return undef;
182     }
183    
184 dpavlin 4
185 dpavlin 2 =head2 add_attr
186    
187 dpavlin 6 Add an attribute.
188    
189 dpavlin 2 $doc->add_attr( name => 'value' );
190    
191 dpavlin 9 Delete attribute using
192 dpavlin 5
193     $doc->add_attr( name => undef );
194    
195 dpavlin 2 =cut
196    
197     sub add_attr {
198     my $self = shift;
199     my $attrs = {@_};
200    
201     while (my ($name, $value) = each %{ $attrs }) {
202 dpavlin 9 if (! defined($value)) {
203 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
204 dpavlin 9 } else {
205 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
206 dpavlin 9 }
207 dpavlin 2 }
208 dpavlin 8
209     return 1;
210 dpavlin 2 }
211    
212 dpavlin 5
213     =head2 add_text
214    
215 dpavlin 6 Add a sentence of text.
216    
217 dpavlin 5 $doc->add_text('this is example text to display');
218    
219     =cut
220    
221     sub add_text {
222     my $self = shift;
223     my $text = shift;
224     return unless defined($text);
225    
226 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
227 dpavlin 5 }
228    
229    
230     =head2 add_hidden_text
231    
232 dpavlin 6 Add a hidden sentence.
233    
234 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
235    
236     =cut
237    
238     sub add_hidden_text {
239     my $self = shift;
240     my $text = shift;
241     return unless defined($text);
242    
243 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
244 dpavlin 5 }
245    
246 dpavlin 42
247 dpavlin 6 =head2 id
248    
249     Get the ID number of document. If the object has never been registred, C<-1> is returned.
250    
251     print $doc->id;
252    
253     =cut
254    
255     sub id {
256     my $self = shift;
257     return $self->{id};
258     }
259    
260 dpavlin 42
261 dpavlin 7 =head2 attr_names
262    
263 dpavlin 9 Returns array with attribute names from document object.
264 dpavlin 7
265     my @attrs = $doc->attr_names;
266    
267     =cut
268    
269     sub attr_names {
270     my $self = shift;
271 dpavlin 60 return unless ($self->{attrs});
272     #croak "attr_names return array, not scalar" if (! wantarray);
273 dpavlin 7 return sort keys %{ $self->{attrs} };
274     }
275    
276 dpavlin 8
277     =head2 attr
278    
279 dpavlin 9 Returns value of an attribute.
280 dpavlin 8
281     my $value = $doc->attr( 'attribute' );
282    
283     =cut
284    
285     sub attr {
286     my $self = shift;
287     my $name = shift;
288 dpavlin 60 return unless (defined($name) && $self->{attrs});
289     return $self->{attrs}->{ $name };
290 dpavlin 8 }
291    
292 dpavlin 9
293     =head2 texts
294    
295     Returns array with text sentences.
296    
297     my @texts = $doc->texts;
298    
299     =cut
300    
301     sub texts {
302     my $self = shift;
303 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
304     return @{ $self->{dtexts} } if ($self->{dtexts});
305 dpavlin 9 }
306    
307 dpavlin 42
308 dpavlin 12 =head2 cat_texts
309    
310     Return whole text as single scalar.
311    
312     my $text = $doc->cat_texts;
313    
314     =cut
315    
316     sub cat_texts {
317     my $self = shift;
318 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
319 dpavlin 12 }
320    
321 dpavlin 42
322 dpavlin 5 =head2 dump_draft
323    
324 dpavlin 13 Dump draft data from document object.
325    
326 dpavlin 5 print $doc->dump_draft;
327    
328     =cut
329    
330     sub dump_draft {
331 dpavlin 13 my $self = shift;
332     my $draft;
333    
334     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
335 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
336 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
337 dpavlin 13 }
338    
339     if ($self->{kwords}) {
340     $draft .= '%%VECTOR';
341     while (my ($key, $value) = each %{ $self->{kwords} }) {
342     $draft .= "\t$key\t$value";
343     }
344     $draft .= "\n";
345     }
346    
347     $draft .= "\n";
348    
349 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
350     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
351 dpavlin 13
352     return $draft;
353 dpavlin 5 }
354    
355 dpavlin 42
356 dpavlin 4 =head2 delete
357 dpavlin 2
358 dpavlin 4 Empty document object
359 dpavlin 2
360 dpavlin 4 $doc->delete;
361    
362 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
363     convinience. Document objects which go out of scope will be destroyed
364     automatically.
365    
366 dpavlin 4 =cut
367    
368     sub delete {
369     my $self = shift;
370    
371 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
372 dpavlin 5 delete($self->{$data});
373     }
374 dpavlin 4
375 dpavlin 10 $self->{id} = -1;
376    
377 dpavlin 4 return 1;
378     }
379    
380    
381    
382 dpavlin 15 package Search::Estraier::Condition;
383 dpavlin 4
384 dpavlin 98 use Carp qw/carp confess croak/;
385 dpavlin 16
386 dpavlin 15 use Search::Estraier;
387     our @ISA = qw/Search::Estraier/;
388 dpavlin 4
389 dpavlin 16 =head1 Search::Estraier::Condition
390    
391     =head2 new
392    
393     my $cond = new Search::HyperEstraier::Condition;
394    
395     =cut
396    
397     sub new {
398     my $class = shift;
399     my $self = {};
400     bless($self, $class);
401    
402 dpavlin 19 $self->{max} = -1;
403     $self->{options} = 0;
404    
405 dpavlin 16 $self ? return $self : return undef;
406     }
407    
408 dpavlin 42
409 dpavlin 16 =head2 set_phrase
410    
411     $cond->set_phrase('search phrase');
412    
413     =cut
414    
415     sub set_phrase {
416     my $self = shift;
417     $self->{phrase} = $self->_s( shift );
418     }
419    
420 dpavlin 42
421 dpavlin 16 =head2 add_attr
422    
423     $cond->add_attr('@URI STRINC /~dpavlin/');
424    
425     =cut
426    
427     sub add_attr {
428     my $self = shift;
429     my $attr = shift || return;
430     push @{ $self->{attrs} }, $self->_s( $attr );
431     }
432    
433 dpavlin 42
434 dpavlin 16 =head2 set_order
435    
436     $cond->set_order('@mdate NUMD');
437    
438     =cut
439    
440     sub set_order {
441     my $self = shift;
442     $self->{order} = shift;
443     }
444    
445 dpavlin 42
446 dpavlin 16 =head2 set_max
447    
448     $cond->set_max(42);
449    
450     =cut
451    
452     sub set_max {
453     my $self = shift;
454     my $max = shift;
455 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
456 dpavlin 16 $self->{max} = $max;
457     }
458    
459 dpavlin 42
460 dpavlin 16 =head2 set_options
461    
462 dpavlin 98 $cond->set_options( 'SURE' );
463 dpavlin 16
464 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
465    
466     Possible options are:
467    
468     =over 8
469    
470     =item SURE
471    
472     check every N-gram
473    
474     =item USUAL
475    
476     check every second N-gram
477    
478     =item FAST
479    
480     check every third N-gram
481    
482     =item AGITO
483    
484     check every fourth N-gram
485    
486     =item NOIDF
487    
488     don't perform TF-IDF tuning
489    
490     =item SIMPLE
491    
492     use simplified query phrase
493    
494     =back
495    
496     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
497     options;
498    
499     This option changed in version C<0.04> of this module. It's backwards compatibile.
500    
501 dpavlin 16 =cut
502    
503 dpavlin 15 my $options = {
504     SURE => 1 << 0,
505     USUAL => 1 << 1,
506     FAST => 1 << 2,
507     AGITO => 1 << 3,
508     NOIDF => 1 << 4,
509     SIMPLE => 1 << 10,
510     };
511    
512 dpavlin 16 sub set_options {
513     my $self = shift;
514 dpavlin 98 my $opt = 0;
515     foreach my $option (@_) {
516     my $mask;
517     unless ($mask = $options->{$option}) {
518     if ($option eq '1') {
519     next;
520     } else {
521     croak "unknown option $option";
522     }
523     }
524     $opt += $mask;
525     }
526     $self->{options} = $opt;
527 dpavlin 4 }
528    
529 dpavlin 42
530 dpavlin 18 =head2 phrase
531    
532     Return search phrase.
533    
534     print $cond->phrase;
535    
536     =cut
537    
538     sub phrase {
539     my $self = shift;
540     return $self->{phrase};
541     }
542    
543 dpavlin 42
544 dpavlin 19 =head2 order
545 dpavlin 18
546 dpavlin 19 Return search result order.
547    
548     print $cond->order;
549    
550     =cut
551    
552     sub order {
553     my $self = shift;
554     return $self->{order};
555     }
556    
557 dpavlin 42
558 dpavlin 19 =head2 attrs
559    
560     Return search result attrs.
561    
562     my @cond_attrs = $cond->attrs;
563    
564     =cut
565    
566     sub attrs {
567     my $self = shift;
568     #croak "attrs return array, not scalar" if (! wantarray);
569 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
570 dpavlin 19 }
571    
572 dpavlin 42
573 dpavlin 19 =head2 max
574    
575     Return maximum number of results.
576    
577     print $cond->max;
578    
579     C<-1> is returned for unitialized value, C<0> is unlimited.
580    
581     =cut
582    
583     sub max {
584     my $self = shift;
585     return $self->{max};
586     }
587    
588 dpavlin 42
589 dpavlin 19 =head2 options
590    
591     Return options for this condition.
592    
593     print $cond->options;
594    
595     Options are returned in numerical form.
596    
597     =cut
598    
599     sub options {
600     my $self = shift;
601     return $self->{options};
602     }
603    
604    
605 dpavlin 116 =head2 set_skip
606    
607     Set number of skipped documents from beginning of results
608    
609     $cond->set_skip(42);
610    
611     Similar to C<offset> in RDBMS.
612    
613     =cut
614    
615     sub set_skip {
616     my $self = shift;
617     $self->{skip} = shift;
618     }
619    
620     =head2 skip
621    
622     Return skip for this condition.
623    
624     print $cond->skip;
625    
626     =cut
627    
628     sub skip {
629     my $self = shift;
630     return $self->{skip};
631     }
632    
633    
634 dpavlin 20 package Search::Estraier::ResultDocument;
635    
636 dpavlin 24 use Carp qw/croak/;
637 dpavlin 20
638 dpavlin 24 #use Search::Estraier;
639     #our @ISA = qw/Search::Estraier/;
640 dpavlin 20
641     =head1 Search::Estraier::ResultDocument
642    
643     =head2 new
644    
645 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
646 dpavlin 20 uri => 'http://localhost/document/uri/42',
647     attrs => {
648     foo => 1,
649     bar => 2,
650     },
651     snippet => 'this is a text of snippet'
652     keywords => 'this\tare\tkeywords'
653     );
654    
655     =cut
656    
657     sub new {
658     my $class = shift;
659     my $self = {@_};
660     bless($self, $class);
661    
662 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
663 dpavlin 20
664     $self ? return $self : return undef;
665     }
666    
667 dpavlin 42
668 dpavlin 23 =head2 uri
669 dpavlin 20
670 dpavlin 23 Return URI of result document
671 dpavlin 20
672 dpavlin 23 print $rdoc->uri;
673    
674     =cut
675    
676     sub uri {
677     my $self = shift;
678     return $self->{uri};
679     }
680    
681    
682     =head2 attr_names
683    
684     Returns array with attribute names from result document object.
685    
686     my @attrs = $rdoc->attr_names;
687    
688     =cut
689    
690     sub attr_names {
691     my $self = shift;
692     croak "attr_names return array, not scalar" if (! wantarray);
693     return sort keys %{ $self->{attrs} };
694     }
695    
696 dpavlin 42
697 dpavlin 23 =head2 attr
698    
699     Returns value of an attribute.
700    
701     my $value = $rdoc->attr( 'attribute' );
702    
703     =cut
704    
705     sub attr {
706     my $self = shift;
707     my $name = shift || return;
708     return $self->{attrs}->{ $name };
709     }
710    
711 dpavlin 42
712 dpavlin 23 =head2 snippet
713    
714     Return snippet from result document
715    
716     print $rdoc->snippet;
717    
718     =cut
719    
720     sub snippet {
721     my $self = shift;
722     return $self->{snippet};
723     }
724    
725 dpavlin 42
726 dpavlin 23 =head2 keywords
727    
728     Return keywords from result document
729    
730     print $rdoc->keywords;
731    
732     =cut
733    
734     sub keywords {
735     my $self = shift;
736     return $self->{keywords};
737     }
738    
739    
740 dpavlin 25 package Search::Estraier::NodeResult;
741    
742     use Carp qw/croak/;
743    
744     #use Search::Estraier;
745     #our @ISA = qw/Search::Estraier/;
746    
747     =head1 Search::Estraier::NodeResult
748    
749     =head2 new
750    
751     my $res = new Search::HyperEstraier::NodeResult(
752     docs => @array_of_rdocs,
753     hits => %hash_with_hints,
754     );
755    
756     =cut
757    
758     sub new {
759     my $class = shift;
760     my $self = {@_};
761     bless($self, $class);
762    
763     foreach my $f (qw/docs hints/) {
764     croak "missing $f for ResultDocument" unless defined($self->{$f});
765     }
766    
767     $self ? return $self : return undef;
768     }
769    
770 dpavlin 42
771 dpavlin 25 =head2 doc_num
772    
773     Return number of documents
774    
775     print $res->doc_num;
776    
777 dpavlin 100 This will return real number of documents (limited by C<max>).
778     If you want to get total number of hits, see C<hits>.
779    
780 dpavlin 25 =cut
781    
782     sub doc_num {
783     my $self = shift;
784 dpavlin 53 return $#{$self->{docs}} + 1;
785 dpavlin 25 }
786    
787 dpavlin 42
788 dpavlin 25 =head2 get_doc
789    
790     Return single document
791    
792     my $doc = $res->get_doc( 42 );
793    
794     Returns undef if document doesn't exist.
795    
796     =cut
797    
798     sub get_doc {
799     my $self = shift;
800     my $num = shift;
801 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
802 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
803     return $self->{docs}->[$num];
804     }
805    
806 dpavlin 42
807 dpavlin 25 =head2 hint
808    
809     Return specific hint from results.
810    
811 dpavlin 100 print $res->hint( 'VERSION' );
812 dpavlin 25
813     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
814     C<TIME>, C<LINK#n>, C<VIEW>.
815    
816     =cut
817    
818     sub hint {
819     my $self = shift;
820     my $key = shift || return;
821     return $self->{hints}->{$key};
822     }
823    
824 dpavlin 108 =head2 hints
825 dpavlin 25
826 dpavlin 91 More perlish version of C<hint>. This one returns hash.
827    
828 dpavlin 100 my %hints = $res->hints;
829 dpavlin 91
830     =cut
831    
832     sub hints {
833     my $self = shift;
834     return $self->{hints};
835     }
836    
837 dpavlin 100 =head2 hits
838    
839     Syntaxtic sugar for total number of hits for this query
840    
841     print $res->hits;
842    
843     It's same as
844    
845     print $res->hint('HIT');
846    
847     but shorter.
848    
849     =cut
850    
851     sub hits {
852     my $self = shift;
853     return $self->{hints}->{'HIT'} || 0;
854     }
855    
856 dpavlin 27 package Search::Estraier::Node;
857    
858 dpavlin 44 use Carp qw/carp croak confess/;
859 dpavlin 33 use URI;
860 dpavlin 36 use MIME::Base64;
861 dpavlin 33 use IO::Socket::INET;
862 dpavlin 49 use URI::Escape qw/uri_escape/;
863 dpavlin 29
864 dpavlin 27 =head1 Search::Estraier::Node
865    
866     =head2 new
867    
868     my $node = new Search::HyperEstraier::Node;
869    
870 dpavlin 65 or optionally with C<url> as parametar
871    
872     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
873    
874 dpavlin 78 or in more verbose form
875    
876     my $node = new Search::HyperEstraier::Node(
877     url => 'http://localhost:1978/node/test',
878 dpavlin 132 user => 'admin',
879     passwd => 'admin'
880 dpavlin 139 create => 1,
881     label => 'optional node label',
882 dpavlin 78 debug => 1,
883     croak_on_error => 1
884     );
885    
886     with following arguments:
887    
888     =over 4
889    
890     =item url
891    
892     URL to node
893    
894 dpavlin 132 =item user
895    
896     specify username for node server authentication
897    
898     =item passwd
899    
900     password for authentication
901    
902 dpavlin 139 =item create
903    
904     create node if it doesn't exists
905    
906     =item label
907    
908     optional label for new node if C<create> is used
909    
910 dpavlin 78 =item debug
911    
912     dumps a B<lot> of debugging output
913    
914     =item croak_on_error
915    
916     very helpful during development. It will croak on all errors instead of
917     silently returning C<-1> (which is convention of Hyper Estraier API in other
918     languages).
919    
920     =back
921    
922 dpavlin 27 =cut
923    
924     sub new {
925     my $class = shift;
926     my $self = {
927     pxport => -1,
928 dpavlin 33 timeout => 0, # this used to be -1
929 dpavlin 27 wwidth => 480,
930     hwidth => 96,
931     awidth => 96,
932     status => -1,
933     };
934 dpavlin 111
935 dpavlin 27 bless($self, $class);
936    
937 dpavlin 65 if ($#_ == 0) {
938     $self->{url} = shift;
939     } else {
940 dpavlin 78 %$self = ( %$self, @_ );
941    
942 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
943    
944 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
945     }
946 dpavlin 57
947 dpavlin 111 $self->{inform} = {
948     dnum => -1,
949     wnum => -1,
950     size => -1.0,
951     };
952    
953 dpavlin 139 if ($self->{create}) {
954 dpavlin 140 if (! eval { $self->name } || $@) {
955 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
956     croak "can't find node name in '$self->{url}'" unless ($name);
957     my $label = $self->{label} || $name;
958     $self->master(
959     action => 'nodeadd',
960     name => $name,
961     label => $label,
962     ) || croak "can't create node $name ($label)";
963     }
964     }
965    
966 dpavlin 27 $self ? return $self : return undef;
967     }
968    
969 dpavlin 42
970 dpavlin 29 =head2 set_url
971    
972     Specify URL to node server
973    
974     $node->set_url('http://localhost:1978');
975    
976     =cut
977    
978     sub set_url {
979     my $self = shift;
980     $self->{url} = shift;
981     }
982    
983 dpavlin 42
984 dpavlin 29 =head2 set_proxy
985    
986     Specify proxy server to connect to node server
987    
988     $node->set_proxy('proxy.example.com', 8080);
989    
990     =cut
991    
992     sub set_proxy {
993     my $self = shift;
994     my ($host,$port) = @_;
995 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
996 dpavlin 29 $self->{pxhost} = $host;
997     $self->{pxport} = $port;
998     }
999    
1000 dpavlin 42
1001 dpavlin 30 =head2 set_timeout
1002    
1003     Specify timeout of connection in seconds
1004    
1005     $node->set_timeout( 15 );
1006    
1007     =cut
1008    
1009     sub set_timeout {
1010     my $self = shift;
1011     my $sec = shift;
1012 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1013 dpavlin 30 $self->{timeout} = $sec;
1014     }
1015    
1016 dpavlin 42
1017 dpavlin 31 =head2 set_auth
1018    
1019     Specify name and password for authentication to node server.
1020    
1021     $node->set_auth('clint','eastwood');
1022    
1023     =cut
1024    
1025     sub set_auth {
1026     my $self = shift;
1027     my ($login,$passwd) = @_;
1028 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1029     chomp($basic_auth);
1030     $self->{auth} = $basic_auth;
1031 dpavlin 31 }
1032    
1033 dpavlin 42
1034 dpavlin 32 =head2 status
1035    
1036     Return status code of last request.
1037    
1038 dpavlin 40 print $node->status;
1039 dpavlin 32
1040     C<-1> means connection failure.
1041    
1042     =cut
1043    
1044     sub status {
1045     my $self = shift;
1046     return $self->{status};
1047     }
1048    
1049 dpavlin 42
1050 dpavlin 40 =head2 put_doc
1051    
1052 dpavlin 41 Add a document
1053 dpavlin 40
1054 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1055    
1056 dpavlin 151 Return true on success or false on failure.
1057 dpavlin 41
1058 dpavlin 40 =cut
1059    
1060     sub put_doc {
1061     my $self = shift;
1062     my $doc = shift || return;
1063 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1064 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/put_doc',
1065 dpavlin 41 'text/x-estraier-draft',
1066     $doc->dump_draft,
1067     undef
1068 dpavlin 151 ) == 200) {
1069     $self->_clear_info;
1070     return 1;
1071     }
1072     return undef;
1073 dpavlin 40 }
1074    
1075 dpavlin 41
1076     =head2 out_doc
1077    
1078     Remove a document
1079    
1080     $node->out_doc( document_id ) or "can't remove document";
1081    
1082     Return true on success or false on failture.
1083    
1084     =cut
1085    
1086     sub out_doc {
1087     my $self = shift;
1088     my $id = shift || return;
1089     return unless ($self->{url});
1090 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1091 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1092 dpavlin 41 'application/x-www-form-urlencoded',
1093     "id=$id",
1094     undef
1095 dpavlin 150 ) == 200) {
1096 dpavlin 151 $self->_clear_info;
1097     return 1;
1098 dpavlin 150 }
1099     return undef;
1100 dpavlin 41 }
1101    
1102    
1103     =head2 out_doc_by_uri
1104    
1105     Remove a registrated document using it's uri
1106    
1107 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1108 dpavlin 41
1109     Return true on success or false on failture.
1110    
1111     =cut
1112    
1113     sub out_doc_by_uri {
1114     my $self = shift;
1115     my $uri = shift || return;
1116     return unless ($self->{url});
1117 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1118 dpavlin 41 'application/x-www-form-urlencoded',
1119 dpavlin 50 "uri=" . uri_escape($uri),
1120 dpavlin 41 undef
1121 dpavlin 150 ) == 200) {
1122 dpavlin 151 $self->_clear_info;
1123     return 1;
1124 dpavlin 150 }
1125     return undef;
1126 dpavlin 41 }
1127    
1128 dpavlin 42
1129     =head2 edit_doc
1130    
1131     Edit attributes of a document
1132    
1133     $node->edit_doc( $document_draft ) or die "can't edit document";
1134    
1135     Return true on success or false on failture.
1136    
1137     =cut
1138    
1139     sub edit_doc {
1140     my $self = shift;
1141     my $doc = shift || return;
1142 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1143 dpavlin 151 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1144 dpavlin 42 'text/x-estraier-draft',
1145     $doc->dump_draft,
1146     undef
1147 dpavlin 151 ) == 200) {
1148     $self->_clear_info;
1149     return 1;
1150     }
1151     return undef;
1152 dpavlin 42 }
1153    
1154    
1155 dpavlin 43 =head2 get_doc
1156    
1157     Retreive document
1158    
1159     my $doc = $node->get_doc( document_id ) or die "can't get document";
1160    
1161     Return true on success or false on failture.
1162    
1163     =cut
1164    
1165     sub get_doc {
1166     my $self = shift;
1167     my $id = shift || return;
1168     return $self->_fetch_doc( id => $id );
1169     }
1170    
1171 dpavlin 44
1172 dpavlin 43 =head2 get_doc_by_uri
1173    
1174     Retreive document
1175    
1176 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1177 dpavlin 43
1178     Return true on success or false on failture.
1179    
1180     =cut
1181    
1182     sub get_doc_by_uri {
1183     my $self = shift;
1184     my $uri = shift || return;
1185     return $self->_fetch_doc( uri => $uri );
1186     }
1187    
1188 dpavlin 44
1189 dpavlin 49 =head2 get_doc_attr
1190    
1191     Retrieve the value of an atribute from object
1192    
1193     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1194     die "can't get document attribute";
1195    
1196     =cut
1197    
1198     sub get_doc_attr {
1199     my $self = shift;
1200     my ($id,$name) = @_;
1201     return unless ($id && $name);
1202     return $self->_fetch_doc( id => $id, attr => $name );
1203     }
1204    
1205    
1206     =head2 get_doc_attr_by_uri
1207    
1208     Retrieve the value of an atribute from object
1209    
1210     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1211     die "can't get document attribute";
1212    
1213     =cut
1214    
1215     sub get_doc_attr_by_uri {
1216     my $self = shift;
1217     my ($uri,$name) = @_;
1218     return unless ($uri && $name);
1219     return $self->_fetch_doc( uri => $uri, attr => $name );
1220     }
1221    
1222    
1223 dpavlin 44 =head2 etch_doc
1224    
1225     Exctract document keywords
1226    
1227     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1228    
1229     =cut
1230    
1231 dpavlin 49 sub etch_doc {
1232 dpavlin 44 my $self = shift;
1233     my $id = shift || return;
1234     return $self->_fetch_doc( id => $id, etch => 1 );
1235     }
1236    
1237     =head2 etch_doc_by_uri
1238    
1239     Retreive document
1240    
1241 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1242 dpavlin 44
1243     Return true on success or false on failture.
1244    
1245     =cut
1246    
1247     sub etch_doc_by_uri {
1248     my $self = shift;
1249     my $uri = shift || return;
1250     return $self->_fetch_doc( uri => $uri, etch => 1 );
1251     }
1252    
1253    
1254 dpavlin 45 =head2 uri_to_id
1255    
1256     Get ID of document specified by URI
1257    
1258     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1259    
1260 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1261    
1262 dpavlin 45 =cut
1263    
1264     sub uri_to_id {
1265     my $self = shift;
1266     my $uri = shift || return;
1267 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1268 dpavlin 45 }
1269    
1270    
1271 dpavlin 43 =head2 _fetch_doc
1272    
1273 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1274     C<etch_doc>, C<etch_doc_by_uri>.
1275 dpavlin 43
1276 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1277     my $doc = $node->_fetch_doc( id => 42 );
1278     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1279 dpavlin 43
1280 dpavlin 45 # to extract keywords, add etch
1281     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1282     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1283    
1284 dpavlin 49 # to get document attrubute add attr
1285     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1286     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1287    
1288 dpavlin 45 # more general form which allows implementation of
1289     # uri_to_id
1290     my $id = $node->_fetch_doc(
1291     uri => 'file:///document/uri/42',
1292     path => '/uri_to_id',
1293     chomp_resbody => 1
1294     );
1295    
1296 dpavlin 43 =cut
1297    
1298     sub _fetch_doc {
1299     my $self = shift;
1300 dpavlin 44 my $a = {@_};
1301     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1302    
1303     my ($arg, $resbody);
1304    
1305 dpavlin 45 my $path = $a->{path} || '/get_doc';
1306 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1307    
1308     if ($a->{id}) {
1309     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1310     $arg = 'id=' . $a->{id};
1311     } elsif ($a->{uri}) {
1312 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1313 dpavlin 44 } else {
1314     confess "unhandled argument. Need id or uri.";
1315 dpavlin 43 }
1316 dpavlin 44
1317 dpavlin 49 if ($a->{attr}) {
1318     $path = '/get_doc_attr';
1319     $arg .= '&attr=' . uri_escape($a->{attr});
1320     $a->{chomp_resbody} = 1;
1321     }
1322    
1323 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1324 dpavlin 43 'application/x-www-form-urlencoded',
1325 dpavlin 44 $arg,
1326 dpavlin 45 \$resbody,
1327 dpavlin 103 $a->{croak_on_error},
1328 dpavlin 43 );
1329 dpavlin 44
1330 dpavlin 43 return if ($rv != 200);
1331 dpavlin 44
1332     if ($a->{etch}) {
1333     $self->{kwords} = {};
1334     return +{} unless ($resbody);
1335     foreach my $l (split(/\n/, $resbody)) {
1336     my ($k,$v) = split(/\t/, $l, 2);
1337     $self->{kwords}->{$k} = $v if ($v);
1338     }
1339     return $self->{kwords};
1340 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1341     return unless (defined($resbody));
1342     chomp($resbody);
1343     return $resbody;
1344 dpavlin 44 } else {
1345     return new Search::Estraier::Document($resbody);
1346     }
1347 dpavlin 43 }
1348    
1349    
1350 dpavlin 48 =head2 name
1351 dpavlin 43
1352 dpavlin 48 my $node_name = $node->name;
1353 dpavlin 43
1354 dpavlin 48 =cut
1355    
1356     sub name {
1357     my $self = shift;
1358 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1359     return $self->{inform}->{name};
1360 dpavlin 48 }
1361    
1362    
1363     =head2 label
1364    
1365     my $node_label = $node->label;
1366    
1367     =cut
1368    
1369     sub label {
1370     my $self = shift;
1371 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1372     return $self->{inform}->{label};
1373 dpavlin 48 }
1374    
1375    
1376     =head2 doc_num
1377    
1378     my $documents_in_node = $node->doc_num;
1379    
1380     =cut
1381    
1382     sub doc_num {
1383     my $self = shift;
1384 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1385     return $self->{inform}->{dnum};
1386 dpavlin 48 }
1387    
1388    
1389     =head2 word_num
1390    
1391     my $words_in_node = $node->word_num;
1392    
1393     =cut
1394    
1395     sub word_num {
1396     my $self = shift;
1397 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1398     return $self->{inform}->{wnum};
1399 dpavlin 48 }
1400    
1401    
1402     =head2 size
1403    
1404     my $node_size = $node->size;
1405    
1406     =cut
1407    
1408     sub size {
1409     my $self = shift;
1410 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1411     return $self->{inform}->{size};
1412 dpavlin 48 }
1413    
1414    
1415 dpavlin 51 =head2 search
1416 dpavlin 48
1417 dpavlin 51 Search documents which match condition
1418    
1419     my $nres = $node->search( $cond, $depth );
1420    
1421     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1422     depth for meta search.
1423    
1424     Function results C<Search::Estraier::NodeResult> object.
1425    
1426     =cut
1427    
1428     sub search {
1429     my $self = shift;
1430     my ($cond, $depth) = @_;
1431     return unless ($cond && defined($depth) && $self->{url});
1432     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1433     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1434    
1435 dpavlin 52 my $resbody;
1436 dpavlin 51
1437 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1438 dpavlin 53 'application/x-www-form-urlencoded',
1439 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1440 dpavlin 52 \$resbody,
1441     );
1442     return if ($rv != 200);
1443    
1444 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1445     my $hintsText = splice @records, 0, 2; # starts with empty record
1446     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1447    
1448     # process records
1449 dpavlin 128 my $docs = [];
1450 dpavlin 126 foreach my $record (@records)
1451     {
1452     # split into keys and snippets
1453     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1454    
1455     # create document hash
1456     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1457     $doc->{'@keywords'} = $doc->{keywords};
1458     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1459     $doc->{snippet} = $snippet;
1460    
1461     push @$docs, new Search::Estraier::ResultDocument(
1462     attrs => $doc,
1463     uri => $doc->{'@uri'},
1464     snippet => $snippet,
1465     keywords => $doc->{'keywords'},
1466     );
1467     }
1468    
1469     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1470     }
1471    
1472    
1473 dpavlin 51 =head2 cond_to_query
1474    
1475 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1476    
1477 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1478 dpavlin 51
1479     =cut
1480    
1481     sub cond_to_query {
1482     my $self = shift;
1483    
1484     my $cond = shift || return;
1485     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1486 dpavlin 61 my $depth = shift;
1487 dpavlin 51
1488     my @args;
1489    
1490     if (my $phrase = $cond->phrase) {
1491     push @args, 'phrase=' . uri_escape($phrase);
1492     }
1493    
1494     if (my @attrs = $cond->attrs) {
1495     for my $i ( 0 .. $#attrs ) {
1496 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1497 dpavlin 51 }
1498     }
1499    
1500     if (my $order = $cond->order) {
1501     push @args, 'order=' . uri_escape($order);
1502     }
1503    
1504     if (my $max = $cond->max) {
1505     push @args, 'max=' . $max;
1506     } else {
1507     push @args, 'max=' . (1 << 30);
1508     }
1509    
1510     if (my $options = $cond->options) {
1511     push @args, 'options=' . $options;
1512     }
1513    
1514 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1515 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1516     push @args, 'hwidth=' . $self->{hwidth};
1517     push @args, 'awidth=' . $self->{awidth};
1518 dpavlin 155 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1519 dpavlin 51
1520     return join('&', @args);
1521     }
1522    
1523    
1524 dpavlin 33 =head2 shuttle_url
1525 dpavlin 32
1526 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1527 dpavlin 33 master.
1528 dpavlin 2
1529 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1530 dpavlin 2
1531 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1532     body will be saved within object.
1533 dpavlin 2
1534     =cut
1535    
1536 dpavlin 59 use LWP::UserAgent;
1537    
1538 dpavlin 33 sub shuttle_url {
1539     my $self = shift;
1540 dpavlin 2
1541 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1542 dpavlin 2
1543 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1544    
1545 dpavlin 40 $self->{status} = -1;
1546 dpavlin 33
1547 dpavlin 41 warn "## $url\n" if ($self->{debug});
1548 dpavlin 36
1549 dpavlin 33 $url = new URI($url);
1550 dpavlin 37 if (
1551     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1552     !$url->host || !$url->port || $url->port < 1
1553     ) {
1554     carp "can't parse $url\n";
1555     return -1;
1556     }
1557 dpavlin 33
1558 dpavlin 59 my $ua = LWP::UserAgent->new;
1559     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1560 dpavlin 33
1561 dpavlin 59 my $req;
1562 dpavlin 37 if ($reqbody) {
1563 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1564 dpavlin 37 } else {
1565 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1566 dpavlin 37 }
1567    
1568 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1569     $req->headers->header( 'Connection', 'close' );
1570 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1571 dpavlin 59 $req->content_type( $content_type );
1572 dpavlin 37
1573 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1574 dpavlin 2
1575 dpavlin 37 if ($reqbody) {
1576 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1577 dpavlin 59 $req->content( $reqbody );
1578 dpavlin 33 }
1579 dpavlin 2
1580 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1581 dpavlin 2
1582 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1583 dpavlin 2
1584 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1585    
1586 dpavlin 78 if (! $res->is_success) {
1587 dpavlin 103 if ($croak_on_error) {
1588 dpavlin 78 croak("can't get $url: ",$res->status_line);
1589     } else {
1590     return -1;
1591     }
1592     }
1593 dpavlin 2
1594 dpavlin 59 $$resbody .= $res->content;
1595    
1596 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1597 dpavlin 39
1598 dpavlin 40 return $self->{status};
1599 dpavlin 2 }
1600    
1601 dpavlin 48
1602 dpavlin 55 =head2 set_snippet_width
1603 dpavlin 48
1604 dpavlin 55 Set width of snippets in results
1605    
1606     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1607    
1608     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1609     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1610    
1611     C<$hwidth> specified width of strings from beginning of string. Default
1612     value is C<96>. Negative or zero value keep previous value.
1613    
1614     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1615     If negative of zero value is provided previous value is kept unchanged.
1616    
1617     =cut
1618    
1619     sub set_snippet_width {
1620     my $self = shift;
1621    
1622     my ($wwidth, $hwidth, $awidth) = @_;
1623     $self->{wwidth} = $wwidth;
1624     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1625     $self->{awidth} = $awidth if ($awidth >= 0);
1626     }
1627    
1628    
1629 dpavlin 56 =head2 set_user
1630 dpavlin 55
1631 dpavlin 56 Manage users of node
1632    
1633     $node->set_user( 'name', $mode );
1634    
1635     C<$mode> can be one of:
1636    
1637     =over 4
1638    
1639     =item 0
1640    
1641     delete account
1642    
1643     =item 1
1644    
1645     set administrative right for user
1646    
1647     =item 2
1648    
1649     set user account as guest
1650    
1651     =back
1652    
1653     Return true on success, otherwise false.
1654    
1655     =cut
1656    
1657     sub set_user {
1658     my $self = shift;
1659     my ($name, $mode) = @_;
1660    
1661     return unless ($self->{url});
1662     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1663    
1664     $self->shuttle_url( $self->{url} . '/_set_user',
1665 dpavlin 154 'application/x-www-form-urlencoded',
1666 dpavlin 56 'name=' . uri_escape($name) . '&mode=' . $mode,
1667     undef
1668     ) == 200;
1669     }
1670    
1671    
1672 dpavlin 57 =head2 set_link
1673    
1674     Manage node links
1675    
1676     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1677    
1678     If C<$credit> is negative, link is removed.
1679    
1680     =cut
1681    
1682     sub set_link {
1683     my $self = shift;
1684     my ($url, $label, $credit) = @_;
1685    
1686     return unless ($self->{url});
1687     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1688    
1689     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1690     $reqbody .= '&credit=' . $credit if ($credit > 0);
1691    
1692 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1693 dpavlin 71 'application/x-www-form-urlencoded',
1694 dpavlin 57 $reqbody,
1695     undef
1696 dpavlin 107 ) == 200) {
1697     # refresh node info after adding link
1698 dpavlin 151 $self->_clear_info;
1699 dpavlin 107 return 1;
1700     }
1701 dpavlin 150 return undef;
1702 dpavlin 57 }
1703    
1704 dpavlin 107 =head2 admins
1705 dpavlin 57
1706 dpavlin 107 my @admins = @{ $node->admins };
1707    
1708     Return array of users with admin rights on node
1709    
1710     =cut
1711    
1712     sub admins {
1713     my $self = shift;
1714 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1715     return $self->{inform}->{admins};
1716 dpavlin 107 }
1717    
1718     =head2 guests
1719    
1720     my @guests = @{ $node->guests };
1721    
1722     Return array of users with guest rights on node
1723    
1724     =cut
1725    
1726     sub guests {
1727     my $self = shift;
1728 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1729     return $self->{inform}->{guests};
1730 dpavlin 107 }
1731    
1732     =head2 links
1733    
1734     my $links = @{ $node->links };
1735    
1736     Return array of links for this node
1737    
1738     =cut
1739    
1740     sub links {
1741     my $self = shift;
1742 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1743     return $self->{inform}->{links};
1744 dpavlin 107 }
1745    
1746 dpavlin 160 =head2 cacheusage
1747    
1748     Return cache usage for a node
1749    
1750     my $cache = $node->cacheusage;
1751    
1752     =cut
1753    
1754     sub cacheusage {
1755     my $self = shift;
1756    
1757     return unless ($self->{url});
1758    
1759     my $resbody;
1760     my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1761     'text/plain',
1762     undef,
1763     \$resbody,
1764     );
1765    
1766     return if ($rv != 200 || !$resbody);
1767    
1768     return $resbody;
1769     }
1770    
1771 dpavlin 134 =head2 master
1772 dpavlin 107
1773 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1774    
1775     $node->master(
1776     action => 'sync'
1777     );
1778    
1779     All available actions are documented in
1780     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1781    
1782     =cut
1783    
1784     my $estmaster_rest = {
1785     shutdown => {
1786     status => 202,
1787     },
1788     sync => {
1789     status => 202,
1790     },
1791     backup => {
1792     status => 202,
1793     },
1794     userlist => {
1795     status => 200,
1796 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1797 dpavlin 134 },
1798     useradd => {
1799 dpavlin 135 required => [ qw/name passwd flags/ ],
1800     optional => [ qw/fname misc/ ],
1801 dpavlin 134 status => 200,
1802     },
1803     userdel => {
1804 dpavlin 135 required => [ qw/name/ ],
1805 dpavlin 134 status => 200,
1806     },
1807     nodelist => {
1808     status => 200,
1809 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1810 dpavlin 134 },
1811     nodeadd => {
1812 dpavlin 135 required => [ qw/name/ ],
1813     optional => [ qw/label/ ],
1814 dpavlin 134 status => 200,
1815     },
1816     nodedel => {
1817 dpavlin 135 required => [ qw/name/ ],
1818 dpavlin 134 status => 200,
1819     },
1820     nodeclr => {
1821 dpavlin 135 required => [ qw/name/ ],
1822 dpavlin 134 status => 200,
1823     },
1824     nodertt => {
1825     status => 200,
1826     },
1827     };
1828    
1829     sub master {
1830     my $self = shift;
1831    
1832     my $args = {@_};
1833    
1834     # have action?
1835     my $action = $args->{action} || croak "need action, available: ",
1836     join(", ",keys %{ $estmaster_rest });
1837    
1838     # check if action is valid
1839     my $rest = $estmaster_rest->{$action};
1840     croak "action '$action' is not supported, available actions: ",
1841     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1842    
1843     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1844    
1845     my @args;
1846    
1847     if ($rest->{required} || $rest->{optional}) {
1848    
1849     map {
1850     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1851     push @args, $_ . '=' . uri_escape( $args->{$_} );
1852 dpavlin 136 } ( @{ $rest->{required} } );
1853 dpavlin 134
1854     map {
1855     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1856 dpavlin 136 } ( @{ $rest->{optional} } );
1857 dpavlin 134
1858     }
1859    
1860     my $uri = new URI( $self->{url} );
1861    
1862     my $resbody;
1863    
1864 dpavlin 135 my $status = $self->shuttle_url(
1865 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1866     'application/x-www-form-urlencoded',
1867     join('&', @args),
1868     \$resbody,
1869     1,
1870 dpavlin 135 ) or confess "shuttle_url failed";
1871 dpavlin 134
1872 dpavlin 135 if ($status == $rest->{status}) {
1873 dpavlin 149
1874     # refresh node info after sync
1875 dpavlin 151 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
1876 dpavlin 149
1877 dpavlin 135 if ($rest->{returns} && wantarray) {
1878 dpavlin 134
1879     my @results;
1880 dpavlin 135 my $fields = $#{$rest->{returns}};
1881 dpavlin 134
1882     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1883 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1884 dpavlin 134 my $row;
1885 dpavlin 135 foreach my $i ( 0 .. $fields) {
1886     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1887     }
1888 dpavlin 134 push @results, $row;
1889     }
1890    
1891     return @results;
1892    
1893 dpavlin 135 } elsif ($resbody) {
1894 dpavlin 136 chomp $resbody;
1895 dpavlin 134 return $resbody;
1896 dpavlin 135 } else {
1897     return 0E0;
1898 dpavlin 134 }
1899     }
1900 dpavlin 135
1901     carp "expected status $rest->{status}, but got $status";
1902     return undef;
1903 dpavlin 134 }
1904    
1905 dpavlin 55 =head1 PRIVATE METHODS
1906    
1907     You could call those directly, but you don't have to. I hope.
1908    
1909     =head2 _set_info
1910    
1911 dpavlin 48 Set information for node
1912    
1913 dpavlin 55 $node->_set_info;
1914 dpavlin 48
1915     =cut
1916    
1917 dpavlin 55 sub _set_info {
1918 dpavlin 48 my $self = shift;
1919    
1920     $self->{status} = -1;
1921     return unless ($self->{url});
1922    
1923     my $resbody;
1924     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1925     'text/plain',
1926     undef,
1927     \$resbody,
1928     );
1929    
1930     return if ($rv != 200 || !$resbody);
1931    
1932 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1933 dpavlin 48
1934 dpavlin 151 $self->_clear_info;
1935 dpavlin 111
1936     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1937     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1938    
1939 dpavlin 107 return $resbody unless (@lines);
1940    
1941     shift @lines;
1942    
1943     while(my $admin = shift @lines) {
1944 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1945 dpavlin 107 }
1946 dpavlin 111
1947 dpavlin 107 while(my $guest = shift @lines) {
1948 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1949 dpavlin 107 }
1950    
1951     while(my $link = shift @lines) {
1952 dpavlin 111 push @{$self->{inform}->{links}}, $link;
1953 dpavlin 107 }
1954    
1955     return $resbody;
1956    
1957 dpavlin 48 }
1958    
1959 dpavlin 151 =head2 _clear_info
1960    
1961     Clear information for node
1962    
1963     $node->_clear_info;
1964    
1965     On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
1966     info will be fetch again from Hyper Estraier.
1967    
1968     =cut
1969     sub _clear_info {
1970     my $self = shift;
1971     $self->{inform} = {
1972     dnum => -1,
1973     wnum => -1,
1974     size => -1.0,
1975     };
1976     }
1977    
1978 dpavlin 2 ###
1979    
1980     =head1 EXPORT
1981    
1982     Nothing.
1983    
1984     =head1 SEE ALSO
1985    
1986     L<http://hyperestraier.sourceforge.net/>
1987    
1988     Hyper Estraier Ruby interface on which this module is based.
1989    
1990     =head1 AUTHOR
1991    
1992     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1993    
1994 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
1995 dpavlin 2
1996     =head1 COPYRIGHT AND LICENSE
1997    
1998 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1999 dpavlin 2
2000     This library is free software; you can redistribute it and/or modify
2001     it under the GPL v2 or later.
2002    
2003     =cut
2004    
2005     1;

  ViewVC Help
Powered by ViewVC 1.1.26