/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 150 - (hide annotations)
Mon May 15 22:26:08 2006 UTC (17 years, 11 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 34950 byte(s)
call _set_info to refresh data about node after calling out_doc*
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 149 our $VERSION = '0.07_1';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 14 This class implements Document which is collection of attributes
124     (key=value), vectors (also key value) display text and hidden text.
125    
126 dpavlin 42
127 dpavlin 2 =head2 new
128    
129 dpavlin 14 Create new document, empty or from draft.
130    
131 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
132 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
133 dpavlin 2
134     =cut
135    
136     sub new {
137     my $class = shift;
138 dpavlin 14 my $self = {};
139 dpavlin 2 bless($self, $class);
140    
141 dpavlin 6 $self->{id} = -1;
142    
143 dpavlin 14 my $draft = shift;
144    
145     if ($draft) {
146     my $in_text = 0;
147     foreach my $line (split(/\n/, $draft)) {
148    
149     if ($in_text) {
150     if ($line =~ /^\t/) {
151     push @{ $self->{htexts} }, substr($line, 1);
152     } else {
153     push @{ $self->{dtexts} }, $line;
154     }
155     next;
156     }
157    
158     if ($line =~ m/^%VECTOR\t(.+)$/) {
159     my @fields = split(/\t/, $1);
160     for my $i ( 0 .. ($#fields - 1) ) {
161     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
162     $i++;
163     }
164     next;
165     } elsif ($line =~ m/^%/) {
166     # What is this? comment?
167     #warn "$line\n";
168     next;
169     } elsif ($line =~ m/^$/) {
170     $in_text = 1;
171     next;
172 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
173 dpavlin 14 $self->{attrs}->{ $1 } = $2;
174     next;
175     }
176    
177 dpavlin 81 warn "draft ignored: '$line'\n";
178 dpavlin 14 }
179     }
180    
181 dpavlin 2 $self ? return $self : return undef;
182     }
183    
184 dpavlin 4
185 dpavlin 2 =head2 add_attr
186    
187 dpavlin 6 Add an attribute.
188    
189 dpavlin 2 $doc->add_attr( name => 'value' );
190    
191 dpavlin 9 Delete attribute using
192 dpavlin 5
193     $doc->add_attr( name => undef );
194    
195 dpavlin 2 =cut
196    
197     sub add_attr {
198     my $self = shift;
199     my $attrs = {@_};
200    
201     while (my ($name, $value) = each %{ $attrs }) {
202 dpavlin 9 if (! defined($value)) {
203 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
204 dpavlin 9 } else {
205 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
206 dpavlin 9 }
207 dpavlin 2 }
208 dpavlin 8
209     return 1;
210 dpavlin 2 }
211    
212 dpavlin 5
213     =head2 add_text
214    
215 dpavlin 6 Add a sentence of text.
216    
217 dpavlin 5 $doc->add_text('this is example text to display');
218    
219     =cut
220    
221     sub add_text {
222     my $self = shift;
223     my $text = shift;
224     return unless defined($text);
225    
226 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
227 dpavlin 5 }
228    
229    
230     =head2 add_hidden_text
231    
232 dpavlin 6 Add a hidden sentence.
233    
234 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
235    
236     =cut
237    
238     sub add_hidden_text {
239     my $self = shift;
240     my $text = shift;
241     return unless defined($text);
242    
243 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
244 dpavlin 5 }
245    
246 dpavlin 42
247 dpavlin 6 =head2 id
248    
249     Get the ID number of document. If the object has never been registred, C<-1> is returned.
250    
251     print $doc->id;
252    
253     =cut
254    
255     sub id {
256     my $self = shift;
257     return $self->{id};
258     }
259    
260 dpavlin 42
261 dpavlin 7 =head2 attr_names
262    
263 dpavlin 9 Returns array with attribute names from document object.
264 dpavlin 7
265     my @attrs = $doc->attr_names;
266    
267     =cut
268    
269     sub attr_names {
270     my $self = shift;
271 dpavlin 60 return unless ($self->{attrs});
272     #croak "attr_names return array, not scalar" if (! wantarray);
273 dpavlin 7 return sort keys %{ $self->{attrs} };
274     }
275    
276 dpavlin 8
277     =head2 attr
278    
279 dpavlin 9 Returns value of an attribute.
280 dpavlin 8
281     my $value = $doc->attr( 'attribute' );
282    
283     =cut
284    
285     sub attr {
286     my $self = shift;
287     my $name = shift;
288 dpavlin 60 return unless (defined($name) && $self->{attrs});
289     return $self->{attrs}->{ $name };
290 dpavlin 8 }
291    
292 dpavlin 9
293     =head2 texts
294    
295     Returns array with text sentences.
296    
297     my @texts = $doc->texts;
298    
299     =cut
300    
301     sub texts {
302     my $self = shift;
303 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
304     return @{ $self->{dtexts} } if ($self->{dtexts});
305 dpavlin 9 }
306    
307 dpavlin 42
308 dpavlin 12 =head2 cat_texts
309    
310     Return whole text as single scalar.
311    
312     my $text = $doc->cat_texts;
313    
314     =cut
315    
316     sub cat_texts {
317     my $self = shift;
318 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
319 dpavlin 12 }
320    
321 dpavlin 42
322 dpavlin 5 =head2 dump_draft
323    
324 dpavlin 13 Dump draft data from document object.
325    
326 dpavlin 5 print $doc->dump_draft;
327    
328     =cut
329    
330     sub dump_draft {
331 dpavlin 13 my $self = shift;
332     my $draft;
333    
334     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
335 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
336 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
337 dpavlin 13 }
338    
339     if ($self->{kwords}) {
340     $draft .= '%%VECTOR';
341     while (my ($key, $value) = each %{ $self->{kwords} }) {
342     $draft .= "\t$key\t$value";
343     }
344     $draft .= "\n";
345     }
346    
347     $draft .= "\n";
348    
349 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
350     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
351 dpavlin 13
352     return $draft;
353 dpavlin 5 }
354    
355 dpavlin 42
356 dpavlin 4 =head2 delete
357 dpavlin 2
358 dpavlin 4 Empty document object
359 dpavlin 2
360 dpavlin 4 $doc->delete;
361    
362 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
363     convinience. Document objects which go out of scope will be destroyed
364     automatically.
365    
366 dpavlin 4 =cut
367    
368     sub delete {
369     my $self = shift;
370    
371 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
372 dpavlin 5 delete($self->{$data});
373     }
374 dpavlin 4
375 dpavlin 10 $self->{id} = -1;
376    
377 dpavlin 4 return 1;
378     }
379    
380    
381    
382 dpavlin 15 package Search::Estraier::Condition;
383 dpavlin 4
384 dpavlin 98 use Carp qw/carp confess croak/;
385 dpavlin 16
386 dpavlin 15 use Search::Estraier;
387     our @ISA = qw/Search::Estraier/;
388 dpavlin 4
389 dpavlin 16 =head1 Search::Estraier::Condition
390    
391     =head2 new
392    
393     my $cond = new Search::HyperEstraier::Condition;
394    
395     =cut
396    
397     sub new {
398     my $class = shift;
399     my $self = {};
400     bless($self, $class);
401    
402 dpavlin 19 $self->{max} = -1;
403     $self->{options} = 0;
404    
405 dpavlin 16 $self ? return $self : return undef;
406     }
407    
408 dpavlin 42
409 dpavlin 16 =head2 set_phrase
410    
411     $cond->set_phrase('search phrase');
412    
413     =cut
414    
415     sub set_phrase {
416     my $self = shift;
417     $self->{phrase} = $self->_s( shift );
418     }
419    
420 dpavlin 42
421 dpavlin 16 =head2 add_attr
422    
423     $cond->add_attr('@URI STRINC /~dpavlin/');
424    
425     =cut
426    
427     sub add_attr {
428     my $self = shift;
429     my $attr = shift || return;
430     push @{ $self->{attrs} }, $self->_s( $attr );
431     }
432    
433 dpavlin 42
434 dpavlin 16 =head2 set_order
435    
436     $cond->set_order('@mdate NUMD');
437    
438     =cut
439    
440     sub set_order {
441     my $self = shift;
442     $self->{order} = shift;
443     }
444    
445 dpavlin 42
446 dpavlin 16 =head2 set_max
447    
448     $cond->set_max(42);
449    
450     =cut
451    
452     sub set_max {
453     my $self = shift;
454     my $max = shift;
455 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
456 dpavlin 16 $self->{max} = $max;
457     }
458    
459 dpavlin 42
460 dpavlin 16 =head2 set_options
461    
462 dpavlin 98 $cond->set_options( 'SURE' );
463 dpavlin 16
464 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
465    
466     Possible options are:
467    
468     =over 8
469    
470     =item SURE
471    
472     check every N-gram
473    
474     =item USUAL
475    
476     check every second N-gram
477    
478     =item FAST
479    
480     check every third N-gram
481    
482     =item AGITO
483    
484     check every fourth N-gram
485    
486     =item NOIDF
487    
488     don't perform TF-IDF tuning
489    
490     =item SIMPLE
491    
492     use simplified query phrase
493    
494     =back
495    
496     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
497     options;
498    
499     This option changed in version C<0.04> of this module. It's backwards compatibile.
500    
501 dpavlin 16 =cut
502    
503 dpavlin 15 my $options = {
504     SURE => 1 << 0,
505     USUAL => 1 << 1,
506     FAST => 1 << 2,
507     AGITO => 1 << 3,
508     NOIDF => 1 << 4,
509     SIMPLE => 1 << 10,
510     };
511    
512 dpavlin 16 sub set_options {
513     my $self = shift;
514 dpavlin 98 my $opt = 0;
515     foreach my $option (@_) {
516     my $mask;
517     unless ($mask = $options->{$option}) {
518     if ($option eq '1') {
519     next;
520     } else {
521     croak "unknown option $option";
522     }
523     }
524     $opt += $mask;
525     }
526     $self->{options} = $opt;
527 dpavlin 4 }
528    
529 dpavlin 42
530 dpavlin 18 =head2 phrase
531    
532     Return search phrase.
533    
534     print $cond->phrase;
535    
536     =cut
537    
538     sub phrase {
539     my $self = shift;
540     return $self->{phrase};
541     }
542    
543 dpavlin 42
544 dpavlin 19 =head2 order
545 dpavlin 18
546 dpavlin 19 Return search result order.
547    
548     print $cond->order;
549    
550     =cut
551    
552     sub order {
553     my $self = shift;
554     return $self->{order};
555     }
556    
557 dpavlin 42
558 dpavlin 19 =head2 attrs
559    
560     Return search result attrs.
561    
562     my @cond_attrs = $cond->attrs;
563    
564     =cut
565    
566     sub attrs {
567     my $self = shift;
568     #croak "attrs return array, not scalar" if (! wantarray);
569 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
570 dpavlin 19 }
571    
572 dpavlin 42
573 dpavlin 19 =head2 max
574    
575     Return maximum number of results.
576    
577     print $cond->max;
578    
579     C<-1> is returned for unitialized value, C<0> is unlimited.
580    
581     =cut
582    
583     sub max {
584     my $self = shift;
585     return $self->{max};
586     }
587    
588 dpavlin 42
589 dpavlin 19 =head2 options
590    
591     Return options for this condition.
592    
593     print $cond->options;
594    
595     Options are returned in numerical form.
596    
597     =cut
598    
599     sub options {
600     my $self = shift;
601     return $self->{options};
602     }
603    
604    
605 dpavlin 116 =head2 set_skip
606    
607     Set number of skipped documents from beginning of results
608    
609     $cond->set_skip(42);
610    
611     Similar to C<offset> in RDBMS.
612    
613     =cut
614    
615     sub set_skip {
616     my $self = shift;
617     $self->{skip} = shift;
618     }
619    
620     =head2 skip
621    
622     Return skip for this condition.
623    
624     print $cond->skip;
625    
626     =cut
627    
628     sub skip {
629     my $self = shift;
630     return $self->{skip};
631     }
632    
633    
634 dpavlin 20 package Search::Estraier::ResultDocument;
635    
636 dpavlin 24 use Carp qw/croak/;
637 dpavlin 20
638 dpavlin 24 #use Search::Estraier;
639     #our @ISA = qw/Search::Estraier/;
640 dpavlin 20
641     =head1 Search::Estraier::ResultDocument
642    
643     =head2 new
644    
645 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
646 dpavlin 20 uri => 'http://localhost/document/uri/42',
647     attrs => {
648     foo => 1,
649     bar => 2,
650     },
651     snippet => 'this is a text of snippet'
652     keywords => 'this\tare\tkeywords'
653     );
654    
655     =cut
656    
657     sub new {
658     my $class = shift;
659     my $self = {@_};
660     bless($self, $class);
661    
662 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
663 dpavlin 20
664     $self ? return $self : return undef;
665     }
666    
667 dpavlin 42
668 dpavlin 23 =head2 uri
669 dpavlin 20
670 dpavlin 23 Return URI of result document
671 dpavlin 20
672 dpavlin 23 print $rdoc->uri;
673    
674     =cut
675    
676     sub uri {
677     my $self = shift;
678     return $self->{uri};
679     }
680    
681    
682     =head2 attr_names
683    
684     Returns array with attribute names from result document object.
685    
686     my @attrs = $rdoc->attr_names;
687    
688     =cut
689    
690     sub attr_names {
691     my $self = shift;
692     croak "attr_names return array, not scalar" if (! wantarray);
693     return sort keys %{ $self->{attrs} };
694     }
695    
696 dpavlin 42
697 dpavlin 23 =head2 attr
698    
699     Returns value of an attribute.
700    
701     my $value = $rdoc->attr( 'attribute' );
702    
703     =cut
704    
705     sub attr {
706     my $self = shift;
707     my $name = shift || return;
708     return $self->{attrs}->{ $name };
709     }
710    
711 dpavlin 42
712 dpavlin 23 =head2 snippet
713    
714     Return snippet from result document
715    
716     print $rdoc->snippet;
717    
718     =cut
719    
720     sub snippet {
721     my $self = shift;
722     return $self->{snippet};
723     }
724    
725 dpavlin 42
726 dpavlin 23 =head2 keywords
727    
728     Return keywords from result document
729    
730     print $rdoc->keywords;
731    
732     =cut
733    
734     sub keywords {
735     my $self = shift;
736     return $self->{keywords};
737     }
738    
739    
740 dpavlin 25 package Search::Estraier::NodeResult;
741    
742     use Carp qw/croak/;
743    
744     #use Search::Estraier;
745     #our @ISA = qw/Search::Estraier/;
746    
747     =head1 Search::Estraier::NodeResult
748    
749     =head2 new
750    
751     my $res = new Search::HyperEstraier::NodeResult(
752     docs => @array_of_rdocs,
753     hits => %hash_with_hints,
754     );
755    
756     =cut
757    
758     sub new {
759     my $class = shift;
760     my $self = {@_};
761     bless($self, $class);
762    
763     foreach my $f (qw/docs hints/) {
764     croak "missing $f for ResultDocument" unless defined($self->{$f});
765     }
766    
767     $self ? return $self : return undef;
768     }
769    
770 dpavlin 42
771 dpavlin 25 =head2 doc_num
772    
773     Return number of documents
774    
775     print $res->doc_num;
776    
777 dpavlin 100 This will return real number of documents (limited by C<max>).
778     If you want to get total number of hits, see C<hits>.
779    
780 dpavlin 25 =cut
781    
782     sub doc_num {
783     my $self = shift;
784 dpavlin 53 return $#{$self->{docs}} + 1;
785 dpavlin 25 }
786    
787 dpavlin 42
788 dpavlin 25 =head2 get_doc
789    
790     Return single document
791    
792     my $doc = $res->get_doc( 42 );
793    
794     Returns undef if document doesn't exist.
795    
796     =cut
797    
798     sub get_doc {
799     my $self = shift;
800     my $num = shift;
801 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
802 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
803     return $self->{docs}->[$num];
804     }
805    
806 dpavlin 42
807 dpavlin 25 =head2 hint
808    
809     Return specific hint from results.
810    
811 dpavlin 100 print $res->hint( 'VERSION' );
812 dpavlin 25
813     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
814     C<TIME>, C<LINK#n>, C<VIEW>.
815    
816     =cut
817    
818     sub hint {
819     my $self = shift;
820     my $key = shift || return;
821     return $self->{hints}->{$key};
822     }
823    
824 dpavlin 108 =head2 hints
825 dpavlin 25
826 dpavlin 91 More perlish version of C<hint>. This one returns hash.
827    
828 dpavlin 100 my %hints = $res->hints;
829 dpavlin 91
830     =cut
831    
832     sub hints {
833     my $self = shift;
834     return $self->{hints};
835     }
836    
837 dpavlin 100 =head2 hits
838    
839     Syntaxtic sugar for total number of hits for this query
840    
841     print $res->hits;
842    
843     It's same as
844    
845     print $res->hint('HIT');
846    
847     but shorter.
848    
849     =cut
850    
851     sub hits {
852     my $self = shift;
853     return $self->{hints}->{'HIT'} || 0;
854     }
855    
856 dpavlin 27 package Search::Estraier::Node;
857    
858 dpavlin 44 use Carp qw/carp croak confess/;
859 dpavlin 33 use URI;
860 dpavlin 36 use MIME::Base64;
861 dpavlin 33 use IO::Socket::INET;
862 dpavlin 49 use URI::Escape qw/uri_escape/;
863 dpavlin 29
864 dpavlin 27 =head1 Search::Estraier::Node
865    
866     =head2 new
867    
868     my $node = new Search::HyperEstraier::Node;
869    
870 dpavlin 65 or optionally with C<url> as parametar
871    
872     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
873    
874 dpavlin 78 or in more verbose form
875    
876     my $node = new Search::HyperEstraier::Node(
877     url => 'http://localhost:1978/node/test',
878 dpavlin 132 user => 'admin',
879     passwd => 'admin'
880 dpavlin 139 create => 1,
881     label => 'optional node label',
882 dpavlin 78 debug => 1,
883     croak_on_error => 1
884     );
885    
886     with following arguments:
887    
888     =over 4
889    
890     =item url
891    
892     URL to node
893    
894 dpavlin 132 =item user
895    
896     specify username for node server authentication
897    
898     =item passwd
899    
900     password for authentication
901    
902 dpavlin 139 =item create
903    
904     create node if it doesn't exists
905    
906     =item label
907    
908     optional label for new node if C<create> is used
909    
910 dpavlin 78 =item debug
911    
912     dumps a B<lot> of debugging output
913    
914     =item croak_on_error
915    
916     very helpful during development. It will croak on all errors instead of
917     silently returning C<-1> (which is convention of Hyper Estraier API in other
918     languages).
919    
920     =back
921    
922 dpavlin 27 =cut
923    
924     sub new {
925     my $class = shift;
926     my $self = {
927     pxport => -1,
928 dpavlin 33 timeout => 0, # this used to be -1
929 dpavlin 27 wwidth => 480,
930     hwidth => 96,
931     awidth => 96,
932     status => -1,
933     };
934 dpavlin 111
935 dpavlin 27 bless($self, $class);
936    
937 dpavlin 65 if ($#_ == 0) {
938     $self->{url} = shift;
939     } else {
940 dpavlin 78 %$self = ( %$self, @_ );
941    
942 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
943    
944 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
945     }
946 dpavlin 57
947 dpavlin 111 $self->{inform} = {
948     dnum => -1,
949     wnum => -1,
950     size => -1.0,
951     };
952    
953 dpavlin 139 if ($self->{create}) {
954 dpavlin 140 if (! eval { $self->name } || $@) {
955 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
956     croak "can't find node name in '$self->{url}'" unless ($name);
957     my $label = $self->{label} || $name;
958     $self->master(
959     action => 'nodeadd',
960     name => $name,
961     label => $label,
962     ) || croak "can't create node $name ($label)";
963     }
964     }
965    
966 dpavlin 27 $self ? return $self : return undef;
967     }
968    
969 dpavlin 42
970 dpavlin 29 =head2 set_url
971    
972     Specify URL to node server
973    
974     $node->set_url('http://localhost:1978');
975    
976     =cut
977    
978     sub set_url {
979     my $self = shift;
980     $self->{url} = shift;
981     }
982    
983 dpavlin 42
984 dpavlin 29 =head2 set_proxy
985    
986     Specify proxy server to connect to node server
987    
988     $node->set_proxy('proxy.example.com', 8080);
989    
990     =cut
991    
992     sub set_proxy {
993     my $self = shift;
994     my ($host,$port) = @_;
995 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
996 dpavlin 29 $self->{pxhost} = $host;
997     $self->{pxport} = $port;
998     }
999    
1000 dpavlin 42
1001 dpavlin 30 =head2 set_timeout
1002    
1003     Specify timeout of connection in seconds
1004    
1005     $node->set_timeout( 15 );
1006    
1007     =cut
1008    
1009     sub set_timeout {
1010     my $self = shift;
1011     my $sec = shift;
1012 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1013 dpavlin 30 $self->{timeout} = $sec;
1014     }
1015    
1016 dpavlin 42
1017 dpavlin 31 =head2 set_auth
1018    
1019     Specify name and password for authentication to node server.
1020    
1021     $node->set_auth('clint','eastwood');
1022    
1023     =cut
1024    
1025     sub set_auth {
1026     my $self = shift;
1027     my ($login,$passwd) = @_;
1028 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1029     chomp($basic_auth);
1030     $self->{auth} = $basic_auth;
1031 dpavlin 31 }
1032    
1033 dpavlin 42
1034 dpavlin 32 =head2 status
1035    
1036     Return status code of last request.
1037    
1038 dpavlin 40 print $node->status;
1039 dpavlin 32
1040     C<-1> means connection failure.
1041    
1042     =cut
1043    
1044     sub status {
1045     my $self = shift;
1046     return $self->{status};
1047     }
1048    
1049 dpavlin 42
1050 dpavlin 40 =head2 put_doc
1051    
1052 dpavlin 41 Add a document
1053 dpavlin 40
1054 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1055    
1056     Return true on success or false on failture.
1057    
1058 dpavlin 40 =cut
1059    
1060     sub put_doc {
1061     my $self = shift;
1062     my $doc = shift || return;
1063 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1064 dpavlin 41 $self->shuttle_url( $self->{url} . '/put_doc',
1065     'text/x-estraier-draft',
1066     $doc->dump_draft,
1067     undef
1068     ) == 200;
1069 dpavlin 40 }
1070    
1071 dpavlin 41
1072     =head2 out_doc
1073    
1074     Remove a document
1075    
1076     $node->out_doc( document_id ) or "can't remove document";
1077    
1078     Return true on success or false on failture.
1079    
1080     =cut
1081    
1082     sub out_doc {
1083     my $self = shift;
1084     my $id = shift || return;
1085     return unless ($self->{url});
1086 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1087 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1088 dpavlin 41 'application/x-www-form-urlencoded',
1089     "id=$id",
1090     undef
1091 dpavlin 150 ) == 200) {
1092     $self->_set_info;
1093     return $id;
1094     }
1095     return undef;
1096 dpavlin 41 }
1097    
1098    
1099     =head2 out_doc_by_uri
1100    
1101     Remove a registrated document using it's uri
1102    
1103 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1104 dpavlin 41
1105     Return true on success or false on failture.
1106    
1107     =cut
1108    
1109     sub out_doc_by_uri {
1110     my $self = shift;
1111     my $uri = shift || return;
1112     return unless ($self->{url});
1113 dpavlin 150 if ($self->shuttle_url( $self->{url} . '/out_doc',
1114 dpavlin 41 'application/x-www-form-urlencoded',
1115 dpavlin 50 "uri=" . uri_escape($uri),
1116 dpavlin 41 undef
1117 dpavlin 150 ) == 200) {
1118     $self->_set_info;
1119     return $uri;
1120     }
1121     return undef;
1122 dpavlin 41 }
1123    
1124 dpavlin 42
1125     =head2 edit_doc
1126    
1127     Edit attributes of a document
1128    
1129     $node->edit_doc( $document_draft ) or die "can't edit document";
1130    
1131     Return true on success or false on failture.
1132    
1133     =cut
1134    
1135     sub edit_doc {
1136     my $self = shift;
1137     my $doc = shift || return;
1138 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1139 dpavlin 42 $self->shuttle_url( $self->{url} . '/edit_doc',
1140     'text/x-estraier-draft',
1141     $doc->dump_draft,
1142     undef
1143     ) == 200;
1144     }
1145    
1146    
1147 dpavlin 43 =head2 get_doc
1148    
1149     Retreive document
1150    
1151     my $doc = $node->get_doc( document_id ) or die "can't get document";
1152    
1153     Return true on success or false on failture.
1154    
1155     =cut
1156    
1157     sub get_doc {
1158     my $self = shift;
1159     my $id = shift || return;
1160     return $self->_fetch_doc( id => $id );
1161     }
1162    
1163 dpavlin 44
1164 dpavlin 43 =head2 get_doc_by_uri
1165    
1166     Retreive document
1167    
1168 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1169 dpavlin 43
1170     Return true on success or false on failture.
1171    
1172     =cut
1173    
1174     sub get_doc_by_uri {
1175     my $self = shift;
1176     my $uri = shift || return;
1177     return $self->_fetch_doc( uri => $uri );
1178     }
1179    
1180 dpavlin 44
1181 dpavlin 49 =head2 get_doc_attr
1182    
1183     Retrieve the value of an atribute from object
1184    
1185     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1186     die "can't get document attribute";
1187    
1188     =cut
1189    
1190     sub get_doc_attr {
1191     my $self = shift;
1192     my ($id,$name) = @_;
1193     return unless ($id && $name);
1194     return $self->_fetch_doc( id => $id, attr => $name );
1195     }
1196    
1197    
1198     =head2 get_doc_attr_by_uri
1199    
1200     Retrieve the value of an atribute from object
1201    
1202     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1203     die "can't get document attribute";
1204    
1205     =cut
1206    
1207     sub get_doc_attr_by_uri {
1208     my $self = shift;
1209     my ($uri,$name) = @_;
1210     return unless ($uri && $name);
1211     return $self->_fetch_doc( uri => $uri, attr => $name );
1212     }
1213    
1214    
1215 dpavlin 44 =head2 etch_doc
1216    
1217     Exctract document keywords
1218    
1219     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1220    
1221     =cut
1222    
1223 dpavlin 49 sub etch_doc {
1224 dpavlin 44 my $self = shift;
1225     my $id = shift || return;
1226     return $self->_fetch_doc( id => $id, etch => 1 );
1227     }
1228    
1229     =head2 etch_doc_by_uri
1230    
1231     Retreive document
1232    
1233 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1234 dpavlin 44
1235     Return true on success or false on failture.
1236    
1237     =cut
1238    
1239     sub etch_doc_by_uri {
1240     my $self = shift;
1241     my $uri = shift || return;
1242     return $self->_fetch_doc( uri => $uri, etch => 1 );
1243     }
1244    
1245    
1246 dpavlin 45 =head2 uri_to_id
1247    
1248     Get ID of document specified by URI
1249    
1250     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1251    
1252 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1253    
1254 dpavlin 45 =cut
1255    
1256     sub uri_to_id {
1257     my $self = shift;
1258     my $uri = shift || return;
1259 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1260 dpavlin 45 }
1261    
1262    
1263 dpavlin 43 =head2 _fetch_doc
1264    
1265 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1266     C<etch_doc>, C<etch_doc_by_uri>.
1267 dpavlin 43
1268 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1269     my $doc = $node->_fetch_doc( id => 42 );
1270     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1271 dpavlin 43
1272 dpavlin 45 # to extract keywords, add etch
1273     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1274     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1275    
1276 dpavlin 49 # to get document attrubute add attr
1277     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1278     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1279    
1280 dpavlin 45 # more general form which allows implementation of
1281     # uri_to_id
1282     my $id = $node->_fetch_doc(
1283     uri => 'file:///document/uri/42',
1284     path => '/uri_to_id',
1285     chomp_resbody => 1
1286     );
1287    
1288 dpavlin 43 =cut
1289    
1290     sub _fetch_doc {
1291     my $self = shift;
1292 dpavlin 44 my $a = {@_};
1293     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1294    
1295     my ($arg, $resbody);
1296    
1297 dpavlin 45 my $path = $a->{path} || '/get_doc';
1298 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1299    
1300     if ($a->{id}) {
1301     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1302     $arg = 'id=' . $a->{id};
1303     } elsif ($a->{uri}) {
1304 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1305 dpavlin 44 } else {
1306     confess "unhandled argument. Need id or uri.";
1307 dpavlin 43 }
1308 dpavlin 44
1309 dpavlin 49 if ($a->{attr}) {
1310     $path = '/get_doc_attr';
1311     $arg .= '&attr=' . uri_escape($a->{attr});
1312     $a->{chomp_resbody} = 1;
1313     }
1314    
1315 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1316 dpavlin 43 'application/x-www-form-urlencoded',
1317 dpavlin 44 $arg,
1318 dpavlin 45 \$resbody,
1319 dpavlin 103 $a->{croak_on_error},
1320 dpavlin 43 );
1321 dpavlin 44
1322 dpavlin 43 return if ($rv != 200);
1323 dpavlin 44
1324     if ($a->{etch}) {
1325     $self->{kwords} = {};
1326     return +{} unless ($resbody);
1327     foreach my $l (split(/\n/, $resbody)) {
1328     my ($k,$v) = split(/\t/, $l, 2);
1329     $self->{kwords}->{$k} = $v if ($v);
1330     }
1331     return $self->{kwords};
1332 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1333     return unless (defined($resbody));
1334     chomp($resbody);
1335     return $resbody;
1336 dpavlin 44 } else {
1337     return new Search::Estraier::Document($resbody);
1338     }
1339 dpavlin 43 }
1340    
1341    
1342 dpavlin 48 =head2 name
1343 dpavlin 43
1344 dpavlin 48 my $node_name = $node->name;
1345 dpavlin 43
1346 dpavlin 48 =cut
1347    
1348     sub name {
1349     my $self = shift;
1350 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1351     return $self->{inform}->{name};
1352 dpavlin 48 }
1353    
1354    
1355     =head2 label
1356    
1357     my $node_label = $node->label;
1358    
1359     =cut
1360    
1361     sub label {
1362     my $self = shift;
1363 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1364     return $self->{inform}->{label};
1365 dpavlin 48 }
1366    
1367    
1368     =head2 doc_num
1369    
1370     my $documents_in_node = $node->doc_num;
1371    
1372     =cut
1373    
1374     sub doc_num {
1375     my $self = shift;
1376 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1377     return $self->{inform}->{dnum};
1378 dpavlin 48 }
1379    
1380    
1381     =head2 word_num
1382    
1383     my $words_in_node = $node->word_num;
1384    
1385     =cut
1386    
1387     sub word_num {
1388     my $self = shift;
1389 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1390     return $self->{inform}->{wnum};
1391 dpavlin 48 }
1392    
1393    
1394     =head2 size
1395    
1396     my $node_size = $node->size;
1397    
1398     =cut
1399    
1400     sub size {
1401     my $self = shift;
1402 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1403     return $self->{inform}->{size};
1404 dpavlin 48 }
1405    
1406    
1407 dpavlin 51 =head2 search
1408 dpavlin 48
1409 dpavlin 51 Search documents which match condition
1410    
1411     my $nres = $node->search( $cond, $depth );
1412    
1413     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1414     depth for meta search.
1415    
1416     Function results C<Search::Estraier::NodeResult> object.
1417    
1418     =cut
1419    
1420     sub search {
1421     my $self = shift;
1422     my ($cond, $depth) = @_;
1423     return unless ($cond && defined($depth) && $self->{url});
1424     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1425     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1426    
1427 dpavlin 52 my $resbody;
1428 dpavlin 51
1429 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1430 dpavlin 53 'application/x-www-form-urlencoded',
1431 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1432 dpavlin 52 \$resbody,
1433     );
1434     return if ($rv != 200);
1435    
1436 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1437     my $hintsText = splice @records, 0, 2; # starts with empty record
1438     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1439    
1440     # process records
1441 dpavlin 128 my $docs = [];
1442 dpavlin 126 foreach my $record (@records)
1443     {
1444     # split into keys and snippets
1445     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1446    
1447     # create document hash
1448     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1449     $doc->{'@keywords'} = $doc->{keywords};
1450     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1451     $doc->{snippet} = $snippet;
1452    
1453     push @$docs, new Search::Estraier::ResultDocument(
1454     attrs => $doc,
1455     uri => $doc->{'@uri'},
1456     snippet => $snippet,
1457     keywords => $doc->{'keywords'},
1458     );
1459     }
1460    
1461     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1462     }
1463    
1464    
1465 dpavlin 51 =head2 cond_to_query
1466    
1467 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1468    
1469 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1470 dpavlin 51
1471     =cut
1472    
1473     sub cond_to_query {
1474     my $self = shift;
1475    
1476     my $cond = shift || return;
1477     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1478 dpavlin 61 my $depth = shift;
1479 dpavlin 51
1480     my @args;
1481    
1482     if (my $phrase = $cond->phrase) {
1483     push @args, 'phrase=' . uri_escape($phrase);
1484     }
1485    
1486     if (my @attrs = $cond->attrs) {
1487     for my $i ( 0 .. $#attrs ) {
1488 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1489 dpavlin 51 }
1490     }
1491    
1492     if (my $order = $cond->order) {
1493     push @args, 'order=' . uri_escape($order);
1494     }
1495    
1496     if (my $max = $cond->max) {
1497     push @args, 'max=' . $max;
1498     } else {
1499     push @args, 'max=' . (1 << 30);
1500     }
1501    
1502     if (my $options = $cond->options) {
1503     push @args, 'options=' . $options;
1504     }
1505    
1506 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1507 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1508     push @args, 'hwidth=' . $self->{hwidth};
1509     push @args, 'awidth=' . $self->{awidth};
1510 dpavlin 122 push @args, 'skip=' . $self->{skip} if ($self->{skip});
1511 dpavlin 51
1512     return join('&', @args);
1513     }
1514    
1515    
1516 dpavlin 33 =head2 shuttle_url
1517 dpavlin 32
1518 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1519 dpavlin 33 master.
1520 dpavlin 2
1521 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1522 dpavlin 2
1523 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1524     body will be saved within object.
1525 dpavlin 2
1526     =cut
1527    
1528 dpavlin 59 use LWP::UserAgent;
1529    
1530 dpavlin 33 sub shuttle_url {
1531     my $self = shift;
1532 dpavlin 2
1533 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1534 dpavlin 2
1535 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1536    
1537 dpavlin 40 $self->{status} = -1;
1538 dpavlin 33
1539 dpavlin 41 warn "## $url\n" if ($self->{debug});
1540 dpavlin 36
1541 dpavlin 33 $url = new URI($url);
1542 dpavlin 37 if (
1543     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1544     !$url->host || !$url->port || $url->port < 1
1545     ) {
1546     carp "can't parse $url\n";
1547     return -1;
1548     }
1549 dpavlin 33
1550 dpavlin 59 my $ua = LWP::UserAgent->new;
1551     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1552 dpavlin 33
1553 dpavlin 59 my $req;
1554 dpavlin 37 if ($reqbody) {
1555 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1556 dpavlin 37 } else {
1557 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1558 dpavlin 37 }
1559    
1560 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1561     $req->headers->header( 'Connection', 'close' );
1562 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1563 dpavlin 59 $req->content_type( $content_type );
1564 dpavlin 37
1565 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1566 dpavlin 2
1567 dpavlin 37 if ($reqbody) {
1568 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1569 dpavlin 59 $req->content( $reqbody );
1570 dpavlin 33 }
1571 dpavlin 2
1572 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1573 dpavlin 2
1574 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1575 dpavlin 2
1576 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1577    
1578 dpavlin 78 if (! $res->is_success) {
1579 dpavlin 103 if ($croak_on_error) {
1580 dpavlin 78 croak("can't get $url: ",$res->status_line);
1581     } else {
1582     return -1;
1583     }
1584     }
1585 dpavlin 2
1586 dpavlin 59 $$resbody .= $res->content;
1587    
1588 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1589 dpavlin 39
1590 dpavlin 40 return $self->{status};
1591 dpavlin 2 }
1592    
1593 dpavlin 48
1594 dpavlin 55 =head2 set_snippet_width
1595 dpavlin 48
1596 dpavlin 55 Set width of snippets in results
1597    
1598     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1599    
1600     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1601     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1602    
1603     C<$hwidth> specified width of strings from beginning of string. Default
1604     value is C<96>. Negative or zero value keep previous value.
1605    
1606     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1607     If negative of zero value is provided previous value is kept unchanged.
1608    
1609     =cut
1610    
1611     sub set_snippet_width {
1612     my $self = shift;
1613    
1614     my ($wwidth, $hwidth, $awidth) = @_;
1615     $self->{wwidth} = $wwidth;
1616     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1617     $self->{awidth} = $awidth if ($awidth >= 0);
1618     }
1619    
1620    
1621 dpavlin 56 =head2 set_user
1622 dpavlin 55
1623 dpavlin 56 Manage users of node
1624    
1625     $node->set_user( 'name', $mode );
1626    
1627     C<$mode> can be one of:
1628    
1629     =over 4
1630    
1631     =item 0
1632    
1633     delete account
1634    
1635     =item 1
1636    
1637     set administrative right for user
1638    
1639     =item 2
1640    
1641     set user account as guest
1642    
1643     =back
1644    
1645     Return true on success, otherwise false.
1646    
1647     =cut
1648    
1649     sub set_user {
1650     my $self = shift;
1651     my ($name, $mode) = @_;
1652    
1653     return unless ($self->{url});
1654     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1655    
1656     $self->shuttle_url( $self->{url} . '/_set_user',
1657     'text/plain',
1658     'name=' . uri_escape($name) . '&mode=' . $mode,
1659     undef
1660     ) == 200;
1661     }
1662    
1663    
1664 dpavlin 57 =head2 set_link
1665    
1666     Manage node links
1667    
1668     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1669    
1670     If C<$credit> is negative, link is removed.
1671    
1672     =cut
1673    
1674     sub set_link {
1675     my $self = shift;
1676     my ($url, $label, $credit) = @_;
1677    
1678     return unless ($self->{url});
1679     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1680    
1681     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1682     $reqbody .= '&credit=' . $credit if ($credit > 0);
1683    
1684 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1685 dpavlin 71 'application/x-www-form-urlencoded',
1686 dpavlin 57 $reqbody,
1687     undef
1688 dpavlin 107 ) == 200) {
1689     # refresh node info after adding link
1690     $self->_set_info;
1691     return 1;
1692     }
1693 dpavlin 150 return undef;
1694 dpavlin 57 }
1695    
1696 dpavlin 107 =head2 admins
1697 dpavlin 57
1698 dpavlin 107 my @admins = @{ $node->admins };
1699    
1700     Return array of users with admin rights on node
1701    
1702     =cut
1703    
1704     sub admins {
1705     my $self = shift;
1706 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1707     return $self->{inform}->{admins};
1708 dpavlin 107 }
1709    
1710     =head2 guests
1711    
1712     my @guests = @{ $node->guests };
1713    
1714     Return array of users with guest rights on node
1715    
1716     =cut
1717    
1718     sub guests {
1719     my $self = shift;
1720 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1721     return $self->{inform}->{guests};
1722 dpavlin 107 }
1723    
1724     =head2 links
1725    
1726     my $links = @{ $node->links };
1727    
1728     Return array of links for this node
1729    
1730     =cut
1731    
1732     sub links {
1733     my $self = shift;
1734 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1735     return $self->{inform}->{links};
1736 dpavlin 107 }
1737    
1738 dpavlin 134 =head2 master
1739 dpavlin 107
1740 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1741    
1742     $node->master(
1743     action => 'sync'
1744     );
1745    
1746     All available actions are documented in
1747     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1748    
1749     =cut
1750    
1751     my $estmaster_rest = {
1752     shutdown => {
1753     status => 202,
1754     },
1755     sync => {
1756     status => 202,
1757     },
1758     backup => {
1759     status => 202,
1760     },
1761     userlist => {
1762     status => 200,
1763 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1764 dpavlin 134 },
1765     useradd => {
1766 dpavlin 135 required => [ qw/name passwd flags/ ],
1767     optional => [ qw/fname misc/ ],
1768 dpavlin 134 status => 200,
1769     },
1770     userdel => {
1771 dpavlin 135 required => [ qw/name/ ],
1772 dpavlin 134 status => 200,
1773     },
1774     nodelist => {
1775     status => 200,
1776 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1777 dpavlin 134 },
1778     nodeadd => {
1779 dpavlin 135 required => [ qw/name/ ],
1780     optional => [ qw/label/ ],
1781 dpavlin 134 status => 200,
1782     },
1783     nodedel => {
1784 dpavlin 135 required => [ qw/name/ ],
1785 dpavlin 134 status => 200,
1786     },
1787     nodeclr => {
1788 dpavlin 135 required => [ qw/name/ ],
1789 dpavlin 134 status => 200,
1790     },
1791     nodertt => {
1792     status => 200,
1793     },
1794     };
1795    
1796     sub master {
1797     my $self = shift;
1798    
1799     my $args = {@_};
1800    
1801     # have action?
1802     my $action = $args->{action} || croak "need action, available: ",
1803     join(", ",keys %{ $estmaster_rest });
1804    
1805     # check if action is valid
1806     my $rest = $estmaster_rest->{$action};
1807     croak "action '$action' is not supported, available actions: ",
1808     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1809    
1810     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1811    
1812     my @args;
1813    
1814     if ($rest->{required} || $rest->{optional}) {
1815    
1816     map {
1817     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1818     push @args, $_ . '=' . uri_escape( $args->{$_} );
1819 dpavlin 136 } ( @{ $rest->{required} } );
1820 dpavlin 134
1821     map {
1822     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1823 dpavlin 136 } ( @{ $rest->{optional} } );
1824 dpavlin 134
1825     }
1826    
1827     my $uri = new URI( $self->{url} );
1828    
1829     my $resbody;
1830    
1831 dpavlin 135 my $status = $self->shuttle_url(
1832 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1833     'application/x-www-form-urlencoded',
1834     join('&', @args),
1835     \$resbody,
1836     1,
1837 dpavlin 135 ) or confess "shuttle_url failed";
1838 dpavlin 134
1839 dpavlin 135 if ($status == $rest->{status}) {
1840 dpavlin 149
1841     # refresh node info after sync
1842     $self->_set_info if ($action eq 'sync');
1843    
1844 dpavlin 135 if ($rest->{returns} && wantarray) {
1845 dpavlin 134
1846     my @results;
1847 dpavlin 135 my $fields = $#{$rest->{returns}};
1848 dpavlin 134
1849     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1850 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1851 dpavlin 134 my $row;
1852 dpavlin 135 foreach my $i ( 0 .. $fields) {
1853     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1854     }
1855 dpavlin 134 push @results, $row;
1856     }
1857    
1858     return @results;
1859    
1860 dpavlin 135 } elsif ($resbody) {
1861 dpavlin 136 chomp $resbody;
1862 dpavlin 134 return $resbody;
1863 dpavlin 135 } else {
1864     return 0E0;
1865 dpavlin 134 }
1866     }
1867 dpavlin 135
1868     carp "expected status $rest->{status}, but got $status";
1869     return undef;
1870 dpavlin 134 }
1871    
1872 dpavlin 55 =head1 PRIVATE METHODS
1873    
1874     You could call those directly, but you don't have to. I hope.
1875    
1876     =head2 _set_info
1877    
1878 dpavlin 48 Set information for node
1879    
1880 dpavlin 55 $node->_set_info;
1881 dpavlin 48
1882     =cut
1883    
1884 dpavlin 55 sub _set_info {
1885 dpavlin 48 my $self = shift;
1886    
1887     $self->{status} = -1;
1888     return unless ($self->{url});
1889    
1890     my $resbody;
1891     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1892     'text/plain',
1893     undef,
1894     \$resbody,
1895     );
1896    
1897     return if ($rv != 200 || !$resbody);
1898    
1899 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1900 dpavlin 48
1901 dpavlin 111 $self->{inform} = {};
1902    
1903     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1904     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1905    
1906 dpavlin 107 return $resbody unless (@lines);
1907    
1908     shift @lines;
1909    
1910     while(my $admin = shift @lines) {
1911 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1912 dpavlin 107 }
1913 dpavlin 111
1914 dpavlin 107 while(my $guest = shift @lines) {
1915 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1916 dpavlin 107 }
1917    
1918     while(my $link = shift @lines) {
1919 dpavlin 111 push @{$self->{inform}->{links}}, $link;
1920 dpavlin 107 }
1921    
1922     return $resbody;
1923    
1924 dpavlin 48 }
1925    
1926 dpavlin 2 ###
1927    
1928     =head1 EXPORT
1929    
1930     Nothing.
1931    
1932     =head1 SEE ALSO
1933    
1934     L<http://hyperestraier.sourceforge.net/>
1935    
1936     Hyper Estraier Ruby interface on which this module is based.
1937    
1938     =head1 AUTHOR
1939    
1940     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1941    
1942 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
1943 dpavlin 2
1944     =head1 COPYRIGHT AND LICENSE
1945    
1946 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1947 dpavlin 2
1948     This library is free software; you can redistribute it and/or modify
1949     it under the GPL v2 or later.
1950    
1951     =cut
1952    
1953     1;

  ViewVC Help
Powered by ViewVC 1.1.26