/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 134 - (hide annotations)
Tue May 9 12:21:26 2006 UTC (17 years, 11 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 33965 byte(s)
added Search::Estraier::Node->master to controll estmaster and beginning of tests for it
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 126 our $VERSION = '0.06_1';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23     passwd => 'admin'
24     );
25 dpavlin 68
26     # create document
27     my $doc = new Search::Estraier::Document;
28    
29     # add attributes
30     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
31     $doc->add_attr('@title', "Over the Rainbow");
32    
33     # add body text to document
34     $doc->add_text("Somewhere over the rainbow. Way up high.");
35     $doc->add_text("There's a land that I heard of once in a lullaby.");
36    
37 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
38 dpavlin 68
39     =head2 Simple searcher
40    
41     use Search::Estraier;
42    
43     # create and configure node
44 dpavlin 102 my $node = new Search::Estraier::Node(
45     url => 'http://localhost:1978/node/test',
46     user => 'admin',
47     passwd => 'admin',
48     croak_on_error => 1,
49     );
50 dpavlin 68
51     # create condition
52     my $cond = new Search::Estraier::Condition;
53    
54     # set search phrase
55     $cond->set_phrase("rainbow AND lullaby");
56    
57     my $nres = $node->search($cond, 0);
58 dpavlin 100
59 dpavlin 68 if (defined($nres)) {
60 dpavlin 102 print "Got ", $nres->hits, " results\n";
61    
62 dpavlin 68 # for each document in results
63     for my $i ( 0 ... $nres->doc_num - 1 ) {
64     # get result document
65     my $rdoc = $nres->get_doc($i);
66     # display attribte
67     print "URI: ", $rdoc->attr('@uri'),"\n";
68     print "Title: ", $rdoc->attr('@title'),"\n";
69     print $rdoc->snippet,"\n";
70     }
71     } else {
72     die "error: ", $node->status,"\n";
73     }
74    
75 dpavlin 2 =head1 DESCRIPTION
76    
77     This module is implementation of node API of Hyper Estraier. Since it's
78     perl-only module with dependencies only on standard perl modules, it will
79     run on all platforms on which perl runs. It doesn't require compilation
80     or Hyper Estraier development files on target machine.
81    
82     It is implemented as multiple packages which closly resamble Ruby
83     implementation. It also includes methods to manage nodes.
84    
85 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
86    
87 dpavlin 2 =cut
88    
89 dpavlin 42 =head1 Inheritable common methods
90    
91     This methods should really move somewhere else.
92    
93 dpavlin 15 =head2 _s
94    
95     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
96    
97     my $text = $self->_s(" this is a text ");
98     $text = 'this is a text';
99    
100     =cut
101    
102     sub _s {
103 dpavlin 96 my $text = $_[1];
104     return unless defined($text);
105 dpavlin 15 $text =~ s/\s\s+/ /gs;
106     $text =~ s/^\s+//;
107     $text =~ s/\s+$//;
108     return $text;
109     }
110    
111 dpavlin 2 package Search::Estraier::Document;
112    
113 dpavlin 9 use Carp qw/croak confess/;
114 dpavlin 7
115 dpavlin 15 use Search::Estraier;
116     our @ISA = qw/Search::Estraier/;
117    
118 dpavlin 2 =head1 Search::Estraier::Document
119    
120 dpavlin 14 This class implements Document which is collection of attributes
121     (key=value), vectors (also key value) display text and hidden text.
122    
123 dpavlin 42
124 dpavlin 2 =head2 new
125    
126 dpavlin 14 Create new document, empty or from draft.
127    
128 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
129 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
130 dpavlin 2
131     =cut
132    
133     sub new {
134     my $class = shift;
135 dpavlin 14 my $self = {};
136 dpavlin 2 bless($self, $class);
137    
138 dpavlin 6 $self->{id} = -1;
139    
140 dpavlin 14 my $draft = shift;
141    
142     if ($draft) {
143     my $in_text = 0;
144     foreach my $line (split(/\n/, $draft)) {
145    
146     if ($in_text) {
147     if ($line =~ /^\t/) {
148     push @{ $self->{htexts} }, substr($line, 1);
149     } else {
150     push @{ $self->{dtexts} }, $line;
151     }
152     next;
153     }
154    
155     if ($line =~ m/^%VECTOR\t(.+)$/) {
156     my @fields = split(/\t/, $1);
157     for my $i ( 0 .. ($#fields - 1) ) {
158     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
159     $i++;
160     }
161     next;
162     } elsif ($line =~ m/^%/) {
163     # What is this? comment?
164     #warn "$line\n";
165     next;
166     } elsif ($line =~ m/^$/) {
167     $in_text = 1;
168     next;
169 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
170 dpavlin 14 $self->{attrs}->{ $1 } = $2;
171     next;
172     }
173    
174 dpavlin 81 warn "draft ignored: '$line'\n";
175 dpavlin 14 }
176     }
177    
178 dpavlin 2 $self ? return $self : return undef;
179     }
180    
181 dpavlin 4
182 dpavlin 2 =head2 add_attr
183    
184 dpavlin 6 Add an attribute.
185    
186 dpavlin 2 $doc->add_attr( name => 'value' );
187    
188 dpavlin 9 Delete attribute using
189 dpavlin 5
190     $doc->add_attr( name => undef );
191    
192 dpavlin 2 =cut
193    
194     sub add_attr {
195     my $self = shift;
196     my $attrs = {@_};
197    
198     while (my ($name, $value) = each %{ $attrs }) {
199 dpavlin 9 if (! defined($value)) {
200 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
201 dpavlin 9 } else {
202 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
203 dpavlin 9 }
204 dpavlin 2 }
205 dpavlin 8
206     return 1;
207 dpavlin 2 }
208    
209 dpavlin 5
210     =head2 add_text
211    
212 dpavlin 6 Add a sentence of text.
213    
214 dpavlin 5 $doc->add_text('this is example text to display');
215    
216     =cut
217    
218     sub add_text {
219     my $self = shift;
220     my $text = shift;
221     return unless defined($text);
222    
223 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
224 dpavlin 5 }
225    
226    
227     =head2 add_hidden_text
228    
229 dpavlin 6 Add a hidden sentence.
230    
231 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
232    
233     =cut
234    
235     sub add_hidden_text {
236     my $self = shift;
237     my $text = shift;
238     return unless defined($text);
239    
240 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
241 dpavlin 5 }
242    
243 dpavlin 42
244 dpavlin 6 =head2 id
245    
246     Get the ID number of document. If the object has never been registred, C<-1> is returned.
247    
248     print $doc->id;
249    
250     =cut
251    
252     sub id {
253     my $self = shift;
254     return $self->{id};
255     }
256    
257 dpavlin 42
258 dpavlin 7 =head2 attr_names
259    
260 dpavlin 9 Returns array with attribute names from document object.
261 dpavlin 7
262     my @attrs = $doc->attr_names;
263    
264     =cut
265    
266     sub attr_names {
267     my $self = shift;
268 dpavlin 60 return unless ($self->{attrs});
269     #croak "attr_names return array, not scalar" if (! wantarray);
270 dpavlin 7 return sort keys %{ $self->{attrs} };
271     }
272    
273 dpavlin 8
274     =head2 attr
275    
276 dpavlin 9 Returns value of an attribute.
277 dpavlin 8
278     my $value = $doc->attr( 'attribute' );
279    
280     =cut
281    
282     sub attr {
283     my $self = shift;
284     my $name = shift;
285 dpavlin 60 return unless (defined($name) && $self->{attrs});
286     return $self->{attrs}->{ $name };
287 dpavlin 8 }
288    
289 dpavlin 9
290     =head2 texts
291    
292     Returns array with text sentences.
293    
294     my @texts = $doc->texts;
295    
296     =cut
297    
298     sub texts {
299     my $self = shift;
300 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
301     return @{ $self->{dtexts} } if ($self->{dtexts});
302 dpavlin 9 }
303    
304 dpavlin 42
305 dpavlin 12 =head2 cat_texts
306    
307     Return whole text as single scalar.
308    
309     my $text = $doc->cat_texts;
310    
311     =cut
312    
313     sub cat_texts {
314     my $self = shift;
315 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
316 dpavlin 12 }
317    
318 dpavlin 42
319 dpavlin 5 =head2 dump_draft
320    
321 dpavlin 13 Dump draft data from document object.
322    
323 dpavlin 5 print $doc->dump_draft;
324    
325     =cut
326    
327     sub dump_draft {
328 dpavlin 13 my $self = shift;
329     my $draft;
330    
331     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
332 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
333 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
334 dpavlin 13 }
335    
336     if ($self->{kwords}) {
337     $draft .= '%%VECTOR';
338     while (my ($key, $value) = each %{ $self->{kwords} }) {
339     $draft .= "\t$key\t$value";
340     }
341     $draft .= "\n";
342     }
343    
344     $draft .= "\n";
345    
346 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
347     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
348 dpavlin 13
349     return $draft;
350 dpavlin 5 }
351    
352 dpavlin 42
353 dpavlin 4 =head2 delete
354 dpavlin 2
355 dpavlin 4 Empty document object
356 dpavlin 2
357 dpavlin 4 $doc->delete;
358    
359 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
360     convinience. Document objects which go out of scope will be destroyed
361     automatically.
362    
363 dpavlin 4 =cut
364    
365     sub delete {
366     my $self = shift;
367    
368 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
369 dpavlin 5 delete($self->{$data});
370     }
371 dpavlin 4
372 dpavlin 10 $self->{id} = -1;
373    
374 dpavlin 4 return 1;
375     }
376    
377    
378    
379 dpavlin 15 package Search::Estraier::Condition;
380 dpavlin 4
381 dpavlin 98 use Carp qw/carp confess croak/;
382 dpavlin 16
383 dpavlin 15 use Search::Estraier;
384     our @ISA = qw/Search::Estraier/;
385 dpavlin 4
386 dpavlin 16 =head1 Search::Estraier::Condition
387    
388     =head2 new
389    
390     my $cond = new Search::HyperEstraier::Condition;
391    
392     =cut
393    
394     sub new {
395     my $class = shift;
396     my $self = {};
397     bless($self, $class);
398    
399 dpavlin 19 $self->{max} = -1;
400     $self->{options} = 0;
401    
402 dpavlin 16 $self ? return $self : return undef;
403     }
404    
405 dpavlin 42
406 dpavlin 16 =head2 set_phrase
407    
408     $cond->set_phrase('search phrase');
409    
410     =cut
411    
412     sub set_phrase {
413     my $self = shift;
414     $self->{phrase} = $self->_s( shift );
415     }
416    
417 dpavlin 42
418 dpavlin 16 =head2 add_attr
419    
420     $cond->add_attr('@URI STRINC /~dpavlin/');
421    
422     =cut
423    
424     sub add_attr {
425     my $self = shift;
426     my $attr = shift || return;
427     push @{ $self->{attrs} }, $self->_s( $attr );
428     }
429    
430 dpavlin 42
431 dpavlin 16 =head2 set_order
432    
433     $cond->set_order('@mdate NUMD');
434    
435     =cut
436    
437     sub set_order {
438     my $self = shift;
439     $self->{order} = shift;
440     }
441    
442 dpavlin 42
443 dpavlin 16 =head2 set_max
444    
445     $cond->set_max(42);
446    
447     =cut
448    
449     sub set_max {
450     my $self = shift;
451     my $max = shift;
452 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
453 dpavlin 16 $self->{max} = $max;
454     }
455    
456 dpavlin 42
457 dpavlin 16 =head2 set_options
458    
459 dpavlin 98 $cond->set_options( 'SURE' );
460 dpavlin 16
461 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
462    
463     Possible options are:
464    
465     =over 8
466    
467     =item SURE
468    
469     check every N-gram
470    
471     =item USUAL
472    
473     check every second N-gram
474    
475     =item FAST
476    
477     check every third N-gram
478    
479     =item AGITO
480    
481     check every fourth N-gram
482    
483     =item NOIDF
484    
485     don't perform TF-IDF tuning
486    
487     =item SIMPLE
488    
489     use simplified query phrase
490    
491     =back
492    
493     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
494     options;
495    
496     This option changed in version C<0.04> of this module. It's backwards compatibile.
497    
498 dpavlin 16 =cut
499    
500 dpavlin 15 my $options = {
501     SURE => 1 << 0,
502     USUAL => 1 << 1,
503     FAST => 1 << 2,
504     AGITO => 1 << 3,
505     NOIDF => 1 << 4,
506     SIMPLE => 1 << 10,
507     };
508    
509 dpavlin 16 sub set_options {
510     my $self = shift;
511 dpavlin 98 my $opt = 0;
512     foreach my $option (@_) {
513     my $mask;
514     unless ($mask = $options->{$option}) {
515     if ($option eq '1') {
516     next;
517     } else {
518     croak "unknown option $option";
519     }
520     }
521     $opt += $mask;
522     }
523     $self->{options} = $opt;
524 dpavlin 4 }
525    
526 dpavlin 42
527 dpavlin 18 =head2 phrase
528    
529     Return search phrase.
530    
531     print $cond->phrase;
532    
533     =cut
534    
535     sub phrase {
536     my $self = shift;
537     return $self->{phrase};
538     }
539    
540 dpavlin 42
541 dpavlin 19 =head2 order
542 dpavlin 18
543 dpavlin 19 Return search result order.
544    
545     print $cond->order;
546    
547     =cut
548    
549     sub order {
550     my $self = shift;
551     return $self->{order};
552     }
553    
554 dpavlin 42
555 dpavlin 19 =head2 attrs
556    
557     Return search result attrs.
558    
559     my @cond_attrs = $cond->attrs;
560    
561     =cut
562    
563     sub attrs {
564     my $self = shift;
565     #croak "attrs return array, not scalar" if (! wantarray);
566 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
567 dpavlin 19 }
568    
569 dpavlin 42
570 dpavlin 19 =head2 max
571    
572     Return maximum number of results.
573    
574     print $cond->max;
575    
576     C<-1> is returned for unitialized value, C<0> is unlimited.
577    
578     =cut
579    
580     sub max {
581     my $self = shift;
582     return $self->{max};
583     }
584    
585 dpavlin 42
586 dpavlin 19 =head2 options
587    
588     Return options for this condition.
589    
590     print $cond->options;
591    
592     Options are returned in numerical form.
593    
594     =cut
595    
596     sub options {
597     my $self = shift;
598     return $self->{options};
599     }
600    
601    
602 dpavlin 116 =head2 set_skip
603    
604     Set number of skipped documents from beginning of results
605    
606     $cond->set_skip(42);
607    
608     Similar to C<offset> in RDBMS.
609    
610     =cut
611    
612     sub set_skip {
613     my $self = shift;
614     $self->{skip} = shift;
615     }
616    
617     =head2 skip
618    
619     Return skip for this condition.
620    
621     print $cond->skip;
622    
623     =cut
624    
625     sub skip {
626     my $self = shift;
627     return $self->{skip};
628     }
629    
630    
631 dpavlin 20 package Search::Estraier::ResultDocument;
632    
633 dpavlin 24 use Carp qw/croak/;
634 dpavlin 20
635 dpavlin 24 #use Search::Estraier;
636     #our @ISA = qw/Search::Estraier/;
637 dpavlin 20
638     =head1 Search::Estraier::ResultDocument
639    
640     =head2 new
641    
642 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
643 dpavlin 20 uri => 'http://localhost/document/uri/42',
644     attrs => {
645     foo => 1,
646     bar => 2,
647     },
648     snippet => 'this is a text of snippet'
649     keywords => 'this\tare\tkeywords'
650     );
651    
652     =cut
653    
654     sub new {
655     my $class = shift;
656     my $self = {@_};
657     bless($self, $class);
658    
659 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
660 dpavlin 20
661     $self ? return $self : return undef;
662     }
663    
664 dpavlin 42
665 dpavlin 23 =head2 uri
666 dpavlin 20
667 dpavlin 23 Return URI of result document
668 dpavlin 20
669 dpavlin 23 print $rdoc->uri;
670    
671     =cut
672    
673     sub uri {
674     my $self = shift;
675     return $self->{uri};
676     }
677    
678    
679     =head2 attr_names
680    
681     Returns array with attribute names from result document object.
682    
683     my @attrs = $rdoc->attr_names;
684    
685     =cut
686    
687     sub attr_names {
688     my $self = shift;
689     croak "attr_names return array, not scalar" if (! wantarray);
690     return sort keys %{ $self->{attrs} };
691     }
692    
693 dpavlin 42
694 dpavlin 23 =head2 attr
695    
696     Returns value of an attribute.
697    
698     my $value = $rdoc->attr( 'attribute' );
699    
700     =cut
701    
702     sub attr {
703     my $self = shift;
704     my $name = shift || return;
705     return $self->{attrs}->{ $name };
706     }
707    
708 dpavlin 42
709 dpavlin 23 =head2 snippet
710    
711     Return snippet from result document
712    
713     print $rdoc->snippet;
714    
715     =cut
716    
717     sub snippet {
718     my $self = shift;
719     return $self->{snippet};
720     }
721    
722 dpavlin 42
723 dpavlin 23 =head2 keywords
724    
725     Return keywords from result document
726    
727     print $rdoc->keywords;
728    
729     =cut
730    
731     sub keywords {
732     my $self = shift;
733     return $self->{keywords};
734     }
735    
736    
737 dpavlin 25 package Search::Estraier::NodeResult;
738    
739     use Carp qw/croak/;
740    
741     #use Search::Estraier;
742     #our @ISA = qw/Search::Estraier/;
743    
744     =head1 Search::Estraier::NodeResult
745    
746     =head2 new
747    
748     my $res = new Search::HyperEstraier::NodeResult(
749     docs => @array_of_rdocs,
750     hits => %hash_with_hints,
751     );
752    
753     =cut
754    
755     sub new {
756     my $class = shift;
757     my $self = {@_};
758     bless($self, $class);
759    
760     foreach my $f (qw/docs hints/) {
761     croak "missing $f for ResultDocument" unless defined($self->{$f});
762     }
763    
764     $self ? return $self : return undef;
765     }
766    
767 dpavlin 42
768 dpavlin 25 =head2 doc_num
769    
770     Return number of documents
771    
772     print $res->doc_num;
773    
774 dpavlin 100 This will return real number of documents (limited by C<max>).
775     If you want to get total number of hits, see C<hits>.
776    
777 dpavlin 25 =cut
778    
779     sub doc_num {
780     my $self = shift;
781 dpavlin 53 return $#{$self->{docs}} + 1;
782 dpavlin 25 }
783    
784 dpavlin 42
785 dpavlin 25 =head2 get_doc
786    
787     Return single document
788    
789     my $doc = $res->get_doc( 42 );
790    
791     Returns undef if document doesn't exist.
792    
793     =cut
794    
795     sub get_doc {
796     my $self = shift;
797     my $num = shift;
798 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
799 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
800     return $self->{docs}->[$num];
801     }
802    
803 dpavlin 42
804 dpavlin 25 =head2 hint
805    
806     Return specific hint from results.
807    
808 dpavlin 100 print $res->hint( 'VERSION' );
809 dpavlin 25
810     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
811     C<TIME>, C<LINK#n>, C<VIEW>.
812    
813     =cut
814    
815     sub hint {
816     my $self = shift;
817     my $key = shift || return;
818     return $self->{hints}->{$key};
819     }
820    
821 dpavlin 108 =head2 hints
822 dpavlin 25
823 dpavlin 91 More perlish version of C<hint>. This one returns hash.
824    
825 dpavlin 100 my %hints = $res->hints;
826 dpavlin 91
827     =cut
828    
829     sub hints {
830     my $self = shift;
831     return $self->{hints};
832     }
833    
834 dpavlin 100 =head2 hits
835    
836     Syntaxtic sugar for total number of hits for this query
837    
838     print $res->hits;
839    
840     It's same as
841    
842     print $res->hint('HIT');
843    
844     but shorter.
845    
846     =cut
847    
848     sub hits {
849     my $self = shift;
850     return $self->{hints}->{'HIT'} || 0;
851     }
852    
853 dpavlin 27 package Search::Estraier::Node;
854    
855 dpavlin 44 use Carp qw/carp croak confess/;
856 dpavlin 33 use URI;
857 dpavlin 36 use MIME::Base64;
858 dpavlin 33 use IO::Socket::INET;
859 dpavlin 49 use URI::Escape qw/uri_escape/;
860 dpavlin 29
861 dpavlin 27 =head1 Search::Estraier::Node
862    
863     =head2 new
864    
865     my $node = new Search::HyperEstraier::Node;
866    
867 dpavlin 65 or optionally with C<url> as parametar
868    
869     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
870    
871 dpavlin 78 or in more verbose form
872    
873     my $node = new Search::HyperEstraier::Node(
874     url => 'http://localhost:1978/node/test',
875 dpavlin 132 user => 'admin',
876     passwd => 'admin'
877 dpavlin 78 debug => 1,
878     croak_on_error => 1
879     );
880    
881     with following arguments:
882    
883     =over 4
884    
885     =item url
886    
887     URL to node
888    
889 dpavlin 132 =item user
890    
891     specify username for node server authentication
892    
893     =item passwd
894    
895     password for authentication
896    
897 dpavlin 78 =item debug
898    
899     dumps a B<lot> of debugging output
900    
901     =item croak_on_error
902    
903     very helpful during development. It will croak on all errors instead of
904     silently returning C<-1> (which is convention of Hyper Estraier API in other
905     languages).
906    
907     =back
908    
909 dpavlin 27 =cut
910    
911     sub new {
912     my $class = shift;
913     my $self = {
914     pxport => -1,
915 dpavlin 33 timeout => 0, # this used to be -1
916 dpavlin 27 wwidth => 480,
917     hwidth => 96,
918     awidth => 96,
919     status => -1,
920     };
921 dpavlin 111
922 dpavlin 27 bless($self, $class);
923    
924 dpavlin 65 if ($#_ == 0) {
925     $self->{url} = shift;
926     } else {
927 dpavlin 78 %$self = ( %$self, @_ );
928    
929 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
930    
931 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
932     }
933 dpavlin 57
934 dpavlin 111 $self->{inform} = {
935     dnum => -1,
936     wnum => -1,
937     size => -1.0,
938     };
939    
940 dpavlin 27 $self ? return $self : return undef;
941     }
942    
943 dpavlin 42
944 dpavlin 29 =head2 set_url
945    
946     Specify URL to node server
947    
948     $node->set_url('http://localhost:1978');
949    
950     =cut
951    
952     sub set_url {
953     my $self = shift;
954     $self->{url} = shift;
955     }
956    
957 dpavlin 42
958 dpavlin 29 =head2 set_proxy
959    
960     Specify proxy server to connect to node server
961    
962     $node->set_proxy('proxy.example.com', 8080);
963    
964     =cut
965    
966     sub set_proxy {
967     my $self = shift;
968     my ($host,$port) = @_;
969 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
970 dpavlin 29 $self->{pxhost} = $host;
971     $self->{pxport} = $port;
972     }
973    
974 dpavlin 42
975 dpavlin 30 =head2 set_timeout
976    
977     Specify timeout of connection in seconds
978    
979     $node->set_timeout( 15 );
980    
981     =cut
982    
983     sub set_timeout {
984     my $self = shift;
985     my $sec = shift;
986 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
987 dpavlin 30 $self->{timeout} = $sec;
988     }
989    
990 dpavlin 42
991 dpavlin 31 =head2 set_auth
992    
993     Specify name and password for authentication to node server.
994    
995     $node->set_auth('clint','eastwood');
996    
997     =cut
998    
999     sub set_auth {
1000     my $self = shift;
1001     my ($login,$passwd) = @_;
1002 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1003     chomp($basic_auth);
1004     $self->{auth} = $basic_auth;
1005 dpavlin 31 }
1006    
1007 dpavlin 42
1008 dpavlin 32 =head2 status
1009    
1010     Return status code of last request.
1011    
1012 dpavlin 40 print $node->status;
1013 dpavlin 32
1014     C<-1> means connection failure.
1015    
1016     =cut
1017    
1018     sub status {
1019     my $self = shift;
1020     return $self->{status};
1021     }
1022    
1023 dpavlin 42
1024 dpavlin 40 =head2 put_doc
1025    
1026 dpavlin 41 Add a document
1027 dpavlin 40
1028 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1029    
1030     Return true on success or false on failture.
1031    
1032 dpavlin 40 =cut
1033    
1034     sub put_doc {
1035     my $self = shift;
1036     my $doc = shift || return;
1037 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1038 dpavlin 41 $self->shuttle_url( $self->{url} . '/put_doc',
1039     'text/x-estraier-draft',
1040     $doc->dump_draft,
1041     undef
1042     ) == 200;
1043 dpavlin 40 }
1044    
1045 dpavlin 41
1046     =head2 out_doc
1047    
1048     Remove a document
1049    
1050     $node->out_doc( document_id ) or "can't remove document";
1051    
1052     Return true on success or false on failture.
1053    
1054     =cut
1055    
1056     sub out_doc {
1057     my $self = shift;
1058     my $id = shift || return;
1059     return unless ($self->{url});
1060 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1061 dpavlin 41 $self->shuttle_url( $self->{url} . '/out_doc',
1062     'application/x-www-form-urlencoded',
1063     "id=$id",
1064     undef
1065     ) == 200;
1066     }
1067    
1068    
1069     =head2 out_doc_by_uri
1070    
1071     Remove a registrated document using it's uri
1072    
1073 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1074 dpavlin 41
1075     Return true on success or false on failture.
1076    
1077     =cut
1078    
1079     sub out_doc_by_uri {
1080     my $self = shift;
1081     my $uri = shift || return;
1082     return unless ($self->{url});
1083     $self->shuttle_url( $self->{url} . '/out_doc',
1084     'application/x-www-form-urlencoded',
1085 dpavlin 50 "uri=" . uri_escape($uri),
1086 dpavlin 41 undef
1087     ) == 200;
1088     }
1089    
1090 dpavlin 42
1091     =head2 edit_doc
1092    
1093     Edit attributes of a document
1094    
1095     $node->edit_doc( $document_draft ) or die "can't edit document";
1096    
1097     Return true on success or false on failture.
1098    
1099     =cut
1100    
1101     sub edit_doc {
1102     my $self = shift;
1103     my $doc = shift || return;
1104 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1105 dpavlin 42 $self->shuttle_url( $self->{url} . '/edit_doc',
1106     'text/x-estraier-draft',
1107     $doc->dump_draft,
1108     undef
1109     ) == 200;
1110     }
1111    
1112    
1113 dpavlin 43 =head2 get_doc
1114    
1115     Retreive document
1116    
1117     my $doc = $node->get_doc( document_id ) or die "can't get document";
1118    
1119     Return true on success or false on failture.
1120    
1121     =cut
1122    
1123     sub get_doc {
1124     my $self = shift;
1125     my $id = shift || return;
1126     return $self->_fetch_doc( id => $id );
1127     }
1128    
1129 dpavlin 44
1130 dpavlin 43 =head2 get_doc_by_uri
1131    
1132     Retreive document
1133    
1134 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1135 dpavlin 43
1136     Return true on success or false on failture.
1137    
1138     =cut
1139    
1140     sub get_doc_by_uri {
1141     my $self = shift;
1142     my $uri = shift || return;
1143     return $self->_fetch_doc( uri => $uri );
1144     }
1145    
1146 dpavlin 44
1147 dpavlin 49 =head2 get_doc_attr
1148    
1149     Retrieve the value of an atribute from object
1150    
1151     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1152     die "can't get document attribute";
1153    
1154     =cut
1155    
1156     sub get_doc_attr {
1157     my $self = shift;
1158     my ($id,$name) = @_;
1159     return unless ($id && $name);
1160     return $self->_fetch_doc( id => $id, attr => $name );
1161     }
1162    
1163    
1164     =head2 get_doc_attr_by_uri
1165    
1166     Retrieve the value of an atribute from object
1167    
1168     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1169     die "can't get document attribute";
1170    
1171     =cut
1172    
1173     sub get_doc_attr_by_uri {
1174     my $self = shift;
1175     my ($uri,$name) = @_;
1176     return unless ($uri && $name);
1177     return $self->_fetch_doc( uri => $uri, attr => $name );
1178     }
1179    
1180    
1181 dpavlin 44 =head2 etch_doc
1182    
1183     Exctract document keywords
1184    
1185     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1186    
1187     =cut
1188    
1189 dpavlin 49 sub etch_doc {
1190 dpavlin 44 my $self = shift;
1191     my $id = shift || return;
1192     return $self->_fetch_doc( id => $id, etch => 1 );
1193     }
1194    
1195     =head2 etch_doc_by_uri
1196    
1197     Retreive document
1198    
1199 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1200 dpavlin 44
1201     Return true on success or false on failture.
1202    
1203     =cut
1204    
1205     sub etch_doc_by_uri {
1206     my $self = shift;
1207     my $uri = shift || return;
1208     return $self->_fetch_doc( uri => $uri, etch => 1 );
1209     }
1210    
1211    
1212 dpavlin 45 =head2 uri_to_id
1213    
1214     Get ID of document specified by URI
1215    
1216     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1217    
1218 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1219    
1220 dpavlin 45 =cut
1221    
1222     sub uri_to_id {
1223     my $self = shift;
1224     my $uri = shift || return;
1225 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1226 dpavlin 45 }
1227    
1228    
1229 dpavlin 43 =head2 _fetch_doc
1230    
1231 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1232     C<etch_doc>, C<etch_doc_by_uri>.
1233 dpavlin 43
1234 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1235     my $doc = $node->_fetch_doc( id => 42 );
1236     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1237 dpavlin 43
1238 dpavlin 45 # to extract keywords, add etch
1239     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1240     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1241    
1242 dpavlin 49 # to get document attrubute add attr
1243     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1244     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1245    
1246 dpavlin 45 # more general form which allows implementation of
1247     # uri_to_id
1248     my $id = $node->_fetch_doc(
1249     uri => 'file:///document/uri/42',
1250     path => '/uri_to_id',
1251     chomp_resbody => 1
1252     );
1253    
1254 dpavlin 43 =cut
1255    
1256     sub _fetch_doc {
1257     my $self = shift;
1258 dpavlin 44 my $a = {@_};
1259     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1260    
1261     my ($arg, $resbody);
1262    
1263 dpavlin 45 my $path = $a->{path} || '/get_doc';
1264 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1265    
1266     if ($a->{id}) {
1267     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1268     $arg = 'id=' . $a->{id};
1269     } elsif ($a->{uri}) {
1270 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1271 dpavlin 44 } else {
1272     confess "unhandled argument. Need id or uri.";
1273 dpavlin 43 }
1274 dpavlin 44
1275 dpavlin 49 if ($a->{attr}) {
1276     $path = '/get_doc_attr';
1277     $arg .= '&attr=' . uri_escape($a->{attr});
1278     $a->{chomp_resbody} = 1;
1279     }
1280    
1281 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1282 dpavlin 43 'application/x-www-form-urlencoded',
1283 dpavlin 44 $arg,
1284 dpavlin 45 \$resbody,
1285 dpavlin 103 $a->{croak_on_error},
1286 dpavlin 43 );
1287 dpavlin 44
1288 dpavlin 43 return if ($rv != 200);
1289 dpavlin 44
1290     if ($a->{etch}) {
1291     $self->{kwords} = {};
1292     return +{} unless ($resbody);
1293     foreach my $l (split(/\n/, $resbody)) {
1294     my ($k,$v) = split(/\t/, $l, 2);
1295     $self->{kwords}->{$k} = $v if ($v);
1296     }
1297     return $self->{kwords};
1298 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1299     return unless (defined($resbody));
1300     chomp($resbody);
1301     return $resbody;
1302 dpavlin 44 } else {
1303     return new Search::Estraier::Document($resbody);
1304     }
1305 dpavlin 43 }
1306    
1307    
1308 dpavlin 48 =head2 name
1309 dpavlin 43
1310 dpavlin 48 my $node_name = $node->name;
1311 dpavlin 43
1312 dpavlin 48 =cut
1313    
1314     sub name {
1315     my $self = shift;
1316 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1317     return $self->{inform}->{name};
1318 dpavlin 48 }
1319    
1320    
1321     =head2 label
1322    
1323     my $node_label = $node->label;
1324    
1325     =cut
1326    
1327     sub label {
1328     my $self = shift;
1329 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1330     return $self->{inform}->{label};
1331 dpavlin 48 }
1332    
1333    
1334     =head2 doc_num
1335    
1336     my $documents_in_node = $node->doc_num;
1337    
1338     =cut
1339    
1340     sub doc_num {
1341     my $self = shift;
1342 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1343     return $self->{inform}->{dnum};
1344 dpavlin 48 }
1345    
1346    
1347     =head2 word_num
1348    
1349     my $words_in_node = $node->word_num;
1350    
1351     =cut
1352    
1353     sub word_num {
1354     my $self = shift;
1355 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1356     return $self->{inform}->{wnum};
1357 dpavlin 48 }
1358    
1359    
1360     =head2 size
1361    
1362     my $node_size = $node->size;
1363    
1364     =cut
1365    
1366     sub size {
1367     my $self = shift;
1368 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1369     return $self->{inform}->{size};
1370 dpavlin 48 }
1371    
1372    
1373 dpavlin 51 =head2 search
1374 dpavlin 48
1375 dpavlin 51 Search documents which match condition
1376    
1377     my $nres = $node->search( $cond, $depth );
1378    
1379     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1380     depth for meta search.
1381    
1382     Function results C<Search::Estraier::NodeResult> object.
1383    
1384     =cut
1385    
1386     sub search {
1387     my $self = shift;
1388     my ($cond, $depth) = @_;
1389     return unless ($cond && defined($depth) && $self->{url});
1390     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1391     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1392    
1393 dpavlin 52 my $resbody;
1394 dpavlin 51
1395 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1396 dpavlin 53 'application/x-www-form-urlencoded',
1397 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1398 dpavlin 52 \$resbody,
1399     );
1400     return if ($rv != 200);
1401    
1402 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1403     my $hintsText = splice @records, 0, 2; # starts with empty record
1404     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1405    
1406     # process records
1407 dpavlin 128 my $docs = [];
1408 dpavlin 126 foreach my $record (@records)
1409     {
1410     # split into keys and snippets
1411     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1412    
1413     # create document hash
1414     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1415     $doc->{'@keywords'} = $doc->{keywords};
1416     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1417     $doc->{snippet} = $snippet;
1418    
1419     push @$docs, new Search::Estraier::ResultDocument(
1420     attrs => $doc,
1421     uri => $doc->{'@uri'},
1422     snippet => $snippet,
1423     keywords => $doc->{'keywords'},
1424     );
1425     }
1426    
1427     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1428     }
1429    
1430    
1431 dpavlin 51 =head2 cond_to_query
1432    
1433 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1434    
1435 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1436 dpavlin 51
1437     =cut
1438    
1439     sub cond_to_query {
1440     my $self = shift;
1441    
1442     my $cond = shift || return;
1443     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1444 dpavlin 61 my $depth = shift;
1445 dpavlin 51
1446     my @args;
1447    
1448     if (my $phrase = $cond->phrase) {
1449     push @args, 'phrase=' . uri_escape($phrase);
1450     }
1451    
1452     if (my @attrs = $cond->attrs) {
1453     for my $i ( 0 .. $#attrs ) {
1454 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1455 dpavlin 51 }
1456     }
1457    
1458     if (my $order = $cond->order) {
1459     push @args, 'order=' . uri_escape($order);
1460     }
1461    
1462     if (my $max = $cond->max) {
1463     push @args, 'max=' . $max;
1464     } else {
1465     push @args, 'max=' . (1 << 30);
1466     }
1467    
1468     if (my $options = $cond->options) {
1469     push @args, 'options=' . $options;
1470     }
1471    
1472 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1473 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1474     push @args, 'hwidth=' . $self->{hwidth};
1475     push @args, 'awidth=' . $self->{awidth};
1476 dpavlin 122 push @args, 'skip=' . $self->{skip} if ($self->{skip});
1477 dpavlin 51
1478     return join('&', @args);
1479     }
1480    
1481    
1482 dpavlin 33 =head2 shuttle_url
1483 dpavlin 32
1484 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1485 dpavlin 33 master.
1486 dpavlin 2
1487 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1488 dpavlin 2
1489 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1490     body will be saved within object.
1491 dpavlin 2
1492     =cut
1493    
1494 dpavlin 59 use LWP::UserAgent;
1495    
1496 dpavlin 33 sub shuttle_url {
1497     my $self = shift;
1498 dpavlin 2
1499 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1500 dpavlin 2
1501 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1502    
1503 dpavlin 40 $self->{status} = -1;
1504 dpavlin 33
1505 dpavlin 41 warn "## $url\n" if ($self->{debug});
1506 dpavlin 36
1507 dpavlin 33 $url = new URI($url);
1508 dpavlin 37 if (
1509     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1510     !$url->host || !$url->port || $url->port < 1
1511     ) {
1512     carp "can't parse $url\n";
1513     return -1;
1514     }
1515 dpavlin 33
1516 dpavlin 59 my $ua = LWP::UserAgent->new;
1517     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1518 dpavlin 33
1519 dpavlin 59 my $req;
1520 dpavlin 37 if ($reqbody) {
1521 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1522 dpavlin 37 } else {
1523 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1524 dpavlin 37 }
1525    
1526 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1527     $req->headers->header( 'Connection', 'close' );
1528 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1529 dpavlin 59 $req->content_type( $content_type );
1530 dpavlin 37
1531 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1532 dpavlin 2
1533 dpavlin 37 if ($reqbody) {
1534 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1535 dpavlin 59 $req->content( $reqbody );
1536 dpavlin 33 }
1537 dpavlin 2
1538 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1539 dpavlin 2
1540 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1541 dpavlin 2
1542 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1543    
1544 dpavlin 78 if (! $res->is_success) {
1545 dpavlin 103 if ($croak_on_error) {
1546 dpavlin 78 croak("can't get $url: ",$res->status_line);
1547     } else {
1548     return -1;
1549     }
1550     }
1551 dpavlin 2
1552 dpavlin 59 $$resbody .= $res->content;
1553    
1554 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1555 dpavlin 39
1556 dpavlin 40 return $self->{status};
1557 dpavlin 2 }
1558    
1559 dpavlin 48
1560 dpavlin 55 =head2 set_snippet_width
1561 dpavlin 48
1562 dpavlin 55 Set width of snippets in results
1563    
1564     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1565    
1566     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1567     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1568    
1569     C<$hwidth> specified width of strings from beginning of string. Default
1570     value is C<96>. Negative or zero value keep previous value.
1571    
1572     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1573     If negative of zero value is provided previous value is kept unchanged.
1574    
1575     =cut
1576    
1577     sub set_snippet_width {
1578     my $self = shift;
1579    
1580     my ($wwidth, $hwidth, $awidth) = @_;
1581     $self->{wwidth} = $wwidth;
1582     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1583     $self->{awidth} = $awidth if ($awidth >= 0);
1584     }
1585    
1586    
1587 dpavlin 56 =head2 set_user
1588 dpavlin 55
1589 dpavlin 56 Manage users of node
1590    
1591     $node->set_user( 'name', $mode );
1592    
1593     C<$mode> can be one of:
1594    
1595     =over 4
1596    
1597     =item 0
1598    
1599     delete account
1600    
1601     =item 1
1602    
1603     set administrative right for user
1604    
1605     =item 2
1606    
1607     set user account as guest
1608    
1609     =back
1610    
1611     Return true on success, otherwise false.
1612    
1613     =cut
1614    
1615     sub set_user {
1616     my $self = shift;
1617     my ($name, $mode) = @_;
1618    
1619     return unless ($self->{url});
1620     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1621    
1622     $self->shuttle_url( $self->{url} . '/_set_user',
1623     'text/plain',
1624     'name=' . uri_escape($name) . '&mode=' . $mode,
1625     undef
1626     ) == 200;
1627     }
1628    
1629    
1630 dpavlin 57 =head2 set_link
1631    
1632     Manage node links
1633    
1634     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1635    
1636     If C<$credit> is negative, link is removed.
1637    
1638     =cut
1639    
1640     sub set_link {
1641     my $self = shift;
1642     my ($url, $label, $credit) = @_;
1643    
1644     return unless ($self->{url});
1645     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1646    
1647     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1648     $reqbody .= '&credit=' . $credit if ($credit > 0);
1649    
1650 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1651 dpavlin 71 'application/x-www-form-urlencoded',
1652 dpavlin 57 $reqbody,
1653     undef
1654 dpavlin 107 ) == 200) {
1655     # refresh node info after adding link
1656     $self->_set_info;
1657     return 1;
1658     }
1659 dpavlin 57 }
1660    
1661 dpavlin 107 =head2 admins
1662 dpavlin 57
1663 dpavlin 107 my @admins = @{ $node->admins };
1664    
1665     Return array of users with admin rights on node
1666    
1667     =cut
1668    
1669     sub admins {
1670     my $self = shift;
1671 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1672     return $self->{inform}->{admins};
1673 dpavlin 107 }
1674    
1675     =head2 guests
1676    
1677     my @guests = @{ $node->guests };
1678    
1679     Return array of users with guest rights on node
1680    
1681     =cut
1682    
1683     sub guests {
1684     my $self = shift;
1685 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1686     return $self->{inform}->{guests};
1687 dpavlin 107 }
1688    
1689     =head2 links
1690    
1691     my $links = @{ $node->links };
1692    
1693     Return array of links for this node
1694    
1695     =cut
1696    
1697     sub links {
1698     my $self = shift;
1699 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1700     return $self->{inform}->{links};
1701 dpavlin 107 }
1702    
1703 dpavlin 134 =head2 master
1704 dpavlin 107
1705 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1706    
1707     $node->master(
1708     action => 'sync'
1709     );
1710    
1711     All available actions are documented in
1712     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1713    
1714     =cut
1715    
1716     my $estmaster_rest = {
1717     shutdown => {
1718     status => 202,
1719     },
1720     sync => {
1721     status => 202,
1722     },
1723     backup => {
1724     status => 202,
1725     },
1726     userlist => {
1727     status => 200,
1728     returns => qw/name passwd flags fname misc/,
1729     },
1730     useradd => {
1731     required => qw/name passwd flags/,
1732     optional => qw/fname misc/,
1733     status => 200,
1734     },
1735     userdel => {
1736     required => qw/name/,
1737     status => 200,
1738     },
1739     nodelist => {
1740     status => 200,
1741     returns => qw/name label doc_num word_num size/,
1742     },
1743     nodeadd => {
1744     required => qw/name/,
1745     optional => qw/label/,
1746     status => 200,
1747     },
1748     nodedel => {
1749     required => qw/name/,
1750     status => 200,
1751     },
1752     nodeclr => {
1753     required => qw/name/,
1754     status => 200,
1755     },
1756     nodertt => {
1757     status => 200,
1758     },
1759     };
1760    
1761     sub master {
1762     my $self = shift;
1763    
1764     my $args = {@_};
1765    
1766     # have action?
1767     my $action = $args->{action} || croak "need action, available: ",
1768     join(", ",keys %{ $estmaster_rest });
1769    
1770     # check if action is valid
1771     my $rest = $estmaster_rest->{$action};
1772     croak "action '$action' is not supported, available actions: ",
1773     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1774    
1775     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1776    
1777     my @args;
1778    
1779     if ($rest->{required} || $rest->{optional}) {
1780    
1781     map {
1782     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1783     push @args, $_ . '=' . uri_escape( $args->{$_} );
1784     } ( keys %{ $rest->{required} } );
1785    
1786     map {
1787     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1788     } ( keys %{ $rest->{optional} } );
1789    
1790     }
1791    
1792     my $uri = new URI( $self->{url} );
1793    
1794     my $resbody;
1795    
1796     if ($self->shuttle_url(
1797     'http://' . $uri->host_port . '/master?action=' . $action ,
1798     'application/x-www-form-urlencoded',
1799     join('&', @args),
1800     \$resbody,
1801     1,
1802     ) == $rest->{status}) {
1803     return 0E0 unless ($rest->{returns});
1804    
1805     if (wantarray) {
1806    
1807     my @results;
1808    
1809     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1810     my @e = split(/\t/, $line);
1811     my $row;
1812     map { $row->{$_} = shift @e; } @{ $rest->{returns} };
1813     push @results, $row;
1814     }
1815    
1816     return @results;
1817     } else {
1818    
1819     carp "calling master action '$action', but not expecting array back, returning whole body";
1820     return $resbody;
1821     }
1822     }
1823     }
1824    
1825 dpavlin 55 =head1 PRIVATE METHODS
1826    
1827     You could call those directly, but you don't have to. I hope.
1828    
1829     =head2 _set_info
1830    
1831 dpavlin 48 Set information for node
1832    
1833 dpavlin 55 $node->_set_info;
1834 dpavlin 48
1835     =cut
1836    
1837 dpavlin 55 sub _set_info {
1838 dpavlin 48 my $self = shift;
1839    
1840     $self->{status} = -1;
1841     return unless ($self->{url});
1842    
1843     my $resbody;
1844     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1845     'text/plain',
1846     undef,
1847     \$resbody,
1848     );
1849    
1850     return if ($rv != 200 || !$resbody);
1851    
1852 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1853 dpavlin 48
1854 dpavlin 111 $self->{inform} = {};
1855    
1856     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1857     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1858    
1859 dpavlin 107 return $resbody unless (@lines);
1860    
1861     shift @lines;
1862    
1863     while(my $admin = shift @lines) {
1864 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1865 dpavlin 107 }
1866 dpavlin 111
1867 dpavlin 107 while(my $guest = shift @lines) {
1868 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1869 dpavlin 107 }
1870    
1871     while(my $link = shift @lines) {
1872 dpavlin 111 push @{$self->{inform}->{links}}, $link;
1873 dpavlin 107 }
1874    
1875     return $resbody;
1876    
1877 dpavlin 48 }
1878    
1879 dpavlin 2 ###
1880    
1881     =head1 EXPORT
1882    
1883     Nothing.
1884    
1885     =head1 SEE ALSO
1886    
1887     L<http://hyperestraier.sourceforge.net/>
1888    
1889     Hyper Estraier Ruby interface on which this module is based.
1890    
1891     =head1 AUTHOR
1892    
1893     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1894    
1895 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
1896 dpavlin 2
1897     =head1 COPYRIGHT AND LICENSE
1898    
1899 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1900 dpavlin 2
1901     This library is free software; you can redistribute it and/or modify
1902     it under the GPL v2 or later.
1903    
1904     =cut
1905    
1906     1;

  ViewVC Help
Powered by ViewVC 1.1.26