/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 142 - (hide annotations)
Wed May 10 14:57:50 2006 UTC (17 years, 11 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 34738 byte(s)
getting ready for 0.06
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 142 our $VERSION = '0.06';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 14 This class implements Document which is collection of attributes
124     (key=value), vectors (also key value) display text and hidden text.
125    
126 dpavlin 42
127 dpavlin 2 =head2 new
128    
129 dpavlin 14 Create new document, empty or from draft.
130    
131 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
132 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
133 dpavlin 2
134     =cut
135    
136     sub new {
137     my $class = shift;
138 dpavlin 14 my $self = {};
139 dpavlin 2 bless($self, $class);
140    
141 dpavlin 6 $self->{id} = -1;
142    
143 dpavlin 14 my $draft = shift;
144    
145     if ($draft) {
146     my $in_text = 0;
147     foreach my $line (split(/\n/, $draft)) {
148    
149     if ($in_text) {
150     if ($line =~ /^\t/) {
151     push @{ $self->{htexts} }, substr($line, 1);
152     } else {
153     push @{ $self->{dtexts} }, $line;
154     }
155     next;
156     }
157    
158     if ($line =~ m/^%VECTOR\t(.+)$/) {
159     my @fields = split(/\t/, $1);
160     for my $i ( 0 .. ($#fields - 1) ) {
161     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
162     $i++;
163     }
164     next;
165     } elsif ($line =~ m/^%/) {
166     # What is this? comment?
167     #warn "$line\n";
168     next;
169     } elsif ($line =~ m/^$/) {
170     $in_text = 1;
171     next;
172 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
173 dpavlin 14 $self->{attrs}->{ $1 } = $2;
174     next;
175     }
176    
177 dpavlin 81 warn "draft ignored: '$line'\n";
178 dpavlin 14 }
179     }
180    
181 dpavlin 2 $self ? return $self : return undef;
182     }
183    
184 dpavlin 4
185 dpavlin 2 =head2 add_attr
186    
187 dpavlin 6 Add an attribute.
188    
189 dpavlin 2 $doc->add_attr( name => 'value' );
190    
191 dpavlin 9 Delete attribute using
192 dpavlin 5
193     $doc->add_attr( name => undef );
194    
195 dpavlin 2 =cut
196    
197     sub add_attr {
198     my $self = shift;
199     my $attrs = {@_};
200    
201     while (my ($name, $value) = each %{ $attrs }) {
202 dpavlin 9 if (! defined($value)) {
203 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
204 dpavlin 9 } else {
205 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
206 dpavlin 9 }
207 dpavlin 2 }
208 dpavlin 8
209     return 1;
210 dpavlin 2 }
211    
212 dpavlin 5
213     =head2 add_text
214    
215 dpavlin 6 Add a sentence of text.
216    
217 dpavlin 5 $doc->add_text('this is example text to display');
218    
219     =cut
220    
221     sub add_text {
222     my $self = shift;
223     my $text = shift;
224     return unless defined($text);
225    
226 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
227 dpavlin 5 }
228    
229    
230     =head2 add_hidden_text
231    
232 dpavlin 6 Add a hidden sentence.
233    
234 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
235    
236     =cut
237    
238     sub add_hidden_text {
239     my $self = shift;
240     my $text = shift;
241     return unless defined($text);
242    
243 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
244 dpavlin 5 }
245    
246 dpavlin 42
247 dpavlin 6 =head2 id
248    
249     Get the ID number of document. If the object has never been registred, C<-1> is returned.
250    
251     print $doc->id;
252    
253     =cut
254    
255     sub id {
256     my $self = shift;
257     return $self->{id};
258     }
259    
260 dpavlin 42
261 dpavlin 7 =head2 attr_names
262    
263 dpavlin 9 Returns array with attribute names from document object.
264 dpavlin 7
265     my @attrs = $doc->attr_names;
266    
267     =cut
268    
269     sub attr_names {
270     my $self = shift;
271 dpavlin 60 return unless ($self->{attrs});
272     #croak "attr_names return array, not scalar" if (! wantarray);
273 dpavlin 7 return sort keys %{ $self->{attrs} };
274     }
275    
276 dpavlin 8
277     =head2 attr
278    
279 dpavlin 9 Returns value of an attribute.
280 dpavlin 8
281     my $value = $doc->attr( 'attribute' );
282    
283     =cut
284    
285     sub attr {
286     my $self = shift;
287     my $name = shift;
288 dpavlin 60 return unless (defined($name) && $self->{attrs});
289     return $self->{attrs}->{ $name };
290 dpavlin 8 }
291    
292 dpavlin 9
293     =head2 texts
294    
295     Returns array with text sentences.
296    
297     my @texts = $doc->texts;
298    
299     =cut
300    
301     sub texts {
302     my $self = shift;
303 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
304     return @{ $self->{dtexts} } if ($self->{dtexts});
305 dpavlin 9 }
306    
307 dpavlin 42
308 dpavlin 12 =head2 cat_texts
309    
310     Return whole text as single scalar.
311    
312     my $text = $doc->cat_texts;
313    
314     =cut
315    
316     sub cat_texts {
317     my $self = shift;
318 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
319 dpavlin 12 }
320    
321 dpavlin 42
322 dpavlin 5 =head2 dump_draft
323    
324 dpavlin 13 Dump draft data from document object.
325    
326 dpavlin 5 print $doc->dump_draft;
327    
328     =cut
329    
330     sub dump_draft {
331 dpavlin 13 my $self = shift;
332     my $draft;
333    
334     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
335 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
336 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
337 dpavlin 13 }
338    
339     if ($self->{kwords}) {
340     $draft .= '%%VECTOR';
341     while (my ($key, $value) = each %{ $self->{kwords} }) {
342     $draft .= "\t$key\t$value";
343     }
344     $draft .= "\n";
345     }
346    
347     $draft .= "\n";
348    
349 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
350     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
351 dpavlin 13
352     return $draft;
353 dpavlin 5 }
354    
355 dpavlin 42
356 dpavlin 4 =head2 delete
357 dpavlin 2
358 dpavlin 4 Empty document object
359 dpavlin 2
360 dpavlin 4 $doc->delete;
361    
362 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
363     convinience. Document objects which go out of scope will be destroyed
364     automatically.
365    
366 dpavlin 4 =cut
367    
368     sub delete {
369     my $self = shift;
370    
371 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
372 dpavlin 5 delete($self->{$data});
373     }
374 dpavlin 4
375 dpavlin 10 $self->{id} = -1;
376    
377 dpavlin 4 return 1;
378     }
379    
380    
381    
382 dpavlin 15 package Search::Estraier::Condition;
383 dpavlin 4
384 dpavlin 98 use Carp qw/carp confess croak/;
385 dpavlin 16
386 dpavlin 15 use Search::Estraier;
387     our @ISA = qw/Search::Estraier/;
388 dpavlin 4
389 dpavlin 16 =head1 Search::Estraier::Condition
390    
391     =head2 new
392    
393     my $cond = new Search::HyperEstraier::Condition;
394    
395     =cut
396    
397     sub new {
398     my $class = shift;
399     my $self = {};
400     bless($self, $class);
401    
402 dpavlin 19 $self->{max} = -1;
403     $self->{options} = 0;
404    
405 dpavlin 16 $self ? return $self : return undef;
406     }
407    
408 dpavlin 42
409 dpavlin 16 =head2 set_phrase
410    
411     $cond->set_phrase('search phrase');
412    
413     =cut
414    
415     sub set_phrase {
416     my $self = shift;
417     $self->{phrase} = $self->_s( shift );
418     }
419    
420 dpavlin 42
421 dpavlin 16 =head2 add_attr
422    
423     $cond->add_attr('@URI STRINC /~dpavlin/');
424    
425     =cut
426    
427     sub add_attr {
428     my $self = shift;
429     my $attr = shift || return;
430     push @{ $self->{attrs} }, $self->_s( $attr );
431     }
432    
433 dpavlin 42
434 dpavlin 16 =head2 set_order
435    
436     $cond->set_order('@mdate NUMD');
437    
438     =cut
439    
440     sub set_order {
441     my $self = shift;
442     $self->{order} = shift;
443     }
444    
445 dpavlin 42
446 dpavlin 16 =head2 set_max
447    
448     $cond->set_max(42);
449    
450     =cut
451    
452     sub set_max {
453     my $self = shift;
454     my $max = shift;
455 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
456 dpavlin 16 $self->{max} = $max;
457     }
458    
459 dpavlin 42
460 dpavlin 16 =head2 set_options
461    
462 dpavlin 98 $cond->set_options( 'SURE' );
463 dpavlin 16
464 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
465    
466     Possible options are:
467    
468     =over 8
469    
470     =item SURE
471    
472     check every N-gram
473    
474     =item USUAL
475    
476     check every second N-gram
477    
478     =item FAST
479    
480     check every third N-gram
481    
482     =item AGITO
483    
484     check every fourth N-gram
485    
486     =item NOIDF
487    
488     don't perform TF-IDF tuning
489    
490     =item SIMPLE
491    
492     use simplified query phrase
493    
494     =back
495    
496     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
497     options;
498    
499     This option changed in version C<0.04> of this module. It's backwards compatibile.
500    
501 dpavlin 16 =cut
502    
503 dpavlin 15 my $options = {
504     SURE => 1 << 0,
505     USUAL => 1 << 1,
506     FAST => 1 << 2,
507     AGITO => 1 << 3,
508     NOIDF => 1 << 4,
509     SIMPLE => 1 << 10,
510     };
511    
512 dpavlin 16 sub set_options {
513     my $self = shift;
514 dpavlin 98 my $opt = 0;
515     foreach my $option (@_) {
516     my $mask;
517     unless ($mask = $options->{$option}) {
518     if ($option eq '1') {
519     next;
520     } else {
521     croak "unknown option $option";
522     }
523     }
524     $opt += $mask;
525     }
526     $self->{options} = $opt;
527 dpavlin 4 }
528    
529 dpavlin 42
530 dpavlin 18 =head2 phrase
531    
532     Return search phrase.
533    
534     print $cond->phrase;
535    
536     =cut
537    
538     sub phrase {
539     my $self = shift;
540     return $self->{phrase};
541     }
542    
543 dpavlin 42
544 dpavlin 19 =head2 order
545 dpavlin 18
546 dpavlin 19 Return search result order.
547    
548     print $cond->order;
549    
550     =cut
551    
552     sub order {
553     my $self = shift;
554     return $self->{order};
555     }
556    
557 dpavlin 42
558 dpavlin 19 =head2 attrs
559    
560     Return search result attrs.
561    
562     my @cond_attrs = $cond->attrs;
563    
564     =cut
565    
566     sub attrs {
567     my $self = shift;
568     #croak "attrs return array, not scalar" if (! wantarray);
569 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
570 dpavlin 19 }
571    
572 dpavlin 42
573 dpavlin 19 =head2 max
574    
575     Return maximum number of results.
576    
577     print $cond->max;
578    
579     C<-1> is returned for unitialized value, C<0> is unlimited.
580    
581     =cut
582    
583     sub max {
584     my $self = shift;
585     return $self->{max};
586     }
587    
588 dpavlin 42
589 dpavlin 19 =head2 options
590    
591     Return options for this condition.
592    
593     print $cond->options;
594    
595     Options are returned in numerical form.
596    
597     =cut
598    
599     sub options {
600     my $self = shift;
601     return $self->{options};
602     }
603    
604    
605 dpavlin 116 =head2 set_skip
606    
607     Set number of skipped documents from beginning of results
608    
609     $cond->set_skip(42);
610    
611     Similar to C<offset> in RDBMS.
612    
613     =cut
614    
615     sub set_skip {
616     my $self = shift;
617     $self->{skip} = shift;
618     }
619    
620     =head2 skip
621    
622     Return skip for this condition.
623    
624     print $cond->skip;
625    
626     =cut
627    
628     sub skip {
629     my $self = shift;
630     return $self->{skip};
631     }
632    
633    
634 dpavlin 20 package Search::Estraier::ResultDocument;
635    
636 dpavlin 24 use Carp qw/croak/;
637 dpavlin 20
638 dpavlin 24 #use Search::Estraier;
639     #our @ISA = qw/Search::Estraier/;
640 dpavlin 20
641     =head1 Search::Estraier::ResultDocument
642    
643     =head2 new
644    
645 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
646 dpavlin 20 uri => 'http://localhost/document/uri/42',
647     attrs => {
648     foo => 1,
649     bar => 2,
650     },
651     snippet => 'this is a text of snippet'
652     keywords => 'this\tare\tkeywords'
653     );
654    
655     =cut
656    
657     sub new {
658     my $class = shift;
659     my $self = {@_};
660     bless($self, $class);
661    
662 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
663 dpavlin 20
664     $self ? return $self : return undef;
665     }
666    
667 dpavlin 42
668 dpavlin 23 =head2 uri
669 dpavlin 20
670 dpavlin 23 Return URI of result document
671 dpavlin 20
672 dpavlin 23 print $rdoc->uri;
673    
674     =cut
675    
676     sub uri {
677     my $self = shift;
678     return $self->{uri};
679     }
680    
681    
682     =head2 attr_names
683    
684     Returns array with attribute names from result document object.
685    
686     my @attrs = $rdoc->attr_names;
687    
688     =cut
689    
690     sub attr_names {
691     my $self = shift;
692     croak "attr_names return array, not scalar" if (! wantarray);
693     return sort keys %{ $self->{attrs} };
694     }
695    
696 dpavlin 42
697 dpavlin 23 =head2 attr
698    
699     Returns value of an attribute.
700    
701     my $value = $rdoc->attr( 'attribute' );
702    
703     =cut
704    
705     sub attr {
706     my $self = shift;
707     my $name = shift || return;
708     return $self->{attrs}->{ $name };
709     }
710    
711 dpavlin 42
712 dpavlin 23 =head2 snippet
713    
714     Return snippet from result document
715    
716     print $rdoc->snippet;
717    
718     =cut
719    
720     sub snippet {
721     my $self = shift;
722     return $self->{snippet};
723     }
724    
725 dpavlin 42
726 dpavlin 23 =head2 keywords
727    
728     Return keywords from result document
729    
730     print $rdoc->keywords;
731    
732     =cut
733    
734     sub keywords {
735     my $self = shift;
736     return $self->{keywords};
737     }
738    
739    
740 dpavlin 25 package Search::Estraier::NodeResult;
741    
742     use Carp qw/croak/;
743    
744     #use Search::Estraier;
745     #our @ISA = qw/Search::Estraier/;
746    
747     =head1 Search::Estraier::NodeResult
748    
749     =head2 new
750    
751     my $res = new Search::HyperEstraier::NodeResult(
752     docs => @array_of_rdocs,
753     hits => %hash_with_hints,
754     );
755    
756     =cut
757    
758     sub new {
759     my $class = shift;
760     my $self = {@_};
761     bless($self, $class);
762    
763     foreach my $f (qw/docs hints/) {
764     croak "missing $f for ResultDocument" unless defined($self->{$f});
765     }
766    
767     $self ? return $self : return undef;
768     }
769    
770 dpavlin 42
771 dpavlin 25 =head2 doc_num
772    
773     Return number of documents
774    
775     print $res->doc_num;
776    
777 dpavlin 100 This will return real number of documents (limited by C<max>).
778     If you want to get total number of hits, see C<hits>.
779    
780 dpavlin 25 =cut
781    
782     sub doc_num {
783     my $self = shift;
784 dpavlin 53 return $#{$self->{docs}} + 1;
785 dpavlin 25 }
786    
787 dpavlin 42
788 dpavlin 25 =head2 get_doc
789    
790     Return single document
791    
792     my $doc = $res->get_doc( 42 );
793    
794     Returns undef if document doesn't exist.
795    
796     =cut
797    
798     sub get_doc {
799     my $self = shift;
800     my $num = shift;
801 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
802 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
803     return $self->{docs}->[$num];
804     }
805    
806 dpavlin 42
807 dpavlin 25 =head2 hint
808    
809     Return specific hint from results.
810    
811 dpavlin 100 print $res->hint( 'VERSION' );
812 dpavlin 25
813     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
814     C<TIME>, C<LINK#n>, C<VIEW>.
815    
816     =cut
817    
818     sub hint {
819     my $self = shift;
820     my $key = shift || return;
821     return $self->{hints}->{$key};
822     }
823    
824 dpavlin 108 =head2 hints
825 dpavlin 25
826 dpavlin 91 More perlish version of C<hint>. This one returns hash.
827    
828 dpavlin 100 my %hints = $res->hints;
829 dpavlin 91
830     =cut
831    
832     sub hints {
833     my $self = shift;
834     return $self->{hints};
835     }
836    
837 dpavlin 100 =head2 hits
838    
839     Syntaxtic sugar for total number of hits for this query
840    
841     print $res->hits;
842    
843     It's same as
844    
845     print $res->hint('HIT');
846    
847     but shorter.
848    
849     =cut
850    
851     sub hits {
852     my $self = shift;
853     return $self->{hints}->{'HIT'} || 0;
854     }
855    
856 dpavlin 27 package Search::Estraier::Node;
857    
858 dpavlin 44 use Carp qw/carp croak confess/;
859 dpavlin 33 use URI;
860 dpavlin 36 use MIME::Base64;
861 dpavlin 33 use IO::Socket::INET;
862 dpavlin 49 use URI::Escape qw/uri_escape/;
863 dpavlin 29
864 dpavlin 27 =head1 Search::Estraier::Node
865    
866     =head2 new
867    
868     my $node = new Search::HyperEstraier::Node;
869    
870 dpavlin 65 or optionally with C<url> as parametar
871    
872     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
873    
874 dpavlin 78 or in more verbose form
875    
876     my $node = new Search::HyperEstraier::Node(
877     url => 'http://localhost:1978/node/test',
878 dpavlin 132 user => 'admin',
879     passwd => 'admin'
880 dpavlin 139 create => 1,
881     label => 'optional node label',
882 dpavlin 78 debug => 1,
883     croak_on_error => 1
884     );
885    
886     with following arguments:
887    
888     =over 4
889    
890     =item url
891    
892     URL to node
893    
894 dpavlin 132 =item user
895    
896     specify username for node server authentication
897    
898     =item passwd
899    
900     password for authentication
901    
902 dpavlin 139 =item create
903    
904     create node if it doesn't exists
905    
906     =item label
907    
908     optional label for new node if C<create> is used
909    
910 dpavlin 78 =item debug
911    
912     dumps a B<lot> of debugging output
913    
914     =item croak_on_error
915    
916     very helpful during development. It will croak on all errors instead of
917     silently returning C<-1> (which is convention of Hyper Estraier API in other
918     languages).
919    
920     =back
921    
922 dpavlin 27 =cut
923    
924     sub new {
925     my $class = shift;
926     my $self = {
927     pxport => -1,
928 dpavlin 33 timeout => 0, # this used to be -1
929 dpavlin 27 wwidth => 480,
930     hwidth => 96,
931     awidth => 96,
932     status => -1,
933     };
934 dpavlin 111
935 dpavlin 27 bless($self, $class);
936    
937 dpavlin 65 if ($#_ == 0) {
938     $self->{url} = shift;
939     } else {
940 dpavlin 78 %$self = ( %$self, @_ );
941    
942 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
943    
944 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
945     }
946 dpavlin 57
947 dpavlin 111 $self->{inform} = {
948     dnum => -1,
949     wnum => -1,
950     size => -1.0,
951     };
952    
953 dpavlin 139 if ($self->{create}) {
954 dpavlin 140 if (! eval { $self->name } || $@) {
955 dpavlin 139 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
956     croak "can't find node name in '$self->{url}'" unless ($name);
957     my $label = $self->{label} || $name;
958     $self->master(
959     action => 'nodeadd',
960     name => $name,
961     label => $label,
962     ) || croak "can't create node $name ($label)";
963     }
964     }
965    
966 dpavlin 27 $self ? return $self : return undef;
967     }
968    
969 dpavlin 42
970 dpavlin 29 =head2 set_url
971    
972     Specify URL to node server
973    
974     $node->set_url('http://localhost:1978');
975    
976     =cut
977    
978     sub set_url {
979     my $self = shift;
980     $self->{url} = shift;
981     }
982    
983 dpavlin 42
984 dpavlin 29 =head2 set_proxy
985    
986     Specify proxy server to connect to node server
987    
988     $node->set_proxy('proxy.example.com', 8080);
989    
990     =cut
991    
992     sub set_proxy {
993     my $self = shift;
994     my ($host,$port) = @_;
995 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
996 dpavlin 29 $self->{pxhost} = $host;
997     $self->{pxport} = $port;
998     }
999    
1000 dpavlin 42
1001 dpavlin 30 =head2 set_timeout
1002    
1003     Specify timeout of connection in seconds
1004    
1005     $node->set_timeout( 15 );
1006    
1007     =cut
1008    
1009     sub set_timeout {
1010     my $self = shift;
1011     my $sec = shift;
1012 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1013 dpavlin 30 $self->{timeout} = $sec;
1014     }
1015    
1016 dpavlin 42
1017 dpavlin 31 =head2 set_auth
1018    
1019     Specify name and password for authentication to node server.
1020    
1021     $node->set_auth('clint','eastwood');
1022    
1023     =cut
1024    
1025     sub set_auth {
1026     my $self = shift;
1027     my ($login,$passwd) = @_;
1028 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1029     chomp($basic_auth);
1030     $self->{auth} = $basic_auth;
1031 dpavlin 31 }
1032    
1033 dpavlin 42
1034 dpavlin 32 =head2 status
1035    
1036     Return status code of last request.
1037    
1038 dpavlin 40 print $node->status;
1039 dpavlin 32
1040     C<-1> means connection failure.
1041    
1042     =cut
1043    
1044     sub status {
1045     my $self = shift;
1046     return $self->{status};
1047     }
1048    
1049 dpavlin 42
1050 dpavlin 40 =head2 put_doc
1051    
1052 dpavlin 41 Add a document
1053 dpavlin 40
1054 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1055    
1056     Return true on success or false on failture.
1057    
1058 dpavlin 40 =cut
1059    
1060     sub put_doc {
1061     my $self = shift;
1062     my $doc = shift || return;
1063 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1064 dpavlin 41 $self->shuttle_url( $self->{url} . '/put_doc',
1065     'text/x-estraier-draft',
1066     $doc->dump_draft,
1067     undef
1068     ) == 200;
1069 dpavlin 40 }
1070    
1071 dpavlin 41
1072     =head2 out_doc
1073    
1074     Remove a document
1075    
1076     $node->out_doc( document_id ) or "can't remove document";
1077    
1078     Return true on success or false on failture.
1079    
1080     =cut
1081    
1082     sub out_doc {
1083     my $self = shift;
1084     my $id = shift || return;
1085     return unless ($self->{url});
1086 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1087 dpavlin 41 $self->shuttle_url( $self->{url} . '/out_doc',
1088     'application/x-www-form-urlencoded',
1089     "id=$id",
1090     undef
1091     ) == 200;
1092     }
1093    
1094    
1095     =head2 out_doc_by_uri
1096    
1097     Remove a registrated document using it's uri
1098    
1099 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1100 dpavlin 41
1101     Return true on success or false on failture.
1102    
1103     =cut
1104    
1105     sub out_doc_by_uri {
1106     my $self = shift;
1107     my $uri = shift || return;
1108     return unless ($self->{url});
1109     $self->shuttle_url( $self->{url} . '/out_doc',
1110     'application/x-www-form-urlencoded',
1111 dpavlin 50 "uri=" . uri_escape($uri),
1112 dpavlin 41 undef
1113     ) == 200;
1114     }
1115    
1116 dpavlin 42
1117     =head2 edit_doc
1118    
1119     Edit attributes of a document
1120    
1121     $node->edit_doc( $document_draft ) or die "can't edit document";
1122    
1123     Return true on success or false on failture.
1124    
1125     =cut
1126    
1127     sub edit_doc {
1128     my $self = shift;
1129     my $doc = shift || return;
1130 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1131 dpavlin 42 $self->shuttle_url( $self->{url} . '/edit_doc',
1132     'text/x-estraier-draft',
1133     $doc->dump_draft,
1134     undef
1135     ) == 200;
1136     }
1137    
1138    
1139 dpavlin 43 =head2 get_doc
1140    
1141     Retreive document
1142    
1143     my $doc = $node->get_doc( document_id ) or die "can't get document";
1144    
1145     Return true on success or false on failture.
1146    
1147     =cut
1148    
1149     sub get_doc {
1150     my $self = shift;
1151     my $id = shift || return;
1152     return $self->_fetch_doc( id => $id );
1153     }
1154    
1155 dpavlin 44
1156 dpavlin 43 =head2 get_doc_by_uri
1157    
1158     Retreive document
1159    
1160 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1161 dpavlin 43
1162     Return true on success or false on failture.
1163    
1164     =cut
1165    
1166     sub get_doc_by_uri {
1167     my $self = shift;
1168     my $uri = shift || return;
1169     return $self->_fetch_doc( uri => $uri );
1170     }
1171    
1172 dpavlin 44
1173 dpavlin 49 =head2 get_doc_attr
1174    
1175     Retrieve the value of an atribute from object
1176    
1177     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1178     die "can't get document attribute";
1179    
1180     =cut
1181    
1182     sub get_doc_attr {
1183     my $self = shift;
1184     my ($id,$name) = @_;
1185     return unless ($id && $name);
1186     return $self->_fetch_doc( id => $id, attr => $name );
1187     }
1188    
1189    
1190     =head2 get_doc_attr_by_uri
1191    
1192     Retrieve the value of an atribute from object
1193    
1194     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1195     die "can't get document attribute";
1196    
1197     =cut
1198    
1199     sub get_doc_attr_by_uri {
1200     my $self = shift;
1201     my ($uri,$name) = @_;
1202     return unless ($uri && $name);
1203     return $self->_fetch_doc( uri => $uri, attr => $name );
1204     }
1205    
1206    
1207 dpavlin 44 =head2 etch_doc
1208    
1209     Exctract document keywords
1210    
1211     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1212    
1213     =cut
1214    
1215 dpavlin 49 sub etch_doc {
1216 dpavlin 44 my $self = shift;
1217     my $id = shift || return;
1218     return $self->_fetch_doc( id => $id, etch => 1 );
1219     }
1220    
1221     =head2 etch_doc_by_uri
1222    
1223     Retreive document
1224    
1225 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1226 dpavlin 44
1227     Return true on success or false on failture.
1228    
1229     =cut
1230    
1231     sub etch_doc_by_uri {
1232     my $self = shift;
1233     my $uri = shift || return;
1234     return $self->_fetch_doc( uri => $uri, etch => 1 );
1235     }
1236    
1237    
1238 dpavlin 45 =head2 uri_to_id
1239    
1240     Get ID of document specified by URI
1241    
1242     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1243    
1244 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1245    
1246 dpavlin 45 =cut
1247    
1248     sub uri_to_id {
1249     my $self = shift;
1250     my $uri = shift || return;
1251 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1252 dpavlin 45 }
1253    
1254    
1255 dpavlin 43 =head2 _fetch_doc
1256    
1257 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1258     C<etch_doc>, C<etch_doc_by_uri>.
1259 dpavlin 43
1260 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1261     my $doc = $node->_fetch_doc( id => 42 );
1262     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1263 dpavlin 43
1264 dpavlin 45 # to extract keywords, add etch
1265     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1266     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1267    
1268 dpavlin 49 # to get document attrubute add attr
1269     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1270     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1271    
1272 dpavlin 45 # more general form which allows implementation of
1273     # uri_to_id
1274     my $id = $node->_fetch_doc(
1275     uri => 'file:///document/uri/42',
1276     path => '/uri_to_id',
1277     chomp_resbody => 1
1278     );
1279    
1280 dpavlin 43 =cut
1281    
1282     sub _fetch_doc {
1283     my $self = shift;
1284 dpavlin 44 my $a = {@_};
1285     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1286    
1287     my ($arg, $resbody);
1288    
1289 dpavlin 45 my $path = $a->{path} || '/get_doc';
1290 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1291    
1292     if ($a->{id}) {
1293     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1294     $arg = 'id=' . $a->{id};
1295     } elsif ($a->{uri}) {
1296 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1297 dpavlin 44 } else {
1298     confess "unhandled argument. Need id or uri.";
1299 dpavlin 43 }
1300 dpavlin 44
1301 dpavlin 49 if ($a->{attr}) {
1302     $path = '/get_doc_attr';
1303     $arg .= '&attr=' . uri_escape($a->{attr});
1304     $a->{chomp_resbody} = 1;
1305     }
1306    
1307 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1308 dpavlin 43 'application/x-www-form-urlencoded',
1309 dpavlin 44 $arg,
1310 dpavlin 45 \$resbody,
1311 dpavlin 103 $a->{croak_on_error},
1312 dpavlin 43 );
1313 dpavlin 44
1314 dpavlin 43 return if ($rv != 200);
1315 dpavlin 44
1316     if ($a->{etch}) {
1317     $self->{kwords} = {};
1318     return +{} unless ($resbody);
1319     foreach my $l (split(/\n/, $resbody)) {
1320     my ($k,$v) = split(/\t/, $l, 2);
1321     $self->{kwords}->{$k} = $v if ($v);
1322     }
1323     return $self->{kwords};
1324 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1325     return unless (defined($resbody));
1326     chomp($resbody);
1327     return $resbody;
1328 dpavlin 44 } else {
1329     return new Search::Estraier::Document($resbody);
1330     }
1331 dpavlin 43 }
1332    
1333    
1334 dpavlin 48 =head2 name
1335 dpavlin 43
1336 dpavlin 48 my $node_name = $node->name;
1337 dpavlin 43
1338 dpavlin 48 =cut
1339    
1340     sub name {
1341     my $self = shift;
1342 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1343     return $self->{inform}->{name};
1344 dpavlin 48 }
1345    
1346    
1347     =head2 label
1348    
1349     my $node_label = $node->label;
1350    
1351     =cut
1352    
1353     sub label {
1354     my $self = shift;
1355 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1356     return $self->{inform}->{label};
1357 dpavlin 48 }
1358    
1359    
1360     =head2 doc_num
1361    
1362     my $documents_in_node = $node->doc_num;
1363    
1364     =cut
1365    
1366     sub doc_num {
1367     my $self = shift;
1368 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1369     return $self->{inform}->{dnum};
1370 dpavlin 48 }
1371    
1372    
1373     =head2 word_num
1374    
1375     my $words_in_node = $node->word_num;
1376    
1377     =cut
1378    
1379     sub word_num {
1380     my $self = shift;
1381 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1382     return $self->{inform}->{wnum};
1383 dpavlin 48 }
1384    
1385    
1386     =head2 size
1387    
1388     my $node_size = $node->size;
1389    
1390     =cut
1391    
1392     sub size {
1393     my $self = shift;
1394 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1395     return $self->{inform}->{size};
1396 dpavlin 48 }
1397    
1398    
1399 dpavlin 51 =head2 search
1400 dpavlin 48
1401 dpavlin 51 Search documents which match condition
1402    
1403     my $nres = $node->search( $cond, $depth );
1404    
1405     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1406     depth for meta search.
1407    
1408     Function results C<Search::Estraier::NodeResult> object.
1409    
1410     =cut
1411    
1412     sub search {
1413     my $self = shift;
1414     my ($cond, $depth) = @_;
1415     return unless ($cond && defined($depth) && $self->{url});
1416     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1417     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1418    
1419 dpavlin 52 my $resbody;
1420 dpavlin 51
1421 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1422 dpavlin 53 'application/x-www-form-urlencoded',
1423 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1424 dpavlin 52 \$resbody,
1425     );
1426     return if ($rv != 200);
1427    
1428 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1429     my $hintsText = splice @records, 0, 2; # starts with empty record
1430     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1431    
1432     # process records
1433 dpavlin 128 my $docs = [];
1434 dpavlin 126 foreach my $record (@records)
1435     {
1436     # split into keys and snippets
1437     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1438    
1439     # create document hash
1440     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1441     $doc->{'@keywords'} = $doc->{keywords};
1442     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1443     $doc->{snippet} = $snippet;
1444    
1445     push @$docs, new Search::Estraier::ResultDocument(
1446     attrs => $doc,
1447     uri => $doc->{'@uri'},
1448     snippet => $snippet,
1449     keywords => $doc->{'keywords'},
1450     );
1451     }
1452    
1453     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1454     }
1455    
1456    
1457 dpavlin 51 =head2 cond_to_query
1458    
1459 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1460    
1461 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1462 dpavlin 51
1463     =cut
1464    
1465     sub cond_to_query {
1466     my $self = shift;
1467    
1468     my $cond = shift || return;
1469     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1470 dpavlin 61 my $depth = shift;
1471 dpavlin 51
1472     my @args;
1473    
1474     if (my $phrase = $cond->phrase) {
1475     push @args, 'phrase=' . uri_escape($phrase);
1476     }
1477    
1478     if (my @attrs = $cond->attrs) {
1479     for my $i ( 0 .. $#attrs ) {
1480 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1481 dpavlin 51 }
1482     }
1483    
1484     if (my $order = $cond->order) {
1485     push @args, 'order=' . uri_escape($order);
1486     }
1487    
1488     if (my $max = $cond->max) {
1489     push @args, 'max=' . $max;
1490     } else {
1491     push @args, 'max=' . (1 << 30);
1492     }
1493    
1494     if (my $options = $cond->options) {
1495     push @args, 'options=' . $options;
1496     }
1497    
1498 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1499 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1500     push @args, 'hwidth=' . $self->{hwidth};
1501     push @args, 'awidth=' . $self->{awidth};
1502 dpavlin 122 push @args, 'skip=' . $self->{skip} if ($self->{skip});
1503 dpavlin 51
1504     return join('&', @args);
1505     }
1506    
1507    
1508 dpavlin 33 =head2 shuttle_url
1509 dpavlin 32
1510 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1511 dpavlin 33 master.
1512 dpavlin 2
1513 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1514 dpavlin 2
1515 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1516     body will be saved within object.
1517 dpavlin 2
1518     =cut
1519    
1520 dpavlin 59 use LWP::UserAgent;
1521    
1522 dpavlin 33 sub shuttle_url {
1523     my $self = shift;
1524 dpavlin 2
1525 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1526 dpavlin 2
1527 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1528    
1529 dpavlin 40 $self->{status} = -1;
1530 dpavlin 33
1531 dpavlin 41 warn "## $url\n" if ($self->{debug});
1532 dpavlin 36
1533 dpavlin 33 $url = new URI($url);
1534 dpavlin 37 if (
1535     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1536     !$url->host || !$url->port || $url->port < 1
1537     ) {
1538     carp "can't parse $url\n";
1539     return -1;
1540     }
1541 dpavlin 33
1542 dpavlin 59 my $ua = LWP::UserAgent->new;
1543     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1544 dpavlin 33
1545 dpavlin 59 my $req;
1546 dpavlin 37 if ($reqbody) {
1547 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1548 dpavlin 37 } else {
1549 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1550 dpavlin 37 }
1551    
1552 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1553     $req->headers->header( 'Connection', 'close' );
1554 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1555 dpavlin 59 $req->content_type( $content_type );
1556 dpavlin 37
1557 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1558 dpavlin 2
1559 dpavlin 37 if ($reqbody) {
1560 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1561 dpavlin 59 $req->content( $reqbody );
1562 dpavlin 33 }
1563 dpavlin 2
1564 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1565 dpavlin 2
1566 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1567 dpavlin 2
1568 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1569    
1570 dpavlin 78 if (! $res->is_success) {
1571 dpavlin 103 if ($croak_on_error) {
1572 dpavlin 78 croak("can't get $url: ",$res->status_line);
1573     } else {
1574     return -1;
1575     }
1576     }
1577 dpavlin 2
1578 dpavlin 59 $$resbody .= $res->content;
1579    
1580 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1581 dpavlin 39
1582 dpavlin 40 return $self->{status};
1583 dpavlin 2 }
1584    
1585 dpavlin 48
1586 dpavlin 55 =head2 set_snippet_width
1587 dpavlin 48
1588 dpavlin 55 Set width of snippets in results
1589    
1590     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1591    
1592     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1593     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1594    
1595     C<$hwidth> specified width of strings from beginning of string. Default
1596     value is C<96>. Negative or zero value keep previous value.
1597    
1598     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1599     If negative of zero value is provided previous value is kept unchanged.
1600    
1601     =cut
1602    
1603     sub set_snippet_width {
1604     my $self = shift;
1605    
1606     my ($wwidth, $hwidth, $awidth) = @_;
1607     $self->{wwidth} = $wwidth;
1608     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1609     $self->{awidth} = $awidth if ($awidth >= 0);
1610     }
1611    
1612    
1613 dpavlin 56 =head2 set_user
1614 dpavlin 55
1615 dpavlin 56 Manage users of node
1616    
1617     $node->set_user( 'name', $mode );
1618    
1619     C<$mode> can be one of:
1620    
1621     =over 4
1622    
1623     =item 0
1624    
1625     delete account
1626    
1627     =item 1
1628    
1629     set administrative right for user
1630    
1631     =item 2
1632    
1633     set user account as guest
1634    
1635     =back
1636    
1637     Return true on success, otherwise false.
1638    
1639     =cut
1640    
1641     sub set_user {
1642     my $self = shift;
1643     my ($name, $mode) = @_;
1644    
1645     return unless ($self->{url});
1646     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1647    
1648     $self->shuttle_url( $self->{url} . '/_set_user',
1649     'text/plain',
1650     'name=' . uri_escape($name) . '&mode=' . $mode,
1651     undef
1652     ) == 200;
1653     }
1654    
1655    
1656 dpavlin 57 =head2 set_link
1657    
1658     Manage node links
1659    
1660     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1661    
1662     If C<$credit> is negative, link is removed.
1663    
1664     =cut
1665    
1666     sub set_link {
1667     my $self = shift;
1668     my ($url, $label, $credit) = @_;
1669    
1670     return unless ($self->{url});
1671     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1672    
1673     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1674     $reqbody .= '&credit=' . $credit if ($credit > 0);
1675    
1676 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1677 dpavlin 71 'application/x-www-form-urlencoded',
1678 dpavlin 57 $reqbody,
1679     undef
1680 dpavlin 107 ) == 200) {
1681     # refresh node info after adding link
1682     $self->_set_info;
1683     return 1;
1684     }
1685 dpavlin 57 }
1686    
1687 dpavlin 107 =head2 admins
1688 dpavlin 57
1689 dpavlin 107 my @admins = @{ $node->admins };
1690    
1691     Return array of users with admin rights on node
1692    
1693     =cut
1694    
1695     sub admins {
1696     my $self = shift;
1697 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1698     return $self->{inform}->{admins};
1699 dpavlin 107 }
1700    
1701     =head2 guests
1702    
1703     my @guests = @{ $node->guests };
1704    
1705     Return array of users with guest rights on node
1706    
1707     =cut
1708    
1709     sub guests {
1710     my $self = shift;
1711 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1712     return $self->{inform}->{guests};
1713 dpavlin 107 }
1714    
1715     =head2 links
1716    
1717     my $links = @{ $node->links };
1718    
1719     Return array of links for this node
1720    
1721     =cut
1722    
1723     sub links {
1724     my $self = shift;
1725 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1726     return $self->{inform}->{links};
1727 dpavlin 107 }
1728    
1729 dpavlin 134 =head2 master
1730 dpavlin 107
1731 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1732    
1733     $node->master(
1734     action => 'sync'
1735     );
1736    
1737     All available actions are documented in
1738     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1739    
1740     =cut
1741    
1742     my $estmaster_rest = {
1743     shutdown => {
1744     status => 202,
1745     },
1746     sync => {
1747     status => 202,
1748     },
1749     backup => {
1750     status => 202,
1751     },
1752     userlist => {
1753     status => 200,
1754 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1755 dpavlin 134 },
1756     useradd => {
1757 dpavlin 135 required => [ qw/name passwd flags/ ],
1758     optional => [ qw/fname misc/ ],
1759 dpavlin 134 status => 200,
1760     },
1761     userdel => {
1762 dpavlin 135 required => [ qw/name/ ],
1763 dpavlin 134 status => 200,
1764     },
1765     nodelist => {
1766     status => 200,
1767 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1768 dpavlin 134 },
1769     nodeadd => {
1770 dpavlin 135 required => [ qw/name/ ],
1771     optional => [ qw/label/ ],
1772 dpavlin 134 status => 200,
1773     },
1774     nodedel => {
1775 dpavlin 135 required => [ qw/name/ ],
1776 dpavlin 134 status => 200,
1777     },
1778     nodeclr => {
1779 dpavlin 135 required => [ qw/name/ ],
1780 dpavlin 134 status => 200,
1781     },
1782     nodertt => {
1783     status => 200,
1784     },
1785     };
1786    
1787     sub master {
1788     my $self = shift;
1789    
1790     my $args = {@_};
1791    
1792     # have action?
1793     my $action = $args->{action} || croak "need action, available: ",
1794     join(", ",keys %{ $estmaster_rest });
1795    
1796     # check if action is valid
1797     my $rest = $estmaster_rest->{$action};
1798     croak "action '$action' is not supported, available actions: ",
1799     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1800    
1801     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1802    
1803     my @args;
1804    
1805     if ($rest->{required} || $rest->{optional}) {
1806    
1807     map {
1808     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1809     push @args, $_ . '=' . uri_escape( $args->{$_} );
1810 dpavlin 136 } ( @{ $rest->{required} } );
1811 dpavlin 134
1812     map {
1813     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1814 dpavlin 136 } ( @{ $rest->{optional} } );
1815 dpavlin 134
1816     }
1817    
1818     my $uri = new URI( $self->{url} );
1819    
1820     my $resbody;
1821    
1822 dpavlin 135 my $status = $self->shuttle_url(
1823 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1824     'application/x-www-form-urlencoded',
1825     join('&', @args),
1826     \$resbody,
1827     1,
1828 dpavlin 135 ) or confess "shuttle_url failed";
1829 dpavlin 134
1830 dpavlin 135 if ($status == $rest->{status}) {
1831     if ($rest->{returns} && wantarray) {
1832 dpavlin 134
1833     my @results;
1834 dpavlin 135 my $fields = $#{$rest->{returns}};
1835 dpavlin 134
1836     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1837 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1838 dpavlin 134 my $row;
1839 dpavlin 135 foreach my $i ( 0 .. $fields) {
1840     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1841     }
1842 dpavlin 134 push @results, $row;
1843     }
1844    
1845     return @results;
1846    
1847 dpavlin 135 } elsif ($resbody) {
1848 dpavlin 136 chomp $resbody;
1849 dpavlin 134 return $resbody;
1850 dpavlin 135 } else {
1851     return 0E0;
1852 dpavlin 134 }
1853     }
1854 dpavlin 135
1855     carp "expected status $rest->{status}, but got $status";
1856     return undef;
1857 dpavlin 134 }
1858    
1859 dpavlin 55 =head1 PRIVATE METHODS
1860    
1861     You could call those directly, but you don't have to. I hope.
1862    
1863     =head2 _set_info
1864    
1865 dpavlin 48 Set information for node
1866    
1867 dpavlin 55 $node->_set_info;
1868 dpavlin 48
1869     =cut
1870    
1871 dpavlin 55 sub _set_info {
1872 dpavlin 48 my $self = shift;
1873    
1874     $self->{status} = -1;
1875     return unless ($self->{url});
1876    
1877     my $resbody;
1878     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1879     'text/plain',
1880     undef,
1881     \$resbody,
1882     );
1883    
1884     return if ($rv != 200 || !$resbody);
1885    
1886 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1887 dpavlin 48
1888 dpavlin 111 $self->{inform} = {};
1889    
1890     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1891     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1892    
1893 dpavlin 107 return $resbody unless (@lines);
1894    
1895     shift @lines;
1896    
1897     while(my $admin = shift @lines) {
1898 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1899 dpavlin 107 }
1900 dpavlin 111
1901 dpavlin 107 while(my $guest = shift @lines) {
1902 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1903 dpavlin 107 }
1904    
1905     while(my $link = shift @lines) {
1906 dpavlin 111 push @{$self->{inform}->{links}}, $link;
1907 dpavlin 107 }
1908    
1909     return $resbody;
1910    
1911 dpavlin 48 }
1912    
1913 dpavlin 2 ###
1914    
1915     =head1 EXPORT
1916    
1917     Nothing.
1918    
1919     =head1 SEE ALSO
1920    
1921     L<http://hyperestraier.sourceforge.net/>
1922    
1923     Hyper Estraier Ruby interface on which this module is based.
1924    
1925     =head1 AUTHOR
1926    
1927     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1928    
1929 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
1930 dpavlin 2
1931     =head1 COPYRIGHT AND LICENSE
1932    
1933 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1934 dpavlin 2
1935     This library is free software; you can redistribute it and/or modify
1936     it under the GPL v2 or later.
1937    
1938     =cut
1939    
1940     1;

  ViewVC Help
Powered by ViewVC 1.1.26