/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 139 - (hide annotations)
Wed May 10 13:45:08 2006 UTC (17 years, 11 months ago) by dpavlin
File size: 34744 byte(s)
added create and label to new Search::Estraier::Node, so that nodes will be automatically
created if needed.
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 126 our $VERSION = '0.06_1';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15 dpavlin 68 =head2 Simple indexer
16 dpavlin 2
17 dpavlin 68 use Search::Estraier;
18    
19     # create and configure node
20 dpavlin 102 my $node = new Search::Estraier::Node(
21     url => 'http://localhost:1978/node/test',
22     user => 'admin',
23 dpavlin 139 passwd => 'admin',
24     create => 1,
25     label => 'Label for node',
26     croak_on_error => 1,
27 dpavlin 102 );
28 dpavlin 68
29     # create document
30     my $doc = new Search::Estraier::Document;
31    
32     # add attributes
33     $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34     $doc->add_attr('@title', "Over the Rainbow");
35    
36     # add body text to document
37     $doc->add_text("Somewhere over the rainbow. Way up high.");
38     $doc->add_text("There's a land that I heard of once in a lullaby.");
39    
40 dpavlin 102 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41 dpavlin 68
42     =head2 Simple searcher
43    
44     use Search::Estraier;
45    
46     # create and configure node
47 dpavlin 102 my $node = new Search::Estraier::Node(
48     url => 'http://localhost:1978/node/test',
49     user => 'admin',
50     passwd => 'admin',
51     croak_on_error => 1,
52     );
53 dpavlin 68
54     # create condition
55     my $cond = new Search::Estraier::Condition;
56    
57     # set search phrase
58     $cond->set_phrase("rainbow AND lullaby");
59    
60     my $nres = $node->search($cond, 0);
61 dpavlin 100
62 dpavlin 68 if (defined($nres)) {
63 dpavlin 102 print "Got ", $nres->hits, " results\n";
64    
65 dpavlin 68 # for each document in results
66     for my $i ( 0 ... $nres->doc_num - 1 ) {
67     # get result document
68     my $rdoc = $nres->get_doc($i);
69     # display attribte
70     print "URI: ", $rdoc->attr('@uri'),"\n";
71     print "Title: ", $rdoc->attr('@title'),"\n";
72     print $rdoc->snippet,"\n";
73     }
74     } else {
75     die "error: ", $node->status,"\n";
76     }
77    
78 dpavlin 2 =head1 DESCRIPTION
79    
80     This module is implementation of node API of Hyper Estraier. Since it's
81     perl-only module with dependencies only on standard perl modules, it will
82     run on all platforms on which perl runs. It doesn't require compilation
83     or Hyper Estraier development files on target machine.
84    
85     It is implemented as multiple packages which closly resamble Ruby
86     implementation. It also includes methods to manage nodes.
87    
88 dpavlin 68 There are few examples in C<scripts> directory of this distribution.
89    
90 dpavlin 2 =cut
91    
92 dpavlin 42 =head1 Inheritable common methods
93    
94     This methods should really move somewhere else.
95    
96 dpavlin 15 =head2 _s
97    
98     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99    
100     my $text = $self->_s(" this is a text ");
101     $text = 'this is a text';
102    
103     =cut
104    
105     sub _s {
106 dpavlin 96 my $text = $_[1];
107     return unless defined($text);
108 dpavlin 15 $text =~ s/\s\s+/ /gs;
109     $text =~ s/^\s+//;
110     $text =~ s/\s+$//;
111     return $text;
112     }
113    
114 dpavlin 2 package Search::Estraier::Document;
115    
116 dpavlin 9 use Carp qw/croak confess/;
117 dpavlin 7
118 dpavlin 15 use Search::Estraier;
119     our @ISA = qw/Search::Estraier/;
120    
121 dpavlin 2 =head1 Search::Estraier::Document
122    
123 dpavlin 14 This class implements Document which is collection of attributes
124     (key=value), vectors (also key value) display text and hidden text.
125    
126 dpavlin 42
127 dpavlin 2 =head2 new
128    
129 dpavlin 14 Create new document, empty or from draft.
130    
131 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
132 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
133 dpavlin 2
134     =cut
135    
136     sub new {
137     my $class = shift;
138 dpavlin 14 my $self = {};
139 dpavlin 2 bless($self, $class);
140    
141 dpavlin 6 $self->{id} = -1;
142    
143 dpavlin 14 my $draft = shift;
144    
145     if ($draft) {
146     my $in_text = 0;
147     foreach my $line (split(/\n/, $draft)) {
148    
149     if ($in_text) {
150     if ($line =~ /^\t/) {
151     push @{ $self->{htexts} }, substr($line, 1);
152     } else {
153     push @{ $self->{dtexts} }, $line;
154     }
155     next;
156     }
157    
158     if ($line =~ m/^%VECTOR\t(.+)$/) {
159     my @fields = split(/\t/, $1);
160     for my $i ( 0 .. ($#fields - 1) ) {
161     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
162     $i++;
163     }
164     next;
165     } elsif ($line =~ m/^%/) {
166     # What is this? comment?
167     #warn "$line\n";
168     next;
169     } elsif ($line =~ m/^$/) {
170     $in_text = 1;
171     next;
172 dpavlin 81 } elsif ($line =~ m/^(.+)=(.*)$/) {
173 dpavlin 14 $self->{attrs}->{ $1 } = $2;
174     next;
175     }
176    
177 dpavlin 81 warn "draft ignored: '$line'\n";
178 dpavlin 14 }
179     }
180    
181 dpavlin 2 $self ? return $self : return undef;
182     }
183    
184 dpavlin 4
185 dpavlin 2 =head2 add_attr
186    
187 dpavlin 6 Add an attribute.
188    
189 dpavlin 2 $doc->add_attr( name => 'value' );
190    
191 dpavlin 9 Delete attribute using
192 dpavlin 5
193     $doc->add_attr( name => undef );
194    
195 dpavlin 2 =cut
196    
197     sub add_attr {
198     my $self = shift;
199     my $attrs = {@_};
200    
201     while (my ($name, $value) = each %{ $attrs }) {
202 dpavlin 9 if (! defined($value)) {
203 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
204 dpavlin 9 } else {
205 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
206 dpavlin 9 }
207 dpavlin 2 }
208 dpavlin 8
209     return 1;
210 dpavlin 2 }
211    
212 dpavlin 5
213     =head2 add_text
214    
215 dpavlin 6 Add a sentence of text.
216    
217 dpavlin 5 $doc->add_text('this is example text to display');
218    
219     =cut
220    
221     sub add_text {
222     my $self = shift;
223     my $text = shift;
224     return unless defined($text);
225    
226 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
227 dpavlin 5 }
228    
229    
230     =head2 add_hidden_text
231    
232 dpavlin 6 Add a hidden sentence.
233    
234 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
235    
236     =cut
237    
238     sub add_hidden_text {
239     my $self = shift;
240     my $text = shift;
241     return unless defined($text);
242    
243 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
244 dpavlin 5 }
245    
246 dpavlin 42
247 dpavlin 6 =head2 id
248    
249     Get the ID number of document. If the object has never been registred, C<-1> is returned.
250    
251     print $doc->id;
252    
253     =cut
254    
255     sub id {
256     my $self = shift;
257     return $self->{id};
258     }
259    
260 dpavlin 42
261 dpavlin 7 =head2 attr_names
262    
263 dpavlin 9 Returns array with attribute names from document object.
264 dpavlin 7
265     my @attrs = $doc->attr_names;
266    
267     =cut
268    
269     sub attr_names {
270     my $self = shift;
271 dpavlin 60 return unless ($self->{attrs});
272     #croak "attr_names return array, not scalar" if (! wantarray);
273 dpavlin 7 return sort keys %{ $self->{attrs} };
274     }
275    
276 dpavlin 8
277     =head2 attr
278    
279 dpavlin 9 Returns value of an attribute.
280 dpavlin 8
281     my $value = $doc->attr( 'attribute' );
282    
283     =cut
284    
285     sub attr {
286     my $self = shift;
287     my $name = shift;
288 dpavlin 60 return unless (defined($name) && $self->{attrs});
289     return $self->{attrs}->{ $name };
290 dpavlin 8 }
291    
292 dpavlin 9
293     =head2 texts
294    
295     Returns array with text sentences.
296    
297     my @texts = $doc->texts;
298    
299     =cut
300    
301     sub texts {
302     my $self = shift;
303 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
304     return @{ $self->{dtexts} } if ($self->{dtexts});
305 dpavlin 9 }
306    
307 dpavlin 42
308 dpavlin 12 =head2 cat_texts
309    
310     Return whole text as single scalar.
311    
312     my $text = $doc->cat_texts;
313    
314     =cut
315    
316     sub cat_texts {
317     my $self = shift;
318 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
319 dpavlin 12 }
320    
321 dpavlin 42
322 dpavlin 5 =head2 dump_draft
323    
324 dpavlin 13 Dump draft data from document object.
325    
326 dpavlin 5 print $doc->dump_draft;
327    
328     =cut
329    
330     sub dump_draft {
331 dpavlin 13 my $self = shift;
332     my $draft;
333    
334     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
335 dpavlin 97 next unless defined(my $v = $self->{attrs}->{$attr_name});
336 dpavlin 93 $draft .= $attr_name . '=' . $v . "\n";
337 dpavlin 13 }
338    
339     if ($self->{kwords}) {
340     $draft .= '%%VECTOR';
341     while (my ($key, $value) = each %{ $self->{kwords} }) {
342     $draft .= "\t$key\t$value";
343     }
344     $draft .= "\n";
345     }
346    
347     $draft .= "\n";
348    
349 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
350     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
351 dpavlin 13
352     return $draft;
353 dpavlin 5 }
354    
355 dpavlin 42
356 dpavlin 4 =head2 delete
357 dpavlin 2
358 dpavlin 4 Empty document object
359 dpavlin 2
360 dpavlin 4 $doc->delete;
361    
362 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
363     convinience. Document objects which go out of scope will be destroyed
364     automatically.
365    
366 dpavlin 4 =cut
367    
368     sub delete {
369     my $self = shift;
370    
371 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
372 dpavlin 5 delete($self->{$data});
373     }
374 dpavlin 4
375 dpavlin 10 $self->{id} = -1;
376    
377 dpavlin 4 return 1;
378     }
379    
380    
381    
382 dpavlin 15 package Search::Estraier::Condition;
383 dpavlin 4
384 dpavlin 98 use Carp qw/carp confess croak/;
385 dpavlin 16
386 dpavlin 15 use Search::Estraier;
387     our @ISA = qw/Search::Estraier/;
388 dpavlin 4
389 dpavlin 16 =head1 Search::Estraier::Condition
390    
391     =head2 new
392    
393     my $cond = new Search::HyperEstraier::Condition;
394    
395     =cut
396    
397     sub new {
398     my $class = shift;
399     my $self = {};
400     bless($self, $class);
401    
402 dpavlin 19 $self->{max} = -1;
403     $self->{options} = 0;
404    
405 dpavlin 16 $self ? return $self : return undef;
406     }
407    
408 dpavlin 42
409 dpavlin 16 =head2 set_phrase
410    
411     $cond->set_phrase('search phrase');
412    
413     =cut
414    
415     sub set_phrase {
416     my $self = shift;
417     $self->{phrase} = $self->_s( shift );
418     }
419    
420 dpavlin 42
421 dpavlin 16 =head2 add_attr
422    
423     $cond->add_attr('@URI STRINC /~dpavlin/');
424    
425     =cut
426    
427     sub add_attr {
428     my $self = shift;
429     my $attr = shift || return;
430     push @{ $self->{attrs} }, $self->_s( $attr );
431     }
432    
433 dpavlin 42
434 dpavlin 16 =head2 set_order
435    
436     $cond->set_order('@mdate NUMD');
437    
438     =cut
439    
440     sub set_order {
441     my $self = shift;
442     $self->{order} = shift;
443     }
444    
445 dpavlin 42
446 dpavlin 16 =head2 set_max
447    
448     $cond->set_max(42);
449    
450     =cut
451    
452     sub set_max {
453     my $self = shift;
454     my $max = shift;
455 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
456 dpavlin 16 $self->{max} = $max;
457     }
458    
459 dpavlin 42
460 dpavlin 16 =head2 set_options
461    
462 dpavlin 98 $cond->set_options( 'SURE' );
463 dpavlin 16
464 dpavlin 98 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
465    
466     Possible options are:
467    
468     =over 8
469    
470     =item SURE
471    
472     check every N-gram
473    
474     =item USUAL
475    
476     check every second N-gram
477    
478     =item FAST
479    
480     check every third N-gram
481    
482     =item AGITO
483    
484     check every fourth N-gram
485    
486     =item NOIDF
487    
488     don't perform TF-IDF tuning
489    
490     =item SIMPLE
491    
492     use simplified query phrase
493    
494     =back
495    
496     Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
497     options;
498    
499     This option changed in version C<0.04> of this module. It's backwards compatibile.
500    
501 dpavlin 16 =cut
502    
503 dpavlin 15 my $options = {
504     SURE => 1 << 0,
505     USUAL => 1 << 1,
506     FAST => 1 << 2,
507     AGITO => 1 << 3,
508     NOIDF => 1 << 4,
509     SIMPLE => 1 << 10,
510     };
511    
512 dpavlin 16 sub set_options {
513     my $self = shift;
514 dpavlin 98 my $opt = 0;
515     foreach my $option (@_) {
516     my $mask;
517     unless ($mask = $options->{$option}) {
518     if ($option eq '1') {
519     next;
520     } else {
521     croak "unknown option $option";
522     }
523     }
524     $opt += $mask;
525     }
526     $self->{options} = $opt;
527 dpavlin 4 }
528    
529 dpavlin 42
530 dpavlin 18 =head2 phrase
531    
532     Return search phrase.
533    
534     print $cond->phrase;
535    
536     =cut
537    
538     sub phrase {
539     my $self = shift;
540     return $self->{phrase};
541     }
542    
543 dpavlin 42
544 dpavlin 19 =head2 order
545 dpavlin 18
546 dpavlin 19 Return search result order.
547    
548     print $cond->order;
549    
550     =cut
551    
552     sub order {
553     my $self = shift;
554     return $self->{order};
555     }
556    
557 dpavlin 42
558 dpavlin 19 =head2 attrs
559    
560     Return search result attrs.
561    
562     my @cond_attrs = $cond->attrs;
563    
564     =cut
565    
566     sub attrs {
567     my $self = shift;
568     #croak "attrs return array, not scalar" if (! wantarray);
569 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
570 dpavlin 19 }
571    
572 dpavlin 42
573 dpavlin 19 =head2 max
574    
575     Return maximum number of results.
576    
577     print $cond->max;
578    
579     C<-1> is returned for unitialized value, C<0> is unlimited.
580    
581     =cut
582    
583     sub max {
584     my $self = shift;
585     return $self->{max};
586     }
587    
588 dpavlin 42
589 dpavlin 19 =head2 options
590    
591     Return options for this condition.
592    
593     print $cond->options;
594    
595     Options are returned in numerical form.
596    
597     =cut
598    
599     sub options {
600     my $self = shift;
601     return $self->{options};
602     }
603    
604    
605 dpavlin 116 =head2 set_skip
606    
607     Set number of skipped documents from beginning of results
608    
609     $cond->set_skip(42);
610    
611     Similar to C<offset> in RDBMS.
612    
613     =cut
614    
615     sub set_skip {
616     my $self = shift;
617     $self->{skip} = shift;
618     }
619    
620     =head2 skip
621    
622     Return skip for this condition.
623    
624     print $cond->skip;
625    
626     =cut
627    
628     sub skip {
629     my $self = shift;
630     return $self->{skip};
631     }
632    
633    
634 dpavlin 20 package Search::Estraier::ResultDocument;
635    
636 dpavlin 24 use Carp qw/croak/;
637 dpavlin 20
638 dpavlin 24 #use Search::Estraier;
639     #our @ISA = qw/Search::Estraier/;
640 dpavlin 20
641     =head1 Search::Estraier::ResultDocument
642    
643     =head2 new
644    
645 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
646 dpavlin 20 uri => 'http://localhost/document/uri/42',
647     attrs => {
648     foo => 1,
649     bar => 2,
650     },
651     snippet => 'this is a text of snippet'
652     keywords => 'this\tare\tkeywords'
653     );
654    
655     =cut
656    
657     sub new {
658     my $class = shift;
659     my $self = {@_};
660     bless($self, $class);
661    
662 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
663 dpavlin 20
664     $self ? return $self : return undef;
665     }
666    
667 dpavlin 42
668 dpavlin 23 =head2 uri
669 dpavlin 20
670 dpavlin 23 Return URI of result document
671 dpavlin 20
672 dpavlin 23 print $rdoc->uri;
673    
674     =cut
675    
676     sub uri {
677     my $self = shift;
678     return $self->{uri};
679     }
680    
681    
682     =head2 attr_names
683    
684     Returns array with attribute names from result document object.
685    
686     my @attrs = $rdoc->attr_names;
687    
688     =cut
689    
690     sub attr_names {
691     my $self = shift;
692     croak "attr_names return array, not scalar" if (! wantarray);
693     return sort keys %{ $self->{attrs} };
694     }
695    
696 dpavlin 42
697 dpavlin 23 =head2 attr
698    
699     Returns value of an attribute.
700    
701     my $value = $rdoc->attr( 'attribute' );
702    
703     =cut
704    
705     sub attr {
706     my $self = shift;
707     my $name = shift || return;
708     return $self->{attrs}->{ $name };
709     }
710    
711 dpavlin 42
712 dpavlin 23 =head2 snippet
713    
714     Return snippet from result document
715    
716     print $rdoc->snippet;
717    
718     =cut
719    
720     sub snippet {
721     my $self = shift;
722     return $self->{snippet};
723     }
724    
725 dpavlin 42
726 dpavlin 23 =head2 keywords
727    
728     Return keywords from result document
729    
730     print $rdoc->keywords;
731    
732     =cut
733    
734     sub keywords {
735     my $self = shift;
736     return $self->{keywords};
737     }
738    
739    
740 dpavlin 25 package Search::Estraier::NodeResult;
741    
742     use Carp qw/croak/;
743    
744     #use Search::Estraier;
745     #our @ISA = qw/Search::Estraier/;
746    
747     =head1 Search::Estraier::NodeResult
748    
749     =head2 new
750    
751     my $res = new Search::HyperEstraier::NodeResult(
752     docs => @array_of_rdocs,
753     hits => %hash_with_hints,
754     );
755    
756     =cut
757    
758     sub new {
759     my $class = shift;
760     my $self = {@_};
761     bless($self, $class);
762    
763     foreach my $f (qw/docs hints/) {
764     croak "missing $f for ResultDocument" unless defined($self->{$f});
765     }
766    
767     $self ? return $self : return undef;
768     }
769    
770 dpavlin 42
771 dpavlin 25 =head2 doc_num
772    
773     Return number of documents
774    
775     print $res->doc_num;
776    
777 dpavlin 100 This will return real number of documents (limited by C<max>).
778     If you want to get total number of hits, see C<hits>.
779    
780 dpavlin 25 =cut
781    
782     sub doc_num {
783     my $self = shift;
784 dpavlin 53 return $#{$self->{docs}} + 1;
785 dpavlin 25 }
786    
787 dpavlin 42
788 dpavlin 25 =head2 get_doc
789    
790     Return single document
791    
792     my $doc = $res->get_doc( 42 );
793    
794     Returns undef if document doesn't exist.
795    
796     =cut
797    
798     sub get_doc {
799     my $self = shift;
800     my $num = shift;
801 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
802 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
803     return $self->{docs}->[$num];
804     }
805    
806 dpavlin 42
807 dpavlin 25 =head2 hint
808    
809     Return specific hint from results.
810    
811 dpavlin 100 print $res->hint( 'VERSION' );
812 dpavlin 25
813     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
814     C<TIME>, C<LINK#n>, C<VIEW>.
815    
816     =cut
817    
818     sub hint {
819     my $self = shift;
820     my $key = shift || return;
821     return $self->{hints}->{$key};
822     }
823    
824 dpavlin 108 =head2 hints
825 dpavlin 25
826 dpavlin 91 More perlish version of C<hint>. This one returns hash.
827    
828 dpavlin 100 my %hints = $res->hints;
829 dpavlin 91
830     =cut
831    
832     sub hints {
833     my $self = shift;
834     return $self->{hints};
835     }
836    
837 dpavlin 100 =head2 hits
838    
839     Syntaxtic sugar for total number of hits for this query
840    
841     print $res->hits;
842    
843     It's same as
844    
845     print $res->hint('HIT');
846    
847     but shorter.
848    
849     =cut
850    
851     sub hits {
852     my $self = shift;
853     return $self->{hints}->{'HIT'} || 0;
854     }
855    
856 dpavlin 27 package Search::Estraier::Node;
857    
858 dpavlin 44 use Carp qw/carp croak confess/;
859 dpavlin 33 use URI;
860 dpavlin 36 use MIME::Base64;
861 dpavlin 33 use IO::Socket::INET;
862 dpavlin 49 use URI::Escape qw/uri_escape/;
863 dpavlin 29
864 dpavlin 27 =head1 Search::Estraier::Node
865    
866     =head2 new
867    
868     my $node = new Search::HyperEstraier::Node;
869    
870 dpavlin 65 or optionally with C<url> as parametar
871    
872     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
873    
874 dpavlin 78 or in more verbose form
875    
876     my $node = new Search::HyperEstraier::Node(
877     url => 'http://localhost:1978/node/test',
878 dpavlin 132 user => 'admin',
879     passwd => 'admin'
880 dpavlin 139 create => 1,
881     label => 'optional node label',
882 dpavlin 78 debug => 1,
883     croak_on_error => 1
884     );
885    
886     with following arguments:
887    
888     =over 4
889    
890     =item url
891    
892     URL to node
893    
894 dpavlin 132 =item user
895    
896     specify username for node server authentication
897    
898     =item passwd
899    
900     password for authentication
901    
902 dpavlin 139 =item create
903    
904     create node if it doesn't exists
905    
906     =item label
907    
908     optional label for new node if C<create> is used
909    
910 dpavlin 78 =item debug
911    
912     dumps a B<lot> of debugging output
913    
914     =item croak_on_error
915    
916     very helpful during development. It will croak on all errors instead of
917     silently returning C<-1> (which is convention of Hyper Estraier API in other
918     languages).
919    
920     =back
921    
922 dpavlin 27 =cut
923    
924     sub new {
925     my $class = shift;
926     my $self = {
927     pxport => -1,
928 dpavlin 33 timeout => 0, # this used to be -1
929 dpavlin 27 wwidth => 480,
930     hwidth => 96,
931     awidth => 96,
932     status => -1,
933     };
934 dpavlin 111
935 dpavlin 27 bless($self, $class);
936    
937 dpavlin 65 if ($#_ == 0) {
938     $self->{url} = shift;
939     } else {
940 dpavlin 78 %$self = ( %$self, @_ );
941    
942 dpavlin 132 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
943    
944 dpavlin 65 warn "## Node debug on\n" if ($self->{debug});
945     }
946 dpavlin 57
947 dpavlin 111 $self->{inform} = {
948     dnum => -1,
949     wnum => -1,
950     size => -1.0,
951     };
952    
953 dpavlin 139 if ($self->{create}) {
954     eval {
955     $self->name;
956     };
957     if ($@) {
958     my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
959     croak "can't find node name in '$self->{url}'" unless ($name);
960     my $label = $self->{label} || $name;
961     $self->master(
962     action => 'nodeadd',
963     name => $name,
964     label => $label,
965     ) || croak "can't create node $name ($label)";
966     }
967     }
968    
969 dpavlin 27 $self ? return $self : return undef;
970     }
971    
972 dpavlin 42
973 dpavlin 29 =head2 set_url
974    
975     Specify URL to node server
976    
977     $node->set_url('http://localhost:1978');
978    
979     =cut
980    
981     sub set_url {
982     my $self = shift;
983     $self->{url} = shift;
984     }
985    
986 dpavlin 42
987 dpavlin 29 =head2 set_proxy
988    
989     Specify proxy server to connect to node server
990    
991     $node->set_proxy('proxy.example.com', 8080);
992    
993     =cut
994    
995     sub set_proxy {
996     my $self = shift;
997     my ($host,$port) = @_;
998 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
999 dpavlin 29 $self->{pxhost} = $host;
1000     $self->{pxport} = $port;
1001     }
1002    
1003 dpavlin 42
1004 dpavlin 30 =head2 set_timeout
1005    
1006     Specify timeout of connection in seconds
1007    
1008     $node->set_timeout( 15 );
1009    
1010     =cut
1011    
1012     sub set_timeout {
1013     my $self = shift;
1014     my $sec = shift;
1015 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1016 dpavlin 30 $self->{timeout} = $sec;
1017     }
1018    
1019 dpavlin 42
1020 dpavlin 31 =head2 set_auth
1021    
1022     Specify name and password for authentication to node server.
1023    
1024     $node->set_auth('clint','eastwood');
1025    
1026     =cut
1027    
1028     sub set_auth {
1029     my $self = shift;
1030     my ($login,$passwd) = @_;
1031 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
1032     chomp($basic_auth);
1033     $self->{auth} = $basic_auth;
1034 dpavlin 31 }
1035    
1036 dpavlin 42
1037 dpavlin 32 =head2 status
1038    
1039     Return status code of last request.
1040    
1041 dpavlin 40 print $node->status;
1042 dpavlin 32
1043     C<-1> means connection failure.
1044    
1045     =cut
1046    
1047     sub status {
1048     my $self = shift;
1049     return $self->{status};
1050     }
1051    
1052 dpavlin 42
1053 dpavlin 40 =head2 put_doc
1054    
1055 dpavlin 41 Add a document
1056 dpavlin 40
1057 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
1058    
1059     Return true on success or false on failture.
1060    
1061 dpavlin 40 =cut
1062    
1063     sub put_doc {
1064     my $self = shift;
1065     my $doc = shift || return;
1066 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1067 dpavlin 41 $self->shuttle_url( $self->{url} . '/put_doc',
1068     'text/x-estraier-draft',
1069     $doc->dump_draft,
1070     undef
1071     ) == 200;
1072 dpavlin 40 }
1073    
1074 dpavlin 41
1075     =head2 out_doc
1076    
1077     Remove a document
1078    
1079     $node->out_doc( document_id ) or "can't remove document";
1080    
1081     Return true on success or false on failture.
1082    
1083     =cut
1084    
1085     sub out_doc {
1086     my $self = shift;
1087     my $id = shift || return;
1088     return unless ($self->{url});
1089 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1090 dpavlin 41 $self->shuttle_url( $self->{url} . '/out_doc',
1091     'application/x-www-form-urlencoded',
1092     "id=$id",
1093     undef
1094     ) == 200;
1095     }
1096    
1097    
1098     =head2 out_doc_by_uri
1099    
1100     Remove a registrated document using it's uri
1101    
1102 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1103 dpavlin 41
1104     Return true on success or false on failture.
1105    
1106     =cut
1107    
1108     sub out_doc_by_uri {
1109     my $self = shift;
1110     my $uri = shift || return;
1111     return unless ($self->{url});
1112     $self->shuttle_url( $self->{url} . '/out_doc',
1113     'application/x-www-form-urlencoded',
1114 dpavlin 50 "uri=" . uri_escape($uri),
1115 dpavlin 41 undef
1116     ) == 200;
1117     }
1118    
1119 dpavlin 42
1120     =head2 edit_doc
1121    
1122     Edit attributes of a document
1123    
1124     $node->edit_doc( $document_draft ) or die "can't edit document";
1125    
1126     Return true on success or false on failture.
1127    
1128     =cut
1129    
1130     sub edit_doc {
1131     my $self = shift;
1132     my $doc = shift || return;
1133 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1134 dpavlin 42 $self->shuttle_url( $self->{url} . '/edit_doc',
1135     'text/x-estraier-draft',
1136     $doc->dump_draft,
1137     undef
1138     ) == 200;
1139     }
1140    
1141    
1142 dpavlin 43 =head2 get_doc
1143    
1144     Retreive document
1145    
1146     my $doc = $node->get_doc( document_id ) or die "can't get document";
1147    
1148     Return true on success or false on failture.
1149    
1150     =cut
1151    
1152     sub get_doc {
1153     my $self = shift;
1154     my $id = shift || return;
1155     return $self->_fetch_doc( id => $id );
1156     }
1157    
1158 dpavlin 44
1159 dpavlin 43 =head2 get_doc_by_uri
1160    
1161     Retreive document
1162    
1163 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1164 dpavlin 43
1165     Return true on success or false on failture.
1166    
1167     =cut
1168    
1169     sub get_doc_by_uri {
1170     my $self = shift;
1171     my $uri = shift || return;
1172     return $self->_fetch_doc( uri => $uri );
1173     }
1174    
1175 dpavlin 44
1176 dpavlin 49 =head2 get_doc_attr
1177    
1178     Retrieve the value of an atribute from object
1179    
1180     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1181     die "can't get document attribute";
1182    
1183     =cut
1184    
1185     sub get_doc_attr {
1186     my $self = shift;
1187     my ($id,$name) = @_;
1188     return unless ($id && $name);
1189     return $self->_fetch_doc( id => $id, attr => $name );
1190     }
1191    
1192    
1193     =head2 get_doc_attr_by_uri
1194    
1195     Retrieve the value of an atribute from object
1196    
1197     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1198     die "can't get document attribute";
1199    
1200     =cut
1201    
1202     sub get_doc_attr_by_uri {
1203     my $self = shift;
1204     my ($uri,$name) = @_;
1205     return unless ($uri && $name);
1206     return $self->_fetch_doc( uri => $uri, attr => $name );
1207     }
1208    
1209    
1210 dpavlin 44 =head2 etch_doc
1211    
1212     Exctract document keywords
1213    
1214     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1215    
1216     =cut
1217    
1218 dpavlin 49 sub etch_doc {
1219 dpavlin 44 my $self = shift;
1220     my $id = shift || return;
1221     return $self->_fetch_doc( id => $id, etch => 1 );
1222     }
1223    
1224     =head2 etch_doc_by_uri
1225    
1226     Retreive document
1227    
1228 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1229 dpavlin 44
1230     Return true on success or false on failture.
1231    
1232     =cut
1233    
1234     sub etch_doc_by_uri {
1235     my $self = shift;
1236     my $uri = shift || return;
1237     return $self->_fetch_doc( uri => $uri, etch => 1 );
1238     }
1239    
1240    
1241 dpavlin 45 =head2 uri_to_id
1242    
1243     Get ID of document specified by URI
1244    
1245     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1246    
1247 dpavlin 103 This method won't croak, even if using C<croak_on_error>.
1248    
1249 dpavlin 45 =cut
1250    
1251     sub uri_to_id {
1252     my $self = shift;
1253     my $uri = shift || return;
1254 dpavlin 103 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1255 dpavlin 45 }
1256    
1257    
1258 dpavlin 43 =head2 _fetch_doc
1259    
1260 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1261     C<etch_doc>, C<etch_doc_by_uri>.
1262 dpavlin 43
1263 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1264     my $doc = $node->_fetch_doc( id => 42 );
1265     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1266 dpavlin 43
1267 dpavlin 45 # to extract keywords, add etch
1268     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1269     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1270    
1271 dpavlin 49 # to get document attrubute add attr
1272     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1273     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1274    
1275 dpavlin 45 # more general form which allows implementation of
1276     # uri_to_id
1277     my $id = $node->_fetch_doc(
1278     uri => 'file:///document/uri/42',
1279     path => '/uri_to_id',
1280     chomp_resbody => 1
1281     );
1282    
1283 dpavlin 43 =cut
1284    
1285     sub _fetch_doc {
1286     my $self = shift;
1287 dpavlin 44 my $a = {@_};
1288     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1289    
1290     my ($arg, $resbody);
1291    
1292 dpavlin 45 my $path = $a->{path} || '/get_doc';
1293 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1294    
1295     if ($a->{id}) {
1296     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1297     $arg = 'id=' . $a->{id};
1298     } elsif ($a->{uri}) {
1299 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1300 dpavlin 44 } else {
1301     confess "unhandled argument. Need id or uri.";
1302 dpavlin 43 }
1303 dpavlin 44
1304 dpavlin 49 if ($a->{attr}) {
1305     $path = '/get_doc_attr';
1306     $arg .= '&attr=' . uri_escape($a->{attr});
1307     $a->{chomp_resbody} = 1;
1308     }
1309    
1310 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1311 dpavlin 43 'application/x-www-form-urlencoded',
1312 dpavlin 44 $arg,
1313 dpavlin 45 \$resbody,
1314 dpavlin 103 $a->{croak_on_error},
1315 dpavlin 43 );
1316 dpavlin 44
1317 dpavlin 43 return if ($rv != 200);
1318 dpavlin 44
1319     if ($a->{etch}) {
1320     $self->{kwords} = {};
1321     return +{} unless ($resbody);
1322     foreach my $l (split(/\n/, $resbody)) {
1323     my ($k,$v) = split(/\t/, $l, 2);
1324     $self->{kwords}->{$k} = $v if ($v);
1325     }
1326     return $self->{kwords};
1327 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1328     return unless (defined($resbody));
1329     chomp($resbody);
1330     return $resbody;
1331 dpavlin 44 } else {
1332     return new Search::Estraier::Document($resbody);
1333     }
1334 dpavlin 43 }
1335    
1336    
1337 dpavlin 48 =head2 name
1338 dpavlin 43
1339 dpavlin 48 my $node_name = $node->name;
1340 dpavlin 43
1341 dpavlin 48 =cut
1342    
1343     sub name {
1344     my $self = shift;
1345 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1346     return $self->{inform}->{name};
1347 dpavlin 48 }
1348    
1349    
1350     =head2 label
1351    
1352     my $node_label = $node->label;
1353    
1354     =cut
1355    
1356     sub label {
1357     my $self = shift;
1358 dpavlin 111 $self->_set_info unless ($self->{inform}->{label});
1359     return $self->{inform}->{label};
1360 dpavlin 48 }
1361    
1362    
1363     =head2 doc_num
1364    
1365     my $documents_in_node = $node->doc_num;
1366    
1367     =cut
1368    
1369     sub doc_num {
1370     my $self = shift;
1371 dpavlin 111 $self->_set_info if ($self->{inform}->{dnum} < 0);
1372     return $self->{inform}->{dnum};
1373 dpavlin 48 }
1374    
1375    
1376     =head2 word_num
1377    
1378     my $words_in_node = $node->word_num;
1379    
1380     =cut
1381    
1382     sub word_num {
1383     my $self = shift;
1384 dpavlin 111 $self->_set_info if ($self->{inform}->{wnum} < 0);
1385     return $self->{inform}->{wnum};
1386 dpavlin 48 }
1387    
1388    
1389     =head2 size
1390    
1391     my $node_size = $node->size;
1392    
1393     =cut
1394    
1395     sub size {
1396     my $self = shift;
1397 dpavlin 111 $self->_set_info if ($self->{inform}->{size} < 0);
1398     return $self->{inform}->{size};
1399 dpavlin 48 }
1400    
1401    
1402 dpavlin 51 =head2 search
1403 dpavlin 48
1404 dpavlin 51 Search documents which match condition
1405    
1406     my $nres = $node->search( $cond, $depth );
1407    
1408     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1409     depth for meta search.
1410    
1411     Function results C<Search::Estraier::NodeResult> object.
1412    
1413     =cut
1414    
1415     sub search {
1416     my $self = shift;
1417     my ($cond, $depth) = @_;
1418     return unless ($cond && defined($depth) && $self->{url});
1419     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1420     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1421    
1422 dpavlin 52 my $resbody;
1423 dpavlin 51
1424 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1425 dpavlin 53 'application/x-www-form-urlencoded',
1426 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1427 dpavlin 52 \$resbody,
1428     );
1429     return if ($rv != 200);
1430    
1431 dpavlin 126 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1432     my $hintsText = splice @records, 0, 2; # starts with empty record
1433     my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1434    
1435     # process records
1436 dpavlin 128 my $docs = [];
1437 dpavlin 126 foreach my $record (@records)
1438     {
1439     # split into keys and snippets
1440     my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1441    
1442     # create document hash
1443     my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1444     $doc->{'@keywords'} = $doc->{keywords};
1445     ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1446     $doc->{snippet} = $snippet;
1447    
1448     push @$docs, new Search::Estraier::ResultDocument(
1449     attrs => $doc,
1450     uri => $doc->{'@uri'},
1451     snippet => $snippet,
1452     keywords => $doc->{'keywords'},
1453     );
1454     }
1455    
1456     return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1457     }
1458    
1459    
1460 dpavlin 51 =head2 cond_to_query
1461    
1462 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1463    
1464 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1465 dpavlin 51
1466     =cut
1467    
1468     sub cond_to_query {
1469     my $self = shift;
1470    
1471     my $cond = shift || return;
1472     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1473 dpavlin 61 my $depth = shift;
1474 dpavlin 51
1475     my @args;
1476    
1477     if (my $phrase = $cond->phrase) {
1478     push @args, 'phrase=' . uri_escape($phrase);
1479     }
1480    
1481     if (my @attrs = $cond->attrs) {
1482     for my $i ( 0 .. $#attrs ) {
1483 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1484 dpavlin 51 }
1485     }
1486    
1487     if (my $order = $cond->order) {
1488     push @args, 'order=' . uri_escape($order);
1489     }
1490    
1491     if (my $max = $cond->max) {
1492     push @args, 'max=' . $max;
1493     } else {
1494     push @args, 'max=' . (1 << 30);
1495     }
1496    
1497     if (my $options = $cond->options) {
1498     push @args, 'options=' . $options;
1499     }
1500    
1501 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1502 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1503     push @args, 'hwidth=' . $self->{hwidth};
1504     push @args, 'awidth=' . $self->{awidth};
1505 dpavlin 122 push @args, 'skip=' . $self->{skip} if ($self->{skip});
1506 dpavlin 51
1507     return join('&', @args);
1508     }
1509    
1510    
1511 dpavlin 33 =head2 shuttle_url
1512 dpavlin 32
1513 dpavlin 68 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1514 dpavlin 33 master.
1515 dpavlin 2
1516 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1517 dpavlin 2
1518 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1519     body will be saved within object.
1520 dpavlin 2
1521     =cut
1522    
1523 dpavlin 59 use LWP::UserAgent;
1524    
1525 dpavlin 33 sub shuttle_url {
1526     my $self = shift;
1527 dpavlin 2
1528 dpavlin 103 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1529 dpavlin 2
1530 dpavlin 103 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1531    
1532 dpavlin 40 $self->{status} = -1;
1533 dpavlin 33
1534 dpavlin 41 warn "## $url\n" if ($self->{debug});
1535 dpavlin 36
1536 dpavlin 33 $url = new URI($url);
1537 dpavlin 37 if (
1538     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1539     !$url->host || !$url->port || $url->port < 1
1540     ) {
1541     carp "can't parse $url\n";
1542     return -1;
1543     }
1544 dpavlin 33
1545 dpavlin 59 my $ua = LWP::UserAgent->new;
1546     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1547 dpavlin 33
1548 dpavlin 59 my $req;
1549 dpavlin 37 if ($reqbody) {
1550 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1551 dpavlin 37 } else {
1552 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1553 dpavlin 37 }
1554    
1555 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1556     $req->headers->header( 'Connection', 'close' );
1557 dpavlin 77 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1558 dpavlin 59 $req->content_type( $content_type );
1559 dpavlin 37
1560 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1561 dpavlin 2
1562 dpavlin 37 if ($reqbody) {
1563 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1564 dpavlin 59 $req->content( $reqbody );
1565 dpavlin 33 }
1566 dpavlin 2
1567 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1568 dpavlin 2
1569 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1570 dpavlin 2
1571 dpavlin 76 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1572    
1573 dpavlin 78 if (! $res->is_success) {
1574 dpavlin 103 if ($croak_on_error) {
1575 dpavlin 78 croak("can't get $url: ",$res->status_line);
1576     } else {
1577     return -1;
1578     }
1579     }
1580 dpavlin 2
1581 dpavlin 59 $$resbody .= $res->content;
1582    
1583 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1584 dpavlin 39
1585 dpavlin 40 return $self->{status};
1586 dpavlin 2 }
1587    
1588 dpavlin 48
1589 dpavlin 55 =head2 set_snippet_width
1590 dpavlin 48
1591 dpavlin 55 Set width of snippets in results
1592    
1593     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1594    
1595     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1596     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1597    
1598     C<$hwidth> specified width of strings from beginning of string. Default
1599     value is C<96>. Negative or zero value keep previous value.
1600    
1601     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1602     If negative of zero value is provided previous value is kept unchanged.
1603    
1604     =cut
1605    
1606     sub set_snippet_width {
1607     my $self = shift;
1608    
1609     my ($wwidth, $hwidth, $awidth) = @_;
1610     $self->{wwidth} = $wwidth;
1611     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1612     $self->{awidth} = $awidth if ($awidth >= 0);
1613     }
1614    
1615    
1616 dpavlin 56 =head2 set_user
1617 dpavlin 55
1618 dpavlin 56 Manage users of node
1619    
1620     $node->set_user( 'name', $mode );
1621    
1622     C<$mode> can be one of:
1623    
1624     =over 4
1625    
1626     =item 0
1627    
1628     delete account
1629    
1630     =item 1
1631    
1632     set administrative right for user
1633    
1634     =item 2
1635    
1636     set user account as guest
1637    
1638     =back
1639    
1640     Return true on success, otherwise false.
1641    
1642     =cut
1643    
1644     sub set_user {
1645     my $self = shift;
1646     my ($name, $mode) = @_;
1647    
1648     return unless ($self->{url});
1649     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1650    
1651     $self->shuttle_url( $self->{url} . '/_set_user',
1652     'text/plain',
1653     'name=' . uri_escape($name) . '&mode=' . $mode,
1654     undef
1655     ) == 200;
1656     }
1657    
1658    
1659 dpavlin 57 =head2 set_link
1660    
1661     Manage node links
1662    
1663     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1664    
1665     If C<$credit> is negative, link is removed.
1666    
1667     =cut
1668    
1669     sub set_link {
1670     my $self = shift;
1671     my ($url, $label, $credit) = @_;
1672    
1673     return unless ($self->{url});
1674     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1675    
1676     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1677     $reqbody .= '&credit=' . $credit if ($credit > 0);
1678    
1679 dpavlin 107 if ($self->shuttle_url( $self->{url} . '/_set_link',
1680 dpavlin 71 'application/x-www-form-urlencoded',
1681 dpavlin 57 $reqbody,
1682     undef
1683 dpavlin 107 ) == 200) {
1684     # refresh node info after adding link
1685     $self->_set_info;
1686     return 1;
1687     }
1688 dpavlin 57 }
1689    
1690 dpavlin 107 =head2 admins
1691 dpavlin 57
1692 dpavlin 107 my @admins = @{ $node->admins };
1693    
1694     Return array of users with admin rights on node
1695    
1696     =cut
1697    
1698     sub admins {
1699     my $self = shift;
1700 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1701     return $self->{inform}->{admins};
1702 dpavlin 107 }
1703    
1704     =head2 guests
1705    
1706     my @guests = @{ $node->guests };
1707    
1708     Return array of users with guest rights on node
1709    
1710     =cut
1711    
1712     sub guests {
1713     my $self = shift;
1714 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1715     return $self->{inform}->{guests};
1716 dpavlin 107 }
1717    
1718     =head2 links
1719    
1720     my $links = @{ $node->links };
1721    
1722     Return array of links for this node
1723    
1724     =cut
1725    
1726     sub links {
1727     my $self = shift;
1728 dpavlin 111 $self->_set_info unless ($self->{inform}->{name});
1729     return $self->{inform}->{links};
1730 dpavlin 107 }
1731    
1732 dpavlin 134 =head2 master
1733 dpavlin 107
1734 dpavlin 134 Set actions on Hyper Estraier node master (C<estmaster> process)
1735    
1736     $node->master(
1737     action => 'sync'
1738     );
1739    
1740     All available actions are documented in
1741     L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1742    
1743     =cut
1744    
1745     my $estmaster_rest = {
1746     shutdown => {
1747     status => 202,
1748     },
1749     sync => {
1750     status => 202,
1751     },
1752     backup => {
1753     status => 202,
1754     },
1755     userlist => {
1756     status => 200,
1757 dpavlin 135 returns => [ qw/name passwd flags fname misc/ ],
1758 dpavlin 134 },
1759     useradd => {
1760 dpavlin 135 required => [ qw/name passwd flags/ ],
1761     optional => [ qw/fname misc/ ],
1762 dpavlin 134 status => 200,
1763     },
1764     userdel => {
1765 dpavlin 135 required => [ qw/name/ ],
1766 dpavlin 134 status => 200,
1767     },
1768     nodelist => {
1769     status => 200,
1770 dpavlin 135 returns => [ qw/name label doc_num word_num size/ ],
1771 dpavlin 134 },
1772     nodeadd => {
1773 dpavlin 135 required => [ qw/name/ ],
1774     optional => [ qw/label/ ],
1775 dpavlin 134 status => 200,
1776     },
1777     nodedel => {
1778 dpavlin 135 required => [ qw/name/ ],
1779 dpavlin 134 status => 200,
1780     },
1781     nodeclr => {
1782 dpavlin 135 required => [ qw/name/ ],
1783 dpavlin 134 status => 200,
1784     },
1785     nodertt => {
1786     status => 200,
1787     },
1788     };
1789    
1790     sub master {
1791     my $self = shift;
1792    
1793     my $args = {@_};
1794    
1795     # have action?
1796     my $action = $args->{action} || croak "need action, available: ",
1797     join(", ",keys %{ $estmaster_rest });
1798    
1799     # check if action is valid
1800     my $rest = $estmaster_rest->{$action};
1801     croak "action '$action' is not supported, available actions: ",
1802     join(", ",keys %{ $estmaster_rest }) unless ($rest);
1803    
1804     croak "BUG: action '$action' needs return status" unless ($rest->{status});
1805    
1806     my @args;
1807    
1808     if ($rest->{required} || $rest->{optional}) {
1809    
1810     map {
1811     croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1812     push @args, $_ . '=' . uri_escape( $args->{$_} );
1813 dpavlin 136 } ( @{ $rest->{required} } );
1814 dpavlin 134
1815     map {
1816     push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1817 dpavlin 136 } ( @{ $rest->{optional} } );
1818 dpavlin 134
1819     }
1820    
1821     my $uri = new URI( $self->{url} );
1822    
1823     my $resbody;
1824    
1825 dpavlin 135 my $status = $self->shuttle_url(
1826 dpavlin 134 'http://' . $uri->host_port . '/master?action=' . $action ,
1827     'application/x-www-form-urlencoded',
1828     join('&', @args),
1829     \$resbody,
1830     1,
1831 dpavlin 135 ) or confess "shuttle_url failed";
1832 dpavlin 134
1833 dpavlin 135 if ($status == $rest->{status}) {
1834     if ($rest->{returns} && wantarray) {
1835 dpavlin 134
1836     my @results;
1837 dpavlin 135 my $fields = $#{$rest->{returns}};
1838 dpavlin 134
1839     foreach my $line ( split(/[\r\n]/,$resbody) ) {
1840 dpavlin 135 my @e = split(/\t/, $line, $fields + 1);
1841 dpavlin 134 my $row;
1842 dpavlin 135 foreach my $i ( 0 .. $fields) {
1843     $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1844     }
1845 dpavlin 134 push @results, $row;
1846     }
1847    
1848     return @results;
1849    
1850 dpavlin 135 } elsif ($resbody) {
1851 dpavlin 136 chomp $resbody;
1852 dpavlin 134 return $resbody;
1853 dpavlin 135 } else {
1854     return 0E0;
1855 dpavlin 134 }
1856     }
1857 dpavlin 135
1858     carp "expected status $rest->{status}, but got $status";
1859     return undef;
1860 dpavlin 134 }
1861    
1862 dpavlin 55 =head1 PRIVATE METHODS
1863    
1864     You could call those directly, but you don't have to. I hope.
1865    
1866     =head2 _set_info
1867    
1868 dpavlin 48 Set information for node
1869    
1870 dpavlin 55 $node->_set_info;
1871 dpavlin 48
1872     =cut
1873    
1874 dpavlin 55 sub _set_info {
1875 dpavlin 48 my $self = shift;
1876    
1877     $self->{status} = -1;
1878     return unless ($self->{url});
1879    
1880     my $resbody;
1881     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1882     'text/plain',
1883     undef,
1884     \$resbody,
1885     );
1886    
1887     return if ($rv != 200 || !$resbody);
1888    
1889 dpavlin 107 my @lines = split(/[\r\n]/,$resbody);
1890 dpavlin 48
1891 dpavlin 111 $self->{inform} = {};
1892    
1893     ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1894     $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1895    
1896 dpavlin 107 return $resbody unless (@lines);
1897    
1898     shift @lines;
1899    
1900     while(my $admin = shift @lines) {
1901 dpavlin 111 push @{$self->{inform}->{admins}}, $admin;
1902 dpavlin 107 }
1903 dpavlin 111
1904 dpavlin 107 while(my $guest = shift @lines) {
1905 dpavlin 111 push @{$self->{inform}->{guests}}, $guest;
1906 dpavlin 107 }
1907    
1908     while(my $link = shift @lines) {
1909 dpavlin 111 push @{$self->{inform}->{links}}, $link;
1910 dpavlin 107 }
1911    
1912     return $resbody;
1913    
1914 dpavlin 48 }
1915    
1916 dpavlin 2 ###
1917    
1918     =head1 EXPORT
1919    
1920     Nothing.
1921    
1922     =head1 SEE ALSO
1923    
1924     L<http://hyperestraier.sourceforge.net/>
1925    
1926     Hyper Estraier Ruby interface on which this module is based.
1927    
1928     =head1 AUTHOR
1929    
1930     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1931    
1932 dpavlin 128 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
1933 dpavlin 2
1934     =head1 COPYRIGHT AND LICENSE
1935    
1936 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1937 dpavlin 2
1938     This library is free software; you can redistribute it and/or modify
1939     it under the GPL v2 or later.
1940    
1941     =cut
1942    
1943     1;

  ViewVC Help
Powered by ViewVC 1.1.26