/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 184 - (show annotations)
Sat Nov 4 13:10:29 2006 UTC (17 years, 4 months ago) by dpavlin
File size: 38084 byte(s)
set_distinct and set_score patch from Mikio Hirabayashi <mikio@users.sourceforge.net>
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.08_1';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 =head2 Simple indexer
16
17 use Search::Estraier;
18
19 # create and configure node
20 my $node = new Search::Estraier::Node(
21 url => 'http://localhost:1978/node/test',
22 user => 'admin',
23 passwd => 'admin',
24 create => 1,
25 label => 'Label for node',
26 croak_on_error => 1,
27 );
28
29 # create document
30 my $doc = new Search::Estraier::Document;
31
32 # add attributes
33 $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34 $doc->add_attr('@title', "Over the Rainbow");
35
36 # add body text to document
37 $doc->add_text("Somewhere over the rainbow. Way up high.");
38 $doc->add_text("There's a land that I heard of once in a lullaby.");
39
40 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41
42 =head2 Simple searcher
43
44 use Search::Estraier;
45
46 # create and configure node
47 my $node = new Search::Estraier::Node(
48 url => 'http://localhost:1978/node/test',
49 user => 'admin',
50 passwd => 'admin',
51 croak_on_error => 1,
52 );
53
54 # create condition
55 my $cond = new Search::Estraier::Condition;
56
57 # set search phrase
58 $cond->set_phrase("rainbow AND lullaby");
59
60 my $nres = $node->search($cond, 0);
61
62 if (defined($nres)) {
63 print "Got ", $nres->hits, " results\n";
64
65 # for each document in results
66 for my $i ( 0 ... $nres->doc_num - 1 ) {
67 # get result document
68 my $rdoc = $nres->get_doc($i);
69 # display attribte
70 print "URI: ", $rdoc->attr('@uri'),"\n";
71 print "Title: ", $rdoc->attr('@title'),"\n";
72 print $rdoc->snippet,"\n";
73 }
74 } else {
75 die "error: ", $node->status,"\n";
76 }
77
78 =head1 DESCRIPTION
79
80 This module is implementation of node API of Hyper Estraier. Since it's
81 perl-only module with dependencies only on standard perl modules, it will
82 run on all platforms on which perl runs. It doesn't require compilation
83 or Hyper Estraier development files on target machine.
84
85 It is implemented as multiple packages which closly resamble Ruby
86 implementation. It also includes methods to manage nodes.
87
88 There are few examples in C<scripts> directory of this distribution.
89
90 =cut
91
92 =head1 Inheritable common methods
93
94 This methods should really move somewhere else.
95
96 =head2 _s
97
98 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99
100 my $text = $self->_s(" this is a text ");
101 $text = 'this is a text';
102
103 =cut
104
105 sub _s {
106 my $text = $_[1];
107 return unless defined($text);
108 $text =~ s/\s\s+/ /gs;
109 $text =~ s/^\s+//;
110 $text =~ s/\s+$//;
111 return $text;
112 }
113
114 package Search::Estraier::Document;
115
116 use Carp qw/croak confess/;
117
118 use Search::Estraier;
119 our @ISA = qw/Search::Estraier/;
120
121 =head1 Search::Estraier::Document
122
123 This class implements Document which is single item in Hyper Estraier.
124
125 It's is collection of:
126
127 =over 4
128
129 =item attributes
130
131 C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132
133 You can add common filters to C<attrindex> in estmaster's C<_conf>
134 file for better performance. See C<attrindex> in
135 L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136
137 =item vectors
138
139 also C<< 'key' => 'value' >> pairs
140
141 =item display text
142
143 Text which will be used to create searchable corpus of your index and
144 included in snippet output.
145
146 =item hidden text
147
148 Text which will be searchable, but will not be included in snippet.
149
150 =back
151
152 =head2 new
153
154 Create new document, empty or from draft.
155
156 my $doc = new Search::HyperEstraier::Document;
157 my $doc2 = new Search::HyperEstraier::Document( $draft );
158
159 =cut
160
161 sub new {
162 my $class = shift;
163 my $self = {};
164 bless($self, $class);
165
166 $self->{id} = -1;
167
168 my $draft = shift;
169
170 if ($draft) {
171 my $in_text = 0;
172 foreach my $line (split(/\n/, $draft)) {
173
174 if ($in_text) {
175 if ($line =~ /^\t/) {
176 push @{ $self->{htexts} }, substr($line, 1);
177 } else {
178 push @{ $self->{dtexts} }, $line;
179 }
180 next;
181 }
182
183 if ($line =~ m/^%VECTOR\t(.+)$/) {
184 my @fields = split(/\t/, $1);
185 if ($#fields % 2 == 1) {
186 $self->{kwords} = { @fields };
187 } else {
188 warn "can't decode $line\n";
189 }
190 next;
191 } elsif ($line =~ m/^%SCORE\t(.+)$/) {
192 $self->{score} = $1;
193 next;
194 } elsif ($line =~ m/^%/) {
195 # What is this? comment?
196 #warn "$line\n";
197 next;
198 } elsif ($line =~ m/^$/) {
199 $in_text = 1;
200 next;
201 } elsif ($line =~ m/^(.+)=(.*)$/) {
202 $self->{attrs}->{ $1 } = $2;
203 next;
204 }
205
206 warn "draft ignored: '$line'\n";
207 }
208 }
209
210 $self ? return $self : return undef;
211 }
212
213
214 =head2 add_attr
215
216 Add an attribute.
217
218 $doc->add_attr( name => 'value' );
219
220 Delete attribute using
221
222 $doc->add_attr( name => undef );
223
224 =cut
225
226 sub add_attr {
227 my $self = shift;
228 my $attrs = {@_};
229
230 while (my ($name, $value) = each %{ $attrs }) {
231 if (! defined($value)) {
232 delete( $self->{attrs}->{ $self->_s($name) } );
233 } else {
234 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
235 }
236 }
237
238 return 1;
239 }
240
241
242 =head2 add_text
243
244 Add a sentence of text.
245
246 $doc->add_text('this is example text to display');
247
248 =cut
249
250 sub add_text {
251 my $self = shift;
252 my $text = shift;
253 return unless defined($text);
254
255 push @{ $self->{dtexts} }, $self->_s($text);
256 }
257
258
259 =head2 add_hidden_text
260
261 Add a hidden sentence.
262
263 $doc->add_hidden_text('this is example text just for search');
264
265 =cut
266
267 sub add_hidden_text {
268 my $self = shift;
269 my $text = shift;
270 return unless defined($text);
271
272 push @{ $self->{htexts} }, $self->_s($text);
273 }
274
275 =head2 add_vectors
276
277 Add a vectors
278
279 $doc->add_vector(
280 'vector_name' => 42,
281 'another' => 12345,
282 );
283
284 =cut
285
286 sub add_vectors {
287 my $self = shift;
288 return unless (@_);
289
290 # this is ugly, but works
291 die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);
292
293 $self->{kwords} = {@_};
294 }
295
296 =head2 set_score
297
298 Set the substitute score
299
300 $doc->set_score(12345);
301
302 =cut
303
304 sub set_score {
305 my $self = shift;
306 my $score = shift;
307 return unless (defined($score));
308 $self->{score} = $score;
309 }
310
311 =head2 score
312
313 Get the substitute score
314
315 =cut
316
317 sub score {
318 my $self = shift;
319 return -1 unless (defined($self->{score}));
320 return $self->{score};
321 }
322
323 =head2 id
324
325 Get the ID number of document. If the object has never been registred, C<-1> is returned.
326
327 print $doc->id;
328
329 =cut
330
331 sub id {
332 my $self = shift;
333 return $self->{id};
334 }
335
336
337 =head2 attr_names
338
339 Returns array with attribute names from document object.
340
341 my @attrs = $doc->attr_names;
342
343 =cut
344
345 sub attr_names {
346 my $self = shift;
347 return unless ($self->{attrs});
348 #croak "attr_names return array, not scalar" if (! wantarray);
349 return sort keys %{ $self->{attrs} };
350 }
351
352
353 =head2 attr
354
355 Returns value of an attribute.
356
357 my $value = $doc->attr( 'attribute' );
358
359 =cut
360
361 sub attr {
362 my $self = shift;
363 my $name = shift;
364 return unless (defined($name) && $self->{attrs});
365 return $self->{attrs}->{ $name };
366 }
367
368
369 =head2 texts
370
371 Returns array with text sentences.
372
373 my @texts = $doc->texts;
374
375 =cut
376
377 sub texts {
378 my $self = shift;
379 #confess "texts return array, not scalar" if (! wantarray);
380 return @{ $self->{dtexts} } if ($self->{dtexts});
381 }
382
383
384 =head2 cat_texts
385
386 Return whole text as single scalar.
387
388 my $text = $doc->cat_texts;
389
390 =cut
391
392 sub cat_texts {
393 my $self = shift;
394 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
395 }
396
397
398 =head2 dump_draft
399
400 Dump draft data from document object.
401
402 print $doc->dump_draft;
403
404 =cut
405
406 sub dump_draft {
407 my $self = shift;
408 my $draft;
409
410 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
411 next unless defined(my $v = $self->{attrs}->{$attr_name});
412 $draft .= $attr_name . '=' . $v . "\n";
413 }
414
415 if ($self->{kwords}) {
416 $draft .= '%VECTOR';
417 while (my ($key, $value) = each %{ $self->{kwords} }) {
418 $draft .= "\t$key\t$value";
419 }
420 $draft .= "\n";
421 }
422
423 if (defined($self->{score}) && $self->{score} >= 0) {
424 $draft .= "%SCORE\t" . $self->{score} . "\n";
425 }
426
427 $draft .= "\n";
428
429 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
430 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
431
432 printf("[%s]\n", $draft);
433
434 return $draft;
435 }
436
437
438 =head2 delete
439
440 Empty document object
441
442 $doc->delete;
443
444 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
445 convinience. Document objects which go out of scope will be destroyed
446 automatically.
447
448 =cut
449
450 sub delete {
451 my $self = shift;
452
453 foreach my $data (qw/attrs dtexts stexts kwords/) {
454 delete($self->{$data});
455 }
456
457 $self->{id} = -1;
458
459 return 1;
460 }
461
462
463
464 package Search::Estraier::Condition;
465
466 use Carp qw/carp confess croak/;
467
468 use Search::Estraier;
469 our @ISA = qw/Search::Estraier/;
470
471 =head1 Search::Estraier::Condition
472
473 =head2 new
474
475 my $cond = new Search::HyperEstraier::Condition;
476
477 =cut
478
479 sub new {
480 my $class = shift;
481 my $self = {};
482 bless($self, $class);
483
484 $self->{max} = -1;
485 $self->{options} = 0;
486
487 $self ? return $self : return undef;
488 }
489
490
491 =head2 set_phrase
492
493 $cond->set_phrase('search phrase');
494
495 =cut
496
497 sub set_phrase {
498 my $self = shift;
499 $self->{phrase} = $self->_s( shift );
500 }
501
502
503 =head2 add_attr
504
505 $cond->add_attr('@URI STRINC /~dpavlin/');
506
507 =cut
508
509 sub add_attr {
510 my $self = shift;
511 my $attr = shift || return;
512 push @{ $self->{attrs} }, $self->_s( $attr );
513 }
514
515
516 =head2 set_order
517
518 $cond->set_order('@mdate NUMD');
519
520 =cut
521
522 sub set_order {
523 my $self = shift;
524 $self->{order} = shift;
525 }
526
527
528 =head2 set_max
529
530 $cond->set_max(42);
531
532 =cut
533
534 sub set_max {
535 my $self = shift;
536 my $max = shift;
537 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
538 $self->{max} = $max;
539 }
540
541
542 =head2 set_options
543
544 $cond->set_options( 'SURE' );
545
546 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
547
548 Possible options are:
549
550 =over 8
551
552 =item SURE
553
554 check every N-gram
555
556 =item USUAL
557
558 check every second N-gram
559
560 =item FAST
561
562 check every third N-gram
563
564 =item AGITO
565
566 check every fourth N-gram
567
568 =item NOIDF
569
570 don't perform TF-IDF tuning
571
572 =item SIMPLE
573
574 use simplified query phrase
575
576 =back
577
578 Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
579 options;
580
581 This option changed in version C<0.04> of this module. It's backwards compatibile.
582
583 =cut
584
585 my $options = {
586 SURE => 1 << 0,
587 USUAL => 1 << 1,
588 FAST => 1 << 2,
589 AGITO => 1 << 3,
590 NOIDF => 1 << 4,
591 SIMPLE => 1 << 10,
592 };
593
594 sub set_options {
595 my $self = shift;
596 my $opt = 0;
597 foreach my $option (@_) {
598 my $mask;
599 unless ($mask = $options->{$option}) {
600 if ($option eq '1') {
601 next;
602 } else {
603 croak "unknown option $option";
604 }
605 }
606 $opt += $mask;
607 }
608 $self->{options} = $opt;
609 }
610
611
612 =head2 phrase
613
614 Return search phrase.
615
616 print $cond->phrase;
617
618 =cut
619
620 sub phrase {
621 my $self = shift;
622 return $self->{phrase};
623 }
624
625
626 =head2 order
627
628 Return search result order.
629
630 print $cond->order;
631
632 =cut
633
634 sub order {
635 my $self = shift;
636 return $self->{order};
637 }
638
639
640 =head2 attrs
641
642 Return search result attrs.
643
644 my @cond_attrs = $cond->attrs;
645
646 =cut
647
648 sub attrs {
649 my $self = shift;
650 #croak "attrs return array, not scalar" if (! wantarray);
651 return @{ $self->{attrs} } if ($self->{attrs});
652 }
653
654
655 =head2 max
656
657 Return maximum number of results.
658
659 print $cond->max;
660
661 C<-1> is returned for unitialized value, C<0> is unlimited.
662
663 =cut
664
665 sub max {
666 my $self = shift;
667 return $self->{max};
668 }
669
670
671 =head2 options
672
673 Return options for this condition.
674
675 print $cond->options;
676
677 Options are returned in numerical form.
678
679 =cut
680
681 sub options {
682 my $self = shift;
683 return $self->{options};
684 }
685
686
687 =head2 set_skip
688
689 Set number of skipped documents from beginning of results
690
691 $cond->set_skip(42);
692
693 Similar to C<offset> in RDBMS.
694
695 =cut
696
697 sub set_skip {
698 my $self = shift;
699 $self->{skip} = shift;
700 }
701
702 =head2 skip
703
704 Return skip for this condition.
705
706 print $cond->skip;
707
708 =cut
709
710 sub skip {
711 my $self = shift;
712 return $self->{skip};
713 }
714
715
716 =head2 set_distinct
717
718 $cond->set_distinct('@author');
719
720 =cut
721
722 sub set_distinct {
723 my $self = shift;
724 $self->{distinct} = shift;
725 }
726
727 =head2 distinct
728
729 Return distinct attribute
730
731 print $cond->distinct;
732
733 =cut
734
735 sub distinct {
736 my $self = shift;
737 return $self->{distinct};
738 }
739
740 =head2 set_mask
741
742 Filter out some links when searching.
743
744 Argument array of link numbers, starting with 0 (current node).
745
746 $cond->set_mask(qw/0 1 4/);
747
748 =cut
749
750 sub set_mask {
751 my $self = shift;
752 return unless (@_);
753 $self->{mask} = \@_;
754 }
755
756
757 package Search::Estraier::ResultDocument;
758
759 use Carp qw/croak/;
760
761 #use Search::Estraier;
762 #our @ISA = qw/Search::Estraier/;
763
764 =head1 Search::Estraier::ResultDocument
765
766 =head2 new
767
768 my $rdoc = new Search::HyperEstraier::ResultDocument(
769 uri => 'http://localhost/document/uri/42',
770 attrs => {
771 foo => 1,
772 bar => 2,
773 },
774 snippet => 'this is a text of snippet'
775 keywords => 'this\tare\tkeywords'
776 );
777
778 =cut
779
780 sub new {
781 my $class = shift;
782 my $self = {@_};
783 bless($self, $class);
784
785 croak "missing uri for ResultDocument" unless defined($self->{uri});
786
787 $self ? return $self : return undef;
788 }
789
790
791 =head2 uri
792
793 Return URI of result document
794
795 print $rdoc->uri;
796
797 =cut
798
799 sub uri {
800 my $self = shift;
801 return $self->{uri};
802 }
803
804
805 =head2 attr_names
806
807 Returns array with attribute names from result document object.
808
809 my @attrs = $rdoc->attr_names;
810
811 =cut
812
813 sub attr_names {
814 my $self = shift;
815 croak "attr_names return array, not scalar" if (! wantarray);
816 return sort keys %{ $self->{attrs} };
817 }
818
819
820 =head2 attr
821
822 Returns value of an attribute.
823
824 my $value = $rdoc->attr( 'attribute' );
825
826 =cut
827
828 sub attr {
829 my $self = shift;
830 my $name = shift || return;
831 return $self->{attrs}->{ $name };
832 }
833
834
835 =head2 snippet
836
837 Return snippet from result document
838
839 print $rdoc->snippet;
840
841 =cut
842
843 sub snippet {
844 my $self = shift;
845 return $self->{snippet};
846 }
847
848
849 =head2 keywords
850
851 Return keywords from result document
852
853 print $rdoc->keywords;
854
855 =cut
856
857 sub keywords {
858 my $self = shift;
859 return $self->{keywords};
860 }
861
862
863 package Search::Estraier::NodeResult;
864
865 use Carp qw/croak/;
866
867 #use Search::Estraier;
868 #our @ISA = qw/Search::Estraier/;
869
870 =head1 Search::Estraier::NodeResult
871
872 =head2 new
873
874 my $res = new Search::HyperEstraier::NodeResult(
875 docs => @array_of_rdocs,
876 hits => %hash_with_hints,
877 );
878
879 =cut
880
881 sub new {
882 my $class = shift;
883 my $self = {@_};
884 bless($self, $class);
885
886 foreach my $f (qw/docs hints/) {
887 croak "missing $f for ResultDocument" unless defined($self->{$f});
888 }
889
890 $self ? return $self : return undef;
891 }
892
893
894 =head2 doc_num
895
896 Return number of documents
897
898 print $res->doc_num;
899
900 This will return real number of documents (limited by C<max>).
901 If you want to get total number of hits, see C<hits>.
902
903 =cut
904
905 sub doc_num {
906 my $self = shift;
907 return $#{$self->{docs}} + 1;
908 }
909
910
911 =head2 get_doc
912
913 Return single document
914
915 my $doc = $res->get_doc( 42 );
916
917 Returns undef if document doesn't exist.
918
919 =cut
920
921 sub get_doc {
922 my $self = shift;
923 my $num = shift;
924 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
925 return undef if ($num < 0 || $num > $self->{docs});
926 return $self->{docs}->[$num];
927 }
928
929
930 =head2 hint
931
932 Return specific hint from results.
933
934 print $res->hint( 'VERSION' );
935
936 Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
937 C<TIME>, C<LINK#n>, C<VIEW>.
938
939 =cut
940
941 sub hint {
942 my $self = shift;
943 my $key = shift || return;
944 return $self->{hints}->{$key};
945 }
946
947 =head2 hints
948
949 More perlish version of C<hint>. This one returns hash.
950
951 my %hints = $res->hints;
952
953 =cut
954
955 sub hints {
956 my $self = shift;
957 return $self->{hints};
958 }
959
960 =head2 hits
961
962 Syntaxtic sugar for total number of hits for this query
963
964 print $res->hits;
965
966 It's same as
967
968 print $res->hint('HIT');
969
970 but shorter.
971
972 =cut
973
974 sub hits {
975 my $self = shift;
976 return $self->{hints}->{'HIT'} || 0;
977 }
978
979 package Search::Estraier::Node;
980
981 use Carp qw/carp croak confess/;
982 use URI;
983 use MIME::Base64;
984 use IO::Socket::INET;
985 use URI::Escape qw/uri_escape/;
986
987 =head1 Search::Estraier::Node
988
989 =head2 new
990
991 my $node = new Search::HyperEstraier::Node;
992
993 or optionally with C<url> as parametar
994
995 my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
996
997 or in more verbose form
998
999 my $node = new Search::HyperEstraier::Node(
1000 url => 'http://localhost:1978/node/test',
1001 user => 'admin',
1002 passwd => 'admin'
1003 create => 1,
1004 label => 'optional node label',
1005 debug => 1,
1006 croak_on_error => 1
1007 );
1008
1009 with following arguments:
1010
1011 =over 4
1012
1013 =item url
1014
1015 URL to node
1016
1017 =item user
1018
1019 specify username for node server authentication
1020
1021 =item passwd
1022
1023 password for authentication
1024
1025 =item create
1026
1027 create node if it doesn't exists
1028
1029 =item label
1030
1031 optional label for new node if C<create> is used
1032
1033 =item debug
1034
1035 dumps a B<lot> of debugging output
1036
1037 =item croak_on_error
1038
1039 very helpful during development. It will croak on all errors instead of
1040 silently returning C<-1> (which is convention of Hyper Estraier API in other
1041 languages).
1042
1043 =back
1044
1045 =cut
1046
1047 sub new {
1048 my $class = shift;
1049 my $self = {
1050 pxport => -1,
1051 timeout => 0, # this used to be -1
1052 wwidth => 480,
1053 hwidth => 96,
1054 awidth => 96,
1055 status => -1,
1056 };
1057
1058 bless($self, $class);
1059
1060 if ($#_ == 0) {
1061 $self->{url} = shift;
1062 } else {
1063 %$self = ( %$self, @_ );
1064
1065 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
1066
1067 warn "## Node debug on\n" if ($self->{debug});
1068 }
1069
1070 $self->{inform} = {
1071 dnum => -1,
1072 wnum => -1,
1073 size => -1.0,
1074 };
1075
1076 if ($self->{create}) {
1077 if (! eval { $self->name } || $@) {
1078 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
1079 croak "can't find node name in '$self->{url}'" unless ($name);
1080 my $label = $self->{label} || $name;
1081 $self->master(
1082 action => 'nodeadd',
1083 name => $name,
1084 label => $label,
1085 ) || croak "can't create node $name ($label)";
1086 }
1087 }
1088
1089 $self ? return $self : return undef;
1090 }
1091
1092
1093 =head2 set_url
1094
1095 Specify URL to node server
1096
1097 $node->set_url('http://localhost:1978');
1098
1099 =cut
1100
1101 sub set_url {
1102 my $self = shift;
1103 $self->{url} = shift;
1104 }
1105
1106
1107 =head2 set_proxy
1108
1109 Specify proxy server to connect to node server
1110
1111 $node->set_proxy('proxy.example.com', 8080);
1112
1113 =cut
1114
1115 sub set_proxy {
1116 my $self = shift;
1117 my ($host,$port) = @_;
1118 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1119 $self->{pxhost} = $host;
1120 $self->{pxport} = $port;
1121 }
1122
1123
1124 =head2 set_timeout
1125
1126 Specify timeout of connection in seconds
1127
1128 $node->set_timeout( 15 );
1129
1130 =cut
1131
1132 sub set_timeout {
1133 my $self = shift;
1134 my $sec = shift;
1135 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1136 $self->{timeout} = $sec;
1137 }
1138
1139
1140 =head2 set_auth
1141
1142 Specify name and password for authentication to node server.
1143
1144 $node->set_auth('clint','eastwood');
1145
1146 =cut
1147
1148 sub set_auth {
1149 my $self = shift;
1150 my ($login,$passwd) = @_;
1151 my $basic_auth = encode_base64( "$login:$passwd" );
1152 chomp($basic_auth);
1153 $self->{auth} = $basic_auth;
1154 }
1155
1156
1157 =head2 status
1158
1159 Return status code of last request.
1160
1161 print $node->status;
1162
1163 C<-1> means connection failure.
1164
1165 =cut
1166
1167 sub status {
1168 my $self = shift;
1169 return $self->{status};
1170 }
1171
1172
1173 =head2 put_doc
1174
1175 Add a document
1176
1177 $node->put_doc( $document_draft ) or die "can't add document";
1178
1179 Return true on success or false on failure.
1180
1181 =cut
1182
1183 sub put_doc {
1184 my $self = shift;
1185 my $doc = shift || return;
1186 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1187 if ($self->shuttle_url( $self->{url} . '/put_doc',
1188 'text/x-estraier-draft',
1189 $doc->dump_draft,
1190 undef
1191 ) == 200) {
1192 $self->_clear_info;
1193 return 1;
1194 }
1195 return undef;
1196 }
1197
1198
1199 =head2 out_doc
1200
1201 Remove a document
1202
1203 $node->out_doc( document_id ) or "can't remove document";
1204
1205 Return true on success or false on failture.
1206
1207 =cut
1208
1209 sub out_doc {
1210 my $self = shift;
1211 my $id = shift || return;
1212 return unless ($self->{url});
1213 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1214 if ($self->shuttle_url( $self->{url} . '/out_doc',
1215 'application/x-www-form-urlencoded',
1216 "id=$id",
1217 undef
1218 ) == 200) {
1219 $self->_clear_info;
1220 return 1;
1221 }
1222 return undef;
1223 }
1224
1225
1226 =head2 out_doc_by_uri
1227
1228 Remove a registrated document using it's uri
1229
1230 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1231
1232 Return true on success or false on failture.
1233
1234 =cut
1235
1236 sub out_doc_by_uri {
1237 my $self = shift;
1238 my $uri = shift || return;
1239 return unless ($self->{url});
1240 if ($self->shuttle_url( $self->{url} . '/out_doc',
1241 'application/x-www-form-urlencoded',
1242 "uri=" . uri_escape($uri),
1243 undef
1244 ) == 200) {
1245 $self->_clear_info;
1246 return 1;
1247 }
1248 return undef;
1249 }
1250
1251
1252 =head2 edit_doc
1253
1254 Edit attributes of a document
1255
1256 $node->edit_doc( $document_draft ) or die "can't edit document";
1257
1258 Return true on success or false on failture.
1259
1260 =cut
1261
1262 sub edit_doc {
1263 my $self = shift;
1264 my $doc = shift || return;
1265 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1266 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1267 'text/x-estraier-draft',
1268 $doc->dump_draft,
1269 undef
1270 ) == 200) {
1271 $self->_clear_info;
1272 return 1;
1273 }
1274 return undef;
1275 }
1276
1277
1278 =head2 get_doc
1279
1280 Retreive document
1281
1282 my $doc = $node->get_doc( document_id ) or die "can't get document";
1283
1284 Return true on success or false on failture.
1285
1286 =cut
1287
1288 sub get_doc {
1289 my $self = shift;
1290 my $id = shift || return;
1291 return $self->_fetch_doc( id => $id );
1292 }
1293
1294
1295 =head2 get_doc_by_uri
1296
1297 Retreive document
1298
1299 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1300
1301 Return true on success or false on failture.
1302
1303 =cut
1304
1305 sub get_doc_by_uri {
1306 my $self = shift;
1307 my $uri = shift || return;
1308 return $self->_fetch_doc( uri => $uri );
1309 }
1310
1311
1312 =head2 get_doc_attr
1313
1314 Retrieve the value of an atribute from object
1315
1316 my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1317 die "can't get document attribute";
1318
1319 =cut
1320
1321 sub get_doc_attr {
1322 my $self = shift;
1323 my ($id,$name) = @_;
1324 return unless ($id && $name);
1325 return $self->_fetch_doc( id => $id, attr => $name );
1326 }
1327
1328
1329 =head2 get_doc_attr_by_uri
1330
1331 Retrieve the value of an atribute from object
1332
1333 my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1334 die "can't get document attribute";
1335
1336 =cut
1337
1338 sub get_doc_attr_by_uri {
1339 my $self = shift;
1340 my ($uri,$name) = @_;
1341 return unless ($uri && $name);
1342 return $self->_fetch_doc( uri => $uri, attr => $name );
1343 }
1344
1345
1346 =head2 etch_doc
1347
1348 Exctract document keywords
1349
1350 my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1351
1352 =cut
1353
1354 sub etch_doc {
1355 my $self = shift;
1356 my $id = shift || return;
1357 return $self->_fetch_doc( id => $id, etch => 1 );
1358 }
1359
1360 =head2 etch_doc_by_uri
1361
1362 Retreive document
1363
1364 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1365
1366 Return true on success or false on failture.
1367
1368 =cut
1369
1370 sub etch_doc_by_uri {
1371 my $self = shift;
1372 my $uri = shift || return;
1373 return $self->_fetch_doc( uri => $uri, etch => 1 );
1374 }
1375
1376
1377 =head2 uri_to_id
1378
1379 Get ID of document specified by URI
1380
1381 my $id = $node->uri_to_id( 'file:///document/uri/42' );
1382
1383 This method won't croak, even if using C<croak_on_error>.
1384
1385 =cut
1386
1387 sub uri_to_id {
1388 my $self = shift;
1389 my $uri = shift || return;
1390 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1391 }
1392
1393
1394 =head2 _fetch_doc
1395
1396 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1397 C<etch_doc>, C<etch_doc_by_uri>.
1398
1399 # this will decode received draft into Search::Estraier::Document object
1400 my $doc = $node->_fetch_doc( id => 42 );
1401 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1402
1403 # to extract keywords, add etch
1404 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1405 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1406
1407 # to get document attrubute add attr
1408 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1409 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1410
1411 # more general form which allows implementation of
1412 # uri_to_id
1413 my $id = $node->_fetch_doc(
1414 uri => 'file:///document/uri/42',
1415 path => '/uri_to_id',
1416 chomp_resbody => 1
1417 );
1418
1419 =cut
1420
1421 sub _fetch_doc {
1422 my $self = shift;
1423 my $a = {@_};
1424 return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1425
1426 my ($arg, $resbody);
1427
1428 my $path = $a->{path} || '/get_doc';
1429 $path = '/etch_doc' if ($a->{etch});
1430
1431 if ($a->{id}) {
1432 croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1433 $arg = 'id=' . $a->{id};
1434 } elsif ($a->{uri}) {
1435 $arg = 'uri=' . uri_escape($a->{uri});
1436 } else {
1437 confess "unhandled argument. Need id or uri.";
1438 }
1439
1440 if ($a->{attr}) {
1441 $path = '/get_doc_attr';
1442 $arg .= '&attr=' . uri_escape($a->{attr});
1443 $a->{chomp_resbody} = 1;
1444 }
1445
1446 my $rv = $self->shuttle_url( $self->{url} . $path,
1447 'application/x-www-form-urlencoded',
1448 $arg,
1449 \$resbody,
1450 $a->{croak_on_error},
1451 );
1452
1453 return if ($rv != 200);
1454
1455 if ($a->{etch}) {
1456 $self->{kwords} = {};
1457 return +{} unless ($resbody);
1458 foreach my $l (split(/\n/, $resbody)) {
1459 my ($k,$v) = split(/\t/, $l, 2);
1460 $self->{kwords}->{$k} = $v if ($v);
1461 }
1462 return $self->{kwords};
1463 } elsif ($a->{chomp_resbody}) {
1464 return unless (defined($resbody));
1465 chomp($resbody);
1466 return $resbody;
1467 } else {
1468 return new Search::Estraier::Document($resbody);
1469 }
1470 }
1471
1472
1473 =head2 name
1474
1475 my $node_name = $node->name;
1476
1477 =cut
1478
1479 sub name {
1480 my $self = shift;
1481 $self->_set_info unless ($self->{inform}->{name});
1482 return $self->{inform}->{name};
1483 }
1484
1485
1486 =head2 label
1487
1488 my $node_label = $node->label;
1489
1490 =cut
1491
1492 sub label {
1493 my $self = shift;
1494 $self->_set_info unless ($self->{inform}->{label});
1495 return $self->{inform}->{label};
1496 }
1497
1498
1499 =head2 doc_num
1500
1501 my $documents_in_node = $node->doc_num;
1502
1503 =cut
1504
1505 sub doc_num {
1506 my $self = shift;
1507 $self->_set_info if ($self->{inform}->{dnum} < 0);
1508 return $self->{inform}->{dnum};
1509 }
1510
1511
1512 =head2 word_num
1513
1514 my $words_in_node = $node->word_num;
1515
1516 =cut
1517
1518 sub word_num {
1519 my $self = shift;
1520 $self->_set_info if ($self->{inform}->{wnum} < 0);
1521 return $self->{inform}->{wnum};
1522 }
1523
1524
1525 =head2 size
1526
1527 my $node_size = $node->size;
1528
1529 =cut
1530
1531 sub size {
1532 my $self = shift;
1533 $self->_set_info if ($self->{inform}->{size} < 0);
1534 return $self->{inform}->{size};
1535 }
1536
1537
1538 =head2 search
1539
1540 Search documents which match condition
1541
1542 my $nres = $node->search( $cond, $depth );
1543
1544 C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1545 depth for meta search.
1546
1547 Function results C<Search::Estraier::NodeResult> object.
1548
1549 =cut
1550
1551 sub search {
1552 my $self = shift;
1553 my ($cond, $depth) = @_;
1554 return unless ($cond && defined($depth) && $self->{url});
1555 croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1556 croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1557
1558 my $resbody;
1559
1560 my $rv = $self->shuttle_url( $self->{url} . '/search',
1561 'application/x-www-form-urlencoded',
1562 $self->cond_to_query( $cond, $depth ),
1563 \$resbody,
1564 );
1565 return if ($rv != 200);
1566
1567 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1568 my $hintsText = splice @records, 0, 2; # starts with empty record
1569 my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1570
1571 # process records
1572 my $docs = [];
1573 foreach my $record (@records)
1574 {
1575 # split into keys and snippets
1576 my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1577
1578 # create document hash
1579 my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1580 $doc->{'@keywords'} = $doc->{keywords};
1581 ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1582 $doc->{snippet} = $snippet;
1583
1584 push @$docs, new Search::Estraier::ResultDocument(
1585 attrs => $doc,
1586 uri => $doc->{'@uri'},
1587 snippet => $snippet,
1588 keywords => $doc->{'keywords'},
1589 );
1590 }
1591
1592 return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1593 }
1594
1595
1596 =head2 cond_to_query
1597
1598 Return URI encoded string generated from Search::Estraier::Condition
1599
1600 my $args = $node->cond_to_query( $cond, $depth );
1601
1602 =cut
1603
1604 sub cond_to_query {
1605 my $self = shift;
1606
1607 my $cond = shift || return;
1608 croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1609 my $depth = shift;
1610
1611 my @args;
1612
1613 if (my $phrase = $cond->phrase) {
1614 push @args, 'phrase=' . uri_escape($phrase);
1615 }
1616
1617 if (my @attrs = $cond->attrs) {
1618 for my $i ( 0 .. $#attrs ) {
1619 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1620 }
1621 }
1622
1623 if (my $order = $cond->order) {
1624 push @args, 'order=' . uri_escape($order);
1625 }
1626
1627 if (my $max = $cond->max) {
1628 push @args, 'max=' . $max;
1629 } else {
1630 push @args, 'max=' . (1 << 30);
1631 }
1632
1633 if (my $options = $cond->options) {
1634 push @args, 'options=' . $options;
1635 }
1636
1637 push @args, 'depth=' . $depth if ($depth);
1638 push @args, 'wwidth=' . $self->{wwidth};
1639 push @args, 'hwidth=' . $self->{hwidth};
1640 push @args, 'awidth=' . $self->{awidth};
1641 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1642
1643 if (my $distinct = $cond->distinct) {
1644 push @args, 'distinct=' . uri_escape($distinct);
1645 }
1646
1647 if ($cond->{mask}) {
1648 my $mask = 0;
1649 map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };
1650
1651 push @args, 'mask=' . $mask if ($mask);
1652 }
1653
1654 return join('&', @args);
1655 }
1656
1657
1658 =head2 shuttle_url
1659
1660 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1661 master.
1662
1663 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1664
1665 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1666 body will be saved within object.
1667
1668 =cut
1669
1670 use LWP::UserAgent;
1671
1672 sub shuttle_url {
1673 my $self = shift;
1674
1675 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1676
1677 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1678
1679 $self->{status} = -1;
1680
1681 warn "## $url\n" if ($self->{debug});
1682
1683 $url = new URI($url);
1684 if (
1685 !$url || !$url->scheme || !$url->scheme eq 'http' ||
1686 !$url->host || !$url->port || $url->port < 1
1687 ) {
1688 carp "can't parse $url\n";
1689 return -1;
1690 }
1691
1692 my $ua = LWP::UserAgent->new;
1693 $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1694
1695 my $req;
1696 if ($reqbody) {
1697 $req = HTTP::Request->new(POST => $url);
1698 } else {
1699 $req = HTTP::Request->new(GET => $url);
1700 }
1701
1702 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1703 $req->headers->header( 'Connection', 'close' );
1704 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1705 $req->content_type( $content_type );
1706
1707 warn $req->headers->as_string,"\n" if ($self->{debug});
1708
1709 if ($reqbody) {
1710 warn "$reqbody\n" if ($self->{debug});
1711 $req->content( $reqbody );
1712 }
1713
1714 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1715
1716 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1717
1718 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1719
1720 if (! $res->is_success) {
1721 if ($croak_on_error) {
1722 croak("can't get $url: ",$res->status_line);
1723 } else {
1724 return -1;
1725 }
1726 }
1727
1728 $$resbody .= $res->content;
1729
1730 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1731
1732 return $self->{status};
1733 }
1734
1735
1736 =head2 set_snippet_width
1737
1738 Set width of snippets in results
1739
1740 $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1741
1742 C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1743 is not sent with results. If it is negative, whole document text is sent instead of snippet.
1744
1745 C<$hwidth> specified width of strings from beginning of string. Default
1746 value is C<96>. Negative or zero value keep previous value.
1747
1748 C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1749 If negative of zero value is provided previous value is kept unchanged.
1750
1751 =cut
1752
1753 sub set_snippet_width {
1754 my $self = shift;
1755
1756 my ($wwidth, $hwidth, $awidth) = @_;
1757 $self->{wwidth} = $wwidth;
1758 $self->{hwidth} = $hwidth if ($hwidth >= 0);
1759 $self->{awidth} = $awidth if ($awidth >= 0);
1760 }
1761
1762
1763 =head2 set_user
1764
1765 Manage users of node
1766
1767 $node->set_user( 'name', $mode );
1768
1769 C<$mode> can be one of:
1770
1771 =over 4
1772
1773 =item 0
1774
1775 delete account
1776
1777 =item 1
1778
1779 set administrative right for user
1780
1781 =item 2
1782
1783 set user account as guest
1784
1785 =back
1786
1787 Return true on success, otherwise false.
1788
1789 =cut
1790
1791 sub set_user {
1792 my $self = shift;
1793 my ($name, $mode) = @_;
1794
1795 return unless ($self->{url});
1796 croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1797
1798 $self->shuttle_url( $self->{url} . '/_set_user',
1799 'application/x-www-form-urlencoded',
1800 'name=' . uri_escape($name) . '&mode=' . $mode,
1801 undef
1802 ) == 200;
1803 }
1804
1805
1806 =head2 set_link
1807
1808 Manage node links
1809
1810 $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1811
1812 If C<$credit> is negative, link is removed.
1813
1814 =cut
1815
1816 sub set_link {
1817 my $self = shift;
1818 my ($url, $label, $credit) = @_;
1819
1820 return unless ($self->{url});
1821 croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1822
1823 my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1824 $reqbody .= '&credit=' . $credit if ($credit > 0);
1825
1826 if ($self->shuttle_url( $self->{url} . '/_set_link',
1827 'application/x-www-form-urlencoded',
1828 $reqbody,
1829 undef
1830 ) == 200) {
1831 # refresh node info after adding link
1832 $self->_clear_info;
1833 return 1;
1834 }
1835 return undef;
1836 }
1837
1838 =head2 admins
1839
1840 my @admins = @{ $node->admins };
1841
1842 Return array of users with admin rights on node
1843
1844 =cut
1845
1846 sub admins {
1847 my $self = shift;
1848 $self->_set_info unless ($self->{inform}->{name});
1849 return $self->{inform}->{admins};
1850 }
1851
1852 =head2 guests
1853
1854 my @guests = @{ $node->guests };
1855
1856 Return array of users with guest rights on node
1857
1858 =cut
1859
1860 sub guests {
1861 my $self = shift;
1862 $self->_set_info unless ($self->{inform}->{name});
1863 return $self->{inform}->{guests};
1864 }
1865
1866 =head2 links
1867
1868 my $links = @{ $node->links };
1869
1870 Return array of links for this node
1871
1872 =cut
1873
1874 sub links {
1875 my $self = shift;
1876 $self->_set_info unless ($self->{inform}->{name});
1877 return $self->{inform}->{links};
1878 }
1879
1880 =head2 cacheusage
1881
1882 Return cache usage for a node
1883
1884 my $cache = $node->cacheusage;
1885
1886 =cut
1887
1888 sub cacheusage {
1889 my $self = shift;
1890
1891 return unless ($self->{url});
1892
1893 my $resbody;
1894 my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1895 'text/plain',
1896 undef,
1897 \$resbody,
1898 );
1899
1900 return if ($rv != 200 || !$resbody);
1901
1902 return $resbody;
1903 }
1904
1905 =head2 master
1906
1907 Set actions on Hyper Estraier node master (C<estmaster> process)
1908
1909 $node->master(
1910 action => 'sync'
1911 );
1912
1913 All available actions are documented in
1914 L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1915
1916 =cut
1917
1918 my $estmaster_rest = {
1919 shutdown => {
1920 status => 202,
1921 },
1922 sync => {
1923 status => 202,
1924 },
1925 backup => {
1926 status => 202,
1927 },
1928 userlist => {
1929 status => 200,
1930 returns => [ qw/name passwd flags fname misc/ ],
1931 },
1932 useradd => {
1933 required => [ qw/name passwd flags/ ],
1934 optional => [ qw/fname misc/ ],
1935 status => 200,
1936 },
1937 userdel => {
1938 required => [ qw/name/ ],
1939 status => 200,
1940 },
1941 nodelist => {
1942 status => 200,
1943 returns => [ qw/name label doc_num word_num size/ ],
1944 },
1945 nodeadd => {
1946 required => [ qw/name/ ],
1947 optional => [ qw/label/ ],
1948 status => 200,
1949 },
1950 nodedel => {
1951 required => [ qw/name/ ],
1952 status => 200,
1953 },
1954 nodeclr => {
1955 required => [ qw/name/ ],
1956 status => 200,
1957 },
1958 nodertt => {
1959 status => 200,
1960 },
1961 };
1962
1963 sub master {
1964 my $self = shift;
1965
1966 my $args = {@_};
1967
1968 # have action?
1969 my $action = $args->{action} || croak "need action, available: ",
1970 join(", ",keys %{ $estmaster_rest });
1971
1972 # check if action is valid
1973 my $rest = $estmaster_rest->{$action};
1974 croak "action '$action' is not supported, available actions: ",
1975 join(", ",keys %{ $estmaster_rest }) unless ($rest);
1976
1977 croak "BUG: action '$action' needs return status" unless ($rest->{status});
1978
1979 my @args;
1980
1981 if ($rest->{required} || $rest->{optional}) {
1982
1983 map {
1984 croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1985 push @args, $_ . '=' . uri_escape( $args->{$_} );
1986 } ( @{ $rest->{required} } );
1987
1988 map {
1989 push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1990 } ( @{ $rest->{optional} } );
1991
1992 }
1993
1994 my $uri = new URI( $self->{url} );
1995
1996 my $resbody;
1997
1998 my $status = $self->shuttle_url(
1999 'http://' . $uri->host_port . '/master?action=' . $action ,
2000 'application/x-www-form-urlencoded',
2001 join('&', @args),
2002 \$resbody,
2003 1,
2004 ) or confess "shuttle_url failed";
2005
2006 if ($status == $rest->{status}) {
2007
2008 # refresh node info after sync
2009 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
2010
2011 if ($rest->{returns} && wantarray) {
2012
2013 my @results;
2014 my $fields = $#{$rest->{returns}};
2015
2016 foreach my $line ( split(/[\r\n]/,$resbody) ) {
2017 my @e = split(/\t/, $line, $fields + 1);
2018 my $row;
2019 foreach my $i ( 0 .. $fields) {
2020 $row->{ $rest->{returns}->[$i] } = $e[ $i ];
2021 }
2022 push @results, $row;
2023 }
2024
2025 return @results;
2026
2027 } elsif ($resbody) {
2028 chomp $resbody;
2029 return $resbody;
2030 } else {
2031 return 0E0;
2032 }
2033 }
2034
2035 carp "expected status $rest->{status}, but got $status";
2036 return undef;
2037 }
2038
2039 =head1 PRIVATE METHODS
2040
2041 You could call those directly, but you don't have to. I hope.
2042
2043 =head2 _set_info
2044
2045 Set information for node
2046
2047 $node->_set_info;
2048
2049 =cut
2050
2051 sub _set_info {
2052 my $self = shift;
2053
2054 $self->{status} = -1;
2055 return unless ($self->{url});
2056
2057 my $resbody;
2058 my $rv = $self->shuttle_url( $self->{url} . '/inform',
2059 'text/plain',
2060 undef,
2061 \$resbody,
2062 );
2063
2064 return if ($rv != 200 || !$resbody);
2065
2066 my @lines = split(/[\r\n]/,$resbody);
2067
2068 $self->_clear_info;
2069
2070 ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
2071 $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
2072
2073 return $resbody unless (@lines);
2074
2075 shift @lines;
2076
2077 while(my $admin = shift @lines) {
2078 push @{$self->{inform}->{admins}}, $admin;
2079 }
2080
2081 while(my $guest = shift @lines) {
2082 push @{$self->{inform}->{guests}}, $guest;
2083 }
2084
2085 while(my $link = shift @lines) {
2086 push @{$self->{inform}->{links}}, $link;
2087 }
2088
2089 return $resbody;
2090
2091 }
2092
2093 =head2 _clear_info
2094
2095 Clear information for node
2096
2097 $node->_clear_info;
2098
2099 On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
2100 info will be fetch again from Hyper Estraier.
2101
2102 =cut
2103 sub _clear_info {
2104 my $self = shift;
2105 $self->{inform} = {
2106 dnum => -1,
2107 wnum => -1,
2108 size => -1.0,
2109 };
2110 }
2111
2112 ###
2113
2114 =head1 EXPORT
2115
2116 Nothing.
2117
2118 =head1 SEE ALSO
2119
2120 L<http://hyperestraier.sourceforge.net/>
2121
2122 Hyper Estraier Ruby interface on which this module is based.
2123
2124 Hyper Estraier now also has pure-perl binding included in distribution. It's
2125 a faster way to access databases directly if you are not running
2126 C<estmaster> P2P server.
2127
2128 =head1 AUTHOR
2129
2130 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2131
2132 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2133
2134 =head1 COPYRIGHT AND LICENSE
2135
2136 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2137
2138 This library is free software; you can redistribute it and/or modify
2139 it under the GPL v2 or later.
2140
2141 =cut
2142
2143 1;

  ViewVC Help
Powered by ViewVC 1.1.26