/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 199 - (show annotations)
Sun Jan 20 16:51:47 2008 UTC (15 years ago) by dpavlin
File size: 38054 byte(s)
version bump [0.09]
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.09';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 =head2 Simple indexer
16
17 use Search::Estraier;
18
19 # create and configure node
20 my $node = new Search::Estraier::Node(
21 url => 'http://localhost:1978/node/test',
22 user => 'admin',
23 passwd => 'admin',
24 create => 1,
25 label => 'Label for node',
26 croak_on_error => 1,
27 );
28
29 # create document
30 my $doc = new Search::Estraier::Document;
31
32 # add attributes
33 $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34 $doc->add_attr('@title', "Over the Rainbow");
35
36 # add body text to document
37 $doc->add_text("Somewhere over the rainbow. Way up high.");
38 $doc->add_text("There's a land that I heard of once in a lullaby.");
39
40 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41
42 =head2 Simple searcher
43
44 use Search::Estraier;
45
46 # create and configure node
47 my $node = new Search::Estraier::Node(
48 url => 'http://localhost:1978/node/test',
49 user => 'admin',
50 passwd => 'admin',
51 croak_on_error => 1,
52 );
53
54 # create condition
55 my $cond = new Search::Estraier::Condition;
56
57 # set search phrase
58 $cond->set_phrase("rainbow AND lullaby");
59
60 my $nres = $node->search($cond, 0);
61
62 if (defined($nres)) {
63 print "Got ", $nres->hits, " results\n";
64
65 # for each document in results
66 for my $i ( 0 ... $nres->doc_num - 1 ) {
67 # get result document
68 my $rdoc = $nres->get_doc($i);
69 # display attribte
70 print "URI: ", $rdoc->attr('@uri'),"\n";
71 print "Title: ", $rdoc->attr('@title'),"\n";
72 print $rdoc->snippet,"\n";
73 }
74 } else {
75 die "error: ", $node->status,"\n";
76 }
77
78 =head1 DESCRIPTION
79
80 This module is implementation of node API of Hyper Estraier. Since it's
81 perl-only module with dependencies only on standard perl modules, it will
82 run on all platforms on which perl runs. It doesn't require compilation
83 or Hyper Estraier development files on target machine.
84
85 It is implemented as multiple packages which closly resamble Ruby
86 implementation. It also includes methods to manage nodes.
87
88 There are few examples in C<scripts> directory of this distribution.
89
90 =cut
91
92 =head1 Inheritable common methods
93
94 This methods should really move somewhere else.
95
96 =head2 _s
97
98 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99
100 my $text = $self->_s(" this is a text ");
101 $text = 'this is a text';
102
103 =cut
104
105 sub _s {
106 my $text = $_[1];
107 return unless defined($text);
108 $text =~ s/\s\s+/ /gs;
109 $text =~ s/^\s+//;
110 $text =~ s/\s+$//;
111 return $text;
112 }
113
114 package Search::Estraier::Document;
115
116 use Carp qw/croak confess/;
117
118 use Search::Estraier;
119 our @ISA = qw/Search::Estraier/;
120
121 =head1 Search::Estraier::Document
122
123 This class implements Document which is single item in Hyper Estraier.
124
125 It's is collection of:
126
127 =over 4
128
129 =item attributes
130
131 C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132
133 You can add common filters to C<attrindex> in estmaster's C<_conf>
134 file for better performance. See C<attrindex> in
135 L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136
137 =item vectors
138
139 also C<< 'key' => 'value' >> pairs
140
141 =item display text
142
143 Text which will be used to create searchable corpus of your index and
144 included in snippet output.
145
146 =item hidden text
147
148 Text which will be searchable, but will not be included in snippet.
149
150 =back
151
152 =head2 new
153
154 Create new document, empty or from draft.
155
156 my $doc = new Search::HyperEstraier::Document;
157 my $doc2 = new Search::HyperEstraier::Document( $draft );
158
159 =cut
160
161 sub new {
162 my $class = shift;
163 my $self = {};
164 bless($self, $class);
165
166 $self->{id} = -1;
167
168 my $draft = shift;
169
170 if ($draft) {
171 my $in_text = 0;
172 foreach my $line (split(/\n/, $draft)) {
173
174 if ($in_text) {
175 if ($line =~ /^\t/) {
176 push @{ $self->{htexts} }, substr($line, 1);
177 } else {
178 push @{ $self->{dtexts} }, $line;
179 }
180 next;
181 }
182
183 if ($line =~ m/^%VECTOR\t(.+)$/) {
184 my @fields = split(/\t/, $1);
185 if ($#fields % 2 == 1) {
186 $self->{kwords} = { @fields };
187 } else {
188 warn "can't decode $line\n";
189 }
190 next;
191 } elsif ($line =~ m/^%SCORE\t(.+)$/) {
192 $self->{score} = $1;
193 next;
194 } elsif ($line =~ m/^%/) {
195 # What is this? comment?
196 #warn "$line\n";
197 next;
198 } elsif ($line =~ m/^$/) {
199 $in_text = 1;
200 next;
201 } elsif ($line =~ m/^(.+)=(.*)$/) {
202 $self->{attrs}->{ $1 } = $2;
203 next;
204 }
205
206 warn "draft ignored: '$line'\n";
207 }
208 }
209
210 $self ? return $self : return undef;
211 }
212
213
214 =head2 add_attr
215
216 Add an attribute.
217
218 $doc->add_attr( name => 'value' );
219
220 Delete attribute using
221
222 $doc->add_attr( name => undef );
223
224 =cut
225
226 sub add_attr {
227 my $self = shift;
228 my $attrs = {@_};
229
230 while (my ($name, $value) = each %{ $attrs }) {
231 if (! defined($value)) {
232 delete( $self->{attrs}->{ $self->_s($name) } );
233 } else {
234 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
235 }
236 }
237
238 return 1;
239 }
240
241
242 =head2 add_text
243
244 Add a sentence of text.
245
246 $doc->add_text('this is example text to display');
247
248 =cut
249
250 sub add_text {
251 my $self = shift;
252 my $text = shift;
253 return unless defined($text);
254
255 push @{ $self->{dtexts} }, $self->_s($text);
256 }
257
258
259 =head2 add_hidden_text
260
261 Add a hidden sentence.
262
263 $doc->add_hidden_text('this is example text just for search');
264
265 =cut
266
267 sub add_hidden_text {
268 my $self = shift;
269 my $text = shift;
270 return unless defined($text);
271
272 push @{ $self->{htexts} }, $self->_s($text);
273 }
274
275 =head2 add_vectors
276
277 Add a vectors
278
279 $doc->add_vector(
280 'vector_name' => 42,
281 'another' => 12345,
282 );
283
284 =cut
285
286 sub add_vectors {
287 my $self = shift;
288 return unless (@_);
289
290 # this is ugly, but works
291 die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);
292
293 $self->{kwords} = {@_};
294 }
295
296 =head2 set_score
297
298 Set the substitute score
299
300 $doc->set_score(12345);
301
302 =cut
303
304 sub set_score {
305 my $self = shift;
306 my $score = shift;
307 return unless (defined($score));
308 $self->{score} = $score;
309 }
310
311 =head2 score
312
313 Get the substitute score
314
315 =cut
316
317 sub score {
318 my $self = shift;
319 return -1 unless (defined($self->{score}));
320 return $self->{score};
321 }
322
323 =head2 id
324
325 Get the ID number of document. If the object has never been registred, C<-1> is returned.
326
327 print $doc->id;
328
329 =cut
330
331 sub id {
332 my $self = shift;
333 return $self->{id};
334 }
335
336
337 =head2 attr_names
338
339 Returns array with attribute names from document object.
340
341 my @attrs = $doc->attr_names;
342
343 =cut
344
345 sub attr_names {
346 my $self = shift;
347 return unless ($self->{attrs});
348 #croak "attr_names return array, not scalar" if (! wantarray);
349 return sort keys %{ $self->{attrs} };
350 }
351
352
353 =head2 attr
354
355 Returns value of an attribute.
356
357 my $value = $doc->attr( 'attribute' );
358
359 =cut
360
361 sub attr {
362 my $self = shift;
363 my $name = shift;
364 return unless (defined($name) && $self->{attrs});
365 return $self->{attrs}->{ $name };
366 }
367
368
369 =head2 texts
370
371 Returns array with text sentences.
372
373 my @texts = $doc->texts;
374
375 =cut
376
377 sub texts {
378 my $self = shift;
379 #confess "texts return array, not scalar" if (! wantarray);
380 return @{ $self->{dtexts} } if ($self->{dtexts});
381 }
382
383
384 =head2 cat_texts
385
386 Return whole text as single scalar.
387
388 my $text = $doc->cat_texts;
389
390 =cut
391
392 sub cat_texts {
393 my $self = shift;
394 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
395 }
396
397
398 =head2 dump_draft
399
400 Dump draft data from document object.
401
402 print $doc->dump_draft;
403
404 =cut
405
406 sub dump_draft {
407 my $self = shift;
408 my $draft;
409
410 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
411 next unless defined(my $v = $self->{attrs}->{$attr_name});
412 $draft .= $attr_name . '=' . $v . "\n";
413 }
414
415 if ($self->{kwords}) {
416 $draft .= '%VECTOR';
417 while (my ($key, $value) = each %{ $self->{kwords} }) {
418 $draft .= "\t$key\t$value";
419 }
420 $draft .= "\n";
421 }
422
423 if (defined($self->{score}) && $self->{score} >= 0) {
424 $draft .= "%SCORE\t" . $self->{score} . "\n";
425 }
426
427 $draft .= "\n";
428
429 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
430 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
431
432 return $draft;
433 }
434
435
436 =head2 delete
437
438 Empty document object
439
440 $doc->delete;
441
442 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
443 convinience. Document objects which go out of scope will be destroyed
444 automatically.
445
446 =cut
447
448 sub delete {
449 my $self = shift;
450
451 foreach my $data (qw/attrs dtexts stexts kwords/) {
452 delete($self->{$data});
453 }
454
455 $self->{id} = -1;
456
457 return 1;
458 }
459
460
461
462 package Search::Estraier::Condition;
463
464 use Carp qw/carp confess croak/;
465
466 use Search::Estraier;
467 our @ISA = qw/Search::Estraier/;
468
469 =head1 Search::Estraier::Condition
470
471 =head2 new
472
473 my $cond = new Search::HyperEstraier::Condition;
474
475 =cut
476
477 sub new {
478 my $class = shift;
479 my $self = {};
480 bless($self, $class);
481
482 $self->{max} = -1;
483 $self->{options} = 0;
484
485 $self ? return $self : return undef;
486 }
487
488
489 =head2 set_phrase
490
491 $cond->set_phrase('search phrase');
492
493 =cut
494
495 sub set_phrase {
496 my $self = shift;
497 $self->{phrase} = $self->_s( shift );
498 }
499
500
501 =head2 add_attr
502
503 $cond->add_attr('@URI STRINC /~dpavlin/');
504
505 =cut
506
507 sub add_attr {
508 my $self = shift;
509 my $attr = shift || return;
510 push @{ $self->{attrs} }, $self->_s( $attr );
511 }
512
513
514 =head2 set_order
515
516 $cond->set_order('@mdate NUMD');
517
518 =cut
519
520 sub set_order {
521 my $self = shift;
522 $self->{order} = shift;
523 }
524
525
526 =head2 set_max
527
528 $cond->set_max(42);
529
530 =cut
531
532 sub set_max {
533 my $self = shift;
534 my $max = shift;
535 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
536 $self->{max} = $max;
537 }
538
539
540 =head2 set_options
541
542 $cond->set_options( 'SURE' );
543
544 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
545
546 Possible options are:
547
548 =over 8
549
550 =item SURE
551
552 check every N-gram
553
554 =item USUAL
555
556 check every second N-gram
557
558 =item FAST
559
560 check every third N-gram
561
562 =item AGITO
563
564 check every fourth N-gram
565
566 =item NOIDF
567
568 don't perform TF-IDF tuning
569
570 =item SIMPLE
571
572 use simplified query phrase
573
574 =back
575
576 Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
577 options;
578
579 This option changed in version C<0.04> of this module. It's backwards compatibile.
580
581 =cut
582
583 my $options = {
584 SURE => 1 << 0,
585 USUAL => 1 << 1,
586 FAST => 1 << 2,
587 AGITO => 1 << 3,
588 NOIDF => 1 << 4,
589 SIMPLE => 1 << 10,
590 };
591
592 sub set_options {
593 my $self = shift;
594 my $opt = 0;
595 foreach my $option (@_) {
596 my $mask;
597 unless ($mask = $options->{$option}) {
598 if ($option eq '1') {
599 next;
600 } else {
601 croak "unknown option $option";
602 }
603 }
604 $opt += $mask;
605 }
606 $self->{options} = $opt;
607 }
608
609
610 =head2 phrase
611
612 Return search phrase.
613
614 print $cond->phrase;
615
616 =cut
617
618 sub phrase {
619 my $self = shift;
620 return $self->{phrase};
621 }
622
623
624 =head2 order
625
626 Return search result order.
627
628 print $cond->order;
629
630 =cut
631
632 sub order {
633 my $self = shift;
634 return $self->{order};
635 }
636
637
638 =head2 attrs
639
640 Return search result attrs.
641
642 my @cond_attrs = $cond->attrs;
643
644 =cut
645
646 sub attrs {
647 my $self = shift;
648 #croak "attrs return array, not scalar" if (! wantarray);
649 return @{ $self->{attrs} } if ($self->{attrs});
650 }
651
652
653 =head2 max
654
655 Return maximum number of results.
656
657 print $cond->max;
658
659 C<-1> is returned for unitialized value, C<0> is unlimited.
660
661 =cut
662
663 sub max {
664 my $self = shift;
665 return $self->{max};
666 }
667
668
669 =head2 options
670
671 Return options for this condition.
672
673 print $cond->options;
674
675 Options are returned in numerical form.
676
677 =cut
678
679 sub options {
680 my $self = shift;
681 return $self->{options};
682 }
683
684
685 =head2 set_skip
686
687 Set number of skipped documents from beginning of results
688
689 $cond->set_skip(42);
690
691 Similar to C<offset> in RDBMS.
692
693 =cut
694
695 sub set_skip {
696 my $self = shift;
697 $self->{skip} = shift;
698 }
699
700 =head2 skip
701
702 Return skip for this condition.
703
704 print $cond->skip;
705
706 =cut
707
708 sub skip {
709 my $self = shift;
710 return $self->{skip};
711 }
712
713
714 =head2 set_distinct
715
716 $cond->set_distinct('@author');
717
718 =cut
719
720 sub set_distinct {
721 my $self = shift;
722 $self->{distinct} = shift;
723 }
724
725 =head2 distinct
726
727 Return distinct attribute
728
729 print $cond->distinct;
730
731 =cut
732
733 sub distinct {
734 my $self = shift;
735 return $self->{distinct};
736 }
737
738 =head2 set_mask
739
740 Filter out some links when searching.
741
742 Argument array of link numbers, starting with 0 (current node).
743
744 $cond->set_mask(qw/0 1 4/);
745
746 =cut
747
748 sub set_mask {
749 my $self = shift;
750 return unless (@_);
751 $self->{mask} = \@_;
752 }
753
754
755 package Search::Estraier::ResultDocument;
756
757 use Carp qw/croak/;
758
759 #use Search::Estraier;
760 #our @ISA = qw/Search::Estraier/;
761
762 =head1 Search::Estraier::ResultDocument
763
764 =head2 new
765
766 my $rdoc = new Search::HyperEstraier::ResultDocument(
767 uri => 'http://localhost/document/uri/42',
768 attrs => {
769 foo => 1,
770 bar => 2,
771 },
772 snippet => 'this is a text of snippet'
773 keywords => 'this\tare\tkeywords'
774 );
775
776 =cut
777
778 sub new {
779 my $class = shift;
780 my $self = {@_};
781 bless($self, $class);
782
783 croak "missing uri for ResultDocument" unless defined($self->{uri});
784
785 $self ? return $self : return undef;
786 }
787
788
789 =head2 uri
790
791 Return URI of result document
792
793 print $rdoc->uri;
794
795 =cut
796
797 sub uri {
798 my $self = shift;
799 return $self->{uri};
800 }
801
802
803 =head2 attr_names
804
805 Returns array with attribute names from result document object.
806
807 my @attrs = $rdoc->attr_names;
808
809 =cut
810
811 sub attr_names {
812 my $self = shift;
813 croak "attr_names return array, not scalar" if (! wantarray);
814 return sort keys %{ $self->{attrs} };
815 }
816
817
818 =head2 attr
819
820 Returns value of an attribute.
821
822 my $value = $rdoc->attr( 'attribute' );
823
824 =cut
825
826 sub attr {
827 my $self = shift;
828 my $name = shift || return;
829 return $self->{attrs}->{ $name };
830 }
831
832
833 =head2 snippet
834
835 Return snippet from result document
836
837 print $rdoc->snippet;
838
839 =cut
840
841 sub snippet {
842 my $self = shift;
843 return $self->{snippet};
844 }
845
846
847 =head2 keywords
848
849 Return keywords from result document
850
851 print $rdoc->keywords;
852
853 =cut
854
855 sub keywords {
856 my $self = shift;
857 return $self->{keywords};
858 }
859
860
861 package Search::Estraier::NodeResult;
862
863 use Carp qw/croak/;
864
865 #use Search::Estraier;
866 #our @ISA = qw/Search::Estraier/;
867
868 =head1 Search::Estraier::NodeResult
869
870 =head2 new
871
872 my $res = new Search::HyperEstraier::NodeResult(
873 docs => @array_of_rdocs,
874 hits => %hash_with_hints,
875 );
876
877 =cut
878
879 sub new {
880 my $class = shift;
881 my $self = {@_};
882 bless($self, $class);
883
884 foreach my $f (qw/docs hints/) {
885 croak "missing $f for ResultDocument" unless defined($self->{$f});
886 }
887
888 $self ? return $self : return undef;
889 }
890
891
892 =head2 doc_num
893
894 Return number of documents
895
896 print $res->doc_num;
897
898 This will return real number of documents (limited by C<max>).
899 If you want to get total number of hits, see C<hits>.
900
901 =cut
902
903 sub doc_num {
904 my $self = shift;
905 return $#{$self->{docs}} + 1;
906 }
907
908
909 =head2 get_doc
910
911 Return single document
912
913 my $doc = $res->get_doc( 42 );
914
915 Returns undef if document doesn't exist.
916
917 =cut
918
919 sub get_doc {
920 my $self = shift;
921 my $num = shift;
922 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
923 return undef if ($num < 0 || $num > $self->{docs});
924 return $self->{docs}->[$num];
925 }
926
927
928 =head2 hint
929
930 Return specific hint from results.
931
932 print $res->hint( 'VERSION' );
933
934 Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
935 C<TIME>, C<LINK#n>, C<VIEW>.
936
937 =cut
938
939 sub hint {
940 my $self = shift;
941 my $key = shift || return;
942 return $self->{hints}->{$key};
943 }
944
945 =head2 hints
946
947 More perlish version of C<hint>. This one returns hash.
948
949 my %hints = $res->hints;
950
951 =cut
952
953 sub hints {
954 my $self = shift;
955 return $self->{hints};
956 }
957
958 =head2 hits
959
960 Syntaxtic sugar for total number of hits for this query
961
962 print $res->hits;
963
964 It's same as
965
966 print $res->hint('HIT');
967
968 but shorter.
969
970 =cut
971
972 sub hits {
973 my $self = shift;
974 return $self->{hints}->{'HIT'} || 0;
975 }
976
977 package Search::Estraier::Node;
978
979 use Carp qw/carp croak confess/;
980 use URI;
981 use MIME::Base64;
982 use IO::Socket::INET;
983 use URI::Escape qw/uri_escape/;
984
985 =head1 Search::Estraier::Node
986
987 =head2 new
988
989 my $node = new Search::HyperEstraier::Node;
990
991 or optionally with C<url> as parametar
992
993 my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
994
995 or in more verbose form
996
997 my $node = new Search::HyperEstraier::Node(
998 url => 'http://localhost:1978/node/test',
999 user => 'admin',
1000 passwd => 'admin'
1001 create => 1,
1002 label => 'optional node label',
1003 debug => 1,
1004 croak_on_error => 1
1005 );
1006
1007 with following arguments:
1008
1009 =over 4
1010
1011 =item url
1012
1013 URL to node
1014
1015 =item user
1016
1017 specify username for node server authentication
1018
1019 =item passwd
1020
1021 password for authentication
1022
1023 =item create
1024
1025 create node if it doesn't exists
1026
1027 =item label
1028
1029 optional label for new node if C<create> is used
1030
1031 =item debug
1032
1033 dumps a B<lot> of debugging output
1034
1035 =item croak_on_error
1036
1037 very helpful during development. It will croak on all errors instead of
1038 silently returning C<-1> (which is convention of Hyper Estraier API in other
1039 languages).
1040
1041 =back
1042
1043 =cut
1044
1045 sub new {
1046 my $class = shift;
1047 my $self = {
1048 pxport => -1,
1049 timeout => 0, # this used to be -1
1050 wwidth => 480,
1051 hwidth => 96,
1052 awidth => 96,
1053 status => -1,
1054 };
1055
1056 bless($self, $class);
1057
1058 if ($#_ == 0) {
1059 $self->{url} = shift;
1060 } else {
1061 %$self = ( %$self, @_ );
1062
1063 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
1064
1065 warn "## Node debug on\n" if ($self->{debug});
1066 }
1067
1068 $self->{inform} = {
1069 dnum => -1,
1070 wnum => -1,
1071 size => -1.0,
1072 };
1073
1074 if ($self->{create}) {
1075 if (! eval { $self->name } || $@) {
1076 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
1077 croak "can't find node name in '$self->{url}'" unless ($name);
1078 my $label = $self->{label} || $name;
1079 $self->master(
1080 action => 'nodeadd',
1081 name => $name,
1082 label => $label,
1083 ) || croak "can't create node $name ($label)";
1084 }
1085 }
1086
1087 $self ? return $self : return undef;
1088 }
1089
1090
1091 =head2 set_url
1092
1093 Specify URL to node server
1094
1095 $node->set_url('http://localhost:1978');
1096
1097 =cut
1098
1099 sub set_url {
1100 my $self = shift;
1101 $self->{url} = shift;
1102 }
1103
1104
1105 =head2 set_proxy
1106
1107 Specify proxy server to connect to node server
1108
1109 $node->set_proxy('proxy.example.com', 8080);
1110
1111 =cut
1112
1113 sub set_proxy {
1114 my $self = shift;
1115 my ($host,$port) = @_;
1116 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1117 $self->{pxhost} = $host;
1118 $self->{pxport} = $port;
1119 }
1120
1121
1122 =head2 set_timeout
1123
1124 Specify timeout of connection in seconds
1125
1126 $node->set_timeout( 15 );
1127
1128 =cut
1129
1130 sub set_timeout {
1131 my $self = shift;
1132 my $sec = shift;
1133 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1134 $self->{timeout} = $sec;
1135 }
1136
1137
1138 =head2 set_auth
1139
1140 Specify name and password for authentication to node server.
1141
1142 $node->set_auth('clint','eastwood');
1143
1144 =cut
1145
1146 sub set_auth {
1147 my $self = shift;
1148 my ($login,$passwd) = @_;
1149 my $basic_auth = encode_base64( "$login:$passwd" );
1150 chomp($basic_auth);
1151 $self->{auth} = $basic_auth;
1152 }
1153
1154
1155 =head2 status
1156
1157 Return status code of last request.
1158
1159 print $node->status;
1160
1161 C<-1> means connection failure.
1162
1163 =cut
1164
1165 sub status {
1166 my $self = shift;
1167 return $self->{status};
1168 }
1169
1170
1171 =head2 put_doc
1172
1173 Add a document
1174
1175 $node->put_doc( $document_draft ) or die "can't add document";
1176
1177 Return true on success or false on failure.
1178
1179 =cut
1180
1181 sub put_doc {
1182 my $self = shift;
1183 my $doc = shift || return;
1184 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1185 if ($self->shuttle_url( $self->{url} . '/put_doc',
1186 'text/x-estraier-draft',
1187 $doc->dump_draft,
1188 undef
1189 ) == 200) {
1190 $self->_clear_info;
1191 return 1;
1192 }
1193 return undef;
1194 }
1195
1196
1197 =head2 out_doc
1198
1199 Remove a document
1200
1201 $node->out_doc( document_id ) or "can't remove document";
1202
1203 Return true on success or false on failture.
1204
1205 =cut
1206
1207 sub out_doc {
1208 my $self = shift;
1209 my $id = shift || return;
1210 return unless ($self->{url});
1211 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1212 if ($self->shuttle_url( $self->{url} . '/out_doc',
1213 'application/x-www-form-urlencoded',
1214 "id=$id",
1215 undef
1216 ) == 200) {
1217 $self->_clear_info;
1218 return 1;
1219 }
1220 return undef;
1221 }
1222
1223
1224 =head2 out_doc_by_uri
1225
1226 Remove a registrated document using it's uri
1227
1228 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1229
1230 Return true on success or false on failture.
1231
1232 =cut
1233
1234 sub out_doc_by_uri {
1235 my $self = shift;
1236 my $uri = shift || return;
1237 return unless ($self->{url});
1238 if ($self->shuttle_url( $self->{url} . '/out_doc',
1239 'application/x-www-form-urlencoded',
1240 "uri=" . uri_escape($uri),
1241 undef
1242 ) == 200) {
1243 $self->_clear_info;
1244 return 1;
1245 }
1246 return undef;
1247 }
1248
1249
1250 =head2 edit_doc
1251
1252 Edit attributes of a document
1253
1254 $node->edit_doc( $document_draft ) or die "can't edit document";
1255
1256 Return true on success or false on failture.
1257
1258 =cut
1259
1260 sub edit_doc {
1261 my $self = shift;
1262 my $doc = shift || return;
1263 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1264 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1265 'text/x-estraier-draft',
1266 $doc->dump_draft,
1267 undef
1268 ) == 200) {
1269 $self->_clear_info;
1270 return 1;
1271 }
1272 return undef;
1273 }
1274
1275
1276 =head2 get_doc
1277
1278 Retreive document
1279
1280 my $doc = $node->get_doc( document_id ) or die "can't get document";
1281
1282 Return true on success or false on failture.
1283
1284 =cut
1285
1286 sub get_doc {
1287 my $self = shift;
1288 my $id = shift || return;
1289 return $self->_fetch_doc( id => $id );
1290 }
1291
1292
1293 =head2 get_doc_by_uri
1294
1295 Retreive document
1296
1297 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1298
1299 Return true on success or false on failture.
1300
1301 =cut
1302
1303 sub get_doc_by_uri {
1304 my $self = shift;
1305 my $uri = shift || return;
1306 return $self->_fetch_doc( uri => $uri );
1307 }
1308
1309
1310 =head2 get_doc_attr
1311
1312 Retrieve the value of an atribute from object
1313
1314 my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1315 die "can't get document attribute";
1316
1317 =cut
1318
1319 sub get_doc_attr {
1320 my $self = shift;
1321 my ($id,$name) = @_;
1322 return unless ($id && $name);
1323 return $self->_fetch_doc( id => $id, attr => $name );
1324 }
1325
1326
1327 =head2 get_doc_attr_by_uri
1328
1329 Retrieve the value of an atribute from object
1330
1331 my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1332 die "can't get document attribute";
1333
1334 =cut
1335
1336 sub get_doc_attr_by_uri {
1337 my $self = shift;
1338 my ($uri,$name) = @_;
1339 return unless ($uri && $name);
1340 return $self->_fetch_doc( uri => $uri, attr => $name );
1341 }
1342
1343
1344 =head2 etch_doc
1345
1346 Exctract document keywords
1347
1348 my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1349
1350 =cut
1351
1352 sub etch_doc {
1353 my $self = shift;
1354 my $id = shift || return;
1355 return $self->_fetch_doc( id => $id, etch => 1 );
1356 }
1357
1358 =head2 etch_doc_by_uri
1359
1360 Retreive document
1361
1362 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1363
1364 Return true on success or false on failture.
1365
1366 =cut
1367
1368 sub etch_doc_by_uri {
1369 my $self = shift;
1370 my $uri = shift || return;
1371 return $self->_fetch_doc( uri => $uri, etch => 1 );
1372 }
1373
1374
1375 =head2 uri_to_id
1376
1377 Get ID of document specified by URI
1378
1379 my $id = $node->uri_to_id( 'file:///document/uri/42' );
1380
1381 This method won't croak, even if using C<croak_on_error>.
1382
1383 =cut
1384
1385 sub uri_to_id {
1386 my $self = shift;
1387 my $uri = shift || return;
1388 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1389 }
1390
1391
1392 =head2 _fetch_doc
1393
1394 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1395 C<etch_doc>, C<etch_doc_by_uri>.
1396
1397 # this will decode received draft into Search::Estraier::Document object
1398 my $doc = $node->_fetch_doc( id => 42 );
1399 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1400
1401 # to extract keywords, add etch
1402 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1403 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1404
1405 # to get document attrubute add attr
1406 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1407 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1408
1409 # more general form which allows implementation of
1410 # uri_to_id
1411 my $id = $node->_fetch_doc(
1412 uri => 'file:///document/uri/42',
1413 path => '/uri_to_id',
1414 chomp_resbody => 1
1415 );
1416
1417 =cut
1418
1419 sub _fetch_doc {
1420 my $self = shift;
1421 my $a = {@_};
1422 return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1423
1424 my ($arg, $resbody);
1425
1426 my $path = $a->{path} || '/get_doc';
1427 $path = '/etch_doc' if ($a->{etch});
1428
1429 if ($a->{id}) {
1430 croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1431 $arg = 'id=' . $a->{id};
1432 } elsif ($a->{uri}) {
1433 $arg = 'uri=' . uri_escape($a->{uri});
1434 } else {
1435 confess "unhandled argument. Need id or uri.";
1436 }
1437
1438 if ($a->{attr}) {
1439 $path = '/get_doc_attr';
1440 $arg .= '&attr=' . uri_escape($a->{attr});
1441 $a->{chomp_resbody} = 1;
1442 }
1443
1444 my $rv = $self->shuttle_url( $self->{url} . $path,
1445 'application/x-www-form-urlencoded',
1446 $arg,
1447 \$resbody,
1448 $a->{croak_on_error},
1449 );
1450
1451 return if ($rv != 200);
1452
1453 if ($a->{etch}) {
1454 $self->{kwords} = {};
1455 return +{} unless ($resbody);
1456 foreach my $l (split(/\n/, $resbody)) {
1457 my ($k,$v) = split(/\t/, $l, 2);
1458 $self->{kwords}->{$k} = $v if ($v);
1459 }
1460 return $self->{kwords};
1461 } elsif ($a->{chomp_resbody}) {
1462 return unless (defined($resbody));
1463 chomp($resbody);
1464 return $resbody;
1465 } else {
1466 return new Search::Estraier::Document($resbody);
1467 }
1468 }
1469
1470
1471 =head2 name
1472
1473 my $node_name = $node->name;
1474
1475 =cut
1476
1477 sub name {
1478 my $self = shift;
1479 $self->_set_info unless ($self->{inform}->{name});
1480 return $self->{inform}->{name};
1481 }
1482
1483
1484 =head2 label
1485
1486 my $node_label = $node->label;
1487
1488 =cut
1489
1490 sub label {
1491 my $self = shift;
1492 $self->_set_info unless ($self->{inform}->{label});
1493 return $self->{inform}->{label};
1494 }
1495
1496
1497 =head2 doc_num
1498
1499 my $documents_in_node = $node->doc_num;
1500
1501 =cut
1502
1503 sub doc_num {
1504 my $self = shift;
1505 $self->_set_info if ($self->{inform}->{dnum} < 0);
1506 return $self->{inform}->{dnum};
1507 }
1508
1509
1510 =head2 word_num
1511
1512 my $words_in_node = $node->word_num;
1513
1514 =cut
1515
1516 sub word_num {
1517 my $self = shift;
1518 $self->_set_info if ($self->{inform}->{wnum} < 0);
1519 return $self->{inform}->{wnum};
1520 }
1521
1522
1523 =head2 size
1524
1525 my $node_size = $node->size;
1526
1527 =cut
1528
1529 sub size {
1530 my $self = shift;
1531 $self->_set_info if ($self->{inform}->{size} < 0);
1532 return $self->{inform}->{size};
1533 }
1534
1535
1536 =head2 search
1537
1538 Search documents which match condition
1539
1540 my $nres = $node->search( $cond, $depth );
1541
1542 C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1543 depth for meta search.
1544
1545 Function results C<Search::Estraier::NodeResult> object.
1546
1547 =cut
1548
1549 sub search {
1550 my $self = shift;
1551 my ($cond, $depth) = @_;
1552 return unless ($cond && defined($depth) && $self->{url});
1553 croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1554 croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1555
1556 my $resbody;
1557
1558 my $rv = $self->shuttle_url( $self->{url} . '/search',
1559 'application/x-www-form-urlencoded',
1560 $self->cond_to_query( $cond, $depth ),
1561 \$resbody,
1562 );
1563 return if ($rv != 200);
1564
1565 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1566 my $hintsText = splice @records, 0, 2; # starts with empty record
1567 my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1568
1569 # process records
1570 my $docs = [];
1571 foreach my $record (@records)
1572 {
1573 # split into keys and snippets
1574 my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1575
1576 # create document hash
1577 my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1578 $doc->{'@keywords'} = $doc->{keywords};
1579 ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1580 $doc->{snippet} = $snippet;
1581
1582 push @$docs, new Search::Estraier::ResultDocument(
1583 attrs => $doc,
1584 uri => $doc->{'@uri'},
1585 snippet => $snippet,
1586 keywords => $doc->{'keywords'},
1587 );
1588 }
1589
1590 return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1591 }
1592
1593
1594 =head2 cond_to_query
1595
1596 Return URI encoded string generated from Search::Estraier::Condition
1597
1598 my $args = $node->cond_to_query( $cond, $depth );
1599
1600 =cut
1601
1602 sub cond_to_query {
1603 my $self = shift;
1604
1605 my $cond = shift || return;
1606 croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1607 my $depth = shift;
1608
1609 my @args;
1610
1611 if (my $phrase = $cond->phrase) {
1612 push @args, 'phrase=' . uri_escape($phrase);
1613 }
1614
1615 if (my @attrs = $cond->attrs) {
1616 for my $i ( 0 .. $#attrs ) {
1617 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1618 }
1619 }
1620
1621 if (my $order = $cond->order) {
1622 push @args, 'order=' . uri_escape($order);
1623 }
1624
1625 if (my $max = $cond->max) {
1626 push @args, 'max=' . $max;
1627 } else {
1628 push @args, 'max=' . (1 << 30);
1629 }
1630
1631 if (my $options = $cond->options) {
1632 push @args, 'options=' . $options;
1633 }
1634
1635 push @args, 'depth=' . $depth if ($depth);
1636 push @args, 'wwidth=' . $self->{wwidth};
1637 push @args, 'hwidth=' . $self->{hwidth};
1638 push @args, 'awidth=' . $self->{awidth};
1639 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1640
1641 if (my $distinct = $cond->distinct) {
1642 push @args, 'distinct=' . uri_escape($distinct);
1643 }
1644
1645 if ($cond->{mask}) {
1646 my $mask = 0;
1647 map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };
1648
1649 push @args, 'mask=' . $mask if ($mask);
1650 }
1651
1652 return join('&', @args);
1653 }
1654
1655
1656 =head2 shuttle_url
1657
1658 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1659 master.
1660
1661 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1662
1663 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1664 body will be saved within object.
1665
1666 =cut
1667
1668 use LWP::UserAgent;
1669
1670 sub shuttle_url {
1671 my $self = shift;
1672
1673 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1674
1675 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1676
1677 $self->{status} = -1;
1678
1679 warn "## $url\n" if ($self->{debug});
1680
1681 $url = new URI($url);
1682 if (
1683 !$url || !$url->scheme || !$url->scheme eq 'http' ||
1684 !$url->host || !$url->port || $url->port < 1
1685 ) {
1686 carp "can't parse $url\n";
1687 return -1;
1688 }
1689
1690 my $ua = LWP::UserAgent->new;
1691 $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1692
1693 my $req;
1694 if ($reqbody) {
1695 $req = HTTP::Request->new(POST => $url);
1696 } else {
1697 $req = HTTP::Request->new(GET => $url);
1698 }
1699
1700 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1701 $req->headers->header( 'Connection', 'close' );
1702 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1703 $req->content_type( $content_type );
1704
1705 warn $req->headers->as_string,"\n" if ($self->{debug});
1706
1707 if ($reqbody) {
1708 warn "$reqbody\n" if ($self->{debug});
1709 $req->content( $reqbody );
1710 }
1711
1712 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1713
1714 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1715
1716 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1717
1718 if (! $res->is_success) {
1719 if ($croak_on_error) {
1720 croak("can't get $url: ",$res->status_line);
1721 } else {
1722 return -1;
1723 }
1724 }
1725
1726 $$resbody .= $res->content;
1727
1728 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1729
1730 return $self->{status};
1731 }
1732
1733
1734 =head2 set_snippet_width
1735
1736 Set width of snippets in results
1737
1738 $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1739
1740 C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1741 is not sent with results. If it is negative, whole document text is sent instead of snippet.
1742
1743 C<$hwidth> specified width of strings from beginning of string. Default
1744 value is C<96>. Negative or zero value keep previous value.
1745
1746 C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1747 If negative of zero value is provided previous value is kept unchanged.
1748
1749 =cut
1750
1751 sub set_snippet_width {
1752 my $self = shift;
1753
1754 my ($wwidth, $hwidth, $awidth) = @_;
1755 $self->{wwidth} = $wwidth;
1756 $self->{hwidth} = $hwidth if ($hwidth >= 0);
1757 $self->{awidth} = $awidth if ($awidth >= 0);
1758 }
1759
1760
1761 =head2 set_user
1762
1763 Manage users of node
1764
1765 $node->set_user( 'name', $mode );
1766
1767 C<$mode> can be one of:
1768
1769 =over 4
1770
1771 =item 0
1772
1773 delete account
1774
1775 =item 1
1776
1777 set administrative right for user
1778
1779 =item 2
1780
1781 set user account as guest
1782
1783 =back
1784
1785 Return true on success, otherwise false.
1786
1787 =cut
1788
1789 sub set_user {
1790 my $self = shift;
1791 my ($name, $mode) = @_;
1792
1793 return unless ($self->{url});
1794 croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1795
1796 $self->shuttle_url( $self->{url} . '/_set_user',
1797 'application/x-www-form-urlencoded',
1798 'name=' . uri_escape($name) . '&mode=' . $mode,
1799 undef
1800 ) == 200;
1801 }
1802
1803
1804 =head2 set_link
1805
1806 Manage node links
1807
1808 $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1809
1810 If C<$credit> is negative, link is removed.
1811
1812 =cut
1813
1814 sub set_link {
1815 my $self = shift;
1816 my ($url, $label, $credit) = @_;
1817
1818 return unless ($self->{url});
1819 croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1820
1821 my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1822 $reqbody .= '&credit=' . $credit if ($credit > 0);
1823
1824 if ($self->shuttle_url( $self->{url} . '/_set_link',
1825 'application/x-www-form-urlencoded',
1826 $reqbody,
1827 undef
1828 ) == 200) {
1829 # refresh node info after adding link
1830 $self->_clear_info;
1831 return 1;
1832 }
1833 return undef;
1834 }
1835
1836 =head2 admins
1837
1838 my @admins = @{ $node->admins };
1839
1840 Return array of users with admin rights on node
1841
1842 =cut
1843
1844 sub admins {
1845 my $self = shift;
1846 $self->_set_info unless ($self->{inform}->{name});
1847 return $self->{inform}->{admins};
1848 }
1849
1850 =head2 guests
1851
1852 my @guests = @{ $node->guests };
1853
1854 Return array of users with guest rights on node
1855
1856 =cut
1857
1858 sub guests {
1859 my $self = shift;
1860 $self->_set_info unless ($self->{inform}->{name});
1861 return $self->{inform}->{guests};
1862 }
1863
1864 =head2 links
1865
1866 my $links = @{ $node->links };
1867
1868 Return array of links for this node
1869
1870 =cut
1871
1872 sub links {
1873 my $self = shift;
1874 $self->_set_info unless ($self->{inform}->{name});
1875 return $self->{inform}->{links};
1876 }
1877
1878 =head2 cacheusage
1879
1880 Return cache usage for a node
1881
1882 my $cache = $node->cacheusage;
1883
1884 =cut
1885
1886 sub cacheusage {
1887 my $self = shift;
1888
1889 return unless ($self->{url});
1890
1891 my $resbody;
1892 my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1893 'text/plain',
1894 undef,
1895 \$resbody,
1896 );
1897
1898 return if ($rv != 200 || !$resbody);
1899
1900 return $resbody;
1901 }
1902
1903 =head2 master
1904
1905 Set actions on Hyper Estraier node master (C<estmaster> process)
1906
1907 $node->master(
1908 action => 'sync'
1909 );
1910
1911 All available actions are documented in
1912 L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1913
1914 =cut
1915
1916 my $estmaster_rest = {
1917 shutdown => {
1918 status => 202,
1919 },
1920 sync => {
1921 status => 202,
1922 },
1923 backup => {
1924 status => 202,
1925 },
1926 userlist => {
1927 status => 200,
1928 returns => [ qw/name passwd flags fname misc/ ],
1929 },
1930 useradd => {
1931 required => [ qw/name passwd flags/ ],
1932 optional => [ qw/fname misc/ ],
1933 status => 200,
1934 },
1935 userdel => {
1936 required => [ qw/name/ ],
1937 status => 200,
1938 },
1939 nodelist => {
1940 status => 200,
1941 returns => [ qw/name label doc_num word_num size/ ],
1942 },
1943 nodeadd => {
1944 required => [ qw/name/ ],
1945 optional => [ qw/label/ ],
1946 status => 200,
1947 },
1948 nodedel => {
1949 required => [ qw/name/ ],
1950 status => 200,
1951 },
1952 nodeclr => {
1953 required => [ qw/name/ ],
1954 status => 200,
1955 },
1956 nodertt => {
1957 status => 200,
1958 },
1959 };
1960
1961 sub master {
1962 my $self = shift;
1963
1964 my $args = {@_};
1965
1966 # have action?
1967 my $action = $args->{action} || croak "need action, available: ",
1968 join(", ",keys %{ $estmaster_rest });
1969
1970 # check if action is valid
1971 my $rest = $estmaster_rest->{$action};
1972 croak "action '$action' is not supported, available actions: ",
1973 join(", ",keys %{ $estmaster_rest }) unless ($rest);
1974
1975 croak "BUG: action '$action' needs return status" unless ($rest->{status});
1976
1977 my @args;
1978
1979 if ($rest->{required} || $rest->{optional}) {
1980
1981 map {
1982 croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1983 push @args, $_ . '=' . uri_escape( $args->{$_} );
1984 } ( @{ $rest->{required} } );
1985
1986 map {
1987 push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1988 } ( @{ $rest->{optional} } );
1989
1990 }
1991
1992 my $uri = new URI( $self->{url} );
1993
1994 my $resbody;
1995
1996 my $status = $self->shuttle_url(
1997 'http://' . $uri->host_port . '/master?action=' . $action ,
1998 'application/x-www-form-urlencoded',
1999 join('&', @args),
2000 \$resbody,
2001 1,
2002 ) or confess "shuttle_url failed";
2003
2004 if ($status == $rest->{status}) {
2005
2006 # refresh node info after sync
2007 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
2008
2009 if ($rest->{returns} && wantarray) {
2010
2011 my @results;
2012 my $fields = $#{$rest->{returns}};
2013
2014 foreach my $line ( split(/[\r\n]/,$resbody) ) {
2015 my @e = split(/\t/, $line, $fields + 1);
2016 my $row;
2017 foreach my $i ( 0 .. $fields) {
2018 $row->{ $rest->{returns}->[$i] } = $e[ $i ];
2019 }
2020 push @results, $row;
2021 }
2022
2023 return @results;
2024
2025 } elsif ($resbody) {
2026 chomp $resbody;
2027 return $resbody;
2028 } else {
2029 return 0E0;
2030 }
2031 }
2032
2033 carp "expected status $rest->{status}, but got $status";
2034 return undef;
2035 }
2036
2037 =head1 PRIVATE METHODS
2038
2039 You could call those directly, but you don't have to. I hope.
2040
2041 =head2 _set_info
2042
2043 Set information for node
2044
2045 $node->_set_info;
2046
2047 =cut
2048
2049 sub _set_info {
2050 my $self = shift;
2051
2052 $self->{status} = -1;
2053 return unless ($self->{url});
2054
2055 my $resbody;
2056 my $rv = $self->shuttle_url( $self->{url} . '/inform',
2057 'text/plain',
2058 undef,
2059 \$resbody,
2060 );
2061
2062 return if ($rv != 200 || !$resbody);
2063
2064 my @lines = split(/[\r\n]/,$resbody);
2065
2066 $self->_clear_info;
2067
2068 ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
2069 $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
2070
2071 return $resbody unless (@lines);
2072
2073 shift @lines;
2074
2075 while(my $admin = shift @lines) {
2076 push @{$self->{inform}->{admins}}, $admin;
2077 }
2078
2079 while(my $guest = shift @lines) {
2080 push @{$self->{inform}->{guests}}, $guest;
2081 }
2082
2083 while(my $link = shift @lines) {
2084 push @{$self->{inform}->{links}}, $link;
2085 }
2086
2087 return $resbody;
2088
2089 }
2090
2091 =head2 _clear_info
2092
2093 Clear information for node
2094
2095 $node->_clear_info;
2096
2097 On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
2098 info will be fetch again from Hyper Estraier.
2099
2100 =cut
2101 sub _clear_info {
2102 my $self = shift;
2103 $self->{inform} = {
2104 dnum => -1,
2105 wnum => -1,
2106 size => -1.0,
2107 };
2108 }
2109
2110 ###
2111
2112 =head1 EXPORT
2113
2114 Nothing.
2115
2116 =head1 SEE ALSO
2117
2118 L<http://hyperestraier.sourceforge.net/>
2119
2120 Hyper Estraier Ruby interface on which this module is based.
2121
2122 Hyper Estraier now also has pure-perl binding included in distribution. It's
2123 a faster way to access databases directly if you are not running
2124 C<estmaster> P2P server.
2125
2126 =head1 AUTHOR
2127
2128 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2129
2130 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2131
2132 =head1 COPYRIGHT AND LICENSE
2133
2134 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2135
2136 This library is free software; you can redistribute it and/or modify
2137 it under the GPL v2 or later.
2138
2139 =cut
2140
2141 1;

  ViewVC Help
Powered by ViewVC 1.1.26