/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 174 - (show annotations)
Sun Aug 6 18:15:56 2006 UTC (17 years, 7 months ago) by dpavlin
File size: 37133 byte(s)
fixed docs
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.07_3';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 =head2 Simple indexer
16
17 use Search::Estraier;
18
19 # create and configure node
20 my $node = new Search::Estraier::Node(
21 url => 'http://localhost:1978/node/test',
22 user => 'admin',
23 passwd => 'admin',
24 create => 1,
25 label => 'Label for node',
26 croak_on_error => 1,
27 );
28
29 # create document
30 my $doc = new Search::Estraier::Document;
31
32 # add attributes
33 $doc->add_attr('@uri', "http://estraier.gov/example.txt");
34 $doc->add_attr('@title', "Over the Rainbow");
35
36 # add body text to document
37 $doc->add_text("Somewhere over the rainbow. Way up high.");
38 $doc->add_text("There's a land that I heard of once in a lullaby.");
39
40 die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
41
42 =head2 Simple searcher
43
44 use Search::Estraier;
45
46 # create and configure node
47 my $node = new Search::Estraier::Node(
48 url => 'http://localhost:1978/node/test',
49 user => 'admin',
50 passwd => 'admin',
51 croak_on_error => 1,
52 );
53
54 # create condition
55 my $cond = new Search::Estraier::Condition;
56
57 # set search phrase
58 $cond->set_phrase("rainbow AND lullaby");
59
60 my $nres = $node->search($cond, 0);
61
62 if (defined($nres)) {
63 print "Got ", $nres->hits, " results\n";
64
65 # for each document in results
66 for my $i ( 0 ... $nres->doc_num - 1 ) {
67 # get result document
68 my $rdoc = $nres->get_doc($i);
69 # display attribte
70 print "URI: ", $rdoc->attr('@uri'),"\n";
71 print "Title: ", $rdoc->attr('@title'),"\n";
72 print $rdoc->snippet,"\n";
73 }
74 } else {
75 die "error: ", $node->status,"\n";
76 }
77
78 =head1 DESCRIPTION
79
80 This module is implementation of node API of Hyper Estraier. Since it's
81 perl-only module with dependencies only on standard perl modules, it will
82 run on all platforms on which perl runs. It doesn't require compilation
83 or Hyper Estraier development files on target machine.
84
85 It is implemented as multiple packages which closly resamble Ruby
86 implementation. It also includes methods to manage nodes.
87
88 There are few examples in C<scripts> directory of this distribution.
89
90 =cut
91
92 =head1 Inheritable common methods
93
94 This methods should really move somewhere else.
95
96 =head2 _s
97
98 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
99
100 my $text = $self->_s(" this is a text ");
101 $text = 'this is a text';
102
103 =cut
104
105 sub _s {
106 my $text = $_[1];
107 return unless defined($text);
108 $text =~ s/\s\s+/ /gs;
109 $text =~ s/^\s+//;
110 $text =~ s/\s+$//;
111 return $text;
112 }
113
114 package Search::Estraier::Document;
115
116 use Carp qw/croak confess/;
117
118 use Search::Estraier;
119 our @ISA = qw/Search::Estraier/;
120
121 =head1 Search::Estraier::Document
122
123 This class implements Document which is single item in Hyper Estraier.
124
125 It's is collection of:
126
127 =over 4
128
129 =item attributes
130
131 C<< 'key' => 'value' >> pairs which can later be used for filtering of results
132
133 You can add common filters to C<attrindex> in estmaster's C<_conf>
134 file for better performance. See C<attrindex> in
135 L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>.
136
137 =item vectors
138
139 also C<< 'key' => 'value' >> pairs
140
141 =item display text
142
143 Text which will be used to create searchable corpus of your index and
144 included in snippet output.
145
146 =item hidden text
147
148 Text which will be searchable, but will not be included in snippet.
149
150 =back
151
152 =head2 new
153
154 Create new document, empty or from draft.
155
156 my $doc = new Search::HyperEstraier::Document;
157 my $doc2 = new Search::HyperEstraier::Document( $draft );
158
159 =cut
160
161 sub new {
162 my $class = shift;
163 my $self = {};
164 bless($self, $class);
165
166 $self->{id} = -1;
167
168 my $draft = shift;
169
170 if ($draft) {
171 my $in_text = 0;
172 foreach my $line (split(/\n/, $draft)) {
173
174 if ($in_text) {
175 if ($line =~ /^\t/) {
176 push @{ $self->{htexts} }, substr($line, 1);
177 } else {
178 push @{ $self->{dtexts} }, $line;
179 }
180 next;
181 }
182
183 if ($line =~ m/^%VECTOR\t(.+)$/) {
184 my @fields = split(/\t/, $1);
185 if ($#fields % 2 == 1) {
186 $self->{kwords} = { @fields };
187 } else {
188 warn "can't decode $line\n";
189 }
190 next;
191 } elsif ($line =~ m/^%/) {
192 # What is this? comment?
193 #warn "$line\n";
194 next;
195 } elsif ($line =~ m/^$/) {
196 $in_text = 1;
197 next;
198 } elsif ($line =~ m/^(.+)=(.*)$/) {
199 $self->{attrs}->{ $1 } = $2;
200 next;
201 }
202
203 warn "draft ignored: '$line'\n";
204 }
205 }
206
207 $self ? return $self : return undef;
208 }
209
210
211 =head2 add_attr
212
213 Add an attribute.
214
215 $doc->add_attr( name => 'value' );
216
217 Delete attribute using
218
219 $doc->add_attr( name => undef );
220
221 =cut
222
223 sub add_attr {
224 my $self = shift;
225 my $attrs = {@_};
226
227 while (my ($name, $value) = each %{ $attrs }) {
228 if (! defined($value)) {
229 delete( $self->{attrs}->{ $self->_s($name) } );
230 } else {
231 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
232 }
233 }
234
235 return 1;
236 }
237
238
239 =head2 add_text
240
241 Add a sentence of text.
242
243 $doc->add_text('this is example text to display');
244
245 =cut
246
247 sub add_text {
248 my $self = shift;
249 my $text = shift;
250 return unless defined($text);
251
252 push @{ $self->{dtexts} }, $self->_s($text);
253 }
254
255
256 =head2 add_hidden_text
257
258 Add a hidden sentence.
259
260 $doc->add_hidden_text('this is example text just for search');
261
262 =cut
263
264 sub add_hidden_text {
265 my $self = shift;
266 my $text = shift;
267 return unless defined($text);
268
269 push @{ $self->{htexts} }, $self->_s($text);
270 }
271
272 =head2 add_vectors
273
274 Add a vectors
275
276 $doc->add_vector(
277 'vector_name' => 42,
278 'another' => 12345,
279 );
280
281 =cut
282
283 sub add_vectors {
284 my $self = shift;
285 return unless (@_);
286
287 # this is ugly, but works
288 die "add_vector needs HASH as argument" unless ($#_ % 2 == 1);
289
290 $self->{kwords} = {@_};
291 }
292
293
294 =head2 id
295
296 Get the ID number of document. If the object has never been registred, C<-1> is returned.
297
298 print $doc->id;
299
300 =cut
301
302 sub id {
303 my $self = shift;
304 return $self->{id};
305 }
306
307
308 =head2 attr_names
309
310 Returns array with attribute names from document object.
311
312 my @attrs = $doc->attr_names;
313
314 =cut
315
316 sub attr_names {
317 my $self = shift;
318 return unless ($self->{attrs});
319 #croak "attr_names return array, not scalar" if (! wantarray);
320 return sort keys %{ $self->{attrs} };
321 }
322
323
324 =head2 attr
325
326 Returns value of an attribute.
327
328 my $value = $doc->attr( 'attribute' );
329
330 =cut
331
332 sub attr {
333 my $self = shift;
334 my $name = shift;
335 return unless (defined($name) && $self->{attrs});
336 return $self->{attrs}->{ $name };
337 }
338
339
340 =head2 texts
341
342 Returns array with text sentences.
343
344 my @texts = $doc->texts;
345
346 =cut
347
348 sub texts {
349 my $self = shift;
350 #confess "texts return array, not scalar" if (! wantarray);
351 return @{ $self->{dtexts} } if ($self->{dtexts});
352 }
353
354
355 =head2 cat_texts
356
357 Return whole text as single scalar.
358
359 my $text = $doc->cat_texts;
360
361 =cut
362
363 sub cat_texts {
364 my $self = shift;
365 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
366 }
367
368
369 =head2 dump_draft
370
371 Dump draft data from document object.
372
373 print $doc->dump_draft;
374
375 =cut
376
377 sub dump_draft {
378 my $self = shift;
379 my $draft;
380
381 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
382 next unless defined(my $v = $self->{attrs}->{$attr_name});
383 $draft .= $attr_name . '=' . $v . "\n";
384 }
385
386 if ($self->{kwords}) {
387 $draft .= '%VECTOR';
388 while (my ($key, $value) = each %{ $self->{kwords} }) {
389 $draft .= "\t$key\t$value";
390 }
391 $draft .= "\n";
392 }
393
394 $draft .= "\n";
395
396 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
397 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
398
399 return $draft;
400 }
401
402
403 =head2 delete
404
405 Empty document object
406
407 $doc->delete;
408
409 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
410 convinience. Document objects which go out of scope will be destroyed
411 automatically.
412
413 =cut
414
415 sub delete {
416 my $self = shift;
417
418 foreach my $data (qw/attrs dtexts stexts kwords/) {
419 delete($self->{$data});
420 }
421
422 $self->{id} = -1;
423
424 return 1;
425 }
426
427
428
429 package Search::Estraier::Condition;
430
431 use Carp qw/carp confess croak/;
432
433 use Search::Estraier;
434 our @ISA = qw/Search::Estraier/;
435
436 =head1 Search::Estraier::Condition
437
438 =head2 new
439
440 my $cond = new Search::HyperEstraier::Condition;
441
442 =cut
443
444 sub new {
445 my $class = shift;
446 my $self = {};
447 bless($self, $class);
448
449 $self->{max} = -1;
450 $self->{options} = 0;
451
452 $self ? return $self : return undef;
453 }
454
455
456 =head2 set_phrase
457
458 $cond->set_phrase('search phrase');
459
460 =cut
461
462 sub set_phrase {
463 my $self = shift;
464 $self->{phrase} = $self->_s( shift );
465 }
466
467
468 =head2 add_attr
469
470 $cond->add_attr('@URI STRINC /~dpavlin/');
471
472 =cut
473
474 sub add_attr {
475 my $self = shift;
476 my $attr = shift || return;
477 push @{ $self->{attrs} }, $self->_s( $attr );
478 }
479
480
481 =head2 set_order
482
483 $cond->set_order('@mdate NUMD');
484
485 =cut
486
487 sub set_order {
488 my $self = shift;
489 $self->{order} = shift;
490 }
491
492
493 =head2 set_max
494
495 $cond->set_max(42);
496
497 =cut
498
499 sub set_max {
500 my $self = shift;
501 my $max = shift;
502 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
503 $self->{max} = $max;
504 }
505
506
507 =head2 set_options
508
509 $cond->set_options( 'SURE' );
510
511 $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
512
513 Possible options are:
514
515 =over 8
516
517 =item SURE
518
519 check every N-gram
520
521 =item USUAL
522
523 check every second N-gram
524
525 =item FAST
526
527 check every third N-gram
528
529 =item AGITO
530
531 check every fourth N-gram
532
533 =item NOIDF
534
535 don't perform TF-IDF tuning
536
537 =item SIMPLE
538
539 use simplified query phrase
540
541 =back
542
543 Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
544 options;
545
546 This option changed in version C<0.04> of this module. It's backwards compatibile.
547
548 =cut
549
550 my $options = {
551 SURE => 1 << 0,
552 USUAL => 1 << 1,
553 FAST => 1 << 2,
554 AGITO => 1 << 3,
555 NOIDF => 1 << 4,
556 SIMPLE => 1 << 10,
557 };
558
559 sub set_options {
560 my $self = shift;
561 my $opt = 0;
562 foreach my $option (@_) {
563 my $mask;
564 unless ($mask = $options->{$option}) {
565 if ($option eq '1') {
566 next;
567 } else {
568 croak "unknown option $option";
569 }
570 }
571 $opt += $mask;
572 }
573 $self->{options} = $opt;
574 }
575
576
577 =head2 phrase
578
579 Return search phrase.
580
581 print $cond->phrase;
582
583 =cut
584
585 sub phrase {
586 my $self = shift;
587 return $self->{phrase};
588 }
589
590
591 =head2 order
592
593 Return search result order.
594
595 print $cond->order;
596
597 =cut
598
599 sub order {
600 my $self = shift;
601 return $self->{order};
602 }
603
604
605 =head2 attrs
606
607 Return search result attrs.
608
609 my @cond_attrs = $cond->attrs;
610
611 =cut
612
613 sub attrs {
614 my $self = shift;
615 #croak "attrs return array, not scalar" if (! wantarray);
616 return @{ $self->{attrs} } if ($self->{attrs});
617 }
618
619
620 =head2 max
621
622 Return maximum number of results.
623
624 print $cond->max;
625
626 C<-1> is returned for unitialized value, C<0> is unlimited.
627
628 =cut
629
630 sub max {
631 my $self = shift;
632 return $self->{max};
633 }
634
635
636 =head2 options
637
638 Return options for this condition.
639
640 print $cond->options;
641
642 Options are returned in numerical form.
643
644 =cut
645
646 sub options {
647 my $self = shift;
648 return $self->{options};
649 }
650
651
652 =head2 set_skip
653
654 Set number of skipped documents from beginning of results
655
656 $cond->set_skip(42);
657
658 Similar to C<offset> in RDBMS.
659
660 =cut
661
662 sub set_skip {
663 my $self = shift;
664 $self->{skip} = shift;
665 }
666
667 =head2 skip
668
669 Return skip for this condition.
670
671 print $cond->skip;
672
673 =cut
674
675 sub skip {
676 my $self = shift;
677 return $self->{skip};
678 }
679
680 =head2 set_mask
681
682 Filter out some links when searching.
683
684 Argument array of link numbers, starting with 0 (current node).
685
686 $cond->set_mask(qw/0 1 4/);
687
688 =cut
689
690 sub set_mask {
691 my $self = shift;
692 return unless (@_);
693 $self->{mask} = \@_;
694 }
695
696
697 package Search::Estraier::ResultDocument;
698
699 use Carp qw/croak/;
700
701 #use Search::Estraier;
702 #our @ISA = qw/Search::Estraier/;
703
704 =head1 Search::Estraier::ResultDocument
705
706 =head2 new
707
708 my $rdoc = new Search::HyperEstraier::ResultDocument(
709 uri => 'http://localhost/document/uri/42',
710 attrs => {
711 foo => 1,
712 bar => 2,
713 },
714 snippet => 'this is a text of snippet'
715 keywords => 'this\tare\tkeywords'
716 );
717
718 =cut
719
720 sub new {
721 my $class = shift;
722 my $self = {@_};
723 bless($self, $class);
724
725 croak "missing uri for ResultDocument" unless defined($self->{uri});
726
727 $self ? return $self : return undef;
728 }
729
730
731 =head2 uri
732
733 Return URI of result document
734
735 print $rdoc->uri;
736
737 =cut
738
739 sub uri {
740 my $self = shift;
741 return $self->{uri};
742 }
743
744
745 =head2 attr_names
746
747 Returns array with attribute names from result document object.
748
749 my @attrs = $rdoc->attr_names;
750
751 =cut
752
753 sub attr_names {
754 my $self = shift;
755 croak "attr_names return array, not scalar" if (! wantarray);
756 return sort keys %{ $self->{attrs} };
757 }
758
759
760 =head2 attr
761
762 Returns value of an attribute.
763
764 my $value = $rdoc->attr( 'attribute' );
765
766 =cut
767
768 sub attr {
769 my $self = shift;
770 my $name = shift || return;
771 return $self->{attrs}->{ $name };
772 }
773
774
775 =head2 snippet
776
777 Return snippet from result document
778
779 print $rdoc->snippet;
780
781 =cut
782
783 sub snippet {
784 my $self = shift;
785 return $self->{snippet};
786 }
787
788
789 =head2 keywords
790
791 Return keywords from result document
792
793 print $rdoc->keywords;
794
795 =cut
796
797 sub keywords {
798 my $self = shift;
799 return $self->{keywords};
800 }
801
802
803 package Search::Estraier::NodeResult;
804
805 use Carp qw/croak/;
806
807 #use Search::Estraier;
808 #our @ISA = qw/Search::Estraier/;
809
810 =head1 Search::Estraier::NodeResult
811
812 =head2 new
813
814 my $res = new Search::HyperEstraier::NodeResult(
815 docs => @array_of_rdocs,
816 hits => %hash_with_hints,
817 );
818
819 =cut
820
821 sub new {
822 my $class = shift;
823 my $self = {@_};
824 bless($self, $class);
825
826 foreach my $f (qw/docs hints/) {
827 croak "missing $f for ResultDocument" unless defined($self->{$f});
828 }
829
830 $self ? return $self : return undef;
831 }
832
833
834 =head2 doc_num
835
836 Return number of documents
837
838 print $res->doc_num;
839
840 This will return real number of documents (limited by C<max>).
841 If you want to get total number of hits, see C<hits>.
842
843 =cut
844
845 sub doc_num {
846 my $self = shift;
847 return $#{$self->{docs}} + 1;
848 }
849
850
851 =head2 get_doc
852
853 Return single document
854
855 my $doc = $res->get_doc( 42 );
856
857 Returns undef if document doesn't exist.
858
859 =cut
860
861 sub get_doc {
862 my $self = shift;
863 my $num = shift;
864 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
865 return undef if ($num < 0 || $num > $self->{docs});
866 return $self->{docs}->[$num];
867 }
868
869
870 =head2 hint
871
872 Return specific hint from results.
873
874 print $res->hint( 'VERSION' );
875
876 Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
877 C<TIME>, C<LINK#n>, C<VIEW>.
878
879 =cut
880
881 sub hint {
882 my $self = shift;
883 my $key = shift || return;
884 return $self->{hints}->{$key};
885 }
886
887 =head2 hints
888
889 More perlish version of C<hint>. This one returns hash.
890
891 my %hints = $res->hints;
892
893 =cut
894
895 sub hints {
896 my $self = shift;
897 return $self->{hints};
898 }
899
900 =head2 hits
901
902 Syntaxtic sugar for total number of hits for this query
903
904 print $res->hits;
905
906 It's same as
907
908 print $res->hint('HIT');
909
910 but shorter.
911
912 =cut
913
914 sub hits {
915 my $self = shift;
916 return $self->{hints}->{'HIT'} || 0;
917 }
918
919 package Search::Estraier::Node;
920
921 use Carp qw/carp croak confess/;
922 use URI;
923 use MIME::Base64;
924 use IO::Socket::INET;
925 use URI::Escape qw/uri_escape/;
926
927 =head1 Search::Estraier::Node
928
929 =head2 new
930
931 my $node = new Search::HyperEstraier::Node;
932
933 or optionally with C<url> as parametar
934
935 my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
936
937 or in more verbose form
938
939 my $node = new Search::HyperEstraier::Node(
940 url => 'http://localhost:1978/node/test',
941 user => 'admin',
942 passwd => 'admin'
943 create => 1,
944 label => 'optional node label',
945 debug => 1,
946 croak_on_error => 1
947 );
948
949 with following arguments:
950
951 =over 4
952
953 =item url
954
955 URL to node
956
957 =item user
958
959 specify username for node server authentication
960
961 =item passwd
962
963 password for authentication
964
965 =item create
966
967 create node if it doesn't exists
968
969 =item label
970
971 optional label for new node if C<create> is used
972
973 =item debug
974
975 dumps a B<lot> of debugging output
976
977 =item croak_on_error
978
979 very helpful during development. It will croak on all errors instead of
980 silently returning C<-1> (which is convention of Hyper Estraier API in other
981 languages).
982
983 =back
984
985 =cut
986
987 sub new {
988 my $class = shift;
989 my $self = {
990 pxport => -1,
991 timeout => 0, # this used to be -1
992 wwidth => 480,
993 hwidth => 96,
994 awidth => 96,
995 status => -1,
996 };
997
998 bless($self, $class);
999
1000 if ($#_ == 0) {
1001 $self->{url} = shift;
1002 } else {
1003 %$self = ( %$self, @_ );
1004
1005 $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user});
1006
1007 warn "## Node debug on\n" if ($self->{debug});
1008 }
1009
1010 $self->{inform} = {
1011 dnum => -1,
1012 wnum => -1,
1013 size => -1.0,
1014 };
1015
1016 if ($self->{create}) {
1017 if (! eval { $self->name } || $@) {
1018 my $name = $1 if ($self->{url} =~ m#/node/([^/]+)/*#);
1019 croak "can't find node name in '$self->{url}'" unless ($name);
1020 my $label = $self->{label} || $name;
1021 $self->master(
1022 action => 'nodeadd',
1023 name => $name,
1024 label => $label,
1025 ) || croak "can't create node $name ($label)";
1026 }
1027 }
1028
1029 $self ? return $self : return undef;
1030 }
1031
1032
1033 =head2 set_url
1034
1035 Specify URL to node server
1036
1037 $node->set_url('http://localhost:1978');
1038
1039 =cut
1040
1041 sub set_url {
1042 my $self = shift;
1043 $self->{url} = shift;
1044 }
1045
1046
1047 =head2 set_proxy
1048
1049 Specify proxy server to connect to node server
1050
1051 $node->set_proxy('proxy.example.com', 8080);
1052
1053 =cut
1054
1055 sub set_proxy {
1056 my $self = shift;
1057 my ($host,$port) = @_;
1058 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
1059 $self->{pxhost} = $host;
1060 $self->{pxport} = $port;
1061 }
1062
1063
1064 =head2 set_timeout
1065
1066 Specify timeout of connection in seconds
1067
1068 $node->set_timeout( 15 );
1069
1070 =cut
1071
1072 sub set_timeout {
1073 my $self = shift;
1074 my $sec = shift;
1075 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
1076 $self->{timeout} = $sec;
1077 }
1078
1079
1080 =head2 set_auth
1081
1082 Specify name and password for authentication to node server.
1083
1084 $node->set_auth('clint','eastwood');
1085
1086 =cut
1087
1088 sub set_auth {
1089 my $self = shift;
1090 my ($login,$passwd) = @_;
1091 my $basic_auth = encode_base64( "$login:$passwd" );
1092 chomp($basic_auth);
1093 $self->{auth} = $basic_auth;
1094 }
1095
1096
1097 =head2 status
1098
1099 Return status code of last request.
1100
1101 print $node->status;
1102
1103 C<-1> means connection failure.
1104
1105 =cut
1106
1107 sub status {
1108 my $self = shift;
1109 return $self->{status};
1110 }
1111
1112
1113 =head2 put_doc
1114
1115 Add a document
1116
1117 $node->put_doc( $document_draft ) or die "can't add document";
1118
1119 Return true on success or false on failure.
1120
1121 =cut
1122
1123 sub put_doc {
1124 my $self = shift;
1125 my $doc = shift || return;
1126 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1127 if ($self->shuttle_url( $self->{url} . '/put_doc',
1128 'text/x-estraier-draft',
1129 $doc->dump_draft,
1130 undef
1131 ) == 200) {
1132 $self->_clear_info;
1133 return 1;
1134 }
1135 return undef;
1136 }
1137
1138
1139 =head2 out_doc
1140
1141 Remove a document
1142
1143 $node->out_doc( document_id ) or "can't remove document";
1144
1145 Return true on success or false on failture.
1146
1147 =cut
1148
1149 sub out_doc {
1150 my $self = shift;
1151 my $id = shift || return;
1152 return unless ($self->{url});
1153 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
1154 if ($self->shuttle_url( $self->{url} . '/out_doc',
1155 'application/x-www-form-urlencoded',
1156 "id=$id",
1157 undef
1158 ) == 200) {
1159 $self->_clear_info;
1160 return 1;
1161 }
1162 return undef;
1163 }
1164
1165
1166 =head2 out_doc_by_uri
1167
1168 Remove a registrated document using it's uri
1169
1170 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
1171
1172 Return true on success or false on failture.
1173
1174 =cut
1175
1176 sub out_doc_by_uri {
1177 my $self = shift;
1178 my $uri = shift || return;
1179 return unless ($self->{url});
1180 if ($self->shuttle_url( $self->{url} . '/out_doc',
1181 'application/x-www-form-urlencoded',
1182 "uri=" . uri_escape($uri),
1183 undef
1184 ) == 200) {
1185 $self->_clear_info;
1186 return 1;
1187 }
1188 return undef;
1189 }
1190
1191
1192 =head2 edit_doc
1193
1194 Edit attributes of a document
1195
1196 $node->edit_doc( $document_draft ) or die "can't edit document";
1197
1198 Return true on success or false on failture.
1199
1200 =cut
1201
1202 sub edit_doc {
1203 my $self = shift;
1204 my $doc = shift || return;
1205 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
1206 if ($self->shuttle_url( $self->{url} . '/edit_doc',
1207 'text/x-estraier-draft',
1208 $doc->dump_draft,
1209 undef
1210 ) == 200) {
1211 $self->_clear_info;
1212 return 1;
1213 }
1214 return undef;
1215 }
1216
1217
1218 =head2 get_doc
1219
1220 Retreive document
1221
1222 my $doc = $node->get_doc( document_id ) or die "can't get document";
1223
1224 Return true on success or false on failture.
1225
1226 =cut
1227
1228 sub get_doc {
1229 my $self = shift;
1230 my $id = shift || return;
1231 return $self->_fetch_doc( id => $id );
1232 }
1233
1234
1235 =head2 get_doc_by_uri
1236
1237 Retreive document
1238
1239 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
1240
1241 Return true on success or false on failture.
1242
1243 =cut
1244
1245 sub get_doc_by_uri {
1246 my $self = shift;
1247 my $uri = shift || return;
1248 return $self->_fetch_doc( uri => $uri );
1249 }
1250
1251
1252 =head2 get_doc_attr
1253
1254 Retrieve the value of an atribute from object
1255
1256 my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
1257 die "can't get document attribute";
1258
1259 =cut
1260
1261 sub get_doc_attr {
1262 my $self = shift;
1263 my ($id,$name) = @_;
1264 return unless ($id && $name);
1265 return $self->_fetch_doc( id => $id, attr => $name );
1266 }
1267
1268
1269 =head2 get_doc_attr_by_uri
1270
1271 Retrieve the value of an atribute from object
1272
1273 my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
1274 die "can't get document attribute";
1275
1276 =cut
1277
1278 sub get_doc_attr_by_uri {
1279 my $self = shift;
1280 my ($uri,$name) = @_;
1281 return unless ($uri && $name);
1282 return $self->_fetch_doc( uri => $uri, attr => $name );
1283 }
1284
1285
1286 =head2 etch_doc
1287
1288 Exctract document keywords
1289
1290 my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
1291
1292 =cut
1293
1294 sub etch_doc {
1295 my $self = shift;
1296 my $id = shift || return;
1297 return $self->_fetch_doc( id => $id, etch => 1 );
1298 }
1299
1300 =head2 etch_doc_by_uri
1301
1302 Retreive document
1303
1304 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
1305
1306 Return true on success or false on failture.
1307
1308 =cut
1309
1310 sub etch_doc_by_uri {
1311 my $self = shift;
1312 my $uri = shift || return;
1313 return $self->_fetch_doc( uri => $uri, etch => 1 );
1314 }
1315
1316
1317 =head2 uri_to_id
1318
1319 Get ID of document specified by URI
1320
1321 my $id = $node->uri_to_id( 'file:///document/uri/42' );
1322
1323 This method won't croak, even if using C<croak_on_error>.
1324
1325 =cut
1326
1327 sub uri_to_id {
1328 my $self = shift;
1329 my $uri = shift || return;
1330 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1331 }
1332
1333
1334 =head2 _fetch_doc
1335
1336 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1337 C<etch_doc>, C<etch_doc_by_uri>.
1338
1339 # this will decode received draft into Search::Estraier::Document object
1340 my $doc = $node->_fetch_doc( id => 42 );
1341 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1342
1343 # to extract keywords, add etch
1344 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1345 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1346
1347 # to get document attrubute add attr
1348 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1349 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1350
1351 # more general form which allows implementation of
1352 # uri_to_id
1353 my $id = $node->_fetch_doc(
1354 uri => 'file:///document/uri/42',
1355 path => '/uri_to_id',
1356 chomp_resbody => 1
1357 );
1358
1359 =cut
1360
1361 sub _fetch_doc {
1362 my $self = shift;
1363 my $a = {@_};
1364 return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1365
1366 my ($arg, $resbody);
1367
1368 my $path = $a->{path} || '/get_doc';
1369 $path = '/etch_doc' if ($a->{etch});
1370
1371 if ($a->{id}) {
1372 croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1373 $arg = 'id=' . $a->{id};
1374 } elsif ($a->{uri}) {
1375 $arg = 'uri=' . uri_escape($a->{uri});
1376 } else {
1377 confess "unhandled argument. Need id or uri.";
1378 }
1379
1380 if ($a->{attr}) {
1381 $path = '/get_doc_attr';
1382 $arg .= '&attr=' . uri_escape($a->{attr});
1383 $a->{chomp_resbody} = 1;
1384 }
1385
1386 my $rv = $self->shuttle_url( $self->{url} . $path,
1387 'application/x-www-form-urlencoded',
1388 $arg,
1389 \$resbody,
1390 $a->{croak_on_error},
1391 );
1392
1393 return if ($rv != 200);
1394
1395 if ($a->{etch}) {
1396 $self->{kwords} = {};
1397 return +{} unless ($resbody);
1398 foreach my $l (split(/\n/, $resbody)) {
1399 my ($k,$v) = split(/\t/, $l, 2);
1400 $self->{kwords}->{$k} = $v if ($v);
1401 }
1402 return $self->{kwords};
1403 } elsif ($a->{chomp_resbody}) {
1404 return unless (defined($resbody));
1405 chomp($resbody);
1406 return $resbody;
1407 } else {
1408 return new Search::Estraier::Document($resbody);
1409 }
1410 }
1411
1412
1413 =head2 name
1414
1415 my $node_name = $node->name;
1416
1417 =cut
1418
1419 sub name {
1420 my $self = shift;
1421 $self->_set_info unless ($self->{inform}->{name});
1422 return $self->{inform}->{name};
1423 }
1424
1425
1426 =head2 label
1427
1428 my $node_label = $node->label;
1429
1430 =cut
1431
1432 sub label {
1433 my $self = shift;
1434 $self->_set_info unless ($self->{inform}->{label});
1435 return $self->{inform}->{label};
1436 }
1437
1438
1439 =head2 doc_num
1440
1441 my $documents_in_node = $node->doc_num;
1442
1443 =cut
1444
1445 sub doc_num {
1446 my $self = shift;
1447 $self->_set_info if ($self->{inform}->{dnum} < 0);
1448 return $self->{inform}->{dnum};
1449 }
1450
1451
1452 =head2 word_num
1453
1454 my $words_in_node = $node->word_num;
1455
1456 =cut
1457
1458 sub word_num {
1459 my $self = shift;
1460 $self->_set_info if ($self->{inform}->{wnum} < 0);
1461 return $self->{inform}->{wnum};
1462 }
1463
1464
1465 =head2 size
1466
1467 my $node_size = $node->size;
1468
1469 =cut
1470
1471 sub size {
1472 my $self = shift;
1473 $self->_set_info if ($self->{inform}->{size} < 0);
1474 return $self->{inform}->{size};
1475 }
1476
1477
1478 =head2 search
1479
1480 Search documents which match condition
1481
1482 my $nres = $node->search( $cond, $depth );
1483
1484 C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1485 depth for meta search.
1486
1487 Function results C<Search::Estraier::NodeResult> object.
1488
1489 =cut
1490
1491 sub search {
1492 my $self = shift;
1493 my ($cond, $depth) = @_;
1494 return unless ($cond && defined($depth) && $self->{url});
1495 croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1496 croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1497
1498 my $resbody;
1499
1500 my $rv = $self->shuttle_url( $self->{url} . '/search',
1501 'application/x-www-form-urlencoded',
1502 $self->cond_to_query( $cond, $depth ),
1503 \$resbody,
1504 );
1505 return if ($rv != 200);
1506
1507 my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody;
1508 my $hintsText = splice @records, 0, 2; # starts with empty record
1509 my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm };
1510
1511 # process records
1512 my $docs = [];
1513 foreach my $record (@records)
1514 {
1515 # split into keys and snippets
1516 my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s;
1517
1518 # create document hash
1519 my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm };
1520 $doc->{'@keywords'} = $doc->{keywords};
1521 ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm;
1522 $doc->{snippet} = $snippet;
1523
1524 push @$docs, new Search::Estraier::ResultDocument(
1525 attrs => $doc,
1526 uri => $doc->{'@uri'},
1527 snippet => $snippet,
1528 keywords => $doc->{'keywords'},
1529 );
1530 }
1531
1532 return new Search::Estraier::NodeResult( docs => $docs, hints => $hints );
1533 }
1534
1535
1536 =head2 cond_to_query
1537
1538 Return URI encoded string generated from Search::Estraier::Condition
1539
1540 my $args = $node->cond_to_query( $cond, $depth );
1541
1542 =cut
1543
1544 sub cond_to_query {
1545 my $self = shift;
1546
1547 my $cond = shift || return;
1548 croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1549 my $depth = shift;
1550
1551 my @args;
1552
1553 if (my $phrase = $cond->phrase) {
1554 push @args, 'phrase=' . uri_escape($phrase);
1555 }
1556
1557 if (my @attrs = $cond->attrs) {
1558 for my $i ( 0 .. $#attrs ) {
1559 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1560 }
1561 }
1562
1563 if (my $order = $cond->order) {
1564 push @args, 'order=' . uri_escape($order);
1565 }
1566
1567 if (my $max = $cond->max) {
1568 push @args, 'max=' . $max;
1569 } else {
1570 push @args, 'max=' . (1 << 30);
1571 }
1572
1573 if (my $options = $cond->options) {
1574 push @args, 'options=' . $options;
1575 }
1576
1577 push @args, 'depth=' . $depth if ($depth);
1578 push @args, 'wwidth=' . $self->{wwidth};
1579 push @args, 'hwidth=' . $self->{hwidth};
1580 push @args, 'awidth=' . $self->{awidth};
1581 push @args, 'skip=' . $cond->{skip} if ($cond->{skip});
1582
1583 if ($cond->{mask}) {
1584 my $mask = 0;
1585 map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} };
1586
1587 push @args, 'mask=' . $mask if ($mask);
1588 }
1589
1590 return join('&', @args);
1591 }
1592
1593
1594 =head2 shuttle_url
1595
1596 This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1597 master.
1598
1599 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1600
1601 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1602 body will be saved within object.
1603
1604 =cut
1605
1606 use LWP::UserAgent;
1607
1608 sub shuttle_url {
1609 my $self = shift;
1610
1611 my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1612
1613 $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1614
1615 $self->{status} = -1;
1616
1617 warn "## $url\n" if ($self->{debug});
1618
1619 $url = new URI($url);
1620 if (
1621 !$url || !$url->scheme || !$url->scheme eq 'http' ||
1622 !$url->host || !$url->port || $url->port < 1
1623 ) {
1624 carp "can't parse $url\n";
1625 return -1;
1626 }
1627
1628 my $ua = LWP::UserAgent->new;
1629 $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1630
1631 my $req;
1632 if ($reqbody) {
1633 $req = HTTP::Request->new(POST => $url);
1634 } else {
1635 $req = HTTP::Request->new(GET => $url);
1636 }
1637
1638 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1639 $req->headers->header( 'Connection', 'close' );
1640 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1641 $req->content_type( $content_type );
1642
1643 warn $req->headers->as_string,"\n" if ($self->{debug});
1644
1645 if ($reqbody) {
1646 warn "$reqbody\n" if ($self->{debug});
1647 $req->content( $reqbody );
1648 }
1649
1650 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1651
1652 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1653
1654 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1655
1656 if (! $res->is_success) {
1657 if ($croak_on_error) {
1658 croak("can't get $url: ",$res->status_line);
1659 } else {
1660 return -1;
1661 }
1662 }
1663
1664 $$resbody .= $res->content;
1665
1666 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1667
1668 return $self->{status};
1669 }
1670
1671
1672 =head2 set_snippet_width
1673
1674 Set width of snippets in results
1675
1676 $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1677
1678 C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1679 is not sent with results. If it is negative, whole document text is sent instead of snippet.
1680
1681 C<$hwidth> specified width of strings from beginning of string. Default
1682 value is C<96>. Negative or zero value keep previous value.
1683
1684 C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1685 If negative of zero value is provided previous value is kept unchanged.
1686
1687 =cut
1688
1689 sub set_snippet_width {
1690 my $self = shift;
1691
1692 my ($wwidth, $hwidth, $awidth) = @_;
1693 $self->{wwidth} = $wwidth;
1694 $self->{hwidth} = $hwidth if ($hwidth >= 0);
1695 $self->{awidth} = $awidth if ($awidth >= 0);
1696 }
1697
1698
1699 =head2 set_user
1700
1701 Manage users of node
1702
1703 $node->set_user( 'name', $mode );
1704
1705 C<$mode> can be one of:
1706
1707 =over 4
1708
1709 =item 0
1710
1711 delete account
1712
1713 =item 1
1714
1715 set administrative right for user
1716
1717 =item 2
1718
1719 set user account as guest
1720
1721 =back
1722
1723 Return true on success, otherwise false.
1724
1725 =cut
1726
1727 sub set_user {
1728 my $self = shift;
1729 my ($name, $mode) = @_;
1730
1731 return unless ($self->{url});
1732 croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1733
1734 $self->shuttle_url( $self->{url} . '/_set_user',
1735 'application/x-www-form-urlencoded',
1736 'name=' . uri_escape($name) . '&mode=' . $mode,
1737 undef
1738 ) == 200;
1739 }
1740
1741
1742 =head2 set_link
1743
1744 Manage node links
1745
1746 $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1747
1748 If C<$credit> is negative, link is removed.
1749
1750 =cut
1751
1752 sub set_link {
1753 my $self = shift;
1754 my ($url, $label, $credit) = @_;
1755
1756 return unless ($self->{url});
1757 croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1758
1759 my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1760 $reqbody .= '&credit=' . $credit if ($credit > 0);
1761
1762 if ($self->shuttle_url( $self->{url} . '/_set_link',
1763 'application/x-www-form-urlencoded',
1764 $reqbody,
1765 undef
1766 ) == 200) {
1767 # refresh node info after adding link
1768 $self->_clear_info;
1769 return 1;
1770 }
1771 return undef;
1772 }
1773
1774 =head2 admins
1775
1776 my @admins = @{ $node->admins };
1777
1778 Return array of users with admin rights on node
1779
1780 =cut
1781
1782 sub admins {
1783 my $self = shift;
1784 $self->_set_info unless ($self->{inform}->{name});
1785 return $self->{inform}->{admins};
1786 }
1787
1788 =head2 guests
1789
1790 my @guests = @{ $node->guests };
1791
1792 Return array of users with guest rights on node
1793
1794 =cut
1795
1796 sub guests {
1797 my $self = shift;
1798 $self->_set_info unless ($self->{inform}->{name});
1799 return $self->{inform}->{guests};
1800 }
1801
1802 =head2 links
1803
1804 my $links = @{ $node->links };
1805
1806 Return array of links for this node
1807
1808 =cut
1809
1810 sub links {
1811 my $self = shift;
1812 $self->_set_info unless ($self->{inform}->{name});
1813 return $self->{inform}->{links};
1814 }
1815
1816 =head2 cacheusage
1817
1818 Return cache usage for a node
1819
1820 my $cache = $node->cacheusage;
1821
1822 =cut
1823
1824 sub cacheusage {
1825 my $self = shift;
1826
1827 return unless ($self->{url});
1828
1829 my $resbody;
1830 my $rv = $self->shuttle_url( $self->{url} . '/cacheusage',
1831 'text/plain',
1832 undef,
1833 \$resbody,
1834 );
1835
1836 return if ($rv != 200 || !$resbody);
1837
1838 return $resbody;
1839 }
1840
1841 =head2 master
1842
1843 Set actions on Hyper Estraier node master (C<estmaster> process)
1844
1845 $node->master(
1846 action => 'sync'
1847 );
1848
1849 All available actions are documented in
1850 L<http://hyperestraier.sourceforge.net/nguide-en.html#protocol>
1851
1852 =cut
1853
1854 my $estmaster_rest = {
1855 shutdown => {
1856 status => 202,
1857 },
1858 sync => {
1859 status => 202,
1860 },
1861 backup => {
1862 status => 202,
1863 },
1864 userlist => {
1865 status => 200,
1866 returns => [ qw/name passwd flags fname misc/ ],
1867 },
1868 useradd => {
1869 required => [ qw/name passwd flags/ ],
1870 optional => [ qw/fname misc/ ],
1871 status => 200,
1872 },
1873 userdel => {
1874 required => [ qw/name/ ],
1875 status => 200,
1876 },
1877 nodelist => {
1878 status => 200,
1879 returns => [ qw/name label doc_num word_num size/ ],
1880 },
1881 nodeadd => {
1882 required => [ qw/name/ ],
1883 optional => [ qw/label/ ],
1884 status => 200,
1885 },
1886 nodedel => {
1887 required => [ qw/name/ ],
1888 status => 200,
1889 },
1890 nodeclr => {
1891 required => [ qw/name/ ],
1892 status => 200,
1893 },
1894 nodertt => {
1895 status => 200,
1896 },
1897 };
1898
1899 sub master {
1900 my $self = shift;
1901
1902 my $args = {@_};
1903
1904 # have action?
1905 my $action = $args->{action} || croak "need action, available: ",
1906 join(", ",keys %{ $estmaster_rest });
1907
1908 # check if action is valid
1909 my $rest = $estmaster_rest->{$action};
1910 croak "action '$action' is not supported, available actions: ",
1911 join(", ",keys %{ $estmaster_rest }) unless ($rest);
1912
1913 croak "BUG: action '$action' needs return status" unless ($rest->{status});
1914
1915 my @args;
1916
1917 if ($rest->{required} || $rest->{optional}) {
1918
1919 map {
1920 croak "need parametar '$_' for action '$action'" unless ($args->{$_});
1921 push @args, $_ . '=' . uri_escape( $args->{$_} );
1922 } ( @{ $rest->{required} } );
1923
1924 map {
1925 push @args, $_ . '=' . uri_escape( $args->{$_} ) if ($args->{$_});
1926 } ( @{ $rest->{optional} } );
1927
1928 }
1929
1930 my $uri = new URI( $self->{url} );
1931
1932 my $resbody;
1933
1934 my $status = $self->shuttle_url(
1935 'http://' . $uri->host_port . '/master?action=' . $action ,
1936 'application/x-www-form-urlencoded',
1937 join('&', @args),
1938 \$resbody,
1939 1,
1940 ) or confess "shuttle_url failed";
1941
1942 if ($status == $rest->{status}) {
1943
1944 # refresh node info after sync
1945 $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/);
1946
1947 if ($rest->{returns} && wantarray) {
1948
1949 my @results;
1950 my $fields = $#{$rest->{returns}};
1951
1952 foreach my $line ( split(/[\r\n]/,$resbody) ) {
1953 my @e = split(/\t/, $line, $fields + 1);
1954 my $row;
1955 foreach my $i ( 0 .. $fields) {
1956 $row->{ $rest->{returns}->[$i] } = $e[ $i ];
1957 }
1958 push @results, $row;
1959 }
1960
1961 return @results;
1962
1963 } elsif ($resbody) {
1964 chomp $resbody;
1965 return $resbody;
1966 } else {
1967 return 0E0;
1968 }
1969 }
1970
1971 carp "expected status $rest->{status}, but got $status";
1972 return undef;
1973 }
1974
1975 =head1 PRIVATE METHODS
1976
1977 You could call those directly, but you don't have to. I hope.
1978
1979 =head2 _set_info
1980
1981 Set information for node
1982
1983 $node->_set_info;
1984
1985 =cut
1986
1987 sub _set_info {
1988 my $self = shift;
1989
1990 $self->{status} = -1;
1991 return unless ($self->{url});
1992
1993 my $resbody;
1994 my $rv = $self->shuttle_url( $self->{url} . '/inform',
1995 'text/plain',
1996 undef,
1997 \$resbody,
1998 );
1999
2000 return if ($rv != 200 || !$resbody);
2001
2002 my @lines = split(/[\r\n]/,$resbody);
2003
2004 $self->_clear_info;
2005
2006 ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
2007 $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
2008
2009 return $resbody unless (@lines);
2010
2011 shift @lines;
2012
2013 while(my $admin = shift @lines) {
2014 push @{$self->{inform}->{admins}}, $admin;
2015 }
2016
2017 while(my $guest = shift @lines) {
2018 push @{$self->{inform}->{guests}}, $guest;
2019 }
2020
2021 while(my $link = shift @lines) {
2022 push @{$self->{inform}->{links}}, $link;
2023 }
2024
2025 return $resbody;
2026
2027 }
2028
2029 =head2 _clear_info
2030
2031 Clear information for node
2032
2033 $node->_clear_info;
2034
2035 On next call to C<name>, C<label>, C<doc_num>, C<word_num> or C<size> node
2036 info will be fetch again from Hyper Estraier.
2037
2038 =cut
2039 sub _clear_info {
2040 my $self = shift;
2041 $self->{inform} = {
2042 dnum => -1,
2043 wnum => -1,
2044 size => -1.0,
2045 };
2046 }
2047
2048 ###
2049
2050 =head1 EXPORT
2051
2052 Nothing.
2053
2054 =head1 SEE ALSO
2055
2056 L<http://hyperestraier.sourceforge.net/>
2057
2058 Hyper Estraier Ruby interface on which this module is based.
2059
2060 Hyper Estraier now also has pure-perl binding included in distribution. It's
2061 a faster way to access databases directly if you are not running
2062 C<estmaster> P2P server.
2063
2064 =head1 AUTHOR
2065
2066 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
2067
2068 Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code
2069
2070 =head1 COPYRIGHT AND LICENSE
2071
2072 Copyright (C) 2005-2006 by Dobrica Pavlinusic
2073
2074 This library is free software; you can redistribute it and/or modify
2075 it under the GPL v2 or later.
2076
2077 =cut
2078
2079 1;

  ViewVC Help
Powered by ViewVC 1.1.26