/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 61 - (show annotations)
Sat Jan 7 01:21:28 2006 UTC (18 years, 2 months ago) by dpavlin
File size: 27270 byte(s)
transfer depth to cond_to_query
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.01';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 use Search::Estraier;
16 my $est = new Search::Estraier();
17
18 =head1 DESCRIPTION
19
20 This module is implementation of node API of Hyper Estraier. Since it's
21 perl-only module with dependencies only on standard perl modules, it will
22 run on all platforms on which perl runs. It doesn't require compilation
23 or Hyper Estraier development files on target machine.
24
25 It is implemented as multiple packages which closly resamble Ruby
26 implementation. It also includes methods to manage nodes.
27
28 =cut
29
30 =head1 Inheritable common methods
31
32 This methods should really move somewhere else.
33
34 =head2 _s
35
36 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
37
38 my $text = $self->_s(" this is a text ");
39 $text = 'this is a text';
40
41 =cut
42
43 sub _s {
44 my $text = $_[1] || return;
45 $text =~ s/\s\s+/ /gs;
46 $text =~ s/^\s+//;
47 $text =~ s/\s+$//;
48 return $text;
49 }
50
51 package Search::Estraier::Document;
52
53 use Carp qw/croak confess/;
54
55 use Search::Estraier;
56 our @ISA = qw/Search::Estraier/;
57
58 =head1 Search::Estraier::Document
59
60 This class implements Document which is collection of attributes
61 (key=value), vectors (also key value) display text and hidden text.
62
63
64 =head2 new
65
66 Create new document, empty or from draft.
67
68 my $doc = new Search::HyperEstraier::Document;
69 my $doc2 = new Search::HyperEstraier::Document( $draft );
70
71 =cut
72
73 sub new {
74 my $class = shift;
75 my $self = {};
76 bless($self, $class);
77
78 $self->{id} = -1;
79
80 my $draft = shift;
81
82 if ($draft) {
83 my $in_text = 0;
84 foreach my $line (split(/\n/, $draft)) {
85
86 if ($in_text) {
87 if ($line =~ /^\t/) {
88 push @{ $self->{htexts} }, substr($line, 1);
89 } else {
90 push @{ $self->{dtexts} }, $line;
91 }
92 next;
93 }
94
95 if ($line =~ m/^%VECTOR\t(.+)$/) {
96 my @fields = split(/\t/, $1);
97 for my $i ( 0 .. ($#fields - 1) ) {
98 $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
99 $i++;
100 }
101 next;
102 } elsif ($line =~ m/^%/) {
103 # What is this? comment?
104 #warn "$line\n";
105 next;
106 } elsif ($line =~ m/^$/) {
107 $in_text = 1;
108 next;
109 } elsif ($line =~ m/^(.+)=(.+)$/) {
110 $self->{attrs}->{ $1 } = $2;
111 next;
112 }
113
114 warn "draft ignored: $line\n";
115 }
116 }
117
118 $self ? return $self : return undef;
119 }
120
121
122 =head2 add_attr
123
124 Add an attribute.
125
126 $doc->add_attr( name => 'value' );
127
128 Delete attribute using
129
130 $doc->add_attr( name => undef );
131
132 =cut
133
134 sub add_attr {
135 my $self = shift;
136 my $attrs = {@_};
137
138 while (my ($name, $value) = each %{ $attrs }) {
139 if (! defined($value)) {
140 delete( $self->{attrs}->{ $self->_s($name) } );
141 } else {
142 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
143 }
144 }
145
146 return 1;
147 }
148
149
150 =head2 add_text
151
152 Add a sentence of text.
153
154 $doc->add_text('this is example text to display');
155
156 =cut
157
158 sub add_text {
159 my $self = shift;
160 my $text = shift;
161 return unless defined($text);
162
163 push @{ $self->{dtexts} }, $self->_s($text);
164 }
165
166
167 =head2 add_hidden_text
168
169 Add a hidden sentence.
170
171 $doc->add_hidden_text('this is example text just for search');
172
173 =cut
174
175 sub add_hidden_text {
176 my $self = shift;
177 my $text = shift;
178 return unless defined($text);
179
180 push @{ $self->{htexts} }, $self->_s($text);
181 }
182
183
184 =head2 id
185
186 Get the ID number of document. If the object has never been registred, C<-1> is returned.
187
188 print $doc->id;
189
190 =cut
191
192 sub id {
193 my $self = shift;
194 return $self->{id};
195 }
196
197
198 =head2 attr_names
199
200 Returns array with attribute names from document object.
201
202 my @attrs = $doc->attr_names;
203
204 =cut
205
206 sub attr_names {
207 my $self = shift;
208 return unless ($self->{attrs});
209 #croak "attr_names return array, not scalar" if (! wantarray);
210 return sort keys %{ $self->{attrs} };
211 }
212
213
214 =head2 attr
215
216 Returns value of an attribute.
217
218 my $value = $doc->attr( 'attribute' );
219
220 =cut
221
222 sub attr {
223 my $self = shift;
224 my $name = shift;
225 return unless (defined($name) && $self->{attrs});
226 return $self->{attrs}->{ $name };
227 }
228
229
230 =head2 texts
231
232 Returns array with text sentences.
233
234 my @texts = $doc->texts;
235
236 =cut
237
238 sub texts {
239 my $self = shift;
240 #confess "texts return array, not scalar" if (! wantarray);
241 return @{ $self->{dtexts} } if ($self->{dtexts});
242 }
243
244
245 =head2 cat_texts
246
247 Return whole text as single scalar.
248
249 my $text = $doc->cat_texts;
250
251 =cut
252
253 sub cat_texts {
254 my $self = shift;
255 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
256 }
257
258
259 =head2 dump_draft
260
261 Dump draft data from document object.
262
263 print $doc->dump_draft;
264
265 =cut
266
267 sub dump_draft {
268 my $self = shift;
269 my $draft;
270
271 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
272 $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
273 }
274
275 if ($self->{kwords}) {
276 $draft .= '%%VECTOR';
277 while (my ($key, $value) = each %{ $self->{kwords} }) {
278 $draft .= "\t$key\t$value";
279 }
280 $draft .= "\n";
281 }
282
283 $draft .= "\n";
284
285 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
286 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
287
288 return $draft;
289 }
290
291
292 =head2 delete
293
294 Empty document object
295
296 $doc->delete;
297
298 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
299 convinience. Document objects which go out of scope will be destroyed
300 automatically.
301
302 =cut
303
304 sub delete {
305 my $self = shift;
306
307 foreach my $data (qw/attrs dtexts stexts kwords/) {
308 delete($self->{$data});
309 }
310
311 $self->{id} = -1;
312
313 return 1;
314 }
315
316
317
318 package Search::Estraier::Condition;
319
320 use Carp qw/confess croak/;
321
322 use Search::Estraier;
323 our @ISA = qw/Search::Estraier/;
324
325 =head1 Search::Estraier::Condition
326
327 =head2 new
328
329 my $cond = new Search::HyperEstraier::Condition;
330
331 =cut
332
333 sub new {
334 my $class = shift;
335 my $self = {};
336 bless($self, $class);
337
338 $self->{max} = -1;
339 $self->{options} = 0;
340
341 $self ? return $self : return undef;
342 }
343
344
345 =head2 set_phrase
346
347 $cond->set_phrase('search phrase');
348
349 =cut
350
351 sub set_phrase {
352 my $self = shift;
353 $self->{phrase} = $self->_s( shift );
354 }
355
356
357 =head2 add_attr
358
359 $cond->add_attr('@URI STRINC /~dpavlin/');
360
361 =cut
362
363 sub add_attr {
364 my $self = shift;
365 my $attr = shift || return;
366 push @{ $self->{attrs} }, $self->_s( $attr );
367 }
368
369
370 =head2 set_order
371
372 $cond->set_order('@mdate NUMD');
373
374 =cut
375
376 sub set_order {
377 my $self = shift;
378 $self->{order} = shift;
379 }
380
381
382 =head2 set_max
383
384 $cond->set_max(42);
385
386 =cut
387
388 sub set_max {
389 my $self = shift;
390 my $max = shift;
391 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
392 $self->{max} = $max;
393 }
394
395
396 =head2 set_options
397
398 $cond->set_options( SURE => 1 );
399
400 =cut
401
402 my $options = {
403 # check N-gram keys skipping by three
404 SURE => 1 << 0,
405 # check N-gram keys skipping by two
406 USUAL => 1 << 1,
407 # without TF-IDF tuning
408 FAST => 1 << 2,
409 # with the simplified phrase
410 AGITO => 1 << 3,
411 # check every N-gram key
412 NOIDF => 1 << 4,
413 # check N-gram keys skipping by one
414 SIMPLE => 1 << 10,
415 };
416
417 sub set_options {
418 my $self = shift;
419 my $option = shift;
420 confess "unknown option" unless ($options->{$option});
421 $self->{options} ||= $options->{$option};
422 }
423
424
425 =head2 phrase
426
427 Return search phrase.
428
429 print $cond->phrase;
430
431 =cut
432
433 sub phrase {
434 my $self = shift;
435 return $self->{phrase};
436 }
437
438
439 =head2 order
440
441 Return search result order.
442
443 print $cond->order;
444
445 =cut
446
447 sub order {
448 my $self = shift;
449 return $self->{order};
450 }
451
452
453 =head2 attrs
454
455 Return search result attrs.
456
457 my @cond_attrs = $cond->attrs;
458
459 =cut
460
461 sub attrs {
462 my $self = shift;
463 #croak "attrs return array, not scalar" if (! wantarray);
464 return @{ $self->{attrs} } if ($self->{attrs});
465 }
466
467
468 =head2 max
469
470 Return maximum number of results.
471
472 print $cond->max;
473
474 C<-1> is returned for unitialized value, C<0> is unlimited.
475
476 =cut
477
478 sub max {
479 my $self = shift;
480 return $self->{max};
481 }
482
483
484 =head2 options
485
486 Return options for this condition.
487
488 print $cond->options;
489
490 Options are returned in numerical form.
491
492 =cut
493
494 sub options {
495 my $self = shift;
496 return $self->{options};
497 }
498
499
500 package Search::Estraier::ResultDocument;
501
502 use Carp qw/croak/;
503
504 #use Search::Estraier;
505 #our @ISA = qw/Search::Estraier/;
506
507 =head1 Search::Estraier::ResultDocument
508
509 =head2 new
510
511 my $rdoc = new Search::HyperEstraier::ResultDocument(
512 uri => 'http://localhost/document/uri/42',
513 attrs => {
514 foo => 1,
515 bar => 2,
516 },
517 snippet => 'this is a text of snippet'
518 keywords => 'this\tare\tkeywords'
519 );
520
521 =cut
522
523 sub new {
524 my $class = shift;
525 my $self = {@_};
526 bless($self, $class);
527
528 foreach my $f (qw/uri attrs snippet keywords/) {
529 croak "missing $f for ResultDocument" unless defined($self->{$f});
530 }
531
532 $self ? return $self : return undef;
533 }
534
535
536 =head2 uri
537
538 Return URI of result document
539
540 print $rdoc->uri;
541
542 =cut
543
544 sub uri {
545 my $self = shift;
546 return $self->{uri};
547 }
548
549
550 =head2 attr_names
551
552 Returns array with attribute names from result document object.
553
554 my @attrs = $rdoc->attr_names;
555
556 =cut
557
558 sub attr_names {
559 my $self = shift;
560 croak "attr_names return array, not scalar" if (! wantarray);
561 return sort keys %{ $self->{attrs} };
562 }
563
564
565 =head2 attr
566
567 Returns value of an attribute.
568
569 my $value = $rdoc->attr( 'attribute' );
570
571 =cut
572
573 sub attr {
574 my $self = shift;
575 my $name = shift || return;
576 return $self->{attrs}->{ $name };
577 }
578
579
580 =head2 snippet
581
582 Return snippet from result document
583
584 print $rdoc->snippet;
585
586 =cut
587
588 sub snippet {
589 my $self = shift;
590 return $self->{snippet};
591 }
592
593
594 =head2 keywords
595
596 Return keywords from result document
597
598 print $rdoc->keywords;
599
600 =cut
601
602 sub keywords {
603 my $self = shift;
604 return $self->{keywords};
605 }
606
607
608 package Search::Estraier::NodeResult;
609
610 use Carp qw/croak/;
611
612 #use Search::Estraier;
613 #our @ISA = qw/Search::Estraier/;
614
615 =head1 Search::Estraier::NodeResult
616
617 =head2 new
618
619 my $res = new Search::HyperEstraier::NodeResult(
620 docs => @array_of_rdocs,
621 hits => %hash_with_hints,
622 );
623
624 =cut
625
626 sub new {
627 my $class = shift;
628 my $self = {@_};
629 bless($self, $class);
630
631 foreach my $f (qw/docs hints/) {
632 croak "missing $f for ResultDocument" unless defined($self->{$f});
633 }
634
635 $self ? return $self : return undef;
636 }
637
638
639 =head2 doc_num
640
641 Return number of documents
642
643 print $res->doc_num;
644
645 =cut
646
647 sub doc_num {
648 my $self = shift;
649 return $#{$self->{docs}} + 1;
650 }
651
652
653 =head2 get_doc
654
655 Return single document
656
657 my $doc = $res->get_doc( 42 );
658
659 Returns undef if document doesn't exist.
660
661 =cut
662
663 sub get_doc {
664 my $self = shift;
665 my $num = shift;
666 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
667 return undef if ($num < 0 || $num > $self->{docs});
668 return $self->{docs}->[$num];
669 }
670
671
672 =head2 hint
673
674 Return specific hint from results.
675
676 print $rec->hint( 'VERSION' );
677
678 Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
679 C<TIME>, C<LINK#n>, C<VIEW>.
680
681 =cut
682
683 sub hint {
684 my $self = shift;
685 my $key = shift || return;
686 return $self->{hints}->{$key};
687 }
688
689
690 package Search::Estraier::Node;
691
692 use Carp qw/carp croak confess/;
693 use URI;
694 use MIME::Base64;
695 use IO::Socket::INET;
696 use URI::Escape qw/uri_escape/;
697
698 =head1 Search::Estraier::Node
699
700 =head2 new
701
702 my $node = new Search::HyperEstraier::Node;
703
704 =cut
705
706 sub new {
707 my $class = shift;
708 my $self = {
709 pxport => -1,
710 timeout => 0, # this used to be -1
711 dnum => -1,
712 wnum => -1,
713 size => -1.0,
714 wwidth => 480,
715 hwidth => 96,
716 awidth => 96,
717 status => -1,
718 };
719 bless($self, $class);
720
721 my $args = {@_};
722
723 $self->{debug} = $args->{debug};
724 warn "## Node debug on\n" if ($self->{debug});
725
726 $self ? return $self : return undef;
727 }
728
729
730 =head2 set_url
731
732 Specify URL to node server
733
734 $node->set_url('http://localhost:1978');
735
736 =cut
737
738 sub set_url {
739 my $self = shift;
740 $self->{url} = shift;
741 }
742
743
744 =head2 set_proxy
745
746 Specify proxy server to connect to node server
747
748 $node->set_proxy('proxy.example.com', 8080);
749
750 =cut
751
752 sub set_proxy {
753 my $self = shift;
754 my ($host,$port) = @_;
755 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
756 $self->{pxhost} = $host;
757 $self->{pxport} = $port;
758 }
759
760
761 =head2 set_timeout
762
763 Specify timeout of connection in seconds
764
765 $node->set_timeout( 15 );
766
767 =cut
768
769 sub set_timeout {
770 my $self = shift;
771 my $sec = shift;
772 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
773 $self->{timeout} = $sec;
774 }
775
776
777 =head2 set_auth
778
779 Specify name and password for authentication to node server.
780
781 $node->set_auth('clint','eastwood');
782
783 =cut
784
785 sub set_auth {
786 my $self = shift;
787 my ($login,$passwd) = @_;
788 my $basic_auth = encode_base64( "$login:$passwd" );
789 chomp($basic_auth);
790 $self->{auth} = $basic_auth;
791 }
792
793
794 =head2 status
795
796 Return status code of last request.
797
798 print $node->status;
799
800 C<-1> means connection failure.
801
802 =cut
803
804 sub status {
805 my $self = shift;
806 return $self->{status};
807 }
808
809
810 =head2 put_doc
811
812 Add a document
813
814 $node->put_doc( $document_draft ) or die "can't add document";
815
816 Return true on success or false on failture.
817
818 =cut
819
820 sub put_doc {
821 my $self = shift;
822 my $doc = shift || return;
823 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
824 $self->shuttle_url( $self->{url} . '/put_doc',
825 'text/x-estraier-draft',
826 $doc->dump_draft,
827 undef
828 ) == 200;
829 }
830
831
832 =head2 out_doc
833
834 Remove a document
835
836 $node->out_doc( document_id ) or "can't remove document";
837
838 Return true on success or false on failture.
839
840 =cut
841
842 sub out_doc {
843 my $self = shift;
844 my $id = shift || return;
845 return unless ($self->{url});
846 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
847 $self->shuttle_url( $self->{url} . '/out_doc',
848 'application/x-www-form-urlencoded',
849 "id=$id",
850 undef
851 ) == 200;
852 }
853
854
855 =head2 out_doc_by_uri
856
857 Remove a registrated document using it's uri
858
859 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
860
861 Return true on success or false on failture.
862
863 =cut
864
865 sub out_doc_by_uri {
866 my $self = shift;
867 my $uri = shift || return;
868 return unless ($self->{url});
869 $self->shuttle_url( $self->{url} . '/out_doc',
870 'application/x-www-form-urlencoded',
871 "uri=" . uri_escape($uri),
872 undef
873 ) == 200;
874 }
875
876
877 =head2 edit_doc
878
879 Edit attributes of a document
880
881 $node->edit_doc( $document_draft ) or die "can't edit document";
882
883 Return true on success or false on failture.
884
885 =cut
886
887 sub edit_doc {
888 my $self = shift;
889 my $doc = shift || return;
890 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
891 $self->shuttle_url( $self->{url} . '/edit_doc',
892 'text/x-estraier-draft',
893 $doc->dump_draft,
894 undef
895 ) == 200;
896 }
897
898
899 =head2 get_doc
900
901 Retreive document
902
903 my $doc = $node->get_doc( document_id ) or die "can't get document";
904
905 Return true on success or false on failture.
906
907 =cut
908
909 sub get_doc {
910 my $self = shift;
911 my $id = shift || return;
912 return $self->_fetch_doc( id => $id );
913 }
914
915
916 =head2 get_doc_by_uri
917
918 Retreive document
919
920 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
921
922 Return true on success or false on failture.
923
924 =cut
925
926 sub get_doc_by_uri {
927 my $self = shift;
928 my $uri = shift || return;
929 return $self->_fetch_doc( uri => $uri );
930 }
931
932
933 =head2 get_doc_attr
934
935 Retrieve the value of an atribute from object
936
937 my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
938 die "can't get document attribute";
939
940 =cut
941
942 sub get_doc_attr {
943 my $self = shift;
944 my ($id,$name) = @_;
945 return unless ($id && $name);
946 return $self->_fetch_doc( id => $id, attr => $name );
947 }
948
949
950 =head2 get_doc_attr_by_uri
951
952 Retrieve the value of an atribute from object
953
954 my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
955 die "can't get document attribute";
956
957 =cut
958
959 sub get_doc_attr_by_uri {
960 my $self = shift;
961 my ($uri,$name) = @_;
962 return unless ($uri && $name);
963 return $self->_fetch_doc( uri => $uri, attr => $name );
964 }
965
966
967 =head2 etch_doc
968
969 Exctract document keywords
970
971 my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
972
973 =cut
974
975 sub etch_doc {
976 my $self = shift;
977 my $id = shift || return;
978 return $self->_fetch_doc( id => $id, etch => 1 );
979 }
980
981 =head2 etch_doc_by_uri
982
983 Retreive document
984
985 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
986
987 Return true on success or false on failture.
988
989 =cut
990
991 sub etch_doc_by_uri {
992 my $self = shift;
993 my $uri = shift || return;
994 return $self->_fetch_doc( uri => $uri, etch => 1 );
995 }
996
997
998 =head2 uri_to_id
999
1000 Get ID of document specified by URI
1001
1002 my $id = $node->uri_to_id( 'file:///document/uri/42' );
1003
1004 =cut
1005
1006 sub uri_to_id {
1007 my $self = shift;
1008 my $uri = shift || return;
1009 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
1010 }
1011
1012
1013 =head2 _fetch_doc
1014
1015 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1016 C<etch_doc>, C<etch_doc_by_uri>.
1017
1018 # this will decode received draft into Search::Estraier::Document object
1019 my $doc = $node->_fetch_doc( id => 42 );
1020 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1021
1022 # to extract keywords, add etch
1023 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1024 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1025
1026 # to get document attrubute add attr
1027 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1028 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1029
1030 # more general form which allows implementation of
1031 # uri_to_id
1032 my $id = $node->_fetch_doc(
1033 uri => 'file:///document/uri/42',
1034 path => '/uri_to_id',
1035 chomp_resbody => 1
1036 );
1037
1038 =cut
1039
1040 sub _fetch_doc {
1041 my $self = shift;
1042 my $a = {@_};
1043 return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1044
1045 my ($arg, $resbody);
1046
1047 my $path = $a->{path} || '/get_doc';
1048 $path = '/etch_doc' if ($a->{etch});
1049
1050 if ($a->{id}) {
1051 croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1052 $arg = 'id=' . $a->{id};
1053 } elsif ($a->{uri}) {
1054 $arg = 'uri=' . uri_escape($a->{uri});
1055 } else {
1056 confess "unhandled argument. Need id or uri.";
1057 }
1058
1059 if ($a->{attr}) {
1060 $path = '/get_doc_attr';
1061 $arg .= '&attr=' . uri_escape($a->{attr});
1062 $a->{chomp_resbody} = 1;
1063 }
1064
1065 my $rv = $self->shuttle_url( $self->{url} . $path,
1066 'application/x-www-form-urlencoded',
1067 $arg,
1068 \$resbody,
1069 );
1070
1071 return if ($rv != 200);
1072
1073 if ($a->{etch}) {
1074 $self->{kwords} = {};
1075 return +{} unless ($resbody);
1076 foreach my $l (split(/\n/, $resbody)) {
1077 my ($k,$v) = split(/\t/, $l, 2);
1078 $self->{kwords}->{$k} = $v if ($v);
1079 }
1080 return $self->{kwords};
1081 } elsif ($a->{chomp_resbody}) {
1082 return unless (defined($resbody));
1083 chomp($resbody);
1084 return $resbody;
1085 } else {
1086 return new Search::Estraier::Document($resbody);
1087 }
1088 }
1089
1090
1091 =head2 name
1092
1093 my $node_name = $node->name;
1094
1095 =cut
1096
1097 sub name {
1098 my $self = shift;
1099 $self->_set_info unless ($self->{name});
1100 return $self->{name};
1101 }
1102
1103
1104 =head2 label
1105
1106 my $node_label = $node->label;
1107
1108 =cut
1109
1110 sub label {
1111 my $self = shift;
1112 $self->_set_info unless ($self->{label});
1113 return $self->{label};
1114 }
1115
1116
1117 =head2 doc_num
1118
1119 my $documents_in_node = $node->doc_num;
1120
1121 =cut
1122
1123 sub doc_num {
1124 my $self = shift;
1125 $self->_set_info if ($self->{dnum} < 0);
1126 return $self->{dnum};
1127 }
1128
1129
1130 =head2 word_num
1131
1132 my $words_in_node = $node->word_num;
1133
1134 =cut
1135
1136 sub word_num {
1137 my $self = shift;
1138 $self->_set_info if ($self->{wnum} < 0);
1139 return $self->{wnum};
1140 }
1141
1142
1143 =head2 size
1144
1145 my $node_size = $node->size;
1146
1147 =cut
1148
1149 sub size {
1150 my $self = shift;
1151 $self->_set_info if ($self->{size} < 0);
1152 return $self->{size};
1153 }
1154
1155
1156 =head2 search
1157
1158 Search documents which match condition
1159
1160 my $nres = $node->search( $cond, $depth );
1161
1162 C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1163 depth for meta search.
1164
1165 Function results C<Search::Estraier::NodeResult> object.
1166
1167 =cut
1168
1169 sub search {
1170 my $self = shift;
1171 my ($cond, $depth) = @_;
1172 return unless ($cond && defined($depth) && $self->{url});
1173 croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1174 croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1175
1176 my $resbody;
1177
1178 my $rv = $self->shuttle_url( $self->{url} . '/search',
1179 'application/x-www-form-urlencoded',
1180 $self->cond_to_query( $cond, $depth ),
1181 \$resbody,
1182 );
1183 return if ($rv != 200);
1184
1185 my (@docs, $hints);
1186
1187 my @lines = split(/\n/, $resbody);
1188 return unless (@lines);
1189
1190 my $border = $lines[0];
1191 my $isend = 0;
1192 my $lnum = 1;
1193
1194 while ( $lnum <= $#lines ) {
1195 my $line = $lines[$lnum];
1196 $lnum++;
1197
1198 #warn "## $line\n";
1199 if ($line && $line =~ m/^\Q$border\E(:END)*$/) {
1200 $isend = $1;
1201 last;
1202 }
1203
1204 if ($line =~ /\t/) {
1205 my ($k,$v) = split(/\t/, $line, 2);
1206 $hints->{$k} = $v;
1207 }
1208 }
1209
1210 my $snum = $lnum;
1211
1212 while( ! $isend && $lnum <= $#lines ) {
1213 my $line = $lines[$lnum];
1214 #warn "# $lnum: $line\n";
1215 $lnum++;
1216
1217 if ($line && $line =~ m/^\Q$border\E/) {
1218 if ($lnum > $snum) {
1219 my $rdattrs;
1220 my $rdvector;
1221 my $rdsnippet;
1222
1223 my $rlnum = $snum;
1224 while ($rlnum < $lnum - 1 ) {
1225 #my $rdline = $self->_s($lines[$rlnum]);
1226 my $rdline = $lines[$rlnum];
1227 $rlnum++;
1228 last unless ($rdline);
1229 if ($rdline =~ /^%/) {
1230 $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/);
1231 } elsif($rdline =~ /=/) {
1232 $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/);
1233 } else {
1234 confess "invalid format of response";
1235 }
1236 }
1237 while($rlnum < $lnum - 1) {
1238 my $rdline = $lines[$rlnum];
1239 $rlnum++;
1240 $rdsnippet .= "$rdline\n";
1241 }
1242 #warn Dumper($rdvector, $rdattrs, $rdsnippet);
1243 if (my $rduri = $rdattrs->{'@uri'}) {
1244 push @docs, new Search::Estraier::ResultDocument(
1245 uri => $rduri,
1246 attrs => $rdattrs,
1247 snippet => $rdsnippet,
1248 keywords => $rdvector,
1249 );
1250 }
1251 }
1252 $snum = $lnum;
1253 #warn "### $line\n";
1254 $isend = 1 if ($line =~ /:END$/);
1255 }
1256
1257 }
1258
1259 if (! $isend) {
1260 warn "received result doesn't have :END\n$resbody";
1261 return;
1262 }
1263
1264 #warn Dumper(\@docs, $hints);
1265
1266 return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints );
1267 }
1268
1269
1270 =head2 cond_to_query
1271
1272 Return URI encoded string generated from Search::Estraier::Condition
1273
1274 my $args = $node->cond_to_query( $cond, $depth );
1275
1276 =cut
1277
1278 sub cond_to_query {
1279 my $self = shift;
1280
1281 my $cond = shift || return;
1282 croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1283 my $depth = shift;
1284
1285 my @args;
1286
1287 if (my $phrase = $cond->phrase) {
1288 push @args, 'phrase=' . uri_escape($phrase);
1289 }
1290
1291 if (my @attrs = $cond->attrs) {
1292 for my $i ( 0 .. $#attrs ) {
1293 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] );
1294 }
1295 }
1296
1297 if (my $order = $cond->order) {
1298 push @args, 'order=' . uri_escape($order);
1299 }
1300
1301 if (my $max = $cond->max) {
1302 push @args, 'max=' . $max;
1303 } else {
1304 push @args, 'max=' . (1 << 30);
1305 }
1306
1307 if (my $options = $cond->options) {
1308 push @args, 'options=' . $options;
1309 }
1310
1311 push @args, 'depth=' . $depth if ($depth);
1312 push @args, 'wwidth=' . $self->{wwidth};
1313 push @args, 'hwidth=' . $self->{hwidth};
1314 push @args, 'awidth=' . $self->{awidth};
1315
1316 return join('&', @args);
1317 }
1318
1319
1320 =head2 shuttle_url
1321
1322 This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
1323 master.
1324
1325 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1326
1327 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1328 body will be saved within object.
1329
1330 =cut
1331
1332 use LWP::UserAgent;
1333
1334 sub shuttle_url {
1335 my $self = shift;
1336
1337 my ($url, $content_type, $reqbody, $resbody) = @_;
1338
1339 $self->{status} = -1;
1340
1341 warn "## $url\n" if ($self->{debug});
1342
1343 $url = new URI($url);
1344 if (
1345 !$url || !$url->scheme || !$url->scheme eq 'http' ||
1346 !$url->host || !$url->port || $url->port < 1
1347 ) {
1348 carp "can't parse $url\n";
1349 return -1;
1350 }
1351
1352 my $ua = LWP::UserAgent->new;
1353 $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1354
1355 my $req;
1356 if ($reqbody) {
1357 $req = HTTP::Request->new(POST => $url);
1358 } else {
1359 $req = HTTP::Request->new(GET => $url);
1360 }
1361
1362 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1363 $req->headers->header( 'Connection', 'close' );
1364 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );
1365 $req->content_type( $content_type );
1366
1367 warn $req->headers->as_string,"\n" if ($self->{debug});
1368
1369 if ($reqbody) {
1370 warn "$reqbody\n" if ($self->{debug});
1371 $req->content( $reqbody );
1372 }
1373
1374 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1375
1376 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1377
1378 return -1 if (! $res->is_success);
1379
1380 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1381
1382 $$resbody .= $res->content;
1383
1384 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1385
1386 return $self->{status};
1387 }
1388
1389
1390 =head2 set_snippet_width
1391
1392 Set width of snippets in results
1393
1394 $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1395
1396 C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1397 is not sent with results. If it is negative, whole document text is sent instead of snippet.
1398
1399 C<$hwidth> specified width of strings from beginning of string. Default
1400 value is C<96>. Negative or zero value keep previous value.
1401
1402 C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1403 If negative of zero value is provided previous value is kept unchanged.
1404
1405 =cut
1406
1407 sub set_snippet_width {
1408 my $self = shift;
1409
1410 my ($wwidth, $hwidth, $awidth) = @_;
1411 $self->{wwidth} = $wwidth;
1412 $self->{hwidth} = $hwidth if ($hwidth >= 0);
1413 $self->{awidth} = $awidth if ($awidth >= 0);
1414 }
1415
1416
1417 =head2 set_user
1418
1419 Manage users of node
1420
1421 $node->set_user( 'name', $mode );
1422
1423 C<$mode> can be one of:
1424
1425 =over 4
1426
1427 =item 0
1428
1429 delete account
1430
1431 =item 1
1432
1433 set administrative right for user
1434
1435 =item 2
1436
1437 set user account as guest
1438
1439 =back
1440
1441 Return true on success, otherwise false.
1442
1443 =cut
1444
1445 sub set_user {
1446 my $self = shift;
1447 my ($name, $mode) = @_;
1448
1449 return unless ($self->{url});
1450 croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1451
1452 $self->shuttle_url( $self->{url} . '/_set_user',
1453 'text/plain',
1454 'name=' . uri_escape($name) . '&mode=' . $mode,
1455 undef
1456 ) == 200;
1457 }
1458
1459
1460 =head2 set_link
1461
1462 Manage node links
1463
1464 $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1465
1466 If C<$credit> is negative, link is removed.
1467
1468 =cut
1469
1470 sub set_link {
1471 my $self = shift;
1472 my ($url, $label, $credit) = @_;
1473
1474 return unless ($self->{url});
1475 croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1476
1477 my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1478 $reqbody .= '&credit=' . $credit if ($credit > 0);
1479
1480 $self->shuttle_url( $self->{url} . '/_set_link',
1481 'text/plain',
1482 $reqbody,
1483 undef
1484 ) == 200;
1485 }
1486
1487
1488 =head1 PRIVATE METHODS
1489
1490 You could call those directly, but you don't have to. I hope.
1491
1492 =head2 _set_info
1493
1494 Set information for node
1495
1496 $node->_set_info;
1497
1498 =cut
1499
1500 sub _set_info {
1501 my $self = shift;
1502
1503 $self->{status} = -1;
1504 return unless ($self->{url});
1505
1506 my $resbody;
1507 my $rv = $self->shuttle_url( $self->{url} . '/inform',
1508 'text/plain',
1509 undef,
1510 \$resbody,
1511 );
1512
1513 return if ($rv != 200 || !$resbody);
1514
1515 # it seems that response can have multiple line endings
1516 $resbody =~ s/[\r\n]+$//;
1517
1518 ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =
1519 split(/\t/, $resbody, 5);
1520
1521 }
1522
1523 ###
1524
1525 =head1 EXPORT
1526
1527 Nothing.
1528
1529 =head1 SEE ALSO
1530
1531 L<http://hyperestraier.sourceforge.net/>
1532
1533 Hyper Estraier Ruby interface on which this module is based.
1534
1535 =head1 AUTHOR
1536
1537 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1538
1539
1540 =head1 COPYRIGHT AND LICENSE
1541
1542 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1543
1544 This library is free software; you can redistribute it and/or modify
1545 it under the GPL v2 or later.
1546
1547 =cut
1548
1549 1;

  ViewVC Help
Powered by ViewVC 1.1.26