/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 62 - (show annotations)
Sat Jan 7 02:40:57 2006 UTC (18 years, 2 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 27218 byte(s)
requre just uri for ResultDocument, all other parametars are optional
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.01';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 use Search::Estraier;
16 my $est = new Search::Estraier();
17
18 =head1 DESCRIPTION
19
20 This module is implementation of node API of Hyper Estraier. Since it's
21 perl-only module with dependencies only on standard perl modules, it will
22 run on all platforms on which perl runs. It doesn't require compilation
23 or Hyper Estraier development files on target machine.
24
25 It is implemented as multiple packages which closly resamble Ruby
26 implementation. It also includes methods to manage nodes.
27
28 =cut
29
30 =head1 Inheritable common methods
31
32 This methods should really move somewhere else.
33
34 =head2 _s
35
36 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
37
38 my $text = $self->_s(" this is a text ");
39 $text = 'this is a text';
40
41 =cut
42
43 sub _s {
44 my $text = $_[1] || return;
45 $text =~ s/\s\s+/ /gs;
46 $text =~ s/^\s+//;
47 $text =~ s/\s+$//;
48 return $text;
49 }
50
51 package Search::Estraier::Document;
52
53 use Carp qw/croak confess/;
54
55 use Search::Estraier;
56 our @ISA = qw/Search::Estraier/;
57
58 =head1 Search::Estraier::Document
59
60 This class implements Document which is collection of attributes
61 (key=value), vectors (also key value) display text and hidden text.
62
63
64 =head2 new
65
66 Create new document, empty or from draft.
67
68 my $doc = new Search::HyperEstraier::Document;
69 my $doc2 = new Search::HyperEstraier::Document( $draft );
70
71 =cut
72
73 sub new {
74 my $class = shift;
75 my $self = {};
76 bless($self, $class);
77
78 $self->{id} = -1;
79
80 my $draft = shift;
81
82 if ($draft) {
83 my $in_text = 0;
84 foreach my $line (split(/\n/, $draft)) {
85
86 if ($in_text) {
87 if ($line =~ /^\t/) {
88 push @{ $self->{htexts} }, substr($line, 1);
89 } else {
90 push @{ $self->{dtexts} }, $line;
91 }
92 next;
93 }
94
95 if ($line =~ m/^%VECTOR\t(.+)$/) {
96 my @fields = split(/\t/, $1);
97 for my $i ( 0 .. ($#fields - 1) ) {
98 $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
99 $i++;
100 }
101 next;
102 } elsif ($line =~ m/^%/) {
103 # What is this? comment?
104 #warn "$line\n";
105 next;
106 } elsif ($line =~ m/^$/) {
107 $in_text = 1;
108 next;
109 } elsif ($line =~ m/^(.+)=(.+)$/) {
110 $self->{attrs}->{ $1 } = $2;
111 next;
112 }
113
114 warn "draft ignored: $line\n";
115 }
116 }
117
118 $self ? return $self : return undef;
119 }
120
121
122 =head2 add_attr
123
124 Add an attribute.
125
126 $doc->add_attr( name => 'value' );
127
128 Delete attribute using
129
130 $doc->add_attr( name => undef );
131
132 =cut
133
134 sub add_attr {
135 my $self = shift;
136 my $attrs = {@_};
137
138 while (my ($name, $value) = each %{ $attrs }) {
139 if (! defined($value)) {
140 delete( $self->{attrs}->{ $self->_s($name) } );
141 } else {
142 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
143 }
144 }
145
146 return 1;
147 }
148
149
150 =head2 add_text
151
152 Add a sentence of text.
153
154 $doc->add_text('this is example text to display');
155
156 =cut
157
158 sub add_text {
159 my $self = shift;
160 my $text = shift;
161 return unless defined($text);
162
163 push @{ $self->{dtexts} }, $self->_s($text);
164 }
165
166
167 =head2 add_hidden_text
168
169 Add a hidden sentence.
170
171 $doc->add_hidden_text('this is example text just for search');
172
173 =cut
174
175 sub add_hidden_text {
176 my $self = shift;
177 my $text = shift;
178 return unless defined($text);
179
180 push @{ $self->{htexts} }, $self->_s($text);
181 }
182
183
184 =head2 id
185
186 Get the ID number of document. If the object has never been registred, C<-1> is returned.
187
188 print $doc->id;
189
190 =cut
191
192 sub id {
193 my $self = shift;
194 return $self->{id};
195 }
196
197
198 =head2 attr_names
199
200 Returns array with attribute names from document object.
201
202 my @attrs = $doc->attr_names;
203
204 =cut
205
206 sub attr_names {
207 my $self = shift;
208 return unless ($self->{attrs});
209 #croak "attr_names return array, not scalar" if (! wantarray);
210 return sort keys %{ $self->{attrs} };
211 }
212
213
214 =head2 attr
215
216 Returns value of an attribute.
217
218 my $value = $doc->attr( 'attribute' );
219
220 =cut
221
222 sub attr {
223 my $self = shift;
224 my $name = shift;
225 return unless (defined($name) && $self->{attrs});
226 return $self->{attrs}->{ $name };
227 }
228
229
230 =head2 texts
231
232 Returns array with text sentences.
233
234 my @texts = $doc->texts;
235
236 =cut
237
238 sub texts {
239 my $self = shift;
240 #confess "texts return array, not scalar" if (! wantarray);
241 return @{ $self->{dtexts} } if ($self->{dtexts});
242 }
243
244
245 =head2 cat_texts
246
247 Return whole text as single scalar.
248
249 my $text = $doc->cat_texts;
250
251 =cut
252
253 sub cat_texts {
254 my $self = shift;
255 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
256 }
257
258
259 =head2 dump_draft
260
261 Dump draft data from document object.
262
263 print $doc->dump_draft;
264
265 =cut
266
267 sub dump_draft {
268 my $self = shift;
269 my $draft;
270
271 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
272 $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
273 }
274
275 if ($self->{kwords}) {
276 $draft .= '%%VECTOR';
277 while (my ($key, $value) = each %{ $self->{kwords} }) {
278 $draft .= "\t$key\t$value";
279 }
280 $draft .= "\n";
281 }
282
283 $draft .= "\n";
284
285 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
286 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
287
288 return $draft;
289 }
290
291
292 =head2 delete
293
294 Empty document object
295
296 $doc->delete;
297
298 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
299 convinience. Document objects which go out of scope will be destroyed
300 automatically.
301
302 =cut
303
304 sub delete {
305 my $self = shift;
306
307 foreach my $data (qw/attrs dtexts stexts kwords/) {
308 delete($self->{$data});
309 }
310
311 $self->{id} = -1;
312
313 return 1;
314 }
315
316
317
318 package Search::Estraier::Condition;
319
320 use Carp qw/confess croak/;
321
322 use Search::Estraier;
323 our @ISA = qw/Search::Estraier/;
324
325 =head1 Search::Estraier::Condition
326
327 =head2 new
328
329 my $cond = new Search::HyperEstraier::Condition;
330
331 =cut
332
333 sub new {
334 my $class = shift;
335 my $self = {};
336 bless($self, $class);
337
338 $self->{max} = -1;
339 $self->{options} = 0;
340
341 $self ? return $self : return undef;
342 }
343
344
345 =head2 set_phrase
346
347 $cond->set_phrase('search phrase');
348
349 =cut
350
351 sub set_phrase {
352 my $self = shift;
353 $self->{phrase} = $self->_s( shift );
354 }
355
356
357 =head2 add_attr
358
359 $cond->add_attr('@URI STRINC /~dpavlin/');
360
361 =cut
362
363 sub add_attr {
364 my $self = shift;
365 my $attr = shift || return;
366 push @{ $self->{attrs} }, $self->_s( $attr );
367 }
368
369
370 =head2 set_order
371
372 $cond->set_order('@mdate NUMD');
373
374 =cut
375
376 sub set_order {
377 my $self = shift;
378 $self->{order} = shift;
379 }
380
381
382 =head2 set_max
383
384 $cond->set_max(42);
385
386 =cut
387
388 sub set_max {
389 my $self = shift;
390 my $max = shift;
391 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
392 $self->{max} = $max;
393 }
394
395
396 =head2 set_options
397
398 $cond->set_options( SURE => 1 );
399
400 =cut
401
402 my $options = {
403 # check N-gram keys skipping by three
404 SURE => 1 << 0,
405 # check N-gram keys skipping by two
406 USUAL => 1 << 1,
407 # without TF-IDF tuning
408 FAST => 1 << 2,
409 # with the simplified phrase
410 AGITO => 1 << 3,
411 # check every N-gram key
412 NOIDF => 1 << 4,
413 # check N-gram keys skipping by one
414 SIMPLE => 1 << 10,
415 };
416
417 sub set_options {
418 my $self = shift;
419 my $option = shift;
420 confess "unknown option" unless ($options->{$option});
421 $self->{options} ||= $options->{$option};
422 }
423
424
425 =head2 phrase
426
427 Return search phrase.
428
429 print $cond->phrase;
430
431 =cut
432
433 sub phrase {
434 my $self = shift;
435 return $self->{phrase};
436 }
437
438
439 =head2 order
440
441 Return search result order.
442
443 print $cond->order;
444
445 =cut
446
447 sub order {
448 my $self = shift;
449 return $self->{order};
450 }
451
452
453 =head2 attrs
454
455 Return search result attrs.
456
457 my @cond_attrs = $cond->attrs;
458
459 =cut
460
461 sub attrs {
462 my $self = shift;
463 #croak "attrs return array, not scalar" if (! wantarray);
464 return @{ $self->{attrs} } if ($self->{attrs});
465 }
466
467
468 =head2 max
469
470 Return maximum number of results.
471
472 print $cond->max;
473
474 C<-1> is returned for unitialized value, C<0> is unlimited.
475
476 =cut
477
478 sub max {
479 my $self = shift;
480 return $self->{max};
481 }
482
483
484 =head2 options
485
486 Return options for this condition.
487
488 print $cond->options;
489
490 Options are returned in numerical form.
491
492 =cut
493
494 sub options {
495 my $self = shift;
496 return $self->{options};
497 }
498
499
500 package Search::Estraier::ResultDocument;
501
502 use Carp qw/croak/;
503
504 #use Search::Estraier;
505 #our @ISA = qw/Search::Estraier/;
506
507 =head1 Search::Estraier::ResultDocument
508
509 =head2 new
510
511 my $rdoc = new Search::HyperEstraier::ResultDocument(
512 uri => 'http://localhost/document/uri/42',
513 attrs => {
514 foo => 1,
515 bar => 2,
516 },
517 snippet => 'this is a text of snippet'
518 keywords => 'this\tare\tkeywords'
519 );
520
521 =cut
522
523 sub new {
524 my $class = shift;
525 my $self = {@_};
526 bless($self, $class);
527
528 croak "missing uri for ResultDocument" unless defined($self->{uri});
529
530 $self ? return $self : return undef;
531 }
532
533
534 =head2 uri
535
536 Return URI of result document
537
538 print $rdoc->uri;
539
540 =cut
541
542 sub uri {
543 my $self = shift;
544 return $self->{uri};
545 }
546
547
548 =head2 attr_names
549
550 Returns array with attribute names from result document object.
551
552 my @attrs = $rdoc->attr_names;
553
554 =cut
555
556 sub attr_names {
557 my $self = shift;
558 croak "attr_names return array, not scalar" if (! wantarray);
559 return sort keys %{ $self->{attrs} };
560 }
561
562
563 =head2 attr
564
565 Returns value of an attribute.
566
567 my $value = $rdoc->attr( 'attribute' );
568
569 =cut
570
571 sub attr {
572 my $self = shift;
573 my $name = shift || return;
574 return $self->{attrs}->{ $name };
575 }
576
577
578 =head2 snippet
579
580 Return snippet from result document
581
582 print $rdoc->snippet;
583
584 =cut
585
586 sub snippet {
587 my $self = shift;
588 return $self->{snippet};
589 }
590
591
592 =head2 keywords
593
594 Return keywords from result document
595
596 print $rdoc->keywords;
597
598 =cut
599
600 sub keywords {
601 my $self = shift;
602 return $self->{keywords};
603 }
604
605
606 package Search::Estraier::NodeResult;
607
608 use Carp qw/croak/;
609
610 #use Search::Estraier;
611 #our @ISA = qw/Search::Estraier/;
612
613 =head1 Search::Estraier::NodeResult
614
615 =head2 new
616
617 my $res = new Search::HyperEstraier::NodeResult(
618 docs => @array_of_rdocs,
619 hits => %hash_with_hints,
620 );
621
622 =cut
623
624 sub new {
625 my $class = shift;
626 my $self = {@_};
627 bless($self, $class);
628
629 foreach my $f (qw/docs hints/) {
630 croak "missing $f for ResultDocument" unless defined($self->{$f});
631 }
632
633 $self ? return $self : return undef;
634 }
635
636
637 =head2 doc_num
638
639 Return number of documents
640
641 print $res->doc_num;
642
643 =cut
644
645 sub doc_num {
646 my $self = shift;
647 return $#{$self->{docs}} + 1;
648 }
649
650
651 =head2 get_doc
652
653 Return single document
654
655 my $doc = $res->get_doc( 42 );
656
657 Returns undef if document doesn't exist.
658
659 =cut
660
661 sub get_doc {
662 my $self = shift;
663 my $num = shift;
664 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
665 return undef if ($num < 0 || $num > $self->{docs});
666 return $self->{docs}->[$num];
667 }
668
669
670 =head2 hint
671
672 Return specific hint from results.
673
674 print $rec->hint( 'VERSION' );
675
676 Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
677 C<TIME>, C<LINK#n>, C<VIEW>.
678
679 =cut
680
681 sub hint {
682 my $self = shift;
683 my $key = shift || return;
684 return $self->{hints}->{$key};
685 }
686
687
688 package Search::Estraier::Node;
689
690 use Carp qw/carp croak confess/;
691 use URI;
692 use MIME::Base64;
693 use IO::Socket::INET;
694 use URI::Escape qw/uri_escape/;
695
696 =head1 Search::Estraier::Node
697
698 =head2 new
699
700 my $node = new Search::HyperEstraier::Node;
701
702 =cut
703
704 sub new {
705 my $class = shift;
706 my $self = {
707 pxport => -1,
708 timeout => 0, # this used to be -1
709 dnum => -1,
710 wnum => -1,
711 size => -1.0,
712 wwidth => 480,
713 hwidth => 96,
714 awidth => 96,
715 status => -1,
716 };
717 bless($self, $class);
718
719 my $args = {@_};
720
721 $self->{debug} = $args->{debug};
722 warn "## Node debug on\n" if ($self->{debug});
723
724 $self ? return $self : return undef;
725 }
726
727
728 =head2 set_url
729
730 Specify URL to node server
731
732 $node->set_url('http://localhost:1978');
733
734 =cut
735
736 sub set_url {
737 my $self = shift;
738 $self->{url} = shift;
739 }
740
741
742 =head2 set_proxy
743
744 Specify proxy server to connect to node server
745
746 $node->set_proxy('proxy.example.com', 8080);
747
748 =cut
749
750 sub set_proxy {
751 my $self = shift;
752 my ($host,$port) = @_;
753 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
754 $self->{pxhost} = $host;
755 $self->{pxport} = $port;
756 }
757
758
759 =head2 set_timeout
760
761 Specify timeout of connection in seconds
762
763 $node->set_timeout( 15 );
764
765 =cut
766
767 sub set_timeout {
768 my $self = shift;
769 my $sec = shift;
770 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
771 $self->{timeout} = $sec;
772 }
773
774
775 =head2 set_auth
776
777 Specify name and password for authentication to node server.
778
779 $node->set_auth('clint','eastwood');
780
781 =cut
782
783 sub set_auth {
784 my $self = shift;
785 my ($login,$passwd) = @_;
786 my $basic_auth = encode_base64( "$login:$passwd" );
787 chomp($basic_auth);
788 $self->{auth} = $basic_auth;
789 }
790
791
792 =head2 status
793
794 Return status code of last request.
795
796 print $node->status;
797
798 C<-1> means connection failure.
799
800 =cut
801
802 sub status {
803 my $self = shift;
804 return $self->{status};
805 }
806
807
808 =head2 put_doc
809
810 Add a document
811
812 $node->put_doc( $document_draft ) or die "can't add document";
813
814 Return true on success or false on failture.
815
816 =cut
817
818 sub put_doc {
819 my $self = shift;
820 my $doc = shift || return;
821 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
822 $self->shuttle_url( $self->{url} . '/put_doc',
823 'text/x-estraier-draft',
824 $doc->dump_draft,
825 undef
826 ) == 200;
827 }
828
829
830 =head2 out_doc
831
832 Remove a document
833
834 $node->out_doc( document_id ) or "can't remove document";
835
836 Return true on success or false on failture.
837
838 =cut
839
840 sub out_doc {
841 my $self = shift;
842 my $id = shift || return;
843 return unless ($self->{url});
844 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
845 $self->shuttle_url( $self->{url} . '/out_doc',
846 'application/x-www-form-urlencoded',
847 "id=$id",
848 undef
849 ) == 200;
850 }
851
852
853 =head2 out_doc_by_uri
854
855 Remove a registrated document using it's uri
856
857 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
858
859 Return true on success or false on failture.
860
861 =cut
862
863 sub out_doc_by_uri {
864 my $self = shift;
865 my $uri = shift || return;
866 return unless ($self->{url});
867 $self->shuttle_url( $self->{url} . '/out_doc',
868 'application/x-www-form-urlencoded',
869 "uri=" . uri_escape($uri),
870 undef
871 ) == 200;
872 }
873
874
875 =head2 edit_doc
876
877 Edit attributes of a document
878
879 $node->edit_doc( $document_draft ) or die "can't edit document";
880
881 Return true on success or false on failture.
882
883 =cut
884
885 sub edit_doc {
886 my $self = shift;
887 my $doc = shift || return;
888 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
889 $self->shuttle_url( $self->{url} . '/edit_doc',
890 'text/x-estraier-draft',
891 $doc->dump_draft,
892 undef
893 ) == 200;
894 }
895
896
897 =head2 get_doc
898
899 Retreive document
900
901 my $doc = $node->get_doc( document_id ) or die "can't get document";
902
903 Return true on success or false on failture.
904
905 =cut
906
907 sub get_doc {
908 my $self = shift;
909 my $id = shift || return;
910 return $self->_fetch_doc( id => $id );
911 }
912
913
914 =head2 get_doc_by_uri
915
916 Retreive document
917
918 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
919
920 Return true on success or false on failture.
921
922 =cut
923
924 sub get_doc_by_uri {
925 my $self = shift;
926 my $uri = shift || return;
927 return $self->_fetch_doc( uri => $uri );
928 }
929
930
931 =head2 get_doc_attr
932
933 Retrieve the value of an atribute from object
934
935 my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
936 die "can't get document attribute";
937
938 =cut
939
940 sub get_doc_attr {
941 my $self = shift;
942 my ($id,$name) = @_;
943 return unless ($id && $name);
944 return $self->_fetch_doc( id => $id, attr => $name );
945 }
946
947
948 =head2 get_doc_attr_by_uri
949
950 Retrieve the value of an atribute from object
951
952 my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
953 die "can't get document attribute";
954
955 =cut
956
957 sub get_doc_attr_by_uri {
958 my $self = shift;
959 my ($uri,$name) = @_;
960 return unless ($uri && $name);
961 return $self->_fetch_doc( uri => $uri, attr => $name );
962 }
963
964
965 =head2 etch_doc
966
967 Exctract document keywords
968
969 my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
970
971 =cut
972
973 sub etch_doc {
974 my $self = shift;
975 my $id = shift || return;
976 return $self->_fetch_doc( id => $id, etch => 1 );
977 }
978
979 =head2 etch_doc_by_uri
980
981 Retreive document
982
983 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
984
985 Return true on success or false on failture.
986
987 =cut
988
989 sub etch_doc_by_uri {
990 my $self = shift;
991 my $uri = shift || return;
992 return $self->_fetch_doc( uri => $uri, etch => 1 );
993 }
994
995
996 =head2 uri_to_id
997
998 Get ID of document specified by URI
999
1000 my $id = $node->uri_to_id( 'file:///document/uri/42' );
1001
1002 =cut
1003
1004 sub uri_to_id {
1005 my $self = shift;
1006 my $uri = shift || return;
1007 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
1008 }
1009
1010
1011 =head2 _fetch_doc
1012
1013 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1014 C<etch_doc>, C<etch_doc_by_uri>.
1015
1016 # this will decode received draft into Search::Estraier::Document object
1017 my $doc = $node->_fetch_doc( id => 42 );
1018 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1019
1020 # to extract keywords, add etch
1021 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1022 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1023
1024 # to get document attrubute add attr
1025 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1026 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1027
1028 # more general form which allows implementation of
1029 # uri_to_id
1030 my $id = $node->_fetch_doc(
1031 uri => 'file:///document/uri/42',
1032 path => '/uri_to_id',
1033 chomp_resbody => 1
1034 );
1035
1036 =cut
1037
1038 sub _fetch_doc {
1039 my $self = shift;
1040 my $a = {@_};
1041 return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1042
1043 my ($arg, $resbody);
1044
1045 my $path = $a->{path} || '/get_doc';
1046 $path = '/etch_doc' if ($a->{etch});
1047
1048 if ($a->{id}) {
1049 croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1050 $arg = 'id=' . $a->{id};
1051 } elsif ($a->{uri}) {
1052 $arg = 'uri=' . uri_escape($a->{uri});
1053 } else {
1054 confess "unhandled argument. Need id or uri.";
1055 }
1056
1057 if ($a->{attr}) {
1058 $path = '/get_doc_attr';
1059 $arg .= '&attr=' . uri_escape($a->{attr});
1060 $a->{chomp_resbody} = 1;
1061 }
1062
1063 my $rv = $self->shuttle_url( $self->{url} . $path,
1064 'application/x-www-form-urlencoded',
1065 $arg,
1066 \$resbody,
1067 );
1068
1069 return if ($rv != 200);
1070
1071 if ($a->{etch}) {
1072 $self->{kwords} = {};
1073 return +{} unless ($resbody);
1074 foreach my $l (split(/\n/, $resbody)) {
1075 my ($k,$v) = split(/\t/, $l, 2);
1076 $self->{kwords}->{$k} = $v if ($v);
1077 }
1078 return $self->{kwords};
1079 } elsif ($a->{chomp_resbody}) {
1080 return unless (defined($resbody));
1081 chomp($resbody);
1082 return $resbody;
1083 } else {
1084 return new Search::Estraier::Document($resbody);
1085 }
1086 }
1087
1088
1089 =head2 name
1090
1091 my $node_name = $node->name;
1092
1093 =cut
1094
1095 sub name {
1096 my $self = shift;
1097 $self->_set_info unless ($self->{name});
1098 return $self->{name};
1099 }
1100
1101
1102 =head2 label
1103
1104 my $node_label = $node->label;
1105
1106 =cut
1107
1108 sub label {
1109 my $self = shift;
1110 $self->_set_info unless ($self->{label});
1111 return $self->{label};
1112 }
1113
1114
1115 =head2 doc_num
1116
1117 my $documents_in_node = $node->doc_num;
1118
1119 =cut
1120
1121 sub doc_num {
1122 my $self = shift;
1123 $self->_set_info if ($self->{dnum} < 0);
1124 return $self->{dnum};
1125 }
1126
1127
1128 =head2 word_num
1129
1130 my $words_in_node = $node->word_num;
1131
1132 =cut
1133
1134 sub word_num {
1135 my $self = shift;
1136 $self->_set_info if ($self->{wnum} < 0);
1137 return $self->{wnum};
1138 }
1139
1140
1141 =head2 size
1142
1143 my $node_size = $node->size;
1144
1145 =cut
1146
1147 sub size {
1148 my $self = shift;
1149 $self->_set_info if ($self->{size} < 0);
1150 return $self->{size};
1151 }
1152
1153
1154 =head2 search
1155
1156 Search documents which match condition
1157
1158 my $nres = $node->search( $cond, $depth );
1159
1160 C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1161 depth for meta search.
1162
1163 Function results C<Search::Estraier::NodeResult> object.
1164
1165 =cut
1166
1167 sub search {
1168 my $self = shift;
1169 my ($cond, $depth) = @_;
1170 return unless ($cond && defined($depth) && $self->{url});
1171 croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1172 croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1173
1174 my $resbody;
1175
1176 my $rv = $self->shuttle_url( $self->{url} . '/search',
1177 'application/x-www-form-urlencoded',
1178 $self->cond_to_query( $cond, $depth ),
1179 \$resbody,
1180 );
1181 return if ($rv != 200);
1182
1183 my (@docs, $hints);
1184
1185 my @lines = split(/\n/, $resbody);
1186 return unless (@lines);
1187
1188 my $border = $lines[0];
1189 my $isend = 0;
1190 my $lnum = 1;
1191
1192 while ( $lnum <= $#lines ) {
1193 my $line = $lines[$lnum];
1194 $lnum++;
1195
1196 #warn "## $line\n";
1197 if ($line && $line =~ m/^\Q$border\E(:END)*$/) {
1198 $isend = $1;
1199 last;
1200 }
1201
1202 if ($line =~ /\t/) {
1203 my ($k,$v) = split(/\t/, $line, 2);
1204 $hints->{$k} = $v;
1205 }
1206 }
1207
1208 my $snum = $lnum;
1209
1210 while( ! $isend && $lnum <= $#lines ) {
1211 my $line = $lines[$lnum];
1212 #warn "# $lnum: $line\n";
1213 $lnum++;
1214
1215 if ($line && $line =~ m/^\Q$border\E/) {
1216 if ($lnum > $snum) {
1217 my $rdattrs;
1218 my $rdvector;
1219 my $rdsnippet;
1220
1221 my $rlnum = $snum;
1222 while ($rlnum < $lnum - 1 ) {
1223 #my $rdline = $self->_s($lines[$rlnum]);
1224 my $rdline = $lines[$rlnum];
1225 $rlnum++;
1226 last unless ($rdline);
1227 if ($rdline =~ /^%/) {
1228 $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/);
1229 } elsif($rdline =~ /=/) {
1230 $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/);
1231 } else {
1232 confess "invalid format of response";
1233 }
1234 }
1235 while($rlnum < $lnum - 1) {
1236 my $rdline = $lines[$rlnum];
1237 $rlnum++;
1238 $rdsnippet .= "$rdline\n";
1239 }
1240 #warn Dumper($rdvector, $rdattrs, $rdsnippet);
1241 if (my $rduri = $rdattrs->{'@uri'}) {
1242 push @docs, new Search::Estraier::ResultDocument(
1243 uri => $rduri,
1244 attrs => $rdattrs,
1245 snippet => $rdsnippet,
1246 keywords => $rdvector,
1247 );
1248 }
1249 }
1250 $snum = $lnum;
1251 #warn "### $line\n";
1252 $isend = 1 if ($line =~ /:END$/);
1253 }
1254
1255 }
1256
1257 if (! $isend) {
1258 warn "received result doesn't have :END\n$resbody";
1259 return;
1260 }
1261
1262 #warn Dumper(\@docs, $hints);
1263
1264 return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints );
1265 }
1266
1267
1268 =head2 cond_to_query
1269
1270 Return URI encoded string generated from Search::Estraier::Condition
1271
1272 my $args = $node->cond_to_query( $cond, $depth );
1273
1274 =cut
1275
1276 sub cond_to_query {
1277 my $self = shift;
1278
1279 my $cond = shift || return;
1280 croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1281 my $depth = shift;
1282
1283 my @args;
1284
1285 if (my $phrase = $cond->phrase) {
1286 push @args, 'phrase=' . uri_escape($phrase);
1287 }
1288
1289 if (my @attrs = $cond->attrs) {
1290 for my $i ( 0 .. $#attrs ) {
1291 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] );
1292 }
1293 }
1294
1295 if (my $order = $cond->order) {
1296 push @args, 'order=' . uri_escape($order);
1297 }
1298
1299 if (my $max = $cond->max) {
1300 push @args, 'max=' . $max;
1301 } else {
1302 push @args, 'max=' . (1 << 30);
1303 }
1304
1305 if (my $options = $cond->options) {
1306 push @args, 'options=' . $options;
1307 }
1308
1309 push @args, 'depth=' . $depth if ($depth);
1310 push @args, 'wwidth=' . $self->{wwidth};
1311 push @args, 'hwidth=' . $self->{hwidth};
1312 push @args, 'awidth=' . $self->{awidth};
1313
1314 return join('&', @args);
1315 }
1316
1317
1318 =head2 shuttle_url
1319
1320 This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
1321 master.
1322
1323 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1324
1325 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1326 body will be saved within object.
1327
1328 =cut
1329
1330 use LWP::UserAgent;
1331
1332 sub shuttle_url {
1333 my $self = shift;
1334
1335 my ($url, $content_type, $reqbody, $resbody) = @_;
1336
1337 $self->{status} = -1;
1338
1339 warn "## $url\n" if ($self->{debug});
1340
1341 $url = new URI($url);
1342 if (
1343 !$url || !$url->scheme || !$url->scheme eq 'http' ||
1344 !$url->host || !$url->port || $url->port < 1
1345 ) {
1346 carp "can't parse $url\n";
1347 return -1;
1348 }
1349
1350 my $ua = LWP::UserAgent->new;
1351 $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1352
1353 my $req;
1354 if ($reqbody) {
1355 $req = HTTP::Request->new(POST => $url);
1356 } else {
1357 $req = HTTP::Request->new(GET => $url);
1358 }
1359
1360 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1361 $req->headers->header( 'Connection', 'close' );
1362 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );
1363 $req->content_type( $content_type );
1364
1365 warn $req->headers->as_string,"\n" if ($self->{debug});
1366
1367 if ($reqbody) {
1368 warn "$reqbody\n" if ($self->{debug});
1369 $req->content( $reqbody );
1370 }
1371
1372 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1373
1374 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1375
1376 return -1 if (! $res->is_success);
1377
1378 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1379
1380 $$resbody .= $res->content;
1381
1382 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1383
1384 return $self->{status};
1385 }
1386
1387
1388 =head2 set_snippet_width
1389
1390 Set width of snippets in results
1391
1392 $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1393
1394 C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1395 is not sent with results. If it is negative, whole document text is sent instead of snippet.
1396
1397 C<$hwidth> specified width of strings from beginning of string. Default
1398 value is C<96>. Negative or zero value keep previous value.
1399
1400 C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1401 If negative of zero value is provided previous value is kept unchanged.
1402
1403 =cut
1404
1405 sub set_snippet_width {
1406 my $self = shift;
1407
1408 my ($wwidth, $hwidth, $awidth) = @_;
1409 $self->{wwidth} = $wwidth;
1410 $self->{hwidth} = $hwidth if ($hwidth >= 0);
1411 $self->{awidth} = $awidth if ($awidth >= 0);
1412 }
1413
1414
1415 =head2 set_user
1416
1417 Manage users of node
1418
1419 $node->set_user( 'name', $mode );
1420
1421 C<$mode> can be one of:
1422
1423 =over 4
1424
1425 =item 0
1426
1427 delete account
1428
1429 =item 1
1430
1431 set administrative right for user
1432
1433 =item 2
1434
1435 set user account as guest
1436
1437 =back
1438
1439 Return true on success, otherwise false.
1440
1441 =cut
1442
1443 sub set_user {
1444 my $self = shift;
1445 my ($name, $mode) = @_;
1446
1447 return unless ($self->{url});
1448 croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1449
1450 $self->shuttle_url( $self->{url} . '/_set_user',
1451 'text/plain',
1452 'name=' . uri_escape($name) . '&mode=' . $mode,
1453 undef
1454 ) == 200;
1455 }
1456
1457
1458 =head2 set_link
1459
1460 Manage node links
1461
1462 $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1463
1464 If C<$credit> is negative, link is removed.
1465
1466 =cut
1467
1468 sub set_link {
1469 my $self = shift;
1470 my ($url, $label, $credit) = @_;
1471
1472 return unless ($self->{url});
1473 croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1474
1475 my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1476 $reqbody .= '&credit=' . $credit if ($credit > 0);
1477
1478 $self->shuttle_url( $self->{url} . '/_set_link',
1479 'text/plain',
1480 $reqbody,
1481 undef
1482 ) == 200;
1483 }
1484
1485
1486 =head1 PRIVATE METHODS
1487
1488 You could call those directly, but you don't have to. I hope.
1489
1490 =head2 _set_info
1491
1492 Set information for node
1493
1494 $node->_set_info;
1495
1496 =cut
1497
1498 sub _set_info {
1499 my $self = shift;
1500
1501 $self->{status} = -1;
1502 return unless ($self->{url});
1503
1504 my $resbody;
1505 my $rv = $self->shuttle_url( $self->{url} . '/inform',
1506 'text/plain',
1507 undef,
1508 \$resbody,
1509 );
1510
1511 return if ($rv != 200 || !$resbody);
1512
1513 # it seems that response can have multiple line endings
1514 $resbody =~ s/[\r\n]+$//;
1515
1516 ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =
1517 split(/\t/, $resbody, 5);
1518
1519 }
1520
1521 ###
1522
1523 =head1 EXPORT
1524
1525 Nothing.
1526
1527 =head1 SEE ALSO
1528
1529 L<http://hyperestraier.sourceforge.net/>
1530
1531 Hyper Estraier Ruby interface on which this module is based.
1532
1533 =head1 AUTHOR
1534
1535 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1536
1537
1538 =head1 COPYRIGHT AND LICENSE
1539
1540 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1541
1542 This library is free software; you can redistribute it and/or modify
1543 it under the GPL v2 or later.
1544
1545 =cut
1546
1547 1;

  ViewVC Help
Powered by ViewVC 1.1.26