/[Search-Estraier]/trunk/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 65 - (show annotations)
Sat Jan 7 23:48:16 2006 UTC (18 years, 2 months ago) by dpavlin
File size: 27415 byte(s)
add optional node parametar to new Search::Estraier::Node
1 package Search::Estraier;
2
3 use 5.008;
4 use strict;
5 use warnings;
6
7 our $VERSION = '0.02';
8
9 =head1 NAME
10
11 Search::Estraier - pure perl module to use Hyper Estraier search engine
12
13 =head1 SYNOPSIS
14
15 use Search::Estraier;
16 my $est = new Search::Estraier();
17
18 =head1 DESCRIPTION
19
20 This module is implementation of node API of Hyper Estraier. Since it's
21 perl-only module with dependencies only on standard perl modules, it will
22 run on all platforms on which perl runs. It doesn't require compilation
23 or Hyper Estraier development files on target machine.
24
25 It is implemented as multiple packages which closly resamble Ruby
26 implementation. It also includes methods to manage nodes.
27
28 =cut
29
30 =head1 Inheritable common methods
31
32 This methods should really move somewhere else.
33
34 =head2 _s
35
36 Remove multiple whitespaces from string, as well as whitespaces at beginning or end
37
38 my $text = $self->_s(" this is a text ");
39 $text = 'this is a text';
40
41 =cut
42
43 sub _s {
44 my $text = $_[1] || return;
45 $text =~ s/\s\s+/ /gs;
46 $text =~ s/^\s+//;
47 $text =~ s/\s+$//;
48 return $text;
49 }
50
51 package Search::Estraier::Document;
52
53 use Carp qw/croak confess/;
54
55 use Search::Estraier;
56 our @ISA = qw/Search::Estraier/;
57
58 =head1 Search::Estraier::Document
59
60 This class implements Document which is collection of attributes
61 (key=value), vectors (also key value) display text and hidden text.
62
63
64 =head2 new
65
66 Create new document, empty or from draft.
67
68 my $doc = new Search::HyperEstraier::Document;
69 my $doc2 = new Search::HyperEstraier::Document( $draft );
70
71 =cut
72
73 sub new {
74 my $class = shift;
75 my $self = {};
76 bless($self, $class);
77
78 $self->{id} = -1;
79
80 my $draft = shift;
81
82 if ($draft) {
83 my $in_text = 0;
84 foreach my $line (split(/\n/, $draft)) {
85
86 if ($in_text) {
87 if ($line =~ /^\t/) {
88 push @{ $self->{htexts} }, substr($line, 1);
89 } else {
90 push @{ $self->{dtexts} }, $line;
91 }
92 next;
93 }
94
95 if ($line =~ m/^%VECTOR\t(.+)$/) {
96 my @fields = split(/\t/, $1);
97 for my $i ( 0 .. ($#fields - 1) ) {
98 $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
99 $i++;
100 }
101 next;
102 } elsif ($line =~ m/^%/) {
103 # What is this? comment?
104 #warn "$line\n";
105 next;
106 } elsif ($line =~ m/^$/) {
107 $in_text = 1;
108 next;
109 } elsif ($line =~ m/^(.+)=(.+)$/) {
110 $self->{attrs}->{ $1 } = $2;
111 next;
112 }
113
114 warn "draft ignored: $line\n";
115 }
116 }
117
118 $self ? return $self : return undef;
119 }
120
121
122 =head2 add_attr
123
124 Add an attribute.
125
126 $doc->add_attr( name => 'value' );
127
128 Delete attribute using
129
130 $doc->add_attr( name => undef );
131
132 =cut
133
134 sub add_attr {
135 my $self = shift;
136 my $attrs = {@_};
137
138 while (my ($name, $value) = each %{ $attrs }) {
139 if (! defined($value)) {
140 delete( $self->{attrs}->{ $self->_s($name) } );
141 } else {
142 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
143 }
144 }
145
146 return 1;
147 }
148
149
150 =head2 add_text
151
152 Add a sentence of text.
153
154 $doc->add_text('this is example text to display');
155
156 =cut
157
158 sub add_text {
159 my $self = shift;
160 my $text = shift;
161 return unless defined($text);
162
163 push @{ $self->{dtexts} }, $self->_s($text);
164 }
165
166
167 =head2 add_hidden_text
168
169 Add a hidden sentence.
170
171 $doc->add_hidden_text('this is example text just for search');
172
173 =cut
174
175 sub add_hidden_text {
176 my $self = shift;
177 my $text = shift;
178 return unless defined($text);
179
180 push @{ $self->{htexts} }, $self->_s($text);
181 }
182
183
184 =head2 id
185
186 Get the ID number of document. If the object has never been registred, C<-1> is returned.
187
188 print $doc->id;
189
190 =cut
191
192 sub id {
193 my $self = shift;
194 return $self->{id};
195 }
196
197
198 =head2 attr_names
199
200 Returns array with attribute names from document object.
201
202 my @attrs = $doc->attr_names;
203
204 =cut
205
206 sub attr_names {
207 my $self = shift;
208 return unless ($self->{attrs});
209 #croak "attr_names return array, not scalar" if (! wantarray);
210 return sort keys %{ $self->{attrs} };
211 }
212
213
214 =head2 attr
215
216 Returns value of an attribute.
217
218 my $value = $doc->attr( 'attribute' );
219
220 =cut
221
222 sub attr {
223 my $self = shift;
224 my $name = shift;
225 return unless (defined($name) && $self->{attrs});
226 return $self->{attrs}->{ $name };
227 }
228
229
230 =head2 texts
231
232 Returns array with text sentences.
233
234 my @texts = $doc->texts;
235
236 =cut
237
238 sub texts {
239 my $self = shift;
240 #confess "texts return array, not scalar" if (! wantarray);
241 return @{ $self->{dtexts} } if ($self->{dtexts});
242 }
243
244
245 =head2 cat_texts
246
247 Return whole text as single scalar.
248
249 my $text = $doc->cat_texts;
250
251 =cut
252
253 sub cat_texts {
254 my $self = shift;
255 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
256 }
257
258
259 =head2 dump_draft
260
261 Dump draft data from document object.
262
263 print $doc->dump_draft;
264
265 =cut
266
267 sub dump_draft {
268 my $self = shift;
269 my $draft;
270
271 foreach my $attr_name (sort keys %{ $self->{attrs} }) {
272 $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
273 }
274
275 if ($self->{kwords}) {
276 $draft .= '%%VECTOR';
277 while (my ($key, $value) = each %{ $self->{kwords} }) {
278 $draft .= "\t$key\t$value";
279 }
280 $draft .= "\n";
281 }
282
283 $draft .= "\n";
284
285 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
286 $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
287
288 return $draft;
289 }
290
291
292 =head2 delete
293
294 Empty document object
295
296 $doc->delete;
297
298 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
299 convinience. Document objects which go out of scope will be destroyed
300 automatically.
301
302 =cut
303
304 sub delete {
305 my $self = shift;
306
307 foreach my $data (qw/attrs dtexts stexts kwords/) {
308 delete($self->{$data});
309 }
310
311 $self->{id} = -1;
312
313 return 1;
314 }
315
316
317
318 package Search::Estraier::Condition;
319
320 use Carp qw/confess croak/;
321
322 use Search::Estraier;
323 our @ISA = qw/Search::Estraier/;
324
325 =head1 Search::Estraier::Condition
326
327 =head2 new
328
329 my $cond = new Search::HyperEstraier::Condition;
330
331 =cut
332
333 sub new {
334 my $class = shift;
335 my $self = {};
336 bless($self, $class);
337
338 $self->{max} = -1;
339 $self->{options} = 0;
340
341 $self ? return $self : return undef;
342 }
343
344
345 =head2 set_phrase
346
347 $cond->set_phrase('search phrase');
348
349 =cut
350
351 sub set_phrase {
352 my $self = shift;
353 $self->{phrase} = $self->_s( shift );
354 }
355
356
357 =head2 add_attr
358
359 $cond->add_attr('@URI STRINC /~dpavlin/');
360
361 =cut
362
363 sub add_attr {
364 my $self = shift;
365 my $attr = shift || return;
366 push @{ $self->{attrs} }, $self->_s( $attr );
367 }
368
369
370 =head2 set_order
371
372 $cond->set_order('@mdate NUMD');
373
374 =cut
375
376 sub set_order {
377 my $self = shift;
378 $self->{order} = shift;
379 }
380
381
382 =head2 set_max
383
384 $cond->set_max(42);
385
386 =cut
387
388 sub set_max {
389 my $self = shift;
390 my $max = shift;
391 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
392 $self->{max} = $max;
393 }
394
395
396 =head2 set_options
397
398 $cond->set_options( SURE => 1 );
399
400 =cut
401
402 my $options = {
403 # check N-gram keys skipping by three
404 SURE => 1 << 0,
405 # check N-gram keys skipping by two
406 USUAL => 1 << 1,
407 # without TF-IDF tuning
408 FAST => 1 << 2,
409 # with the simplified phrase
410 AGITO => 1 << 3,
411 # check every N-gram key
412 NOIDF => 1 << 4,
413 # check N-gram keys skipping by one
414 SIMPLE => 1 << 10,
415 };
416
417 sub set_options {
418 my $self = shift;
419 my $option = shift;
420 confess "unknown option" unless ($options->{$option});
421 $self->{options} ||= $options->{$option};
422 }
423
424
425 =head2 phrase
426
427 Return search phrase.
428
429 print $cond->phrase;
430
431 =cut
432
433 sub phrase {
434 my $self = shift;
435 return $self->{phrase};
436 }
437
438
439 =head2 order
440
441 Return search result order.
442
443 print $cond->order;
444
445 =cut
446
447 sub order {
448 my $self = shift;
449 return $self->{order};
450 }
451
452
453 =head2 attrs
454
455 Return search result attrs.
456
457 my @cond_attrs = $cond->attrs;
458
459 =cut
460
461 sub attrs {
462 my $self = shift;
463 #croak "attrs return array, not scalar" if (! wantarray);
464 return @{ $self->{attrs} } if ($self->{attrs});
465 }
466
467
468 =head2 max
469
470 Return maximum number of results.
471
472 print $cond->max;
473
474 C<-1> is returned for unitialized value, C<0> is unlimited.
475
476 =cut
477
478 sub max {
479 my $self = shift;
480 return $self->{max};
481 }
482
483
484 =head2 options
485
486 Return options for this condition.
487
488 print $cond->options;
489
490 Options are returned in numerical form.
491
492 =cut
493
494 sub options {
495 my $self = shift;
496 return $self->{options};
497 }
498
499
500 package Search::Estraier::ResultDocument;
501
502 use Carp qw/croak/;
503
504 #use Search::Estraier;
505 #our @ISA = qw/Search::Estraier/;
506
507 =head1 Search::Estraier::ResultDocument
508
509 =head2 new
510
511 my $rdoc = new Search::HyperEstraier::ResultDocument(
512 uri => 'http://localhost/document/uri/42',
513 attrs => {
514 foo => 1,
515 bar => 2,
516 },
517 snippet => 'this is a text of snippet'
518 keywords => 'this\tare\tkeywords'
519 );
520
521 =cut
522
523 sub new {
524 my $class = shift;
525 my $self = {@_};
526 bless($self, $class);
527
528 croak "missing uri for ResultDocument" unless defined($self->{uri});
529
530 $self ? return $self : return undef;
531 }
532
533
534 =head2 uri
535
536 Return URI of result document
537
538 print $rdoc->uri;
539
540 =cut
541
542 sub uri {
543 my $self = shift;
544 return $self->{uri};
545 }
546
547
548 =head2 attr_names
549
550 Returns array with attribute names from result document object.
551
552 my @attrs = $rdoc->attr_names;
553
554 =cut
555
556 sub attr_names {
557 my $self = shift;
558 croak "attr_names return array, not scalar" if (! wantarray);
559 return sort keys %{ $self->{attrs} };
560 }
561
562
563 =head2 attr
564
565 Returns value of an attribute.
566
567 my $value = $rdoc->attr( 'attribute' );
568
569 =cut
570
571 sub attr {
572 my $self = shift;
573 my $name = shift || return;
574 return $self->{attrs}->{ $name };
575 }
576
577
578 =head2 snippet
579
580 Return snippet from result document
581
582 print $rdoc->snippet;
583
584 =cut
585
586 sub snippet {
587 my $self = shift;
588 return $self->{snippet};
589 }
590
591
592 =head2 keywords
593
594 Return keywords from result document
595
596 print $rdoc->keywords;
597
598 =cut
599
600 sub keywords {
601 my $self = shift;
602 return $self->{keywords};
603 }
604
605
606 package Search::Estraier::NodeResult;
607
608 use Carp qw/croak/;
609
610 #use Search::Estraier;
611 #our @ISA = qw/Search::Estraier/;
612
613 =head1 Search::Estraier::NodeResult
614
615 =head2 new
616
617 my $res = new Search::HyperEstraier::NodeResult(
618 docs => @array_of_rdocs,
619 hits => %hash_with_hints,
620 );
621
622 =cut
623
624 sub new {
625 my $class = shift;
626 my $self = {@_};
627 bless($self, $class);
628
629 foreach my $f (qw/docs hints/) {
630 croak "missing $f for ResultDocument" unless defined($self->{$f});
631 }
632
633 $self ? return $self : return undef;
634 }
635
636
637 =head2 doc_num
638
639 Return number of documents
640
641 print $res->doc_num;
642
643 =cut
644
645 sub doc_num {
646 my $self = shift;
647 return $#{$self->{docs}} + 1;
648 }
649
650
651 =head2 get_doc
652
653 Return single document
654
655 my $doc = $res->get_doc( 42 );
656
657 Returns undef if document doesn't exist.
658
659 =cut
660
661 sub get_doc {
662 my $self = shift;
663 my $num = shift;
664 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
665 return undef if ($num < 0 || $num > $self->{docs});
666 return $self->{docs}->[$num];
667 }
668
669
670 =head2 hint
671
672 Return specific hint from results.
673
674 print $rec->hint( 'VERSION' );
675
676 Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
677 C<TIME>, C<LINK#n>, C<VIEW>.
678
679 =cut
680
681 sub hint {
682 my $self = shift;
683 my $key = shift || return;
684 return $self->{hints}->{$key};
685 }
686
687
688 package Search::Estraier::Node;
689
690 use Carp qw/carp croak confess/;
691 use URI;
692 use MIME::Base64;
693 use IO::Socket::INET;
694 use URI::Escape qw/uri_escape/;
695
696 =head1 Search::Estraier::Node
697
698 =head2 new
699
700 my $node = new Search::HyperEstraier::Node;
701
702 or optionally with C<url> as parametar
703
704 my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
705
706 =cut
707
708 sub new {
709 my $class = shift;
710 my $self = {
711 pxport => -1,
712 timeout => 0, # this used to be -1
713 dnum => -1,
714 wnum => -1,
715 size => -1.0,
716 wwidth => 480,
717 hwidth => 96,
718 awidth => 96,
719 status => -1,
720 };
721 bless($self, $class);
722
723 if ($#_ == 0) {
724 $self->{url} = shift;
725 } else {
726 my $args = {@_};
727
728 $self->{debug} = $args->{debug};
729 warn "## Node debug on\n" if ($self->{debug});
730 }
731
732 $self ? return $self : return undef;
733 }
734
735
736 =head2 set_url
737
738 Specify URL to node server
739
740 $node->set_url('http://localhost:1978');
741
742 =cut
743
744 sub set_url {
745 my $self = shift;
746 $self->{url} = shift;
747 }
748
749
750 =head2 set_proxy
751
752 Specify proxy server to connect to node server
753
754 $node->set_proxy('proxy.example.com', 8080);
755
756 =cut
757
758 sub set_proxy {
759 my $self = shift;
760 my ($host,$port) = @_;
761 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
762 $self->{pxhost} = $host;
763 $self->{pxport} = $port;
764 }
765
766
767 =head2 set_timeout
768
769 Specify timeout of connection in seconds
770
771 $node->set_timeout( 15 );
772
773 =cut
774
775 sub set_timeout {
776 my $self = shift;
777 my $sec = shift;
778 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
779 $self->{timeout} = $sec;
780 }
781
782
783 =head2 set_auth
784
785 Specify name and password for authentication to node server.
786
787 $node->set_auth('clint','eastwood');
788
789 =cut
790
791 sub set_auth {
792 my $self = shift;
793 my ($login,$passwd) = @_;
794 my $basic_auth = encode_base64( "$login:$passwd" );
795 chomp($basic_auth);
796 $self->{auth} = $basic_auth;
797 }
798
799
800 =head2 status
801
802 Return status code of last request.
803
804 print $node->status;
805
806 C<-1> means connection failure.
807
808 =cut
809
810 sub status {
811 my $self = shift;
812 return $self->{status};
813 }
814
815
816 =head2 put_doc
817
818 Add a document
819
820 $node->put_doc( $document_draft ) or die "can't add document";
821
822 Return true on success or false on failture.
823
824 =cut
825
826 sub put_doc {
827 my $self = shift;
828 my $doc = shift || return;
829 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
830 $self->shuttle_url( $self->{url} . '/put_doc',
831 'text/x-estraier-draft',
832 $doc->dump_draft,
833 undef
834 ) == 200;
835 }
836
837
838 =head2 out_doc
839
840 Remove a document
841
842 $node->out_doc( document_id ) or "can't remove document";
843
844 Return true on success or false on failture.
845
846 =cut
847
848 sub out_doc {
849 my $self = shift;
850 my $id = shift || return;
851 return unless ($self->{url});
852 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
853 $self->shuttle_url( $self->{url} . '/out_doc',
854 'application/x-www-form-urlencoded',
855 "id=$id",
856 undef
857 ) == 200;
858 }
859
860
861 =head2 out_doc_by_uri
862
863 Remove a registrated document using it's uri
864
865 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
866
867 Return true on success or false on failture.
868
869 =cut
870
871 sub out_doc_by_uri {
872 my $self = shift;
873 my $uri = shift || return;
874 return unless ($self->{url});
875 $self->shuttle_url( $self->{url} . '/out_doc',
876 'application/x-www-form-urlencoded',
877 "uri=" . uri_escape($uri),
878 undef
879 ) == 200;
880 }
881
882
883 =head2 edit_doc
884
885 Edit attributes of a document
886
887 $node->edit_doc( $document_draft ) or die "can't edit document";
888
889 Return true on success or false on failture.
890
891 =cut
892
893 sub edit_doc {
894 my $self = shift;
895 my $doc = shift || return;
896 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
897 $self->shuttle_url( $self->{url} . '/edit_doc',
898 'text/x-estraier-draft',
899 $doc->dump_draft,
900 undef
901 ) == 200;
902 }
903
904
905 =head2 get_doc
906
907 Retreive document
908
909 my $doc = $node->get_doc( document_id ) or die "can't get document";
910
911 Return true on success or false on failture.
912
913 =cut
914
915 sub get_doc {
916 my $self = shift;
917 my $id = shift || return;
918 return $self->_fetch_doc( id => $id );
919 }
920
921
922 =head2 get_doc_by_uri
923
924 Retreive document
925
926 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
927
928 Return true on success or false on failture.
929
930 =cut
931
932 sub get_doc_by_uri {
933 my $self = shift;
934 my $uri = shift || return;
935 return $self->_fetch_doc( uri => $uri );
936 }
937
938
939 =head2 get_doc_attr
940
941 Retrieve the value of an atribute from object
942
943 my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
944 die "can't get document attribute";
945
946 =cut
947
948 sub get_doc_attr {
949 my $self = shift;
950 my ($id,$name) = @_;
951 return unless ($id && $name);
952 return $self->_fetch_doc( id => $id, attr => $name );
953 }
954
955
956 =head2 get_doc_attr_by_uri
957
958 Retrieve the value of an atribute from object
959
960 my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
961 die "can't get document attribute";
962
963 =cut
964
965 sub get_doc_attr_by_uri {
966 my $self = shift;
967 my ($uri,$name) = @_;
968 return unless ($uri && $name);
969 return $self->_fetch_doc( uri => $uri, attr => $name );
970 }
971
972
973 =head2 etch_doc
974
975 Exctract document keywords
976
977 my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
978
979 =cut
980
981 sub etch_doc {
982 my $self = shift;
983 my $id = shift || return;
984 return $self->_fetch_doc( id => $id, etch => 1 );
985 }
986
987 =head2 etch_doc_by_uri
988
989 Retreive document
990
991 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
992
993 Return true on success or false on failture.
994
995 =cut
996
997 sub etch_doc_by_uri {
998 my $self = shift;
999 my $uri = shift || return;
1000 return $self->_fetch_doc( uri => $uri, etch => 1 );
1001 }
1002
1003
1004 =head2 uri_to_id
1005
1006 Get ID of document specified by URI
1007
1008 my $id = $node->uri_to_id( 'file:///document/uri/42' );
1009
1010 =cut
1011
1012 sub uri_to_id {
1013 my $self = shift;
1014 my $uri = shift || return;
1015 return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
1016 }
1017
1018
1019 =head2 _fetch_doc
1020
1021 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1022 C<etch_doc>, C<etch_doc_by_uri>.
1023
1024 # this will decode received draft into Search::Estraier::Document object
1025 my $doc = $node->_fetch_doc( id => 42 );
1026 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1027
1028 # to extract keywords, add etch
1029 my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1030 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1031
1032 # to get document attrubute add attr
1033 my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1034 my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1035
1036 # more general form which allows implementation of
1037 # uri_to_id
1038 my $id = $node->_fetch_doc(
1039 uri => 'file:///document/uri/42',
1040 path => '/uri_to_id',
1041 chomp_resbody => 1
1042 );
1043
1044 =cut
1045
1046 sub _fetch_doc {
1047 my $self = shift;
1048 my $a = {@_};
1049 return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1050
1051 my ($arg, $resbody);
1052
1053 my $path = $a->{path} || '/get_doc';
1054 $path = '/etch_doc' if ($a->{etch});
1055
1056 if ($a->{id}) {
1057 croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1058 $arg = 'id=' . $a->{id};
1059 } elsif ($a->{uri}) {
1060 $arg = 'uri=' . uri_escape($a->{uri});
1061 } else {
1062 confess "unhandled argument. Need id or uri.";
1063 }
1064
1065 if ($a->{attr}) {
1066 $path = '/get_doc_attr';
1067 $arg .= '&attr=' . uri_escape($a->{attr});
1068 $a->{chomp_resbody} = 1;
1069 }
1070
1071 my $rv = $self->shuttle_url( $self->{url} . $path,
1072 'application/x-www-form-urlencoded',
1073 $arg,
1074 \$resbody,
1075 );
1076
1077 return if ($rv != 200);
1078
1079 if ($a->{etch}) {
1080 $self->{kwords} = {};
1081 return +{} unless ($resbody);
1082 foreach my $l (split(/\n/, $resbody)) {
1083 my ($k,$v) = split(/\t/, $l, 2);
1084 $self->{kwords}->{$k} = $v if ($v);
1085 }
1086 return $self->{kwords};
1087 } elsif ($a->{chomp_resbody}) {
1088 return unless (defined($resbody));
1089 chomp($resbody);
1090 return $resbody;
1091 } else {
1092 return new Search::Estraier::Document($resbody);
1093 }
1094 }
1095
1096
1097 =head2 name
1098
1099 my $node_name = $node->name;
1100
1101 =cut
1102
1103 sub name {
1104 my $self = shift;
1105 $self->_set_info unless ($self->{name});
1106 return $self->{name};
1107 }
1108
1109
1110 =head2 label
1111
1112 my $node_label = $node->label;
1113
1114 =cut
1115
1116 sub label {
1117 my $self = shift;
1118 $self->_set_info unless ($self->{label});
1119 return $self->{label};
1120 }
1121
1122
1123 =head2 doc_num
1124
1125 my $documents_in_node = $node->doc_num;
1126
1127 =cut
1128
1129 sub doc_num {
1130 my $self = shift;
1131 $self->_set_info if ($self->{dnum} < 0);
1132 return $self->{dnum};
1133 }
1134
1135
1136 =head2 word_num
1137
1138 my $words_in_node = $node->word_num;
1139
1140 =cut
1141
1142 sub word_num {
1143 my $self = shift;
1144 $self->_set_info if ($self->{wnum} < 0);
1145 return $self->{wnum};
1146 }
1147
1148
1149 =head2 size
1150
1151 my $node_size = $node->size;
1152
1153 =cut
1154
1155 sub size {
1156 my $self = shift;
1157 $self->_set_info if ($self->{size} < 0);
1158 return $self->{size};
1159 }
1160
1161
1162 =head2 search
1163
1164 Search documents which match condition
1165
1166 my $nres = $node->search( $cond, $depth );
1167
1168 C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1169 depth for meta search.
1170
1171 Function results C<Search::Estraier::NodeResult> object.
1172
1173 =cut
1174
1175 sub search {
1176 my $self = shift;
1177 my ($cond, $depth) = @_;
1178 return unless ($cond && defined($depth) && $self->{url});
1179 croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1180 croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1181
1182 my $resbody;
1183
1184 my $rv = $self->shuttle_url( $self->{url} . '/search',
1185 'application/x-www-form-urlencoded',
1186 $self->cond_to_query( $cond, $depth ),
1187 \$resbody,
1188 );
1189 return if ($rv != 200);
1190
1191 my (@docs, $hints);
1192
1193 my @lines = split(/\n/, $resbody);
1194 return unless (@lines);
1195
1196 my $border = $lines[0];
1197 my $isend = 0;
1198 my $lnum = 1;
1199
1200 while ( $lnum <= $#lines ) {
1201 my $line = $lines[$lnum];
1202 $lnum++;
1203
1204 #warn "## $line\n";
1205 if ($line && $line =~ m/^\Q$border\E(:END)*$/) {
1206 $isend = $1;
1207 last;
1208 }
1209
1210 if ($line =~ /\t/) {
1211 my ($k,$v) = split(/\t/, $line, 2);
1212 $hints->{$k} = $v;
1213 }
1214 }
1215
1216 my $snum = $lnum;
1217
1218 while( ! $isend && $lnum <= $#lines ) {
1219 my $line = $lines[$lnum];
1220 #warn "# $lnum: $line\n";
1221 $lnum++;
1222
1223 if ($line && $line =~ m/^\Q$border\E/) {
1224 if ($lnum > $snum) {
1225 my $rdattrs;
1226 my $rdvector;
1227 my $rdsnippet;
1228
1229 my $rlnum = $snum;
1230 while ($rlnum < $lnum - 1 ) {
1231 #my $rdline = $self->_s($lines[$rlnum]);
1232 my $rdline = $lines[$rlnum];
1233 $rlnum++;
1234 last unless ($rdline);
1235 if ($rdline =~ /^%/) {
1236 $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/);
1237 } elsif($rdline =~ /=/) {
1238 $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/);
1239 } else {
1240 confess "invalid format of response";
1241 }
1242 }
1243 while($rlnum < $lnum - 1) {
1244 my $rdline = $lines[$rlnum];
1245 $rlnum++;
1246 $rdsnippet .= "$rdline\n";
1247 }
1248 #warn Dumper($rdvector, $rdattrs, $rdsnippet);
1249 if (my $rduri = $rdattrs->{'@uri'}) {
1250 push @docs, new Search::Estraier::ResultDocument(
1251 uri => $rduri,
1252 attrs => $rdattrs,
1253 snippet => $rdsnippet,
1254 keywords => $rdvector,
1255 );
1256 }
1257 }
1258 $snum = $lnum;
1259 #warn "### $line\n";
1260 $isend = 1 if ($line =~ /:END$/);
1261 }
1262
1263 }
1264
1265 if (! $isend) {
1266 warn "received result doesn't have :END\n$resbody";
1267 return;
1268 }
1269
1270 #warn Dumper(\@docs, $hints);
1271
1272 return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints );
1273 }
1274
1275
1276 =head2 cond_to_query
1277
1278 Return URI encoded string generated from Search::Estraier::Condition
1279
1280 my $args = $node->cond_to_query( $cond, $depth );
1281
1282 =cut
1283
1284 sub cond_to_query {
1285 my $self = shift;
1286
1287 my $cond = shift || return;
1288 croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1289 my $depth = shift;
1290
1291 my @args;
1292
1293 if (my $phrase = $cond->phrase) {
1294 push @args, 'phrase=' . uri_escape($phrase);
1295 }
1296
1297 if (my @attrs = $cond->attrs) {
1298 for my $i ( 0 .. $#attrs ) {
1299 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1300 }
1301 }
1302
1303 if (my $order = $cond->order) {
1304 push @args, 'order=' . uri_escape($order);
1305 }
1306
1307 if (my $max = $cond->max) {
1308 push @args, 'max=' . $max;
1309 } else {
1310 push @args, 'max=' . (1 << 30);
1311 }
1312
1313 if (my $options = $cond->options) {
1314 push @args, 'options=' . $options;
1315 }
1316
1317 push @args, 'depth=' . $depth if ($depth);
1318 push @args, 'wwidth=' . $self->{wwidth};
1319 push @args, 'hwidth=' . $self->{hwidth};
1320 push @args, 'awidth=' . $self->{awidth};
1321
1322 return join('&', @args);
1323 }
1324
1325
1326 =head2 shuttle_url
1327
1328 This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
1329 master.
1330
1331 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1332
1333 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1334 body will be saved within object.
1335
1336 =cut
1337
1338 use LWP::UserAgent;
1339
1340 sub shuttle_url {
1341 my $self = shift;
1342
1343 my ($url, $content_type, $reqbody, $resbody) = @_;
1344
1345 $self->{status} = -1;
1346
1347 warn "## $url\n" if ($self->{debug});
1348
1349 $url = new URI($url);
1350 if (
1351 !$url || !$url->scheme || !$url->scheme eq 'http' ||
1352 !$url->host || !$url->port || $url->port < 1
1353 ) {
1354 carp "can't parse $url\n";
1355 return -1;
1356 }
1357
1358 my $ua = LWP::UserAgent->new;
1359 $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1360
1361 my $req;
1362 if ($reqbody) {
1363 $req = HTTP::Request->new(POST => $url);
1364 } else {
1365 $req = HTTP::Request->new(GET => $url);
1366 }
1367
1368 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1369 $req->headers->header( 'Connection', 'close' );
1370 $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );
1371 $req->content_type( $content_type );
1372
1373 warn $req->headers->as_string,"\n" if ($self->{debug});
1374
1375 if ($reqbody) {
1376 warn "$reqbody\n" if ($self->{debug});
1377 $req->content( $reqbody );
1378 }
1379
1380 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1381
1382 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1383
1384 return -1 if (! $res->is_success);
1385
1386 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1387
1388 $$resbody .= $res->content;
1389
1390 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1391
1392 return $self->{status};
1393 }
1394
1395
1396 =head2 set_snippet_width
1397
1398 Set width of snippets in results
1399
1400 $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1401
1402 C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1403 is not sent with results. If it is negative, whole document text is sent instead of snippet.
1404
1405 C<$hwidth> specified width of strings from beginning of string. Default
1406 value is C<96>. Negative or zero value keep previous value.
1407
1408 C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1409 If negative of zero value is provided previous value is kept unchanged.
1410
1411 =cut
1412
1413 sub set_snippet_width {
1414 my $self = shift;
1415
1416 my ($wwidth, $hwidth, $awidth) = @_;
1417 $self->{wwidth} = $wwidth;
1418 $self->{hwidth} = $hwidth if ($hwidth >= 0);
1419 $self->{awidth} = $awidth if ($awidth >= 0);
1420 }
1421
1422
1423 =head2 set_user
1424
1425 Manage users of node
1426
1427 $node->set_user( 'name', $mode );
1428
1429 C<$mode> can be one of:
1430
1431 =over 4
1432
1433 =item 0
1434
1435 delete account
1436
1437 =item 1
1438
1439 set administrative right for user
1440
1441 =item 2
1442
1443 set user account as guest
1444
1445 =back
1446
1447 Return true on success, otherwise false.
1448
1449 =cut
1450
1451 sub set_user {
1452 my $self = shift;
1453 my ($name, $mode) = @_;
1454
1455 return unless ($self->{url});
1456 croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1457
1458 $self->shuttle_url( $self->{url} . '/_set_user',
1459 'text/plain',
1460 'name=' . uri_escape($name) . '&mode=' . $mode,
1461 undef
1462 ) == 200;
1463 }
1464
1465
1466 =head2 set_link
1467
1468 Manage node links
1469
1470 $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1471
1472 If C<$credit> is negative, link is removed.
1473
1474 =cut
1475
1476 sub set_link {
1477 my $self = shift;
1478 my ($url, $label, $credit) = @_;
1479
1480 return unless ($self->{url});
1481 croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1482
1483 my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1484 $reqbody .= '&credit=' . $credit if ($credit > 0);
1485
1486 $self->shuttle_url( $self->{url} . '/_set_link',
1487 'text/plain',
1488 $reqbody,
1489 undef
1490 ) == 200;
1491 }
1492
1493
1494 =head1 PRIVATE METHODS
1495
1496 You could call those directly, but you don't have to. I hope.
1497
1498 =head2 _set_info
1499
1500 Set information for node
1501
1502 $node->_set_info;
1503
1504 =cut
1505
1506 sub _set_info {
1507 my $self = shift;
1508
1509 $self->{status} = -1;
1510 return unless ($self->{url});
1511
1512 my $resbody;
1513 my $rv = $self->shuttle_url( $self->{url} . '/inform',
1514 'text/plain',
1515 undef,
1516 \$resbody,
1517 );
1518
1519 return if ($rv != 200 || !$resbody);
1520
1521 # it seems that response can have multiple line endings
1522 $resbody =~ s/[\r\n]+$//;
1523
1524 ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =
1525 split(/\t/, $resbody, 5);
1526
1527 }
1528
1529 ###
1530
1531 =head1 EXPORT
1532
1533 Nothing.
1534
1535 =head1 SEE ALSO
1536
1537 L<http://hyperestraier.sourceforge.net/>
1538
1539 Hyper Estraier Ruby interface on which this module is based.
1540
1541 =head1 AUTHOR
1542
1543 Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1544
1545
1546 =head1 COPYRIGHT AND LICENSE
1547
1548 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1549
1550 This library is free software; you can redistribute it and/or modify
1551 it under the GPL v2 or later.
1552
1553 =cut
1554
1555 1;

  ViewVC Help
Powered by ViewVC 1.1.26