/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 43 - (hide annotations)
Fri Jan 6 00:04:28 2006 UTC (18 years, 2 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 18043 byte(s)
better error messages, added get_doc and get_doc_by_uri
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7     our $VERSION = '0.00';
8    
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15     use Search::Estraier;
16     my $est = new Search::Estraier();
17    
18     =head1 DESCRIPTION
19    
20     This module is implementation of node API of Hyper Estraier. Since it's
21     perl-only module with dependencies only on standard perl modules, it will
22     run on all platforms on which perl runs. It doesn't require compilation
23     or Hyper Estraier development files on target machine.
24    
25     It is implemented as multiple packages which closly resamble Ruby
26     implementation. It also includes methods to manage nodes.
27    
28     =cut
29    
30 dpavlin 42 =head1 Inheritable common methods
31    
32     This methods should really move somewhere else.
33    
34 dpavlin 15 =head2 _s
35    
36     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
37    
38     my $text = $self->_s(" this is a text ");
39     $text = 'this is a text';
40    
41     =cut
42    
43     sub _s {
44     my $text = $_[1] || return;
45     $text =~ s/\s\s+/ /gs;
46     $text =~ s/^\s+//;
47     $text =~ s/\s+$//;
48     return $text;
49     }
50    
51 dpavlin 2 package Search::Estraier::Document;
52    
53 dpavlin 9 use Carp qw/croak confess/;
54 dpavlin 7
55 dpavlin 15 use Search::Estraier;
56     our @ISA = qw/Search::Estraier/;
57    
58 dpavlin 2 =head1 Search::Estraier::Document
59    
60 dpavlin 14 This class implements Document which is collection of attributes
61     (key=value), vectors (also key value) display text and hidden text.
62    
63 dpavlin 42
64 dpavlin 2 =head2 new
65    
66 dpavlin 14 Create new document, empty or from draft.
67    
68 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
69 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
70 dpavlin 2
71     =cut
72    
73     sub new {
74     my $class = shift;
75 dpavlin 14 my $self = {};
76 dpavlin 2 bless($self, $class);
77    
78 dpavlin 6 $self->{id} = -1;
79    
80 dpavlin 14 my $draft = shift;
81    
82     if ($draft) {
83     my $in_text = 0;
84     foreach my $line (split(/\n/, $draft)) {
85    
86     if ($in_text) {
87     if ($line =~ /^\t/) {
88     push @{ $self->{htexts} }, substr($line, 1);
89     } else {
90     push @{ $self->{dtexts} }, $line;
91     }
92     next;
93     }
94    
95     if ($line =~ m/^%VECTOR\t(.+)$/) {
96     my @fields = split(/\t/, $1);
97     for my $i ( 0 .. ($#fields - 1) ) {
98     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
99     $i++;
100     }
101     next;
102     } elsif ($line =~ m/^%/) {
103     # What is this? comment?
104     #warn "$line\n";
105     next;
106     } elsif ($line =~ m/^$/) {
107     $in_text = 1;
108     next;
109     } elsif ($line =~ m/^(.+)=(.+)$/) {
110     $self->{attrs}->{ $1 } = $2;
111     next;
112     }
113    
114     warn "draft ignored: $line\n";
115     }
116     }
117    
118 dpavlin 2 $self ? return $self : return undef;
119     }
120    
121 dpavlin 4
122 dpavlin 2 =head2 add_attr
123    
124 dpavlin 6 Add an attribute.
125    
126 dpavlin 2 $doc->add_attr( name => 'value' );
127    
128 dpavlin 9 Delete attribute using
129 dpavlin 5
130     $doc->add_attr( name => undef );
131    
132 dpavlin 2 =cut
133    
134     sub add_attr {
135     my $self = shift;
136     my $attrs = {@_};
137    
138     while (my ($name, $value) = each %{ $attrs }) {
139 dpavlin 9 if (! defined($value)) {
140 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
141 dpavlin 9 } else {
142 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
143 dpavlin 9 }
144 dpavlin 2 }
145 dpavlin 8
146     return 1;
147 dpavlin 2 }
148    
149 dpavlin 5
150     =head2 add_text
151    
152 dpavlin 6 Add a sentence of text.
153    
154 dpavlin 5 $doc->add_text('this is example text to display');
155    
156     =cut
157    
158     sub add_text {
159     my $self = shift;
160     my $text = shift;
161     return unless defined($text);
162    
163 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
164 dpavlin 5 }
165    
166    
167     =head2 add_hidden_text
168    
169 dpavlin 6 Add a hidden sentence.
170    
171 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
172    
173     =cut
174    
175     sub add_hidden_text {
176     my $self = shift;
177     my $text = shift;
178     return unless defined($text);
179    
180 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
181 dpavlin 5 }
182    
183 dpavlin 42
184 dpavlin 6 =head2 id
185    
186     Get the ID number of document. If the object has never been registred, C<-1> is returned.
187    
188     print $doc->id;
189    
190     =cut
191    
192     sub id {
193     my $self = shift;
194     return $self->{id};
195     }
196    
197 dpavlin 42
198 dpavlin 7 =head2 attr_names
199    
200 dpavlin 9 Returns array with attribute names from document object.
201 dpavlin 7
202     my @attrs = $doc->attr_names;
203    
204     =cut
205    
206     sub attr_names {
207     my $self = shift;
208 dpavlin 9 croak "attr_names return array, not scalar" if (! wantarray);
209 dpavlin 7 return sort keys %{ $self->{attrs} };
210     }
211    
212 dpavlin 8
213     =head2 attr
214    
215 dpavlin 9 Returns value of an attribute.
216 dpavlin 8
217     my $value = $doc->attr( 'attribute' );
218    
219     =cut
220    
221     sub attr {
222     my $self = shift;
223     my $name = shift;
224    
225     return $self->{'attrs'}->{ $name };
226     }
227    
228 dpavlin 9
229     =head2 texts
230    
231     Returns array with text sentences.
232    
233     my @texts = $doc->texts;
234    
235     =cut
236    
237     sub texts {
238     my $self = shift;
239 dpavlin 12 confess "texts return array, not scalar" if (! wantarray);
240 dpavlin 11 return @{ $self->{dtexts} };
241 dpavlin 9 }
242    
243 dpavlin 42
244 dpavlin 12 =head2 cat_texts
245    
246     Return whole text as single scalar.
247    
248     my $text = $doc->cat_texts;
249    
250     =cut
251    
252     sub cat_texts {
253     my $self = shift;
254     return join(' ',@{ $self->{dtexts} });
255     }
256    
257 dpavlin 42
258 dpavlin 5 =head2 dump_draft
259    
260 dpavlin 13 Dump draft data from document object.
261    
262 dpavlin 5 print $doc->dump_draft;
263    
264     =cut
265    
266     sub dump_draft {
267 dpavlin 13 my $self = shift;
268     my $draft;
269    
270     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
271     $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
272     }
273    
274     if ($self->{kwords}) {
275     $draft .= '%%VECTOR';
276     while (my ($key, $value) = each %{ $self->{kwords} }) {
277     $draft .= "\t$key\t$value";
278     }
279     $draft .= "\n";
280     }
281    
282     $draft .= "\n";
283    
284 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
285     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
286 dpavlin 13
287     return $draft;
288 dpavlin 5 }
289    
290 dpavlin 42
291 dpavlin 4 =head2 delete
292 dpavlin 2
293 dpavlin 4 Empty document object
294 dpavlin 2
295 dpavlin 4 $doc->delete;
296    
297 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
298     convinience. Document objects which go out of scope will be destroyed
299     automatically.
300    
301 dpavlin 4 =cut
302    
303     sub delete {
304     my $self = shift;
305    
306 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
307 dpavlin 5 delete($self->{$data});
308     }
309 dpavlin 4
310 dpavlin 10 $self->{id} = -1;
311    
312 dpavlin 4 return 1;
313     }
314    
315    
316    
317 dpavlin 15 package Search::Estraier::Condition;
318 dpavlin 4
319 dpavlin 16 use Carp qw/confess croak/;
320    
321 dpavlin 15 use Search::Estraier;
322     our @ISA = qw/Search::Estraier/;
323 dpavlin 4
324 dpavlin 16 =head1 Search::Estraier::Condition
325    
326     =head2 new
327    
328     my $cond = new Search::HyperEstraier::Condition;
329    
330     =cut
331    
332     sub new {
333     my $class = shift;
334     my $self = {};
335     bless($self, $class);
336    
337 dpavlin 19 $self->{max} = -1;
338     $self->{options} = 0;
339    
340 dpavlin 16 $self ? return $self : return undef;
341     }
342    
343 dpavlin 42
344 dpavlin 16 =head2 set_phrase
345    
346     $cond->set_phrase('search phrase');
347    
348     =cut
349    
350     sub set_phrase {
351     my $self = shift;
352     $self->{phrase} = $self->_s( shift );
353     }
354    
355 dpavlin 42
356 dpavlin 16 =head2 add_attr
357    
358     $cond->add_attr('@URI STRINC /~dpavlin/');
359    
360     =cut
361    
362     sub add_attr {
363     my $self = shift;
364     my $attr = shift || return;
365     push @{ $self->{attrs} }, $self->_s( $attr );
366     }
367    
368 dpavlin 42
369 dpavlin 16 =head2 set_order
370    
371     $cond->set_order('@mdate NUMD');
372    
373     =cut
374    
375     sub set_order {
376     my $self = shift;
377     $self->{order} = shift;
378     }
379    
380 dpavlin 42
381 dpavlin 16 =head2 set_max
382    
383     $cond->set_max(42);
384    
385     =cut
386    
387     sub set_max {
388     my $self = shift;
389     my $max = shift;
390 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
391 dpavlin 16 $self->{max} = $max;
392     }
393    
394 dpavlin 42
395 dpavlin 16 =head2 set_options
396    
397     $cond->set_options( SURE => 1 );
398    
399     =cut
400    
401 dpavlin 15 my $options = {
402     # check N-gram keys skipping by three
403     SURE => 1 << 0,
404     # check N-gram keys skipping by two
405     USUAL => 1 << 1,
406     # without TF-IDF tuning
407     FAST => 1 << 2,
408     # with the simplified phrase
409     AGITO => 1 << 3,
410     # check every N-gram key
411     NOIDF => 1 << 4,
412     # check N-gram keys skipping by one
413     SIMPLE => 1 << 10,
414     };
415    
416 dpavlin 16 sub set_options {
417     my $self = shift;
418     my $option = shift;
419     confess "unknown option" unless ($options->{$option});
420     $self->{options} ||= $options->{$option};
421 dpavlin 4 }
422    
423 dpavlin 42
424 dpavlin 18 =head2 phrase
425    
426     Return search phrase.
427    
428     print $cond->phrase;
429    
430     =cut
431    
432     sub phrase {
433     my $self = shift;
434     return $self->{phrase};
435     }
436    
437 dpavlin 42
438 dpavlin 19 =head2 order
439 dpavlin 18
440 dpavlin 19 Return search result order.
441    
442     print $cond->order;
443    
444     =cut
445    
446     sub order {
447     my $self = shift;
448     return $self->{order};
449     }
450    
451 dpavlin 42
452 dpavlin 19 =head2 attrs
453    
454     Return search result attrs.
455    
456     my @cond_attrs = $cond->attrs;
457    
458     =cut
459    
460     sub attrs {
461     my $self = shift;
462     #croak "attrs return array, not scalar" if (! wantarray);
463     return @{ $self->{attrs} };
464     }
465    
466 dpavlin 42
467 dpavlin 19 =head2 max
468    
469     Return maximum number of results.
470    
471     print $cond->max;
472    
473     C<-1> is returned for unitialized value, C<0> is unlimited.
474    
475     =cut
476    
477     sub max {
478     my $self = shift;
479     return $self->{max};
480     }
481    
482 dpavlin 42
483 dpavlin 19 =head2 options
484    
485     Return options for this condition.
486    
487     print $cond->options;
488    
489     Options are returned in numerical form.
490    
491     =cut
492    
493     sub options {
494     my $self = shift;
495     return $self->{options};
496     }
497    
498    
499 dpavlin 20 package Search::Estraier::ResultDocument;
500    
501 dpavlin 24 use Carp qw/croak/;
502 dpavlin 20
503 dpavlin 24 #use Search::Estraier;
504     #our @ISA = qw/Search::Estraier/;
505 dpavlin 20
506     =head1 Search::Estraier::ResultDocument
507    
508     =head2 new
509    
510 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
511 dpavlin 20 uri => 'http://localhost/document/uri/42',
512     attrs => {
513     foo => 1,
514     bar => 2,
515     },
516     snippet => 'this is a text of snippet'
517     keywords => 'this\tare\tkeywords'
518     );
519    
520     =cut
521    
522     sub new {
523     my $class = shift;
524     my $self = {@_};
525     bless($self, $class);
526    
527     foreach my $f (qw/uri attrs snippet keywords/) {
528     croak "missing $f for ResultDocument" unless defined($self->{$f});
529     }
530    
531     $self ? return $self : return undef;
532     }
533    
534 dpavlin 42
535 dpavlin 23 =head2 uri
536 dpavlin 20
537 dpavlin 23 Return URI of result document
538 dpavlin 20
539 dpavlin 23 print $rdoc->uri;
540    
541     =cut
542    
543     sub uri {
544     my $self = shift;
545     return $self->{uri};
546     }
547    
548    
549     =head2 attr_names
550    
551     Returns array with attribute names from result document object.
552    
553     my @attrs = $rdoc->attr_names;
554    
555     =cut
556    
557     sub attr_names {
558     my $self = shift;
559     croak "attr_names return array, not scalar" if (! wantarray);
560     return sort keys %{ $self->{attrs} };
561     }
562    
563 dpavlin 42
564 dpavlin 23 =head2 attr
565    
566     Returns value of an attribute.
567    
568     my $value = $rdoc->attr( 'attribute' );
569    
570     =cut
571    
572     sub attr {
573     my $self = shift;
574     my $name = shift || return;
575     return $self->{attrs}->{ $name };
576     }
577    
578 dpavlin 42
579 dpavlin 23 =head2 snippet
580    
581     Return snippet from result document
582    
583     print $rdoc->snippet;
584    
585     =cut
586    
587     sub snippet {
588     my $self = shift;
589     return $self->{snippet};
590     }
591    
592 dpavlin 42
593 dpavlin 23 =head2 keywords
594    
595     Return keywords from result document
596    
597     print $rdoc->keywords;
598    
599     =cut
600    
601     sub keywords {
602     my $self = shift;
603     return $self->{keywords};
604     }
605    
606    
607 dpavlin 25 package Search::Estraier::NodeResult;
608    
609     use Carp qw/croak/;
610    
611     #use Search::Estraier;
612     #our @ISA = qw/Search::Estraier/;
613    
614     =head1 Search::Estraier::NodeResult
615    
616     =head2 new
617    
618     my $res = new Search::HyperEstraier::NodeResult(
619     docs => @array_of_rdocs,
620     hits => %hash_with_hints,
621     );
622    
623     =cut
624    
625     sub new {
626     my $class = shift;
627     my $self = {@_};
628     bless($self, $class);
629    
630     foreach my $f (qw/docs hints/) {
631     croak "missing $f for ResultDocument" unless defined($self->{$f});
632     }
633    
634     $self ? return $self : return undef;
635     }
636    
637 dpavlin 42
638 dpavlin 25 =head2 doc_num
639    
640     Return number of documents
641    
642     print $res->doc_num;
643    
644     =cut
645    
646     sub doc_num {
647     my $self = shift;
648     return $#{$self->{docs}};
649     }
650    
651 dpavlin 42
652 dpavlin 25 =head2 get_doc
653    
654     Return single document
655    
656     my $doc = $res->get_doc( 42 );
657    
658     Returns undef if document doesn't exist.
659    
660     =cut
661    
662     sub get_doc {
663     my $self = shift;
664     my $num = shift;
665 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
666 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
667     return $self->{docs}->[$num];
668     }
669    
670 dpavlin 42
671 dpavlin 25 =head2 hint
672    
673     Return specific hint from results.
674    
675     print $rec->hint( 'VERSION' );
676    
677     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
678     C<TIME>, C<LINK#n>, C<VIEW>.
679    
680     =cut
681    
682     sub hint {
683     my $self = shift;
684     my $key = shift || return;
685     return $self->{hints}->{$key};
686     }
687    
688    
689 dpavlin 27 package Search::Estraier::Node;
690    
691 dpavlin 37 use Carp qw/carp croak/;
692 dpavlin 33 use URI;
693 dpavlin 36 use MIME::Base64;
694 dpavlin 33 use IO::Socket::INET;
695 dpavlin 29
696 dpavlin 27 =head1 Search::Estraier::Node
697    
698     =head2 new
699    
700     my $node = new Search::HyperEstraier::Node;
701    
702     =cut
703    
704     sub new {
705     my $class = shift;
706     my $self = {
707     pxport => -1,
708 dpavlin 33 timeout => 0, # this used to be -1
709 dpavlin 27 dnum => -1,
710     wnum => -1,
711     size => -1.0,
712     wwidth => 480,
713     hwidth => 96,
714     awidth => 96,
715     status => -1,
716     };
717     bless($self, $class);
718    
719 dpavlin 39 if (@_) {
720 dpavlin 41 $self->{debug} = shift;
721 dpavlin 39 warn "## Node debug on\n";
722     }
723    
724 dpavlin 27 $self ? return $self : return undef;
725     }
726    
727 dpavlin 42
728 dpavlin 29 =head2 set_url
729    
730     Specify URL to node server
731    
732     $node->set_url('http://localhost:1978');
733    
734     =cut
735    
736     sub set_url {
737     my $self = shift;
738     $self->{url} = shift;
739     }
740    
741 dpavlin 42
742 dpavlin 29 =head2 set_proxy
743    
744     Specify proxy server to connect to node server
745    
746     $node->set_proxy('proxy.example.com', 8080);
747    
748     =cut
749    
750     sub set_proxy {
751     my $self = shift;
752     my ($host,$port) = @_;
753 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
754 dpavlin 29 $self->{pxhost} = $host;
755     $self->{pxport} = $port;
756     }
757    
758 dpavlin 42
759 dpavlin 30 =head2 set_timeout
760    
761     Specify timeout of connection in seconds
762    
763     $node->set_timeout( 15 );
764    
765     =cut
766    
767     sub set_timeout {
768     my $self = shift;
769     my $sec = shift;
770 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
771 dpavlin 30 $self->{timeout} = $sec;
772     }
773    
774 dpavlin 42
775 dpavlin 31 =head2 set_auth
776    
777     Specify name and password for authentication to node server.
778    
779     $node->set_auth('clint','eastwood');
780    
781     =cut
782    
783     sub set_auth {
784     my $self = shift;
785     my ($login,$passwd) = @_;
786 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
787     chomp($basic_auth);
788     $self->{auth} = $basic_auth;
789 dpavlin 31 }
790    
791 dpavlin 42
792 dpavlin 32 =head2 status
793    
794     Return status code of last request.
795    
796 dpavlin 40 print $node->status;
797 dpavlin 32
798     C<-1> means connection failure.
799    
800     =cut
801    
802     sub status {
803     my $self = shift;
804     return $self->{status};
805     }
806    
807 dpavlin 42
808 dpavlin 40 =head2 put_doc
809    
810 dpavlin 41 Add a document
811 dpavlin 40
812 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
813    
814     Return true on success or false on failture.
815    
816 dpavlin 40 =cut
817    
818     sub put_doc {
819     my $self = shift;
820     my $doc = shift || return;
821 dpavlin 41 return unless ($self->{url});
822     $self->shuttle_url( $self->{url} . '/put_doc',
823     'text/x-estraier-draft',
824     $doc->dump_draft,
825     undef
826     ) == 200;
827 dpavlin 40 }
828    
829 dpavlin 41
830     =head2 out_doc
831    
832     Remove a document
833    
834     $node->out_doc( document_id ) or "can't remove document";
835    
836     Return true on success or false on failture.
837    
838     =cut
839    
840     sub out_doc {
841     my $self = shift;
842     my $id = shift || return;
843     return unless ($self->{url});
844 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
845 dpavlin 41 $self->shuttle_url( $self->{url} . '/out_doc',
846     'application/x-www-form-urlencoded',
847     "id=$id",
848     undef
849     ) == 200;
850     }
851    
852    
853     =head2 out_doc_by_uri
854    
855     Remove a registrated document using it's uri
856    
857     $node->out_doc_by_uri( 'file:///document_url' ) or "can't remove document";
858    
859     Return true on success or false on failture.
860    
861     =cut
862    
863     sub out_doc_by_uri {
864     my $self = shift;
865     my $uri = shift || return;
866     return unless ($self->{url});
867     $self->shuttle_url( $self->{url} . '/out_doc',
868     'application/x-www-form-urlencoded',
869     "uri=$uri",
870     undef
871     ) == 200;
872     }
873    
874 dpavlin 42
875     =head2 edit_doc
876    
877     Edit attributes of a document
878    
879     $node->edit_doc( $document_draft ) or die "can't edit document";
880    
881     Return true on success or false on failture.
882    
883     =cut
884    
885     sub edit_doc {
886     my $self = shift;
887     my $doc = shift || return;
888     return unless ($self->{url});
889     $self->shuttle_url( $self->{url} . '/edit_doc',
890     'text/x-estraier-draft',
891     $doc->dump_draft,
892     undef
893     ) == 200;
894     }
895    
896    
897 dpavlin 43 =head2 get_doc
898    
899     Retreive document
900    
901     my $doc = $node->get_doc( document_id ) or die "can't get document";
902    
903     Return true on success or false on failture.
904    
905     =cut
906    
907     sub get_doc {
908     my $self = shift;
909     my $id = shift || return;
910     return $self->_fetch_doc( id => $id );
911     }
912    
913     =head2 get_doc_by_uri
914    
915     Retreive document
916    
917     my $doc = $node->get_doc_by_uri( 'file:///document_uri' ) or die "can't get document";
918    
919     Return true on success or false on failture.
920    
921     =cut
922    
923     sub get_doc_by_uri {
924     my $self = shift;
925     my $uri = shift || return;
926     return $self->_fetch_doc( uri => $uri );
927     }
928    
929     =head2 _fetch_doc
930    
931     Private function used for implementation of C<get_doc> and C<get_doc_by_uri>.
932    
933     my $doc = $node->fetch_doc( id => 42 );
934     my $doc = $node->fetch_doc( uri => 'file://uri/42' );
935    
936     =cut
937    
938     sub _fetch_doc {
939     my $self = shift;
940     my ($name,$val) = @_;
941     return unless ($name && defined($val) && $self->{url});
942     if ($name eq 'id') {
943     croak "id must be numberm not '$val'" unless ($val =~ m/^\d+$/);
944     }
945     my $rv = $self->shuttle_url( $self->{url} . '/get_doc',
946     'application/x-www-form-urlencoded',
947     "$name=$val",
948     my $draft,
949     );
950     return if ($rv != 200);
951     return new Search::Estraier::Document($draft);
952     }
953    
954    
955    
956    
957 dpavlin 33 =head2 shuttle_url
958 dpavlin 32
959 dpavlin 33 This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
960     master.
961 dpavlin 2
962 dpavlin 33 my $rv = shuttle_url( $url, $content_type, \$req_body, \$resbody );
963 dpavlin 2
964 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
965     body will be saved within object.
966 dpavlin 2
967     =cut
968    
969 dpavlin 33 sub shuttle_url {
970     my $self = shift;
971 dpavlin 2
972 dpavlin 33 my ($url, $content_type, $reqbody, $resbody) = @_;
973 dpavlin 2
974 dpavlin 40 $self->{status} = -1;
975 dpavlin 33
976 dpavlin 41 warn "## $url\n" if ($self->{debug});
977 dpavlin 36
978 dpavlin 33 $url = new URI($url);
979 dpavlin 37 if (
980     !$url || !$url->scheme || !$url->scheme eq 'http' ||
981     !$url->host || !$url->port || $url->port < 1
982     ) {
983     carp "can't parse $url\n";
984     return -1;
985     }
986 dpavlin 33
987     my ($host,$port,$query) = ($url->host, $url->port, $url->path);
988    
989     if ($self->{pxhost}) {
990     ($host,$port) = ($self->{pxhost}, $self->{pxport});
991     $query = "http://$host:$port/$query";
992 dpavlin 2 }
993    
994 dpavlin 37 $query .= '?' . $url->query if ($url->query && ! $reqbody);
995 dpavlin 2
996 dpavlin 37 my $headers;
997    
998     if ($reqbody) {
999     $headers .= "POST $query HTTP/1.0\r\n";
1000     } else {
1001     $headers .= "GET $query HTTP/1.0\r\n";
1002     }
1003    
1004 dpavlin 40 $headers .= "Host: " . $url->host . ":" . $url->port . "\r\n";
1005 dpavlin 37 $headers .= "Connection: close\r\n";
1006     $headers .= "User-Agent: Search-Estraier/$Search::Estraier::VERSION\r\n";
1007 dpavlin 40 $headers .= "Content-Type: $content_type\r\n";
1008 dpavlin 37 $headers .= "Authorization: Basic $self->{auth}\r\n";
1009     my $len = 0;
1010     {
1011     use bytes;
1012     $len = length($reqbody) if ($reqbody);
1013     }
1014     $headers .= "Content-Length: $len\r\n";
1015     $headers .= "\r\n";
1016    
1017 dpavlin 33 my $sock = IO::Socket::INET->new(
1018     PeerAddr => $host,
1019     PeerPort => $port,
1020     Proto => 'tcp',
1021     Timeout => $self->{timeout} || 90,
1022 dpavlin 37 );
1023 dpavlin 2
1024 dpavlin 37 if (! $sock) {
1025     carp "can't open socket to $host:$port";
1026     return -1;
1027 dpavlin 33 }
1028 dpavlin 2
1029 dpavlin 40 warn $headers if ($self->{debug});
1030 dpavlin 39
1031 dpavlin 37 print $sock $headers or
1032     carp "can't send headers to network:\n$headers\n" and return -1;
1033    
1034     if ($reqbody) {
1035 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1036 dpavlin 40 print $sock $reqbody or
1037 dpavlin 37 carp "can't send request body to network:\n$$reqbody\n" and return -1;
1038 dpavlin 33 }
1039 dpavlin 2
1040 dpavlin 33 my $line = <$sock>;
1041     chomp($line);
1042     my ($schema, $res_status, undef) = split(/ */, $line, 3);
1043     return if ($schema !~ /^HTTP/ || ! $res_status);
1044 dpavlin 2
1045 dpavlin 40 $self->{status} = $res_status;
1046 dpavlin 39 warn "## response status: $res_status\n" if ($self->{debug});
1047 dpavlin 2
1048 dpavlin 33 # skip rest of headers
1049 dpavlin 38 $line = <$sock>;
1050     while ($line) {
1051 dpavlin 33 $line = <$sock>;
1052 dpavlin 38 $line =~ s/[\r\n]+$//;
1053 dpavlin 40 warn "## ", $line || 'NULL', " ##\n" if ($self->{debug});
1054 dpavlin 38 };
1055 dpavlin 2
1056 dpavlin 33 # read body
1057 dpavlin 38 $len = 0;
1058 dpavlin 33 do {
1059     $len = read($sock, my $buf, 8192);
1060     $$resbody .= $buf if ($resbody);
1061     } while ($len);
1062    
1063 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1064 dpavlin 39
1065 dpavlin 40 return $self->{status};
1066 dpavlin 2 }
1067    
1068     ###
1069    
1070     =head1 EXPORT
1071    
1072     Nothing.
1073    
1074     =head1 SEE ALSO
1075    
1076     L<http://hyperestraier.sourceforge.net/>
1077    
1078     Hyper Estraier Ruby interface on which this module is based.
1079    
1080     =head1 AUTHOR
1081    
1082     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1083    
1084    
1085     =head1 COPYRIGHT AND LICENSE
1086    
1087 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1088 dpavlin 2
1089     This library is free software; you can redistribute it and/or modify
1090     it under the GPL v2 or later.
1091    
1092     =cut
1093    
1094     1;

  ViewVC Help
Powered by ViewVC 1.1.26