/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 65 - (hide annotations)
Sat Jan 7 23:48:16 2006 UTC (18 years, 2 months ago) by dpavlin
Original Path: trunk/Estraier.pm
File size: 27415 byte(s)
add optional node parametar to new Search::Estraier::Node
1 dpavlin 2 package Search::Estraier;
2    
3     use 5.008;
4     use strict;
5     use warnings;
6    
7 dpavlin 65 our $VERSION = '0.02';
8 dpavlin 2
9     =head1 NAME
10    
11     Search::Estraier - pure perl module to use Hyper Estraier search engine
12    
13     =head1 SYNOPSIS
14    
15     use Search::Estraier;
16     my $est = new Search::Estraier();
17    
18     =head1 DESCRIPTION
19    
20     This module is implementation of node API of Hyper Estraier. Since it's
21     perl-only module with dependencies only on standard perl modules, it will
22     run on all platforms on which perl runs. It doesn't require compilation
23     or Hyper Estraier development files on target machine.
24    
25     It is implemented as multiple packages which closly resamble Ruby
26     implementation. It also includes methods to manage nodes.
27    
28     =cut
29    
30 dpavlin 42 =head1 Inheritable common methods
31    
32     This methods should really move somewhere else.
33    
34 dpavlin 15 =head2 _s
35    
36     Remove multiple whitespaces from string, as well as whitespaces at beginning or end
37    
38     my $text = $self->_s(" this is a text ");
39     $text = 'this is a text';
40    
41     =cut
42    
43     sub _s {
44     my $text = $_[1] || return;
45     $text =~ s/\s\s+/ /gs;
46     $text =~ s/^\s+//;
47     $text =~ s/\s+$//;
48     return $text;
49     }
50    
51 dpavlin 2 package Search::Estraier::Document;
52    
53 dpavlin 9 use Carp qw/croak confess/;
54 dpavlin 7
55 dpavlin 15 use Search::Estraier;
56     our @ISA = qw/Search::Estraier/;
57    
58 dpavlin 2 =head1 Search::Estraier::Document
59    
60 dpavlin 14 This class implements Document which is collection of attributes
61     (key=value), vectors (also key value) display text and hidden text.
62    
63 dpavlin 42
64 dpavlin 2 =head2 new
65    
66 dpavlin 14 Create new document, empty or from draft.
67    
68 dpavlin 2 my $doc = new Search::HyperEstraier::Document;
69 dpavlin 14 my $doc2 = new Search::HyperEstraier::Document( $draft );
70 dpavlin 2
71     =cut
72    
73     sub new {
74     my $class = shift;
75 dpavlin 14 my $self = {};
76 dpavlin 2 bless($self, $class);
77    
78 dpavlin 6 $self->{id} = -1;
79    
80 dpavlin 14 my $draft = shift;
81    
82     if ($draft) {
83     my $in_text = 0;
84     foreach my $line (split(/\n/, $draft)) {
85    
86     if ($in_text) {
87     if ($line =~ /^\t/) {
88     push @{ $self->{htexts} }, substr($line, 1);
89     } else {
90     push @{ $self->{dtexts} }, $line;
91     }
92     next;
93     }
94    
95     if ($line =~ m/^%VECTOR\t(.+)$/) {
96     my @fields = split(/\t/, $1);
97     for my $i ( 0 .. ($#fields - 1) ) {
98     $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ];
99     $i++;
100     }
101     next;
102     } elsif ($line =~ m/^%/) {
103     # What is this? comment?
104     #warn "$line\n";
105     next;
106     } elsif ($line =~ m/^$/) {
107     $in_text = 1;
108     next;
109     } elsif ($line =~ m/^(.+)=(.+)$/) {
110     $self->{attrs}->{ $1 } = $2;
111     next;
112     }
113    
114     warn "draft ignored: $line\n";
115     }
116     }
117    
118 dpavlin 2 $self ? return $self : return undef;
119     }
120    
121 dpavlin 4
122 dpavlin 2 =head2 add_attr
123    
124 dpavlin 6 Add an attribute.
125    
126 dpavlin 2 $doc->add_attr( name => 'value' );
127    
128 dpavlin 9 Delete attribute using
129 dpavlin 5
130     $doc->add_attr( name => undef );
131    
132 dpavlin 2 =cut
133    
134     sub add_attr {
135     my $self = shift;
136     my $attrs = {@_};
137    
138     while (my ($name, $value) = each %{ $attrs }) {
139 dpavlin 9 if (! defined($value)) {
140 dpavlin 15 delete( $self->{attrs}->{ $self->_s($name) } );
141 dpavlin 9 } else {
142 dpavlin 15 $self->{attrs}->{ $self->_s($name) } = $self->_s($value);
143 dpavlin 9 }
144 dpavlin 2 }
145 dpavlin 8
146     return 1;
147 dpavlin 2 }
148    
149 dpavlin 5
150     =head2 add_text
151    
152 dpavlin 6 Add a sentence of text.
153    
154 dpavlin 5 $doc->add_text('this is example text to display');
155    
156     =cut
157    
158     sub add_text {
159     my $self = shift;
160     my $text = shift;
161     return unless defined($text);
162    
163 dpavlin 15 push @{ $self->{dtexts} }, $self->_s($text);
164 dpavlin 5 }
165    
166    
167     =head2 add_hidden_text
168    
169 dpavlin 6 Add a hidden sentence.
170    
171 dpavlin 5 $doc->add_hidden_text('this is example text just for search');
172    
173     =cut
174    
175     sub add_hidden_text {
176     my $self = shift;
177     my $text = shift;
178     return unless defined($text);
179    
180 dpavlin 15 push @{ $self->{htexts} }, $self->_s($text);
181 dpavlin 5 }
182    
183 dpavlin 42
184 dpavlin 6 =head2 id
185    
186     Get the ID number of document. If the object has never been registred, C<-1> is returned.
187    
188     print $doc->id;
189    
190     =cut
191    
192     sub id {
193     my $self = shift;
194     return $self->{id};
195     }
196    
197 dpavlin 42
198 dpavlin 7 =head2 attr_names
199    
200 dpavlin 9 Returns array with attribute names from document object.
201 dpavlin 7
202     my @attrs = $doc->attr_names;
203    
204     =cut
205    
206     sub attr_names {
207     my $self = shift;
208 dpavlin 60 return unless ($self->{attrs});
209     #croak "attr_names return array, not scalar" if (! wantarray);
210 dpavlin 7 return sort keys %{ $self->{attrs} };
211     }
212    
213 dpavlin 8
214     =head2 attr
215    
216 dpavlin 9 Returns value of an attribute.
217 dpavlin 8
218     my $value = $doc->attr( 'attribute' );
219    
220     =cut
221    
222     sub attr {
223     my $self = shift;
224     my $name = shift;
225 dpavlin 60 return unless (defined($name) && $self->{attrs});
226     return $self->{attrs}->{ $name };
227 dpavlin 8 }
228    
229 dpavlin 9
230     =head2 texts
231    
232     Returns array with text sentences.
233    
234     my @texts = $doc->texts;
235    
236     =cut
237    
238     sub texts {
239     my $self = shift;
240 dpavlin 60 #confess "texts return array, not scalar" if (! wantarray);
241     return @{ $self->{dtexts} } if ($self->{dtexts});
242 dpavlin 9 }
243    
244 dpavlin 42
245 dpavlin 12 =head2 cat_texts
246    
247     Return whole text as single scalar.
248    
249     my $text = $doc->cat_texts;
250    
251     =cut
252    
253     sub cat_texts {
254     my $self = shift;
255 dpavlin 60 return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
256 dpavlin 12 }
257    
258 dpavlin 42
259 dpavlin 5 =head2 dump_draft
260    
261 dpavlin 13 Dump draft data from document object.
262    
263 dpavlin 5 print $doc->dump_draft;
264    
265     =cut
266    
267     sub dump_draft {
268 dpavlin 13 my $self = shift;
269     my $draft;
270    
271     foreach my $attr_name (sort keys %{ $self->{attrs} }) {
272     $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";
273     }
274    
275     if ($self->{kwords}) {
276     $draft .= '%%VECTOR';
277     while (my ($key, $value) = each %{ $self->{kwords} }) {
278     $draft .= "\t$key\t$value";
279     }
280     $draft .= "\n";
281     }
282    
283     $draft .= "\n";
284    
285 dpavlin 40 $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
286     $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
287 dpavlin 13
288     return $draft;
289 dpavlin 5 }
290    
291 dpavlin 42
292 dpavlin 4 =head2 delete
293 dpavlin 2
294 dpavlin 4 Empty document object
295 dpavlin 2
296 dpavlin 4 $doc->delete;
297    
298 dpavlin 15 This function is addition to original Ruby API, and since it was included in C wrappers it's here as a
299     convinience. Document objects which go out of scope will be destroyed
300     automatically.
301    
302 dpavlin 4 =cut
303    
304     sub delete {
305     my $self = shift;
306    
307 dpavlin 14 foreach my $data (qw/attrs dtexts stexts kwords/) {
308 dpavlin 5 delete($self->{$data});
309     }
310 dpavlin 4
311 dpavlin 10 $self->{id} = -1;
312    
313 dpavlin 4 return 1;
314     }
315    
316    
317    
318 dpavlin 15 package Search::Estraier::Condition;
319 dpavlin 4
320 dpavlin 16 use Carp qw/confess croak/;
321    
322 dpavlin 15 use Search::Estraier;
323     our @ISA = qw/Search::Estraier/;
324 dpavlin 4
325 dpavlin 16 =head1 Search::Estraier::Condition
326    
327     =head2 new
328    
329     my $cond = new Search::HyperEstraier::Condition;
330    
331     =cut
332    
333     sub new {
334     my $class = shift;
335     my $self = {};
336     bless($self, $class);
337    
338 dpavlin 19 $self->{max} = -1;
339     $self->{options} = 0;
340    
341 dpavlin 16 $self ? return $self : return undef;
342     }
343    
344 dpavlin 42
345 dpavlin 16 =head2 set_phrase
346    
347     $cond->set_phrase('search phrase');
348    
349     =cut
350    
351     sub set_phrase {
352     my $self = shift;
353     $self->{phrase} = $self->_s( shift );
354     }
355    
356 dpavlin 42
357 dpavlin 16 =head2 add_attr
358    
359     $cond->add_attr('@URI STRINC /~dpavlin/');
360    
361     =cut
362    
363     sub add_attr {
364     my $self = shift;
365     my $attr = shift || return;
366     push @{ $self->{attrs} }, $self->_s( $attr );
367     }
368    
369 dpavlin 42
370 dpavlin 16 =head2 set_order
371    
372     $cond->set_order('@mdate NUMD');
373    
374     =cut
375    
376     sub set_order {
377     my $self = shift;
378     $self->{order} = shift;
379     }
380    
381 dpavlin 42
382 dpavlin 16 =head2 set_max
383    
384     $cond->set_max(42);
385    
386     =cut
387    
388     sub set_max {
389     my $self = shift;
390     my $max = shift;
391 dpavlin 43 croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
392 dpavlin 16 $self->{max} = $max;
393     }
394    
395 dpavlin 42
396 dpavlin 16 =head2 set_options
397    
398     $cond->set_options( SURE => 1 );
399    
400     =cut
401    
402 dpavlin 15 my $options = {
403     # check N-gram keys skipping by three
404     SURE => 1 << 0,
405     # check N-gram keys skipping by two
406     USUAL => 1 << 1,
407     # without TF-IDF tuning
408     FAST => 1 << 2,
409     # with the simplified phrase
410     AGITO => 1 << 3,
411     # check every N-gram key
412     NOIDF => 1 << 4,
413     # check N-gram keys skipping by one
414     SIMPLE => 1 << 10,
415     };
416    
417 dpavlin 16 sub set_options {
418     my $self = shift;
419     my $option = shift;
420     confess "unknown option" unless ($options->{$option});
421     $self->{options} ||= $options->{$option};
422 dpavlin 4 }
423    
424 dpavlin 42
425 dpavlin 18 =head2 phrase
426    
427     Return search phrase.
428    
429     print $cond->phrase;
430    
431     =cut
432    
433     sub phrase {
434     my $self = shift;
435     return $self->{phrase};
436     }
437    
438 dpavlin 42
439 dpavlin 19 =head2 order
440 dpavlin 18
441 dpavlin 19 Return search result order.
442    
443     print $cond->order;
444    
445     =cut
446    
447     sub order {
448     my $self = shift;
449     return $self->{order};
450     }
451    
452 dpavlin 42
453 dpavlin 19 =head2 attrs
454    
455     Return search result attrs.
456    
457     my @cond_attrs = $cond->attrs;
458    
459     =cut
460    
461     sub attrs {
462     my $self = shift;
463     #croak "attrs return array, not scalar" if (! wantarray);
464 dpavlin 60 return @{ $self->{attrs} } if ($self->{attrs});
465 dpavlin 19 }
466    
467 dpavlin 42
468 dpavlin 19 =head2 max
469    
470     Return maximum number of results.
471    
472     print $cond->max;
473    
474     C<-1> is returned for unitialized value, C<0> is unlimited.
475    
476     =cut
477    
478     sub max {
479     my $self = shift;
480     return $self->{max};
481     }
482    
483 dpavlin 42
484 dpavlin 19 =head2 options
485    
486     Return options for this condition.
487    
488     print $cond->options;
489    
490     Options are returned in numerical form.
491    
492     =cut
493    
494     sub options {
495     my $self = shift;
496     return $self->{options};
497     }
498    
499    
500 dpavlin 20 package Search::Estraier::ResultDocument;
501    
502 dpavlin 24 use Carp qw/croak/;
503 dpavlin 20
504 dpavlin 24 #use Search::Estraier;
505     #our @ISA = qw/Search::Estraier/;
506 dpavlin 20
507     =head1 Search::Estraier::ResultDocument
508    
509     =head2 new
510    
511 dpavlin 23 my $rdoc = new Search::HyperEstraier::ResultDocument(
512 dpavlin 20 uri => 'http://localhost/document/uri/42',
513     attrs => {
514     foo => 1,
515     bar => 2,
516     },
517     snippet => 'this is a text of snippet'
518     keywords => 'this\tare\tkeywords'
519     );
520    
521     =cut
522    
523     sub new {
524     my $class = shift;
525     my $self = {@_};
526     bless($self, $class);
527    
528 dpavlin 62 croak "missing uri for ResultDocument" unless defined($self->{uri});
529 dpavlin 20
530     $self ? return $self : return undef;
531     }
532    
533 dpavlin 42
534 dpavlin 23 =head2 uri
535 dpavlin 20
536 dpavlin 23 Return URI of result document
537 dpavlin 20
538 dpavlin 23 print $rdoc->uri;
539    
540     =cut
541    
542     sub uri {
543     my $self = shift;
544     return $self->{uri};
545     }
546    
547    
548     =head2 attr_names
549    
550     Returns array with attribute names from result document object.
551    
552     my @attrs = $rdoc->attr_names;
553    
554     =cut
555    
556     sub attr_names {
557     my $self = shift;
558     croak "attr_names return array, not scalar" if (! wantarray);
559     return sort keys %{ $self->{attrs} };
560     }
561    
562 dpavlin 42
563 dpavlin 23 =head2 attr
564    
565     Returns value of an attribute.
566    
567     my $value = $rdoc->attr( 'attribute' );
568    
569     =cut
570    
571     sub attr {
572     my $self = shift;
573     my $name = shift || return;
574     return $self->{attrs}->{ $name };
575     }
576    
577 dpavlin 42
578 dpavlin 23 =head2 snippet
579    
580     Return snippet from result document
581    
582     print $rdoc->snippet;
583    
584     =cut
585    
586     sub snippet {
587     my $self = shift;
588     return $self->{snippet};
589     }
590    
591 dpavlin 42
592 dpavlin 23 =head2 keywords
593    
594     Return keywords from result document
595    
596     print $rdoc->keywords;
597    
598     =cut
599    
600     sub keywords {
601     my $self = shift;
602     return $self->{keywords};
603     }
604    
605    
606 dpavlin 25 package Search::Estraier::NodeResult;
607    
608     use Carp qw/croak/;
609    
610     #use Search::Estraier;
611     #our @ISA = qw/Search::Estraier/;
612    
613     =head1 Search::Estraier::NodeResult
614    
615     =head2 new
616    
617     my $res = new Search::HyperEstraier::NodeResult(
618     docs => @array_of_rdocs,
619     hits => %hash_with_hints,
620     );
621    
622     =cut
623    
624     sub new {
625     my $class = shift;
626     my $self = {@_};
627     bless($self, $class);
628    
629     foreach my $f (qw/docs hints/) {
630     croak "missing $f for ResultDocument" unless defined($self->{$f});
631     }
632    
633     $self ? return $self : return undef;
634     }
635    
636 dpavlin 42
637 dpavlin 25 =head2 doc_num
638    
639     Return number of documents
640    
641     print $res->doc_num;
642    
643     =cut
644    
645     sub doc_num {
646     my $self = shift;
647 dpavlin 53 return $#{$self->{docs}} + 1;
648 dpavlin 25 }
649    
650 dpavlin 42
651 dpavlin 25 =head2 get_doc
652    
653     Return single document
654    
655     my $doc = $res->get_doc( 42 );
656    
657     Returns undef if document doesn't exist.
658    
659     =cut
660    
661     sub get_doc {
662     my $self = shift;
663     my $num = shift;
664 dpavlin 43 croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
665 dpavlin 25 return undef if ($num < 0 || $num > $self->{docs});
666     return $self->{docs}->[$num];
667     }
668    
669 dpavlin 42
670 dpavlin 25 =head2 hint
671    
672     Return specific hint from results.
673    
674     print $rec->hint( 'VERSION' );
675    
676     Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
677     C<TIME>, C<LINK#n>, C<VIEW>.
678    
679     =cut
680    
681     sub hint {
682     my $self = shift;
683     my $key = shift || return;
684     return $self->{hints}->{$key};
685     }
686    
687    
688 dpavlin 27 package Search::Estraier::Node;
689    
690 dpavlin 44 use Carp qw/carp croak confess/;
691 dpavlin 33 use URI;
692 dpavlin 36 use MIME::Base64;
693 dpavlin 33 use IO::Socket::INET;
694 dpavlin 49 use URI::Escape qw/uri_escape/;
695 dpavlin 29
696 dpavlin 27 =head1 Search::Estraier::Node
697    
698     =head2 new
699    
700     my $node = new Search::HyperEstraier::Node;
701    
702 dpavlin 65 or optionally with C<url> as parametar
703    
704     my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
705    
706 dpavlin 27 =cut
707    
708     sub new {
709     my $class = shift;
710     my $self = {
711     pxport => -1,
712 dpavlin 33 timeout => 0, # this used to be -1
713 dpavlin 27 dnum => -1,
714     wnum => -1,
715     size => -1.0,
716     wwidth => 480,
717     hwidth => 96,
718     awidth => 96,
719     status => -1,
720     };
721     bless($self, $class);
722    
723 dpavlin 65 if ($#_ == 0) {
724     $self->{url} = shift;
725     } else {
726     my $args = {@_};
727 dpavlin 39
728 dpavlin 65 $self->{debug} = $args->{debug};
729     warn "## Node debug on\n" if ($self->{debug});
730     }
731 dpavlin 57
732 dpavlin 27 $self ? return $self : return undef;
733     }
734    
735 dpavlin 42
736 dpavlin 29 =head2 set_url
737    
738     Specify URL to node server
739    
740     $node->set_url('http://localhost:1978');
741    
742     =cut
743    
744     sub set_url {
745     my $self = shift;
746     $self->{url} = shift;
747     }
748    
749 dpavlin 42
750 dpavlin 29 =head2 set_proxy
751    
752     Specify proxy server to connect to node server
753    
754     $node->set_proxy('proxy.example.com', 8080);
755    
756     =cut
757    
758     sub set_proxy {
759     my $self = shift;
760     my ($host,$port) = @_;
761 dpavlin 43 croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
762 dpavlin 29 $self->{pxhost} = $host;
763     $self->{pxport} = $port;
764     }
765    
766 dpavlin 42
767 dpavlin 30 =head2 set_timeout
768    
769     Specify timeout of connection in seconds
770    
771     $node->set_timeout( 15 );
772    
773     =cut
774    
775     sub set_timeout {
776     my $self = shift;
777     my $sec = shift;
778 dpavlin 43 croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
779 dpavlin 30 $self->{timeout} = $sec;
780     }
781    
782 dpavlin 42
783 dpavlin 31 =head2 set_auth
784    
785     Specify name and password for authentication to node server.
786    
787     $node->set_auth('clint','eastwood');
788    
789     =cut
790    
791     sub set_auth {
792     my $self = shift;
793     my ($login,$passwd) = @_;
794 dpavlin 40 my $basic_auth = encode_base64( "$login:$passwd" );
795     chomp($basic_auth);
796     $self->{auth} = $basic_auth;
797 dpavlin 31 }
798    
799 dpavlin 42
800 dpavlin 32 =head2 status
801    
802     Return status code of last request.
803    
804 dpavlin 40 print $node->status;
805 dpavlin 32
806     C<-1> means connection failure.
807    
808     =cut
809    
810     sub status {
811     my $self = shift;
812     return $self->{status};
813     }
814    
815 dpavlin 42
816 dpavlin 40 =head2 put_doc
817    
818 dpavlin 41 Add a document
819 dpavlin 40
820 dpavlin 41 $node->put_doc( $document_draft ) or die "can't add document";
821    
822     Return true on success or false on failture.
823    
824 dpavlin 40 =cut
825    
826     sub put_doc {
827     my $self = shift;
828     my $doc = shift || return;
829 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
830 dpavlin 41 $self->shuttle_url( $self->{url} . '/put_doc',
831     'text/x-estraier-draft',
832     $doc->dump_draft,
833     undef
834     ) == 200;
835 dpavlin 40 }
836    
837 dpavlin 41
838     =head2 out_doc
839    
840     Remove a document
841    
842     $node->out_doc( document_id ) or "can't remove document";
843    
844     Return true on success or false on failture.
845    
846     =cut
847    
848     sub out_doc {
849     my $self = shift;
850     my $id = shift || return;
851     return unless ($self->{url});
852 dpavlin 43 croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
853 dpavlin 41 $self->shuttle_url( $self->{url} . '/out_doc',
854     'application/x-www-form-urlencoded',
855     "id=$id",
856     undef
857     ) == 200;
858     }
859    
860    
861     =head2 out_doc_by_uri
862    
863     Remove a registrated document using it's uri
864    
865 dpavlin 45 $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
866 dpavlin 41
867     Return true on success or false on failture.
868    
869     =cut
870    
871     sub out_doc_by_uri {
872     my $self = shift;
873     my $uri = shift || return;
874     return unless ($self->{url});
875     $self->shuttle_url( $self->{url} . '/out_doc',
876     'application/x-www-form-urlencoded',
877 dpavlin 50 "uri=" . uri_escape($uri),
878 dpavlin 41 undef
879     ) == 200;
880     }
881    
882 dpavlin 42
883     =head2 edit_doc
884    
885     Edit attributes of a document
886    
887     $node->edit_doc( $document_draft ) or die "can't edit document";
888    
889     Return true on success or false on failture.
890    
891     =cut
892    
893     sub edit_doc {
894     my $self = shift;
895     my $doc = shift || return;
896 dpavlin 47 return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
897 dpavlin 42 $self->shuttle_url( $self->{url} . '/edit_doc',
898     'text/x-estraier-draft',
899     $doc->dump_draft,
900     undef
901     ) == 200;
902     }
903    
904    
905 dpavlin 43 =head2 get_doc
906    
907     Retreive document
908    
909     my $doc = $node->get_doc( document_id ) or die "can't get document";
910    
911     Return true on success or false on failture.
912    
913     =cut
914    
915     sub get_doc {
916     my $self = shift;
917     my $id = shift || return;
918     return $self->_fetch_doc( id => $id );
919     }
920    
921 dpavlin 44
922 dpavlin 43 =head2 get_doc_by_uri
923    
924     Retreive document
925    
926 dpavlin 45 my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
927 dpavlin 43
928     Return true on success or false on failture.
929    
930     =cut
931    
932     sub get_doc_by_uri {
933     my $self = shift;
934     my $uri = shift || return;
935     return $self->_fetch_doc( uri => $uri );
936     }
937    
938 dpavlin 44
939 dpavlin 49 =head2 get_doc_attr
940    
941     Retrieve the value of an atribute from object
942    
943     my $val = $node->get_doc_attr( document_id, 'attribute_name' ) or
944     die "can't get document attribute";
945    
946     =cut
947    
948     sub get_doc_attr {
949     my $self = shift;
950     my ($id,$name) = @_;
951     return unless ($id && $name);
952     return $self->_fetch_doc( id => $id, attr => $name );
953     }
954    
955    
956     =head2 get_doc_attr_by_uri
957    
958     Retrieve the value of an atribute from object
959    
960     my $val = $node->get_doc_attr_by_uri( document_id, 'attribute_name' ) or
961     die "can't get document attribute";
962    
963     =cut
964    
965     sub get_doc_attr_by_uri {
966     my $self = shift;
967     my ($uri,$name) = @_;
968     return unless ($uri && $name);
969     return $self->_fetch_doc( uri => $uri, attr => $name );
970     }
971    
972    
973 dpavlin 44 =head2 etch_doc
974    
975     Exctract document keywords
976    
977     my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
978    
979     =cut
980    
981 dpavlin 49 sub etch_doc {
982 dpavlin 44 my $self = shift;
983     my $id = shift || return;
984     return $self->_fetch_doc( id => $id, etch => 1 );
985     }
986    
987     =head2 etch_doc_by_uri
988    
989     Retreive document
990    
991 dpavlin 45 my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
992 dpavlin 44
993     Return true on success or false on failture.
994    
995     =cut
996    
997     sub etch_doc_by_uri {
998     my $self = shift;
999     my $uri = shift || return;
1000     return $self->_fetch_doc( uri => $uri, etch => 1 );
1001     }
1002    
1003    
1004 dpavlin 45 =head2 uri_to_id
1005    
1006     Get ID of document specified by URI
1007    
1008     my $id = $node->uri_to_id( 'file:///document/uri/42' );
1009    
1010     =cut
1011    
1012     sub uri_to_id {
1013     my $self = shift;
1014     my $uri = shift || return;
1015     return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
1016     }
1017    
1018    
1019 dpavlin 43 =head2 _fetch_doc
1020    
1021 dpavlin 44 Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
1022     C<etch_doc>, C<etch_doc_by_uri>.
1023 dpavlin 43
1024 dpavlin 45 # this will decode received draft into Search::Estraier::Document object
1025     my $doc = $node->_fetch_doc( id => 42 );
1026     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
1027 dpavlin 43
1028 dpavlin 45 # to extract keywords, add etch
1029     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
1030     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
1031    
1032 dpavlin 49 # to get document attrubute add attr
1033     my $doc = $node->_fetch_doc( id => 42, attr => '@mdate' );
1034     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', attr => '@mdate' );
1035    
1036 dpavlin 45 # more general form which allows implementation of
1037     # uri_to_id
1038     my $id = $node->_fetch_doc(
1039     uri => 'file:///document/uri/42',
1040     path => '/uri_to_id',
1041     chomp_resbody => 1
1042     );
1043    
1044 dpavlin 43 =cut
1045    
1046     sub _fetch_doc {
1047     my $self = shift;
1048 dpavlin 44 my $a = {@_};
1049     return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1050    
1051     my ($arg, $resbody);
1052    
1053 dpavlin 45 my $path = $a->{path} || '/get_doc';
1054 dpavlin 44 $path = '/etch_doc' if ($a->{etch});
1055    
1056     if ($a->{id}) {
1057     croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1058     $arg = 'id=' . $a->{id};
1059     } elsif ($a->{uri}) {
1060 dpavlin 50 $arg = 'uri=' . uri_escape($a->{uri});
1061 dpavlin 44 } else {
1062     confess "unhandled argument. Need id or uri.";
1063 dpavlin 43 }
1064 dpavlin 44
1065 dpavlin 49 if ($a->{attr}) {
1066     $path = '/get_doc_attr';
1067     $arg .= '&attr=' . uri_escape($a->{attr});
1068     $a->{chomp_resbody} = 1;
1069     }
1070    
1071 dpavlin 44 my $rv = $self->shuttle_url( $self->{url} . $path,
1072 dpavlin 43 'application/x-www-form-urlencoded',
1073 dpavlin 44 $arg,
1074 dpavlin 45 \$resbody,
1075 dpavlin 43 );
1076 dpavlin 44
1077 dpavlin 43 return if ($rv != 200);
1078 dpavlin 44
1079     if ($a->{etch}) {
1080     $self->{kwords} = {};
1081     return +{} unless ($resbody);
1082     foreach my $l (split(/\n/, $resbody)) {
1083     my ($k,$v) = split(/\t/, $l, 2);
1084     $self->{kwords}->{$k} = $v if ($v);
1085     }
1086     return $self->{kwords};
1087 dpavlin 45 } elsif ($a->{chomp_resbody}) {
1088     return unless (defined($resbody));
1089     chomp($resbody);
1090     return $resbody;
1091 dpavlin 44 } else {
1092     return new Search::Estraier::Document($resbody);
1093     }
1094 dpavlin 43 }
1095    
1096    
1097 dpavlin 48 =head2 name
1098 dpavlin 43
1099 dpavlin 48 my $node_name = $node->name;
1100 dpavlin 43
1101 dpavlin 48 =cut
1102    
1103     sub name {
1104     my $self = shift;
1105 dpavlin 55 $self->_set_info unless ($self->{name});
1106 dpavlin 48 return $self->{name};
1107     }
1108    
1109    
1110     =head2 label
1111    
1112     my $node_label = $node->label;
1113    
1114     =cut
1115    
1116     sub label {
1117     my $self = shift;
1118 dpavlin 55 $self->_set_info unless ($self->{label});
1119 dpavlin 48 return $self->{label};
1120     }
1121    
1122    
1123     =head2 doc_num
1124    
1125     my $documents_in_node = $node->doc_num;
1126    
1127     =cut
1128    
1129     sub doc_num {
1130     my $self = shift;
1131 dpavlin 55 $self->_set_info if ($self->{dnum} < 0);
1132 dpavlin 48 return $self->{dnum};
1133     }
1134    
1135    
1136     =head2 word_num
1137    
1138     my $words_in_node = $node->word_num;
1139    
1140     =cut
1141    
1142     sub word_num {
1143     my $self = shift;
1144 dpavlin 55 $self->_set_info if ($self->{wnum} < 0);
1145 dpavlin 48 return $self->{wnum};
1146     }
1147    
1148    
1149     =head2 size
1150    
1151     my $node_size = $node->size;
1152    
1153     =cut
1154    
1155     sub size {
1156     my $self = shift;
1157 dpavlin 55 $self->_set_info if ($self->{size} < 0);
1158 dpavlin 48 return $self->{size};
1159     }
1160    
1161    
1162 dpavlin 51 =head2 search
1163 dpavlin 48
1164 dpavlin 51 Search documents which match condition
1165    
1166     my $nres = $node->search( $cond, $depth );
1167    
1168     C<$cond> is C<Search::Estraier::Condition> object, while <$depth> specifies
1169     depth for meta search.
1170    
1171     Function results C<Search::Estraier::NodeResult> object.
1172    
1173     =cut
1174    
1175     sub search {
1176     my $self = shift;
1177     my ($cond, $depth) = @_;
1178     return unless ($cond && defined($depth) && $self->{url});
1179     croak "cond mush be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1180     croak "depth needs number, not '$depth'" unless ($depth =~ m/^\d+$/);
1181    
1182 dpavlin 52 my $resbody;
1183 dpavlin 51
1184 dpavlin 52 my $rv = $self->shuttle_url( $self->{url} . '/search',
1185 dpavlin 53 'application/x-www-form-urlencoded',
1186 dpavlin 61 $self->cond_to_query( $cond, $depth ),
1187 dpavlin 52 \$resbody,
1188     );
1189     return if ($rv != 200);
1190    
1191     my (@docs, $hints);
1192    
1193     my @lines = split(/\n/, $resbody);
1194     return unless (@lines);
1195    
1196     my $border = $lines[0];
1197     my $isend = 0;
1198     my $lnum = 1;
1199    
1200     while ( $lnum <= $#lines ) {
1201     my $line = $lines[$lnum];
1202     $lnum++;
1203    
1204     #warn "## $line\n";
1205     if ($line && $line =~ m/^\Q$border\E(:END)*$/) {
1206     $isend = $1;
1207     last;
1208     }
1209    
1210     if ($line =~ /\t/) {
1211     my ($k,$v) = split(/\t/, $line, 2);
1212     $hints->{$k} = $v;
1213     }
1214     }
1215    
1216     my $snum = $lnum;
1217    
1218     while( ! $isend && $lnum <= $#lines ) {
1219     my $line = $lines[$lnum];
1220 dpavlin 53 #warn "# $lnum: $line\n";
1221 dpavlin 52 $lnum++;
1222    
1223     if ($line && $line =~ m/^\Q$border\E/) {
1224     if ($lnum > $snum) {
1225     my $rdattrs;
1226     my $rdvector;
1227     my $rdsnippet;
1228    
1229     my $rlnum = $snum;
1230     while ($rlnum < $lnum - 1 ) {
1231     #my $rdline = $self->_s($lines[$rlnum]);
1232     my $rdline = $lines[$rlnum];
1233     $rlnum++;
1234     last unless ($rdline);
1235     if ($rdline =~ /^%/) {
1236     $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/);
1237 dpavlin 53 } elsif($rdline =~ /=/) {
1238     $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/);
1239 dpavlin 52 } else {
1240 dpavlin 53 confess "invalid format of response";
1241 dpavlin 52 }
1242     }
1243     while($rlnum < $lnum - 1) {
1244     my $rdline = $lines[$rlnum];
1245     $rlnum++;
1246     $rdsnippet .= "$rdline\n";
1247     }
1248 dpavlin 53 #warn Dumper($rdvector, $rdattrs, $rdsnippet);
1249 dpavlin 52 if (my $rduri = $rdattrs->{'@uri'}) {
1250     push @docs, new Search::Estraier::ResultDocument(
1251     uri => $rduri,
1252     attrs => $rdattrs,
1253     snippet => $rdsnippet,
1254     keywords => $rdvector,
1255     );
1256     }
1257     }
1258     $snum = $lnum;
1259     #warn "### $line\n";
1260     $isend = 1 if ($line =~ /:END$/);
1261     }
1262    
1263     }
1264    
1265     if (! $isend) {
1266     warn "received result doesn't have :END\n$resbody";
1267     return;
1268     }
1269    
1270 dpavlin 53 #warn Dumper(\@docs, $hints);
1271    
1272 dpavlin 52 return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints );
1273 dpavlin 51 }
1274    
1275    
1276     =head2 cond_to_query
1277    
1278 dpavlin 55 Return URI encoded string generated from Search::Estraier::Condition
1279    
1280 dpavlin 61 my $args = $node->cond_to_query( $cond, $depth );
1281 dpavlin 51
1282     =cut
1283    
1284     sub cond_to_query {
1285     my $self = shift;
1286    
1287     my $cond = shift || return;
1288     croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1289 dpavlin 61 my $depth = shift;
1290 dpavlin 51
1291     my @args;
1292    
1293     if (my $phrase = $cond->phrase) {
1294     push @args, 'phrase=' . uri_escape($phrase);
1295     }
1296    
1297     if (my @attrs = $cond->attrs) {
1298     for my $i ( 0 .. $#attrs ) {
1299 dpavlin 63 push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1300 dpavlin 51 }
1301     }
1302    
1303     if (my $order = $cond->order) {
1304     push @args, 'order=' . uri_escape($order);
1305     }
1306    
1307     if (my $max = $cond->max) {
1308     push @args, 'max=' . $max;
1309     } else {
1310     push @args, 'max=' . (1 << 30);
1311     }
1312    
1313     if (my $options = $cond->options) {
1314     push @args, 'options=' . $options;
1315     }
1316    
1317 dpavlin 61 push @args, 'depth=' . $depth if ($depth);
1318 dpavlin 51 push @args, 'wwidth=' . $self->{wwidth};
1319     push @args, 'hwidth=' . $self->{hwidth};
1320     push @args, 'awidth=' . $self->{awidth};
1321    
1322     return join('&', @args);
1323     }
1324    
1325    
1326 dpavlin 33 =head2 shuttle_url
1327 dpavlin 32
1328 dpavlin 33 This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
1329     master.
1330 dpavlin 2
1331 dpavlin 52 my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
1332 dpavlin 2
1333 dpavlin 33 C<$resheads> and C<$resbody> booleans controll if response headers and/or response
1334     body will be saved within object.
1335 dpavlin 2
1336     =cut
1337    
1338 dpavlin 59 use LWP::UserAgent;
1339    
1340 dpavlin 33 sub shuttle_url {
1341     my $self = shift;
1342 dpavlin 2
1343 dpavlin 33 my ($url, $content_type, $reqbody, $resbody) = @_;
1344 dpavlin 2
1345 dpavlin 40 $self->{status} = -1;
1346 dpavlin 33
1347 dpavlin 41 warn "## $url\n" if ($self->{debug});
1348 dpavlin 36
1349 dpavlin 33 $url = new URI($url);
1350 dpavlin 37 if (
1351     !$url || !$url->scheme || !$url->scheme eq 'http' ||
1352     !$url->host || !$url->port || $url->port < 1
1353     ) {
1354     carp "can't parse $url\n";
1355     return -1;
1356     }
1357 dpavlin 33
1358 dpavlin 59 my $ua = LWP::UserAgent->new;
1359     $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
1360 dpavlin 33
1361 dpavlin 59 my $req;
1362 dpavlin 37 if ($reqbody) {
1363 dpavlin 59 $req = HTTP::Request->new(POST => $url);
1364 dpavlin 37 } else {
1365 dpavlin 59 $req = HTTP::Request->new(GET => $url);
1366 dpavlin 37 }
1367    
1368 dpavlin 59 $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1369     $req->headers->header( 'Connection', 'close' );
1370     $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );
1371     $req->content_type( $content_type );
1372 dpavlin 37
1373 dpavlin 59 warn $req->headers->as_string,"\n" if ($self->{debug});
1374 dpavlin 2
1375 dpavlin 37 if ($reqbody) {
1376 dpavlin 41 warn "$reqbody\n" if ($self->{debug});
1377 dpavlin 59 $req->content( $reqbody );
1378 dpavlin 33 }
1379 dpavlin 2
1380 dpavlin 59 my $res = $ua->request($req) || croak "can't make request to $url: $!";
1381 dpavlin 2
1382 dpavlin 59 warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1383 dpavlin 2
1384 dpavlin 59 return -1 if (! $res->is_success);
1385 dpavlin 2
1386 dpavlin 59 ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1387 dpavlin 33
1388 dpavlin 59 $$resbody .= $res->content;
1389    
1390 dpavlin 40 warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1391 dpavlin 39
1392 dpavlin 40 return $self->{status};
1393 dpavlin 2 }
1394    
1395 dpavlin 48
1396 dpavlin 55 =head2 set_snippet_width
1397 dpavlin 48
1398 dpavlin 55 Set width of snippets in results
1399    
1400     $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1401    
1402     C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1403     is not sent with results. If it is negative, whole document text is sent instead of snippet.
1404    
1405     C<$hwidth> specified width of strings from beginning of string. Default
1406     value is C<96>. Negative or zero value keep previous value.
1407    
1408     C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1409     If negative of zero value is provided previous value is kept unchanged.
1410    
1411     =cut
1412    
1413     sub set_snippet_width {
1414     my $self = shift;
1415    
1416     my ($wwidth, $hwidth, $awidth) = @_;
1417     $self->{wwidth} = $wwidth;
1418     $self->{hwidth} = $hwidth if ($hwidth >= 0);
1419     $self->{awidth} = $awidth if ($awidth >= 0);
1420     }
1421    
1422    
1423 dpavlin 56 =head2 set_user
1424 dpavlin 55
1425 dpavlin 56 Manage users of node
1426    
1427     $node->set_user( 'name', $mode );
1428    
1429     C<$mode> can be one of:
1430    
1431     =over 4
1432    
1433     =item 0
1434    
1435     delete account
1436    
1437     =item 1
1438    
1439     set administrative right for user
1440    
1441     =item 2
1442    
1443     set user account as guest
1444    
1445     =back
1446    
1447     Return true on success, otherwise false.
1448    
1449     =cut
1450    
1451     sub set_user {
1452     my $self = shift;
1453     my ($name, $mode) = @_;
1454    
1455     return unless ($self->{url});
1456     croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1457    
1458     $self->shuttle_url( $self->{url} . '/_set_user',
1459     'text/plain',
1460     'name=' . uri_escape($name) . '&mode=' . $mode,
1461     undef
1462     ) == 200;
1463     }
1464    
1465    
1466 dpavlin 57 =head2 set_link
1467    
1468     Manage node links
1469    
1470     $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1471    
1472     If C<$credit> is negative, link is removed.
1473    
1474     =cut
1475    
1476     sub set_link {
1477     my $self = shift;
1478     my ($url, $label, $credit) = @_;
1479    
1480     return unless ($self->{url});
1481     croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1482    
1483     my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1484     $reqbody .= '&credit=' . $credit if ($credit > 0);
1485    
1486     $self->shuttle_url( $self->{url} . '/_set_link',
1487     'text/plain',
1488     $reqbody,
1489     undef
1490     ) == 200;
1491     }
1492    
1493    
1494 dpavlin 55 =head1 PRIVATE METHODS
1495    
1496     You could call those directly, but you don't have to. I hope.
1497    
1498     =head2 _set_info
1499    
1500 dpavlin 48 Set information for node
1501    
1502 dpavlin 55 $node->_set_info;
1503 dpavlin 48
1504     =cut
1505    
1506 dpavlin 55 sub _set_info {
1507 dpavlin 48 my $self = shift;
1508    
1509     $self->{status} = -1;
1510     return unless ($self->{url});
1511    
1512     my $resbody;
1513     my $rv = $self->shuttle_url( $self->{url} . '/inform',
1514     'text/plain',
1515     undef,
1516     \$resbody,
1517     );
1518    
1519     return if ($rv != 200 || !$resbody);
1520    
1521 dpavlin 58 # it seems that response can have multiple line endings
1522     $resbody =~ s/[\r\n]+$//;
1523 dpavlin 48
1524     ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =
1525     split(/\t/, $resbody, 5);
1526    
1527     }
1528    
1529 dpavlin 2 ###
1530    
1531     =head1 EXPORT
1532    
1533     Nothing.
1534    
1535     =head1 SEE ALSO
1536    
1537     L<http://hyperestraier.sourceforge.net/>
1538    
1539     Hyper Estraier Ruby interface on which this module is based.
1540    
1541     =head1 AUTHOR
1542    
1543     Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt>
1544    
1545    
1546     =head1 COPYRIGHT AND LICENSE
1547    
1548 dpavlin 15 Copyright (C) 2005-2006 by Dobrica Pavlinusic
1549 dpavlin 2
1550     This library is free software; you can redistribute it and/or modify
1551     it under the GPL v2 or later.
1552    
1553     =cut
1554    
1555     1;

  ViewVC Help
Powered by ViewVC 1.1.26