/[Search-Estraier]/trunk/Estraier.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 36 by dpavlin, Thu Jan 5 21:51:54 2006 UTC revision 45 by dpavlin, Fri Jan 6 01:36:09 2006 UTC
# Line 27  implementation. It also includes methods Line 27  implementation. It also includes methods
27    
28  =cut  =cut
29    
30    =head1 Inheritable common methods
31    
32    These methods should really move somewhere else.
33    
34  =head2 _s  =head2 _s
35    
36  Remove multiple whitespaces from string, as well as whitespaces at beginning or end  Remove multiple whitespaces from string, as well as whitespaces at beginning or end
# Line 56  our @ISA = qw/Search::Estraier/; Line 60  our @ISA = qw/Search::Estraier/;
60  This class implements Document which is collection of attributes  This class implements Document which is collection of attributes
61  (key=value), vectors (also key value) display text and hidden text.  (key=value), vectors (also key value) display text and hidden text.
62    
63    
64  =head2 new  =head2 new
65    
66  Create new document, empty or from draft.  Create new document, empty or from draft.
# Line 175  sub add_hidden_text { Line 180  sub add_hidden_text {
180          push @{ $self->{htexts} }, $self->_s($text);          push @{ $self->{htexts} }, $self->_s($text);
181  }  }
182    
183    
184  =head2 id  =head2 id
185    
186  Get the ID number of document. If the object has never been registered, C<-1> is returned.  Get the ID number of document. If the object has never been registered, C<-1> is returned.
# Line 188  sub id { Line 194  sub id {
194          return $self->{id};          return $self->{id};
195  }  }
196    
197    
198  =head2 attr_names  =head2 attr_names
199    
200  Returns array with attribute names from document object.  Returns array with attribute names from document object.
# Line 233  sub texts { Line 240  sub texts {
240          return @{ $self->{dtexts} };          return @{ $self->{dtexts} };
241  }  }
242    
243    
244  =head2 cat_texts  =head2 cat_texts
245    
246  Return whole text as single scalar.  Return whole text as single scalar.
# Line 246  sub cat_texts { Line 254  sub cat_texts {
254          return join(' ',@{ $self->{dtexts} });          return join(' ',@{ $self->{dtexts} });
255  }  }
256    
257    
258  =head2 dump_draft  =head2 dump_draft
259    
260  Dump draft data from document object.  Dump draft data from document object.
# Line 272  sub dump_draft { Line 281  sub dump_draft {
281    
282          $draft .= "\n";          $draft .= "\n";
283    
284          $draft .= join("\n", @{ $self->{dtexts} }) . "\n";          $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts});
285          $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n";          $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts});
286    
287          return $draft;          return $draft;
288  }  }
289    
290    
291  =head2 delete  =head2 delete
292    
293  Empty document object  Empty document object
# Line 330  sub new { Line 340  sub new {
340          $self ? return $self : return undef;          $self ? return $self : return undef;
341  }  }
342    
343    
344  =head2 set_phrase  =head2 set_phrase
345    
346    $cond->set_phrase('search phrase');    $cond->set_phrase('search phrase');
# Line 341  sub set_phrase { Line 352  sub set_phrase {
352          $self->{phrase} = $self->_s( shift );          $self->{phrase} = $self->_s( shift );
353  }  }
354    
355    
356  =head2 add_attr  =head2 add_attr
357    
358    $cond->add_attr('@URI STRINC /~dpavlin/');    $cond->add_attr('@URI STRINC /~dpavlin/');
# Line 353  sub add_attr { Line 365  sub add_attr {
365          push @{ $self->{attrs} }, $self->_s( $attr );          push @{ $self->{attrs} }, $self->_s( $attr );
366  }  }
367    
368    
369  =head2 set_order  =head2 set_order
370    
371    $cond->set_order('@mdate NUMD');    $cond->set_order('@mdate NUMD');
# Line 364  sub set_order { Line 377  sub set_order {
377          $self->{order} = shift;          $self->{order} = shift;
378  }  }
379    
380    
381  =head2 set_max  =head2 set_max
382    
383    $cond->set_max(42);    $cond->set_max(42);
# Line 373  sub set_order { Line 387  sub set_order {
387  sub set_max {  sub set_max {
388          my $self = shift;          my $self = shift;
389          my $max = shift;          my $max = shift;
390          croak "set_max needs number" unless ($max =~ m/^\d+$/);          croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
391          $self->{max} = $max;          $self->{max} = $max;
392  }  }
393    
394    
395  =head2 set_options  =head2 set_options
396    
397    $cond->set_options( SURE => 1 );    $cond->set_options( SURE => 1 );
# Line 405  sub set_options { Line 420  sub set_options {
420          $self->{options} ||= $options->{$option};          $self->{options} ||= $options->{$option};
421  }  }
422    
423    
424  =head2 phrase  =head2 phrase
425    
426  Return search phrase.  Return search phrase.
# Line 418  sub phrase { Line 434  sub phrase {
434          return $self->{phrase};          return $self->{phrase};
435  }  }
436    
437    
438  =head2 order  =head2 order
439    
440  Return search result order.  Return search result order.
# Line 431  sub order { Line 448  sub order {
448          return $self->{order};          return $self->{order};
449  }  }
450    
451    
452  =head2 attrs  =head2 attrs
453    
454  Return search result attrs.  Return search result attrs.
# Line 445  sub attrs { Line 463  sub attrs {
463          return @{ $self->{attrs} };          return @{ $self->{attrs} };
464  }  }
465    
466    
467  =head2 max  =head2 max
468    
469  Return maximum number of results.  Return maximum number of results.
# Line 460  sub max { Line 479  sub max {
479          return $self->{max};          return $self->{max};
480  }  }
481    
482    
483  =head2 options  =head2 options
484    
485  Return options for this condition.  Return options for this condition.
# Line 511  sub new { Line 531  sub new {
531          $self ? return $self : return undef;          $self ? return $self : return undef;
532  }  }
533    
534    
535  =head2 uri  =head2 uri
536    
537  Return URI of result document  Return URI of result document
# Line 539  sub attr_names { Line 560  sub attr_names {
560          return sort keys %{ $self->{attrs} };          return sort keys %{ $self->{attrs} };
561  }  }
562    
563    
564  =head2 attr  =head2 attr
565    
566  Returns value of an attribute.  Returns value of an attribute.
# Line 553  sub attr { Line 575  sub attr {
575          return $self->{attrs}->{ $name };          return $self->{attrs}->{ $name };
576  }  }
577    
578    
579  =head2 snippet  =head2 snippet
580    
581  Return snippet from result document  Return snippet from result document
# Line 566  sub snippet { Line 589  sub snippet {
589          return $self->{snippet};          return $self->{snippet};
590  }  }
591    
592    
593  =head2 keywords  =head2 keywords
594    
595  Return keywords from result document  Return keywords from result document
# Line 610  sub new { Line 634  sub new {
634          $self ? return $self : return undef;          $self ? return $self : return undef;
635  }  }
636    
637    
638  =head2 doc_num  =head2 doc_num
639    
640  Return number of documents  Return number of documents
# Line 623  sub doc_num { Line 648  sub doc_num {
648          return $#{$self->{docs}};          return $#{$self->{docs}};
649  }  }
650    
651    
652  =head2 get_doc  =head2 get_doc
653    
654  Return single document  Return single document
# Line 636  Returns undef if document doesn't exist. Line 662  Returns undef if document doesn't exist.
662  sub get_doc {  sub get_doc {
663          my $self = shift;          my $self = shift;
664          my $num = shift;          my $num = shift;
665          croak "expect number as argument" unless ($num =~ m/^\d+$/);          croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
666          return undef if ($num < 0 || $num > $self->{docs});          return undef if ($num < 0 || $num > $self->{docs});
667          return $self->{docs}->[$num];          return $self->{docs}->[$num];
668  }  }
669    
670    
671  =head2 hint  =head2 hint
672    
673  Return specific hint from results.  Return specific hint from results.
# Line 661  sub hint { Line 688  sub hint {
688    
689  package Search::Estraier::Node;  package Search::Estraier::Node;
690    
691  use Carp qw/croak/;  use Carp qw/carp croak confess/;
692  use URI;  use URI;
693  use MIME::Base64;  use MIME::Base64;
694  use IO::Socket::INET;  use IO::Socket::INET;
# Line 689  sub new { Line 716  sub new {
716          };          };
717          bless($self, $class);          bless($self, $class);
718    
719            if (@_) {
720                    $self->{debug} = shift;
721                    warn "## Node debug on\n";
722            }
723    
724          $self ? return $self : return undef;          $self ? return $self : return undef;
725  }  }
726    
727    
728  =head2 set_url  =head2 set_url
729    
730  Specify URL to node server  Specify URL to node server
# Line 705  sub set_url { Line 738  sub set_url {
738          $self->{url} = shift;          $self->{url} = shift;
739  }  }
740    
741    
742  =head2 set_proxy  =head2 set_proxy
743    
744  Specify proxy server to connect to node server  Specify proxy server to connect to node server
# Line 716  Specify proxy server to connect to node Line 750  Specify proxy server to connect to node
750  sub set_proxy {  sub set_proxy {
751          my $self = shift;          my $self = shift;
752          my ($host,$port) = @_;          my ($host,$port) = @_;
753          croak "proxy port must be number" unless ($port =~ m/^\d+$/);          croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
754          $self->{pxhost} = $host;          $self->{pxhost} = $host;
755          $self->{pxport} = $port;          $self->{pxport} = $port;
756  }  }
757    
758    
759  =head2 set_timeout  =head2 set_timeout
760    
761  Specify timeout of connection in seconds  Specify timeout of connection in seconds
# Line 732  Specify timeout of connection in seconds Line 767  Specify timeout of connection in seconds
767  sub set_timeout {  sub set_timeout {
768          my $self = shift;          my $self = shift;
769          my $sec = shift;          my $sec = shift;
770          croak "timeout must be number" unless ($sec =~ m/^\d+$/);          croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
771          $self->{timeout} = $sec;          $self->{timeout} = $sec;
772  }  }
773    
774    
775  =head2 set_auth  =head2 set_auth
776    
777  Specify name and password for authentication to node server.  Specify name and password for authentication to node server.
# Line 747  Specify name and password for authentica Line 783  Specify name and password for authentica
783  sub set_auth {  sub set_auth {
784          my $self = shift;          my $self = shift;
785          my ($login,$passwd) = @_;          my ($login,$passwd) = @_;
786          $self->{auth} = encode_base64( "$login:$passwd" );          my $basic_auth = encode_base64( "$login:$passwd" );
787            chomp($basic_auth);
788            $self->{auth} = $basic_auth;
789  }  }
790    
791    
792  =head2 status  =head2 status
793    
794  Return status code of last request.  Return status code of last request.
795    
796    print $res->status;    print $node->status;
797    
798  C<-1> means connection failure.  C<-1> means connection failure.
799    
# Line 765  sub status { Line 804  sub status {
804          return $self->{status};          return $self->{status};
805  }  }
806    
807    
808    =head2 put_doc
809    
810    Add a document
811    
812      $node->put_doc( $document_draft ) or die "can't add document";
813    
814    Return true on success or false on failure.
815    
816    =cut
817    
818    sub put_doc {
819            my $self = shift;
820            my $doc = shift || return;
821            return unless ($self->{url});
822            $self->shuttle_url( $self->{url} . '/put_doc',
823                    'text/x-estraier-draft',
824                    $doc->dump_draft,
825                    undef
826            ) == 200;
827    }
828    
829    
830    =head2 out_doc
831    
832    Remove a document
833    
834      $node->out_doc( document_id ) or die "can't remove document";
835    
836    Return true on success or false on failure.
837    
838    =cut
839    
840    sub out_doc {
841            my $self = shift;
842            my $id = shift || return;
843            return unless ($self->{url});
844            croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
845            $self->shuttle_url( $self->{url} . '/out_doc',
846                    'application/x-www-form-urlencoded',
847                    "id=$id",
848                    undef
849            ) == 200;
850    }
851    
852    
853    =head2 out_doc_by_uri
854    
855    Remove a registered document using its URI
856    
857      $node->out_doc_by_uri( 'file:///document/uri/42' ) or die "can't remove document";
858    
859    Return true on success or false on failure.
860    
861    =cut
862    
863    sub out_doc_by_uri {
864            my $self = shift;
865            my $uri = shift || return;
866            return unless ($self->{url});
867            $self->shuttle_url( $self->{url} . '/out_doc',
868                    'application/x-www-form-urlencoded',
869                    "uri=$uri",
870                    undef
871            ) == 200;
872    }
873    
874    
875    =head2 edit_doc
876    
877    Edit attributes of a document
878    
879      $node->edit_doc( $document_draft ) or die "can't edit document";
880    
881    Return true on success or false on failure.
882    
883    =cut
884    
885    sub edit_doc {
886            my $self = shift;
887            my $doc = shift || return;
888            return unless ($self->{url});
889            $self->shuttle_url( $self->{url} . '/edit_doc',
890                    'text/x-estraier-draft',
891                    $doc->dump_draft,
892                    undef
893            ) == 200;
894    }
895    
896    
897    =head2 get_doc
898    
899    Retrieve document
900    
901      my $doc = $node->get_doc( document_id ) or die "can't get document";
902    
903    Return document object on success or false on failure.
904    
905    =cut
906    
907    sub get_doc {
908            my $self = shift;
909            my $id = shift || return;
910            return $self->_fetch_doc( id => $id );
911    }
912    
913    
914    =head2 get_doc_by_uri
915    
916    Retrieve document
917    
918      my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
919    
920    Return document object on success or false on failure.
921    
922    =cut
923    
924    sub get_doc_by_uri {
925            my $self = shift;
926            my $uri = shift || return;
927            return $self->_fetch_doc( uri => $uri );
928    }
929    
930    
931    =head2 etch_doc
932    
933    Extract document keywords
934    
935      my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
936    
937    =cut
938    
939    sub erch_doc {
940            my $self = shift;
941            my $id = shift || return;
942            return $self->_fetch_doc( id => $id, etch => 1 );
943    }
944    
945    =head2 etch_doc_by_uri
946    
947    Extract document keywords
948    
949      my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
950    
951    Return hash reference of keywords on success or false on failure.
952    
953    =cut
954    
955    sub etch_doc_by_uri {
956            my $self = shift;
957            my $uri = shift || return;
958            return $self->_fetch_doc( uri => $uri, etch => 1 );
959    }
960    
961    
962    =head2 uri_to_id
963    
964    Get ID of document specified by URI
965    
966      my $id = $node->uri_to_id( 'file:///document/uri/42' );
967    
968    =cut
969    
970    sub uri_to_id {
971            my $self = shift;
972            my $uri = shift || return;
973            return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
974    }
975    
976    
977    =head2 _fetch_doc
978    
979    Private function used to implement C<get_doc>, C<get_doc_by_uri>,
980    C<etch_doc>, C<etch_doc_by_uri>.
981    
982     # this will decode received draft into Search::Estraier::Document object
983     my $doc = $node->_fetch_doc( id => 42 );
984     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
985    
986     # to extract keywords, add etch
987     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
988     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
989    
990     # more general form which allows implementation of
991     # uri_to_id
992     my $id = $node->_fetch_doc(
993            uri => 'file:///document/uri/42',
994            path => '/uri_to_id',
995            chomp_resbody => 1
996     );
997    
998    =cut
999    
1000    sub _fetch_doc {
1001            my $self = shift;
1002            my $a = {@_};
1003            return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1004    
1005            my ($arg, $resbody);
1006    
1007            my $path = $a->{path} || '/get_doc';
1008            $path = '/etch_doc' if ($a->{etch});
1009    
1010            if ($a->{id}) {
1011                    croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1012                    $arg = 'id=' . $a->{id};
1013            } elsif ($a->{uri}) {
1014                    $arg = 'uri=' . $a->{uri};
1015            } else {
1016                    confess "unhandled argument. Need id or uri.";
1017            }
1018    
1019            my $rv = $self->shuttle_url( $self->{url} . $path,
1020                    'application/x-www-form-urlencoded',
1021                    $arg,
1022                    \$resbody,
1023            );
1024    
1025            return if ($rv != 200);
1026    
1027            if ($a->{etch}) {
1028                    $self->{kwords} = {};
1029                    return +{} unless ($resbody);
1030                    foreach my $l (split(/\n/, $resbody)) {
1031                            my ($k,$v) = split(/\t/, $l, 2);
1032                            $self->{kwords}->{$k} = $v if ($v);
1033                    }
1034                    return $self->{kwords};
1035            } elsif ($a->{chomp_resbody}) {
1036                    return unless (defined($resbody));
1037                    chomp($resbody);
1038                    return $resbody;
1039            } else {
1040                    return new Search::Estraier::Document($resbody);
1041            }
1042    }
1043    
1044    
1045    
1046    
1047  =head2 shuttle_url  =head2 shuttle_url
1048    
1049  This is a method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node  This is a method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
# Line 782  sub shuttle_url { Line 1061  sub shuttle_url {
1061    
1062          my ($url, $content_type, $reqbody, $resbody) = @_;          my ($url, $content_type, $reqbody, $resbody) = @_;
1063    
1064          my $status = -1;          $self->{status} = -1;
1065    
1066          warn $url;          warn "## $url\n" if ($self->{debug});
1067    
1068          $url = new URI($url);          $url = new URI($url);
1069          return -1 unless ($url && $url->scheme && $url->scheme eq 'http' && $url->host && $url->port > 1);          if (
1070                            !$url || !$url->scheme || !$url->scheme eq 'http' ||
1071                            !$url->host || !$url->port || $url->port < 1
1072                    ) {
1073                    carp "can't parse $url\n";
1074                    return -1;
1075            }
1076    
1077          my ($host,$port,$query) = ($url->host, $url->port, $url->path);          my ($host,$port,$query) = ($url->host, $url->port, $url->path);
1078    
# Line 796  sub shuttle_url { Line 1081  sub shuttle_url {
1081                  $query = "http://$host:$port/$query";                  $query = "http://$host:$port/$query";
1082          }          }
1083    
1084          $query .= '?' + $url->query if ($url->query && ! $reqbody);          $query .= '?' . $url->query if ($url->query && ! $reqbody);
1085    
1086          my $sock = IO::Socket::INET->new(          my $headers;
                 PeerAddr        => $host,  
                 PeerPort        => $port,  
                 Proto           => 'tcp',  
                 Timeout         => $self->{timeout} || 90,  
         ) || return -1;  
1087    
1088          if ($reqbody) {          if ($reqbody) {
1089                  print $sock "POST $query HTTP/1.0\r\n";                  $headers .= "POST $query HTTP/1.0\r\n";
1090          } else {          } else {
1091                  print $sock "GET $query HTTP/1.0\r\n";                  $headers .= "GET $query HTTP/1.0\r\n";
1092          }          }
1093    
1094          print $sock "Host: $url->host:$url->port\r\n";          $headers .= "Host: " . $url->host . ":" . $url->port . "\r\n";
1095          print $sock "Connection: close\r\n";          $headers .= "Connection: close\r\n";
1096          print $sock "User-Agent: Search-Estraier/$Search::Estraier::VERSION\r\n";          $headers .= "User-Agent: Search-Estraier/$Search::Estraier::VERSION\r\n";
1097          print $sock "Content-Type $content_type\r\n";          $headers .= "Content-Type: $content_type\r\n";
1098          print $sock "Authorization: Basic $self->{auth}\r\n";          $headers .= "Authorization: Basic $self->{auth}\r\n";
1099            my $len = 0;
1100          {          {
1101                  use bytes;                  use bytes;
1102                  print $sock "Content-Length: ", length($reqbody), "\r\n";                  $len = length($reqbody) if ($reqbody);
1103          }          }
1104          print $sock "\r\n";          $headers .= "Content-Length: $len\r\n";
1105            $headers .= "\r\n";
1106    
1107          print $sock $$reqbody if ($reqbody);          my $sock = IO::Socket::INET->new(
1108                    PeerAddr        => $host,
1109                    PeerPort        => $port,
1110                    Proto           => 'tcp',
1111                    Timeout         => $self->{timeout} || 90,
1112            );
1113    
1114            if (! $sock) {
1115                    carp "can't open socket to $host:$port";
1116                    return -1;
1117            }
1118    
1119            warn $headers if ($self->{debug});
1120    
1121            print $sock $headers or
1122                    carp "can't send headers to network:\n$headers\n" and return -1;
1123    
1124            if ($reqbody) {
1125                    warn "$reqbody\n" if ($self->{debug});
1126                    print $sock $reqbody or
1127                            carp "can't send request body to network:\n$$reqbody\n" and return -1;
1128            }
1129    
1130          my $line = <$sock>;          my $line = <$sock>;
1131          chomp($line);          chomp($line);
# Line 830  sub shuttle_url { Line 1133  sub shuttle_url {
1133          return if ($schema !~ /^HTTP/ || ! $res_status);          return if ($schema !~ /^HTTP/ || ! $res_status);
1134    
1135          $self->{status} = $res_status;          $self->{status} = $res_status;
1136            warn "## response status: $res_status\n" if ($self->{debug});
1137    
1138          # skip rest of headers          # skip rest of headers
1139          do {          $line = <$sock>;
1140            while ($line) {
1141                  $line = <$sock>;                  $line = <$sock>;
1142                  chomp($line);                  $line =~ s/[\r\n]+$//;
1143          } until ($line eq '');                  warn "## ", $line || 'NULL', " ##\n" if ($self->{debug});
1144            };
1145    
1146          # read body          # read body
1147          my $len = 0;          $len = 0;
1148          do {          do {
1149                  $len = read($sock, my $buf, 8192);                  $len = read($sock, my $buf, 8192);
1150                  $$resbody .= $buf if ($resbody);                  $$resbody .= $buf if ($resbody);
1151          } while ($len);          } while ($len);
1152    
1153          return $status;          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1154    
1155            return $self->{status};
1156  }  }
1157    
1158  ###  ###

Legend:
Removed from v.36  
changed lines
  Added in v.45

  ViewVC Help
Powered by ViewVC 1.1.26