/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 42 by dpavlin, Thu Jan 5 23:38:32 2006 UTC revision 48 by dpavlin, Fri Jan 6 02:07:10 2006 UTC
# Line 387  sub set_order { Line 387  sub set_order {
387  sub set_max {  sub set_max {
388          my $self = shift;          my $self = shift;
389          my $max = shift;          my $max = shift;
390          croak "set_max needs number" unless ($max =~ m/^\d+$/);          croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/);
391          $self->{max} = $max;          $self->{max} = $max;
392  }  }
393    
# Line 662  Returns undef if document doesn't exist. Line 662  Returns undef if document doesn't exist.
662  sub get_doc {  sub get_doc {
663          my $self = shift;          my $self = shift;
664          my $num = shift;          my $num = shift;
665          croak "expect number as argument" unless ($num =~ m/^\d+$/);          croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/);
666          return undef if ($num < 0 || $num > $self->{docs});          return undef if ($num < 0 || $num > $self->{docs});
667          return $self->{docs}->[$num];          return $self->{docs}->[$num];
668  }  }
# Line 688  sub hint { Line 688  sub hint {
688    
689  package Search::Estraier::Node;  package Search::Estraier::Node;
690    
691  use Carp qw/carp croak/;  use Carp qw/carp croak confess/;
692  use URI;  use URI;
693  use MIME::Base64;  use MIME::Base64;
694  use IO::Socket::INET;  use IO::Socket::INET;
# Line 750  Specify proxy server to connect to node Line 750  Specify proxy server to connect to node
750  sub set_proxy {  sub set_proxy {
751          my $self = shift;          my $self = shift;
752          my ($host,$port) = @_;          my ($host,$port) = @_;
753          croak "proxy port must be number" unless ($port =~ m/^\d+$/);          croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/);
754          $self->{pxhost} = $host;          $self->{pxhost} = $host;
755          $self->{pxport} = $port;          $self->{pxport} = $port;
756  }  }
# Line 767  Specify timeout of connection in seconds Line 767  Specify timeout of connection in seconds
767  sub set_timeout {  sub set_timeout {
768          my $self = shift;          my $self = shift;
769          my $sec = shift;          my $sec = shift;
770          croak "timeout must be number" unless ($sec =~ m/^\d+$/);          croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/);
771          $self->{timeout} = $sec;          $self->{timeout} = $sec;
772  }  }
773    
# Line 818  Return true on success or false on failt Line 818  Return true on success or false on failt
818  sub put_doc {  sub put_doc {
819          my $self = shift;          my $self = shift;
820          my $doc = shift || return;          my $doc = shift || return;
821          return unless ($self->{url});          return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
822          $self->shuttle_url( $self->{url} . '/put_doc',          $self->shuttle_url( $self->{url} . '/put_doc',
823                  'text/x-estraier-draft',                  'text/x-estraier-draft',
824                  $doc->dump_draft,                  $doc->dump_draft,
# Line 841  sub out_doc { Line 841  sub out_doc {
841          my $self = shift;          my $self = shift;
842          my $id = shift || return;          my $id = shift || return;
843          return unless ($self->{url});          return unless ($self->{url});
844          croak "id must be number" unless ($id =~ m/^\d+$/);          croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/);
845          $self->shuttle_url( $self->{url} . '/out_doc',          $self->shuttle_url( $self->{url} . '/out_doc',
846                  'application/x-www-form-urlencoded',                  'application/x-www-form-urlencoded',
847                  "id=$id",                  "id=$id",
# Line 854  sub out_doc { Line 854  sub out_doc {
854    
855  Remove a registrated document using it's uri  Remove a registrated document using it's uri
856    
857    $node->out_doc_by_uri( 'file:///document_url' ) or "can't remove document";    $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document";
858    
859  Return true on success or false on failture.  Return true on success or false on failture.
860    
# Line 885  Return true on success or false on failt Line 885  Return true on success or false on failt
885  sub edit_doc {  sub edit_doc {
886          my $self = shift;          my $self = shift;
887          my $doc = shift || return;          my $doc = shift || return;
888          return unless ($self->{url});          return unless ($self->{url} && $doc->isa('Search::Estraier::Document'));
889          $self->shuttle_url( $self->{url} . '/edit_doc',          $self->shuttle_url( $self->{url} . '/edit_doc',
890                  'text/x-estraier-draft',                  'text/x-estraier-draft',
891                  $doc->dump_draft,                  $doc->dump_draft,
# Line 894  sub edit_doc { Line 894  sub edit_doc {
894  }  }
895    
896    
897    =head2 get_doc
898    
899    Retreive document
900    
901      my $doc = $node->get_doc( document_id ) or die "can't get document";
902    
903    Return true on success or false on failture.
904    
905    =cut
906    
907    sub get_doc {
908            my $self = shift;
909            my $id = shift || return;
910            return $self->_fetch_doc( id => $id );
911    }
912    
913    
914    =head2 get_doc_by_uri
915    
916    Retreive document
917    
918      my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";
919    
920    Return true on success or false on failture.
921    
922    =cut
923    
924    sub get_doc_by_uri {
925            my $self = shift;
926            my $uri = shift || return;
927            return $self->_fetch_doc( uri => $uri );
928    }
929    
930    
931    =head2 etch_doc
932    
933    Exctract document keywords
934    
935      my $keywords = $node->etch_doc( document_id ) or die "can't etch document";
936    
937    =cut
938    
939    sub erch_doc {
940            my $self = shift;
941            my $id = shift || return;
942            return $self->_fetch_doc( id => $id, etch => 1 );
943    }
944    
945    =head2 etch_doc_by_uri
946    
947    Retreive document
948    
949      my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";
950    
951    Return true on success or false on failture.
952    
953    =cut
954    
955    sub etch_doc_by_uri {
956            my $self = shift;
957            my $uri = shift || return;
958            return $self->_fetch_doc( uri => $uri, etch => 1 );
959    }
960    
961    
962    =head2 uri_to_id
963    
964    Get ID of document specified by URI
965    
966      my $id = $node->uri_to_id( 'file:///document/uri/42' );
967    
968    =cut
969    
970    sub uri_to_id {
971            my $self = shift;
972            my $uri = shift || return;
973            return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
974    }
975    
976    
977    =head2 _fetch_doc
978    
979    Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
980    C<etch_doc>, C<etch_doc_by_uri>.
981    
982     # this will decode received draft into Search::Estraier::Document object
983     my $doc = $node->_fetch_doc( id => 42 );
984     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );
985    
986     # to extract keywords, add etch
987     my $doc = $node->_fetch_doc( id => 42, etch => 1 );
988     my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );
989    
990     # more general form which allows implementation of
991     # uri_to_id
992     my $id = $node->_fetch_doc(
993            uri => 'file:///document/uri/42',
994            path => '/uri_to_id',
995            chomp_resbody => 1
996     );
997    
998    =cut
999    
1000    sub _fetch_doc {
1001            my $self = shift;
1002            my $a = {@_};
1003            return unless ( ($a->{id} || $a->{uri}) && $self->{url} );
1004    
1005            my ($arg, $resbody);
1006    
1007            my $path = $a->{path} || '/get_doc';
1008            $path = '/etch_doc' if ($a->{etch});
1009    
1010            if ($a->{id}) {
1011                    croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/);
1012                    $arg = 'id=' . $a->{id};
1013            } elsif ($a->{uri}) {
1014                    $arg = 'uri=' . $a->{uri};
1015            } else {
1016                    confess "unhandled argument. Need id or uri.";
1017            }
1018    
1019            my $rv = $self->shuttle_url( $self->{url} . $path,
1020                    'application/x-www-form-urlencoded',
1021                    $arg,
1022                    \$resbody,
1023            );
1024    
1025            return if ($rv != 200);
1026    
1027            if ($a->{etch}) {
1028                    $self->{kwords} = {};
1029                    return +{} unless ($resbody);
1030                    foreach my $l (split(/\n/, $resbody)) {
1031                            my ($k,$v) = split(/\t/, $l, 2);
1032                            $self->{kwords}->{$k} = $v if ($v);
1033                    }
1034                    return $self->{kwords};
1035            } elsif ($a->{chomp_resbody}) {
1036                    return unless (defined($resbody));
1037                    chomp($resbody);
1038                    return $resbody;
1039            } else {
1040                    return new Search::Estraier::Document($resbody);
1041            }
1042    }
1043    
1044    
1045    =head2 name
1046    
1047      my $node_name = $node->name;
1048    
1049    =cut
1050    
1051    sub name {
1052            my $self = shift;
1053            $self->set_info unless ($self->{name});
1054            return $self->{name};
1055    }
1056    
1057    
1058    =head2 label
1059    
1060      my $node_label = $node->label;
1061    
1062    =cut
1063    
1064    sub label {
1065            my $self = shift;
1066            $self->set_info unless ($self->{label});
1067            return $self->{label};
1068    }
1069    
1070    
1071    =head2 doc_num
1072    
1073      my $documents_in_node = $node->doc_num;
1074    
1075    =cut
1076    
1077    sub doc_num {
1078            my $self = shift;
1079            $self->set_info if ($self->{dnum} < 0);
1080            return $self->{dnum};
1081    }
1082    
1083    
1084    =head2 word_num
1085    
1086      my $words_in_node = $node->word_num;
1087    
1088    =cut
1089    
1090    sub word_num {
1091            my $self = shift;
1092            $self->set_info if ($self->{wnum} < 0);
1093            return $self->{wnum};
1094    }
1095    
1096    
1097    =head2 size
1098    
1099      my $node_size = $node->size;
1100    
1101    =cut
1102    
1103    sub size {
1104            my $self = shift;
1105            $self->set_info if ($self->{size} < 0);
1106            return $self->{size};
1107    }
1108    
1109    
1110    
1111  =head2 shuttle_url  =head2 shuttle_url
1112    
1113  This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node  This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node
# Line 1005  sub shuttle_url { Line 1219  sub shuttle_url {
1219          return $self->{status};          return $self->{status};
1220  }  }
1221    
1222    
1223    =head2 set_info
1224    
1225    Set information for node
1226    
1227      $node->set_info;
1228    
1229    =cut
1230    
1231    sub set_info {
1232            my $self = shift;
1233    
1234            $self->{status} = -1;
1235            return unless ($self->{url});
1236    
1237            my $resbody;
1238            my $rv = $self->shuttle_url( $self->{url} . '/inform',
1239                    'text/plain',
1240                    undef,
1241                    \$resbody,
1242            );
1243    
1244            return if ($rv != 200 || !$resbody);
1245    
1246            chomp($resbody);
1247    
1248            ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =
1249                    split(/\t/, $resbody, 5);
1250    
1251    }
1252    
1253  ###  ###
1254    
1255  =head1 EXPORT  =head1 EXPORT

Legend:
Removed from v.42  
changed lines
  Added in v.48

  ViewVC Help
Powered by ViewVC 1.1.26