--- trunk/Estraier.pm 2006/01/05 23:38:32 42 +++ trunk/Estraier.pm 2006/01/06 01:36:09 45 @@ -387,7 +387,7 @@ sub set_max { my $self = shift; my $max = shift; - croak "set_max needs number" unless ($max =~ m/^\d+$/); + croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/); $self->{max} = $max; } @@ -662,7 +662,7 @@ sub get_doc { my $self = shift; my $num = shift; - croak "expect number as argument" unless ($num =~ m/^\d+$/); + croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/); return undef if ($num < 0 || $num > $self->{docs}); return $self->{docs}->[$num]; } @@ -688,7 +688,7 @@ package Search::Estraier::Node; -use Carp qw/carp croak/; +use Carp qw/carp croak confess/; use URI; use MIME::Base64; use IO::Socket::INET; @@ -750,7 +750,7 @@ sub set_proxy { my $self = shift; my ($host,$port) = @_; - croak "proxy port must be number" unless ($port =~ m/^\d+$/); + croak "proxy port must be number, not '$port'" unless ($port =~ m/^\d+$/); $self->{pxhost} = $host; $self->{pxport} = $port; } @@ -767,7 +767,7 @@ sub set_timeout { my $self = shift; my $sec = shift; - croak "timeout must be number" unless ($sec =~ m/^\d+$/); + croak "timeout must be number, not '$sec'" unless ($sec =~ m/^\d+$/); $self->{timeout} = $sec; } @@ -841,7 +841,7 @@ my $self = shift; my $id = shift || return; return unless ($self->{url}); - croak "id must be number" unless ($id =~ m/^\d+$/); + croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/); $self->shuttle_url( $self->{url} . '/out_doc', 'application/x-www-form-urlencoded', "id=$id", @@ -854,7 +854,7 @@ Remove a registrated document using it's uri - $node->out_doc_by_uri( 'file:///document_url' ) or "can't remove document"; + $node->out_doc_by_uri( 'file:///document/uri/42' ) or "can't remove document"; Return true on success or false on failture. 
# @@ -894,6 +894,156 @@
# (diff hunk header; the only context line before the additions is the
# closing "}" of the preceding sub, whose body is not part of this hunk)

=head2 get_doc

Retrieve document

  my $doc = $node->get_doc( document_id ) or die "can't get document";

Return C<Search::Estraier::Document> object on success or false on failure
(no id given, node url not set, or node did not answer with status 200).

=cut

sub get_doc {
	my $self = shift;
	my $id = shift || return;
	return $self->_fetch_doc( id => $id );
}


=head2 get_doc_by_uri

Retrieve document

  my $doc = $node->get_doc_by_uri( 'file:///document/uri/42' ) or die "can't get document";

Return C<Search::Estraier::Document> object on success or false on failure.

=cut

sub get_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri );
}


=head2 etch_doc

Extract document keywords

  my $keywords = $node->etch_doc( document_id ) or die "can't etch document";

Return hash reference (keyword as key, value as sent by node) on success
or false on failure.

=cut

sub etch_doc {
	my $self = shift;
	my $id = shift || return;
	return $self->_fetch_doc( id => $id, etch => 1 );
}

# This method was first committed under the misspelled name erch_doc while
# its documentation always said etch_doc. Keep the old name working for
# callers which picked it up; goto keeps @_ and the caller frame intact so
# croak in _fetch_doc still reports the real caller.
sub erch_doc { goto &etch_doc }

=head2 etch_doc_by_uri

Extract keywords of document specified by URI

  my $keywords = $node->etch_doc_by_uri( 'file:///document/uri/42' ) or die "can't etch document";

Return hash reference of keywords on success or false on failure.

=cut

sub etch_doc_by_uri {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri, etch => 1 );
}


=head2 uri_to_id

Get ID of document specified by URI

  my $id = $node->uri_to_id( 'file:///document/uri/42' );

Return response body of node with trailing newline removed, or false on
failure.

=cut

sub uri_to_id {
	my $self = shift;
	my $uri = shift || return;
	return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );
}


=head2 _fetch_doc

Private function used for implementing of C<get_doc>, C<get_doc_by_uri>,
C<etch_doc>, C<etch_doc_by_uri>.
=pod

  # this will decode received draft into Search::Estraier::Document object
  my $doc = $node->_fetch_doc( id => 42 );
  my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42' );

  # to extract keywords, add etch
  my $doc = $node->_fetch_doc( id => 42, etch => 1 );
  my $doc = $node->_fetch_doc( uri => 'file:///document/uri/42', etch => 1 );

  # more general form which allows implementation of
  # uri_to_id
  my $id = $node->_fetch_doc(
    uri => 'file:///document/uri/42',
    path => '/uri_to_id',
    chomp_resbody => 1
  );

Named arguments:

=over 4

=item id

Numeric document id. Croaks if it is not made of digits only. Used in
preference to C<uri> when both are given.

=item uri

Document URI. It is percent-encoded before it is put in the request body.

=item path

Path appended to node url, C</get_doc> if not specified.

=item etch

If true, C</etch_doc> is called (overriding C<path>) and the response is
parsed as C<keyword E<lt>tabE<gt> value> lines into a hash reference, which
is also stored in C<< $node->{kwords} >>.

=item chomp_resbody

If true, the response body is returned as string with trailing newline
removed.

=back

Without C<etch> or C<chomp_resbody> the response body is handed to
C<< Search::Estraier::Document->new >> and the resulting object returned.

Returns nothing (undef in scalar context) if neither C<id> nor C<uri> is
given, if node url is not set, or if node does not answer with status 200.

=cut

sub _fetch_doc {
	my $self = shift;
	# not named $a: a lexical $a would hide the package variable sort() uses
	my $args = {@_};

	return unless ( ($args->{id} || $args->{uri}) && $self->{url} );

	# etch always wins over an explicitly supplied path
	my $path = $args->{etch} ? '/etch_doc' : ( $args->{path} || '/get_doc' );

	my $arg;
	if ($args->{id}) {
		croak "id must be number, not '$args->{id}'" unless ($args->{id} =~ m/^\d+$/);
		$arg = 'id=' . $args->{id};
	} else {
		# The guard above guarantees uri is set when id is not.
		# Body is sent as application/x-www-form-urlencoded, so everything
		# outside the unreserved set has to be percent-encoded; otherwise
		# '&', '=', '+', '%' or spaces in the URI change the request.
		my $uri = "$args->{uri}";
		utf8::encode($uri) if utf8::is_utf8($uri);	# escape octets, not wide chars
		$uri =~ s/([^A-Za-z0-9\-_.~])/sprintf('%%%02X', ord($1))/ge;
		$arg = 'uri=' . $uri;
	}

	my $resbody;
	my $rv = $self->shuttle_url( $self->{url} . $path,
		'application/x-www-form-urlencoded',
		$arg,
		\$resbody,
	);

	# anything but 200 (including no status at all) is a failure
	return unless (defined($rv) && $rv == 200);

	if ($args->{etch}) {
		# always reset, so old keywords do not survive an empty response
		$self->{kwords} = {};
		return +{} unless ($resbody);
		foreach my $line (split(/\n/, $resbody)) {
			my ($keyword, $value) = split(/\t/, $line, 2);
			# skip lines without value, but keep a value of "0"
			next unless (defined($value) && $value ne '');
			$self->{kwords}->{$keyword} = $value;
		}
		return $self->{kwords};
	} elsif ($args->{chomp_resbody}) {
		return unless (defined($resbody));
		chomp($resbody);
		return $resbody;
	} else {
		return Search::Estraier::Document->new($resbody);
	}
}




=head2 shuttle_url

This is method which uses C to communicate with Hyper Estraier node