--- trunk/Estraier.pm 2006/01/06 14:10:29 52 +++ trunk/Estraier.pm 2006/01/07 02:40:57 62 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.00'; +our $VERSION = '0.01'; =head1 NAME @@ -205,7 +205,8 @@ sub attr_names { my $self = shift; - croak "attr_names return array, not scalar" if (! wantarray); + return unless ($self->{attrs}); + #croak "attr_names return array, not scalar" if (! wantarray); return sort keys %{ $self->{attrs} }; } @@ -221,8 +222,8 @@ sub attr { my $self = shift; my $name = shift; - - return $self->{'attrs'}->{ $name }; + return unless (defined($name) && $self->{attrs}); + return $self->{attrs}->{ $name }; } @@ -236,8 +237,8 @@ sub texts { my $self = shift; - confess "texts return array, not scalar" if (! wantarray); - return @{ $self->{dtexts} }; + #confess "texts return array, not scalar" if (! wantarray); + return @{ $self->{dtexts} } if ($self->{dtexts}); } @@ -251,7 +252,7 @@ sub cat_texts { my $self = shift; - return join(' ',@{ $self->{dtexts} }); + return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts}); } @@ -460,7 +461,7 @@ sub attrs { my $self = shift; #croak "attrs return array, not scalar" if (! wantarray); - return @{ $self->{attrs} }; + return @{ $self->{attrs} } if ($self->{attrs}); } @@ -524,9 +525,7 @@ my $self = {@_}; bless($self, $class); - foreach my $f (qw/uri attrs snippet keywords/) { - croak "missing $f for ResultDocument" unless defined($self->{$f}); - } + croak "missing uri for ResultDocument" unless defined($self->{uri}); $self ? return $self : return undef; } @@ -645,7 +644,7 @@ sub doc_num { my $self = shift; - return $#{$self->{docs}}; + return $#{$self->{docs}} + 1; } @@ -717,10 +716,10 @@ }; bless($self, $class); - if (@_) { - $self->{debug} = shift; - warn "## Node debug on\n"; - } + my $args = {@_}; + + $self->{debug} = $args->{debug}; + warn "## Node debug on\n" if ($self->{debug}); $self ? return $self : return undef; } @@ -1095,7 +1094,7 @@ sub name { my $self = shift; - $self->set_info unless ($self->{name}); + $self->_set_info unless ($self->{name}); return $self->{name}; } @@ -1108,7 +1107,7 @@ sub label { my $self = shift; - $self->set_info unless ($self->{label}); + $self->_set_info unless ($self->{label}); return $self->{label}; } @@ -1121,7 +1120,7 @@ sub doc_num { my $self = shift; - $self->set_info if ($self->{dnum} < 0); + $self->_set_info if ($self->{dnum} < 0); return $self->{dnum}; } @@ -1134,7 +1133,7 @@ sub word_num { my $self = shift; - $self->set_info if ($self->{wnum} < 0); + $self->_set_info if ($self->{wnum} < 0); return $self->{wnum}; } @@ -1147,7 +1146,7 @@ sub size { my $self = shift; - $self->set_info if ($self->{size} < 0); + $self->_set_info if ($self->{size} < 0); return $self->{size}; } @@ -1175,8 +1174,8 @@ my $resbody; my $rv = $self->shuttle_url( $self->{url} . '/search', - 'text/x-estraier-draft', - $self->cond_to_query( $cond ), + 'application/x-www-form-urlencoded', + $self->cond_to_query( $cond, $depth ), \$resbody, ); return if ($rv != 200); @@ -1210,6 +1209,7 @@ while( ! $isend && $lnum <= $#lines ) { my $line = $lines[$lnum]; + #warn "# $lnum: $line\n"; $lnum++; if ($line && $line =~ m/^\Q$border\E/) { @@ -1226,8 +1226,10 @@ last unless ($rdline); if ($rdline =~ /^%/) { $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/); + } elsif($rdline =~ /=/) { + $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/); } else { - $rdattrs->{$1} = {$2} if ($line =~ /^(.+)=(.+)$/); + confess "invalid format of response"; } } while($rlnum < $lnum - 1) { @@ -1235,6 +1237,7 @@ $rlnum++; $rdsnippet .= "$rdline\n"; } + #warn Dumper($rdvector, $rdattrs, $rdsnippet); if (my $rduri = $rdattrs->{'@uri'}) { push @docs, new Search::Estraier::ResultDocument( uri => $rduri, @@ -1249,17 +1252,14 @@ $isend = 1 if ($line =~ /:END$/); } - if (! $isend) { - warn "received result doesn't have :END\n$resbody"; - return; - } } if (! $isend) { warn "received result doesn't have :END\n$resbody"; return; } - + + #warn Dumper(\@docs, $hints); return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints ); } @@ -1267,7 +1267,9 @@ =head2 cond_to_query - my $args = $node->cond_to_query( $cond ); +Return URI encoded string generated from Search::Estraier::Condition + + my $args = $node->cond_to_query( $cond, $depth ); =cut @@ -1276,6 +1278,7 @@ my $cond = shift || return; croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition')); + my $depth = shift; my @args; @@ -1303,7 +1306,7 @@ push @args, 'options=' . $options; } - push @args, 'depth=' . $self->{depth} if ($self->{depth}); + push @args, 'depth=' . $depth if ($depth); push @args, 'wwidth=' . $self->{wwidth}; push @args, 'hwidth=' . $self->{hwidth}; push @args, 'awidth=' . $self->{awidth}; @@ -1324,6 +1327,8 @@ =cut +use LWP::UserAgent; + sub shuttle_url { my $self = shift; @@ -1342,81 +1347,37 @@ return -1; } - my ($host,$port,$query) = ($url->host, $url->port, $url->path); - - if ($self->{pxhost}) { - ($host,$port) = ($self->{pxhost}, $self->{pxport}); - $query = "http://$host:$port/$query"; - } - - $query .= '?' . $url->query if ($url->query && ! $reqbody); - - my $headers; + my $ua = LWP::UserAgent->new; + $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" ); + my $req; if ($reqbody) { - $headers .= "POST $query HTTP/1.0\r\n"; + $req = HTTP::Request->new(POST => $url); } else { - $headers .= "GET $query HTTP/1.0\r\n"; + $req = HTTP::Request->new(GET => $url); } - $headers .= "Host: " . $url->host . ":" . $url->port . "\r\n"; - $headers .= "Connection: close\r\n"; - $headers .= "User-Agent: Search-Estraier/$Search::Estraier::VERSION\r\n"; - $headers .= "Content-Type: $content_type\r\n"; - $headers .= "Authorization: Basic $self->{auth}\r\n"; - my $len = 0; - { - use bytes; - $len = length($reqbody) if ($reqbody); - } - $headers .= "Content-Length: $len\r\n"; - $headers .= "\r\n"; + $req->headers->header( 'Host' => $url->host . ":" . $url->port ); + $req->headers->header( 'Connection', 'close' ); + $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ); + $req->content_type( $content_type ); - my $sock = IO::Socket::INET->new( - PeerAddr => $host, - PeerPort => $port, - Proto => 'tcp', - Timeout => $self->{timeout} || 90, - ); + warn $req->headers->as_string,"\n" if ($self->{debug}); - if (! $sock) { - carp "can't open socket to $host:$port"; - return -1; + if ($reqbody) { + warn "$reqbody\n" if ($self->{debug}); + $req->content( $reqbody ); } - warn $headers if ($self->{debug}); + my $res = $ua->request($req) || croak "can't make request to $url: $!"; - print $sock $headers or - carp "can't send headers to network:\n$headers\n" and return -1; + warn "## response status: ",$res->status_line,"\n" if ($self->{debug}); - if ($reqbody) { - warn "$reqbody\n" if ($self->{debug}); - print $sock $reqbody or - carp "can't send request body to network:\n$$reqbody\n" and return -1; - } + return -1 if (! $res->is_success); - my $line = <$sock>; - chomp($line); - my ($schema, $res_status, undef) = split(/ */, $line, 3); - return if ($schema !~ /^HTTP/ || ! $res_status); - - $self->{status} = $res_status; - warn "## response status: $res_status\n" if ($self->{debug}); - - # skip rest of headers - $line = <$sock>; - while ($line) { - $line = <$sock>; - $line =~ s/[\r\n]+$//; - warn "## ", $line || 'NULL', " ##\n" if ($self->{debug}); - }; + ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); - # read body - $len = 0; - do { - $len = read($sock, my $buf, 8192); - $$resbody .= $buf if ($resbody); - } while ($len); + $$resbody .= $res->content; warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug}); @@ -1424,15 +1385,117 @@ } -=head2 set_info +=head2 set_snippet_width + +Set width of snippets in results + + $node->set_snippet_width( $wwidth, $hwidth, $awidth ); + +C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet +is not sent with results. If it is negative, whole document text is sent instead of snippet. + +C<$hwidth> specified width of strings from beginning of string. Default +value is C<96>. Negative or zero value keep previous value. + +C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default. +If negative of zero value is provided previous value is kept unchanged. + +=cut + +sub set_snippet_width { + my $self = shift; + + my ($wwidth, $hwidth, $awidth) = @_; + $self->{wwidth} = $wwidth; + $self->{hwidth} = $hwidth if ($hwidth >= 0); + $self->{awidth} = $awidth if ($awidth >= 0); +} + + +=head2 set_user + +Manage users of node + + $node->set_user( 'name', $mode ); + +C<$mode> can be one of: + +=over 4 + +=item 0 + +delete account + +=item 1 + +set administrative right for user + +=item 2 + +set user account as guest + +=back + +Return true on success, otherwise false. + +=cut + +sub set_user { + my $self = shift; + my ($name, $mode) = @_; + + return unless ($self->{url}); + croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/); + + $self->shuttle_url( $self->{url} . '/_set_user', + 'text/plain', + 'name=' . uri_escape($name) . '&mode=' . $mode, + undef + ) == 200; +} + + +=head2 set_link + +Manage node links + + $node->set_link('http://localhost:1978/node/another', 'another node label', $credit); + +If C<$credit> is negative, link is removed. + +=cut + +sub set_link { + my $self = shift; + my ($url, $label, $credit) = @_; + + return unless ($self->{url}); + croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/); + + my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label); + $reqbody .= '&credit=' . $credit if ($credit > 0); + + $self->shuttle_url( $self->{url} . '/_set_link', + 'text/plain', + $reqbody, + undef + ) == 200; +} + + +=head1 PRIVATE METHODS + +You could call those directly, but you don't have to. I hope. + +=head2 _set_info Set information for node - $node->set_info; + $node->_set_info; =cut -sub set_info { +sub _set_info { my $self = shift; $self->{status} = -1; @@ -1447,7 +1510,8 @@ return if ($rv != 200 || !$resbody); - chomp($resbody); + # it seems that response can have multiple line endings + $resbody =~ s/[\r\n]+$//; ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) = split(/\t/, $resbody, 5);