--- trunk/Estraier.pm 2006/01/28 19:41:59 100 +++ trunk/Estraier.pm 2006/05/08 12:00:43 128 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.04_1'; +our $VERSION = '0.06_1'; =head1 NAME @@ -17,9 +17,11 @@ use Search::Estraier; # create and configure node - my $node = new Search::Estraier::Node; - $node->set_url("http://localhost:1978/node/test"); - $node->set_auth("admin","admin"); + my $node = new Search::Estraier::Node( + url => 'http://localhost:1978/node/test', + user => 'admin', + passwd => 'admin' + ); # create document my $doc = new Search::Estraier::Document; @@ -32,16 +34,19 @@ $doc->add_text("Somewhere over the rainbow. Way up high."); $doc->add_text("There's a land that I heard of once in a lullaby."); - die "error: ", $node->status,"\n" unless ($node->put_doc($doc)); + die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) }); =head2 Simple searcher use Search::Estraier; # create and configure node - my $node = new Search::Estraier::Node; - $node->set_url("http://localhost:1978/node/test"); - $node->set_auth("admin","admin"); + my $node = new Search::Estraier::Node( + url => 'http://localhost:1978/node/test', + user => 'admin', + passwd => 'admin', + croak_on_error => 1, + ); # create condition my $cond = new Search::Estraier::Condition; @@ -50,9 +55,10 @@ $cond->set_phrase("rainbow AND lullaby"); my $nres = $node->search($cond, 0); - print "Got ", $nres->hits, " results\n"; if (defined($nres)) { + print "Got ", $nres->hits, " results\n"; + # for each document in results for my $i ( 0 ... $nres->doc_num - 1 ) { # get result document @@ -593,6 +599,35 @@ } +=head2 set_skip + +Set number of skipped documents from beginning of results + + $cond->set_skip(42); + +Similar to C<offset> in RDBMS. + +=cut + +sub set_skip { + my $self = shift; + $self->{skip} = shift; +} + +=head2 skip + +Return skip for this condition. 
+ + print $cond->skip; + +=cut + +sub skip { + my $self = shift; + return $self->{skip}; +} + + package Search::Estraier::ResultDocument; use Carp qw/croak/; @@ -783,7 +818,7 @@ return $self->{hints}->{$key}; } -=head2 hits +=head2 hints More perlish version of C<hint>. This one returns hash. @@ -868,14 +903,12 @@ my $self = { pxport => -1, timeout => 0, # this used to be -1 - dnum => -1, - wnum => -1, - size => -1.0, wwidth => 480, hwidth => 96, awidth => 96, status => -1, }; + bless($self, $class); if ($#_ == 0) { @@ -888,6 +921,12 @@ warn "## Node debug on\n" if ($self->{debug}); } + $self->{inform} = { + dnum => -1, + wnum => -1, + size => -1.0, + }; + $self ? return $self : return undef; } @@ -1166,12 +1205,14 @@ my $id = $node->uri_to_id( 'file:///document/uri/42' ); +This method won't croak, even if using C<croak_on_error>. + =cut sub uri_to_id { my $self = shift; my $uri = shift || return; - return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 ); + return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 ); } @@ -1231,6 +1272,7 @@ 'application/x-www-form-urlencoded', $arg, \$resbody, + $a->{croak_on_error}, ); return if ($rv != 200); @@ -1261,8 +1303,8 @@ sub name { my $self = shift; - $self->_set_info unless ($self->{name}); - return $self->{name}; + $self->_set_info unless ($self->{inform}->{name}); + return $self->{inform}->{name}; } @@ -1274,8 +1316,8 @@ sub label { my $self = shift; - $self->_set_info unless ($self->{label}); - return $self->{label}; + $self->_set_info unless ($self->{inform}->{label}); + return $self->{inform}->{label}; } @@ -1287,8 +1329,8 @@ sub doc_num { my $self = shift; - $self->_set_info if ($self->{dnum} < 0); - return $self->{dnum}; + $self->_set_info if ($self->{inform}->{dnum} < 0); + return $self->{inform}->{dnum}; } @@ -1300,8 +1342,8 @@ sub word_num { my $self = shift; - $self->_set_info if ($self->{wnum} < 0); - return $self->{wnum}; + $self->_set_info if 
($self->{inform}->{wnum} < 0); + return $self->{inform}->{wnum}; } @@ -1313,8 +1355,8 @@ sub size { my $self = shift; - $self->_set_info if ($self->{size} < 0); - return $self->{size}; + $self->_set_info if ($self->{inform}->{size} < 0); + return $self->{inform}->{size}; } @@ -1347,88 +1389,32 @@ ); return if ($rv != 200); - my (@docs, $hints); - - my @lines = split(/\n/, $resbody); - return unless (@lines); - - my $border = $lines[0]; - my $isend = 0; - my $lnum = 1; - - while ( $lnum <= $#lines ) { - my $line = $lines[$lnum]; - $lnum++; - - #warn "## $line\n"; - if ($line && $line =~ m/^\Q$border\E(:END)*$/) { - $isend = $1; - last; - } - - if ($line =~ /\t/) { - my ($k,$v) = split(/\t/, $line, 2); - $hints->{$k} = $v; - } - } - - my $snum = $lnum; - - while( ! $isend && $lnum <= $#lines ) { - my $line = $lines[$lnum]; - #warn "# $lnum: $line\n"; - $lnum++; - - if ($line && $line =~ m/^\Q$border\E/) { - if ($lnum > $snum) { - my $rdattrs; - my $rdvector; - my $rdsnippet; - - my $rlnum = $snum; - while ($rlnum < $lnum - 1 ) { - #my $rdline = $self->_s($lines[$rlnum]); - my $rdline = $lines[$rlnum]; - $rlnum++; - last unless ($rdline); - if ($rdline =~ /^%/) { - $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/); - } elsif($rdline =~ /=/) { - $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/); - } else { - confess "invalid format of response"; - } - } - while($rlnum < $lnum - 1) { - my $rdline = $lines[$rlnum]; - $rlnum++; - $rdsnippet .= "$rdline\n"; - } - #warn Dumper($rdvector, $rdattrs, $rdsnippet); - if (my $rduri = $rdattrs->{'@uri'}) { - push @docs, new Search::Estraier::ResultDocument( - uri => $rduri, - attrs => $rdattrs, - snippet => $rdsnippet, - keywords => $rdvector, - ); - } - } - $snum = $lnum; - #warn "### $line\n"; - $isend = 1 if ($line =~ /:END$/); - } - + my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody; + my $hintsText = splice @records, 0, 2; # starts with empty record + my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm }; + + 
# process records + my $docs = []; + foreach my $record (@records) + { + # split into keys and snippets + my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s; + + # create document hash + my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm }; + $doc->{'@keywords'} = $doc->{keywords}; + ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm; + $doc->{snippet} = $snippet; + + push @$docs, new Search::Estraier::ResultDocument( + attrs => $doc, + uri => $doc->{'@uri'}, + snippet => $snippet, + keywords => $doc->{'keywords'}, + ); } - if (! $isend) { - warn "received result doesn't have :END\n$resbody"; - return; - } - - #warn Dumper(\@docs, $hints); - - return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints ); + return new Search::Estraier::NodeResult( docs => $docs, hints => $hints ); } @@ -1477,6 +1463,7 @@ push @args, 'wwidth=' . $self->{wwidth}; push @args, 'hwidth=' . $self->{hwidth}; push @args, 'awidth=' . $self->{awidth}; + push @args, 'skip=' . $self->{skip} if ($self->{skip}); return join('&', @args); } @@ -1499,7 +1486,9 @@ sub shuttle_url { my $self = shift; - my ($url, $content_type, $reqbody, $resbody) = @_; + my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_; + + $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error); $self->{status} = -1; @@ -1543,7 +1532,7 @@ ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); if (! $res->is_success) { - if ($self->{croak_on_error}) { + if ($croak_on_error) { croak("can't get $url: ",$res->status_line); } else { return -1; @@ -1648,11 +1637,57 @@ my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label); $reqbody .= '&credit=' . $credit if ($credit > 0); - $self->shuttle_url( $self->{url} . '/_set_link', + if ($self->shuttle_url( $self->{url} . 
'/_set_link', 'application/x-www-form-urlencoded', $reqbody, undef - ) == 200; + ) == 200) { + # refresh node info after adding link + $self->_set_info; + return 1; + } +} + +=head2 admins + + my @admins = @{ $node->admins }; + +Return array of users with admin rights on node + +=cut + +sub admins { + my $self = shift; + $self->_set_info unless ($self->{inform}->{name}); + return $self->{inform}->{admins}; +} + +=head2 guests + + my @guests = @{ $node->guests }; + +Return array of users with guest rights on node + +=cut + +sub guests { + my $self = shift; + $self->_set_info unless ($self->{inform}->{name}); + return $self->{inform}->{guests}; +} + +=head2 links + + my $links = @{ $node->links }; + +Return array of links for this node + +=cut + +sub links { + my $self = shift; + $self->_set_info unless ($self->{inform}->{name}); + return $self->{inform}->{links}; } @@ -1683,11 +1718,30 @@ return if ($rv != 200 || !$resbody); - # it seems that response can have multiple line endings - $resbody =~ s/[\r\n]+$//; + my @lines = split(/[\r\n]/,$resbody); + + $self->{inform} = {}; + + ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum}, + $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5); + + return $resbody unless (@lines); + + shift @lines; + + while(my $admin = shift @lines) { + push @{$self->{inform}->{admins}}, $admin; + } + + while(my $guest = shift @lines) { + push @{$self->{inform}->{guests}}, $guest; + } + + while(my $link = shift @lines) { + push @{$self->{inform}->{links}}, $link; + } - ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) = - split(/\t/, $resbody, 5); + return $resbody; } @@ -1707,6 +1761,7 @@ Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> +Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code =head1 COPYRIGHT AND LICENSE