--- trunk/Estraier.pm 2006/02/21 15:41:57 111 +++ trunk/Estraier.pm 2006/05/08 21:33:37 132 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.04_3'; +our $VERSION = '0.06_1'; =head1 NAME @@ -599,6 +599,35 @@ } +=head2 set_skip + +Set number of skipped documents from beginning of results + + $cond->set_skip(42); + +Similar to C<offset> in RDBMS. + +=cut + +sub set_skip { + my $self = shift; + $self->{skip} = shift; +} + +=head2 skip + +Return skip for this condition. + + print $cond->skip; + +=cut + +sub skip { + my $self = shift; + return $self->{skip}; +} + + package Search::Estraier::ResultDocument; use Carp qw/croak/; @@ -843,6 +872,8 @@ my $node = new Search::HyperEstraier::Node( url => 'http://localhost:1978/node/test', + user => 'admin', + passwd => 'admin' debug => 1, croak_on_error => 1 ); @@ -855,6 +886,14 @@ URL to node +=item user + +specify username for node server authentication + +=item passwd + +password for authentication + =item debug dumps a B<lot> of debugging output @@ -885,10 +924,10 @@ if ($#_ == 0) { $self->{url} = shift; } else { - my $args = {@_}; - %$self = ( %$self, @_ ); + $self->set_auth( $self->{user}, $self->{passwd} ) if ($self->{user}); + warn "## Node debug on\n" if ($self->{debug}); } @@ -1360,88 +1399,32 @@ ); return if ($rv != 200); - my (@docs, $hints); - - my @lines = split(/\n/, $resbody); - return unless (@lines); - - my $border = $lines[0]; - my $isend = 0; - my $lnum = 1; - - while ( $lnum <= $#lines ) { - my $line = $lines[$lnum]; - $lnum++; - - #warn "## $line\n"; - if ($line && $line =~ m/^\Q$border\E(:END)*$/) { - $isend = $1; - last; - } - - if ($line =~ /\t/) { - my ($k,$v) = split(/\t/, $line, 2); - $hints->{$k} = $v; - } - } - - my $snum = $lnum; - - while( ! 
$isend && $lnum <= $#lines ) { - my $line = $lines[$lnum]; - #warn "# $lnum: $line\n"; - $lnum++; - - if ($line && $line =~ m/^\Q$border\E/) { - if ($lnum > $snum) { - my $rdattrs; - my $rdvector; - my $rdsnippet; - - my $rlnum = $snum; - while ($rlnum < $lnum - 1 ) { - #my $rdline = $self->_s($lines[$rlnum]); - my $rdline = $lines[$rlnum]; - $rlnum++; - last unless ($rdline); - if ($rdline =~ /^%/) { - $rdvector = $1 if ($rdline =~ /^%VECTOR\t(.+)$/); - } elsif($rdline =~ /=/) { - $rdattrs->{$1} = $2 if ($rdline =~ /^(.+)=(.+)$/); - } else { - confess "invalid format of response"; - } - } - while($rlnum < $lnum - 1) { - my $rdline = $lines[$rlnum]; - $rlnum++; - $rdsnippet .= "$rdline\n"; - } - #warn Dumper($rdvector, $rdattrs, $rdsnippet); - if (my $rduri = $rdattrs->{'@uri'}) { - push @docs, new Search::Estraier::ResultDocument( - uri => $rduri, - attrs => $rdattrs, - snippet => $rdsnippet, - keywords => $rdvector, - ); - } - } - $snum = $lnum; - #warn "### $line\n"; - $isend = 1 if ($line =~ /:END$/); - } - + my @records = split /--------\[.*?\]--------(?::END)?\r?\n/, $resbody; + my $hintsText = splice @records, 0, 2; # starts with empty record + my $hints = { $hintsText =~ m/^(.*?)\t(.*?)$/gsm }; + + # process records + my $docs = []; + foreach my $record (@records) + { + # split into keys and snippets + my ($keys, $snippet) = $record =~ m/^(.*?)\n\n(.*?)$/s; + + # create document hash + my $doc = { $keys =~ m/^(.*?)=(.*?)$/gsm }; + $doc->{'@keywords'} = $doc->{keywords}; + ($doc->{keywords}) = $keys =~ m/^%VECTOR\t(.*?)$/gm; + $doc->{snippet} = $snippet; + + push @$docs, new Search::Estraier::ResultDocument( + attrs => $doc, + uri => $doc->{'@uri'}, + snippet => $snippet, + keywords => $doc->{'keywords'}, + ); } - if (! 
$isend) { - warn "received result doesn't have :END\n$resbody"; - return; - } - - #warn Dumper(\@docs, $hints); - - return new Search::Estraier::NodeResult( docs => \@docs, hints => $hints ); + return new Search::Estraier::NodeResult( docs => $docs, hints => $hints ); } @@ -1490,6 +1473,7 @@ push @args, 'wwidth=' . $self->{wwidth}; push @args, 'hwidth=' . $self->{hwidth}; push @args, 'awidth=' . $self->{awidth}; + push @args, 'skip=' . $self->{skip} if ($self->{skip}); return join('&', @args); } @@ -1787,6 +1771,7 @@ Dobrica Pavlinusic, E<lt>dpavlin@rot13.orgE<gt> +Robert Klep E<lt>robert@klep.nameE<gt> contributed refactored search code =head1 COPYRIGHT AND LICENSE