--- trunk/Estraier.pm 2006/01/28 19:18:13 98 +++ trunk/lib/Search/Estraier.pm 2006/11/11 23:34:55 194 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.04_1'; +our $VERSION = '0.08'; =head1 NAME @@ -17,9 +17,14 @@ use Search::Estraier; # create and configure node - my $node = new Search::Estraier::Node; - $node->set_url("http://localhost:1978/node/test"); - $node->set_auth("admin","admin"); + my $node = new Search::Estraier::Node( + url => 'http://localhost:1978/node/test', + user => 'admin', + passwd => 'admin', + create => 1, + label => 'Label for node', + croak_on_error => 1, + ); # create document my $doc = new Search::Estraier::Document; @@ -32,16 +37,19 @@ $doc->add_text("Somewhere over the rainbow. Way up high."); $doc->add_text("There's a land that I heard of once in a lullaby."); - die "error: ", $node->status,"\n" unless ($node->put_doc($doc)); + die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) }); =head2 Simple searcher use Search::Estraier; # create and configure node - my $node = new Search::Estraier::Node; - $node->set_url("http://localhost:1978/node/test"); - $node->set_auth("admin","admin"); + my $node = new Search::Estraier::Node( + url => 'http://localhost:1978/node/test', + user => 'admin', + passwd => 'admin', + croak_on_error => 1, + ); # create condition my $cond = new Search::Estraier::Condition; @@ -50,7 +58,10 @@ $cond->set_phrase("rainbow AND lullaby"); my $nres = $node->search($cond, 0); + if (defined($nres)) { + print "Got ", $nres->hits, " results\n"; + # for each document in results for my $i ( 0 ... $nres->doc_num - 1 ) { # get result document @@ -109,9 +120,34 @@ =head1 Search::Estraier::Document -This class implements Document which is collection of attributes -(key=value), vectors (also key value) display text and hidden text. +This class implements Document which is a single item in Hyper Estraier. 
+ +It is a collection of: + +=over 4 + +=item attributes + +C<< 'key' => 'value' >> pairs which can later be used for filtering of results. +You can add common filters to C<attrindex> in estmaster's C<_conf> +file for better performance. See C<attrindex> in +L<Hyper Estraier P2P Guide|http://hyperestraier.sourceforge.net/nguide-en.html>. + +=item vectors + +also C<< 'key' => 'value' >> pairs + +=item display text + +Text which will be used to create searchable corpus of your index and +included in snippet output. + +=item hidden text + +Text which will be searchable, but will not be included in snippet. + +=back =head2 new @@ -146,11 +182,15 @@ if ($line =~ m/^%VECTOR\t(.+)$/) { my @fields = split(/\t/, $1); - for my $i ( 0 .. ($#fields - 1) ) { - $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ]; - $i++; + if ($#fields % 2 == 1) { + $self->{kwords} = { @fields }; + } else { + warn "can't decode $line\n"; } next; + } elsif ($line =~ m/^%SCORE\t(.+)$/) { + $self->{score} = $1; + next; } elsif ($line =~ m/^%/) { # What is this? comment? #warn "$line\n"; @@ -232,6 +272,53 @@ push @{ $self->{htexts} }, $self->_s($text); } +=head2 add_vectors + +Add vectors + + $doc->add_vectors( + 'vector_name' => 42, + 'another' => 12345, + ); + +=cut + +sub add_vectors { + my $self = shift; + return unless (@_); + + # this is ugly, but works + die "add_vector needs HASH as argument" unless ($#_ % 2 == 1); + + $self->{kwords} = {@_}; +} + +=head2 set_score + +Set the substitute score + + $doc->set_score(12345); + +=cut + +sub set_score { + my $self = shift; + my $score = shift; + return unless (defined($score)); + $self->{score} = $score; +} + +=head2 score + +Get the substitute score + +=cut + +sub score { + my $self = shift; + return -1 unless (defined($self->{score})); + return $self->{score}; +} =head2 id @@ -326,13 +413,17 @@ } if ($self->{kwords}) { - $draft .= '%%VECTOR'; + $draft .= '%VECTOR'; while (my ($key, $value) = each %{ $self->{kwords} }) { $draft .= "\t$key\t$value"; } $draft .= "\n"; } + if (defined($self->{score}) && $self->{score} >= 0) { + $draft .= 
"%SCORE\t" . $self->{score} . "\n"; + } + $draft .= "\n"; $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts}); @@ -591,6 +682,76 @@ } +=head2 set_skip + +Set number of skipped documents from beginning of results + + $cond->set_skip(42); + +Similar to C<offset> in RDBMS. + +=cut + +sub set_skip { + my $self = shift; + $self->{skip} = shift; +} + +=head2 skip + +Return skip for this condition. + + print $cond->skip; + +=cut + +sub skip { + my $self = shift; + return $self->{skip}; +} + + +=head2 set_distinct + + $cond->set_distinct('@author'); + +=cut + +sub set_distinct { + my $self = shift; + $self->{distinct} = shift; +} + +=head2 distinct + +Return distinct attribute + + print $cond->distinct; + +=cut + +sub distinct { + my $self = shift; + return $self->{distinct}; +} + +=head2 set_mask + +Filter out some links when searching. + +Argument is an array of link numbers, starting with 0 (current node). + + $cond->set_mask(qw/0 1 4/); + +=cut + +sub set_mask { + my $self = shift; + return unless (@_); + $self->{mask} = \@_; +} + + package Search::Estraier::ResultDocument; use Carp qw/croak/; @@ -734,6 +895,9 @@ print $res->doc_num; +This will return the real number of documents (limited by C<max>). +If you want to get the total number of hits, see C<hits>. + =cut sub doc_num { @@ -765,7 +929,7 @@ Return specific hint from results. - print $rec->hint( 'VERSION' ); + print $res->hint( 'VERSION' ); Possible hints are: C, C, C, C, C, C, C