--- trunk/Estraier.pm 2006/05/10 14:57:50 142 +++ trunk/Estraier.pm 2006/11/05 16:26:57 191 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.06'; +our $VERSION = '0.08'; =head1 NAME @@ -120,9 +120,34 @@ =head1 Search::Estraier::Document -This class implements Document which is collection of attributes -(key=value), vectors (also key value) display text and hidden text. +This class implements Document which is single item in Hyper Estraier. +It's is collection of: + +=over 4 + +=item attributes + +C<< 'key' => 'value' >> pairs which can later be used for filtering of results + +You can add common filters to C in estmaster's C<_conf> +file for better performance. See C in +L. + +=item vectors + +also C<< 'key' => 'value' >> pairs + +=item display text + +Text which will be used to create searchable corpus of your index and +included in snippet output. + +=item hidden text + +Text which will be searchable, but will not be included in snippet. + +=back =head2 new @@ -157,11 +182,15 @@ if ($line =~ m/^%VECTOR\t(.+)$/) { my @fields = split(/\t/, $1); - for my $i ( 0 .. ($#fields - 1) ) { - $self->{kwords}->{ $fields[ $i ] } = $fields[ $i + 1 ]; - $i++; + if ($#fields % 2 == 1) { + $self->{kwords} = { @fields }; + } else { + warn "can't decode $line\n"; } next; + } elsif ($line =~ m/^%SCORE\t(.+)$/) { + $self->{score} = $1; + next; } elsif ($line =~ m/^%/) { # What is this? comment? #warn "$line\n"; @@ -243,6 +272,53 @@ push @{ $self->{htexts} }, $self->_s($text); } +=head2 add_vectors + +Add a vectors + + $doc->add_vector( + 'vector_name' => 42, + 'another' => 12345, + ); + +=cut + +sub add_vectors { + my $self = shift; + return unless (@_); + + # this is ugly, but works + die "add_vector needs HASH as argument" unless ($#_ % 2 == 1); + + $self->{kwords} = {@_}; +} + +=head2 set_score + +Set the substitute score + + $doc->set_score(12345); + +=cut + +sub set_score { + my $self = shift; + my $score = shift; + return unless (defined($score)); + $self->{score} = $score; +} + +=head2 score + +Get the substitute score + +=cut + +sub score { + my $self = shift; + return -1 unless (defined($self->{score})); + return $self->{score}; +} =head2 id @@ -337,13 +413,17 @@ } if ($self->{kwords}) { - $draft .= '%%VECTOR'; + $draft .= '%VECTOR'; while (my ($key, $value) = each %{ $self->{kwords} }) { $draft .= "\t$key\t$value"; } $draft .= "\n"; } + if (defined($self->{score}) && $self->{score} >= 0) { + $draft .= "%SCORE\t" . $self->{score} . "\n"; + } + $draft .= "\n"; $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts}); @@ -631,6 +711,47 @@ } +=head2 set_distinct + + $cond->set_distinct('@author'); + +=cut + +sub set_distinct { + my $self = shift; + $self->{distinct} = shift; +} + +=head2 distinct + +Return distinct attribute + + print $cond->distinct; + +=cut + +sub distinct { + my $self = shift; + return $self->{distinct}; +} + +=head2 set_mask + +Filter out some links when searching. + +Argument array of link numbers, starting with 0 (current node). + + $cond->set_mask(qw/0 1 4/); + +=cut + +sub set_mask { + my $self = shift; + return unless (@_); + $self->{mask} = \@_; +} + + package Search::Estraier::ResultDocument; use Carp qw/croak/; @@ -1053,7 +1174,7 @@ $node->put_doc( $document_draft ) or die "can't add document"; -Return true on success or false on failture. +Return true on success or false on failure. =cut @@ -1061,11 +1182,15 @@ my $self = shift; my $doc = shift || return; return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); - $self->shuttle_url( $self->{url} . '/put_doc', + if ($self->shuttle_url( $self->{url} . '/put_doc', 'text/x-estraier-draft', $doc->dump_draft, undef - ) == 200; + ) == 200) { + $self->_clear_info; + return 1; + } + return undef; } @@ -1084,11 +1209,15 @@ my $id = shift || return; return unless ($self->{url}); croak "id must be number, not '$id'" unless ($id =~ m/^\d+$/); - $self->shuttle_url( $self->{url} . '/out_doc', + if ($self->shuttle_url( $self->{url} . '/out_doc', 'application/x-www-form-urlencoded', "id=$id", undef - ) == 200; + ) == 200) { + $self->_clear_info; + return 1; + } + return undef; } @@ -1106,11 +1235,15 @@ my $self = shift; my $uri = shift || return; return unless ($self->{url}); - $self->shuttle_url( $self->{url} . '/out_doc', + if ($self->shuttle_url( $self->{url} . '/out_doc', 'application/x-www-form-urlencoded', "uri=" . uri_escape($uri), undef - ) == 200; + ) == 200) { + $self->_clear_info; + return 1; + } + return undef; } @@ -1128,11 +1261,15 @@ my $self = shift; my $doc = shift || return; return unless ($self->{url} && $doc->isa('Search::Estraier::Document')); - $self->shuttle_url( $self->{url} . '/edit_doc', + if ($self->shuttle_url( $self->{url} . '/edit_doc', 'text/x-estraier-draft', $doc->dump_draft, undef - ) == 200; + ) == 200) { + $self->_clear_info; + return 1; + } + return undef; } @@ -1290,7 +1427,7 @@ $path = '/etch_doc' if ($a->{etch}); if ($a->{id}) { - croak "id must be numberm not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/); + croak "id must be number not '$a->{id}'" unless ($a->{id} =~ m/^\d+$/); $arg = 'id=' . $a->{id}; } elsif ($a->{uri}) { $arg = 'uri=' . uri_escape($a->{uri}); @@ -1499,7 +1636,18 @@ push @args, 'wwidth=' . $self->{wwidth}; push @args, 'hwidth=' . $self->{hwidth}; push @args, 'awidth=' . $self->{awidth}; - push @args, 'skip=' . $self->{skip} if ($self->{skip}); + push @args, 'skip=' . $cond->{skip} if ($cond->{skip}); + + if (my $distinct = $cond->distinct) { + push @args, 'distinct=' . uri_escape($distinct); + } + + if ($cond->{mask}) { + my $mask = 0; + map { $mask += ( 2 ** $_ ) } @{ $cond->{mask} }; + + push @args, 'mask=' . $mask if ($mask); + } return join('&', @args); } @@ -1646,7 +1794,7 @@ croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/); $self->shuttle_url( $self->{url} . '/_set_user', - 'text/plain', + 'application/x-www-form-urlencoded', 'name=' . uri_escape($name) . '&mode=' . $mode, undef ) == 200; @@ -1679,9 +1827,10 @@ undef ) == 200) { # refresh node info after adding link - $self->_set_info; + $self->_clear_info; return 1; } + return undef; } =head2 admins @@ -1726,6 +1875,31 @@ return $self->{inform}->{links}; } +=head2 cacheusage + +Return cache usage for a node + + my $cache = $node->cacheusage; + +=cut + +sub cacheusage { + my $self = shift; + + return unless ($self->{url}); + + my $resbody; + my $rv = $self->shuttle_url( $self->{url} . '/cacheusage', + 'text/plain', + undef, + \$resbody, + ); + + return if ($rv != 200 || !$resbody); + + return $resbody; +} + =head2 master Set actions on Hyper Estraier node master (C process) @@ -1828,6 +2002,10 @@ ) or confess "shuttle_url failed"; if ($status == $rest->{status}) { + + # refresh node info after sync + $self->_clear_info if ($action eq 'sync' || $action =~ m/^node(?:add|del|clr)$/); + if ($rest->{returns} && wantarray) { my @results; @@ -1885,7 +2063,7 @@ my @lines = split(/[\r\n]/,$resbody); - $self->{inform} = {}; + $self->_clear_info; ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum}, $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5); @@ -1910,6 +2088,25 @@ } +=head2 _clear_info + +Clear information for node + + $node->_clear_info; + +On next call to C, C