--- trunk/Estraier.pm 2006/01/05 14:30:42 23 +++ trunk/Estraier.pm 2006/01/16 21:34:14 77 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.00'; +our $VERSION = '0.04_1'; =head1 NAME @@ -12,8 +12,57 @@ =head1 SYNOPSIS - use Search::Estraier; - my $est = new Search::Estraier(); +=head2 Simple indexer + + use Search::Estraier; + + # create and configure node + my $node = new Search::Estraier::Node; + $node->set_url("http://localhost:1978/node/test"); + $node->set_auth("admin","admin"); + + # create document + my $doc = new Search::Estraier::Document; + + # add attributes + $doc->add_attr('@uri', "http://estraier.gov/example.txt"); + $doc->add_attr('@title', "Over the Rainbow"); + + # add body text to document + $doc->add_text("Somewhere over the rainbow. Way up high."); + $doc->add_text("There's a land that I heard of once in a lullaby."); + + die "error: ", $node->status,"\n" unless ($node->put_doc($doc)); + +=head2 Simple searcher + + use Search::Estraier; + + # create and configure node + my $node = new Search::Estraier::Node; + $node->set_url("http://localhost:1978/node/test"); + $node->set_auth("admin","admin"); + + # create condition + my $cond = new Search::Estraier::Condition; + + # set search phrase + $cond->set_phrase("rainbow AND lullaby"); + + my $nres = $node->search($cond, 0); + if (defined($nres)) { + # for each document in results + for my $i ( 0 ... $nres->doc_num - 1 ) { + # get result document + my $rdoc = $nres->get_doc($i); + # display attributes + print "URI: ", $rdoc->attr('@uri'),"\n"; + print "Title: ", $rdoc->attr('@title'),"\n"; + print $rdoc->snippet,"\n"; + } + } else { + die "error: ", $node->status,"\n"; + } =head1 DESCRIPTION @@ -25,8 +74,14 @@ It is implemented as multiple packages which closly resamble Ruby implementation. It also includes methods to manage nodes. +There are a few examples in the C<scripts> directory of this distribution. + =cut +=head1 Inheritable common methods + +These methods should really move somewhere else. 
+ =head2 _s Remove multiple whitespaces from string, as well as whitespaces at beginning or end @@ -56,6 +111,7 @@ This class implements Document which is collection of attributes (key=value), vectors (also key value) display text and hidden text. + =head2 new Create new document, empty or from draft. @@ -175,6 +231,7 @@ push @{ $self->{htexts} }, $self->_s($text); } + =head2 id Get the ID number of document. If the object has never been registred, C<-1> is returned. @@ -188,6 +245,7 @@ return $self->{id}; } + =head2 attr_names Returns array with attribute names from document object. @@ -198,7 +256,8 @@ sub attr_names { my $self = shift; - croak "attr_names return array, not scalar" if (! wantarray); + return unless ($self->{attrs}); + #croak "attr_names return array, not scalar" if (! wantarray); return sort keys %{ $self->{attrs} }; } @@ -214,8 +273,8 @@ sub attr { my $self = shift; my $name = shift; - - return $self->{'attrs'}->{ $name }; + return unless (defined($name) && $self->{attrs}); + return $self->{attrs}->{ $name }; } @@ -229,10 +288,11 @@ sub texts { my $self = shift; - confess "texts return array, not scalar" if (! wantarray); - return @{ $self->{dtexts} }; + #confess "texts return array, not scalar" if (! wantarray); + return @{ $self->{dtexts} } if ($self->{dtexts}); } + =head2 cat_texts Return whole text as single scalar. @@ -243,9 +303,10 @@ sub cat_texts { my $self = shift; - return join(' ',@{ $self->{dtexts} }); + return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts}); } + =head2 dump_draft Dump draft data from document object. @@ -272,12 +333,13 @@ $draft .= "\n"; - $draft .= join("\n", @{ $self->{dtexts} }) . "\n"; - $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n"; + $draft .= join("\n", @{ $self->{dtexts} }) . "\n" if ($self->{dtexts}); + $draft .= "\t" . join("\n\t", @{ $self->{htexts} }) . "\n" if ($self->{htexts}); return $draft; } + =head2 delete Empty document object @@ -330,6 +392,7 @@ $self ? 
return $self : return undef; } + =head2 set_phrase $cond->set_phrase('search phrase'); @@ -341,6 +404,7 @@ $self->{phrase} = $self->_s( shift ); } + =head2 add_attr $cond->add_attr('@URI STRINC /~dpavlin/'); @@ -353,6 +417,7 @@ push @{ $self->{attrs} }, $self->_s( $attr ); } + =head2 set_order $cond->set_order('@mdate NUMD'); @@ -364,6 +429,7 @@ $self->{order} = shift; } + =head2 set_max $cond->set_max(42); @@ -373,10 +439,11 @@ sub set_max { my $self = shift; my $max = shift; - croak "set_max needs number" unless ($max =~ m/^\d+$/); + croak "set_max needs number, not '$max'" unless ($max =~ m/^\d+$/); $self->{max} = $max; } + =head2 set_options $cond->set_options( SURE => 1 ); @@ -405,6 +472,7 @@ $self->{options} ||= $options->{$option}; } + =head2 phrase Return search phrase. @@ -418,6 +486,7 @@ return $self->{phrase}; } + =head2 order Return search result order. @@ -431,6 +500,7 @@ return $self->{order}; } + =head2 attrs Return search result attrs. @@ -442,9 +512,10 @@ sub attrs { my $self = shift; #croak "attrs return array, not scalar" if (! wantarray); - return @{ $self->{attrs} }; + return @{ $self->{attrs} } if ($self->{attrs}); } + =head2 max Return maximum number of results. @@ -460,6 +531,7 @@ return $self->{max}; } + =head2 options Return options for this condition. @@ -478,10 +550,10 @@ package Search::Estraier::ResultDocument; -use Carp qw/confess croak/; +use Carp qw/croak/; -use Search::Estraier; -our @ISA = qw/Search::Estraier/; +#use Search::Estraier; +#our @ISA = qw/Search::Estraier/; =head1 Search::Estraier::ResultDocument @@ -504,13 +576,12 @@ my $self = {@_}; bless($self, $class); - foreach my $f (qw/uri attrs snippet keywords/) { - croak "missing $f for ResultDocument" unless defined($self->{$f}); - } + croak "missing uri for ResultDocument" unless defined($self->{uri}); $self ? 
return $self : return undef; } + =head2 uri Return URI of result document @@ -539,6 +610,7 @@ return sort keys %{ $self->{attrs} }; } + =head2 attr Returns value of an attribute. @@ -553,6 +625,7 @@ return $self->{attrs}->{ $name }; } + =head2 snippet Return snippet from result document @@ -566,6 +639,7 @@ return $self->{snippet}; } + =head2 keywords Return keywords from result document @@ -580,59 +654,928 @@ } -package Search::Estraier::Master; +package Search::Estraier::NodeResult; -use Carp; +use Carp qw/croak/; -=head1 Search::Estraier::Master +#use Search::Estraier; +#our @ISA = qw/Search::Estraier/; -Controll node master. This requires user with administration priviledges. +=head1 Search::Estraier::NodeResult + +=head2 new + + my $res = new Search::HyperEstraier::NodeResult( + docs => @array_of_rdocs, + hits => %hash_with_hints, + ); =cut -{ - package RequestAgent; - our @ISA = qw(LWP::UserAgent); +sub new { + my $class = shift; + my $self = {@_}; + bless($self, $class); - sub new { - my $self = LWP::UserAgent::new(@_); - $self->agent("Search-Estraier/$Search::Estraer::VERSION"); - $self; + foreach my $f (qw/docs hints/) { + croak "missing $f for ResultDocument" unless defined($self->{$f}); } - sub get_basic_credentials { - my($self, $realm, $uri) = @_; -# return ($user, $password); - } + $self ? return $self : return undef; } +=head2 doc_num + +Return number of documents + + print $res->doc_num; + +=cut + +sub doc_num { + my $self = shift; + return $#{$self->{docs}} + 1; +} + + +=head2 get_doc + +Return single document + + my $doc = $res->get_doc( 42 ); + +Returns undef if document doesn't exist. + +=cut + +sub get_doc { + my $self = shift; + my $num = shift; + croak "expect number as argument, not '$num'" unless ($num =~ m/^\d+$/); + return undef if ($num < 0 || $num > $self->{docs}); + return $self->{docs}->[$num]; +} + + +=head2 hint + +Return specific hint from results. + + print $rec->hint( 'VERSION' ); + +Possible hints are: C, C, C, C, C, C, +C