--- trunk/Estraier.pm 2006/01/07 00:00:15 60 +++ trunk/Estraier.pm 2006/01/28 18:19:47 97 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.01'; +our $VERSION = '0.04_1'; =head1 NAME @@ -12,8 +12,57 @@ =head1 SYNOPSIS - use Search::Estraier; - my $est = new Search::Estraier(); +=head2 Simple indexer + + use Search::Estraier; + + # create and configure node + my $node = new Search::Estraier::Node; + $node->set_url("http://localhost:1978/node/test"); + $node->set_auth("admin","admin"); + + # create document + my $doc = new Search::Estraier::Document; + + # add attributes + $doc->add_attr('@uri', "http://estraier.gov/example.txt"); + $doc->add_attr('@title', "Over the Rainbow"); + + # add body text to document + $doc->add_text("Somewhere over the rainbow. Way up high."); + $doc->add_text("There's a land that I heard of once in a lullaby."); + + die "error: ", $node->status,"\n" unless ($node->put_doc($doc)); + +=head2 Simple searcher + + use Search::Estraier; + + # create and configure node + my $node = new Search::Estraier::Node; + $node->set_url("http://localhost:1978/node/test"); + $node->set_auth("admin","admin"); + + # create condition + my $cond = new Search::Estraier::Condition; + + # set search phrase + $cond->set_phrase("rainbow AND lullaby"); + + my $nres = $node->search($cond, 0); + if (defined($nres)) { + # for each document in results + for my $i ( 0 ... $nres->doc_num - 1 ) { + # get result document + my $rdoc = $nres->get_doc($i); + # display attribte + print "URI: ", $rdoc->attr('@uri'),"\n"; + print "Title: ", $rdoc->attr('@title'),"\n"; + print $rdoc->snippet,"\n"; + } + } else { + die "error: ", $node->status,"\n"; + } =head1 DESCRIPTION @@ -25,6 +74,8 @@ It is implemented as multiple packages which closly resamble Ruby implementation. It also includes methods to manage nodes. +There are few examples in C directory of this distribution. + =cut =head1 Inheritable common methods @@ -41,7 +92,8 @@ =cut sub _s { - my $text = $_[1] || return; + my $text = $_[1]; + return unless defined($text); $text =~ s/\s\s+/ /gs; $text =~ s/^\s+//; $text =~ s/\s+$//; @@ -106,12 +158,12 @@ } elsif ($line =~ m/^$/) { $in_text = 1; next; - } elsif ($line =~ m/^(.+)=(.+)$/) { + } elsif ($line =~ m/^(.+)=(.*)$/) { $self->{attrs}->{ $1 } = $2; next; } - warn "draft ignored: $line\n"; + warn "draft ignored: '$line'\n"; } } @@ -269,7 +321,8 @@ my $draft; foreach my $attr_name (sort keys %{ $self->{attrs} }) { - $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n"; + next unless defined(my $v = $self->{attrs}->{$attr_name}); + $draft .= $attr_name . '=' . $v . "\n"; } if ($self->{kwords}) { @@ -525,9 +578,7 @@ my $self = {@_}; bless($self, $class); - foreach my $f (qw/uri attrs snippet keywords/) { - croak "missing $f for ResultDocument" unless defined($self->{$f}); - } + croak "missing uri for ResultDocument" unless defined($self->{uri}); $self ? return $self : return undef; } @@ -686,6 +737,18 @@ return $self->{hints}->{$key}; } +=head2 hints + +More perlish version of C. This one returns hash. + + my %hints = $rec->hints; + +=cut + +sub hints { + my $self = shift; + return $self->{hints}; +} package Search::Estraier::Node; @@ -701,6 +764,38 @@ my $node = new Search::HyperEstraier::Node; +or optionally with C as parametar + + my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' ); + +or in more verbose form + + my $node = new Search::HyperEstraier::Node( + url => 'http://localhost:1978/node/test', + debug => 1, + croak_on_error => 1 + ); + +with following arguments: + +=over 4 + +=item url + +URL to node + +=item debug + +dumps a B of debugging output + +=item croak_on_error + +very helpful during development. It will croak on all errors instead of +silently returning C<-1> (which is convention of Hyper Estraier API in other +languages). + +=back + =cut sub new { @@ -718,10 +813,15 @@ }; bless($self, $class); - my $args = {@_}; + if ($#_ == 0) { + $self->{url} = shift; + } else { + my $args = {@_}; - $self->{debug} = $args->{debug}; - warn "## Node debug on\n" if ($self->{debug}); + %$self = ( %$self, @_ ); + + warn "## Node debug on\n" if ($self->{debug}); + } $self ? return $self : return undef; } @@ -1177,7 +1277,7 @@ my $rv = $self->shuttle_url( $self->{url} . '/search', 'application/x-www-form-urlencoded', - $self->cond_to_query( $cond ), + $self->cond_to_query( $cond, $depth ), \$resbody, ); return if ($rv != 200); @@ -1271,7 +1371,7 @@ Return URI encoded string generated from Search::Estraier::Condition - my $args = $node->cond_to_query( $cond ); + my $args = $node->cond_to_query( $cond, $depth ); =cut @@ -1280,6 +1380,7 @@ my $cond = shift || return; croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition')); + my $depth = shift; my @args; @@ -1289,7 +1390,7 @@ if (my @attrs = $cond->attrs) { for my $i ( 0 .. $#attrs ) { - push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ); + push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]); } } @@ -1307,7 +1408,7 @@ push @args, 'options=' . $options; } - push @args, 'depth=' . $self->{depth} if ($self->{depth}); + push @args, 'depth=' . $depth if ($depth); push @args, 'wwidth=' . $self->{wwidth}; push @args, 'hwidth=' . $self->{hwidth}; push @args, 'awidth=' . $self->{awidth}; @@ -1318,7 +1419,7 @@ =head2 shuttle_url -This is method which uses C to communicate with Hyper Estraier node +This is method which uses C to communicate with Hyper Estraier node master. my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody ); @@ -1360,7 +1461,7 @@ $req->headers->header( 'Host' => $url->host . ":" . $url->port ); $req->headers->header( 'Connection', 'close' ); - $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ); + $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth}); $req->content_type( $content_type ); warn $req->headers->as_string,"\n" if ($self->{debug}); @@ -1374,10 +1475,16 @@ warn "## response status: ",$res->status_line,"\n" if ($self->{debug}); - return -1 if (! $res->is_success); - ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); + if (! $res->is_success) { + if ($self->{croak_on_error}) { + croak("can't get $url: ",$res->status_line); + } else { + return -1; + } + } + $$resbody .= $res->content; warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug}); @@ -1477,7 +1584,7 @@ $reqbody .= '&credit=' . $credit if ($credit > 0); $self->shuttle_url( $self->{url} . '/_set_link', - 'text/plain', + 'application/x-www-form-urlencoded', $reqbody, undef ) == 200;