--- trunk/Estraier.pm 2006/01/06 21:05:05 58 +++ trunk/Estraier.pm 2006/01/28 17:58:22 96 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.00'; +our $VERSION = '0.04_1'; =head1 NAME @@ -12,8 +12,57 @@ =head1 SYNOPSIS - use Search::Estraier; - my $est = new Search::Estraier(); +=head2 Simple indexer + + use Search::Estraier; + + # create and configure node + my $node = new Search::Estraier::Node; + $node->set_url("http://localhost:1978/node/test"); + $node->set_auth("admin","admin"); + + # create document + my $doc = new Search::Estraier::Document; + + # add attributes + $doc->add_attr('@uri', "http://estraier.gov/example.txt"); + $doc->add_attr('@title', "Over the Rainbow"); + + # add body text to document + $doc->add_text("Somewhere over the rainbow. Way up high."); + $doc->add_text("There's a land that I heard of once in a lullaby."); + + die "error: ", $node->status,"\n" unless ($node->put_doc($doc)); + +=head2 Simple searcher + + use Search::Estraier; + + # create and configure node + my $node = new Search::Estraier::Node; + $node->set_url("http://localhost:1978/node/test"); + $node->set_auth("admin","admin"); + + # create condition + my $cond = new Search::Estraier::Condition; + + # set search phrase + $cond->set_phrase("rainbow AND lullaby"); + + my $nres = $node->search($cond, 0); + if (defined($nres)) { + # for each document in results + for my $i ( 0 ... $nres->doc_num - 1 ) { + # get result document + my $rdoc = $nres->get_doc($i); + # display attribute + print "URI: ", $rdoc->attr('@uri'),"\n"; + print "Title: ", $rdoc->attr('@title'),"\n"; + print $rdoc->snippet,"\n"; + } + } else { + die "error: ", $node->status,"\n"; + } =head1 DESCRIPTION @@ -25,6 +74,8 @@ It is implemented as multiple packages which closly resamble Ruby implementation. It also includes methods to manage nodes. +There are few examples in C directory of this distribution. 
+ =cut =head1 Inheritable common methods @@ -41,7 +92,8 @@ =cut sub _s { - my $text = $_[1] || return; + my $text = $_[1]; + return unless defined($text); $text =~ s/\s\s+/ /gs; $text =~ s/^\s+//; $text =~ s/\s+$//; @@ -106,12 +158,12 @@ } elsif ($line =~ m/^$/) { $in_text = 1; next; - } elsif ($line =~ m/^(.+)=(.+)$/) { + } elsif ($line =~ m/^(.+)=(.*)$/) { $self->{attrs}->{ $1 } = $2; next; } - warn "draft ignored: $line\n"; + warn "draft ignored: '$line'\n"; } } @@ -205,7 +257,8 @@ sub attr_names { my $self = shift; - croak "attr_names return array, not scalar" if (! wantarray); + return unless ($self->{attrs}); + #croak "attr_names return array, not scalar" if (! wantarray); return sort keys %{ $self->{attrs} }; } @@ -221,8 +274,8 @@ sub attr { my $self = shift; my $name = shift; - - return $self->{'attrs'}->{ $name }; + return unless (defined($name) && $self->{attrs}); + return $self->{attrs}->{ $name }; } @@ -236,8 +289,8 @@ sub texts { my $self = shift; - confess "texts return array, not scalar" if (! wantarray); - return @{ $self->{dtexts} }; + #confess "texts return array, not scalar" if (! wantarray); + return @{ $self->{dtexts} } if ($self->{dtexts}); } @@ -251,7 +304,7 @@ sub cat_texts { my $self = shift; - return join(' ',@{ $self->{dtexts} }); + return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts}); } @@ -268,7 +321,8 @@ my $draft; foreach my $attr_name (sort keys %{ $self->{attrs} }) { - $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n"; + next unless(my $v = $self->{attrs}->{$attr_name}); + $draft .= $attr_name . '=' . $v . "\n"; } if ($self->{kwords}) { @@ -460,7 +514,7 @@ sub attrs { my $self = shift; #croak "attrs return array, not scalar" if (! 
wantarray); - return @{ $self->{attrs} }; + return @{ $self->{attrs} } if ($self->{attrs}); } @@ -524,9 +578,7 @@ my $self = {@_}; bless($self, $class); - foreach my $f (qw/uri attrs snippet keywords/) { - croak "missing $f for ResultDocument" unless defined($self->{$f}); - } + croak "missing uri for ResultDocument" unless defined($self->{uri}); $self ? return $self : return undef; } @@ -685,6 +737,18 @@ return $self->{hints}->{$key}; } +=head2 hints + +More perlish version of C<hint>. This one returns a reference to the hints hash. + + my $hints = $rec->hints; + +=cut + +sub hints { + my $self = shift; + return $self->{hints}; +} package Search::Estraier::Node; @@ -700,6 +764,38 @@ my $node = new Search::HyperEstraier::Node; +or optionally with C<url> as parameter + + my $node = new Search::Estraier::Node( 'http://localhost:1978/node/test' ); + +or in more verbose form + + my $node = new Search::Estraier::Node( + url => 'http://localhost:1978/node/test', + debug => 1, + croak_on_error => 1 + ); + +with the following arguments: + +=over 4 + +=item url + +URL to node + +=item debug + +dumps a B<lot> of debugging output + +=item croak_on_error + +very helpful during development. It will croak on all errors instead of +silently returning C<-1> (which is convention of Hyper Estraier API in other +languages). + +=back + =cut sub new { @@ -717,10 +813,15 @@ }; bless($self, $class); - my $args = {@_}; + if ($#_ == 0) { + $self->{url} = shift; + } else { + my $args = {@_}; + + %$self = ( %$self, @_ ); - $self->{debug} = $args->{debug}; - warn "## Node debug on\n" if ($self->{debug}); + warn "## Node debug on\n" if ($self->{debug}); + } $self ? return $self : return undef; } @@ -1176,7 +1277,7 @@ my $rv = $self->shuttle_url( $self->{url} . 
'/search', 'application/x-www-form-urlencoded', - $self->cond_to_query( $cond ), + $self->cond_to_query( $cond, $depth ), \$resbody, ); return if ($rv != 200); @@ -1270,7 +1371,7 @@ Return URI encoded string generated from Search::Estraier::Condition - my $args = $node->cond_to_query( $cond ); + my $args = $node->cond_to_query( $cond, $depth ); =cut @@ -1279,6 +1380,7 @@ my $cond = shift || return; croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition')); + my $depth = shift; my @args; @@ -1288,7 +1390,7 @@ if (my @attrs = $cond->attrs) { for my $i ( 0 .. $#attrs ) { - push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ); + push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]); } } @@ -1306,7 +1408,7 @@ push @args, 'options=' . $options; } - push @args, 'depth=' . $self->{depth} if ($self->{depth}); + push @args, 'depth=' . $depth if ($depth); push @args, 'wwidth=' . $self->{wwidth}; push @args, 'hwidth=' . $self->{hwidth}; push @args, 'awidth=' . $self->{awidth}; @@ -1317,7 +1419,7 @@ =head2 shuttle_url -This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node +This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node master. my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody ); @@ -1327,6 +1429,8 @@ =cut +use LWP::UserAgent; + sub shuttle_url { my $self = shift; @@ -1345,81 +1449,43 @@ return -1; } - my ($host,$port,$query) = ($url->host, $url->port, $url->path); + my $ua = LWP::UserAgent->new; + $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" ); - if ($self->{pxhost}) { - ($host,$port) = ($self->{pxhost}, $self->{pxport}); - $query = "http://$host:$port/$query"; + my $req; + if ($reqbody) { + $req = HTTP::Request->new(POST => $url); + } else { + $req = HTTP::Request->new(GET => $url); } - $query .= '?' . $url->query if ($url->query && ! $reqbody); + $req->headers->header( 'Host' => $url->host . ":" . 
$url->port ); + $req->headers->header( 'Connection', 'close' ); + $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth}); + $req->content_type( $content_type ); - my $headers; + warn $req->headers->as_string,"\n" if ($self->{debug}); if ($reqbody) { - $headers .= "POST $query HTTP/1.0\r\n"; - } else { - $headers .= "GET $query HTTP/1.0\r\n"; + warn "$reqbody\n" if ($self->{debug}); + $req->content( $reqbody ); } - $headers .= "Host: " . $url->host . ":" . $url->port . "\r\n"; - $headers .= "Connection: close\r\n"; - $headers .= "User-Agent: Search-Estraier/$Search::Estraier::VERSION\r\n"; - $headers .= "Content-Type: $content_type\r\n"; - $headers .= "Authorization: Basic $self->{auth}\r\n"; - my $len = 0; - { - use bytes; - $len = length($reqbody) if ($reqbody); - } - $headers .= "Content-Length: $len\r\n"; - $headers .= "\r\n"; - - my $sock = IO::Socket::INET->new( - PeerAddr => $host, - PeerPort => $port, - Proto => 'tcp', - Timeout => $self->{timeout} || 90, - ); - - if (! $sock) { - carp "can't open socket to $host:$port"; - return -1; - } + my $res = $ua->request($req) || croak "can't make request to $url: $!"; - warn $headers if ($self->{debug}); + warn "## response status: ",$res->status_line,"\n" if ($self->{debug}); - print $sock $headers or - carp "can't send headers to network:\n$headers\n" and return -1; + ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); - if ($reqbody) { - warn "$reqbody\n" if ($self->{debug}); - print $sock $reqbody or - carp "can't send request body to network:\n$$reqbody\n" and return -1; + if (! $res->is_success) { + if ($self->{croak_on_error}) { + croak("can't get $url: ",$res->status_line); + } else { + return -1; + } } - my $line = <$sock>; - chomp($line); - my ($schema, $res_status, undef) = split(/ */, $line, 3); - return if ($schema !~ /^HTTP/ || ! 
$res_status); - - $self->{status} = $res_status; - warn "## response status: $res_status\n" if ($self->{debug}); - - # skip rest of headers - $line = <$sock>; - while ($line) { - $line = <$sock>; - $line =~ s/[\r\n]+$//; - warn "## ", $line || 'NULL', " ##\n" if ($self->{debug}); - }; - - # read body - $len = 0; - do { - $len = read($sock, my $buf, 8192); - $$resbody .= $buf if ($resbody); - } while ($len); + $$resbody .= $res->content; warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug}); @@ -1518,7 +1584,7 @@ $reqbody .= '&credit=' . $credit if ($credit > 0); $self->shuttle_url( $self->{url} . '/_set_link', - 'text/plain', + 'application/x-www-form-urlencoded', $reqbody, undef ) == 200;