/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 60 by dpavlin, Sat Jan 7 00:00:15 2006 UTC revision 78 by dpavlin, Mon Jan 16 21:42:09 2006 UTC
# Line 4  use 5.008; Line 4  use 5.008;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.01';  our $VERSION = '0.04_1';
8    
9  =head1 NAME  =head1 NAME
10    
# Line 12  Search::Estraier - pure perl module to u Line 12  Search::Estraier - pure perl module to u
12    
13  =head1 SYNOPSIS  =head1 SYNOPSIS
14    
15    use Search::Estraier;  =head2 Simple indexer
16    my $est = new Search::Estraier();  
17            use Search::Estraier;
18    
19            # create and configure node
20            my $node = new Search::Estraier::Node;
21            $node->set_url("http://localhost:1978/node/test");
22            $node->set_auth("admin","admin");
23    
24            # create document
25            my $doc = new Search::Estraier::Document;
26    
27            # add attributes
28            $doc->add_attr('@uri', "http://estraier.gov/example.txt");
29            $doc->add_attr('@title', "Over the Rainbow");
30    
31            # add body text to document
32            $doc->add_text("Somewhere over the rainbow.  Way up high.");
33            $doc->add_text("There's a land that I heard of once in a lullaby.");
34    
35            die "error: ", $node->status,"\n" unless ($node->put_doc($doc));
36    
37    =head2 Simple searcher
38    
39            use Search::Estraier;
40    
41            # create and configure node
42            my $node = new Search::Estraier::Node;
43            $node->set_url("http://localhost:1978/node/test");
44            $node->set_auth("admin","admin");
45    
46            # create condition
47            my $cond = new Search::Estraier::Condition;
48    
49            # set search phrase
50            $cond->set_phrase("rainbow AND lullaby");
51    
52            my $nres = $node->search($cond, 0);
53            if (defined($nres)) {
54                    # for each document in results
55                    for my $i ( 0 ... $nres->doc_num - 1 ) {
56                            # get result document
57                            my $rdoc = $nres->get_doc($i);
58                            # display attribte
59                            print "URI: ", $rdoc->attr('@uri'),"\n";
60                            print "Title: ", $rdoc->attr('@title'),"\n";
61                            print $rdoc->snippet,"\n";
62                    }
63            } else {
64                    die "error: ", $node->status,"\n";
65            }
66    
67  =head1 DESCRIPTION  =head1 DESCRIPTION
68    
# Line 25  or Hyper Estraier development files on t Line 74  or Hyper Estraier development files on t
74  It is implemented as multiple packages which closly resamble Ruby  It is implemented as multiple packages which closly resamble Ruby
75  implementation. It also includes methods to manage nodes.  implementation. It also includes methods to manage nodes.
76    
77    There are few examples in C<scripts> directory of this distribution.
78    
79  =cut  =cut
80    
81  =head1 Inheritable common methods  =head1 Inheritable common methods
# Line 525  sub new { Line 576  sub new {
576          my $self = {@_};          my $self = {@_};
577          bless($self, $class);          bless($self, $class);
578    
579          foreach my $f (qw/uri attrs snippet keywords/) {          croak "missing uri for ResultDocument" unless defined($self->{uri});
                 croak "missing $f for ResultDocument" unless defined($self->{$f});  
         }  
580    
581          $self ? return $self : return undef;          $self ? return $self : return undef;
582  }  }
# Line 701  use URI::Escape qw/uri_escape/; Line 750  use URI::Escape qw/uri_escape/;
750    
751    my $node = new Search::HyperEstraier::Node;    my $node = new Search::HyperEstraier::Node;
752    
753    or optionally with C<url> as parametar
754    
755      my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
756    
757    or in more verbose form
758    
759      my $node = new Search::HyperEstraier::Node(
760            url => 'http://localhost:1978/node/test',
761            debug => 1,
762            croak_on_error => 1
763      );
764    
765    with following arguments:
766    
767    =over 4
768    
769    =item url
770    
771    URL to node
772    
773    =item debug
774    
775    dumps a B<lot> of debugging output
776    
777    =item croak_on_error
778    
779    very helpful during development. It will croak on all errors instead of
780    silently returning C<-1> (which is convention of Hyper Estraier API in other
781    languages).
782    
783    =back
784    
785  =cut  =cut
786    
787  sub new {  sub new {
# Line 718  sub new { Line 799  sub new {
799          };          };
800          bless($self, $class);          bless($self, $class);
801    
802          my $args = {@_};          if ($#_ == 0) {
803                    $self->{url} = shift;
804            } else {
805                    my $args = {@_};
806    
807                    %$self = ( %$self, @_ );
808    
809          $self->{debug} = $args->{debug};                  warn "## Node debug on\n" if ($self->{debug});
810          warn "## Node debug on\n" if ($self->{debug});          }
811    
812          $self ? return $self : return undef;          $self ? return $self : return undef;
813  }  }
# Line 1177  sub search { Line 1263  sub search {
1263    
1264          my $rv = $self->shuttle_url( $self->{url} . '/search',          my $rv = $self->shuttle_url( $self->{url} . '/search',
1265                  'application/x-www-form-urlencoded',                  'application/x-www-form-urlencoded',
1266                  $self->cond_to_query( $cond ),                  $self->cond_to_query( $cond, $depth ),
1267                  \$resbody,                  \$resbody,
1268          );          );
1269          return if ($rv != 200);          return if ($rv != 200);
# Line 1271  sub search { Line 1357  sub search {
1357    
1358  Return URI encoded string generated from Search::Estraier::Condition  Return URI encoded string generated from Search::Estraier::Condition
1359    
1360    my $args = $node->cond_to_query( $cond );    my $args = $node->cond_to_query( $cond, $depth );
1361    
1362  =cut  =cut
1363    
# Line 1280  sub cond_to_query { Line 1366  sub cond_to_query {
1366    
1367          my $cond = shift || return;          my $cond = shift || return;
1368          croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));          croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1369            my $depth = shift;
1370    
1371          my @args;          my @args;
1372    
# Line 1289  sub cond_to_query { Line 1376  sub cond_to_query {
1376    
1377          if (my @attrs = $cond->attrs) {          if (my @attrs = $cond->attrs) {
1378                  for my $i ( 0 .. $#attrs ) {                  for my $i ( 0 .. $#attrs ) {
1379                          push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] );                          push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1380                  }                  }
1381          }          }
1382    
# Line 1307  sub cond_to_query { Line 1394  sub cond_to_query {
1394                  push @args, 'options=' . $options;                  push @args, 'options=' . $options;
1395          }          }
1396    
1397          push @args, 'depth=' . $self->{depth} if ($self->{depth});          push @args, 'depth=' . $depth if ($depth);
1398          push @args, 'wwidth=' . $self->{wwidth};          push @args, 'wwidth=' . $self->{wwidth};
1399          push @args, 'hwidth=' . $self->{hwidth};          push @args, 'hwidth=' . $self->{hwidth};
1400          push @args, 'awidth=' . $self->{awidth};          push @args, 'awidth=' . $self->{awidth};
# Line 1318  sub cond_to_query { Line 1405  sub cond_to_query {
1405    
1406  =head2 shuttle_url  =head2 shuttle_url
1407    
1408  This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node  This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1409  master.  master.
1410    
1411    my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );    my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
# Line 1360  sub shuttle_url { Line 1447  sub shuttle_url {
1447    
1448          $req->headers->header( 'Host' => $url->host . ":" . $url->port );          $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1449          $req->headers->header( 'Connection', 'close' );          $req->headers->header( 'Connection', 'close' );
1450          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1451          $req->content_type( $content_type );          $req->content_type( $content_type );
1452    
1453          warn $req->headers->as_string,"\n" if ($self->{debug});          warn $req->headers->as_string,"\n" if ($self->{debug});
# Line 1374  sub shuttle_url { Line 1461  sub shuttle_url {
1461    
1462          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1463    
         return -1 if (! $res->is_success);  
   
1464          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1465    
1466            if (! $res->is_success) {
1467                    if ($self->{croak_on_error}) {
1468                            croak("can't get $url: ",$res->status_line);
1469                    } else {
1470                            return -1;
1471                    }
1472            }
1473    
1474          $$resbody .= $res->content;          $$resbody .= $res->content;
1475    
1476          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
# Line 1477  sub set_link { Line 1570  sub set_link {
1570          $reqbody .= '&credit=' . $credit if ($credit > 0);          $reqbody .= '&credit=' . $credit if ($credit > 0);
1571    
1572          $self->shuttle_url( $self->{url} . '/_set_link',          $self->shuttle_url( $self->{url} . '/_set_link',
1573                  'text/plain',                  'application/x-www-form-urlencoded',
1574                  $reqbody,                  $reqbody,
1575                  undef                  undef
1576          ) == 200;          ) == 200;

Legend:
Removed from v.60  
changed lines
  Added in v.78

  ViewVC Help
Powered by ViewVC 1.1.26