/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by dpavlin, Fri Jan 6 14:39:45 2006 UTC revision 74 by dpavlin, Mon Jan 9 15:28:24 2006 UTC
# Line 4  use 5.008; Line 4  use 5.008;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.00';  our $VERSION = '0.03';
8    
9  =head1 NAME  =head1 NAME
10    
# Line 12  Search::Estraier - pure perl module to u Line 12  Search::Estraier - pure perl module to u
12    
13  =head1 SYNOPSIS  =head1 SYNOPSIS
14    
15    use Search::Estraier;  =head2 Simple indexer
16    my $est = new Search::Estraier();  
17            use Search::Estraier;
18    
19            # create and configure node
20            my $node = new Search::Estraier::Node;
21            $node->set_url("http://localhost:1978/node/test");
22            $node->set_auth("admin","admin");
23    
24            # create document
25            my $doc = new Search::Estraier::Document;
26    
27            # add attributes
28            $doc->add_attr('@uri', "http://estraier.gov/example.txt");
29            $doc->add_attr('@title', "Over the Rainbow");
30    
31            # add body text to document
32            $doc->add_text("Somewhere over the rainbow.  Way up high.");
33            $doc->add_text("There's a land that I heard of once in a lullaby.");
34    
35            die "error: ", $node->status,"\n" unless ($node->put_doc($doc));
36    
37    =head2 Simple searcher
38    
39            use Search::Estraier;
40    
41            # create and configure node
42            my $node = new Search::Estraier::Node;
43            $node->set_url("http://localhost:1978/node/test");
44            $node->set_auth("admin","admin");
45    
46            # create condition
47            my $cond = new Search::Estraier::Condition;
48    
49            # set search phrase
50            $cond->set_phrase("rainbow AND lullaby");
51    
52            my $nres = $node->search($cond, 0);
53            if (defined($nres)) {
54                    # for each document in results
55                    for my $i ( 0 ... $nres->doc_num - 1 ) {
56                            # get result document
57                            my $rdoc = $nres->get_doc($i);
58                            # display attributes
59                            print "URI: ", $rdoc->attr('@uri'),"\n";
60                            print "Title: ", $rdoc->attr('@title'),"\n";
61                            print $rdoc->snippet,"\n";
62                    }
63            } else {
64                    die "error: ", $node->status,"\n";
65            }
66    
67  =head1 DESCRIPTION  =head1 DESCRIPTION
68    
# Line 25  or Hyper Estraier development files on t Line 74  or Hyper Estraier development files on t
74  It is implemented as multiple packages which closely resemble Ruby  It is implemented as multiple packages which closely resemble Ruby
75  implementation. It also includes methods to manage nodes.  implementation. It also includes methods to manage nodes.
76    
77    There are a few examples in the C<scripts> directory of this distribution.
78    
79  =cut  =cut
80    
81  =head1 Inheritable common methods  =head1 Inheritable common methods
# Line 205  Returns array with attribute names from Line 256  Returns array with attribute names from
256    
257  sub attr_names {  sub attr_names {
258          my $self = shift;          my $self = shift;
259          croak "attr_names return array, not scalar" if (! wantarray);          return unless ($self->{attrs});
260            #croak "attr_names return array, not scalar" if (! wantarray);
261          return sort keys %{ $self->{attrs} };          return sort keys %{ $self->{attrs} };
262  }  }
263    
# Line 221  Returns value of an attribute. Line 273  Returns value of an attribute.
273  sub attr {  sub attr {
274          my $self = shift;          my $self = shift;
275          my $name = shift;          my $name = shift;
276            return unless (defined($name) && $self->{attrs});
277          return $self->{'attrs'}->{ $name };          return $self->{attrs}->{ $name };
278  }  }
279    
280    
# Line 236  Returns array with text sentences. Line 288  Returns array with text sentences.
288    
289  sub texts {  sub texts {
290          my $self = shift;          my $self = shift;
291          confess "texts return array, not scalar" if (! wantarray);          #confess "texts return array, not scalar" if (! wantarray);
292          return @{ $self->{dtexts} };          return @{ $self->{dtexts} } if ($self->{dtexts});
293  }  }
294    
295    
# Line 251  Return whole text as single scalar. Line 303  Return whole text as single scalar.
303    
304  sub cat_texts {  sub cat_texts {
305          my $self = shift;          my $self = shift;
306          return join(' ',@{ $self->{dtexts} });          return join(' ',@{ $self->{dtexts} }) if ($self->{dtexts});
307  }  }
308    
309    
# Line 460  Return search result attrs. Line 512  Return search result attrs.
512  sub attrs {  sub attrs {
513          my $self = shift;          my $self = shift;
514          #croak "attrs return array, not scalar" if (! wantarray);          #croak "attrs return array, not scalar" if (! wantarray);
515          return @{ $self->{attrs} };          return @{ $self->{attrs} } if ($self->{attrs});
516  }  }
517    
518    
# Line 524  sub new { Line 576  sub new {
576          my $self = {@_};          my $self = {@_};
577          bless($self, $class);          bless($self, $class);
578    
579          foreach my $f (qw/uri attrs snippet keywords/) {          croak "missing uri for ResultDocument" unless defined($self->{uri});
                 croak "missing $f for ResultDocument" unless defined($self->{$f});  
         }  
580    
581          $self ? return $self : return undef;          $self ? return $self : return undef;
582  }  }
# Line 700  use URI::Escape qw/uri_escape/; Line 750  use URI::Escape qw/uri_escape/;
750    
751    my $node = new Search::HyperEstraier::Node;    my $node = new Search::HyperEstraier::Node;
752    
753    or optionally with C<url> as parameter
754    
755      my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
756    
757  =cut  =cut
758    
759  sub new {  sub new {
# Line 717  sub new { Line 771  sub new {
771          };          };
772          bless($self, $class);          bless($self, $class);
773    
774          if (@_) {          if ($#_ == 0) {
775                  $self->{debug} = shift;                  $self->{url} = shift;
776                  warn "## Node debug on\n";          } else {
777                    my $args = {@_};
778    
779                    $self->{debug} = $args->{debug};
780                    warn "## Node debug on\n" if ($self->{debug});
781          }          }
782    
783          $self ? return $self : return undef;          $self ? return $self : return undef;
# Line 1095  sub _fetch_doc { Line 1153  sub _fetch_doc {
1153    
1154  sub name {  sub name {
1155          my $self = shift;          my $self = shift;
1156          $self->set_info unless ($self->{name});          $self->_set_info unless ($self->{name});
1157          return $self->{name};          return $self->{name};
1158  }  }
1159    
# Line 1108  sub name { Line 1166  sub name {
1166    
1167  sub label {  sub label {
1168          my $self = shift;          my $self = shift;
1169          $self->set_info unless ($self->{label});          $self->_set_info unless ($self->{label});
1170          return $self->{label};          return $self->{label};
1171  }  }
1172    
# Line 1121  sub label { Line 1179  sub label {
1179    
1180  sub doc_num {  sub doc_num {
1181          my $self = shift;          my $self = shift;
1182          $self->set_info if ($self->{dnum} < 0);          $self->_set_info if ($self->{dnum} < 0);
1183          return $self->{dnum};          return $self->{dnum};
1184  }  }
1185    
# Line 1134  sub doc_num { Line 1192  sub doc_num {
1192    
1193  sub word_num {  sub word_num {
1194          my $self = shift;          my $self = shift;
1195          $self->set_info if ($self->{wnum} < 0);          $self->_set_info if ($self->{wnum} < 0);
1196          return $self->{wnum};          return $self->{wnum};
1197  }  }
1198    
# Line 1147  sub word_num { Line 1205  sub word_num {
1205    
1206  sub size {  sub size {
1207          my $self = shift;          my $self = shift;
1208          $self->set_info if ($self->{size} < 0);          $self->_set_info if ($self->{size} < 0);
1209          return $self->{size};          return $self->{size};
1210  }  }
1211    
# Line 1176  sub search { Line 1234  sub search {
1234    
1235          my $rv = $self->shuttle_url( $self->{url} . '/search',          my $rv = $self->shuttle_url( $self->{url} . '/search',
1236                  'application/x-www-form-urlencoded',                  'application/x-www-form-urlencoded',
1237                  $self->cond_to_query( $cond ),                  $self->cond_to_query( $cond, $depth ),
1238                  \$resbody,                  \$resbody,
1239          );          );
1240          return if ($rv != 200);          return if ($rv != 200);
# Line 1268  sub search { Line 1326  sub search {
1326    
1327  =head2 cond_to_query  =head2 cond_to_query
1328    
1329    my $args = $node->cond_to_query( $cond );  Return URI encoded string generated from Search::Estraier::Condition
1330    
1331      my $args = $node->cond_to_query( $cond, $depth );
1332    
1333  =cut  =cut
1334    
# Line 1277  sub cond_to_query { Line 1337  sub cond_to_query {
1337    
1338          my $cond = shift || return;          my $cond = shift || return;
1339          croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));          croak "condition must be Search::Estraier::Condition, not '$cond->isa'" unless ($cond->isa('Search::Estraier::Condition'));
1340            my $depth = shift;
1341    
1342          my @args;          my @args;
1343    
# Line 1286  sub cond_to_query { Line 1347  sub cond_to_query {
1347    
1348          if (my @attrs = $cond->attrs) {          if (my @attrs = $cond->attrs) {
1349                  for my $i ( 0 .. $#attrs ) {                  for my $i ( 0 .. $#attrs ) {
1350                          push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] );                          push @args,'attr' . ($i+1) . '=' . uri_escape( $attrs[$i] ) if ($attrs[$i]);
1351                  }                  }
1352          }          }
1353    
# Line 1304  sub cond_to_query { Line 1365  sub cond_to_query {
1365                  push @args, 'options=' . $options;                  push @args, 'options=' . $options;
1366          }          }
1367    
1368          push @args, 'depth=' . $self->{depth} if ($self->{depth});          push @args, 'depth=' . $depth if ($depth);
1369          push @args, 'wwidth=' . $self->{wwidth};          push @args, 'wwidth=' . $self->{wwidth};
1370          push @args, 'hwidth=' . $self->{hwidth};          push @args, 'hwidth=' . $self->{hwidth};
1371          push @args, 'awidth=' . $self->{awidth};          push @args, 'awidth=' . $self->{awidth};
# Line 1315  sub cond_to_query { Line 1376  sub cond_to_query {
1376    
1377  =head2 shuttle_url  =head2 shuttle_url
1378    
1379  This is method which uses C<IO::Socket::INET> to communicate with Hyper Estraier node  This is method which uses C<LWP::UserAgent> to communicate with Hyper Estraier node
1380  master.  master.
1381    
1382    my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );    my $rv = shuttle_url( $url, $content_type, $req_body, \$resbody );
# Line 1325  body will be saved within object. Line 1386  body will be saved within object.
1386    
1387  =cut  =cut
1388    
1389    use LWP::UserAgent;
1390    
1391  sub shuttle_url {  sub shuttle_url {
1392          my $self = shift;          my $self = shift;
1393    
# Line 1343  sub shuttle_url { Line 1406  sub shuttle_url {
1406                  return -1;                  return -1;
1407          }          }
1408    
1409          my ($host,$port,$query) = ($url->host, $url->port, $url->path);          my $ua = LWP::UserAgent->new;
1410            $ua->agent( "Search-Estraier/$Search::Estraier::VERSION" );
         if ($self->{pxhost}) {  
                 ($host,$port) = ($self->{pxhost}, $self->{pxport});  
                 $query = "http://$host:$port/$query";  
         }  
   
         $query .= '?' . $url->query if ($url->query && ! $reqbody);  
   
         my $headers;  
1411    
1412            my $req;
1413          if ($reqbody) {          if ($reqbody) {
1414                  $headers .= "POST $query HTTP/1.0\r\n";                  $req = HTTP::Request->new(POST => $url);
1415          } else {          } else {
1416                  $headers .= "GET $query HTTP/1.0\r\n";                  $req = HTTP::Request->new(GET => $url);
1417          }          }
1418    
1419          $headers .= "Host: " . $url->host . ":" . $url->port . "\r\n";          $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1420          $headers .= "Connection: close\r\n";          $req->headers->header( 'Connection', 'close' );
1421          $headers .= "User-Agent: Search-Estraier/$Search::Estraier::VERSION\r\n";          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );
1422          $headers .= "Content-Type: $content_type\r\n";          $req->content_type( $content_type );
         $headers .= "Authorization: Basic $self->{auth}\r\n";  
         my $len = 0;  
         {  
                 use bytes;  
                 $len = length($reqbody) if ($reqbody);  
         }  
         $headers .= "Content-Length: $len\r\n";  
         $headers .= "\r\n";  
   
         my $sock = IO::Socket::INET->new(  
                 PeerAddr        => $host,  
                 PeerPort        => $port,  
                 Proto           => 'tcp',  
                 Timeout         => $self->{timeout} || 90,  
         );  
   
         if (! $sock) {  
                 carp "can't open socket to $host:$port";  
                 return -1;  
         }  
   
         warn $headers if ($self->{debug});  
1423    
1424          print $sock $headers or          warn $req->headers->as_string,"\n" if ($self->{debug});
                 carp "can't send headers to network:\n$headers\n" and return -1;  
1425    
1426          if ($reqbody) {          if ($reqbody) {
1427                  warn "$reqbody\n" if ($self->{debug});                  warn "$reqbody\n" if ($self->{debug});
1428                  print $sock $reqbody or                  $req->content( $reqbody );
                         carp "can't send request body to network:\n$$reqbody\n" and return -1;  
1429          }          }
1430    
1431          my $line = <$sock>;          my $res = $ua->request($req) || croak "can't make request to $url: $!";
         chomp($line);  
         my ($schema, $res_status, undef) = split(/  */, $line, 3);  
         return if ($schema !~ /^HTTP/ || ! $res_status);  
   
         $self->{status} = $res_status;  
         warn "## response status: $res_status\n" if ($self->{debug});  
   
         # skip rest of headers  
         $line = <$sock>;  
         while ($line) {  
                 $line = <$sock>;  
                 $line =~ s/[\r\n]+$//;  
                 warn "## ", $line || 'NULL', " ##\n" if ($self->{debug});  
         };  
1432    
1433          # read body          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1434          $len = 0;  
1435          do {          return -1 if (! $res->is_success);
1436                  $len = read($sock, my $buf, 8192);  
1437                  $$resbody .= $buf if ($resbody);          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1438          } while ($len);  
1439            $$resbody .= $res->content;
1440    
1441          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
1442    
# Line 1425  sub shuttle_url { Line 1444  sub shuttle_url {
1444  }  }
1445    
1446    
1447  =head2 set_info  =head2 set_snippet_width
1448    
1449    Set width of snippets in results
1450    
1451      $node->set_snippet_width( $wwidth, $hwidth, $awidth );
1452    
1453    C<$wwidth> specifies whole width of snippet. It's C<480> by default. If it's C<0> snippet
1454    is not sent with results. If it is negative, whole document text is sent instead of snippet.
1455    
1456    C<$hwidth> specifies width of strings from the beginning of string. Default
1457    value is C<96>. Negative or zero value keeps previous value.
1458    
1459    C<$awidth> specifies width of strings around each highlighted word. It's C<96> by default.
1460    If a negative or zero value is provided, the previous value is kept unchanged.
1461    
1462    =cut
1463    
1464    sub set_snippet_width {
1465            my $self = shift;
1466    
1467            my ($wwidth, $hwidth, $awidth) = @_;
1468            $self->{wwidth} = $wwidth;
1469            $self->{hwidth} = $hwidth if ($hwidth >= 0);
1470            $self->{awidth} = $awidth if ($awidth >= 0);
1471    }
1472    
1473    
1474    =head2 set_user
1475    
1476    Manage users of node
1477    
1478      $node->set_user( 'name', $mode );
1479    
1480    C<$mode> can be one of:
1481    
1482    =over 4
1483    
1484    =item 0
1485    
1486    delete account
1487    
1488    =item 1
1489    
1490    set administrative right for user
1491    
1492    =item 2
1493    
1494    set user account as guest
1495    
1496    =back
1497    
1498    Return true on success, otherwise false.
1499    
1500    =cut
1501    
1502    sub set_user {
1503            my $self = shift;
1504            my ($name, $mode) = @_;
1505    
1506            return unless ($self->{url});
1507            croak "mode must be number, not '$mode'" unless ($mode =~ m/^\d+$/);
1508    
1509            $self->shuttle_url( $self->{url} . '/_set_user',
1510                    'text/plain',
1511                    'name=' . uri_escape($name) . '&mode=' . $mode,
1512                    undef
1513            ) == 200;
1514    }
1515    
1516    
1517    =head2 set_link
1518    
1519    Manage node links
1520    
1521      $node->set_link('http://localhost:1978/node/another', 'another node label', $credit);
1522    
1523    If C<$credit> is negative, link is removed.
1524    
1525    =cut
1526    
1527    sub set_link {
1528            my $self = shift;
1529            my ($url, $label, $credit) = @_;
1530    
1531            return unless ($self->{url});
1532            croak "mode credit be number, not '$credit'" unless ($credit =~ m/^\d+$/);
1533    
1534            my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1535            $reqbody .= '&credit=' . $credit if ($credit > 0);
1536    
1537            $self->shuttle_url( $self->{url} . '/_set_link',
1538                    'application/x-www-form-urlencoded',
1539                    $reqbody,
1540                    undef
1541            ) == 200;
1542    }
1543    
1544    
1545    =head1 PRIVATE METHODS
1546    
1547    You could call those directly, but you don't have to. I hope.
1548    
1549    =head2 _set_info
1550    
1551  Set information for node  Set information for node
1552    
1553    $node->set_info;    $node->_set_info;
1554    
1555  =cut  =cut
1556    
1557  sub set_info {  sub _set_info {
1558          my $self = shift;          my $self = shift;
1559    
1560          $self->{status} = -1;          $self->{status} = -1;
# Line 1448  sub set_info { Line 1569  sub set_info {
1569    
1570          return if ($rv != 200 || !$resbody);          return if ($rv != 200 || !$resbody);
1571    
1572          chomp($resbody);          # it seems that response can have multiple line endings
1573            $resbody =~ s/[\r\n]+$//;
1574    
1575          ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =          ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =
1576                  split(/\t/, $resbody, 5);                  split(/\t/, $resbody, 5);

Legend:
Removed from v.53  
changed lines
  Added in v.74

  ViewVC Help
Powered by ViewVC 1.1.26