/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 97 by dpavlin, Sat Jan 28 18:19:47 2006 UTC revision 111 by dpavlin, Tue Feb 21 15:41:57 2006 UTC
# Line 4  use 5.008; Line 4  use 5.008;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.04_1';  our $VERSION = '0.04_3';
8    
9  =head1 NAME  =head1 NAME
10    
# Line 17  Search::Estraier - pure perl module to u Line 17  Search::Estraier - pure perl module to u
17          use Search::Estraier;          use Search::Estraier;
18    
19          # create and configure node          # create and configure node
20          my $node = new Search::Estraier::Node;          my $node = new Search::Estraier::Node(
21          $node->set_url("http://localhost:1978/node/test");                  url => 'http://localhost:1978/node/test',
22          $node->set_auth("admin","admin");                  user => 'admin',
23                    passwd => 'admin'
24            );
25    
26          # create document          # create document
27          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
# Line 32  Search::Estraier - pure perl module to u Line 34  Search::Estraier - pure perl module to u
34          $doc->add_text("Somewhere over the rainbow.  Way up high.");          $doc->add_text("Somewhere over the rainbow.  Way up high.");
35          $doc->add_text("There's a land that I heard of once in a lullaby.");          $doc->add_text("There's a land that I heard of once in a lullaby.");
36    
37          die "error: ", $node->status,"\n" unless ($node->put_doc($doc));          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
38    
39  =head2 Simple searcher  =head2 Simple searcher
40    
41          use Search::Estraier;          use Search::Estraier;
42    
43          # create and configure node          # create and configure node
44          my $node = new Search::Estraier::Node;          my $node = new Search::Estraier::Node(
45          $node->set_url("http://localhost:1978/node/test");                  url => 'http://localhost:1978/node/test',
46          $node->set_auth("admin","admin");                  user => 'admin',
47                    passwd => 'admin',
48                    croak_on_error => 1,
49            );
50    
51          # create condition          # create condition
52          my $cond = new Search::Estraier::Condition;          my $cond = new Search::Estraier::Condition;
# Line 50  Search::Estraier - pure perl module to u Line 55  Search::Estraier - pure perl module to u
55          $cond->set_phrase("rainbow AND lullaby");          $cond->set_phrase("rainbow AND lullaby");
56    
57          my $nres = $node->search($cond, 0);          my $nres = $node->search($cond, 0);
58    
59          if (defined($nres)) {          if (defined($nres)) {
60                    print "Got ", $nres->hits, " results\n";
61    
62                  # for each document in results                  # for each document in results
63                  for my $i ( 0 ... $nres->doc_num - 1 ) {                  for my $i ( 0 ... $nres->doc_num - 1 ) {
64                          # get result document                          # get result document
# Line 370  sub delete { Line 378  sub delete {
378    
379  package Search::Estraier::Condition;  package Search::Estraier::Condition;
380    
381  use Carp qw/confess croak/;  use Carp qw/carp confess croak/;
382    
383  use Search::Estraier;  use Search::Estraier;
384  our @ISA = qw/Search::Estraier/;  our @ISA = qw/Search::Estraier/;
# Line 448  sub set_max { Line 456  sub set_max {
456    
457  =head2 set_options  =head2 set_options
458    
459    $cond->set_options( SURE => 1 );    $cond->set_options( 'SURE' );
460    
461      $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
462    
463    Possible options are:
464    
465    =over 8
466    
467    =item SURE
468    
469    check every N-gram
470    
471    =item USUAL
472    
473    check every second N-gram
474    
475    =item FAST
476    
477    check every third N-gram
478    
479    =item AGITO
480    
481    check every fourth N-gram
482    
483    =item NOIDF
484    
485    don't perform TF-IDF tuning
486    
487    =item SIMPLE
488    
489    use simplified query phrase
490    
491    =back
492    
493    Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
494    options;
495    
496    This option changed in version C<0.04> of this module. It's backwards compatibile.
497    
498  =cut  =cut
499    
500  my $options = {  my $options = {
         # check N-gram keys skipping by three  
501          SURE => 1 << 0,          SURE => 1 << 0,
         # check N-gram keys skipping by two  
502          USUAL => 1 << 1,          USUAL => 1 << 1,
         # without TF-IDF tuning  
503          FAST => 1 << 2,          FAST => 1 << 2,
         # with the simplified phrase  
504          AGITO => 1 << 3,          AGITO => 1 << 3,
         # check every N-gram key  
505          NOIDF => 1 << 4,          NOIDF => 1 << 4,
         # check N-gram keys skipping by one  
506          SIMPLE => 1 << 10,          SIMPLE => 1 << 10,
507  };  };
508    
509  sub set_options {  sub set_options {
510          my $self = shift;          my $self = shift;
511          my $option = shift;          my $opt = 0;
512          confess "unknown option" unless ($options->{$option});          foreach my $option (@_) {
513          $self->{options} ||= $options->{$option};                  my $mask;
514                    unless ($mask = $options->{$option}) {
515                            if ($option eq '1') {
516                                    next;
517                            } else {
518                                    croak "unknown option $option";
519                            }
520                    }
521                    $opt += $mask;
522            }
523            $self->{options} = $opt;
524  }  }
525    
526    
# Line 693  Return number of documents Line 742  Return number of documents
742    
743    print $res->doc_num;    print $res->doc_num;
744    
745    This will return real number of documents (limited by C<max>).
746    If you want to get total number of hits, see C<hits>.
747    
748  =cut  =cut
749    
750  sub doc_num {  sub doc_num {
# Line 724  sub get_doc { Line 776  sub get_doc {
776    
777  Return specific hint from results.  Return specific hint from results.
778    
779    print $rec->hint( 'VERSION' );    print $res->hint( 'VERSION' );
780    
781  Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,  Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
782  C<TIME>, C<LINK#n>, C<VIEW>.  C<TIME>, C<LINK#n>, C<VIEW>.
# Line 741  sub hint { Line 793  sub hint {
793    
794  More perlish version of C<hint>. This one returns hash.  More perlish version of C<hint>. This one returns hash.
795    
796    my %hints = $rec->hints;    my %hints = $res->hints;
797    
798  =cut  =cut
799    
# Line 750  sub hints { Line 802  sub hints {
802          return $self->{hints};          return $self->{hints};
803  }  }
804    
805    =head2 hits
806    
807    Syntaxtic sugar for total number of hits for this query
808    
809      print $res->hits;
810    
811    It's same as
812    
813      print $res->hint('HIT');
814    
815    but shorter.
816    
817    =cut
818    
819    sub hits {
820            my $self = shift;
821            return $self->{hints}->{'HIT'} || 0;
822    }
823    
824  package Search::Estraier::Node;  package Search::Estraier::Node;
825    
826  use Carp qw/carp croak confess/;  use Carp qw/carp croak confess/;
# Line 803  sub new { Line 874  sub new {
874          my $self = {          my $self = {
875                  pxport => -1,                  pxport => -1,
876                  timeout => 0,   # this used to be -1                  timeout => 0,   # this used to be -1
                 dnum => -1,  
                 wnum => -1,  
                 size => -1.0,  
877                  wwidth => 480,                  wwidth => 480,
878                  hwidth => 96,                  hwidth => 96,
879                  awidth => 96,                  awidth => 96,
880                  status => -1,                  status => -1,
881          };          };
882    
883          bless($self, $class);          bless($self, $class);
884    
885          if ($#_ == 0) {          if ($#_ == 0) {
# Line 823  sub new { Line 892  sub new {
892                  warn "## Node debug on\n" if ($self->{debug});                  warn "## Node debug on\n" if ($self->{debug});
893          }          }
894    
895            $self->{inform} = {
896                    dnum => -1,
897                    wnum => -1,
898                    size => -1.0,
899            };
900    
901          $self ? return $self : return undef;          $self ? return $self : return undef;
902  }  }
903    
# Line 1101  Get ID of document specified by URI Line 1176  Get ID of document specified by URI
1176    
1177    my $id = $node->uri_to_id( 'file:///document/uri/42' );    my $id = $node->uri_to_id( 'file:///document/uri/42' );
1178    
1179    This method won't croak, even if using C<croak_on_error>.
1180    
1181  =cut  =cut
1182    
1183  sub uri_to_id {  sub uri_to_id {
1184          my $self = shift;          my $self = shift;
1185          my $uri = shift || return;          my $uri = shift || return;
1186          return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1 );          return $self->_fetch_doc( uri => $uri, path => '/uri_to_id', chomp_resbody => 1, croak_on_error => 0 );
1187  }  }
1188    
1189    
# Line 1166  sub _fetch_doc { Line 1243  sub _fetch_doc {
1243                  'application/x-www-form-urlencoded',                  'application/x-www-form-urlencoded',
1244                  $arg,                  $arg,
1245                  \$resbody,                  \$resbody,
1246                    $a->{croak_on_error},
1247          );          );
1248    
1249          return if ($rv != 200);          return if ($rv != 200);
# Line 1196  sub _fetch_doc { Line 1274  sub _fetch_doc {
1274    
1275  sub name {  sub name {
1276          my $self = shift;          my $self = shift;
1277          $self->_set_info unless ($self->{name});          $self->_set_info unless ($self->{inform}->{name});
1278          return $self->{name};          return $self->{inform}->{name};
1279  }  }
1280    
1281    
# Line 1209  sub name { Line 1287  sub name {
1287    
1288  sub label {  sub label {
1289          my $self = shift;          my $self = shift;
1290          $self->_set_info unless ($self->{label});          $self->_set_info unless ($self->{inform}->{label});
1291          return $self->{label};          return $self->{inform}->{label};
1292  }  }
1293    
1294    
# Line 1222  sub label { Line 1300  sub label {
1300    
1301  sub doc_num {  sub doc_num {
1302          my $self = shift;          my $self = shift;
1303          $self->_set_info if ($self->{dnum} < 0);          $self->_set_info if ($self->{inform}->{dnum} < 0);
1304          return $self->{dnum};          return $self->{inform}->{dnum};
1305  }  }
1306    
1307    
# Line 1235  sub doc_num { Line 1313  sub doc_num {
1313    
1314  sub word_num {  sub word_num {
1315          my $self = shift;          my $self = shift;
1316          $self->_set_info if ($self->{wnum} < 0);          $self->_set_info if ($self->{inform}->{wnum} < 0);
1317          return $self->{wnum};          return $self->{inform}->{wnum};
1318  }  }
1319    
1320    
# Line 1248  sub word_num { Line 1326  sub word_num {
1326    
1327  sub size {  sub size {
1328          my $self = shift;          my $self = shift;
1329          $self->_set_info if ($self->{size} < 0);          $self->_set_info if ($self->{inform}->{size} < 0);
1330          return $self->{size};          return $self->{inform}->{size};
1331  }  }
1332    
1333    
# Line 1434  use LWP::UserAgent; Line 1512  use LWP::UserAgent;
1512  sub shuttle_url {  sub shuttle_url {
1513          my $self = shift;          my $self = shift;
1514    
1515          my ($url, $content_type, $reqbody, $resbody) = @_;          my ($url, $content_type, $reqbody, $resbody, $croak_on_error) = @_;
1516    
1517            $croak_on_error = $self->{croak_on_error} unless defined($croak_on_error);
1518    
1519          $self->{status} = -1;          $self->{status} = -1;
1520    
# Line 1478  sub shuttle_url { Line 1558  sub shuttle_url {
1558          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1559    
1560          if (! $res->is_success) {          if (! $res->is_success) {
1561                  if ($self->{croak_on_error}) {                  if ($croak_on_error) {
1562                          croak("can't get $url: ",$res->status_line);                          croak("can't get $url: ",$res->status_line);
1563                  } else {                  } else {
1564                          return -1;                          return -1;
# Line 1583  sub set_link { Line 1663  sub set_link {
1663          my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);          my $reqbody = 'url=' . uri_escape($url) . '&label=' . uri_escape($label);
1664          $reqbody .= '&credit=' . $credit if ($credit > 0);          $reqbody .= '&credit=' . $credit if ($credit > 0);
1665    
1666          $self->shuttle_url( $self->{url} . '/_set_link',          if ($self->shuttle_url( $self->{url} . '/_set_link',
1667                  'application/x-www-form-urlencoded',                  'application/x-www-form-urlencoded',
1668                  $reqbody,                  $reqbody,
1669                  undef                  undef
1670          ) == 200;          ) == 200) {
1671                    # refresh node info after adding link
1672                    $self->_set_info;
1673                    return 1;
1674            }
1675    }
1676    
1677    =head2 admins
1678    
1679     my @admins = @{ $node->admins };
1680    
1681    Return array of users with admin rights on node
1682    
1683    =cut
1684    
1685    sub admins {
1686            my $self = shift;
1687            $self->_set_info unless ($self->{inform}->{name});
1688            return $self->{inform}->{admins};
1689    }
1690    
1691    =head2 guests
1692    
1693     my @guests = @{ $node->guests };
1694    
1695    Return array of users with guest rights on node
1696    
1697    =cut
1698    
1699    sub guests {
1700            my $self = shift;
1701            $self->_set_info unless ($self->{inform}->{name});
1702            return $self->{inform}->{guests};
1703    }
1704    
1705    =head2 links
1706    
1707     my $links = @{ $node->links };
1708    
1709    Return array of links for this node
1710    
1711    =cut
1712    
1713    sub links {
1714            my $self = shift;
1715            $self->_set_info unless ($self->{inform}->{name});
1716            return $self->{inform}->{links};
1717  }  }
1718    
1719    
# Line 1618  sub _set_info { Line 1744  sub _set_info {
1744    
1745          return if ($rv != 200 || !$resbody);          return if ($rv != 200 || !$resbody);
1746    
1747          # it seems that response can have multiple line endings          my @lines = split(/[\r\n]/,$resbody);
1748          $resbody =~ s/[\r\n]+$//;  
1749            $self->{inform} = {};
1750    
1751            ( $self->{inform}->{name}, $self->{inform}->{label}, $self->{inform}->{dnum},
1752                    $self->{inform}->{wnum}, $self->{inform}->{size} ) = split(/\t/, shift @lines, 5);
1753    
1754            return $resbody unless (@lines);
1755    
1756            shift @lines;
1757    
1758            while(my $admin = shift @lines) {
1759                    push @{$self->{inform}->{admins}}, $admin;
1760            }
1761    
1762            while(my $guest = shift @lines) {
1763                    push @{$self->{inform}->{guests}}, $guest;
1764            }
1765    
1766            while(my $link = shift @lines) {
1767                    push @{$self->{inform}->{links}}, $link;
1768            }
1769    
1770          ( $self->{name}, $self->{label}, $self->{dnum}, $self->{wnum}, $self->{size} ) =          return $resbody;
                 split(/\t/, $resbody, 5);  
1771    
1772  }  }
1773    

Legend:
Removed from v.97  
changed lines
  Added in v.111

  ViewVC Help
Powered by ViewVC 1.1.26