/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 68 by dpavlin, Sun Jan 8 00:13:09 2006 UTC revision 102 by dpavlin, Sat Jan 28 19:46:20 2006 UTC
# Line 4  use 5.008; Line 4  use 5.008;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.02';  our $VERSION = '0.04_1';
8    
9  =head1 NAME  =head1 NAME
10    
# Line 17  Search::Estraier - pure perl module to u Line 17  Search::Estraier - pure perl module to u
17          use Search::Estraier;          use Search::Estraier;
18    
19          # create and configure node          # create and configure node
20          my $node = new Search::Estraier::Node;          my $node = new Search::Estraier::Node(
21          $node->set_url("http://localhost:1978/node/test");                  url => 'http://localhost:1978/node/test',
22          $node->set_auth("admin","admin");                  user => 'admin',
23                    passwd => 'admin'
24            );
25    
26          # create document          # create document
27          my $doc = new Search::Estraier::Document;          my $doc = new Search::Estraier::Document;
# Line 32  Search::Estraier - pure perl module to u Line 34  Search::Estraier - pure perl module to u
34          $doc->add_text("Somewhere over the rainbow.  Way up high.");          $doc->add_text("Somewhere over the rainbow.  Way up high.");
35          $doc->add_text("There's a land that I heard of once in a lullaby.");          $doc->add_text("There's a land that I heard of once in a lullaby.");
36    
37          die "error: ", $node->status,"\n" unless ($node->put_doc($doc));          die "error: ", $node->status,"\n" unless (eval { $node->put_doc($doc) });
38    
39  =head2 Simple searcher  =head2 Simple searcher
40    
41          use Search::Estraier;          use Search::Estraier;
42    
43          # create and configure node          # create and configure node
44          my $node = new Search::Estraier::Node;          my $node = new Search::Estraier::Node(
45          $node->set_url("http://localhost:1978/node/test");                  url => 'http://localhost:1978/node/test',
46          $node->set_auth("admin","admin");                  user => 'admin',
47                    passwd => 'admin',
48                    croak_on_error => 1,
49            );
50    
51          # create condition          # create condition
52          my $cond = new Search::Estraier::Condition;          my $cond = new Search::Estraier::Condition;
# Line 50  Search::Estraier - pure perl module to u Line 55  Search::Estraier - pure perl module to u
55          $cond->set_phrase("rainbow AND lullaby");          $cond->set_phrase("rainbow AND lullaby");
56    
57          my $nres = $node->search($cond, 0);          my $nres = $node->search($cond, 0);
58    
59          if (defined($nres)) {          if (defined($nres)) {
60                    print "Got ", $nres->hits, " results\n";
61    
62                  # for each document in results                  # for each document in results
63                  for my $i ( 0 ... $nres->doc_num - 1 ) {                  for my $i ( 0 ... $nres->doc_num - 1 ) {
64                          # get result document                          # get result document
# Line 92  Remove multiple whitespaces from string, Line 100  Remove multiple whitespaces from string,
100  =cut  =cut
101    
102  sub _s {  sub _s {
103          my $text = $_[1] || return;          my $text = $_[1];
104            return unless defined($text);
105          $text =~ s/\s\s+/ /gs;          $text =~ s/\s\s+/ /gs;
106          $text =~ s/^\s+//;          $text =~ s/^\s+//;
107          $text =~ s/\s+$//;          $text =~ s/\s+$//;
# Line 157  sub new { Line 166  sub new {
166                          } elsif ($line =~ m/^$/) {                          } elsif ($line =~ m/^$/) {
167                                  $in_text = 1;                                  $in_text = 1;
168                                  next;                                  next;
169                          } elsif ($line =~ m/^(.+)=(.+)$/) {                          } elsif ($line =~ m/^(.+)=(.*)$/) {
170                                  $self->{attrs}->{ $1 } = $2;                                  $self->{attrs}->{ $1 } = $2;
171                                  next;                                  next;
172                          }                          }
173    
174                          warn "draft ignored: $line\n";                          warn "draft ignored: '$line'\n";
175                  }                  }
176          }          }
177    
# Line 320  sub dump_draft { Line 329  sub dump_draft {
329          my $draft;          my $draft;
330    
331          foreach my $attr_name (sort keys %{ $self->{attrs} }) {          foreach my $attr_name (sort keys %{ $self->{attrs} }) {
332                  $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";                  next unless defined(my $v = $self->{attrs}->{$attr_name});
333                    $draft .= $attr_name . '=' . $v . "\n";
334          }          }
335    
336          if ($self->{kwords}) {          if ($self->{kwords}) {
# Line 368  sub delete { Line 378  sub delete {
378    
379  package Search::Estraier::Condition;  package Search::Estraier::Condition;
380    
381  use Carp qw/confess croak/;  use Carp qw/carp confess croak/;
382    
383  use Search::Estraier;  use Search::Estraier;
384  our @ISA = qw/Search::Estraier/;  our @ISA = qw/Search::Estraier/;
# Line 446  sub set_max { Line 456  sub set_max {
456    
457  =head2 set_options  =head2 set_options
458    
459    $cond->set_options( SURE => 1 );    $cond->set_options( 'SURE' );
460    
461      $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
462    
463    Possible options are:
464    
465    =over 8
466    
467    =item SURE
468    
469    check every N-gram
470    
471    =item USUAL
472    
473    check every second N-gram
474    
475    =item FAST
476    
477    check every third N-gram
478    
479    =item AGITO
480    
481    check every fourth N-gram
482    
483    =item NOIDF
484    
485    don't perform TF-IDF tuning
486    
487    =item SIMPLE
488    
489    use simplified query phrase
490    
491    =back
492    
493    Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
494    options.
495    
496    This option changed in version C<0.04> of this module. It's backwards compatible.
497    
498  =cut  =cut
499    
500  my $options = {  my $options = {
         # check N-gram keys skipping by three  
501          SURE => 1 << 0,          SURE => 1 << 0,
         # check N-gram keys skipping by two  
502          USUAL => 1 << 1,          USUAL => 1 << 1,
         # without TF-IDF tuning  
503          FAST => 1 << 2,          FAST => 1 << 2,
         # with the simplified phrase  
504          AGITO => 1 << 3,          AGITO => 1 << 3,
         # check every N-gram key  
505          NOIDF => 1 << 4,          NOIDF => 1 << 4,
         # check N-gram keys skipping by one  
506          SIMPLE => 1 << 10,          SIMPLE => 1 << 10,
507  };  };
508    
509  sub set_options {  sub set_options {
510          my $self = shift;          my $self = shift;
511          my $option = shift;          my $opt = 0;
512          confess "unknown option" unless ($options->{$option});          foreach my $option (@_) {
513          $self->{options} ||= $options->{$option};                  my $mask;
514                    unless ($mask = $options->{$option}) {
515                            if ($option eq '1') {
516                                    next;
517                            } else {
518                                    croak "unknown option $option";
519                            }
520                    }
521                    $opt += $mask;
522            }
523            $self->{options} = $opt;
524  }  }
525    
526    
# Line 691  Return number of documents Line 742  Return number of documents
742    
743    print $res->doc_num;    print $res->doc_num;
744    
745    This will return the real number of documents (limited by C<max>).
746    If you want to get the total number of hits, see C<hits>.
747    
748  =cut  =cut
749    
750  sub doc_num {  sub doc_num {
# Line 722  sub get_doc { Line 776  sub get_doc {
776    
777  Return specific hint from results.  Return specific hint from results.
778    
779    print $rec->hint( 'VERSION' );    print $res->hint( 'VERSION' );
780    
781  Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,  Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
782  C<TIME>, C<LINK#n>, C<VIEW>.  C<TIME>, C<LINK#n>, C<VIEW>.
# Line 735  sub hint { Line 789  sub hint {
789          return $self->{hints}->{$key};          return $self->{hints}->{$key};
790  }  }
791    
792    =head2 hints
793    
794    More perlish version of C<hint>. This one returns a reference to the hash of all hints.
795    
796      my $hints = $res->hints;
797    
798    =cut
799    
800    sub hints {
801            my $self = shift;
802            return $self->{hints};
803    }
804    
805    =head2 hits
806    
807    Syntactic sugar for the total number of hits for this query
808    
809      print $res->hits;
810    
811    It's same as
812    
813      print $res->hint('HIT');
814    
815    but shorter.
816    
817    =cut
818    
819    sub hits {
820            my $self = shift;
821            return $self->{hints}->{'HIT'} || 0;
822    }
823    
824  package Search::Estraier::Node;  package Search::Estraier::Node;
825    
# Line 754  or optionally with C<url> as parameter Line 839  or optionally with C<url> as parameter
839    
840    my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );    my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
841    
842    or in more verbose form
843    
844      my $node = new Search::HyperEstraier::Node(
845            url => 'http://localhost:1978/node/test',
846            debug => 1,
847            croak_on_error => 1
848      );
849    
850    with following arguments:
851    
852    =over 4
853    
854    =item url
855    
856    URL to node
857    
858    =item debug
859    
860    dumps a B<lot> of debugging output
861    
862    =item croak_on_error
863    
864    very helpful during development. It will croak on all errors instead of
865    silently returning C<-1> (which is convention of Hyper Estraier API in other
866    languages).
867    
868    =back
869    
870  =cut  =cut
871    
872  sub new {  sub new {
# Line 776  sub new { Line 889  sub new {
889          } else {          } else {
890                  my $args = {@_};                  my $args = {@_};
891    
892                  $self->{debug} = $args->{debug};                  %$self = ( %$self, @_ );
893    
894                  warn "## Node debug on\n" if ($self->{debug});                  warn "## Node debug on\n" if ($self->{debug});
895          }          }
896    
# Line 1418  sub shuttle_url { Line 1532  sub shuttle_url {
1532    
1533          $req->headers->header( 'Host' => $url->host . ":" . $url->port );          $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1534          $req->headers->header( 'Connection', 'close' );          $req->headers->header( 'Connection', 'close' );
1535          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1536          $req->content_type( $content_type );          $req->content_type( $content_type );
1537    
1538          warn $req->headers->as_string,"\n" if ($self->{debug});          warn $req->headers->as_string,"\n" if ($self->{debug});
# Line 1432  sub shuttle_url { Line 1546  sub shuttle_url {
1546    
1547          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1548    
         return -1 if (! $res->is_success);  
   
1549          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1550    
1551            if (! $res->is_success) {
1552                    if ($self->{croak_on_error}) {
1553                            croak("can't get $url: ",$res->status_line);
1554                    } else {
1555                            return -1;
1556                    }
1557            }
1558    
1559          $$resbody .= $res->content;          $$resbody .= $res->content;
1560    
1561          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
# Line 1535  sub set_link { Line 1655  sub set_link {
1655          $reqbody .= '&credit=' . $credit if ($credit > 0);          $reqbody .= '&credit=' . $credit if ($credit > 0);
1656    
1657          $self->shuttle_url( $self->{url} . '/_set_link',          $self->shuttle_url( $self->{url} . '/_set_link',
1658                  'text/plain',                  'application/x-www-form-urlencoded',
1659                  $reqbody,                  $reqbody,
1660                  undef                  undef
1661          ) == 200;          ) == 200;

Legend:
Removed from v.68  
changed lines
  Added in v.102

  ViewVC Help
Powered by ViewVC 1.1.26