/[Search-Estraier]/trunk/lib/Search/Estraier.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/Search/Estraier.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 69 by dpavlin, Sun Jan 8 16:49:53 2006 UTC revision 100 by dpavlin, Sat Jan 28 19:41:59 2006 UTC
# Line 4  use 5.008; Line 4  use 5.008;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    
7  our $VERSION = '0.03_1';  our $VERSION = '0.04_1';
8    
9  =head1 NAME  =head1 NAME
10    
# Line 50  Search::Estraier - pure perl module to u Line 50  Search::Estraier - pure perl module to u
50          $cond->set_phrase("rainbow AND lullaby");          $cond->set_phrase("rainbow AND lullaby");
51    
52          my $nres = $node->search($cond, 0);          my $nres = $node->search($cond, 0);
53            print "Got ", $nres->hits, " results\n";
54    
55          if (defined($nres)) {          if (defined($nres)) {
56                  # for each document in results                  # for each document in results
57                  for my $i ( 0 ... $nres->doc_num - 1 ) {                  for my $i ( 0 ... $nres->doc_num - 1 ) {
# Line 92  Remove multiple whitespaces from string, Line 94  Remove multiple whitespaces from string,
94  =cut  =cut
95    
96  sub _s {  sub _s {
97          my $text = $_[1] || return;          my $text = $_[1];
98            return unless defined($text);
99          $text =~ s/\s\s+/ /gs;          $text =~ s/\s\s+/ /gs;
100          $text =~ s/^\s+//;          $text =~ s/^\s+//;
101          $text =~ s/\s+$//;          $text =~ s/\s+$//;
# Line 157  sub new { Line 160  sub new {
160                          } elsif ($line =~ m/^$/) {                          } elsif ($line =~ m/^$/) {
161                                  $in_text = 1;                                  $in_text = 1;
162                                  next;                                  next;
163                          } elsif ($line =~ m/^(.+)=(.+)$/) {                          } elsif ($line =~ m/^(.+)=(.*)$/) {
164                                  $self->{attrs}->{ $1 } = $2;                                  $self->{attrs}->{ $1 } = $2;
165                                  next;                                  next;
166                          }                          }
167    
168                          warn "draft ignored: $line\n";                          warn "draft ignored: '$line'\n";
169                  }                  }
170          }          }
171    
# Line 320  sub dump_draft { Line 323  sub dump_draft {
323          my $draft;          my $draft;
324    
325          foreach my $attr_name (sort keys %{ $self->{attrs} }) {          foreach my $attr_name (sort keys %{ $self->{attrs} }) {
326                  $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n";                  next unless defined(my $v = $self->{attrs}->{$attr_name});
327                    $draft .= $attr_name . '=' . $v . "\n";
328          }          }
329    
330          if ($self->{kwords}) {          if ($self->{kwords}) {
# Line 368  sub delete { Line 372  sub delete {
372    
373  package Search::Estraier::Condition;  package Search::Estraier::Condition;
374    
375  use Carp qw/confess croak/;  use Carp qw/carp confess croak/;
376    
377  use Search::Estraier;  use Search::Estraier;
378  our @ISA = qw/Search::Estraier/;  our @ISA = qw/Search::Estraier/;
# Line 446  sub set_max { Line 450  sub set_max {
450    
451  =head2 set_options  =head2 set_options
452    
453    $cond->set_options( SURE => 1 );    $cond->set_options( 'SURE' );
454    
455      $cond->set_options( qw/AGITO NOIDF SIMPLE/ );
456    
457    Possible options are:
458    
459    =over 8
460    
461    =item SURE
462    
463    check every N-gram
464    
465    =item USUAL
466    
467    check every second N-gram
468    
469    =item FAST
470    
471    check every third N-gram
472    
473    =item AGITO
474    
475    check every fourth N-gram
476    
477    =item NOIDF
478    
479    don't perform TF-IDF tuning
480    
481    =item SIMPLE
482    
483    use simplified query phrase
484    
485    =back
486    
487    Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous
488    options;
489    
490    This option changed in version C<0.04> of this module. It's backwards compatibile.
491    
492  =cut  =cut
493    
494  my $options = {  my $options = {
         # check N-gram keys skipping by three  
495          SURE => 1 << 0,          SURE => 1 << 0,
         # check N-gram keys skipping by two  
496          USUAL => 1 << 1,          USUAL => 1 << 1,
         # without TF-IDF tuning  
497          FAST => 1 << 2,          FAST => 1 << 2,
         # with the simplified phrase  
498          AGITO => 1 << 3,          AGITO => 1 << 3,
         # check every N-gram key  
499          NOIDF => 1 << 4,          NOIDF => 1 << 4,
         # check N-gram keys skipping by one  
500          SIMPLE => 1 << 10,          SIMPLE => 1 << 10,
501  };  };
502    
503  sub set_options {  sub set_options {
504          my $self = shift;          my $self = shift;
505          my $option = shift;          my $opt = 0;
506          confess "unknown option" unless ($options->{$option});          foreach my $option (@_) {
507          $self->{options} ||= $options->{$option};                  my $mask;
508                    unless ($mask = $options->{$option}) {
509                            if ($option eq '1') {
510                                    next;
511                            } else {
512                                    croak "unknown option $option";
513                            }
514                    }
515                    $opt += $mask;
516            }
517            $self->{options} = $opt;
518  }  }
519    
520    
# Line 691  Return number of documents Line 736  Return number of documents
736    
737    print $res->doc_num;    print $res->doc_num;
738    
739    This will return real number of documents (limited by C<max>).
740    If you want to get total number of hits, see C<hits>.
741    
742  =cut  =cut
743    
744  sub doc_num {  sub doc_num {
# Line 722  sub get_doc { Line 770  sub get_doc {
770    
771  Return specific hint from results.  Return specific hint from results.
772    
773    print $rec->hint( 'VERSION' );    print $res->hint( 'VERSION' );
774    
775  Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,  Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>,
776  C<TIME>, C<LINK#n>, C<VIEW>.  C<TIME>, C<LINK#n>, C<VIEW>.
# Line 735  sub hint { Line 783  sub hint {
783          return $self->{hints}->{$key};          return $self->{hints}->{$key};
784  }  }
785    
786    =head2 hits
787    
788    More perlish version of C<hint>. This one returns hash.
789    
790      my %hints = $res->hints;
791    
792    =cut
793    
794    sub hints {
795            my $self = shift;
796            return $self->{hints};
797    }
798    
799    =head2 hits
800    
801    Syntaxtic sugar for total number of hits for this query
802    
803      print $res->hits;
804    
805    It's same as
806    
807      print $res->hint('HIT');
808    
809    but shorter.
810    
811    =cut
812    
813    sub hits {
814            my $self = shift;
815            return $self->{hints}->{'HIT'} || 0;
816    }
817    
818  package Search::Estraier::Node;  package Search::Estraier::Node;
819    
# Line 754  or optionally with C<url> as parametar Line 833  or optionally with C<url> as parametar
833    
834    my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );    my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' );
835    
836    or in more verbose form
837    
838      my $node = new Search::HyperEstraier::Node(
839            url => 'http://localhost:1978/node/test',
840            debug => 1,
841            croak_on_error => 1
842      );
843    
844    with following arguments:
845    
846    =over 4
847    
848    =item url
849    
850    URL to node
851    
852    =item debug
853    
854    dumps a B<lot> of debugging output
855    
856    =item croak_on_error
857    
858    very helpful during development. It will croak on all errors instead of
859    silently returning C<-1> (which is convention of Hyper Estraier API in other
860    languages).
861    
862    =back
863    
864  =cut  =cut
865    
866  sub new {  sub new {
# Line 776  sub new { Line 883  sub new {
883          } else {          } else {
884                  my $args = {@_};                  my $args = {@_};
885    
886                  $self->{debug} = $args->{debug};                  %$self = ( %$self, @_ );
887    
888                  warn "## Node debug on\n" if ($self->{debug});                  warn "## Node debug on\n" if ($self->{debug});
889          }          }
890    
# Line 1418  sub shuttle_url { Line 1526  sub shuttle_url {
1526    
1527          $req->headers->header( 'Host' => $url->host . ":" . $url->port );          $req->headers->header( 'Host' => $url->host . ":" . $url->port );
1528          $req->headers->header( 'Connection', 'close' );          $req->headers->header( 'Connection', 'close' );
1529          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} );          $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth});
1530          $req->content_type( $content_type );          $req->content_type( $content_type );
1531    
1532          warn $req->headers->as_string,"\n" if ($self->{debug});          warn $req->headers->as_string,"\n" if ($self->{debug});
# Line 1432  sub shuttle_url { Line 1540  sub shuttle_url {
1540    
1541          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});          warn "## response status: ",$res->status_line,"\n" if ($self->{debug});
1542    
         return -1 if (! $res->is_success);  
   
1543          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);          ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2);
1544    
1545            if (! $res->is_success) {
1546                    if ($self->{croak_on_error}) {
1547                            croak("can't get $url: ",$res->status_line);
1548                    } else {
1549                            return -1;
1550                    }
1551            }
1552    
1553          $$resbody .= $res->content;          $$resbody .= $res->content;
1554    
1555          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});          warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});
# Line 1535  sub set_link { Line 1649  sub set_link {
1649          $reqbody .= '&credit=' . $credit if ($credit > 0);          $reqbody .= '&credit=' . $credit if ($credit > 0);
1650    
1651          $self->shuttle_url( $self->{url} . '/_set_link',          $self->shuttle_url( $self->{url} . '/_set_link',
1652                  'text/plain',                  'application/x-www-form-urlencoded',
1653                  $reqbody,                  $reqbody,
1654                  undef                  undef
1655          ) == 200;          ) == 200;

Legend:
Removed from v.69  
changed lines
  Added in v.100

  ViewVC Help
Powered by ViewVC 1.1.26