--- trunk/Estraier.pm 2006/01/09 15:28:24 74 +++ trunk/Estraier.pm 2006/01/28 19:18:13 98 @@ -4,7 +4,7 @@ use strict; use warnings; -our $VERSION = '0.03'; +our $VERSION = '0.04_1'; =head1 NAME @@ -92,7 +92,8 @@ =cut sub _s { - my $text = $_[1] || return; + my $text = $_[1]; + return unless defined($text); $text =~ s/\s\s+/ /gs; $text =~ s/^\s+//; $text =~ s/\s+$//; @@ -157,12 +158,12 @@ } elsif ($line =~ m/^$/) { $in_text = 1; next; - } elsif ($line =~ m/^(.+)=(.+)$/) { + } elsif ($line =~ m/^(.+)=(.*)$/) { $self->{attrs}->{ $1 } = $2; next; } - warn "draft ignored: $line\n"; + warn "draft ignored: '$line'\n"; } } @@ -320,7 +321,8 @@ my $draft; foreach my $attr_name (sort keys %{ $self->{attrs} }) { - $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n"; + next unless defined(my $v = $self->{attrs}->{$attr_name}); + $draft .= $attr_name . '=' . $v . "\n"; } if ($self->{kwords}) { @@ -368,7 +370,7 @@ package Search::Estraier::Condition; -use Carp qw/confess croak/; +use Carp qw/carp confess croak/; use Search::Estraier; our @ISA = qw/Search::Estraier/; @@ -446,30 +448,71 @@ =head2 set_options - $cond->set_options( SURE => 1 ); + $cond->set_options( 'SURE' ); + + $cond->set_options( qw/AGITO NOIDF SIMPLE/ ); + +Possible options are: + +=over 8 + +=item SURE + +check every N-gram + +=item USUAL + +check every second N-gram + +=item FAST + +check every third N-gram + +=item AGITO + +check every fourth N-gram + +=item NOIDF + +don't perform TF-IDF tuning + +=item SIMPLE + +use simplified query phrase + +=back + +Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous +options; + +This option changed in version C<0.04> of this module. It's backwards compatible. 
=cut my $options = { - # check N-gram keys skipping by three SURE => 1 << 0, - # check N-gram keys skipping by two USUAL => 1 << 1, - # without TF-IDF tuning FAST => 1 << 2, - # with the simplified phrase AGITO => 1 << 3, - # check every N-gram key NOIDF => 1 << 4, - # check N-gram keys skipping by one SIMPLE => 1 << 10, }; sub set_options { my $self = shift; - my $option = shift; - confess "unknown option" unless ($options->{$option}); - $self->{options} ||= $options->{$option}; + my $opt = 0; + foreach my $option (@_) { + my $mask; + unless ($mask = $options->{$option}) { + if ($option eq '1') { + next; + } else { + croak "unknown option $option"; + } + } + $opt += $mask; + } + $self->{options} = $opt; } @@ -735,6 +778,18 @@ return $self->{hints}->{$key}; } +=head2 hints + +More perlish version of C<hint>. This one returns hash. + + my %hints = $rec->hints; + +=cut + +sub hints { + my $self = shift; + return $self->{hints}; +} package Search::Estraier::Node; @@ -754,6 +809,34 @@ my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' ); +or in more verbose form + + my $node = new Search::HyperEstraier::Node( + url => 'http://localhost:1978/node/test', + debug => 1, + croak_on_error => 1 + ); + +with following arguments: + +=over 4 + +=item url + +URL to node + +=item debug + +dumps a B<lot> of debugging output + +=item croak_on_error + +very helpful during development. It will croak on all errors instead of +silently returning C<-1> (which is convention of Hyper Estraier API in other +languages). + +=back + =cut sub new { @@ -776,7 +859,8 @@ } else { my $args = {@_}; - $self->{debug} = $args->{debug}; + %$self = ( %$self, @_ ); + warn "## Node debug on\n" if ($self->{debug}); } @@ -1418,7 +1502,7 @@ $req->headers->header( 'Host' => $url->host . ":" . $url->port ); $req->headers->header( 'Connection', 'close' ); - $req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ); + $req->headers->header( 'Authorization', 'Basic ' . 
$self->{auth} ) if ($self->{auth}); $req->content_type( $content_type ); warn $req->headers->as_string,"\n" if ($self->{debug}); @@ -1432,10 +1516,16 @@ warn "## response status: ",$res->status_line,"\n" if ($self->{debug}); - return -1 if (! $res->is_success); - ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); + if (! $res->is_success) { + if ($self->{croak_on_error}) { + croak("can't get $url: ",$res->status_line); + } else { + return -1; + } + } + $$resbody .= $res->content; warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug});