--- trunk/Estraier.pm 2006/01/16 21:34:14 77 +++ trunk/Estraier.pm 2006/01/28 19:18:13 98 @@ -92,7 +92,8 @@ =cut sub _s { - my $text = $_[1] || return; + my $text = $_[1]; + return unless defined($text); $text =~ s/\s\s+/ /gs; $text =~ s/^\s+//; $text =~ s/\s+$//; @@ -157,12 +158,12 @@ } elsif ($line =~ m/^$/) { $in_text = 1; next; - } elsif ($line =~ m/^(.+)=(.+)$/) { + } elsif ($line =~ m/^(.+)=(.*)$/) { $self->{attrs}->{ $1 } = $2; next; } - warn "draft ignored: $line\n"; + warn "draft ignored: '$line'\n"; } } @@ -320,7 +321,8 @@ my $draft; foreach my $attr_name (sort keys %{ $self->{attrs} }) { - $draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n"; + next unless defined(my $v = $self->{attrs}->{$attr_name}); + $draft .= $attr_name . '=' . $v . "\n"; } if ($self->{kwords}) { @@ -368,7 +370,7 @@ package Search::Estraier::Condition; -use Carp qw/confess croak/; +use Carp qw/carp confess croak/; use Search::Estraier; our @ISA = qw/Search::Estraier/; @@ -446,30 +448,71 @@ =head2 set_options - $cond->set_options( SURE => 1 ); + $cond->set_options( 'SURE' ); + + $cond->set_options( qw/AGITO NOIDF SIMPLE/ ); + +Possible options are: + +=over 8 + +=item SURE + +check every N-gram + +=item USUAL + +check every second N-gram + +=item FAST + +check every third N-gram + +=item AGITO + +check every fourth N-gram + +=item NOIDF + +don't perform TF-IDF tuning + +=item SIMPLE + +use simplified query phrase + +=back + +Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous +options. + +This option changed in version C<0.04> of this module. It's backwards compatible.
=cut my $options = { - # check N-gram keys skipping by three SURE => 1 << 0, - # check N-gram keys skipping by two USUAL => 1 << 1, - # without TF-IDF tuning FAST => 1 << 2, - # with the simplified phrase AGITO => 1 << 3, - # check every N-gram key NOIDF => 1 << 4, - # check N-gram keys skipping by one SIMPLE => 1 << 10, }; sub set_options { my $self = shift; - my $option = shift; - confess "unknown option" unless ($options->{$option}); - $self->{options} ||= $options->{$option}; + my $opt = 0; + foreach my $option (@_) { + my $mask; + unless ($mask = $options->{$option}) { + if ($option eq '1') { + next; + } else { + croak "unknown option $option"; + } + } + $opt += $mask; + } + $self->{options} = $opt; } @@ -735,6 +778,18 @@ return $self->{hints}->{$key}; } +=head2 hints + +More perlish version of C<hint>. This one returns a reference to the hints hash. + + my $hints = $rec->hints; + +=cut + +sub hints { + my $self = shift; + return $self->{hints}; +} package Search::Estraier::Node; @@ -754,6 +809,34 @@ my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' ); +or in more verbose form + + my $node = new Search::HyperEstraier::Node( + url => 'http://localhost:1978/node/test', + debug => 1, + croak_on_error => 1 + ); + +with the following arguments: + +=over 4 + +=item url + +URL to node + +=item debug + +dumps a B<lot> of debugging output + +=item croak_on_error + +very helpful during development. It will croak on all errors instead of +silently returning C<-1> (which is the convention of the Hyper Estraier API in other +languages). + +=back + =cut sub new { @@ -776,7 +859,8 @@ } else { my $args = {@_}; - $self->{debug} = $args->{debug}; + %$self = ( %$self, @_ ); + warn "## Node debug on\n" if ($self->{debug}); } @@ -1434,7 +1518,13 @@ ($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); - return -1 if (! $res->is_success); + if (! 
$res->is_success) { + if ($self->{croak_on_error}) { + croak("can't get $url: ",$res->status_line); + } else { + return -1; + } + } $$resbody .= $res->content;