4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
|
7 |
our $VERSION = '0.03_1'; |
our $VERSION = '0.04_1'; |
8 |
|
|
9 |
=head1 NAME |
=head1 NAME |
10 |
|
|
50 |
$cond->set_phrase("rainbow AND lullaby"); |
$cond->set_phrase("rainbow AND lullaby"); |
51 |
|
|
52 |
my $nres = $node->search($cond, 0); |
my $nres = $node->search($cond, 0); |
53 |
|
print "Got ", $nres->hits, " results\n"; |
54 |
|
|
55 |
if (defined($nres)) { |
if (defined($nres)) { |
56 |
# for each document in results |
# for each document in results |
57 |
for my $i ( 0 ... $nres->doc_num - 1 ) { |
for my $i ( 0 ... $nres->doc_num - 1 ) { |
94 |
=cut |
=cut |
95 |
|
|
96 |
sub _s { |
sub _s { |
97 |
my $text = $_[1] || return; |
my $text = $_[1]; |
98 |
|
return unless defined($text); |
99 |
$text =~ s/\s\s+/ /gs; |
$text =~ s/\s\s+/ /gs; |
100 |
$text =~ s/^\s+//; |
$text =~ s/^\s+//; |
101 |
$text =~ s/\s+$//; |
$text =~ s/\s+$//; |
160 |
} elsif ($line =~ m/^$/) { |
} elsif ($line =~ m/^$/) { |
161 |
$in_text = 1; |
$in_text = 1; |
162 |
next; |
next; |
163 |
} elsif ($line =~ m/^(.+)=(.+)$/) { |
} elsif ($line =~ m/^(.+)=(.*)$/) { |
164 |
$self->{attrs}->{ $1 } = $2; |
$self->{attrs}->{ $1 } = $2; |
165 |
next; |
next; |
166 |
} |
} |
167 |
|
|
168 |
warn "draft ignored: $line\n"; |
warn "draft ignored: '$line'\n"; |
169 |
} |
} |
170 |
} |
} |
171 |
|
|
323 |
my $draft; |
my $draft; |
324 |
|
|
325 |
foreach my $attr_name (sort keys %{ $self->{attrs} }) { |
foreach my $attr_name (sort keys %{ $self->{attrs} }) { |
326 |
$draft .= $attr_name . '=' . $self->{attrs}->{$attr_name} . "\n"; |
next unless defined(my $v = $self->{attrs}->{$attr_name}); |
327 |
|
$draft .= $attr_name . '=' . $v . "\n"; |
328 |
} |
} |
329 |
|
|
330 |
if ($self->{kwords}) { |
if ($self->{kwords}) { |
372 |
|
|
373 |
package Search::Estraier::Condition; |
package Search::Estraier::Condition; |
374 |
|
|
375 |
use Carp qw/confess croak/; |
use Carp qw/carp confess croak/; |
376 |
|
|
377 |
use Search::Estraier; |
use Search::Estraier; |
378 |
our @ISA = qw/Search::Estraier/; |
our @ISA = qw/Search::Estraier/; |
450 |
|
|
451 |
=head2 set_options |
=head2 set_options |
452 |
|
|
453 |
$cond->set_options( SURE => 1 ); |
$cond->set_options( 'SURE' ); |
454 |
|
|
455 |
|
$cond->set_options( qw/AGITO NOIDF SIMPLE/ ); |
456 |
|
|
457 |
|
Possible options are: |
458 |
|
|
459 |
|
=over 8 |
460 |
|
|
461 |
|
=item SURE |
462 |
|
|
463 |
|
check every N-gram |
464 |
|
|
465 |
|
=item USUAL |
466 |
|
|
467 |
|
check every second N-gram |
468 |
|
|
469 |
|
=item FAST |
470 |
|
|
471 |
|
check every third N-gram |
472 |
|
|
473 |
|
=item AGITO |
474 |
|
|
475 |
|
check every fourth N-gram |
476 |
|
|
477 |
|
=item NOIDF |
478 |
|
|
479 |
|
don't perform TF-IDF tuning |
480 |
|
|
481 |
|
=item SIMPLE |
482 |
|
|
483 |
|
use simplified query phrase |
484 |
|
|
485 |
|
=back |
486 |
|
|
487 |
|
Skipping N-grams will speed up search, but reduce accuracy. Every call to C<set_options> will reset previous |
488 |
|
options; |
489 |
|
|
490 |
|
This option changed in version C<0.04> of this module. It's backwards compatibile. |
491 |
|
|
492 |
=cut |
=cut |
493 |
|
|
494 |
my $options = { |
my $options = { |
|
# check N-gram keys skipping by three |
|
495 |
SURE => 1 << 0, |
SURE => 1 << 0, |
|
# check N-gram keys skipping by two |
|
496 |
USUAL => 1 << 1, |
USUAL => 1 << 1, |
|
# without TF-IDF tuning |
|
497 |
FAST => 1 << 2, |
FAST => 1 << 2, |
|
# with the simplified phrase |
|
498 |
AGITO => 1 << 3, |
AGITO => 1 << 3, |
|
# check every N-gram key |
|
499 |
NOIDF => 1 << 4, |
NOIDF => 1 << 4, |
|
# check N-gram keys skipping by one |
|
500 |
SIMPLE => 1 << 10, |
SIMPLE => 1 << 10, |
501 |
}; |
}; |
502 |
|
|
503 |
sub set_options { |
sub set_options { |
504 |
my $self = shift; |
my $self = shift; |
505 |
my $option = shift; |
my $opt = 0; |
506 |
confess "unknown option" unless ($options->{$option}); |
foreach my $option (@_) { |
507 |
$self->{options} ||= $options->{$option}; |
my $mask; |
508 |
|
unless ($mask = $options->{$option}) { |
509 |
|
if ($option eq '1') { |
510 |
|
next; |
511 |
|
} else { |
512 |
|
croak "unknown option $option"; |
513 |
|
} |
514 |
|
} |
515 |
|
$opt += $mask; |
516 |
|
} |
517 |
|
$self->{options} = $opt; |
518 |
} |
} |
519 |
|
|
520 |
|
|
736 |
|
|
737 |
print $res->doc_num; |
print $res->doc_num; |
738 |
|
|
739 |
|
This will return real number of documents (limited by C<max>). |
740 |
|
If you want to get total number of hits, see C<hits>. |
741 |
|
|
742 |
=cut |
=cut |
743 |
|
|
744 |
sub doc_num { |
sub doc_num { |
770 |
|
|
771 |
Return specific hint from results. |
Return specific hint from results. |
772 |
|
|
773 |
print $rec->hint( 'VERSION' ); |
print $res->hint( 'VERSION' ); |
774 |
|
|
775 |
Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>, |
Possible hints are: C<VERSION>, C<NODE>, C<HIT>, C<HINT#n>, C<DOCNUM>, C<WORDNUM>, |
776 |
C<TIME>, C<LINK#n>, C<VIEW>. |
C<TIME>, C<LINK#n>, C<VIEW>. |
783 |
return $self->{hints}->{$key}; |
return $self->{hints}->{$key}; |
784 |
} |
} |
785 |
|
|
786 |
|
=head2 hits |
787 |
|
|
788 |
|
More perlish version of C<hint>. This one returns hash. |
789 |
|
|
790 |
|
my %hints = $res->hints; |
791 |
|
|
792 |
|
=cut |
793 |
|
|
794 |
|
sub hints { |
795 |
|
my $self = shift; |
796 |
|
return $self->{hints}; |
797 |
|
} |
798 |
|
|
799 |
|
=head2 hits |
800 |
|
|
801 |
|
Syntaxtic sugar for total number of hits for this query |
802 |
|
|
803 |
|
print $res->hits; |
804 |
|
|
805 |
|
It's same as |
806 |
|
|
807 |
|
print $res->hint('HIT'); |
808 |
|
|
809 |
|
but shorter. |
810 |
|
|
811 |
|
=cut |
812 |
|
|
813 |
|
sub hits { |
814 |
|
my $self = shift; |
815 |
|
return $self->{hints}->{'HIT'} || 0; |
816 |
|
} |
817 |
|
|
818 |
package Search::Estraier::Node; |
package Search::Estraier::Node; |
819 |
|
|
833 |
|
|
834 |
my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' ); |
my $node = new Search::HyperEstraier::Node( 'http://localhost:1978/node/test' ); |
835 |
|
|
836 |
|
or in more verbose form |
837 |
|
|
838 |
|
my $node = new Search::HyperEstraier::Node( |
839 |
|
url => 'http://localhost:1978/node/test', |
840 |
|
debug => 1, |
841 |
|
croak_on_error => 1 |
842 |
|
); |
843 |
|
|
844 |
|
with following arguments: |
845 |
|
|
846 |
|
=over 4 |
847 |
|
|
848 |
|
=item url |
849 |
|
|
850 |
|
URL to node |
851 |
|
|
852 |
|
=item debug |
853 |
|
|
854 |
|
dumps a B<lot> of debugging output |
855 |
|
|
856 |
|
=item croak_on_error |
857 |
|
|
858 |
|
very helpful during development. It will croak on all errors instead of |
859 |
|
silently returning C<-1> (which is convention of Hyper Estraier API in other |
860 |
|
languages). |
861 |
|
|
862 |
|
=back |
863 |
|
|
864 |
=cut |
=cut |
865 |
|
|
866 |
sub new { |
sub new { |
883 |
} else { |
} else { |
884 |
my $args = {@_}; |
my $args = {@_}; |
885 |
|
|
886 |
$self->{debug} = $args->{debug}; |
%$self = ( %$self, @_ ); |
887 |
|
|
888 |
warn "## Node debug on\n" if ($self->{debug}); |
warn "## Node debug on\n" if ($self->{debug}); |
889 |
} |
} |
890 |
|
|
1526 |
|
|
1527 |
$req->headers->header( 'Host' => $url->host . ":" . $url->port ); |
$req->headers->header( 'Host' => $url->host . ":" . $url->port ); |
1528 |
$req->headers->header( 'Connection', 'close' ); |
$req->headers->header( 'Connection', 'close' ); |
1529 |
$req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ); |
$req->headers->header( 'Authorization', 'Basic ' . $self->{auth} ) if ($self->{auth}); |
1530 |
$req->content_type( $content_type ); |
$req->content_type( $content_type ); |
1531 |
|
|
1532 |
warn $req->headers->as_string,"\n" if ($self->{debug}); |
warn $req->headers->as_string,"\n" if ($self->{debug}); |
1540 |
|
|
1541 |
warn "## response status: ",$res->status_line,"\n" if ($self->{debug}); |
warn "## response status: ",$res->status_line,"\n" if ($self->{debug}); |
1542 |
|
|
|
return -1 if (! $res->is_success); |
|
|
|
|
1543 |
($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); |
($self->{status}, $self->{status_message}) = split(/\s+/, $res->status_line, 2); |
1544 |
|
|
1545 |
|
if (! $res->is_success) { |
1546 |
|
if ($self->{croak_on_error}) { |
1547 |
|
croak("can't get $url: ",$res->status_line); |
1548 |
|
} else { |
1549 |
|
return -1; |
1550 |
|
} |
1551 |
|
} |
1552 |
|
|
1553 |
$$resbody .= $res->content; |
$$resbody .= $res->content; |
1554 |
|
|
1555 |
warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug}); |
warn "## response body:\n$$resbody\n" if ($resbody && $self->{debug}); |
1649 |
$reqbody .= '&credit=' . $credit if ($credit > 0); |
$reqbody .= '&credit=' . $credit if ($credit > 0); |
1650 |
|
|
1651 |
$self->shuttle_url( $self->{url} . '/_set_link', |
$self->shuttle_url( $self->{url} . '/_set_link', |
1652 |
'text/plain', |
'application/x-www-form-urlencoded', |
1653 |
$reqbody, |
$reqbody, |
1654 |
undef |
undef |
1655 |
) == 200; |
) == 200; |