8 |
use KinoSearch::InvIndexer; |
use KinoSearch::InvIndexer; |
9 |
use KinoSearch::Analysis::PolyAnalyzer; |
use KinoSearch::Analysis::PolyAnalyzer; |
10 |
use Encode qw/from_to/; |
use Encode qw/from_to/; |
11 |
use Data::Dumper; |
use Data::Dump qw/dump/; |
12 |
use Storable; |
use Storable; |
13 |
|
|
14 |
=head1 NAME |
=head1 NAME |
17 |
|
|
18 |
=head1 VERSION |
=head1 VERSION |
19 |
|
|
20 |
Version 0.02 |
Version 0.03 |
21 |
|
|
22 |
=cut |
=cut |
23 |
|
|
24 |
our $VERSION = '0.02'; |
our $VERSION = '0.03'; |
25 |
|
|
26 |
=head1 SYNOPSIS |
=head1 SYNOPSIS |
27 |
|
|
80 |
|
|
81 |
my $log = $self->_get_logger; |
my $log = $self->_get_logger; |
82 |
|
|
83 |
#$log->debug("self: ", sub { Dumper($self) }); |
#$log->debug("self: ", sub { dump($self) }); |
84 |
|
|
85 |
foreach my $p (qw/index_path fields database/) { |
foreach my $p (qw/index_path fields database/) { |
86 |
$log->logdie("need $p") unless ($self->{$p}); |
$log->logdie("need $p") unless ($self->{$p}); |
90 |
|
|
91 |
$self->{encoding} ||= 'ISO-8859-2'; |
$self->{encoding} ||= 'ISO-8859-2'; |
92 |
|
|
93 |
$log->info("using index $self->{index_path} with encoding $self->{encoding}"); |
$self->{index_path} .= '/' . $self->{database}; |
94 |
|
|
95 |
|
$self->{clean} = 1 if (! -e $self->{index_path} . '/segments'); |
96 |
|
|
97 |
|
$log->info("using", $self->{clean} ? ' new' : '', " index $self->{index_path} with encoding $self->{encoding}"); |
98 |
|
|
99 |
my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' ); |
my $analyzer = KinoSearch::Analysis::PolyAnalyzer->new( language => 'en' ); |
100 |
|
|
172 |
|
|
173 |
my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )"); |
my $doc = $self->{invindex}->new_doc( $uri ) || $log->logdie("can't create new_doc( $uri )"); |
174 |
|
|
175 |
sub add_value($$$$$) { |
sub _add_value($$$$$) { |
176 |
my ($self,$log,$doc,$n,$v) = @_; |
my ($self,$log,$doc,$n,$v) = @_; |
177 |
return unless ($v); |
return unless ($v); |
178 |
|
|
183 |
$log->warn("can't insert: $n = $v") if ($@); |
$log->warn("can't insert: $n = $v") if ($@); |
184 |
} |
} |
185 |
|
|
186 |
add_value($self,$log,$doc, 'uri', $uri); |
_add_value($self,$log,$doc, 'uri', $uri); |
187 |
|
|
188 |
$log->debug("ds = ", sub { Dumper($args->{'ds'}) } ); |
$log->debug("ds = ", sub { dump($args->{'ds'}) } ); |
189 |
|
|
190 |
# filter all tags which have type defined |
# filter all tags which have type defined |
191 |
my @tags = grep { |
my @tags = grep { |
205 |
$vals = $self->convert( $vals ) or |
$vals = $self->convert( $vals ) or |
206 |
$log->logdie("can't convert '$vals' to UTF-8"); |
$log->logdie("can't convert '$vals' to UTF-8"); |
207 |
|
|
208 |
add_value($self, $log, $doc, $tag, $vals ); |
_add_value($self, $log, $doc, $tag, $vals ); |
209 |
} |
} |
210 |
|
|
211 |
if (my $text = $args->{'text'}) { |
if (my $text = $args->{'text'}) { |
212 |
add_value($self, $log, $doc, 'bodytext', $text ); |
_add_value($self, $log, $doc, 'bodytext', $text ); |
213 |
} |
} |
214 |
|
|
215 |
#$log->debug("adding ", sub { $doc->dump_draft } ); |
#$log->debug("adding ", sub { $doc->dump_draft } ); |