4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
|
7 |
our $VERSION = '0.06'; |
our $VERSION = '0.10'; |
8 |
|
|
9 |
use Carp; |
use Carp; |
10 |
use File::Temp qw/ tempdir /; |
use File::Temp qw/ tempdir /; |
11 |
|
use BerkeleyDB; |
12 |
#use YAML; |
#use YAML; |
13 |
|
|
14 |
=head1 NAME |
=head1 NAME |
223 |
my $query = shift || return; |
my $query = shift || return; |
224 |
|
|
225 |
$self->finish_update; |
$self->finish_update; |
226 |
|
$self->_tie_meta_db(DB_RDONLY); |
227 |
|
|
228 |
my @results; |
my @results; |
229 |
|
|
230 |
# escape double quotes in query for shell |
# escape double quotes in query for shell |
231 |
$query =~ s/"/\\"/g; |
$query =~ s/"/\\"/g; |
232 |
|
|
233 |
my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |'; |
my $open_cmd = $self->{'search'} . |
234 |
print STDERR "## search $open_cmd\n" if ($self->{'debug'}); |
' -i ' . $self->{'index_dir'}.'/index' . |
235 |
|
' "' . $query . '"'. |
236 |
|
' |'; |
237 |
|
print STDERR "## search: $open_cmd\n" if ($self->{'debug'}); |
238 |
|
|
239 |
open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; |
open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; |
240 |
while(<SEARCH>) { |
my $l; |
241 |
next if (/^#/); |
while($l = <SEARCH>) { |
242 |
chomp; |
next if ($l =~ /^#/); |
243 |
print STDERR "## $_\n" if ($self->{'debug'}); |
chomp($l); |
244 |
my ($rank,$path,$size,$title) = split(/ /,$_,4); |
print STDERR "## $l\n" if ($self->{'debug'}); |
245 |
|
my ($rank,$path,$size,$title) = split(/ /,$l,4); |
246 |
|
$path =~ s#^\./##; # strip from path |
247 |
push @results, { |
push @results, { |
248 |
rank => $rank, |
rank => $rank, |
249 |
path => $path, |
path => $path, |
259 |
return @results; |
return @results; |
260 |
} |
} |
261 |
|
|
262 |
|
=head2 property

Return stored meta property from result or result path.

  print $i->property('path', 'title');
  print $i->property($res->{'path'}, 'title');
  print $i->property($res, 'title');

The first argument is either a path string or a hash reference which
has a C<path> key (it dies if that key is missing or false). The second
argument is the name of the meta property. Returns the value found in
the meta database under the key C<path-meta>, or C<undef> if there is
none.

=cut

sub property {
    my $self = shift;

    my ($path,$meta) = @_;

    # A result is a hash reference carrying its own 'path'. Test with ref()
    # instead of matching the stringified value, so that a plain path which
    # merely starts with "HASH" is never dereferenced as a hash.
    if (ref($path) eq 'HASH') {
        $path = $path->{'path'} || confess "can't find path in input data";
    }

    # meta properties are stored under "<path>-<name>" keys
    my $val = $self->{'meta_db'}->{"$path-$meta"};

    # report 'undef' only for really missing values, not for 0 or ''
    print STDERR "## property $path-$meta: ",(defined($val) ? $val : 'undef'),"\n" if ($self->{'debug'});

    return $val;
}
285 |
|
|
286 |
=head2 finish_update |
=head2 finish_update |
287 |
|
|
288 |
This method will close index. |
This method will close index. |
298 |
|
|
299 |
print STDERR "## finish_update\n" if ($self->{'debug'}); |
print STDERR "## finish_update\n" if ($self->{'debug'}); |
300 |
|
|
301 |
$self->_close_index; |
$self->_close_index && $self->_untie_meta_db; |
302 |
} |
} |
303 |
|
|
304 |
sub DESTROY { |
sub DESTROY { |
344 |
$opt .= " -s _stopwords_"; |
$opt .= " -s _stopwords_"; |
345 |
} |
} |
346 |
|
|
347 |
my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; |
my $index_dir = $self->{'index_dir'} || confess "no index_dir?"; |
348 |
|
|
349 |
|
my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$index_dir.'/index -'; |
350 |
|
|
351 |
print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'}); |
print STDERR "## init_indexer: $open_cmd\n" if ($self->{'debug'}); |
352 |
|
|
354 |
|
|
355 |
chdir $self->{'cwd'} || confess "can't chdir to ".$self->{'cwd'}.": $!"; |
chdir $self->{'cwd'} || confess "can't chdir to ".$self->{'cwd'}.": $!"; |
356 |
|
|
357 |
|
$self->_tie_meta_db(DB_CREATE); |
358 |
|
|
359 |
return $self->{'_index_fh'}; |
return $self->{'_index_fh'}; |
360 |
} |
} |
361 |
|
|
362 |
|
=head2 _tie_meta_db

Open BerkeleyDB database with meta properties.

  $i->_tie_meta_db(DB_CREATE);
  $i->_tie_meta_db(DB_RDONLY);

The database file is C<meta.db> inside C<index_dir>. If the database is
already tied with the same flags nothing is done; otherwise any existing
tie is released first. Dies if flags or C<index_dir> are missing, or if
the database cannot be opened. Returns 1 after a successful tie.

=cut

sub _tie_meta_db {
    my $self = shift;

    my $flags = shift || confess "need DB_CREATE or DB_RDONLY";

    # already open in the requested mode, nothing to do
    return if ($self->{'_meta_db_flags'} && $self->{'_meta_db_flags'} == $flags);

    print STDERR "## _tie_meta_db($flags)\n" if ($self->{'debug'});

    # release a tie made with different flags before opening again
    $self->_untie_meta_db;

    # without index_dir the file name would silently become "/meta.db"
    my $index_dir = $self->{'index_dir'} || confess "no index_dir?";
    my $file = $index_dir.'/meta.db';

    tie %{$self->{'meta_db'}}, "BerkeleyDB::Hash",
        -Filename => $file,
        -Flags => $flags
        or confess "cannot open $file: $! $BerkeleyDB::Error\n" ;

    # remember the flags only once the tie succeeded, so that a failed
    # open is not mistaken for an open database on the next call
    $self->{'_meta_db_flags'} = $flags;

    return 1;
}
394 |
|
|
395 |
|
=head2 _untie_meta_db |
396 |
|
|
397 |
|
Close BerkeleyDB database with meta properties. |
398 |
|
|
399 |
|
$i->_untie_meta_db |
400 |
|
|
401 |
|
=cut |
402 |
|
|
403 |
|
# Release the tie on the meta property hash, if there is one.
# Returns nothing when no meta_db is set, 1 after the hash is untied;
# dies when untie reports failure.
sub _untie_meta_db {
    my ($self) = @_;

    # no meta_db in this object, so there is nothing to close
    if (! $self->{'meta_db'}) {
        return;
    }

    if ($self->{'debug'}) {
        print STDERR "## _untie_meta_db\n";
    }

    my $untied = untie %{ $self->{'meta_db'} };
    confess "can't untie!" unless ($untied);

    # both keys stay in the object, reset to undef
    undef $self->{$_} for ('meta_db', '_meta_db_flags');

    return 1;
}
415 |
|
|
416 |
=head2 _create_doc |
=head2 _create_doc |
417 |
|
|
418 |
Create temporary file and pass its name to swish++ |
Create temporary file and pass its name to swish++ |
440 |
$self->_init_indexer; |
$self->_init_indexer; |
441 |
|
|
442 |
my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; |
my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; |
443 |
$path .= '/' . $arg->{'path'}; |
my $id = $arg->{'path'} || confess "no path?"; |
444 |
|
$path .= "/$id"; |
445 |
|
|
446 |
print STDERR "## _create_doc: $path\n" if ($self->{'debug'}); |
print STDERR "## _create_doc: $path\n" if ($self->{'debug'}); |
447 |
|
|
456 |
my $content = $arg->{'meta'}->{$name}; |
my $content = $arg->{'meta'}->{$name}; |
457 |
print TMP qq{<meta name="$name" content="$content">}; |
print TMP qq{<meta name="$name" content="$content">}; |
458 |
$arg->{'body'} .= " $content" if ($self->{'meta_in_body'}); |
$arg->{'body'} .= " $content" if ($self->{'meta_in_body'}); |
459 |
|
$self->{'meta_db'}->{"$id-$name"} = $content; |
460 |
} |
} |
461 |
} |
} |
462 |
|
|
463 |
if (defined($arg->{'title'})) { |
my $title = $arg->{'title'}; |
464 |
print TMP '<title>' . ($arg->{'title'} || '') . '</title>'; |
if (defined($title)) { |
465 |
$arg->{'body'} .= " ".$arg->{'title'} if ($self->{'meta_in_body'}); |
print TMP "<title>$title</title>"; |
466 |
|
$arg->{'body'} .= " $title" if ($self->{'meta_in_body'}); |
467 |
|
$self->{'meta_db'}->{"$id-title"} = $title; |
468 |
} |
} |
469 |
|
|
470 |
print TMP '</head><body>' . $arg->{'body'} . '</body></html>'; |
print TMP '</head><body>' . $arg->{'body'} . '</body></html>'; |
471 |
|
|
472 |
close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!"; |
close(TMP) || confess "can't close tmp file ".$arg->{'path'}.": $!"; |
473 |
|
|
474 |
print { $self->{'_index_fh'} } $arg->{'path'}."\n"; |
print { $self->{'_index_fh'} } "$id\n"; |
475 |
} |
} |
476 |
|
|
477 |
=head2 _close_index |
=head2 _close_index |
491 |
|
|
492 |
print STDERR "## close index\n" if ($self->{'debug'}); |
print STDERR "## close index\n" if ($self->{'debug'}); |
493 |
|
|
494 |
close($self->{'_index_fh'}); |
close($self->{'_index_fh'}) || confess "can't close index: $!"; |
495 |
undef $self->{'_index_fh'}; |
undef $self->{'_index_fh'}; |
496 |
|
|
497 |
|
return 1; |
498 |
} |
} |
499 |
|
|
500 |
1; |
1; |