4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
|
7 |
our $VERSION = '0.03'; |
our $VERSION = '0.05'; |
8 |
|
|
9 |
use Carp; |
use Carp; |
10 |
use File::Temp qw/ tempdir /; |
use File::Temp qw/ tempdir /; |
31 |
|
|
32 |
=head1 METHODS |
=head1 METHODS |
33 |
|
|
34 |
=head2 open |
=head2 new |
35 |
|
|
36 |
Create new indexing object. |
Create new indexing object. |
37 |
|
|
38 |
my $i = SWISH::PlusPlus->open( |
my $i = SWISH::PlusPlus->new( |
39 |
index_dir => '/path/to/index', |
index_dir => '/path/to/index', |
40 |
index => 'index++', |
index => 'index++', |
41 |
search => 'search++', |
search => 'search++', |
44 |
use_stopwords => 1, |
use_stopwords => 1, |
45 |
); |
); |
46 |
|
|
47 |
Options to open are as follows: |
Options to new are as follows: |
48 |
|
|
49 |
=over 5 |
=over 5 |
50 |
|
|
81 |
|
|
82 |
=cut |
=cut |
83 |
|
|
84 |
sub open { |
sub new { |
85 |
my $class = shift; |
my $class = shift; |
86 |
my $self = {@_}; |
my $self = {@_}; |
87 |
bless($self, $class); |
bless($self, $class); |
98 |
$self->{'index'} ||= 'index'; |
$self->{'index'} ||= 'index'; |
99 |
$self->{'search'} ||= 'search'; |
$self->{'search'} ||= 'search'; |
100 |
|
|
101 |
print STDERR "## open index_dir: ",$self->{'index_dir'}," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'}); |
print STDERR "## new index_dir: ",$self->{'index_dir'}," index: ",$self->{'index'}, " search: ",$self->{'search'},"\n" if ($self->{'debug'}); |
102 |
|
|
103 |
$self ? return $self : return undef; |
$self ? return $self : return undef; |
104 |
} |
} |
106 |
|
|
107 |
=head2 check_bin |
=head2 check_bin |
108 |
|
|
109 |
Check if swish++ binaries specified in L<open> are available and verify |
Check if swish++ binaries specified in L<new> are available and verify |
110 |
version signature. |
version signature. |
111 |
|
|
112 |
if ($i->check_bin) { |
if ($i->check_bin) { |
218 |
my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |'; |
my $open_cmd = $self->{'search'}." -i ".$self->{'index_dir'}.'/index "'.$query.'" |'; |
219 |
print STDERR "## search $open_cmd\n" if ($self->{'debug'}); |
print STDERR "## search $open_cmd\n" if ($self->{'debug'}); |
220 |
|
|
221 |
CORE::open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; |
open(SEARCH, $open_cmd) || confess "can't start $open_cmd: $!"; |
222 |
while(<SEARCH>) { |
while(<SEARCH>) { |
223 |
next if (/^#/); |
next if (/^#/); |
224 |
chomp; |
chomp; |
266 |
my $opt = "-v 4"; |
my $opt = "-v 4"; |
267 |
|
|
268 |
unless ($self->{'use_stopwrods'}) { |
unless ($self->{'use_stopwrods'}) { |
269 |
CORE::open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n"; |
open(STOP, '>', "_stopwords_") || carp "can't create empty stopword file, skipping\n"; |
270 |
print STOP " "; |
print STOP " "; |
271 |
close(STOP); |
close(STOP); |
272 |
$opt .= " -s _stopwords_"; |
$opt .= " -s _stopwords_"; |
275 |
my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; |
my $open_cmd = '| '.$self->{'index'}.' '.$opt.' -e "html:*" -i '.$self->{'index_dir'}.'/index -'; |
276 |
|
|
277 |
|
|
278 |
CORE::open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; |
open($self->{'index_fh'}, $open_cmd) || confess "can't start index with $open_cmd: $!"; |
279 |
|
|
280 |
|
|
281 |
return $self->{'index_fh'}; |
return $self->{'index_fh'}; |
309 |
|
|
310 |
my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; |
my $path = $self->{'tmp_dir'} || confess "no tmp_dir?"; |
311 |
|
|
312 |
CORE::open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!"; |
open(TMP, '>', $arg->{'path'}) || die "can't create temp file ".$arg->{'path'}.": $!"; |
313 |
|
|
314 |
print TMP '<html><head>'; |
print TMP '<html><head>'; |
315 |
|
|
316 |
$arg->{'body'} ||= ''; |
$arg->{'body'} ||= ''; |
317 |
|
|
318 |
if ($arg->{'meta'}) { |
if ($arg->{'meta'}) { |
319 |
confess "not yet implemented"; |
foreach my $name (keys %{$arg->{'meta'}}) { |
320 |
|
my $content = $arg->{'meta'}->{$name}; |
321 |
|
print TMP qq{<meta name="$name" content="$content">}; |
322 |
|
$arg->{'body'} .= " $content" if ($self->{'meta_in_body'}); |
323 |
|
} |
324 |
} |
} |
325 |
|
|
326 |
if (defined($arg->{'title'})) { |
if (defined($arg->{'title'})) { |
370 |
The Debian version of swish++ is often old (version 5 at the time of this writing |
The Debian version of swish++ is often old (version 5 at the time of this writing |
371 |
while version 6 is available in source code), so this module by default |
while version 6 is available in source code), so this module by default |
372 |
uses executable names B<index> and B<search> for self-compiled version |
uses executable names B<index> and B<search> for self-compiled version |
373 |
instead of the one from the Debian package. See L<open> for how to specify the Debian |
instead of the one from the Debian package. See L<new> for how to specify the Debian |
374 |
default binaries B<index++> and B<search++>. |
default binaries B<index++> and B<search++>. |
375 |
|
|
376 |
=head2 SWISH++ |
=head2 SWISH++ |
394 |
pages. To see my very relaxed sample configuration take a look at C<swish++> |
pages. To see my very relaxed sample configuration take a look at C<swish++> |
395 |
directory included in distribution. |
directory included in distribution. |
396 |
|
|
397 |
|
=head2 SWISH++ config |
398 |
|
|
399 |
|
C<config.h>, located in the C<swish++> directory of this distribution, is a relaxed |
400 |
|
SWISH++ configuration that will index all words passed to it. This |
401 |
|
configuration is needed for B<date test> because the default configuration |
402 |
|
doesn't recognize 2004-12-05 as a date. Keep in mind that your index size |
403 |
|
might explode. |
404 |
|
|
405 |
=head1 SEE ALSO |
=head1 SEE ALSO |
406 |
|
|
407 |
C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/> |
C<swish++> web site L<http://homepage.mac.com/pauljlucas/software/swish/> |