7 |
__PACKAGE__->mk_accessors(qw( |
__PACKAGE__->mk_accessors(qw( |
8 |
path |
path |
9 |
database |
database |
10 |
|
input |
11 |
encoding |
encoding |
12 |
clean |
clean |
13 |
|
|
26 |
|
|
27 |
=head1 VERSION |
=head1 VERSION |
28 |
|
|
29 |
Version 0.04 |
Version 0.05 |
30 |
|
|
31 |
=cut |
=cut |
32 |
|
|
33 |
our $VERSION = '0.04'; |
our $VERSION = '0.05'; |
34 |
|
|
35 |
=head1 SYNOPSIS |
=head1 SYNOPSIS |
36 |
|
|
43 |
|
|
44 |
Open KinoSearch index |
Open KinoSearch index |
45 |
|
|
46 |
my $est = new WebPAC::Output::KinoSearch({ |
my $out = new WebPAC::Output::KinoSearch({ |
47 |
path => '/path/to/invindex', |
path => '/path/to/invindex', |
48 |
database => 'demo', |
database => 'demo', |
49 |
encoding => 'iso-8859-2', |
encoding => 'iso-8859-2', |
70 |
|
|
71 |
=back |
=back |
72 |
|
|
73 |
|
=head2 init |
74 |
|
|
75 |
|
$out->init; |
76 |
|
|
77 |
=cut |
=cut |
78 |
|
|
79 |
sub init { |
sub init { |
91 |
|
|
92 |
$self->encoding( 'ISO-8859-2' ) unless $self->encoding; |
$self->encoding( 'ISO-8859-2' ) unless $self->encoding; |
93 |
|
|
94 |
|
## FIXME we shouldn't re-create whole KinoSearch index every time! |
95 |
|
$self->clean( 1 ); |
96 |
|
|
97 |
if ( ! -e $self->path ) { |
if ( ! -e $self->path ) { |
98 |
mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); |
mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); |
99 |
$log->info("created ", $self->path); |
$log->info("created ", $self->path); |
100 |
|
} elsif ( $self->clean ) { |
101 |
|
$log->info("removing existing ", $self->path); |
102 |
|
rmtree $self->path || $log->logdie("can't remove ", $self->path,": $!"); |
103 |
|
mkpath $self->path || $log->logdie("can't create ", $self->path,": $!"); |
104 |
} |
} |
105 |
|
|
106 |
my $path = $self->path . '/' . $self->database; |
my $path = $self->path . '/' . $self->database; |
123 |
|
|
124 |
Adds one entry |
Adds one entry |
125 |
|
|
126 |
$est->add( 42, $ds ); |
$out->add( 42, $ds ); |
127 |
|
|
128 |
=cut |
=cut |
129 |
|
|
136 |
$log->logdie("need id") unless defined $id; |
$log->logdie("need id") unless defined $id; |
137 |
$log->logdie("need ds") unless $ds; |
$log->logdie("need ds") unless $ds; |
138 |
|
|
139 |
$log->debug("id: $id ds = ",dump($ds)); |
$log->debug("id: $id ds = ", sub { dump($ds) }); |
140 |
|
|
141 |
my $hash = $self->ds_to_hash( $ds, 'search' ) || return; |
my $hash = $self->ds_to_hash( $ds, 'search' ) || return; |
142 |
|
|
143 |
warn "add( $id, ",dump($ds)," ) => ", dump( $hash ); |
$hash->{id} ||= $id; |
144 |
|
$hash->{database} ||= $self->database; |
145 |
|
$hash->{input} ||= $self->input; |
146 |
|
|
147 |
|
foreach my $f ( keys %$hash ) { |
148 |
|
if ( ref($hash->{$f}) eq 'ARRAY' ) { |
149 |
|
$hash->{$f} = join(' <*> ', @{ $hash->{$f} }); |
150 |
|
} |
151 |
|
} |
152 |
|
|
153 |
|
$log->debug("add( $id, ", sub { dump($ds) }," ) => ", sub { dump( $hash ) }); |
154 |
|
|
155 |
$self->index->add_doc( $hash ); |
$self->index->add_doc( $hash ); |
156 |
|
|
157 |
|
$self->{count}++; |
158 |
|
|
159 |
return 1; |
return 1; |
160 |
} |
} |
161 |
|
|
163 |
|
|
164 |
Close index |
Close index |
165 |
|
|
166 |
$index->finish; |
$out->finish; |
167 |
|
|
168 |
=cut |
=cut |
169 |
|
|
172 |
|
|
173 |
my $log = $self->_get_logger(); |
my $log = $self->_get_logger(); |
174 |
|
|
175 |
$log->info("dummy finish"); |
$log->info("indexed ", $self->{count}, " records"); |
|
|
|
|
} |
|
|
|
|
|
=head2 convert |
|
|
|
|
|
my $utf8_string = $self->convert('string in codepage'); |
|
|
|
|
|
=cut |
|
|
|
|
|
sub convert { |
|
|
my $self = shift; |
|
176 |
|
|
|
my $text = shift || return; |
|
|
from_to($text, $self->{encoding}, 'UTF-8'); |
|
|
return $text; |
|
177 |
} |
} |
178 |
|
|
179 |
=head1 AUTHOR |
=head1 AUTHOR |