5 |
use warnings; |
use warnings; |
6 |
use HTML::Entities; |
use HTML::Entities; |
7 |
|
|
8 |
our $VERSION = '0.05'; |
our $VERSION = '0.06'; |
9 |
|
|
10 |
use Exporter 'import'; |
use Exporter 'import'; |
11 |
use Carp; |
use Carp; |
358 |
Create xml index files for jsFind. This should be called after |
Create xml index files for jsFind. This should be called after |
359 |
your B-Tree has been filled with data. |
your B-Tree has been filled with data. |
360 |
|
|
361 |
$root->to_jsfind('/full/path/to/index/dir/'); |
$root->to_jsfind( |
362 |
|
dir => '/full/path/to/index/dir/', |
363 |
|
data_codepage => 'ISO-8859-2', |
364 |
|
index_codepage => 'UTF-8', |
365 |
|
output_filter => sub { |
366 |
|
my $t = shift || return; |
367 |
|
$t =~ s/è/e/; |
368 |
|
} |
369 |
|
); |
370 |
|
|
371 |
|
All options except C<dir> are optional. |
372 |
|
|
373 |
Returns number of nodes in created tree. |
Returns number of nodes in created tree. |
374 |
|
|
375 |
There is also a longer version if you want to recode your data charset |
Options: |
376 |
into a different one (probably UTF-8): |
|
377 |
|
=over 4 |
378 |
|
|
379 |
|
=item dir |
380 |
|
|
381 |
|
Full path to directory for index (which will be created if needed). |
382 |
|
|
383 |
|
=item data_codepage |
384 |
|
|
385 |
|
If your input data isn't in C<ISO-8859-1> encoding, you will have to specify |
386 |
|
this option. |
387 |
|
|
388 |
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); |
=item index_codepage |
389 |
|
|
390 |
Destination encoding is UTF-8 by default, so you don't have to specify it. |
If your index encoding is not C<UTF-8>, use this option. |
391 |
|
|
392 |
$root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); |
If you are not using the supplied JavaScript search code, or your browser is |
393 |
|
terribly broken and thinks that index shouldn't be in UTF-8 encoding, use |
394 |
|
this option to specify the encoding for the created XML index. |
395 |
|
|
396 |
|
=item output_filter |
397 |
|
|
398 |
|
B<This is just a draft of the documentation for an option which is not implemented!> |
399 |
|
|
400 |
|
Code ref to a sub which can modify the resulting XML file for a node. |
401 |
|
Encoding of this data will be in C<index_codepage> and you have to take care |
402 |
|
not to break XML structure. Calling L<xmllint> on your result index |
403 |
|
(like C<t/90xmllint.t> does in this distribution) is a good idea after using |
404 |
|
this option. |
405 |
|
|
406 |
|
This option is also the right place to plug in an unaccenting function using |
407 |
|
L<Text::Unaccent>. |
408 |
|
|
409 |
|
=back |
410 |
|
|
411 |
=cut |
=cut |
412 |
|
|
416 |
sub to_jsfind { |
sub to_jsfind { |
417 |
my $self = shift; |
my $self = shift; |
418 |
|
|
419 |
my $path = shift || confess "to_jsfind need path to your index!"; |
my %arg = @_; |
420 |
|
|
421 |
my ($from_cp,$to_cp) = @_; |
confess "to_jsfind need path to your index directory !" unless ($arg{'dir'}); |
422 |
|
|
423 |
$to_cp ||= 'UTF-8'; |
my $data_codepage = $arg{'data_codepage'}; |
424 |
|
my $index_codepage = $arg{'index_codepage'} || 'UTF-8'; |
425 |
|
|
426 |
if ($from_cp && $to_cp) { |
# create ISO-8859-1 iconv for HTML::Entities decode |
427 |
$iconv = Text::Iconv->new($from_cp,$to_cp); |
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage); |
|
} |
|
|
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); |
|
428 |
|
|
429 |
$path .= "/" if ($path =~ /\/$/); |
# create another iconv for data |
430 |
#carp "creating directory for index '$path'" if (! -w $path); |
if ($data_codepage && $index_codepage) { |
431 |
|
$iconv = Text::Iconv->new($data_codepage,$index_codepage); |
432 |
|
} |
433 |
|
|
434 |
return $self->root->to_jsfind($path,"0"); |
return $self->root->to_jsfind($arg{'dir'},"0"); |
435 |
} |
} |
436 |
|
|
437 |
|
|