5 |
use warnings; |
use warnings; |
6 |
use HTML::Entities; |
use HTML::Entities; |
7 |
|
|
8 |
our $VERSION = '0.04'; |
our $VERSION = '0.06'; |
9 |
|
|
10 |
use Exporter 'import'; |
use Exporter 'import'; |
11 |
use Carp; |
use Carp; |
18 |
|
|
19 |
=head1 NAME |
=head1 NAME |
20 |
|
|
21 |
jsFind - generate index for jsFind using B-Tree |
jsFind - generate index for full text search engine in JavaScript |
22 |
|
|
23 |
=head1 SYNOPSIS |
=head1 SYNOPSIS |
24 |
|
|
53 |
|
|
54 |
=item * |
=item * |
55 |
|
|
56 |
You can programmatically (and incrementally) create an index for jsFind |
you can programmatically (and incrementally) create an index for jsFind |
57 |
|
|
58 |
|
=item * |
59 |
|
|
60 |
|
you can create more than one index and search them using the same C<search.html> |
61 |
|
page |
62 |
|
|
63 |
=back |
=back |
64 |
|
|
65 |
You can also examine examples which come as tests with this module, |
You can also examine examples which come as tests with this module, |
66 |
for example C<t/04words.t>. |
for example C<t/04words.t> or C<t/10homer.t>. |
67 |
|
|
68 |
|
=head2 jsFind |
69 |
|
|
70 |
|
jsFind search engine was written by Shawn Garbett from eLucid Software. |
71 |
|
The search engine itself is a small piece of JavaScript (1.2 with level 2 |
72 |
|
DOM). It is easily customizable to fit into a current set of HTML. This |
73 |
|
JavaScript searches an XML index dataset for the appropriate links, and can |
74 |
|
filter and sort the results. |
75 |
|
|
76 |
|
JavaScript code distributed with this module is based on version 0.0.3 which |
77 |
|
was current when this module's development started. Various changes were made |
78 |
|
on JavaScript code to fix bugs, add features and remove warnings. For |
79 |
|
complete list see C<Changes> file which comes with distribution. |
80 |
|
|
81 |
|
This module has been tested using C<html/test.html> with the following browsers: |
82 |
|
|
83 |
|
=over 5 |
84 |
|
|
85 |
|
=item Mozilla Firefox 0.8 to 1.0 |
86 |
|
|
87 |
|
using DOM 2 C<document.implementation.createDocument> |
88 |
|
|
89 |
|
=item Internet Explorer 5.5 and 6.0 |
90 |
|
|
91 |
|
using ActiveX C<Microsoft.XMLDOM> or C<MSXML2.DOMDocument> |
92 |
|
|
93 |
|
=item Konqueror 3.3 |
94 |
|
|
95 |
|
using DOM 2 C<document.implementation.createDocument> |
96 |
|
|
97 |
|
=item Opera 7.54 (without Java) |
98 |
|
|
99 |
|
using experimental iframe implementation which is much slower than other methods. |
100 |
|
|
101 |
|
=back |
102 |
|
|
103 |
|
If searching doesn't work for your combination of operating system and |
104 |
|
browser, please open C<html/test.html> file and wait a while. It will search sample |
105 |
|
file included with distribution and report results. Reports with included |
106 |
|
test debugging are welcomed. |
107 |
|
|
108 |
=head1 jsFind methods |
=head1 jsFind methods |
109 |
|
|
358 |
Create xml index files for jsFind. This should be called after |
Create xml index files for jsFind. This should be called after |
359 |
your B-Tree has been filled with data. |
your B-Tree has been filled with data. |
360 |
|
|
361 |
$root->to_jsfind('/full/path/to/index/dir/'); |
$root->to_jsfind( |
362 |
|
dir => '/full/path/to/index/dir/', |
363 |
|
data_codepage => 'ISO-8859-2', |
364 |
|
index_codepage => 'UTF-8', |
365 |
|
output_filter => sub { |
366 |
|
my $t = shift || return; |
367 |
|
$t =~ s/è/e/; |
368 |
|
} |
369 |
|
); |
370 |
|
|
371 |
|
All options except C<dir> are optional. |
372 |
|
|
373 |
Returns number of nodes in created tree. |
Returns number of nodes in created tree. |
374 |
|
|
375 |
There is also longer version if you want to recode your data charset |
Options: |
376 |
into different one (probably UTF-8): |
|
377 |
|
=over 4 |
378 |
|
|
379 |
|
=item dir |
380 |
|
|
381 |
|
Full path to directory for index (which will be created if needed). |
382 |
|
|
383 |
|
=item data_codepage |
384 |
|
|
385 |
|
If your input data isn't in C<ISO-8859-1> encoding, you will have to specify |
386 |
|
this option. |
387 |
|
|
388 |
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); |
=item index_codepage |
389 |
|
|
390 |
Destination encoding is UTF-8 by default, so you don't have to specify it. |
If your index encoding is not C<UTF-8> use this option. |
391 |
|
|
392 |
$root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); |
If you are not using supplied JavaScript search code, or your browser is |
393 |
|
terribly broken and thinks that index shouldn't be in UTF-8 encoding, use |
394 |
|
this option to specify encoding for created XML index. |
395 |
|
|
396 |
|
=item output_filter |
397 |
|
|
398 |
|
B<this is just a draft of documentation for an option which is not implemented!> |
399 |
|
|
400 |
|
Code ref to sub which can do modifications on resulting XML file for node. |
401 |
|
Encoding of this data will be in C<index_codepage> and you have to take care |
402 |
|
not to break XML structure. Calling L<xmllint> on your result index |
403 |
|
(like C<t/90xmllint.t> does in this distribution) is a good idea after using |
404 |
|
this option. |
405 |
|
|
406 |
|
This option is also the right place to plug in an unaccenting function using |
407 |
|
L<Text::Unaccent>. |
408 |
|
|
409 |
|
=back |
410 |
|
|
411 |
=cut |
=cut |
412 |
|
|
416 |
sub to_jsfind { |
sub to_jsfind { |
417 |
my $self = shift; |
my $self = shift; |
418 |
|
|
419 |
my $path = shift || confess "to_jsfind need path to your index!"; |
my %arg = @_; |
420 |
|
|
421 |
my ($from_cp,$to_cp) = @_; |
confess "to_jsfind need path to your index directory !" unless ($arg{'dir'}); |
422 |
|
|
423 |
$to_cp ||= 'UTF-8'; |
my $data_codepage = $arg{'data_codepage'}; |
424 |
|
my $index_codepage = $arg{'index_codepage'} || 'UTF-8'; |
425 |
|
|
426 |
if ($from_cp && $to_cp) { |
# create ISO-8859-1 iconv for HTML::Entities decode |
427 |
$iconv = Text::Iconv->new($from_cp,$to_cp); |
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage); |
|
} |
|
|
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); |
|
428 |
|
|
429 |
$path .= "/" if ($path =~ /\/$/); |
# create another iconv for data |
430 |
#carp "creating directory for index '$path'" if (! -w $path); |
if ($data_codepage && $index_codepage) { |
431 |
|
$iconv = Text::Iconv->new($data_codepage,$index_codepage); |
432 |
|
} |
433 |
|
|
434 |
return $self->root->to_jsfind($path,"0"); |
return $self->root->to_jsfind($arg{'dir'},"0"); |
435 |
} |
} |
436 |
|
|
437 |
|
|