--- trunk/jsFind.pm 2004/08/28 14:31:58 12 +++ trunk/jsFind.pm 2004/12/19 23:26:23 39 @@ -1,24 +1,24 @@ package jsFind; -use 5.008004; +use 5.005; use strict; use warnings; use HTML::Entities; -our $VERSION = '0.03'; +our $VERSION = '0.07_01'; -use Exporter 'import'; +use Exporter; use Carp; our @ISA = qw(Exporter); BEGIN { - import 'jsFind::Node'; + Exporter::import 'jsFind::Node'; } =head1 NAME -jsFind - generate index for jsFind using B-Tree +jsFind - generate index for full text search engine in JavaScript =head1 SYNOPSIS @@ -53,10 +53,58 @@ =item * -You can programatically (and incrementaly) create index for jsFind +you can programmatically (and incrementally) create index for jsFind + +=item * + +you can create more than one index and search them using same C +page =back +You can also examine examples which come as tests with this module, +for example C or C. + +=head2 jsFind + +jsFind search engine was written by Shawn Garbett from eLucid Software. +The search engine itself is a small piece of JavaScript (1.2 with level 2 +DOM). It is easily customizable to fit into a current set of HTML. This +JavaScript searches an XML index dataset for the appropriate links, and can +filter and sort the results. + +JavaScript code distributed with this module is based on version 0.0.3 which +was current when this module development started. Various changes were done +on JavaScript code to fix bugs, add features and remove warnings. For +complete list see C file which comes with distribution. + +This module has been tested using C with following browsers: + +=over 5 + +=item Mozilla FireFox 0.8 to 1.0 + +using DOM 2 C + +=item Internet Explorer 5.5 and 6.0 + +using ActiveX C or C + +=item Konqueror 3.3 + +using DOM 2 C + +=item Opera 7.54 (without Java) + +using experimental iframe implementation which is much slower than other methods. + +=back + +If searching doesn't work for your combination of operating system and +browser, please open C file and wait a while. 
It will search sample +file included with distribution and report results. Reports with included +test debugging are welcomed. + =head1 jsFind methods C is mode implementing methods which you, the user, are going to @@ -310,18 +358,55 @@ Create xml index files for jsFind. This should be called after your B-Tree has been filled with data. - $root->to_jsfind('/full/path/to/index/dir/'); + $root->to_jsfind( + dir => '/full/path/to/index/dir/', + data_codepage => 'ISO-8859-2', + index_codepage => 'UTF-8', + output_filter => sub { + my $t = shift || return; + $t =~ s/è/e/; + } + ); + +All options except C<dir> are optional. Returns number of nodes in created tree. -There is also longer version if you want to recode your data charset -into different one (probably UTF-8): +Options: + +=over 4 + +=item dir + +Full path to directory for index (which will be created if needed). + +=item data_codepage + +If your input data isn't in C encoding, you will have to specify +this option. + +=item index_codepage - $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); +If your index encoding is not C<UTF-8> use this option. -Destination encoding is UTF-8 by default, so you don't have to specify it. +If you are not using supplied JavaScript search code, or your browser is +terribly broken and thinks that index shouldn't be in UTF-8 encoding, use +this option to specify encoding for created XML index. - $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); +=item output_filter + +B + +Code ref to sub which can do modifications on resulting XML file for node. +Encoding of this data will be in L and you have to take care +not to break XML structure. Calling L on your result index +(like C does in this distribution) is a good idea after using +this option. + +This option is also right place to plug in unaccenting function using +L. 
=back

=cut

# to_jsfind( dir => $path, [ data_codepage => $cp, ] [ index_codepage => $cp ] )
#
# Entry point for writing the jsFind XML index of this tree.
#
# Options (passed as a key/value list):
#   dir            - index directory; required, confess()es when missing/false
#   data_codepage  - encoding of the input data; optional
#   index_codepage - encoding of the generated index; defaults to 'UTF-8'
#
# Sets up the shared Text::Iconv converters and then delegates to the root
# node's to_jsfind(), starting at file number "0". Returns whatever that
# call returns.
#
# NOTE(review): $iconv and $iconv_l1 are not declared inside this sub; they
# are presumably file-scoped variables declared just above it (outside the
# visible hunk) - confirm.
sub to_jsfind {
    my $self = shift;
    my %arg  = @_;

    confess "to_jsfind need path to your index directory !" unless ($arg{'dir'});

    my $data_codepage  = $arg{'data_codepage'};
    my $index_codepage = $arg{'index_codepage'} || 'UTF-8';

    # create ISO-8859-1 iconv for HTML::Entities decode
    $iconv_l1 = Text::Iconv->new('ISO-8859-1', $index_codepage);

    # Converter for the data itself. $index_codepage is always true here
    # (it has a default), so only $data_codepage decides. The variable is
    # reassigned on every call so that a converter created by an earlier
    # call with data_codepage cannot leak into a later call without one.
    $iconv = $data_codepage
        ? Text::Iconv->new($data_codepage, $index_codepage)
        : undef;

    return $self->root->to_jsfind($arg{'dir'}, "0");
}

# NOTE(review): in the full file base_x appears in a later section than the
# to_jsfind above (after a separate run of "use" lines) - confirm which
# package it belongs to before moving code around.

=head2 base_x

Convert a non-negative number to base 36, using the digits C<0>-C<9>
followed by C<a>-C<z> (used for jsFind index filenames).

 my $n = $tree->base_x(50);	# "1e"

Only the integer part of the number is used. Dies (via C<confess>) if the
number is undefined or negative.

=cut

sub base_x {
    my $self  = shift;
    my $value = shift;

    confess("need non-negative number") if (! defined($value) || $value < 0);

    my @digits = (0 .. 9, 'a' .. 'z');
    my $base   = scalar(@digits);

    # Work on the integer part only: % truncates its operands anyway, and
    # for non-negative numbers int() gives the same result as floor().
    $value = int($value);

    return "0" if ($value == 0);

    my $out = "";
    while ($value > 0) {
        # the least significant digit goes in front of what we have so far
        $out   = $digits[ $value % $base ] . $out;
        $value = int($value / $base);
    }

    return $out;
}
instead of \$t->to_jsfind(..) ?") unless (defined($file)); + $file = $self->base_x($file); + my $nr_keys = 0; my ($k, $d, $s) = @$self; @@ -846,6 +974,8 @@ B-Trees in perl web site L +This module web site L + =head1 AUTHORS Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C which was