/[jsFind]/trunk/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 12 by dpavlin, Sat Aug 28 14:31:58 2004 UTC revision 39 by dpavlin, Sun Dec 19 23:26:23 2004 UTC
# Line 1  Line 1 
1  package jsFind;  package jsFind;
2    
3  use 5.008004;  use 5.005;
4  use strict;  use strict;
5  use warnings;  use warnings;
6  use HTML::Entities;  use HTML::Entities;
7    
8  our $VERSION = '0.03';  our $VERSION = '0.07_01';
9    
10  use Exporter 'import';  use Exporter;
11  use Carp;  use Carp;
12    
13  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
14    
15  BEGIN {  BEGIN {
16          import 'jsFind::Node';          Exporter::import 'jsFind::Node';
17  }  }
18    
19  =head1 NAME  =head1 NAME
20    
21  jsFind - generate index for jsFind using B-Tree  jsFind - generate index for full text search engine in JavaScript
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
# Line 53  You don't need to use swish-e to create Line 53  You don't need to use swish-e to create
53    
54  =item *  =item *
55    
56  You can programmatically (and incrementally) create index for jsFind  you can programmatically (and incrementally) create index for jsFind
57    
58    =item *
59    
60    you can create more than one index and search them using same C<search.html>
61    page
62    
63  =back  =back
64    
65    You can also examine examples which come as tests with this module,
66    for example C<t/04words.t> or C<t/10homer.t>.
67    
68    =head2 jsFind
69    
70    jsFind search engine was written by Shawn Garbett from eLucid Software.
71    The search engine itself is a small piece of JavaScript (1.2 with level 2
72    DOM). It is easily customizable to fit into a current set of HTML. This
73    JavaScript searches an XML index dataset for the appropriate links, and can
74    filter and sort the results.
75    
76    JavaScript code distributed with this module is based on version 0.0.3 which
77    was current when this module development started. Various changes were done
78    on JavaScript code to fix bugs, add features and remove warnings. For
79    complete list see C<Changes> file which comes with distribution.
80    
81    This module has been tested using C<html/test.html> with following browsers:
82    
83    =over 5
84    
85    =item Mozilla FireFox 0.8 to 1.0
86    
87    using DOM 2 C<document.implementation.createDocument>
88    
89    =item Internet Explorer 5.5 and 6.0
90    
91    using ActiveX C<Microsoft.XMLDOM> or C<MSXML2.DOMDocument>
92    
93    =item Konqueror 3.3
94    
95    using DOM 2 C<document.implementation.createDocument>
96    
97    =item Opera 7.54 (without Java)
98    
99    using experimental iframe implementation which is much slower than other methods.
100    
101    =back
102    
103    If searching doesn't work for your combination of operating system and
104    browser, please open C<html/test.html> file and wait a while. It will search sample
105    file included with distribution and report results. Reports with included
106    test debugging are welcomed.
107    
108  =head1 jsFind methods  =head1 jsFind methods
109    
110  C<jsFind> is mode implementing methods which you, the user, are going to  C<jsFind> is mode implementing methods which you, the user, are going to
# Line 310  sub to_dot { Line 358  sub to_dot {
358  Create xml index files for jsFind. This should be called after  Create xml index files for jsFind. This should be called after
359  your B-Tree has been filled with data.  your B-Tree has been filled with data.
360    
361   $root->to_jsfind('/full/path/to/index/dir/');   $root->to_jsfind(
362            dir => '/full/path/to/index/dir/',
363            data_codepage => 'ISO-8859-2',
364            index_codepage => 'UTF-8',
365            output_filter => sub {
366                    my $t = shift || return;
367                    $t =~ s/&egrave;/e/;
368            }
369     );
370    
371    All options except C<dir> are optional.
372    
373  Returns number of nodes in created tree.  Returns number of nodes in created tree.
374    
375  There is also longer version if you want to recode your data charset  Options:
376  into different one (probably UTF-8):  
377    =over 4
378    
379    =item dir
380    
381    Full path to directory for index (which will be created if needed).
382    
383    =item data_codepage
384    
385    If your input data isn't in C<ISO-8859-1> encoding, you will have to specify
386    this option.
387    
388    =item index_codepage
389    
390   $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');  If your index encoding is not C<UTF-8> use this option.
391    
392  Destination encoding is UTF-8 by default, so you don't have to specify it.  If you are not using supplied JavaScript search code, or your browser is
393    terribly broken and thinks that index shouldn't be in UTF-8 encoding, use
394    this option to specify encoding for created XML index.
395    
396   $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');  =item output_filter
397    
398    B<this is just draft of documentation for option which is not implemented!>
399    
400    Code ref to sub which can do modifications on resulting XML file for node.
401    Encoding of this data will be in L<index_codepage> and you have to take care
402    not to break XML structure. Calling L<xmllint> on your result index
403    (like C<t/90xmllint.t> does in this distribution) is a good idea after using
404    this option.
405    
406    This option is also right place to plug in unaccenting function using
407    L<Text::Unaccent>.
408    
409    =back
410    
411  =cut  =cut
412    
# Line 331  my $iconv_l1; Line 416  my $iconv_l1;
416  sub to_jsfind {  sub to_jsfind {
417          my $self = shift;          my $self = shift;
418    
419          my $path = shift || confess "to_jsfind need path to your index!";          my %arg = @_;
420    
421          my ($from_cp,$to_cp) = @_;          confess "to_jsfind need path to your index directory !" unless ($arg{'dir'});
422    
423          $to_cp ||= 'UTF-8';          my $data_codepage = $arg{'data_codepage'};
424            my $index_codepage = $arg{'index_codepage'} || 'UTF-8';
425    
426          if ($from_cp && $to_cp) {          # create ISO-8859-1 iconv for HTML::Entities decode
427                  $iconv = Text::Iconv->new($from_cp,$to_cp);          $iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage);
         }  
         $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);  
428    
429          $path .= "/" if ($path =~ /\/$/);          # create another iconv for data
430          #carp "creating directory for index '$path'" if (! -w $path);          if ($data_codepage && $index_codepage) {
431                    $iconv = Text::Iconv->new($data_codepage,$index_codepage);
432            }
433    
434          return $self->root->to_jsfind($path,"0");          return $self->root->to_jsfind($arg{'dir'},"0");
435  }  }
436    
437    
# Line 404  use strict; Line 490  use strict;
490  use Carp;  use Carp;
491  use File::Path;  use File::Path;
492  use Text::Iconv;  use Text::Iconv;
493    use POSIX;
494    
495  use base 'jsFind';  use base 'jsFind';
496    
# Line 773  sub to_xml { Line 860  sub to_xml {
860          return $d;          return $d;
861  }  }
862    
863    =head2 base_x
864    
865    Convert number to base x (used for jsFind index filenames).
866    
867     my $n = $tree->base_x(50);
868    
869    =cut
870    
871    sub base_x {
872            my $self = shift;
873    
874            my $value = shift;
875    
876            confess("need non-negative number") if (! defined($value) || $value < 0);
877    
878            my @digits = qw(
879                    0 1 2 3 4 5 6 7 8 9
880                    a b c d e f g h i j k l m n o p q r s t u v w x y z
881            );
882    
883            my $base = scalar(@digits);
884            my $out = "";
885            my $pow = 1;
886            my $pos = 0;
887    
888    
889            if($value == 0) {
890                    return "0";
891            }
892    
893            while($value > 0) {
894                    $pos = $value % $base;
895                    $out = $digits[$pos] . $out;
896                    $value = floor($value/$base);
897                    $pow *= $base;
898            }
899    
900            return $out;
901    }
902    
903  =head2 to_jsfind  =head2 to_jsfind
904    
905  Create jsFind xml files  Create jsFind xml files
# Line 783  Returns number of elements created Line 910  Returns number of elements created
910    
911  =cut  =cut
912    
   
913  sub to_jsfind {  sub to_jsfind {
914          my $self = shift;          my $self = shift;
915          my ($path,$file) = @_;          my ($path,$file) = @_;
# Line 793  sub to_jsfind { Line 919  sub to_jsfind {
919          confess("path is undefined.") unless ($path);          confess("path is undefined.") unless ($path);
920          confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));          confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
921    
922            $file = $self->base_x($file);
923    
924          my $nr_keys = 0;          my $nr_keys = 0;
925    
926          my ($k, $d, $s) = @$self;          my ($k, $d, $s) = @$self;
# Line 846  jsFind web site L<http://www.elucidsoft. Line 974  jsFind web site L<http://www.elucidsoft.
974    
975  B-Trees in perl web site L<http://perl.plover.com/BTree/>  B-Trees in perl web site L<http://perl.plover.com/BTree/>
976    
977    This module web site L<http://www.rot13.org/~dpavlin/jsFind.html>
978    
979  =head1 AUTHORS  =head1 AUTHORS
980    
981  Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was  Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was

Legend:
Removed from v.12  
changed lines
  Added in v.39

  ViewVC Help
Powered by ViewVC 1.1.26