/[jsFind]/trunk/jsFind.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 15 by dpavlin, Sun Sep 5 17:57:21 2004 UTC revision 39 by dpavlin, Sun Dec 19 23:26:23 2004 UTC
# Line 5  use strict; Line 5  use strict;
5  use warnings;  use warnings;
6  use HTML::Entities;  use HTML::Entities;
7    
8  our $VERSION = '0.04';  our $VERSION = '0.07_01';
9    
10  use Exporter 'import';  use Exporter;
11  use Carp;  use Carp;
12    
13  our @ISA = qw(Exporter);  our @ISA = qw(Exporter);
14    
15  BEGIN {  BEGIN {
16          import 'jsFind::Node';          Exporter::import 'jsFind::Node';
17  }  }
18    
19  =head1 NAME  =head1 NAME
20    
21  jsFind - generate index for jsFind using B-Tree  jsFind - generate index for full text search engine in JavaScript
22    
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
# Line 53  You don't need to use swish-e to create Line 53  You don't need to use swish-e to create
53    
54  =item *  =item *
55    
56  You can programmatically (and incrementally) create index for jsFind  you can programmatically (and incrementally) create index for jsFind
57    
58    =item *
59    
60    you can create more than one index and search them using same C<search.html>
61    page
62    
63  =back  =back
64    
65  You can also examine examples which come as tests with this module,  You can also examine examples which come as tests with this module,
66  for example C<t/04words.t>.  for example C<t/04words.t> or C<t/10homer.t>.
67    
68    =head2 jsFind
69    
70    jsFind search engine was written by Shawn Garbett from eLucid Software.
71    The search engine itself is a small piece of JavaScript (1.2 with level 2
72    DOM). It is easily customizable to fit into a current set of HTML. This
73    JavaScript searches an XML index dataset for the appropriate links, and can
74    filter and sort the results.
75    
76    JavaScript code distributed with this module is based on version 0.0.3 which
77    was current when this module development started. Various changes were made
78    to the JavaScript code to fix bugs, add features and remove warnings. For
79    a complete list, see the C<Changes> file which comes with the distribution.
80    
81    This module has been tested using C<html/test.html> with following browsers:
82    
83    =over 5
84    
85    =item Mozilla FireFox 0.8 to 1.0
86    
87    using DOM 2 C<document.implementation.createDocument>
88    
89    =item Internet Explorer 5.5 and 6.0
90    
91    using ActiveX C<Microsoft.XMLDOM> or C<MSXML2.DOMDocument>
92    
93    =item Konqueror 3.3
94    
95    using DOM 2 C<document.implementation.createDocument>
96    
97    =item Opera 7.54 (without Java)
98    
99    using experimental iframe implementation which is much slower than other methods.
100    
101    =back
102    
103    If searching doesn't work for your combination of operating system and
104    browser, please open C<html/test.html> file and wait a while. It will search sample
105    file included with distribution and report results. Reports with included
106    test debugging are welcome.
107    
108  =head1 jsFind methods  =head1 jsFind methods
109    
# Line 313  sub to_dot { Line 358  sub to_dot {
358  Create xml index files for jsFind. This should be called after  Create xml index files for jsFind. This should be called after
359  your B-Tree has been filled with data.  your B-Tree has been filled with data.
360    
361   $root->to_jsfind('/full/path/to/index/dir/');   $root->to_jsfind(
362            dir => '/full/path/to/index/dir/',
363            data_codepage => 'ISO-8859-2',
364            index_codepage => 'UTF-8',
365            output_filter => sub {
366                    my $t = shift || return;
367                    $t =~ s/&egrave;/e/;
368            }
369     );
370    
371    All options except C<dir> are optional.
372    
373  Returns number of nodes in created tree.  Returns number of nodes in created tree.
374    
375  There is also longer version if you want to recode your data charset  Options:
376  into different one (probably UTF-8):  
377    =over 4
378    
379    =item dir
380    
381    Full path to directory for index (which will be created if needed).
382    
383    =item data_codepage
384    
385    If your input data isn't in C<ISO-8859-1> encoding, you will have to specify
386    this option.
387    
388   $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');  =item index_codepage
389    
390  Destination encoding is UTF-8 by default, so you don't have to specify it.  If your index encoding is not C<UTF-8> use this option.
391    
392   $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');  If you are not using supplied JavaScript search code, or your browser is
393    terribly broken and thinks that index shouldn't be in UTF-8 encoding, use
394    this option to specify encoding for created XML index.
395    
396    =item output_filter
397    
398    B<This is just a draft of documentation for an option which is not implemented!>
399    
400    Code ref to sub which can do modifications on resulting XML file for node.
401    Encoding of this data will be in L<index_codepage> and you have to take care
402    not to break XML structure. Calling L<xmllint> on your result index
403    (like C<t/90xmllint.t> does in this distribution) is a good idea after using
404    this option.
405    
406    This option is also right place to plug in unaccenting function using
407    L<Text::Unaccent>.
408    
409    =back
410    
411  =cut  =cut
412    
# Line 334  my $iconv_l1; Line 416  my $iconv_l1;
416  sub to_jsfind {  sub to_jsfind {
417          my $self = shift;          my $self = shift;
418    
419          my $path = shift || confess "to_jsfind need path to your index!";          my %arg = @_;
420    
421          my ($from_cp,$to_cp) = @_;          confess "to_jsfind need path to your index directory !" unless ($arg{'dir'});
422    
423          $to_cp ||= 'UTF-8';          my $data_codepage = $arg{'data_codepage'};
424            my $index_codepage = $arg{'index_codepage'} || 'UTF-8';
425    
426          if ($from_cp && $to_cp) {          # create ISO-8859-1 iconv for HTML::Entities decode
427                  $iconv = Text::Iconv->new($from_cp,$to_cp);          $iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage);
         }  
         $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);  
428    
429          $path .= "/" if ($path =~ /\/$/);          # create another iconv for data
430          #carp "creating directory for index '$path'" if (! -w $path);          if ($data_codepage && $index_codepage) {
431                    $iconv = Text::Iconv->new($data_codepage,$index_codepage);
432            }
433    
434          return $self->root->to_jsfind($path,"0");          return $self->root->to_jsfind($arg{'dir'},"0");
435  }  }
436    
437    
# Line 777  sub to_xml { Line 860  sub to_xml {
860          return $d;          return $d;
861  }  }
862    
863  =head2 base62  =head2 base_x
864    
865  Convert number to base62 (used for jsFind index filenames).  Convert number to base x (used for jsFind index filenames).
866    
867   my $n = $tree->base62(50);   my $n = $tree->base_x(50);
868    
869  =cut  =cut
870    
871  sub base62 {  sub base_x {
872          my $self = shift;          my $self = shift;
873    
874          my $value = shift;          my $value = shift;
# Line 795  sub base62 { Line 878  sub base62 {
878          my @digits = qw(          my @digits = qw(
879                  0 1 2 3 4 5 6 7 8 9                  0 1 2 3 4 5 6 7 8 9
880                  a b c d e f g h i j k l m n o p q r s t u v w x y z                  a b c d e f g h i j k l m n o p q r s t u v w x y z
                 A B C D E F G H I J K L M N O P Q R S T U V W X Y Z  
881          );          );
882    
883          my $base = scalar(@digits);          my $base = scalar(@digits);
# Line 837  sub to_jsfind { Line 919  sub to_jsfind {
919          confess("path is undefined.") unless ($path);          confess("path is undefined.") unless ($path);
920          confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));          confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
921    
922          $file = $self->base62($file);          $file = $self->base_x($file);
923    
924          my $nr_keys = 0;          my $nr_keys = 0;
925    

Legend:
Removed from v.15  
changed lines
  Added in v.39

  ViewVC Help
Powered by ViewVC 1.1.26