/[jsFind]/trunk/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 9 by dpavlin, Wed Jul 21 23:37:49 2004 UTC revision 14 by dpavlin, Sat Aug 28 15:19:22 2004 UTC
# Line 1  Line 1 
1  package jsFind;  package jsFind;
2    
3  use 5.008004;  use 5.005;
4  use strict;  use strict;
5  use warnings;  use warnings;
6    use HTML::Entities;
7    
8  our $VERSION = '0.02';  our $VERSION = '0.03';
9    
10    use Exporter 'import';
11    use Carp;
12    
13    our @ISA = qw(Exporter);
14    
15    BEGIN {
16            import 'jsFind::Node';
17    }
18    
19  =head1 NAME  =head1 NAME
20    
# Line 13  jsFind - generate index for jsFind using Line 23  jsFind - generate index for jsFind using
23  =head1 SYNOPSIS  =head1 SYNOPSIS
24    
25    use jsFind;    use jsFind;
26      my $t = new jsFind(B => 4);
27      my $f = 1;
28      foreach my $k (qw{minima ut dolorem sapiente voluptatem}) {
29            $t->B_search(Key => $k,
30                    Data => {
31                            "path" => {
32                            t => "word $k",
33                            f => $f },
34                    },
35                    Insert => 1,
36                    Append => 1,
37            );
38      }
39    
40  =head1 DESCRIPTION  =head1 DESCRIPTION
41    
# Line 36  You can programatically (and incremental Line 57  You can programatically (and incremental
57    
58  =back  =back
59    
60  =head1 METHODS  You can also examine examples which come as tests with this module,
61    for example C<t/04words.t>.
 This module contains two packages C<jsFind> and C<jsFind::Node>.  
   
 =head2 jsFind methods  
62    
63  =cut  =head1 jsFind methods
64    
65  use Exporter 'import';  C<jsFind> is mode implementing methods which you, the user, are going to
66  use Carp;  use to create indexes.
   
 our @ISA = qw(Exporter);  
   
 BEGIN {  
         import 'jsFind::Node';  
 }  
67    
68  =head3 new  =head2 new
69    
70  Create new tree. Arguments are C<B> which is maximum numbers of keys in  Create new tree. Arguments are C<B> which is maximum numbers of keys in
71  each node and optional C<Root> node. Each root node may have child nodes.  each node and optional C<Root> node. Each root node may have child nodes.
# Line 82  sub new { Line 94  sub new {
94    bless { B => $B, Root => $Root } => $package;    bless { B => $B, Root => $Root } => $package;
95  }  }
96    
97  =head3 B_search  =head2 B_search
98    
99  Search, insert, append or replace data in B-Tree  Search, insert, append or replace data in B-Tree
100    
# Line 224  sub split_and_promote { Line 236  sub split_and_promote {
236    }    }
237  }  }
238    
239  =head3 B  =head2 B
240    
241  Return B (maximum number of keys)  Return B (maximum number of keys)
242    
# Line 236  sub B { Line 248  sub B {
248    $_[0]{B};    $_[0]{B};
249  }  }
250    
251  =head3 root  =head2 root
252    
253  Returns root node  Returns root node
254    
# Line 250  sub root { Line 262  sub root {
262    $self->{Root};    $self->{Root};
263  }  }
264    
265  =head3 node_overfull  =head2 node_overfull
266    
267  Returns if node is overfull  Returns if node is overfull
268    
# Line 264  sub node_overfull { Line 276  sub node_overfull {
276    $node->size > $self->B;    $node->size > $self->B;
277  }  }
278    
279  =head3 to_string  =head2 to_string
280    
281  Returns your tree as formatted string.  Returns your tree as formatted string.
282    
# Line 278  sub to_string { Line 290  sub to_string {
290    $_[0]->root->to_string;    $_[0]->root->to_string;
291  }  }
292    
293  =head3 to_dot  =head2 to_dot
294    
295  Create Graphviz graph of your tree  Create Graphviz graph of your tree
296    
# Line 296  sub to_dot { Line 308  sub to_dot {
308          return $dot;          return $dot;
309  }  }
310    
311  =head3 to_jsfind  =head2 to_jsfind
312    
313  Create xml index files for jsFind. This should be called after  Create xml index files for jsFind. This should be called after
314  your B-Tree has been filled with data.  your B-Tree has been filled with data.
# Line 310  into different one (probably UTF-8): Line 322  into different one (probably UTF-8):
322    
323   $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');   $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');
324    
325    Destination encoding is UTF-8 by default, so you don't have to specify it.
326    
327     $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');
328    
329  =cut  =cut
330    
331  my $iconv;  my $iconv;
332    my $iconv_l1;
333    
334  sub to_jsfind {  sub to_jsfind {
335          my $self = shift;          my $self = shift;
# Line 320  sub to_jsfind { Line 337  sub to_jsfind {
337          my $path = shift || confess "to_jsfind need path to your index!";          my $path = shift || confess "to_jsfind need path to your index!";
338    
339          my ($from_cp,$to_cp) = @_;          my ($from_cp,$to_cp) = @_;
340    
341            $to_cp ||= 'UTF-8';
342    
343          if ($from_cp && $to_cp) {          if ($from_cp && $to_cp) {
344                  $iconv = Text::Iconv->new($from_cp,$to_cp);                  $iconv = Text::Iconv->new($from_cp,$to_cp);
345          }          }
346            $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);
347    
348          $path .= "/" if ($path =~ /\/$/);          $path .= "/" if ($path =~ /\/$/);
349          carp "create directory for index '$path': $!" if (! -w $path);          #carp "creating directory for index '$path'" if (! -w $path);
350    
351          return $self->root->to_jsfind($path,"0");          return $self->root->to_jsfind($path,"0");
352  }  }
# Line 336  sub default_cmp { Line 357  sub default_cmp {
357    $_[0] cmp $_[1];    $_[0] cmp $_[1];
358  }  }
359    
360  =head3 _recode  =head2 _recode
361    
362  This is internal function to recode charset.  This is internal function to recode charset.
363    
364    It will also try to decode entities in data using L<HTML::Entities>.
365    
366  =cut  =cut
367    
368  sub _recode {  sub _recode {
369          my $self = shift;          my $self = shift;
370          my $text = shift || return;          my $text = shift || return;
371    
372            sub _decode_html_entities {
373                    my $data = shift || return;
374                    $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data";
375            }
376    
377          if ($iconv) {          if ($iconv) {
378                  return $iconv->convert($text) || $text;                  $text = $iconv->convert($text) || $text && carp "convert problem: $text";
379          } else {                  $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges;
                 return $text;  
380          }          }
381    
382            return $text;
383  }  }
384    
385  #####################################################################  #####################################################################
386    
387  =head2 jsFind::Node methods  =head1 jsFind::Node methods
388    
389  Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and  Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and
390  each has C<k+1> subnodes, which might be null.  each has C<k+1> subnodes, which might be null.
# Line 385  my $KEYS = 0; Line 414  my $KEYS = 0;
414  my $DATA = 1;  my $DATA = 1;
415  my $SUBNODES = 2;  my $SUBNODES = 2;
416    
417  =head3 new  =head2 new
418    
419  Create New node  Create New node
420    
# Line 405  sub new { Line 434  sub new {
434    bless [@_] => $package;    bless [@_] => $package;
435  }  }
436    
437  =head3 locate_key  =head2 locate_key
438    
439  Locate key in node using linear search. This should probably be replaced  Locate key in node using linear search. This should probably be replaced
440  by binary search for better performance.  by binary search for better performance.
# Line 442  sub locate_key { Line 471  sub locate_key {
471  }  }
472    
473    
474  =head3 emptynode  =head2 emptynode
475    
476  Creates new empty node  Creates new empty node
477    
# Line 455  sub emptynode { Line 484  sub emptynode {
484    new($_[0]);                   # Pass package name, but not anything else.    new($_[0]);                   # Pass package name, but not anything else.
485  }  }
486    
487  =head3 is_empty  =head2 is_empty
488    
489  Test if node is empty  Test if node is empty
490    
# Line 469  sub is_empty { Line 498  sub is_empty {
498    !defined($self) || $#$self < 0;    !defined($self) || $#$self < 0;
499  }  }
500    
501  =head3 key  =head2 key
502    
503  Return C<$i>th key from node  Return C<$i>th key from node
504    
# Line 485  sub key { Line 514  sub key {
514     $_[0]->[$KEYS][$_[1]];     $_[0]->[$KEYS][$_[1]];
515  }  }
516    
517  =head3 data  =head2 data
518    
519  Return C<$i>th data from node  Return C<$i>th data from node
520    
# Line 498  sub data { Line 527  sub data {
527    $self->[$DATA][$n];    $self->[$DATA][$n];
528  }  }
529    
530  =head3 kdp_replace  =head2 kdp_replace
531    
532  Set key data pair for C<$i>th element in node  Set key data pair for C<$i>th element in node
533    
# Line 519  sub kdp_replace { Line 548  sub kdp_replace {
548     $self->[$DATA][$n]];     $self->[$DATA][$n]];
549  }  }
550    
551  =head3 kdp_insert  =head2 kdp_insert
552    
553    Insert key/data pair in tree
554    
555      $node->kdp_insert("key value" => "data value");
556    
557   # No return value.  No return value.
558    
559  =cut  =cut
560    
# Line 540  sub kdp_insert { Line 573  sub kdp_insert {
573    splice(@{$self->[$SUBNODES]}, $where, 0, undef);    splice(@{$self->[$SUBNODES]}, $where, 0, undef);
574  }  }
575    
576  =head3 kdp_append  =head2 kdp_append
577    
578  Adds new data keys and values to C<$i>th element in node  Adds new data keys and values to C<$i>th element in node
579    
# Line 561  sub kdp_append { Line 594  sub kdp_append {
594     $self->[$DATA][$n]];     $self->[$DATA][$n]];
595  }  }
596    
597  =head3 subnode  =head2 subnode
598    
599  Set new or return existing subnode  Set new or return existing subnode
600    
# Line 579  sub subnode { Line 612  sub subnode {
612    $self->[$SUBNODES][$n];    $self->[$SUBNODES][$n];
613  }  }
614    
615  =head3 is_leaf  =head2 is_leaf
616    
617  Test if node is leaf  Test if node is leaf
618    
# Line 592  sub is_leaf { Line 625  sub is_leaf {
625    ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.    ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
626  }  }
627    
628  =head3 size  =head2 size
629    
630  Return number of keys in the node  Return number of keys in the node
631    
# Line 605  sub size { Line 638  sub size {
638    return scalar(@{$self->[$KEYS]});    return scalar(@{$self->[$KEYS]});
639  }  }
640    
641  =head3 halves  =head2 halves
642    
643   # Accept an index $n  Split node into two halves so that keys C<0 .. $n-1> are in one node
644   # Divide into two nodes so that keys 0 .. $n-1 are in one node  and keys C<$n+1 ... $size> are in the other.
645   # and keys $n+1 ... $size are in the other.  
646      my ($left_node, $right_node, $kdp) = $node->halves($n);
647    
648  =cut  =cut
649    
# Line 633  sub halves { Line 667  sub halves {
667    ($self->new(@left), $self->new(@right), \@middle);    ($self->new(@left), $self->new(@right), \@middle);
668  }  }
669    
670  =head3 to_string  =head2 to_string
671    
672  Dumps tree as string  Dumps tree as string
673    
# Line 684  sub to_string { Line 718  sub to_string {
718    
719  =end comment  =end comment
720    
721  =head3 to_dot  =head2 to_dot
722    
723  Recursivly walk nodes of tree  Recursivly walk nodes of tree
724    
# Line 723  sub to_dot { Line 757  sub to_dot {
757          $dot;          $dot;
758  }  }
759    
760  =head3 to_jsfind  =head2 to_xml
761    
762    Escape <, >, & and ", and to produce valid XML
763    
764    =cut
765    
766    my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
767    my $escape_re  = join '|' => keys %escape;
768    
769    sub to_xml {
770            my $self = shift || confess "you should call to_xml as object!";
771    
772            my $d = shift || return;
773            $d = $self->SUPER::_recode($d);
774            confess "escape_re undefined!" unless ($escape_re);
775            $d =~ s/($escape_re)/$escape{$1}/g;
776            return $d;
777    }
778    
779    =head2 to_jsfind
780    
781  Create jsFind xml files  Create jsFind xml files
782    
# Line 733  Returns number of elements created Line 786  Returns number of elements created
786    
787  =cut  =cut
788    
789    
790  sub to_jsfind {  sub to_jsfind {
791          my $self = shift;          my $self = shift;
792          my ($path,$file) = @_;          my ($path,$file) = @_;
# Line 753  sub to_jsfind { Line 807  sub to_jsfind {
807                  my $key = lc($k->[$i]);                  my $key = lc($k->[$i]);
808    
809                  if ($key) {                  if ($key) {
810                          $key_xml .= qq{<k>$key</k>};                          $key_xml .= '<k>'.$self->to_xml($key).'</k>';
811                          $data_xml .= qq{<e>};                          $data_xml .= '<e>';
812          #use Data::Dumper;          #use Data::Dumper;
813          #print Dumper($d->[$i]);          #print Dumper($d->[$i]);
814                          foreach my $path (keys %{$d->[$i]}) {                          foreach my $path (keys %{$d->[$i]}) {
815                                  $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.($d->[$i]->{$path}->{'t'} || 'no title').'">'.$path.'</l>';                                  $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>';
816                                  $nr_keys++;                                  $nr_keys++;
817                          }                          }
818                          $data_xml .= qq{</e>};                          $data_xml .= '</e>';
819                  }                  }
820    
821                  $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);                  $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
822          }          }
823    
824          $key_xml .= "</n>";          $key_xml .= '</n>';
825          $data_xml .= "</d>";          $data_xml .= '</d>';
826    
827          if (! -e $path) {          if (! -e $path) {
828                  mkpath($path) || croak "can't create dir '$path': $!";                  mkpath($path) || croak "can't create dir '$path': $!";
# Line 777  sub to_jsfind { Line 831  sub to_jsfind {
831          open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";          open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";
832          open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";          open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";
833    
834          print K $self->SUPER::_recode($key_xml);          print K $key_xml;
835          print D $self->SUPER::_recode($data_xml);          print D $data_xml;
836    
837          close(K);          close(K);
838          close(D);          close(D);
# Line 795  jsFind web site L<http://www.elucidsoft. Line 849  jsFind web site L<http://www.elucidsoft.
849    
850  B-Trees in perl web site L<http://perl.plover.com/BTree/>  B-Trees in perl web site L<http://perl.plover.com/BTree/>
851    
852    This module web site L<http://www.rot13.org/~dpavlin/jsFind.html>
853    
854  =head1 AUTHORS  =head1 AUTHORS
855    
856  Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was  Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was

Legend:
Removed from v.9  
changed lines
  Added in v.14

  ViewVC Help
Powered by ViewVC 1.1.26