jsFind/trunk/jsFind.pm

package jsFind;

use 5.008004;
use strict;
use warnings;

our $VERSION = '0.01';

=head1 NAME

jsFind - generate index for jsFind using B-Tree

=head1 SYNOPSIS

  use jsFind;


=head1 DESCRIPTION

This module can be used to create index files for jsFind, powerful tool for
adding a search engine to a CDROM archive or catalog without requiring the
user to install anything. 

Main difference between this module and scripts delivered with jsFind are:

=over 5

=item *

You don't need to use swish-e to create index

=item *

You can programatically (and incrementaly) create index for jsFind

=back

=head1 METHODS

This module contains two packages C<jsFind> and C<jsFind::Node>.

=head2 jsFind methods

=cut

use Exporter 'import';
use Carp;

our @ISA = qw(Exporter);

BEGIN {
        import 'jsFind::Node';
}

=head3 new

Create new tree. Arguments are C<B> which is maximum numbers of keys in
each node and optional C<Root> node. Each root node may have child nodes.

All nodes are objects from C<jsFind::Node>.

 my $t = new jsFind(B => 4);

=cut

my $DEBUG = 1;

sub new {
  my $package = shift;
  my %ARGV = @_;
  croak "Usage: {$package}::new(B => number [, Root => root node ])"
      unless exists $ARGV{B};
  if ($ARGV{B} % 2) {
    my $B = $ARGV{B} + 1;
    carp "B must be an even number.  Using $B instead.";
    $ARGV{B} = $B;
  }
    
  my $B = $ARGV{B};
  my $Root = exists($ARGV{Root}) ? $ARGV{Root} : jsFind::Node->emptynode;
  bless { B => $B, Root => $Root } => $package;
}

=head3 B_search

Search, insert, append or replace data in B-Tree

 $t->B_search(
        Key => 'key value',
        Data => { "path" => {
                        "t" => "title of document",
                        "f" => 99,
                },
        Insert => 1,
        Append => 1,
 );

Semantics:

If key not found, insert it iff C<Insert> argument is present.

If key B<is> found, replace existing data iff C<Replace> argument
is present or add new datum to existing iff C<Append> argument is present.

=cut

sub B_search {
  my $self = shift;
  my %args = @_;
  my $cur_node = $self->root;
  my $k = $args{Key};
  my $d = $args{Data};
  my @path;

  if ($cur_node->is_empty) {    # Special case for empty root
    if ($args{Insert}) {
      $cur_node->kdp_insert($k => $d);
      return $d;
    } else {
      return undef;
    }
  }

  # Descend tree to leaf
  for (;;) {

    # Didn't hit bottom yet.

    my($there, $where) = $cur_node->locate_key($k);
    if ($there) {               # Found it!
      if ($args{Replace}) {
        $cur_node->kdp_replace($where, $k => $d);
      } elsif ($args{Append}) {
        $cur_node->kdp_append($where, $k => $d);
      }
      return $cur_node->data($where);
    }
    
    # Not here---must be in a subtree.
    
    if ($cur_node->is_leaf) {   # But there are no subtrees
      return undef unless $args{Insert}; # Search failed
      # Stuff it in
      $cur_node->kdp_insert($k => $d);
      if ($self->node_overfull($cur_node)) { # Oops--there was no room.
        $self->split_and_promote($cur_node, @path);
      } 
      return $d;
    }

    # There are subtrees, and the key is in one of them.

    push @path, [$cur_node, $where];    # Record path from root.

    # Move down to search the subtree
    $cur_node = $cur_node->subnode($where);

    # and start over.
  }                             # for (;;) ...

  croak ("How did I get here?");
}


sub split_and_promote_old {
  my $self = shift;
  my ($cur_node, @path) = @_;
  
  for (;;) {
    my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
    my ($up, $where) = @{pop @path};
    if ($up) {
      $up->kdp_insert(@$kdp);
      my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
      croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
          unless $tthere;
      croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
          unless $twhere == $where;
      $up->subnode($where,   $newleft);
      $up->subnode($where+1, $newright);
      return unless $self->node_overfull($up);
      $cur_node = $up;
    } else { # We're at the top; make a new root.
      my $newroot = new jsFind::Node ([$kdp->[0]], 
                                     [$kdp->[1]], 
                                     [$newleft, $newright]);
      $self->root($newroot);
      return;
    }
  }
  
}

sub split_and_promote {
  my $self = shift;
  my ($cur_node, @path) = @_;
  
  for (;;) {
    my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
    my ($up, $where) = @{pop @path} if (@path);
    if ($up) {
      $up->kdp_insert(@$kdp);
      if ($DEBUG) {
        my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
        croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
          unless $tthere;
        croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
          unless $twhere == $where;
      }
      $up->subnode($where,   $newleft);
      $up->subnode($where+1, $newright);
      return unless $self->node_overfull($up);
      $cur_node = $up;
    } else { # We're at the top; make a new root.
      my $newroot = new jsFind::Node([$kdp->[0]], 
                                     [$kdp->[1]], 
                                     [$newleft, $newright]);
      $self->root($newroot);
      return;
    }
  }
}

=head3 B

Return B (maximum number of keys)

 my $max_size = $t->B;

=cut

sub B {
  $_[0]{B};
}

=head3 root

Returns root node

 my $root = $t->root;

=cut

sub root {
  my ($self, $newroot) = @_;
  $self->{Root} = $newroot if defined $newroot;
  $self->{Root};
}

=head3 node_overfull

Returns if node is overfull

 if ($node->node_overfull) { something }

=cut

sub node_overfull {
  my $self = shift;
  my $node = shift;
  $node->size > $self->B;
}

=head3 to_string

Returns your tree as formatted string.

 my $text = $root->to_string;

Mostly usefull for debugging as output leaves much to be desired.

=cut

sub to_string {
  $_[0]->root->to_string;
}

=head3 to_dot

Create Graphviz graph of your tree

 my $dot_graph = $root->to_dot;

=cut

sub to_dot {
        my $self = shift;

        my $dot = qq/digraph dns {\nrankdir=LR;\n/;
        $dot .= $self->root->to_dot;
        $dot .= qq/\n}\n/;

        return $dot;
}

=head3 to_jsfind

Create xml index files for jsFind. This should be called after
your B-Tree has been filled with data.

 $root->to_jsfind('/full/path/to/index/dir/');

Returns number of nodes in created tree.

=cut

sub to_jsfind {
        my $self = shift;

        my $path = shift || confess "to_jsfind need path to your index!";

        $path .= "/" if ($path =~ /\/$/);
        carp "can't create index in '$path': $!" if (! -w $path);

        return $self->root->to_jsfind($path,"0");
}


# private, default cmd function
sub default_cmp {
  $_[0] cmp $_[1];
}

#####################################################################

=head2 jsFind::Node methods

Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and 
each has C<k+1> subnodes, which might be null.

The node is a blessed reference to a list with three elements:

  ($keylist, $datalist, $subnodelist)

each is a reference to a list list.

The null node is represented by a blessed reference to an empty list.

=cut

package jsFind::Node;

use warnings;
use strict;

use Carp;
use File::Path;

my $KEYS = 0;
my $DATA = 1;
my $SUBNODES = 2;

=head3 new

Create New node

 my $node = new jsFind::Node ($keylist, $datalist, $subnodelist);

You can also mit argument list to create empty node.

 my $empty_node = new jsFind::Node;

=cut

sub new {
  my $self = shift;
  my $package = ref $self || $self;
  croak "Internal error:  jsFind::Node::new called with wrong number of arguments."
      unless @_ == 3 || @_ == 0;
  bless [@_] => $package;
}

=head3 locate_key

Locate key in node using linear search. This should probably be replaced
by binary search for better performance.

 my ($found, $index) = $node->locate_key($key, $cmp_coderef);

Argument C<$cmp_coderef> is optional reference to custom comparison
operator.

Returns (1, $index) if $key[$index] eq $key.

Returns (0, $index) if key could be found in $subnode[$index].

In scalar context, just returns 1 or 0.

=cut

sub locate_key {
  # Use linear search for testing, replace with binary search.
  my $self = shift;
  my $key = shift;
  my $cmp = shift || \&jsFind::default_cmp;
  my $i;
  my $cmp_result;
  my $N = $self->size;
  for ($i = 0; $i < $N; $i++) {
    $cmp_result = &$cmp($key, $self->key($i));
    last if $cmp_result <= 0;
  }
  
  # $i is now the index of the first node-key greater than $key
  # or $N if there is no such.  $cmp_result is 0 iff the key was found.
  (!$cmp_result, $i);
}


=head3 emptynode

Creates new empty node

 $node = $root->emptynode;
 $new_node = $node->emptynode;

=cut

sub emptynode {
  new($_[0]);                   # Pass package name, but not anything else.
}

=head3 is_empty

Test if node is empty

 if ($node->is_empty) { something }

=cut

# undef is empty; so is a blessed empty list.
sub is_empty {
  my $self = shift;
  !defined($self) || $#$self < 0;
}

=head3 key

Return C<$i>th key from node

 my $key = $node->key($i);

=cut

sub key {
#  my ($self, $n) = @_;
#  $self->[$KEYS][$n];

#       speedup
   $_[0]->[$KEYS][$_[1]];
}

=head3 data

Return C<$i>th data from node

 my $data = $node->data($i);

=cut

sub data {
  my ($self, $n) = @_;
  $self->[$DATA][$n];
}

=head3 kdp_replace

Set key data pair for C<$i>th element in node

 $node->kdp_replace($i, "key value" => {
        "data key 1" => "data value 1",
        "data key 2" => "data value 2",
 };

=cut

sub kdp_replace {
  my ($self, $n, $k => $d) = @_;
  if (defined $k) {
    $self->[$KEYS][$n] = $k;
    $self->[$DATA][$n] = $d;
  }
  [$self->[$KEYS][$n], 
   $self->[$DATA][$n]];
}

=head3 kdp_insert

 # No return value.

=cut

sub kdp_insert {
  my $self = shift;
  my ($k => $d) = @_;
  my ($there, $where) = $self->locate_key($k) unless $self->is_empty;

  if ($there) { croak("Tried to insert `$k => $d' into node where `$k' was already present."); }

  # undef fix
  $where ||= 0;

  splice(@{$self->[$KEYS]}, $where, 0, $k);
  splice(@{$self->[$DATA]}, $where, 0, $d);
  splice(@{$self->[$SUBNODES]}, $where, 0, undef);
}

=head3 kdp_append

Adds new data keys and values to C<$i>th element in node

 $node->kdp_append($i, "key value" => {
        "added data key" => "added data value",
 };

=cut

sub kdp_append {
  my ($self, $n, $k => $d) = @_;
  if (defined $k) {
    $self->[$KEYS][$n] = $k;
    my ($kv,$dv) = %{$d};
    $self->[$DATA][$n]->{$kv} = $dv;
  }
  [$self->[$KEYS][$n], 
   $self->[$DATA][$n]];
}

=head3 subnode

Set new or return existing subnode

 # return 4th subnode
 my $my_node = $node->subnode(4);

 # create new subnode 5 from $my_node
 $node->subnode(5, $my_node);

=cut

sub subnode {
  my ($self, $n, $newnode) = @_;
  $self->[$SUBNODES][$n] = $newnode if defined $newnode;
  $self->[$SUBNODES][$n];
}

=head3 is_leaf

Test if node is leaf

 if ($node->is_leaf) { something }

=cut

sub is_leaf {
  my $self = shift;
  ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
}

=head3 size

Return number of keys in the node

 my $nr = $node->size;

=cut

sub size {
  my $self = shift;
  return scalar(@{$self->[$KEYS]});
}

=head3 halves

 # Accept an index $n
 # Divide into two nodes so that keys 0 .. $n-1 are in one node
 # and keys $n+1 ... $size are in the other.

=cut

sub halves {
  my $self = shift;
  my $n = shift;
  my $s = $self->size;
  my @right;
  my @left;

  $left[$KEYS] = [@{$self->[$KEYS]}[0 .. $n-1]];
  $left[$DATA] = [@{$self->[$DATA]}[0 .. $n-1]];
  $left[$SUBNODES] = [@{$self->[$SUBNODES]}[0 .. $n]];

  $right[$KEYS] = [@{$self->[$KEYS]}[$n+1 .. $s-1]];
  $right[$DATA] = [@{$self->[$DATA]}[$n+1 .. $s-1]];
  $right[$SUBNODES] = [@{$self->[$SUBNODES]}[$n+1 .. $s]];

  my @middle = ($self->[$KEYS][$n], $self->[$DATA][$n]);

  ($self->new(@left), $self->new(@right), \@middle);
}

=head3 to_string

Dumps tree as string

 my $str = $root->to_string;

=cut

sub to_string {
  my $self = shift;
  my $indent = shift || 0;
  my $I = ' ' x $indent;
  return '' if $self->is_empty;
  my ($k, $d, $s) = @$self;
  my $result = '';
  $result .= defined($s->[0]) ? $s->[0]->to_string($indent+2) : '';
  my $N = $self->size;
  my $i;
  for ($i = 0; $i < $N; $i++) {
#    $result .= $I . "$k->[$i] => $d->[$i]\n";
    $result .= $I . "$k->[$i]\n";
    $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+2) : '';
  }
  $result;
}

=begin comment

use Data::Dumper;

sub to_string {
  my $self = shift;
  my $indent = shift || 0;
  my $path = shift || '0';
  return '' if $self->is_empty;
  my ($k, $d, $s) = @$self;
  my $result = '';
  $result .= defined($s->[0]) ? $s->[0]->to_string($indent+1,"$path/0") : '';
  my $N = $self->size;
  for (my $i = 0; $i < $N; $i++) {
        my $dump = Dumper($d->[$i]);
        $dump =~ s/[\n\r\s]+/ /gs;
        $dump =~ s/\$VAR1\s*=\s*//;
    $result .= sprintf("%-5s [%2d] %2s: %s => %s\n", $path, $i, $indent, $k->[$i], $dump);
    $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+1,"$path/$i") : '';
  }
  $result;
}

=end comment

=head3 to_dot

Recursivly walk nodes of tree

=cut

sub to_dot {
        my $self = shift;
        my $parent = shift;

        return '' if $self->is_empty;

        my $dot = '';

        my ($k, $d, $s) = @$self;
        my $N = $self->size;

        my @dot_keys;

        my $node_name = $parent || '_';
        $node_name =~ s/\W+//g;
        $node_name .= " [$N]";

        for (my $i = 0; $i <= $N; $i++) {
                if (my $key = $k->[$i]) {
                        push @dot_keys, qq{<$i>$key};
                }
                $dot .= $s->[$i]->to_dot(qq{"$node_name":$i}) if ($s->[$i]);
        }
        push @dot_keys, qq{<$N>...} if (! $self->is_leaf);

        my $label = join("|",@dot_keys);
        $dot .= qq{"$node_name" [ shape=record, label="$label" ];\n};

        $dot .= qq{$parent -> "$node_name";\n} if ($parent);

        $dot;
}

=head3 to_jsfind

Create jsFind xml files

 my $nr=$tree->to_dot('/path/to/index','0');

Returns number of elements created

=cut

sub to_jsfind {
        my $self = shift;
        my ($path,$file) = @_;

        return 0 if $self->is_empty;

        my $nr_keys = 0;

        my ($k, $d, $s) = @$self;
        my $N = $self->size;

        my ($key_xml, $data_xml) = ("<n>","<d>");

        for (my $i = 0; $i <= $N; $i++) {
                my $key = lc($k->[$i]);

                if ($key) {
                        $key_xml .= qq{<k>$key</k>};
                        $data_xml .= qq{<e>};
        #use Data::Dumper;
        #print Dumper($d->[$i]);
                        foreach my $path (keys %{$d->[$i]}) {
                                $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.($d->[$i]->{$path}->{'t'} || 'no title').'">'.$path.'</l>';
                                $nr_keys++;
                        }
                        $data_xml .= qq{</e>};
                }

                $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
        }

        $key_xml .= "</n>";
        $data_xml .= "</d>";

        if (! -e $path) {
                mkpath($path) || croak "can't create dir '$path': $!";
        }

        open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";
        open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";

        print K $key_xml;
        print D $data_xml;

        close(K);
        close(D);

        return $nr_keys;
}

1;
__END__

=head1 SEE ALSO

jsFind web site L<http://www.elucidsoft.net/projects/jsfind/>

B-Trees in perl web site L<http://perl.plover.com/BTree/>

=head1 AUTHORS

Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was
base for this module

Shawn P. Garbett E<lt>shawn@elucidsoft.netE<gt> wrote jsFind

Dobrica Pavlinusic E<lt>dpavlin@rot13.orgE<gt> wrote this module

=head1 COPYRIGHT AND LICENSE

Copyright (C) 2004 by Dobrica Pavlinusic

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2 of the License, or (at your option)
any later version. This program is distributed in the hope that it will be
useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.

=cut
1	package jsFind;
2
3	use 5.008004;
4	use strict;
5	use warnings;
6
7	our $VERSION = '0.01';
8
9	=head1 NAME
10
11	jsFind - generate index for jsFind using B-Tree
12
13	=head1 SYNOPSIS
14
15	use jsFind;
16
17
18
19	=head1 DESCRIPTION
20
21	This module can be used to create index files for jsFind, powerful tool for
22	adding a search engine to a CDROM archive or catalog without requiring the
23	user to install anything.
24
25	Main difference between this module and scripts delivered with jsFind are:
26
27	=over 5
28
29	=item *
30
31	You don't need to use swish-e to create index
32
33	=item *
34
35	You can programatically (and incrementaly) create index for jsFind
36
37	=back
38
39	=head1 METHODS
40
41	This module contains two packages C<jsFind> and C<jsFind::Node>.
42
43	=head2 jsFind methods
44
45	=cut
46
47	use Exporter 'import';
48	use Carp;
49
50	our @ISA = qw(Exporter);
51
52	BEGIN {
53	import 'jsFind::Node';
54	}
55
56	=head3 new
57
58	Create new tree. Arguments are C<B> which is maximum numbers of keys in
59	each node and optional C<Root> node. Each root node may have child nodes.
60
61	All nodes are objects from C<jsFind::Node>.
62
63	my $t = new jsFind(B => 4);
64
65	=cut
66
67	my $DEBUG = 1;
68
69	sub new {
70	my $package = shift;
71	my %ARGV = @_;
72	croak "Usage: {$package}::new(B => number [, Root => root node ])"
73	unless exists $ARGV{B};
74	if ($ARGV{B} % 2) {
75	my $B = $ARGV{B} + 1;
76	carp "B must be an even number. Using $B instead.";
77	$ARGV{B} = $B;
78	}
79
80	my $B = $ARGV{B};
81	my $Root = exists($ARGV{Root}) ? $ARGV{Root} : jsFind::Node->emptynode;
82	bless { B => $B, Root => $Root } => $package;
83	}
84
85	=head3 B_search
86
87	Search, insert, append or replace data in B-Tree
88
89	$t->B_search(
90	Key => 'key value',
91	Data => { "path" => {
92	"t" => "title of document",
93	"f" => 99,
94	},
95	Insert => 1,
96	Append => 1,
97	);
98
99	Semantics:
100
101	If key not found, insert it iff C<Insert> argument is present.
102
103	If key B<is> found, replace existing data iff C<Replace> argument
104	is present or add new datum to existing iff C<Append> argument is present.
105
106	=cut
107
108	sub B_search {
109	my $self = shift;
110	my %args = @_;
111	my $cur_node = $self->root;
112	my $k = $args{Key};
113	my $d = $args{Data};
114	my @path;
115
116	if ($cur_node->is_empty) { # Special case for empty root
117	if ($args{Insert}) {
118	$cur_node->kdp_insert($k => $d);
119	return $d;
120	} else {
121	return undef;
122	}
123	}
124
125	# Descend tree to leaf
126	for (;;) {
127
128	# Didn't hit bottom yet.
129
130	my($there, $where) = $cur_node->locate_key($k);
131	if ($there) { # Found it!
132	if ($args{Replace}) {
133	$cur_node->kdp_replace($where, $k => $d);
134	} elsif ($args{Append}) {
135	$cur_node->kdp_append($where, $k => $d);
136	}
137	return $cur_node->data($where);
138	}
139
140	# Not here---must be in a subtree.
141
142	if ($cur_node->is_leaf) { # But there are no subtrees
143	return undef unless $args{Insert}; # Search failed
144	# Stuff it in
145	$cur_node->kdp_insert($k => $d);
146	if ($self->node_overfull($cur_node)) { # Oops--there was no room.
147	$self->split_and_promote($cur_node, @path);
148	}
149	return $d;
150	}
151
152	# There are subtrees, and the key is in one of them.
153
154	push @path, [$cur_node, $where]; # Record path from root.
155
156	# Move down to search the subtree
157	$cur_node = $cur_node->subnode($where);
158
159	# and start over.
160	} # for (;;) ...
161
162	croak ("How did I get here?");
163	}
164
165
166
167	sub split_and_promote_old {
168	my $self = shift;
169	my ($cur_node, @path) = @_;
170
171	for (;;) {
172	my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
173	my ($up, $where) = @{pop @path};
174	if ($up) {
175	$up->kdp_insert(@$kdp);
176	my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
177	croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
178	unless $tthere;
179	croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
180	unless $twhere == $where;
181	$up->subnode($where, $newleft);
182	$up->subnode($where+1, $newright);
183	return unless $self->node_overfull($up);
184	$cur_node = $up;
185	} else { # We're at the top; make a new root.
186	my $newroot = new jsFind::Node ([$kdp->[0]],
187	[$kdp->[1]],
188	[$newleft, $newright]);
189	$self->root($newroot);
190	return;
191	}
192	}
193
194	}
195
196	sub split_and_promote {
197	my $self = shift;
198	my ($cur_node, @path) = @_;
199
200	for (;;) {
201	my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
202	my ($up, $where) = @{pop @path} if (@path);
203	if ($up) {
204	$up->kdp_insert(@$kdp);
205	if ($DEBUG) {
206	my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
207	croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
208	unless $tthere;
209	croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
210	unless $twhere == $where;
211	}
212	$up->subnode($where, $newleft);
213	$up->subnode($where+1, $newright);
214	return unless $self->node_overfull($up);
215	$cur_node = $up;
216	} else { # We're at the top; make a new root.
217	my $newroot = new jsFind::Node([$kdp->[0]],
218	[$kdp->[1]],
219	[$newleft, $newright]);
220	$self->root($newroot);
221	return;
222	}
223	}
224	}
225
226	=head3 B
227
228	Return B (maximum number of keys)
229
230	my $max_size = $t->B;
231
232	=cut
233
234	sub B {
235	$_[0]{B};
236	}
237
238	=head3 root
239
240	Returns root node
241
242	my $root = $t->root;
243
244	=cut
245
246	sub root {
247	my ($self, $newroot) = @_;
248	$self->{Root} = $newroot if defined $newroot;
249	$self->{Root};
250	}
251
252	=head3 node_overfull
253
254	Returns if node is overfull
255
256	if ($node->node_overfull) { something }
257
258	=cut
259
260	sub node_overfull {
261	my $self = shift;
262	my $node = shift;
263	$node->size > $self->B;
264	}
265
266	=head3 to_string
267
268	Returns your tree as formatted string.
269
270	my $text = $root->to_string;
271
272	Mostly usefull for debugging as output leaves much to be desired.
273
274	=cut
275
276	sub to_string {
277	$_[0]->root->to_string;
278	}
279
280	=head3 to_dot
281
282	Create Graphviz graph of your tree
283
284	my $dot_graph = $root->to_dot;
285
286	=cut
287
288	sub to_dot {
289	my $self = shift;
290
291	my $dot = qq/digraph dns {\nrankdir=LR;\n/;
292	$dot .= $self->root->to_dot;
293	$dot .= qq/\n}\n/;
294
295	return $dot;
296	}
297
298	=head3 to_jsfind
299
300	Create xml index files for jsFind. This should be called after
301	your B-Tree has been filled with data.
302
303	$root->to_jsfind('/full/path/to/index/dir/');
304
305	Returns number of nodes in created tree.
306
307	=cut
308
309	sub to_jsfind {
310	my $self = shift;
311
312	my $path = shift \|\| confess "to_jsfind need path to your index!";
313
314	$path .= "/" if ($path =~ /\/$/);
315	carp "can't create index in '$path': $!" if (! -w $path);
316
317	return $self->root->to_jsfind($path,"0");
318	}
319
320
321	# private, default cmd function
322	sub default_cmp {
323	$_[0] cmp $_[1];
324	}
325
326	#####################################################################
327
328	=head2 jsFind::Node methods
329
330	Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and
331	each has C<k+1> subnodes, which might be null.
332
333	The node is a blessed reference to a list with three elements:
334
335	($keylist, $datalist, $subnodelist)
336
337	each is a reference to a list list.
338
339	The null node is represented by a blessed reference to an empty list.
340
341	=cut
342
343	package jsFind::Node;
344
345	use warnings;
346	use strict;
347
348	use Carp;
349	use File::Path;
350
351	my $KEYS = 0;
352	my $DATA = 1;
353	my $SUBNODES = 2;
354
355	=head3 new
356
357	Create New node
358
359	my $node = new jsFind::Node ($keylist, $datalist, $subnodelist);
360
361	You can also mit argument list to create empty node.
362
363	my $empty_node = new jsFind::Node;
364
365	=cut
366
367	sub new {
368	my $self = shift;
369	my $package = ref $self \|\| $self;
370	croak "Internal error: jsFind::Node::new called with wrong number of arguments."
371	unless @_ == 3 \|\| @_ == 0;
372	bless [@_] => $package;
373	}
374
375	=head3 locate_key
376
377	Locate key in node using linear search. This should probably be replaced
378	by binary search for better performance.
379
380	my ($found, $index) = $node->locate_key($key, $cmp_coderef);
381
382	Argument C<$cmp_coderef> is optional reference to custom comparison
383	operator.
384
385	Returns (1, $index) if $key[$index] eq $key.
386
387	Returns (0, $index) if key could be found in $subnode[$index].
388
389	In scalar context, just returns 1 or 0.
390
391	=cut
392
393	sub locate_key {
394	# Use linear search for testing, replace with binary search.
395	my $self = shift;
396	my $key = shift;
397	my $cmp = shift \|\| \&jsFind::default_cmp;
398	my $i;
399	my $cmp_result;
400	my $N = $self->size;
401	for ($i = 0; $i < $N; $i++) {
402	$cmp_result = &$cmp($key, $self->key($i));
403	last if $cmp_result <= 0;
404	}
405
406	# $i is now the index of the first node-key greater than $key
407	# or $N if there is no such. $cmp_result is 0 iff the key was found.
408	(!$cmp_result, $i);
409	}
410
411
412	=head3 emptynode
413
414	Creates new empty node
415
416	$node = $root->emptynode;
417	$new_node = $node->emptynode;
418
419	=cut
420
421	sub emptynode {
422	new($_[0]); # Pass package name, but not anything else.
423	}
424
425	=head3 is_empty
426
427	Test if node is empty
428
429	if ($node->is_empty) { something }
430
431	=cut
432
433	# undef is empty; so is a blessed empty list.
434	sub is_empty {
435	my $self = shift;
436	!defined($self) \|\| $#$self < 0;
437	}
438
439	=head3 key
440
441	Return C<$i>th key from node
442
443	my $key = $node->key($i);
444
445	=cut
446
447	sub key {
448	# my ($self, $n) = @_;
449	# $self->[$KEYS][$n];
450
451	# speedup
452	$_[0]->[$KEYS][$_[1]];
453	}
454
455	=head3 data
456
457	Return C<$i>th data from node
458
459	my $data = $node->data($i);
460
461	=cut
462
463	sub data {
464	my ($self, $n) = @_;
465	$self->[$DATA][$n];
466	}
467
468	=head3 kdp_replace
469
470	Set key data pair for C<$i>th element in node
471
472	$node->kdp_replace($i, "key value" => {
473	"data key 1" => "data value 1",
474	"data key 2" => "data value 2",
475	};
476
477	=cut
478
479	sub kdp_replace {
480	my ($self, $n, $k => $d) = @_;
481	if (defined $k) {
482	$self->[$KEYS][$n] = $k;
483	$self->[$DATA][$n] = $d;
484	}
485	[$self->[$KEYS][$n],
486	$self->[$DATA][$n]];
487	}
488
489	=head3 kdp_insert
490
491	# No return value.
492
493	=cut
494
495	sub kdp_insert {
496	my $self = shift;
497	my ($k => $d) = @_;
498	my ($there, $where) = $self->locate_key($k) unless $self->is_empty;
499
500	if ($there) { croak("Tried to insert `$k => $d' into node where `$k' was already present."); }
501
502	# undef fix
503	$where \|\|= 0;
504
505	splice(@{$self->[$KEYS]}, $where, 0, $k);
506	splice(@{$self->[$DATA]}, $where, 0, $d);
507	splice(@{$self->[$SUBNODES]}, $where, 0, undef);
508	}
509
510	=head3 kdp_append
511
512	Adds new data keys and values to C<$i>th element in node
513
514	$node->kdp_append($i, "key value" => {
515	"added data key" => "added data value",
516	};
517
518	=cut
519
520	sub kdp_append {
521	my ($self, $n, $k => $d) = @_;
522	if (defined $k) {
523	$self->[$KEYS][$n] = $k;
524	my ($kv,$dv) = %{$d};
525	$self->[$DATA][$n]->{$kv} = $dv;
526	}
527	[$self->[$KEYS][$n],
528	$self->[$DATA][$n]];
529	}
530
531	=head3 subnode
532
533	Set new or return existing subnode
534
535	# return 4th subnode
536	my $my_node = $node->subnode(4);
537
538	# create new subnode 5 from $my_node
539	$node->subnode(5, $my_node);
540
541	=cut
542
543	sub subnode {
544	my ($self, $n, $newnode) = @_;
545	$self->[$SUBNODES][$n] = $newnode if defined $newnode;
546	$self->[$SUBNODES][$n];
547	}
548
549	=head3 is_leaf
550
551	Test if node is leaf
552
553	if ($node->is_leaf) { something }
554
555	=cut
556
557	sub is_leaf {
558	my $self = shift;
559	! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
560	}
561
562	=head3 size
563
564	Return number of keys in the node
565
566	my $nr = $node->size;
567
568	=cut
569
570	sub size {
571	my $self = shift;
572	return scalar(@{$self->[$KEYS]});
573	}
574
575	=head3 halves
576
577	# Accept an index $n
578	# Divide into two nodes so that keys 0 .. $n-1 are in one node
579	# and keys $n+1 ... $size are in the other.
580
581	=cut
582
583	sub halves {
584	my $self = shift;
585	my $n = shift;
586	my $s = $self->size;
587	my @right;
588	my @left;
589
590	$left[$KEYS] = [@{$self->[$KEYS]}[0 .. $n-1]];
591	$left[$DATA] = [@{$self->[$DATA]}[0 .. $n-1]];
592	$left[$SUBNODES] = [@{$self->[$SUBNODES]}[0 .. $n]];
593
594	$right[$KEYS] = [@{$self->[$KEYS]}[$n+1 .. $s-1]];
595	$right[$DATA] = [@{$self->[$DATA]}[$n+1 .. $s-1]];
596	$right[$SUBNODES] = [@{$self->[$SUBNODES]}[$n+1 .. $s]];
597
598	my @middle = ($self->[$KEYS][$n], $self->[$DATA][$n]);
599
600	($self->new(@left), $self->new(@right), \@middle);
601	}
602
603	=head3 to_string
604
605	Dumps tree as string
606
607	my $str = $root->to_string;
608
609	=cut
610
611	sub to_string {
612	my $self = shift;
613	my $indent = shift \|\| 0;
614	my $I = ' ' x $indent;
615	return '' if $self->is_empty;
616	my ($k, $d, $s) = @$self;
617	my $result = '';
618	$result .= defined($s->[0]) ? $s->[0]->to_string($indent+2) : '';
619	my $N = $self->size;
620	my $i;
621	for ($i = 0; $i < $N; $i++) {
622	# $result .= $I . "$k->[$i] => $d->[$i]\n";
623	$result .= $I . "$k->[$i]\n";
624	$result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+2) : '';
625	}
626	$result;
627	}
628
629	=begin comment
630
631	use Data::Dumper;
632
633	sub to_string {
634	my $self = shift;
635	my $indent = shift \|\| 0;
636	my $path = shift \|\| '0';
637	return '' if $self->is_empty;
638	my ($k, $d, $s) = @$self;
639	my $result = '';
640	$result .= defined($s->[0]) ? $s->[0]->to_string($indent+1,"$path/0") : '';
641	my $N = $self->size;
642	for (my $i = 0; $i < $N; $i++) {
643	my $dump = Dumper($d->[$i]);
644	$dump =~ s/[\n\r\s]+/ /gs;
645	$dump =~ s/\$VAR1\s=\s//;
646	$result .= sprintf("%-5s [%2d] %2s: %s => %s\n", $path, $i, $indent, $k->[$i], $dump);
647	$result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+1,"$path/$i") : '';
648	}
649	$result;
650	}
651
652	=end comment
653
654	=head3 to_dot
655
656	Recursivly walk nodes of tree
657
658	=cut
659
660	sub to_dot {
661	my $self = shift;
662	my $parent = shift;
663
664	return '' if $self->is_empty;
665
666	my $dot = '';
667
668	my ($k, $d, $s) = @$self;
669	my $N = $self->size;
670
671	my @dot_keys;
672
673	my $node_name = $parent \|\| '_';
674	$node_name =~ s/\W+//g;
675	$node_name .= " [$N]";
676
677	for (my $i = 0; $i <= $N; $i++) {
678	if (my $key = $k->[$i]) {
679	push @dot_keys, qq{<$i>$key};
680	}
681	$dot .= $s->[$i]->to_dot(qq{"$node_name":$i}) if ($s->[$i]);
682	}
683	push @dot_keys, qq{<$N>...} if (! $self->is_leaf);
684
685	my $label = join("\|",@dot_keys);
686	$dot .= qq{"$node_name" [ shape=record, label="$label" ];\n};
687
688	$dot .= qq{$parent -> "$node_name";\n} if ($parent);
689
690	$dot;
691	}
692
693	=head3 to_jsfind
694
695	Create jsFind xml files
696
697	my $nr=$tree->to_dot('/path/to/index','0');
698
699	Returns number of elements created
700
701	=cut
702
703	sub to_jsfind {
704	my $self = shift;
705	my ($path,$file) = @_;
706
707	return 0 if $self->is_empty;
708
709	my $nr_keys = 0;
710
711	my ($k, $d, $s) = @$self;
712	my $N = $self->size;
713
714	my ($key_xml, $data_xml) = ("<n>","<d>");
715
716	for (my $i = 0; $i <= $N; $i++) {
717	my $key = lc($k->[$i]);
718
719	if ($key) {
720	$key_xml .= qq{<k>$key</k>};
721	$data_xml .= qq{<e>};
722	#use Data::Dumper;
723	#print Dumper($d->[$i]);
724	foreach my $path (keys %{$d->[$i]}) {
725	$data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} \|\| 1).'" t="'.($d->[$i]->{$path}->{'t'} \|\| 'no title').'">'.$path.'</l>';
726	$nr_keys++;
727	}
728	$data_xml .= qq{</e>};
729	}
730
731	$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
732	}
733
734	$key_xml .= "</n>";
735	$data_xml .= "</d>";
736
737	if (! -e $path) {
738	mkpath($path) \|\| croak "can't create dir '$path': $!";
739	}
740
741	open(K, "> ${path}/${file}.xml") \|\| croak "can't open '$path/$file.xml': $!";
742	open(D, "> ${path}/_${file}.xml") \|\| croak "can't open '$path/_$file.xml': $!";
743
744	print K $key_xml;
745	print D $data_xml;
746
747	close(K);
748	close(D);
749
750	return $nr_keys;
751	}
752
753	1;
754	__END__
755
756	=head1 SEE ALSO
757
758	jsFind web site L<http://www.elucidsoft.net/projects/jsfind/>
759
760	B-Trees in perl web site L<http://perl.plover.com/BTree/>
761
762	=head1 AUTHORS
763
764	Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was
765	base for this module
766
767	Shawn P. Garbett E<lt>shawn@elucidsoft.netE<gt> wrote jsFind
768
769	Dobrica Pavlinusic E<lt>dpavlin@rot13.orgE<gt> wrote this module
770
771	=head1 COPYRIGHT AND LICENSE
772
773	Copyright (C) 2004 by Dobrica Pavlinusic
774
775	This program is free software; you can redistribute it and/or modify it
776	under the terms of the GNU General Public License as published by the Free
777	Software Foundation; either version 2 of the License, or (at your option)
778	any later version. This program is distributed in the hope that it will be
779	useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
780	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
781	Public License for more details.
782
783	=cut