/[jsFind]/trunk/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 39 - (hide annotations)
Sun Dec 19 23:26:23 2004 UTC (19 years, 4 months ago) by dpavlin
File size: 21030 byte(s)
support for older Export which doesn't export 'import'. This should make
jsFind functional on perl 5.8.2 (on Darwin for example)

1 dpavlin 1 package jsFind;
2    
3 dpavlin 14 use 5.005;
4 dpavlin 1 use strict;
5     use warnings;
6 dpavlin 11 use HTML::Entities;
7 dpavlin 1
8 dpavlin 39 our $VERSION = '0.07_01';
9 dpavlin 1
10 dpavlin 39 use Exporter;
11 dpavlin 12 use Carp;
12    
13     our @ISA = qw(Exporter);
14    
15     BEGIN {
16 dpavlin 39 Exporter::import 'jsFind::Node';
17 dpavlin 12 }
18    
19 dpavlin 1 =head1 NAME
20    
21 dpavlin 33 jsFind - generate index for full text search engine in JavaScript
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25     use jsFind;
26 dpavlin 12 my $t = new jsFind(B => 4);
27     my $f = 1;
28     foreach my $k (qw{minima ut dolorem sapiente voluptatem}) {
29     $t->B_search(Key => $k,
30     Data => {
31     "path" => {
32     t => "word $k",
33     f => $f },
34     },
35     Insert => 1,
36     Append => 1,
37     );
38     }
39 dpavlin 1
40     =head1 DESCRIPTION
41    
42     This module can be used to create index files for jsFind, powerful tool for
43     adding a search engine to a CDROM archive or catalog without requiring the
44     user to install anything.
45    
46     Main difference between this module and scripts delivered with jsFind are:
47    
48     =over 5
49    
50     =item *
51    
52     You don't need to use swish-e to create index
53    
54     =item *
55    
56 dpavlin 33 you can programatically (and incrementaly) create index for jsFind
57 dpavlin 1
58 dpavlin 33 =item *
59    
60     you can create more than one index and search them using same C<search.html>
61     page
62    
63 dpavlin 1 =back
64    
65 dpavlin 14 You can also examine examples which come as tests with this module,
66 dpavlin 33 for example C<t/04words.t> or C<t/10homer.t>.
67 dpavlin 14
68 dpavlin 33 =head2 jsFind
69    
70     jsFind search engine was written by Shawn Garbett from eLucid Software.
71     The search engine itself is a small piece of JavaScript (1.2 with level 2
72     DOM). It is easily customizable to fit into a current set of HTML. This
73     JavaScript searches an XML index dataset for the appropriate links, and can
74     filter and sort the results.
75    
76     JavaScript code distributed with this module is based on version 0.0.3 which
77     was current when this module development started. Various changes where done
78     on JavaScript code to fix bugs, add features and remove warnings. For
79     complete list see C<Changes> file which comes with distribution.
80    
81     This module has been tested using C<html/test.html> with following browsers:
82    
83     =over 5
84    
85     =item Mozilla FireFox 0.8 to 1.0
86    
87     using DOM 2 C<document.implementation.createDocument>
88    
89     =item Internet Explorer 5.5 and 6.0
90    
91     using ActiveX C<Microsoft.XMLDOM> or C<MSXML2.DOMDocument>
92    
93     =item Konqueror 3.3
94    
95     using DOM 2 C<document.implementation.createDocument>
96    
97     =item Opera 7.54 (without Java)
98    
99     using experimental iframe implementation which is much slower than other methods.
100    
101     =back
102    
103     If searching doesn't work for your combination of operating system and
104     browser, please open C<html/test.html> file and wait a while. It will search sample
105     file included with distribution and report results. Reports with included
106     test debugging are welcomed.
107    
108 dpavlin 12 =head1 jsFind methods
109 dpavlin 1
110 dpavlin 12 C<jsFind> is mode implementing methods which you, the user, are going to
111     use to create indexes.
112 dpavlin 1
113 dpavlin 12 =head2 new
114 dpavlin 1
115     Create new tree. Arguments are C<B> which is maximum numbers of keys in
116     each node and optional C<Root> node. Each root node may have child nodes.
117    
118     All nodes are objects from C<jsFind::Node>.
119    
120     my $t = new jsFind(B => 4);
121    
122     =cut
123    
124     my $DEBUG = 1;
125    
126     sub new {
127     my $package = shift;
128     my %ARGV = @_;
129     croak "Usage: {$package}::new(B => number [, Root => root node ])"
130     unless exists $ARGV{B};
131     if ($ARGV{B} % 2) {
132     my $B = $ARGV{B} + 1;
133     carp "B must be an even number. Using $B instead.";
134     $ARGV{B} = $B;
135     }
136    
137     my $B = $ARGV{B};
138     my $Root = exists($ARGV{Root}) ? $ARGV{Root} : jsFind::Node->emptynode;
139     bless { B => $B, Root => $Root } => $package;
140     }
141    
142 dpavlin 12 =head2 B_search
143 dpavlin 1
144     Search, insert, append or replace data in B-Tree
145    
146 dpavlin 5 $t->B_search(
147     Key => 'key value',
148     Data => { "path" => {
149     "t" => "title of document",
150     "f" => 99,
151 dpavlin 7 },
152 dpavlin 5 },
153     Insert => 1,
154     Append => 1,
155     );
156 dpavlin 1
157     Semantics:
158    
159     If key not found, insert it iff C<Insert> argument is present.
160    
161     If key B<is> found, replace existing data iff C<Replace> argument
162     is present or add new datum to existing iff C<Append> argument is present.
163    
164     =cut
165    
166     sub B_search {
167     my $self = shift;
168     my %args = @_;
169     my $cur_node = $self->root;
170     my $k = $args{Key};
171     my $d = $args{Data};
172     my @path;
173    
174     if ($cur_node->is_empty) { # Special case for empty root
175     if ($args{Insert}) {
176     $cur_node->kdp_insert($k => $d);
177     return $d;
178     } else {
179     return undef;
180     }
181     }
182    
183     # Descend tree to leaf
184     for (;;) {
185    
186     # Didn't hit bottom yet.
187    
188     my($there, $where) = $cur_node->locate_key($k);
189     if ($there) { # Found it!
190     if ($args{Replace}) {
191     $cur_node->kdp_replace($where, $k => $d);
192     } elsif ($args{Append}) {
193     $cur_node->kdp_append($where, $k => $d);
194     }
195     return $cur_node->data($where);
196     }
197    
198     # Not here---must be in a subtree.
199    
200     if ($cur_node->is_leaf) { # But there are no subtrees
201     return undef unless $args{Insert}; # Search failed
202     # Stuff it in
203     $cur_node->kdp_insert($k => $d);
204     if ($self->node_overfull($cur_node)) { # Oops--there was no room.
205     $self->split_and_promote($cur_node, @path);
206     }
207     return $d;
208     }
209    
210     # There are subtrees, and the key is in one of them.
211    
212     push @path, [$cur_node, $where]; # Record path from root.
213    
214     # Move down to search the subtree
215     $cur_node = $cur_node->subnode($where);
216    
217     # and start over.
218     } # for (;;) ...
219    
220     croak ("How did I get here?");
221     }
222    
223    
224    
225     sub split_and_promote_old {
226     my $self = shift;
227     my ($cur_node, @path) = @_;
228    
229     for (;;) {
230     my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
231     my ($up, $where) = @{pop @path};
232     if ($up) {
233     $up->kdp_insert(@$kdp);
234     my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
235     croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
236     unless $tthere;
237     croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
238     unless $twhere == $where;
239     $up->subnode($where, $newleft);
240     $up->subnode($where+1, $newright);
241     return unless $self->node_overfull($up);
242     $cur_node = $up;
243     } else { # We're at the top; make a new root.
244     my $newroot = new jsFind::Node ([$kdp->[0]],
245     [$kdp->[1]],
246     [$newleft, $newright]);
247     $self->root($newroot);
248     return;
249     }
250     }
251    
252     }
253    
254     sub split_and_promote {
255     my $self = shift;
256     my ($cur_node, @path) = @_;
257    
258     for (;;) {
259     my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
260     my ($up, $where) = @{pop @path} if (@path);
261     if ($up) {
262     $up->kdp_insert(@$kdp);
263     if ($DEBUG) {
264     my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
265     croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
266     unless $tthere;
267     croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
268     unless $twhere == $where;
269     }
270     $up->subnode($where, $newleft);
271     $up->subnode($where+1, $newright);
272     return unless $self->node_overfull($up);
273     $cur_node = $up;
274     } else { # We're at the top; make a new root.
275     my $newroot = new jsFind::Node([$kdp->[0]],
276     [$kdp->[1]],
277     [$newleft, $newright]);
278     $self->root($newroot);
279     return;
280     }
281     }
282     }
283    
284 dpavlin 12 =head2 B
285 dpavlin 1
286     Return B (maximum number of keys)
287    
288     my $max_size = $t->B;
289    
290     =cut
291    
292     sub B {
293     $_[0]{B};
294     }
295    
296 dpavlin 12 =head2 root
297 dpavlin 1
298     Returns root node
299    
300     my $root = $t->root;
301    
302     =cut
303    
304     sub root {
305     my ($self, $newroot) = @_;
306     $self->{Root} = $newroot if defined $newroot;
307     $self->{Root};
308     }
309    
310 dpavlin 12 =head2 node_overfull
311 dpavlin 1
312     Returns if node is overfull
313    
314     if ($node->node_overfull) { something }
315    
316     =cut
317    
318     sub node_overfull {
319     my $self = shift;
320     my $node = shift;
321     $node->size > $self->B;
322     }
323    
324 dpavlin 12 =head2 to_string
325 dpavlin 1
326     Returns your tree as formatted string.
327    
328     my $text = $root->to_string;
329    
330     Mostly usefull for debugging as output leaves much to be desired.
331    
332     =cut
333    
334     sub to_string {
335     $_[0]->root->to_string;
336     }
337    
338 dpavlin 12 =head2 to_dot
339 dpavlin 1
340     Create Graphviz graph of your tree
341    
342     my $dot_graph = $root->to_dot;
343    
344     =cut
345    
346     sub to_dot {
347     my $self = shift;
348    
349     my $dot = qq/digraph dns {\nrankdir=LR;\n/;
350     $dot .= $self->root->to_dot;
351     $dot .= qq/\n}\n/;
352    
353     return $dot;
354     }
355    
356 dpavlin 12 =head2 to_jsfind
357 dpavlin 1
358     Create xml index files for jsFind. This should be called after
359     your B-Tree has been filled with data.
360    
361 dpavlin 35 $root->to_jsfind(
362     dir => '/full/path/to/index/dir/',
363     data_codepage => 'ISO-8859-2',
364     index_codepage => 'UTF-8',
365     output_filter => sub {
366     my $t = shift || return;
367     $t =~ s/&egrave;/e/;
368     }
369     );
370 dpavlin 1
371 dpavlin 35 All options except C<dir> are optional.
372    
373 dpavlin 1 Returns number of nodes in created tree.
374    
375 dpavlin 35 Options:
376 dpavlin 9
377 dpavlin 35 =over 4
378 dpavlin 9
379 dpavlin 35 =item dir
380 dpavlin 11
381 dpavlin 35 Full path to directory for index (which will be created if needed).
382 dpavlin 11
383 dpavlin 35 =item data_codepage
384    
385     If your imput data isn't in C<ISO-8859-1> encoding, you will have to specify
386     this option.
387    
388     =item index_codepage
389    
390     If your index encoding is not C<UTF-8> use this option.
391    
392     If you are not using supplied JavaScript search code, or your browser is
393     terribly broken and thinks that index shouldn't be in UTF-8 encoding, use
394     this option to specify encoding for created XML index.
395    
396     =item output_filter
397    
398     B<this is just draft of documentation for option which is not implemented!>
399    
400     Code ref to sub which can do modifications on resulting XML file for node.
401     Encoding of this data will be in L<index_codepage> and you have to take care
402     not to break XML structure. Calling L<xmllint> on your result index
403     (like C<t/90xmllint.t> does in this distribution) is a good idea after using
404     this option.
405    
406     This option is also right place to plug in unaccenting function using
407     L<Text::Unaccent>.
408    
409     =back
410    
411 dpavlin 1 =cut
412    
413 dpavlin 9 my $iconv;
414 dpavlin 11 my $iconv_l1;
415 dpavlin 9
416 dpavlin 1 sub to_jsfind {
417     my $self = shift;
418    
419 dpavlin 35 my %arg = @_;
420 dpavlin 1
421 dpavlin 35 confess "to_jsfind need path to your index directory !" unless ($arg{'dir'});
422 dpavlin 11
423 dpavlin 35 my $data_codepage = $arg{'data_codepage'};
424     my $index_codepage = $arg{'index_codepage'} || 'UTF-8';
425 dpavlin 11
426 dpavlin 35 # create ISO-8859-1 iconv for HTML::Entities decode
427     $iconv_l1 = Text::Iconv->new('ISO-8859-1',$index_codepage);
428    
429     # create another iconv for data
430     if ($data_codepage && $index_codepage) {
431     $iconv = Text::Iconv->new($data_codepage,$index_codepage);
432 dpavlin 9 }
433    
434 dpavlin 35 return $self->root->to_jsfind($arg{'dir'},"0");
435 dpavlin 1 }
436    
437    
438     # private, default cmd function
439     sub default_cmp {
440     $_[0] cmp $_[1];
441     }
442    
443 dpavlin 12 =head2 _recode
444 dpavlin 9
445     This is internal function to recode charset.
446    
447 dpavlin 12 It will also try to decode entities in data using L<HTML::Entities>.
448 dpavlin 11
449 dpavlin 9 =cut
450    
451     sub _recode {
452     my $self = shift;
453     my $text = shift || return;
454    
455 dpavlin 11 sub _decode_html_entities {
456     my $data = shift || return;
457     $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data";
458     }
459    
460 dpavlin 9 if ($iconv) {
461 dpavlin 11 $text = $iconv->convert($text) || $text && carp "convert problem: $text";
462     $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges;
463 dpavlin 9 }
464 dpavlin 11
465     return $text;
466 dpavlin 9 }
467    
468 dpavlin 1 #####################################################################
469    
470 dpavlin 12 =head1 jsFind::Node methods
471 dpavlin 1
472     Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and
473     each has C<k+1> subnodes, which might be null.
474    
475     The node is a blessed reference to a list with three elements:
476    
477     ($keylist, $datalist, $subnodelist)
478    
479     each is a reference to a list list.
480    
481     The null node is represented by a blessed reference to an empty list.
482    
483     =cut
484    
485     package jsFind::Node;
486    
487     use warnings;
488     use strict;
489    
490     use Carp;
491     use File::Path;
492 dpavlin 9 use Text::Iconv;
493 dpavlin 15 use POSIX;
494 dpavlin 1
495 dpavlin 9 use base 'jsFind';
496    
497 dpavlin 1 my $KEYS = 0;
498     my $DATA = 1;
499     my $SUBNODES = 2;
500    
501 dpavlin 12 =head2 new
502 dpavlin 1
503     Create New node
504    
505     my $node = new jsFind::Node ($keylist, $datalist, $subnodelist);
506    
507     You can also mit argument list to create empty node.
508    
509     my $empty_node = new jsFind::Node;
510    
511     =cut
512    
513     sub new {
514     my $self = shift;
515     my $package = ref $self || $self;
516     croak "Internal error: jsFind::Node::new called with wrong number of arguments."
517     unless @_ == 3 || @_ == 0;
518     bless [@_] => $package;
519     }
520    
521 dpavlin 12 =head2 locate_key
522 dpavlin 1
523     Locate key in node using linear search. This should probably be replaced
524     by binary search for better performance.
525    
526     my ($found, $index) = $node->locate_key($key, $cmp_coderef);
527    
528     Argument C<$cmp_coderef> is optional reference to custom comparison
529     operator.
530    
531     Returns (1, $index) if $key[$index] eq $key.
532    
533     Returns (0, $index) if key could be found in $subnode[$index].
534    
535     In scalar context, just returns 1 or 0.
536    
537     =cut
538    
539     sub locate_key {
540     # Use linear search for testing, replace with binary search.
541     my $self = shift;
542     my $key = shift;
543     my $cmp = shift || \&jsFind::default_cmp;
544     my $i;
545     my $cmp_result;
546     my $N = $self->size;
547     for ($i = 0; $i < $N; $i++) {
548     $cmp_result = &$cmp($key, $self->key($i));
549     last if $cmp_result <= 0;
550     }
551    
552     # $i is now the index of the first node-key greater than $key
553     # or $N if there is no such. $cmp_result is 0 iff the key was found.
554     (!$cmp_result, $i);
555     }
556    
557    
558 dpavlin 12 =head2 emptynode
559 dpavlin 1
560     Creates new empty node
561    
562     $node = $root->emptynode;
563     $new_node = $node->emptynode;
564    
565     =cut
566    
567     sub emptynode {
568     new($_[0]); # Pass package name, but not anything else.
569     }
570    
571 dpavlin 12 =head2 is_empty
572 dpavlin 1
573     Test if node is empty
574    
575     if ($node->is_empty) { something }
576    
577     =cut
578    
579     # undef is empty; so is a blessed empty list.
580     sub is_empty {
581     my $self = shift;
582     !defined($self) || $#$self < 0;
583     }
584    
585 dpavlin 12 =head2 key
586 dpavlin 1
587     Return C<$i>th key from node
588    
589     my $key = $node->key($i);
590    
591     =cut
592    
593     sub key {
594     # my ($self, $n) = @_;
595     # $self->[$KEYS][$n];
596    
597     # speedup
598     $_[0]->[$KEYS][$_[1]];
599     }
600    
601 dpavlin 12 =head2 data
602 dpavlin 1
603     Return C<$i>th data from node
604    
605     my $data = $node->data($i);
606    
607     =cut
608    
609     sub data {
610     my ($self, $n) = @_;
611     $self->[$DATA][$n];
612     }
613    
614 dpavlin 12 =head2 kdp_replace
615 dpavlin 1
616     Set key data pair for C<$i>th element in node
617    
618     $node->kdp_replace($i, "key value" => {
619     "data key 1" => "data value 1",
620     "data key 2" => "data value 2",
621     };
622    
623     =cut
624    
625     sub kdp_replace {
626     my ($self, $n, $k => $d) = @_;
627     if (defined $k) {
628     $self->[$KEYS][$n] = $k;
629     $self->[$DATA][$n] = $d;
630     }
631     [$self->[$KEYS][$n],
632     $self->[$DATA][$n]];
633     }
634    
635 dpavlin 12 =head2 kdp_insert
636 dpavlin 1
637 dpavlin 12 Insert key/data pair in tree
638 dpavlin 1
639 dpavlin 12 $node->kdp_insert("key value" => "data value");
640    
641     No return value.
642    
643 dpavlin 1 =cut
644    
645     sub kdp_insert {
646     my $self = shift;
647     my ($k => $d) = @_;
648     my ($there, $where) = $self->locate_key($k) unless $self->is_empty;
649    
650     if ($there) { croak("Tried to insert `$k => $d' into node where `$k' was already present."); }
651    
652     # undef fix
653     $where ||= 0;
654    
655     splice(@{$self->[$KEYS]}, $where, 0, $k);
656     splice(@{$self->[$DATA]}, $where, 0, $d);
657     splice(@{$self->[$SUBNODES]}, $where, 0, undef);
658     }
659    
660 dpavlin 12 =head2 kdp_append
661 dpavlin 1
662     Adds new data keys and values to C<$i>th element in node
663    
664     $node->kdp_append($i, "key value" => {
665     "added data key" => "added data value",
666     };
667    
668     =cut
669    
670     sub kdp_append {
671     my ($self, $n, $k => $d) = @_;
672     if (defined $k) {
673     $self->[$KEYS][$n] = $k;
674     my ($kv,$dv) = %{$d};
675     $self->[$DATA][$n]->{$kv} = $dv;
676     }
677     [$self->[$KEYS][$n],
678     $self->[$DATA][$n]];
679     }
680    
681 dpavlin 12 =head2 subnode
682 dpavlin 1
683     Set new or return existing subnode
684    
685     # return 4th subnode
686     my $my_node = $node->subnode(4);
687    
688     # create new subnode 5 from $my_node
689     $node->subnode(5, $my_node);
690    
691     =cut
692    
693     sub subnode {
694     my ($self, $n, $newnode) = @_;
695     $self->[$SUBNODES][$n] = $newnode if defined $newnode;
696     $self->[$SUBNODES][$n];
697     }
698    
699 dpavlin 12 =head2 is_leaf
700 dpavlin 1
701     Test if node is leaf
702    
703     if ($node->is_leaf) { something }
704    
705     =cut
706    
707     sub is_leaf {
708     my $self = shift;
709     ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
710     }
711    
712 dpavlin 12 =head2 size
713 dpavlin 1
714     Return number of keys in the node
715    
716     my $nr = $node->size;
717    
718     =cut
719    
720     sub size {
721     my $self = shift;
722     return scalar(@{$self->[$KEYS]});
723     }
724    
725 dpavlin 12 =head2 halves
726 dpavlin 1
727 dpavlin 12 Split node into two halves so that keys C<0 .. $n-1> are in one node
728     and keys C<$n+1 ... $size> are in the other.
729 dpavlin 1
730 dpavlin 12 my ($left_node, $right_node, $kdp) = $node->halves($n);
731    
732 dpavlin 1 =cut
733    
734     sub halves {
735     my $self = shift;
736     my $n = shift;
737     my $s = $self->size;
738     my @right;
739     my @left;
740    
741     $left[$KEYS] = [@{$self->[$KEYS]}[0 .. $n-1]];
742     $left[$DATA] = [@{$self->[$DATA]}[0 .. $n-1]];
743     $left[$SUBNODES] = [@{$self->[$SUBNODES]}[0 .. $n]];
744    
745     $right[$KEYS] = [@{$self->[$KEYS]}[$n+1 .. $s-1]];
746     $right[$DATA] = [@{$self->[$DATA]}[$n+1 .. $s-1]];
747     $right[$SUBNODES] = [@{$self->[$SUBNODES]}[$n+1 .. $s]];
748    
749     my @middle = ($self->[$KEYS][$n], $self->[$DATA][$n]);
750    
751     ($self->new(@left), $self->new(@right), \@middle);
752     }
753    
754 dpavlin 12 =head2 to_string
755 dpavlin 1
756     Dumps tree as string
757    
758     my $str = $root->to_string;
759    
760     =cut
761    
762     sub to_string {
763     my $self = shift;
764     my $indent = shift || 0;
765     my $I = ' ' x $indent;
766     return '' if $self->is_empty;
767     my ($k, $d, $s) = @$self;
768     my $result = '';
769     $result .= defined($s->[0]) ? $s->[0]->to_string($indent+2) : '';
770     my $N = $self->size;
771     my $i;
772     for ($i = 0; $i < $N; $i++) {
773     # $result .= $I . "$k->[$i] => $d->[$i]\n";
774     $result .= $I . "$k->[$i]\n";
775     $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+2) : '';
776     }
777     $result;
778     }
779    
780     =begin comment
781    
782     use Data::Dumper;
783    
784     sub to_string {
785     my $self = shift;
786     my $indent = shift || 0;
787     my $path = shift || '0';
788     return '' if $self->is_empty;
789     my ($k, $d, $s) = @$self;
790     my $result = '';
791     $result .= defined($s->[0]) ? $s->[0]->to_string($indent+1,"$path/0") : '';
792     my $N = $self->size;
793     for (my $i = 0; $i < $N; $i++) {
794     my $dump = Dumper($d->[$i]);
795     $dump =~ s/[\n\r\s]+/ /gs;
796     $dump =~ s/\$VAR1\s*=\s*//;
797     $result .= sprintf("%-5s [%2d] %2s: %s => %s\n", $path, $i, $indent, $k->[$i], $dump);
798     $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+1,"$path/$i") : '';
799     }
800     $result;
801     }
802    
803     =end comment
804    
805 dpavlin 12 =head2 to_dot
806 dpavlin 1
807     Recursivly walk nodes of tree
808    
809     =cut
810    
811     sub to_dot {
812     my $self = shift;
813     my $parent = shift;
814    
815     return '' if $self->is_empty;
816    
817     my $dot = '';
818    
819     my ($k, $d, $s) = @$self;
820     my $N = $self->size;
821    
822     my @dot_keys;
823    
824     my $node_name = $parent || '_';
825     $node_name =~ s/\W+//g;
826     $node_name .= " [$N]";
827    
828     for (my $i = 0; $i <= $N; $i++) {
829     if (my $key = $k->[$i]) {
830     push @dot_keys, qq{<$i>$key};
831     }
832     $dot .= $s->[$i]->to_dot(qq{"$node_name":$i}) if ($s->[$i]);
833     }
834     push @dot_keys, qq{<$N>...} if (! $self->is_leaf);
835    
836     my $label = join("|",@dot_keys);
837     $dot .= qq{"$node_name" [ shape=record, label="$label" ];\n};
838    
839     $dot .= qq{$parent -> "$node_name";\n} if ($parent);
840    
841     $dot;
842     }
843    
844 dpavlin 12 =head2 to_xml
845 dpavlin 11
846     Escape <, >, & and ", and to produce valid XML
847    
848     =cut
849    
850     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
851     my $escape_re = join '|' => keys %escape;
852    
853     sub to_xml {
854     my $self = shift || confess "you should call to_xml as object!";
855    
856     my $d = shift || return;
857     $d = $self->SUPER::_recode($d);
858     confess "escape_re undefined!" unless ($escape_re);
859     $d =~ s/($escape_re)/$escape{$1}/g;
860     return $d;
861     }
862    
863 dpavlin 36 =head2 base_x
864 dpavlin 15
865 dpavlin 36 Convert number to base x (used for jsFind index filenames).
866 dpavlin 15
867 dpavlin 36 my $n = $tree->base_x(50);
868 dpavlin 15
869     =cut
870    
871 dpavlin 36 sub base_x {
872 dpavlin 15 my $self = shift;
873    
874     my $value = shift;
875    
876     confess("need non-negative number") if (! defined($value) || $value < 0);
877    
878     my @digits = qw(
879     0 1 2 3 4 5 6 7 8 9
880     a b c d e f g h i j k l m n o p q r s t u v w x y z
881     );
882    
883     my $base = scalar(@digits);
884     my $out = "";
885     my $pow = 1;
886     my $pos = 0;
887    
888    
889     if($value == 0) {
890     return "0";
891     }
892    
893     while($value > 0) {
894     $pos = $value % $base;
895     $out = $digits[$pos] . $out;
896     $value = floor($value/$base);
897     $pow *= $base;
898     }
899    
900     return $out;
901     }
902    
903 dpavlin 12 =head2 to_jsfind
904 dpavlin 1
905     Create jsFind xml files
906    
907 dpavlin 8 my $nr=$tree->to_jsfind('/path/to/index','0');
908 dpavlin 1
909     Returns number of elements created
910    
911     =cut
912    
913     sub to_jsfind {
914     my $self = shift;
915     my ($path,$file) = @_;
916    
917     return 0 if $self->is_empty;
918    
919 dpavlin 8 confess("path is undefined.") unless ($path);
920     confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
921    
922 dpavlin 36 $file = $self->base_x($file);
923 dpavlin 15
924 dpavlin 1 my $nr_keys = 0;
925    
926     my ($k, $d, $s) = @$self;
927     my $N = $self->size;
928    
929     my ($key_xml, $data_xml) = ("<n>","<d>");
930    
931     for (my $i = 0; $i <= $N; $i++) {
932     my $key = lc($k->[$i]);
933    
934     if ($key) {
935 dpavlin 11 $key_xml .= '<k>'.$self->to_xml($key).'</k>';
936     $data_xml .= '<e>';
937 dpavlin 1 #use Data::Dumper;
938     #print Dumper($d->[$i]);
939     foreach my $path (keys %{$d->[$i]}) {
940 dpavlin 11 $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>';
941 dpavlin 1 $nr_keys++;
942     }
943 dpavlin 11 $data_xml .= '</e>';
944 dpavlin 1 }
945    
946     $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
947     }
948    
949 dpavlin 11 $key_xml .= '</n>';
950     $data_xml .= '</d>';
951 dpavlin 1
952     if (! -e $path) {
953     mkpath($path) || croak "can't create dir '$path': $!";
954     }
955    
956     open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";
957     open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";
958    
959 dpavlin 11 print K $key_xml;
960     print D $data_xml;
961 dpavlin 1
962     close(K);
963     close(D);
964    
965     return $nr_keys;
966     }
967    
968     1;
969     __END__
970    
971     =head1 SEE ALSO
972    
973     jsFind web site L<http://www.elucidsoft.net/projects/jsfind/>
974    
975     B-Trees in perl web site L<http://perl.plover.com/BTree/>
976    
977 dpavlin 14 This module web site L<http://www.rot13.org/~dpavlin/jsFind.html>
978    
979 dpavlin 1 =head1 AUTHORS
980    
981     Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was
982     base for this module
983    
984     Shawn P. Garbett E<lt>shawn@elucidsoft.netE<gt> wrote jsFind
985    
986     Dobrica Pavlinusic E<lt>dpavlin@rot13.orgE<gt> wrote this module
987    
988     =head1 COPYRIGHT AND LICENSE
989    
990     Copyright (C) 2004 by Dobrica Pavlinusic
991    
992     This program is free software; you can redistribute it and/or modify it
993     under the terms of the GNU General Public License as published by the Free
994     Software Foundation; either version 2 of the License, or (at your option)
995     any later version. This program is distributed in the hope that it will be
996     useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
997     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
998     Public License for more details.
999    
1000     =cut

  ViewVC Help
Powered by ViewVC 1.1.26