/[jsFind]/trunk/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 33 - (hide annotations)
Sun Oct 10 05:10:25 2004 UTC (19 years, 7 months ago) by dpavlin
File size: 20093 byte(s)
Version 0.05: much better documentation, no API change

1 dpavlin 1 package jsFind;
2    
3 dpavlin 14 use 5.005;
4 dpavlin 1 use strict;
5     use warnings;
6 dpavlin 11 use HTML::Entities;
7 dpavlin 1
8 dpavlin 33 our $VERSION = '0.05';
9 dpavlin 1
10 dpavlin 12 use Exporter 'import';
11     use Carp;
12    
13     our @ISA = qw(Exporter);
14    
15     BEGIN {
16     import 'jsFind::Node';
17     }
18    
19 dpavlin 1 =head1 NAME
20    
21 dpavlin 33 jsFind - generate index for full text search engine in JavaScript
22 dpavlin 1
23     =head1 SYNOPSIS
24    
25     use jsFind;
26 dpavlin 12 my $t = new jsFind(B => 4);
27     my $f = 1;
28     foreach my $k (qw{minima ut dolorem sapiente voluptatem}) {
29     $t->B_search(Key => $k,
30     Data => {
31     "path" => {
32     t => "word $k",
33     f => $f },
34     },
35     Insert => 1,
36     Append => 1,
37     );
38     }
39 dpavlin 1
40     =head1 DESCRIPTION
41    
42     This module can be used to create index files for jsFind, powerful tool for
43     adding a search engine to a CDROM archive or catalog without requiring the
44     user to install anything.
45    
46     Main difference between this module and scripts delivered with jsFind are:
47    
48     =over 5
49    
50     =item *
51    
52     You don't need to use swish-e to create index
53    
54     =item *
55    
56 dpavlin 33 you can programatically (and incrementaly) create index for jsFind
57 dpavlin 1
58 dpavlin 33 =item *
59    
60     you can create more than one index and search them using same C<search.html>
61     page
62    
63 dpavlin 1 =back
64    
65 dpavlin 14 You can also examine examples which come as tests with this module,
66 dpavlin 33 for example C<t/04words.t> or C<t/10homer.t>.
67 dpavlin 14
68 dpavlin 33 =head2 jsFind
69    
70     jsFind search engine was written by Shawn Garbett from eLucid Software.
71     The search engine itself is a small piece of JavaScript (1.2 with level 2
72     DOM). It is easily customizable to fit into a current set of HTML. This
73     JavaScript searches an XML index dataset for the appropriate links, and can
74     filter and sort the results.
75    
76     JavaScript code distributed with this module is based on version 0.0.3 which
77     was current when this module development started. Various changes where done
78     on JavaScript code to fix bugs, add features and remove warnings. For
79     complete list see C<Changes> file which comes with distribution.
80    
81     This module has been tested using C<html/test.html> with following browsers:
82    
83     =over 5
84    
85     =item Mozilla FireFox 0.8 to 1.0
86    
87     using DOM 2 C<document.implementation.createDocument>
88    
89     =item Internet Explorer 5.5 and 6.0
90    
91     using ActiveX C<Microsoft.XMLDOM> or C<MSXML2.DOMDocument>
92    
93     =item Konqueror 3.3
94    
95     using DOM 2 C<document.implementation.createDocument>
96    
97     =item Opera 7.54 (without Java)
98    
99     using experimental iframe implementation which is much slower than other methods.
100    
101     =back
102    
103     If searching doesn't work for your combination of operating system and
104     browser, please open C<html/test.html> file and wait a while. It will search sample
105     file included with distribution and report results. Reports with included
106     test debugging are welcomed.
107    
108 dpavlin 12 =head1 jsFind methods
109 dpavlin 1
110 dpavlin 12 C<jsFind> is mode implementing methods which you, the user, are going to
111     use to create indexes.
112 dpavlin 1
113 dpavlin 12 =head2 new
114 dpavlin 1
115     Create new tree. Arguments are C<B> which is maximum numbers of keys in
116     each node and optional C<Root> node. Each root node may have child nodes.
117    
118     All nodes are objects from C<jsFind::Node>.
119    
120     my $t = new jsFind(B => 4);
121    
122     =cut
123    
124     my $DEBUG = 1;
125    
126     sub new {
127     my $package = shift;
128     my %ARGV = @_;
129     croak "Usage: {$package}::new(B => number [, Root => root node ])"
130     unless exists $ARGV{B};
131     if ($ARGV{B} % 2) {
132     my $B = $ARGV{B} + 1;
133     carp "B must be an even number. Using $B instead.";
134     $ARGV{B} = $B;
135     }
136    
137     my $B = $ARGV{B};
138     my $Root = exists($ARGV{Root}) ? $ARGV{Root} : jsFind::Node->emptynode;
139     bless { B => $B, Root => $Root } => $package;
140     }
141    
142 dpavlin 12 =head2 B_search
143 dpavlin 1
144     Search, insert, append or replace data in B-Tree
145    
146 dpavlin 5 $t->B_search(
147     Key => 'key value',
148     Data => { "path" => {
149     "t" => "title of document",
150     "f" => 99,
151 dpavlin 7 },
152 dpavlin 5 },
153     Insert => 1,
154     Append => 1,
155     );
156 dpavlin 1
157     Semantics:
158    
159     If key not found, insert it iff C<Insert> argument is present.
160    
161     If key B<is> found, replace existing data iff C<Replace> argument
162     is present or add new datum to existing iff C<Append> argument is present.
163    
164     =cut
165    
166     sub B_search {
167     my $self = shift;
168     my %args = @_;
169     my $cur_node = $self->root;
170     my $k = $args{Key};
171     my $d = $args{Data};
172     my @path;
173    
174     if ($cur_node->is_empty) { # Special case for empty root
175     if ($args{Insert}) {
176     $cur_node->kdp_insert($k => $d);
177     return $d;
178     } else {
179     return undef;
180     }
181     }
182    
183     # Descend tree to leaf
184     for (;;) {
185    
186     # Didn't hit bottom yet.
187    
188     my($there, $where) = $cur_node->locate_key($k);
189     if ($there) { # Found it!
190     if ($args{Replace}) {
191     $cur_node->kdp_replace($where, $k => $d);
192     } elsif ($args{Append}) {
193     $cur_node->kdp_append($where, $k => $d);
194     }
195     return $cur_node->data($where);
196     }
197    
198     # Not here---must be in a subtree.
199    
200     if ($cur_node->is_leaf) { # But there are no subtrees
201     return undef unless $args{Insert}; # Search failed
202     # Stuff it in
203     $cur_node->kdp_insert($k => $d);
204     if ($self->node_overfull($cur_node)) { # Oops--there was no room.
205     $self->split_and_promote($cur_node, @path);
206     }
207     return $d;
208     }
209    
210     # There are subtrees, and the key is in one of them.
211    
212     push @path, [$cur_node, $where]; # Record path from root.
213    
214     # Move down to search the subtree
215     $cur_node = $cur_node->subnode($where);
216    
217     # and start over.
218     } # for (;;) ...
219    
220     croak ("How did I get here?");
221     }
222    
223    
224    
225     sub split_and_promote_old {
226     my $self = shift;
227     my ($cur_node, @path) = @_;
228    
229     for (;;) {
230     my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
231     my ($up, $where) = @{pop @path};
232     if ($up) {
233     $up->kdp_insert(@$kdp);
234     my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
235     croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
236     unless $tthere;
237     croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
238     unless $twhere == $where;
239     $up->subnode($where, $newleft);
240     $up->subnode($where+1, $newright);
241     return unless $self->node_overfull($up);
242     $cur_node = $up;
243     } else { # We're at the top; make a new root.
244     my $newroot = new jsFind::Node ([$kdp->[0]],
245     [$kdp->[1]],
246     [$newleft, $newright]);
247     $self->root($newroot);
248     return;
249     }
250     }
251    
252     }
253    
254     sub split_and_promote {
255     my $self = shift;
256     my ($cur_node, @path) = @_;
257    
258     for (;;) {
259     my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
260     my ($up, $where) = @{pop @path} if (@path);
261     if ($up) {
262     $up->kdp_insert(@$kdp);
263     if ($DEBUG) {
264     my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
265     croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
266     unless $tthere;
267     croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
268     unless $twhere == $where;
269     }
270     $up->subnode($where, $newleft);
271     $up->subnode($where+1, $newright);
272     return unless $self->node_overfull($up);
273     $cur_node = $up;
274     } else { # We're at the top; make a new root.
275     my $newroot = new jsFind::Node([$kdp->[0]],
276     [$kdp->[1]],
277     [$newleft, $newright]);
278     $self->root($newroot);
279     return;
280     }
281     }
282     }
283    
284 dpavlin 12 =head2 B
285 dpavlin 1
286     Return B (maximum number of keys)
287    
288     my $max_size = $t->B;
289    
290     =cut
291    
292     sub B {
293     $_[0]{B};
294     }
295    
296 dpavlin 12 =head2 root
297 dpavlin 1
298     Returns root node
299    
300     my $root = $t->root;
301    
302     =cut
303    
304     sub root {
305     my ($self, $newroot) = @_;
306     $self->{Root} = $newroot if defined $newroot;
307     $self->{Root};
308     }
309    
310 dpavlin 12 =head2 node_overfull
311 dpavlin 1
312     Returns if node is overfull
313    
314     if ($node->node_overfull) { something }
315    
316     =cut
317    
318     sub node_overfull {
319     my $self = shift;
320     my $node = shift;
321     $node->size > $self->B;
322     }
323    
324 dpavlin 12 =head2 to_string
325 dpavlin 1
326     Returns your tree as formatted string.
327    
328     my $text = $root->to_string;
329    
330     Mostly usefull for debugging as output leaves much to be desired.
331    
332     =cut
333    
334     sub to_string {
335     $_[0]->root->to_string;
336     }
337    
338 dpavlin 12 =head2 to_dot
339 dpavlin 1
340     Create Graphviz graph of your tree
341    
342     my $dot_graph = $root->to_dot;
343    
344     =cut
345    
346     sub to_dot {
347     my $self = shift;
348    
349     my $dot = qq/digraph dns {\nrankdir=LR;\n/;
350     $dot .= $self->root->to_dot;
351     $dot .= qq/\n}\n/;
352    
353     return $dot;
354     }
355    
356 dpavlin 12 =head2 to_jsfind
357 dpavlin 1
358     Create xml index files for jsFind. This should be called after
359     your B-Tree has been filled with data.
360    
361     $root->to_jsfind('/full/path/to/index/dir/');
362    
363     Returns number of nodes in created tree.
364    
365 dpavlin 9 There is also longer version if you want to recode your data charset
366     into different one (probably UTF-8):
367    
368     $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');
369    
370 dpavlin 11 Destination encoding is UTF-8 by default, so you don't have to specify it.
371    
372     $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');
373    
374 dpavlin 1 =cut
375    
376 dpavlin 9 my $iconv;
377 dpavlin 11 my $iconv_l1;
378 dpavlin 9
379 dpavlin 1 sub to_jsfind {
380     my $self = shift;
381    
382     my $path = shift || confess "to_jsfind need path to your index!";
383    
384 dpavlin 9 my ($from_cp,$to_cp) = @_;
385 dpavlin 11
386     $to_cp ||= 'UTF-8';
387    
388 dpavlin 9 if ($from_cp && $to_cp) {
389     $iconv = Text::Iconv->new($from_cp,$to_cp);
390     }
391 dpavlin 11 $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);
392 dpavlin 9
393 dpavlin 1 $path .= "/" if ($path =~ /\/$/);
394 dpavlin 11 #carp "creating directory for index '$path'" if (! -w $path);
395 dpavlin 1
396     return $self->root->to_jsfind($path,"0");
397     }
398    
399    
400     # private, default cmd function
401     sub default_cmp {
402     $_[0] cmp $_[1];
403     }
404    
405 dpavlin 12 =head2 _recode
406 dpavlin 9
407     This is internal function to recode charset.
408    
409 dpavlin 12 It will also try to decode entities in data using L<HTML::Entities>.
410 dpavlin 11
411 dpavlin 9 =cut
412    
413     sub _recode {
414     my $self = shift;
415     my $text = shift || return;
416    
417 dpavlin 11 sub _decode_html_entities {
418     my $data = shift || return;
419     $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data";
420     }
421    
422 dpavlin 9 if ($iconv) {
423 dpavlin 11 $text = $iconv->convert($text) || $text && carp "convert problem: $text";
424     $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges;
425 dpavlin 9 }
426 dpavlin 11
427     return $text;
428 dpavlin 9 }
429    
430 dpavlin 1 #####################################################################
431    
432 dpavlin 12 =head1 jsFind::Node methods
433 dpavlin 1
434     Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and
435     each has C<k+1> subnodes, which might be null.
436    
437     The node is a blessed reference to a list with three elements:
438    
439     ($keylist, $datalist, $subnodelist)
440    
441     each is a reference to a list list.
442    
443     The null node is represented by a blessed reference to an empty list.
444    
445     =cut
446    
447     package jsFind::Node;
448    
449     use warnings;
450     use strict;
451    
452     use Carp;
453     use File::Path;
454 dpavlin 9 use Text::Iconv;
455 dpavlin 15 use POSIX;
456 dpavlin 1
457 dpavlin 9 use base 'jsFind';
458    
459 dpavlin 1 my $KEYS = 0;
460     my $DATA = 1;
461     my $SUBNODES = 2;
462    
463 dpavlin 12 =head2 new
464 dpavlin 1
465     Create New node
466    
467     my $node = new jsFind::Node ($keylist, $datalist, $subnodelist);
468    
469     You can also mit argument list to create empty node.
470    
471     my $empty_node = new jsFind::Node;
472    
473     =cut
474    
475     sub new {
476     my $self = shift;
477     my $package = ref $self || $self;
478     croak "Internal error: jsFind::Node::new called with wrong number of arguments."
479     unless @_ == 3 || @_ == 0;
480     bless [@_] => $package;
481     }
482    
483 dpavlin 12 =head2 locate_key
484 dpavlin 1
485     Locate key in node using linear search. This should probably be replaced
486     by binary search for better performance.
487    
488     my ($found, $index) = $node->locate_key($key, $cmp_coderef);
489    
490     Argument C<$cmp_coderef> is optional reference to custom comparison
491     operator.
492    
493     Returns (1, $index) if $key[$index] eq $key.
494    
495     Returns (0, $index) if key could be found in $subnode[$index].
496    
497     In scalar context, just returns 1 or 0.
498    
499     =cut
500    
501     sub locate_key {
502     # Use linear search for testing, replace with binary search.
503     my $self = shift;
504     my $key = shift;
505     my $cmp = shift || \&jsFind::default_cmp;
506     my $i;
507     my $cmp_result;
508     my $N = $self->size;
509     for ($i = 0; $i < $N; $i++) {
510     $cmp_result = &$cmp($key, $self->key($i));
511     last if $cmp_result <= 0;
512     }
513    
514     # $i is now the index of the first node-key greater than $key
515     # or $N if there is no such. $cmp_result is 0 iff the key was found.
516     (!$cmp_result, $i);
517     }
518    
519    
520 dpavlin 12 =head2 emptynode
521 dpavlin 1
522     Creates new empty node
523    
524     $node = $root->emptynode;
525     $new_node = $node->emptynode;
526    
527     =cut
528    
529     sub emptynode {
530     new($_[0]); # Pass package name, but not anything else.
531     }
532    
533 dpavlin 12 =head2 is_empty
534 dpavlin 1
535     Test if node is empty
536    
537     if ($node->is_empty) { something }
538    
539     =cut
540    
541     # undef is empty; so is a blessed empty list.
542     sub is_empty {
543     my $self = shift;
544     !defined($self) || $#$self < 0;
545     }
546    
547 dpavlin 12 =head2 key
548 dpavlin 1
549     Return C<$i>th key from node
550    
551     my $key = $node->key($i);
552    
553     =cut
554    
555     sub key {
556     # my ($self, $n) = @_;
557     # $self->[$KEYS][$n];
558    
559     # speedup
560     $_[0]->[$KEYS][$_[1]];
561     }
562    
563 dpavlin 12 =head2 data
564 dpavlin 1
565     Return C<$i>th data from node
566    
567     my $data = $node->data($i);
568    
569     =cut
570    
571     sub data {
572     my ($self, $n) = @_;
573     $self->[$DATA][$n];
574     }
575    
576 dpavlin 12 =head2 kdp_replace
577 dpavlin 1
578     Set key data pair for C<$i>th element in node
579    
580     $node->kdp_replace($i, "key value" => {
581     "data key 1" => "data value 1",
582     "data key 2" => "data value 2",
583     };
584    
585     =cut
586    
587     sub kdp_replace {
588     my ($self, $n, $k => $d) = @_;
589     if (defined $k) {
590     $self->[$KEYS][$n] = $k;
591     $self->[$DATA][$n] = $d;
592     }
593     [$self->[$KEYS][$n],
594     $self->[$DATA][$n]];
595     }
596    
597 dpavlin 12 =head2 kdp_insert
598 dpavlin 1
599 dpavlin 12 Insert key/data pair in tree
600 dpavlin 1
601 dpavlin 12 $node->kdp_insert("key value" => "data value");
602    
603     No return value.
604    
605 dpavlin 1 =cut
606    
607     sub kdp_insert {
608     my $self = shift;
609     my ($k => $d) = @_;
610     my ($there, $where) = $self->locate_key($k) unless $self->is_empty;
611    
612     if ($there) { croak("Tried to insert `$k => $d' into node where `$k' was already present."); }
613    
614     # undef fix
615     $where ||= 0;
616    
617     splice(@{$self->[$KEYS]}, $where, 0, $k);
618     splice(@{$self->[$DATA]}, $where, 0, $d);
619     splice(@{$self->[$SUBNODES]}, $where, 0, undef);
620     }
621    
622 dpavlin 12 =head2 kdp_append
623 dpavlin 1
624     Adds new data keys and values to C<$i>th element in node
625    
626     $node->kdp_append($i, "key value" => {
627     "added data key" => "added data value",
628     };
629    
630     =cut
631    
632     sub kdp_append {
633     my ($self, $n, $k => $d) = @_;
634     if (defined $k) {
635     $self->[$KEYS][$n] = $k;
636     my ($kv,$dv) = %{$d};
637     $self->[$DATA][$n]->{$kv} = $dv;
638     }
639     [$self->[$KEYS][$n],
640     $self->[$DATA][$n]];
641     }
642    
643 dpavlin 12 =head2 subnode
644 dpavlin 1
645     Set new or return existing subnode
646    
647     # return 4th subnode
648     my $my_node = $node->subnode(4);
649    
650     # create new subnode 5 from $my_node
651     $node->subnode(5, $my_node);
652    
653     =cut
654    
655     sub subnode {
656     my ($self, $n, $newnode) = @_;
657     $self->[$SUBNODES][$n] = $newnode if defined $newnode;
658     $self->[$SUBNODES][$n];
659     }
660    
661 dpavlin 12 =head2 is_leaf
662 dpavlin 1
663     Test if node is leaf
664    
665     if ($node->is_leaf) { something }
666    
667     =cut
668    
669     sub is_leaf {
670     my $self = shift;
671     ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
672     }
673    
674 dpavlin 12 =head2 size
675 dpavlin 1
676     Return number of keys in the node
677    
678     my $nr = $node->size;
679    
680     =cut
681    
682     sub size {
683     my $self = shift;
684     return scalar(@{$self->[$KEYS]});
685     }
686    
687 dpavlin 12 =head2 halves
688 dpavlin 1
689 dpavlin 12 Split node into two halves so that keys C<0 .. $n-1> are in one node
690     and keys C<$n+1 ... $size> are in the other.
691 dpavlin 1
692 dpavlin 12 my ($left_node, $right_node, $kdp) = $node->halves($n);
693    
694 dpavlin 1 =cut
695    
696     sub halves {
697     my $self = shift;
698     my $n = shift;
699     my $s = $self->size;
700     my @right;
701     my @left;
702    
703     $left[$KEYS] = [@{$self->[$KEYS]}[0 .. $n-1]];
704     $left[$DATA] = [@{$self->[$DATA]}[0 .. $n-1]];
705     $left[$SUBNODES] = [@{$self->[$SUBNODES]}[0 .. $n]];
706    
707     $right[$KEYS] = [@{$self->[$KEYS]}[$n+1 .. $s-1]];
708     $right[$DATA] = [@{$self->[$DATA]}[$n+1 .. $s-1]];
709     $right[$SUBNODES] = [@{$self->[$SUBNODES]}[$n+1 .. $s]];
710    
711     my @middle = ($self->[$KEYS][$n], $self->[$DATA][$n]);
712    
713     ($self->new(@left), $self->new(@right), \@middle);
714     }
715    
716 dpavlin 12 =head2 to_string
717 dpavlin 1
718     Dumps tree as string
719    
720     my $str = $root->to_string;
721    
722     =cut
723    
724     sub to_string {
725     my $self = shift;
726     my $indent = shift || 0;
727     my $I = ' ' x $indent;
728     return '' if $self->is_empty;
729     my ($k, $d, $s) = @$self;
730     my $result = '';
731     $result .= defined($s->[0]) ? $s->[0]->to_string($indent+2) : '';
732     my $N = $self->size;
733     my $i;
734     for ($i = 0; $i < $N; $i++) {
735     # $result .= $I . "$k->[$i] => $d->[$i]\n";
736     $result .= $I . "$k->[$i]\n";
737     $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+2) : '';
738     }
739     $result;
740     }
741    
742     =begin comment
743    
744     use Data::Dumper;
745    
746     sub to_string {
747     my $self = shift;
748     my $indent = shift || 0;
749     my $path = shift || '0';
750     return '' if $self->is_empty;
751     my ($k, $d, $s) = @$self;
752     my $result = '';
753     $result .= defined($s->[0]) ? $s->[0]->to_string($indent+1,"$path/0") : '';
754     my $N = $self->size;
755     for (my $i = 0; $i < $N; $i++) {
756     my $dump = Dumper($d->[$i]);
757     $dump =~ s/[\n\r\s]+/ /gs;
758     $dump =~ s/\$VAR1\s*=\s*//;
759     $result .= sprintf("%-5s [%2d] %2s: %s => %s\n", $path, $i, $indent, $k->[$i], $dump);
760     $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+1,"$path/$i") : '';
761     }
762     $result;
763     }
764    
765     =end comment
766    
767 dpavlin 12 =head2 to_dot
768 dpavlin 1
769     Recursivly walk nodes of tree
770    
771     =cut
772    
773     sub to_dot {
774     my $self = shift;
775     my $parent = shift;
776    
777     return '' if $self->is_empty;
778    
779     my $dot = '';
780    
781     my ($k, $d, $s) = @$self;
782     my $N = $self->size;
783    
784     my @dot_keys;
785    
786     my $node_name = $parent || '_';
787     $node_name =~ s/\W+//g;
788     $node_name .= " [$N]";
789    
790     for (my $i = 0; $i <= $N; $i++) {
791     if (my $key = $k->[$i]) {
792     push @dot_keys, qq{<$i>$key};
793     }
794     $dot .= $s->[$i]->to_dot(qq{"$node_name":$i}) if ($s->[$i]);
795     }
796     push @dot_keys, qq{<$N>...} if (! $self->is_leaf);
797    
798     my $label = join("|",@dot_keys);
799     $dot .= qq{"$node_name" [ shape=record, label="$label" ];\n};
800    
801     $dot .= qq{$parent -> "$node_name";\n} if ($parent);
802    
803     $dot;
804     }
805    
806 dpavlin 12 =head2 to_xml
807 dpavlin 11
808     Escape <, >, & and ", and to produce valid XML
809    
810     =cut
811    
812     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
813     my $escape_re = join '|' => keys %escape;
814    
815     sub to_xml {
816     my $self = shift || confess "you should call to_xml as object!";
817    
818     my $d = shift || return;
819     $d = $self->SUPER::_recode($d);
820     confess "escape_re undefined!" unless ($escape_re);
821     $d =~ s/($escape_re)/$escape{$1}/g;
822     return $d;
823     }
824    
825 dpavlin 15 =head2 base62
826    
827     Convert number to base62 (used for jsFind index filenames).
828    
829     my $n = $tree->base62(50);
830    
831     =cut
832    
833     sub base62 {
834     my $self = shift;
835    
836     my $value = shift;
837    
838     confess("need non-negative number") if (! defined($value) || $value < 0);
839    
840     my @digits = qw(
841     0 1 2 3 4 5 6 7 8 9
842     a b c d e f g h i j k l m n o p q r s t u v w x y z
843     A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
844     );
845    
846     my $base = scalar(@digits);
847     my $out = "";
848     my $pow = 1;
849     my $pos = 0;
850    
851    
852     if($value == 0) {
853     return "0";
854     }
855    
856     while($value > 0) {
857     $pos = $value % $base;
858     $out = $digits[$pos] . $out;
859     $value = floor($value/$base);
860     $pow *= $base;
861     }
862    
863     return $out;
864     }
865    
866 dpavlin 12 =head2 to_jsfind
867 dpavlin 1
868     Create jsFind xml files
869    
870 dpavlin 8 my $nr=$tree->to_jsfind('/path/to/index','0');
871 dpavlin 1
872     Returns number of elements created
873    
874     =cut
875    
876     sub to_jsfind {
877     my $self = shift;
878     my ($path,$file) = @_;
879    
880     return 0 if $self->is_empty;
881    
882 dpavlin 8 confess("path is undefined.") unless ($path);
883     confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
884    
885 dpavlin 15 $file = $self->base62($file);
886    
887 dpavlin 1 my $nr_keys = 0;
888    
889     my ($k, $d, $s) = @$self;
890     my $N = $self->size;
891    
892     my ($key_xml, $data_xml) = ("<n>","<d>");
893    
894     for (my $i = 0; $i <= $N; $i++) {
895     my $key = lc($k->[$i]);
896    
897     if ($key) {
898 dpavlin 11 $key_xml .= '<k>'.$self->to_xml($key).'</k>';
899     $data_xml .= '<e>';
900 dpavlin 1 #use Data::Dumper;
901     #print Dumper($d->[$i]);
902     foreach my $path (keys %{$d->[$i]}) {
903 dpavlin 11 $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>';
904 dpavlin 1 $nr_keys++;
905     }
906 dpavlin 11 $data_xml .= '</e>';
907 dpavlin 1 }
908    
909     $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
910     }
911    
912 dpavlin 11 $key_xml .= '</n>';
913     $data_xml .= '</d>';
914 dpavlin 1
915     if (! -e $path) {
916     mkpath($path) || croak "can't create dir '$path': $!";
917     }
918    
919     open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";
920     open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";
921    
922 dpavlin 11 print K $key_xml;
923     print D $data_xml;
924 dpavlin 1
925     close(K);
926     close(D);
927    
928     return $nr_keys;
929     }
930    
931     1;
932     __END__
933    
934     =head1 SEE ALSO
935    
936     jsFind web site L<http://www.elucidsoft.net/projects/jsfind/>
937    
938     B-Trees in perl web site L<http://perl.plover.com/BTree/>
939    
940 dpavlin 14 This module web site L<http://www.rot13.org/~dpavlin/jsFind.html>
941    
942 dpavlin 1 =head1 AUTHORS
943    
944     Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was
945     base for this module
946    
947     Shawn P. Garbett E<lt>shawn@elucidsoft.netE<gt> wrote jsFind
948    
949     Dobrica Pavlinusic E<lt>dpavlin@rot13.orgE<gt> wrote this module
950    
951     =head1 COPYRIGHT AND LICENSE
952    
953     Copyright (C) 2004 by Dobrica Pavlinusic
954    
955     This program is free software; you can redistribute it and/or modify it
956     under the terms of the GNU General Public License as published by the Free
957     Software Foundation; either version 2 of the License, or (at your option)
958     any later version. This program is distributed in the hope that it will be
959     useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
960     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
961     Public License for more details.
962    
963     =cut

  ViewVC Help
Powered by ViewVC 1.1.26