/[jsFind]/trunk/jsFind.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/jsFind.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 15 - (hide annotations)
Sun Sep 5 17:57:21 2004 UTC (19 years, 8 months ago) by dpavlin
File size: 18605 byte(s)
version 0.04: fix bug when creating jsFind index files without first
encoding numbers in base62

1 dpavlin 1 package jsFind;
2    
3 dpavlin 14 use 5.005;
4 dpavlin 1 use strict;
5     use warnings;
6 dpavlin 11 use HTML::Entities;
7 dpavlin 1
8 dpavlin 15 our $VERSION = '0.04';
9 dpavlin 1
10 dpavlin 12 use Exporter 'import';
11     use Carp;
12    
13     our @ISA = qw(Exporter);
14    
15     BEGIN {
16     import 'jsFind::Node';
17     }
18    
19 dpavlin 1 =head1 NAME
20    
21     jsFind - generate index for jsFind using B-Tree
22    
23     =head1 SYNOPSIS
24    
25     use jsFind;
26 dpavlin 12 my $t = new jsFind(B => 4);
27     my $f = 1;
28     foreach my $k (qw{minima ut dolorem sapiente voluptatem}) {
29     $t->B_search(Key => $k,
30     Data => {
31     "path" => {
32     t => "word $k",
33     f => $f },
34     },
35     Insert => 1,
36     Append => 1,
37     );
38     }
39 dpavlin 1
40     =head1 DESCRIPTION
41    
42     This module can be used to create index files for jsFind, powerful tool for
43     adding a search engine to a CDROM archive or catalog without requiring the
44     user to install anything.
45    
46     Main difference between this module and scripts delivered with jsFind are:
47    
48     =over 5
49    
50     =item *
51    
52     You don't need to use swish-e to create index
53    
54     =item *
55    
56     You can programatically (and incrementaly) create index for jsFind
57    
58     =back
59    
60 dpavlin 14 You can also examine examples which come as tests with this module,
61     for example C<t/04words.t>.
62    
63 dpavlin 12 =head1 jsFind methods
64 dpavlin 1
65 dpavlin 12 C<jsFind> is mode implementing methods which you, the user, are going to
66     use to create indexes.
67 dpavlin 1
68 dpavlin 12 =head2 new
69 dpavlin 1
70     Create new tree. Arguments are C<B> which is maximum numbers of keys in
71     each node and optional C<Root> node. Each root node may have child nodes.
72    
73     All nodes are objects from C<jsFind::Node>.
74    
75     my $t = new jsFind(B => 4);
76    
77     =cut
78    
79     my $DEBUG = 1;
80    
81     sub new {
82     my $package = shift;
83     my %ARGV = @_;
84     croak "Usage: {$package}::new(B => number [, Root => root node ])"
85     unless exists $ARGV{B};
86     if ($ARGV{B} % 2) {
87     my $B = $ARGV{B} + 1;
88     carp "B must be an even number. Using $B instead.";
89     $ARGV{B} = $B;
90     }
91    
92     my $B = $ARGV{B};
93     my $Root = exists($ARGV{Root}) ? $ARGV{Root} : jsFind::Node->emptynode;
94     bless { B => $B, Root => $Root } => $package;
95     }
96    
97 dpavlin 12 =head2 B_search
98 dpavlin 1
99     Search, insert, append or replace data in B-Tree
100    
101 dpavlin 5 $t->B_search(
102     Key => 'key value',
103     Data => { "path" => {
104     "t" => "title of document",
105     "f" => 99,
106 dpavlin 7 },
107 dpavlin 5 },
108     Insert => 1,
109     Append => 1,
110     );
111 dpavlin 1
112     Semantics:
113    
114     If key not found, insert it iff C<Insert> argument is present.
115    
116     If key B<is> found, replace existing data iff C<Replace> argument
117     is present or add new datum to existing iff C<Append> argument is present.
118    
119     =cut
120    
121     sub B_search {
122     my $self = shift;
123     my %args = @_;
124     my $cur_node = $self->root;
125     my $k = $args{Key};
126     my $d = $args{Data};
127     my @path;
128    
129     if ($cur_node->is_empty) { # Special case for empty root
130     if ($args{Insert}) {
131     $cur_node->kdp_insert($k => $d);
132     return $d;
133     } else {
134     return undef;
135     }
136     }
137    
138     # Descend tree to leaf
139     for (;;) {
140    
141     # Didn't hit bottom yet.
142    
143     my($there, $where) = $cur_node->locate_key($k);
144     if ($there) { # Found it!
145     if ($args{Replace}) {
146     $cur_node->kdp_replace($where, $k => $d);
147     } elsif ($args{Append}) {
148     $cur_node->kdp_append($where, $k => $d);
149     }
150     return $cur_node->data($where);
151     }
152    
153     # Not here---must be in a subtree.
154    
155     if ($cur_node->is_leaf) { # But there are no subtrees
156     return undef unless $args{Insert}; # Search failed
157     # Stuff it in
158     $cur_node->kdp_insert($k => $d);
159     if ($self->node_overfull($cur_node)) { # Oops--there was no room.
160     $self->split_and_promote($cur_node, @path);
161     }
162     return $d;
163     }
164    
165     # There are subtrees, and the key is in one of them.
166    
167     push @path, [$cur_node, $where]; # Record path from root.
168    
169     # Move down to search the subtree
170     $cur_node = $cur_node->subnode($where);
171    
172     # and start over.
173     } # for (;;) ...
174    
175     croak ("How did I get here?");
176     }
177    
178    
179    
180     sub split_and_promote_old {
181     my $self = shift;
182     my ($cur_node, @path) = @_;
183    
184     for (;;) {
185     my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
186     my ($up, $where) = @{pop @path};
187     if ($up) {
188     $up->kdp_insert(@$kdp);
189     my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
190     croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
191     unless $tthere;
192     croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
193     unless $twhere == $where;
194     $up->subnode($where, $newleft);
195     $up->subnode($where+1, $newright);
196     return unless $self->node_overfull($up);
197     $cur_node = $up;
198     } else { # We're at the top; make a new root.
199     my $newroot = new jsFind::Node ([$kdp->[0]],
200     [$kdp->[1]],
201     [$newleft, $newright]);
202     $self->root($newroot);
203     return;
204     }
205     }
206    
207     }
208    
209     sub split_and_promote {
210     my $self = shift;
211     my ($cur_node, @path) = @_;
212    
213     for (;;) {
214     my ($newleft, $newright, $kdp) = $cur_node->halves($self->B / 2);
215     my ($up, $where) = @{pop @path} if (@path);
216     if ($up) {
217     $up->kdp_insert(@$kdp);
218     if ($DEBUG) {
219     my ($tthere, $twhere) = $up->locate_key($kdp->[0]);
220     croak "Couldn't find key `$kdp->[0]' in node after just inserting it!"
221     unless $tthere;
222     croak "`$kdp->[0]' went into node at `$twhere' instead of expected `$where'!"
223     unless $twhere == $where;
224     }
225     $up->subnode($where, $newleft);
226     $up->subnode($where+1, $newright);
227     return unless $self->node_overfull($up);
228     $cur_node = $up;
229     } else { # We're at the top; make a new root.
230     my $newroot = new jsFind::Node([$kdp->[0]],
231     [$kdp->[1]],
232     [$newleft, $newright]);
233     $self->root($newroot);
234     return;
235     }
236     }
237     }
238    
239 dpavlin 12 =head2 B
240 dpavlin 1
241     Return B (maximum number of keys)
242    
243     my $max_size = $t->B;
244    
245     =cut
246    
247     sub B {
248     $_[0]{B};
249     }
250    
251 dpavlin 12 =head2 root
252 dpavlin 1
253     Returns root node
254    
255     my $root = $t->root;
256    
257     =cut
258    
259     sub root {
260     my ($self, $newroot) = @_;
261     $self->{Root} = $newroot if defined $newroot;
262     $self->{Root};
263     }
264    
265 dpavlin 12 =head2 node_overfull
266 dpavlin 1
267     Returns if node is overfull
268    
269     if ($node->node_overfull) { something }
270    
271     =cut
272    
273     sub node_overfull {
274     my $self = shift;
275     my $node = shift;
276     $node->size > $self->B;
277     }
278    
279 dpavlin 12 =head2 to_string
280 dpavlin 1
281     Returns your tree as formatted string.
282    
283     my $text = $root->to_string;
284    
285     Mostly usefull for debugging as output leaves much to be desired.
286    
287     =cut
288    
289     sub to_string {
290     $_[0]->root->to_string;
291     }
292    
293 dpavlin 12 =head2 to_dot
294 dpavlin 1
295     Create Graphviz graph of your tree
296    
297     my $dot_graph = $root->to_dot;
298    
299     =cut
300    
301     sub to_dot {
302     my $self = shift;
303    
304     my $dot = qq/digraph dns {\nrankdir=LR;\n/;
305     $dot .= $self->root->to_dot;
306     $dot .= qq/\n}\n/;
307    
308     return $dot;
309     }
310    
311 dpavlin 12 =head2 to_jsfind
312 dpavlin 1
313     Create xml index files for jsFind. This should be called after
314     your B-Tree has been filled with data.
315    
316     $root->to_jsfind('/full/path/to/index/dir/');
317    
318     Returns number of nodes in created tree.
319    
320 dpavlin 9 There is also longer version if you want to recode your data charset
321     into different one (probably UTF-8):
322    
323     $root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8');
324    
325 dpavlin 11 Destination encoding is UTF-8 by default, so you don't have to specify it.
326    
327     $root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250');
328    
329 dpavlin 1 =cut
330    
331 dpavlin 9 my $iconv;
332 dpavlin 11 my $iconv_l1;
333 dpavlin 9
334 dpavlin 1 sub to_jsfind {
335     my $self = shift;
336    
337     my $path = shift || confess "to_jsfind need path to your index!";
338    
339 dpavlin 9 my ($from_cp,$to_cp) = @_;
340 dpavlin 11
341     $to_cp ||= 'UTF-8';
342    
343 dpavlin 9 if ($from_cp && $to_cp) {
344     $iconv = Text::Iconv->new($from_cp,$to_cp);
345     }
346 dpavlin 11 $iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp);
347 dpavlin 9
348 dpavlin 1 $path .= "/" if ($path =~ /\/$/);
349 dpavlin 11 #carp "creating directory for index '$path'" if (! -w $path);
350 dpavlin 1
351     return $self->root->to_jsfind($path,"0");
352     }
353    
354    
355     # private, default cmd function
356     sub default_cmp {
357     $_[0] cmp $_[1];
358     }
359    
360 dpavlin 12 =head2 _recode
361 dpavlin 9
362     This is internal function to recode charset.
363    
364 dpavlin 12 It will also try to decode entities in data using L<HTML::Entities>.
365 dpavlin 11
366 dpavlin 9 =cut
367    
368     sub _recode {
369     my $self = shift;
370     my $text = shift || return;
371    
372 dpavlin 11 sub _decode_html_entities {
373     my $data = shift || return;
374     $data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data";
375     }
376    
377 dpavlin 9 if ($iconv) {
378 dpavlin 11 $text = $iconv->convert($text) || $text && carp "convert problem: $text";
379     $text =~ s/(\&\w+;)/_decode_html_entities($1)/ges;
380 dpavlin 9 }
381 dpavlin 11
382     return $text;
383 dpavlin 9 }
384    
385 dpavlin 1 #####################################################################
386    
387 dpavlin 12 =head1 jsFind::Node methods
388 dpavlin 1
389     Each node has C<k> key-data pairs, with C<B> <= C<k> <= C<2B>, and
390     each has C<k+1> subnodes, which might be null.
391    
392     The node is a blessed reference to a list with three elements:
393    
394     ($keylist, $datalist, $subnodelist)
395    
396     each is a reference to a list list.
397    
398     The null node is represented by a blessed reference to an empty list.
399    
400     =cut
401    
402     package jsFind::Node;
403    
404     use warnings;
405     use strict;
406    
407     use Carp;
408     use File::Path;
409 dpavlin 9 use Text::Iconv;
410 dpavlin 15 use POSIX;
411 dpavlin 1
412 dpavlin 9 use base 'jsFind';
413    
414 dpavlin 1 my $KEYS = 0;
415     my $DATA = 1;
416     my $SUBNODES = 2;
417    
418 dpavlin 12 =head2 new
419 dpavlin 1
420     Create New node
421    
422     my $node = new jsFind::Node ($keylist, $datalist, $subnodelist);
423    
424     You can also mit argument list to create empty node.
425    
426     my $empty_node = new jsFind::Node;
427    
428     =cut
429    
430     sub new {
431     my $self = shift;
432     my $package = ref $self || $self;
433     croak "Internal error: jsFind::Node::new called with wrong number of arguments."
434     unless @_ == 3 || @_ == 0;
435     bless [@_] => $package;
436     }
437    
438 dpavlin 12 =head2 locate_key
439 dpavlin 1
440     Locate key in node using linear search. This should probably be replaced
441     by binary search for better performance.
442    
443     my ($found, $index) = $node->locate_key($key, $cmp_coderef);
444    
445     Argument C<$cmp_coderef> is optional reference to custom comparison
446     operator.
447    
448     Returns (1, $index) if $key[$index] eq $key.
449    
450     Returns (0, $index) if key could be found in $subnode[$index].
451    
452     In scalar context, just returns 1 or 0.
453    
454     =cut
455    
456     sub locate_key {
457     # Use linear search for testing, replace with binary search.
458     my $self = shift;
459     my $key = shift;
460     my $cmp = shift || \&jsFind::default_cmp;
461     my $i;
462     my $cmp_result;
463     my $N = $self->size;
464     for ($i = 0; $i < $N; $i++) {
465     $cmp_result = &$cmp($key, $self->key($i));
466     last if $cmp_result <= 0;
467     }
468    
469     # $i is now the index of the first node-key greater than $key
470     # or $N if there is no such. $cmp_result is 0 iff the key was found.
471     (!$cmp_result, $i);
472     }
473    
474    
475 dpavlin 12 =head2 emptynode
476 dpavlin 1
477     Creates new empty node
478    
479     $node = $root->emptynode;
480     $new_node = $node->emptynode;
481    
482     =cut
483    
484     sub emptynode {
485     new($_[0]); # Pass package name, but not anything else.
486     }
487    
488 dpavlin 12 =head2 is_empty
489 dpavlin 1
490     Test if node is empty
491    
492     if ($node->is_empty) { something }
493    
494     =cut
495    
496     # undef is empty; so is a blessed empty list.
497     sub is_empty {
498     my $self = shift;
499     !defined($self) || $#$self < 0;
500     }
501    
502 dpavlin 12 =head2 key
503 dpavlin 1
504     Return C<$i>th key from node
505    
506     my $key = $node->key($i);
507    
508     =cut
509    
510     sub key {
511     # my ($self, $n) = @_;
512     # $self->[$KEYS][$n];
513    
514     # speedup
515     $_[0]->[$KEYS][$_[1]];
516     }
517    
518 dpavlin 12 =head2 data
519 dpavlin 1
520     Return C<$i>th data from node
521    
522     my $data = $node->data($i);
523    
524     =cut
525    
526     sub data {
527     my ($self, $n) = @_;
528     $self->[$DATA][$n];
529     }
530    
531 dpavlin 12 =head2 kdp_replace
532 dpavlin 1
533     Set key data pair for C<$i>th element in node
534    
535     $node->kdp_replace($i, "key value" => {
536     "data key 1" => "data value 1",
537     "data key 2" => "data value 2",
538     };
539    
540     =cut
541    
542     sub kdp_replace {
543     my ($self, $n, $k => $d) = @_;
544     if (defined $k) {
545     $self->[$KEYS][$n] = $k;
546     $self->[$DATA][$n] = $d;
547     }
548     [$self->[$KEYS][$n],
549     $self->[$DATA][$n]];
550     }
551    
552 dpavlin 12 =head2 kdp_insert
553 dpavlin 1
554 dpavlin 12 Insert key/data pair in tree
555 dpavlin 1
556 dpavlin 12 $node->kdp_insert("key value" => "data value");
557    
558     No return value.
559    
560 dpavlin 1 =cut
561    
562     sub kdp_insert {
563     my $self = shift;
564     my ($k => $d) = @_;
565     my ($there, $where) = $self->locate_key($k) unless $self->is_empty;
566    
567     if ($there) { croak("Tried to insert `$k => $d' into node where `$k' was already present."); }
568    
569     # undef fix
570     $where ||= 0;
571    
572     splice(@{$self->[$KEYS]}, $where, 0, $k);
573     splice(@{$self->[$DATA]}, $where, 0, $d);
574     splice(@{$self->[$SUBNODES]}, $where, 0, undef);
575     }
576    
577 dpavlin 12 =head2 kdp_append
578 dpavlin 1
579     Adds new data keys and values to C<$i>th element in node
580    
581     $node->kdp_append($i, "key value" => {
582     "added data key" => "added data value",
583     };
584    
585     =cut
586    
587     sub kdp_append {
588     my ($self, $n, $k => $d) = @_;
589     if (defined $k) {
590     $self->[$KEYS][$n] = $k;
591     my ($kv,$dv) = %{$d};
592     $self->[$DATA][$n]->{$kv} = $dv;
593     }
594     [$self->[$KEYS][$n],
595     $self->[$DATA][$n]];
596     }
597    
598 dpavlin 12 =head2 subnode
599 dpavlin 1
600     Set new or return existing subnode
601    
602     # return 4th subnode
603     my $my_node = $node->subnode(4);
604    
605     # create new subnode 5 from $my_node
606     $node->subnode(5, $my_node);
607    
608     =cut
609    
610     sub subnode {
611     my ($self, $n, $newnode) = @_;
612     $self->[$SUBNODES][$n] = $newnode if defined $newnode;
613     $self->[$SUBNODES][$n];
614     }
615    
616 dpavlin 12 =head2 is_leaf
617 dpavlin 1
618     Test if node is leaf
619    
620     if ($node->is_leaf) { something }
621    
622     =cut
623    
624     sub is_leaf {
625     my $self = shift;
626     ! defined $self->[$SUBNODES][0]; # undefined subnode means leaf node.
627     }
628    
629 dpavlin 12 =head2 size
630 dpavlin 1
631     Return number of keys in the node
632    
633     my $nr = $node->size;
634    
635     =cut
636    
637     sub size {
638     my $self = shift;
639     return scalar(@{$self->[$KEYS]});
640     }
641    
642 dpavlin 12 =head2 halves
643 dpavlin 1
644 dpavlin 12 Split node into two halves so that keys C<0 .. $n-1> are in one node
645     and keys C<$n+1 ... $size> are in the other.
646 dpavlin 1
647 dpavlin 12 my ($left_node, $right_node, $kdp) = $node->halves($n);
648    
649 dpavlin 1 =cut
650    
651     sub halves {
652     my $self = shift;
653     my $n = shift;
654     my $s = $self->size;
655     my @right;
656     my @left;
657    
658     $left[$KEYS] = [@{$self->[$KEYS]}[0 .. $n-1]];
659     $left[$DATA] = [@{$self->[$DATA]}[0 .. $n-1]];
660     $left[$SUBNODES] = [@{$self->[$SUBNODES]}[0 .. $n]];
661    
662     $right[$KEYS] = [@{$self->[$KEYS]}[$n+1 .. $s-1]];
663     $right[$DATA] = [@{$self->[$DATA]}[$n+1 .. $s-1]];
664     $right[$SUBNODES] = [@{$self->[$SUBNODES]}[$n+1 .. $s]];
665    
666     my @middle = ($self->[$KEYS][$n], $self->[$DATA][$n]);
667    
668     ($self->new(@left), $self->new(@right), \@middle);
669     }
670    
671 dpavlin 12 =head2 to_string
672 dpavlin 1
673     Dumps tree as string
674    
675     my $str = $root->to_string;
676    
677     =cut
678    
679     sub to_string {
680     my $self = shift;
681     my $indent = shift || 0;
682     my $I = ' ' x $indent;
683     return '' if $self->is_empty;
684     my ($k, $d, $s) = @$self;
685     my $result = '';
686     $result .= defined($s->[0]) ? $s->[0]->to_string($indent+2) : '';
687     my $N = $self->size;
688     my $i;
689     for ($i = 0; $i < $N; $i++) {
690     # $result .= $I . "$k->[$i] => $d->[$i]\n";
691     $result .= $I . "$k->[$i]\n";
692     $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+2) : '';
693     }
694     $result;
695     }
696    
697     =begin comment
698    
699     use Data::Dumper;
700    
701     sub to_string {
702     my $self = shift;
703     my $indent = shift || 0;
704     my $path = shift || '0';
705     return '' if $self->is_empty;
706     my ($k, $d, $s) = @$self;
707     my $result = '';
708     $result .= defined($s->[0]) ? $s->[0]->to_string($indent+1,"$path/0") : '';
709     my $N = $self->size;
710     for (my $i = 0; $i < $N; $i++) {
711     my $dump = Dumper($d->[$i]);
712     $dump =~ s/[\n\r\s]+/ /gs;
713     $dump =~ s/\$VAR1\s*=\s*//;
714     $result .= sprintf("%-5s [%2d] %2s: %s => %s\n", $path, $i, $indent, $k->[$i], $dump);
715     $result .= defined($s->[$i+1]) ? $s->[$i+1]->to_string($indent+1,"$path/$i") : '';
716     }
717     $result;
718     }
719    
720     =end comment
721    
722 dpavlin 12 =head2 to_dot
723 dpavlin 1
724     Recursivly walk nodes of tree
725    
726     =cut
727    
728     sub to_dot {
729     my $self = shift;
730     my $parent = shift;
731    
732     return '' if $self->is_empty;
733    
734     my $dot = '';
735    
736     my ($k, $d, $s) = @$self;
737     my $N = $self->size;
738    
739     my @dot_keys;
740    
741     my $node_name = $parent || '_';
742     $node_name =~ s/\W+//g;
743     $node_name .= " [$N]";
744    
745     for (my $i = 0; $i <= $N; $i++) {
746     if (my $key = $k->[$i]) {
747     push @dot_keys, qq{<$i>$key};
748     }
749     $dot .= $s->[$i]->to_dot(qq{"$node_name":$i}) if ($s->[$i]);
750     }
751     push @dot_keys, qq{<$N>...} if (! $self->is_leaf);
752    
753     my $label = join("|",@dot_keys);
754     $dot .= qq{"$node_name" [ shape=record, label="$label" ];\n};
755    
756     $dot .= qq{$parent -> "$node_name";\n} if ($parent);
757    
758     $dot;
759     }
760    
761 dpavlin 12 =head2 to_xml
762 dpavlin 11
763     Escape <, >, & and ", and to produce valid XML
764    
765     =cut
766    
767     my %escape = ('<'=>'&lt;', '>'=>'&gt;', '&'=>'&amp;', '"'=>'&quot;');
768     my $escape_re = join '|' => keys %escape;
769    
770     sub to_xml {
771     my $self = shift || confess "you should call to_xml as object!";
772    
773     my $d = shift || return;
774     $d = $self->SUPER::_recode($d);
775     confess "escape_re undefined!" unless ($escape_re);
776     $d =~ s/($escape_re)/$escape{$1}/g;
777     return $d;
778     }
779    
780 dpavlin 15 =head2 base62
781    
782     Convert number to base62 (used for jsFind index filenames).
783    
784     my $n = $tree->base62(50);
785    
786     =cut
787    
788     sub base62 {
789     my $self = shift;
790    
791     my $value = shift;
792    
793     confess("need non-negative number") if (! defined($value) || $value < 0);
794    
795     my @digits = qw(
796     0 1 2 3 4 5 6 7 8 9
797     a b c d e f g h i j k l m n o p q r s t u v w x y z
798     A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
799     );
800    
801     my $base = scalar(@digits);
802     my $out = "";
803     my $pow = 1;
804     my $pos = 0;
805    
806    
807     if($value == 0) {
808     return "0";
809     }
810    
811     while($value > 0) {
812     $pos = $value % $base;
813     $out = $digits[$pos] . $out;
814     $value = floor($value/$base);
815     $pow *= $base;
816     }
817    
818     return $out;
819     }
820    
821 dpavlin 12 =head2 to_jsfind
822 dpavlin 1
823     Create jsFind xml files
824    
825 dpavlin 8 my $nr=$tree->to_jsfind('/path/to/index','0');
826 dpavlin 1
827     Returns number of elements created
828    
829     =cut
830    
831     sub to_jsfind {
832     my $self = shift;
833     my ($path,$file) = @_;
834    
835     return 0 if $self->is_empty;
836    
837 dpavlin 8 confess("path is undefined.") unless ($path);
838     confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file));
839    
840 dpavlin 15 $file = $self->base62($file);
841    
842 dpavlin 1 my $nr_keys = 0;
843    
844     my ($k, $d, $s) = @$self;
845     my $N = $self->size;
846    
847     my ($key_xml, $data_xml) = ("<n>","<d>");
848    
849     for (my $i = 0; $i <= $N; $i++) {
850     my $key = lc($k->[$i]);
851    
852     if ($key) {
853 dpavlin 11 $key_xml .= '<k>'.$self->to_xml($key).'</k>';
854     $data_xml .= '<e>';
855 dpavlin 1 #use Data::Dumper;
856     #print Dumper($d->[$i]);
857     foreach my $path (keys %{$d->[$i]}) {
858 dpavlin 11 $data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>';
859 dpavlin 1 $nr_keys++;
860     }
861 dpavlin 11 $data_xml .= '</e>';
862 dpavlin 1 }
863    
864     $nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]);
865     }
866    
867 dpavlin 11 $key_xml .= '</n>';
868     $data_xml .= '</d>';
869 dpavlin 1
870     if (! -e $path) {
871     mkpath($path) || croak "can't create dir '$path': $!";
872     }
873    
874     open(K, "> ${path}/${file}.xml") || croak "can't open '$path/$file.xml': $!";
875     open(D, "> ${path}/_${file}.xml") || croak "can't open '$path/_$file.xml': $!";
876    
877 dpavlin 11 print K $key_xml;
878     print D $data_xml;
879 dpavlin 1
880     close(K);
881     close(D);
882    
883     return $nr_keys;
884     }
885    
886     1;
887     __END__
888    
889     =head1 SEE ALSO
890    
891     jsFind web site L<http://www.elucidsoft.net/projects/jsfind/>
892    
893     B-Trees in perl web site L<http://perl.plover.com/BTree/>
894    
895 dpavlin 14 This module web site L<http://www.rot13.org/~dpavlin/jsFind.html>
896    
897 dpavlin 1 =head1 AUTHORS
898    
899     Mark-Jonson Dominus E<lt>mjd@pobox.comE<gt> wrote C<BTree.pm> which was
900     base for this module
901    
902     Shawn P. Garbett E<lt>shawn@elucidsoft.netE<gt> wrote jsFind
903    
904     Dobrica Pavlinusic E<lt>dpavlin@rot13.orgE<gt> wrote this module
905    
906     =head1 COPYRIGHT AND LICENSE
907    
908     Copyright (C) 2004 by Dobrica Pavlinusic
909    
910     This program is free software; you can redistribute it and/or modify it
911     under the terms of the GNU General Public License as published by the Free
912     Software Foundation; either version 2 of the License, or (at your option)
913     any later version. This program is distributed in the hope that it will be
914     useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
915     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
916     Public License for more details.
917    
918     =cut

  ViewVC Help
Powered by ViewVC 1.1.26