3 |
use 5.008004; |
use 5.008004; |
4 |
use strict; |
use strict; |
5 |
use warnings; |
use warnings; |
6 |
|
use HTML::Entities; |
7 |
|
|
8 |
our $VERSION = '0.01'; |
our $VERSION = '0.03'; |
9 |
|
|
10 |
=head1 NAME |
=head1 NAME |
11 |
|
|
92 |
Data => { "path" => { |
Data => { "path" => { |
93 |
"t" => "title of document", |
"t" => "title of document", |
94 |
"f" => 99, |
"f" => 99, |
95 |
|
}, |
96 |
}, |
}, |
97 |
Insert => 1, |
Insert => 1, |
98 |
Append => 1, |
Append => 1, |
306 |
|
|
307 |
Returns number of nodes in created tree. |
Returns number of nodes in created tree. |
308 |
|
|
309 |
|
There is also longer version if you want to recode your data charset |
310 |
|
into different one (probably UTF-8): |
311 |
|
|
312 |
|
$root->to_jsfind('/full/path/to/index/dir/','ISO-8859-2','UTF-8'); |
313 |
|
|
314 |
|
Destination encoding is UTF-8 by default, so you don't have to specify it. |
315 |
|
|
316 |
|
$root->to_jsfind('/full/path/to/index/dir/','WINDOWS-1250'); |
317 |
|
|
318 |
=cut |
=cut |
319 |
|
|
320 |
|
my $iconv; |
321 |
|
my $iconv_l1; |
322 |
|
|
323 |
sub to_jsfind { |
sub to_jsfind { |
324 |
my $self = shift; |
my $self = shift; |
325 |
|
|
326 |
my $path = shift || confess "to_jsfind need path to your index!"; |
my $path = shift || confess "to_jsfind need path to your index!"; |
327 |
|
|
328 |
|
my ($from_cp,$to_cp) = @_; |
329 |
|
|
330 |
|
$to_cp ||= 'UTF-8'; |
331 |
|
|
332 |
|
if ($from_cp && $to_cp) { |
333 |
|
$iconv = Text::Iconv->new($from_cp,$to_cp); |
334 |
|
} |
335 |
|
$iconv_l1 = Text::Iconv->new('ISO-8859-1',$to_cp); |
336 |
|
|
337 |
$path .= "/" if ($path =~ /\/$/); |
$path .= "/" if ($path =~ /\/$/); |
338 |
carp "can't create index in '$path': $!" if (! -w $path); |
#carp "creating directory for index '$path'" if (! -w $path); |
339 |
|
|
340 |
return $self->root->to_jsfind($path,"0"); |
return $self->root->to_jsfind($path,"0"); |
341 |
} |
} |
346 |
$_[0] cmp $_[1]; |
$_[0] cmp $_[1]; |
347 |
} |
} |
348 |
|
|
349 |
|
=head3 _recode |
350 |
|
|
351 |
|
This is internal function to recode charset. |
352 |
|
|
353 |
|
It will also try to decode HTML::Entities in data. |
354 |
|
|
355 |
|
=cut |
356 |
|
|
357 |
|
sub _recode { |
358 |
|
my $self = shift; |
359 |
|
my $text = shift || return; |
360 |
|
|
361 |
|
sub _decode_html_entities { |
362 |
|
my $data = shift || return; |
363 |
|
$data = $iconv_l1->convert(decode_entities($data)) || croak "entity decode problem: $data"; |
364 |
|
} |
365 |
|
|
366 |
|
if ($iconv) { |
367 |
|
$text = $iconv->convert($text) || $text && carp "convert problem: $text"; |
368 |
|
$text =~ s/(\&\w+;)/_decode_html_entities($1)/ges; |
369 |
|
} |
370 |
|
|
371 |
|
return $text; |
372 |
|
} |
373 |
|
|
374 |
##################################################################### |
##################################################################### |
375 |
|
|
376 |
=head2 jsFind::Node methods |
=head2 jsFind::Node methods |
395 |
|
|
396 |
use Carp; |
use Carp; |
397 |
use File::Path; |
use File::Path; |
398 |
|
use Text::Iconv; |
399 |
|
|
400 |
|
use base 'jsFind'; |
401 |
|
|
402 |
my $KEYS = 0; |
my $KEYS = 0; |
403 |
my $DATA = 1; |
my $DATA = 1; |
741 |
$dot; |
$dot; |
742 |
} |
} |
743 |
|
|
744 |
|
=head3 to_xml |
745 |
|
|
746 |
|
Escape <, >, & and ", and to produce valid XML |
747 |
|
|
748 |
|
=cut |
749 |
|
|
750 |
|
my %escape = ('<'=>'<', '>'=>'>', '&'=>'&', '"'=>'"'); |
751 |
|
my $escape_re = join '|' => keys %escape; |
752 |
|
|
753 |
|
sub to_xml { |
754 |
|
my $self = shift || confess "you should call to_xml as object!"; |
755 |
|
|
756 |
|
my $d = shift || return; |
757 |
|
$d = $self->SUPER::_recode($d); |
758 |
|
confess "escape_re undefined!" unless ($escape_re); |
759 |
|
$d =~ s/($escape_re)/$escape{$1}/g; |
760 |
|
return $d; |
761 |
|
} |
762 |
|
|
763 |
=head3 to_jsfind |
=head3 to_jsfind |
764 |
|
|
765 |
Create jsFind xml files |
Create jsFind xml files |
766 |
|
|
767 |
my $nr=$tree->to_dot('/path/to/index','0'); |
my $nr=$tree->to_jsfind('/path/to/index','0'); |
768 |
|
|
769 |
Returns number of elements created |
Returns number of elements created |
770 |
|
|
771 |
=cut |
=cut |
772 |
|
|
773 |
|
|
774 |
sub to_jsfind { |
sub to_jsfind { |
775 |
my $self = shift; |
my $self = shift; |
776 |
my ($path,$file) = @_; |
my ($path,$file) = @_; |
777 |
|
|
778 |
return 0 if $self->is_empty; |
return 0 if $self->is_empty; |
779 |
|
|
780 |
|
confess("path is undefined.") unless ($path); |
781 |
|
confess("file is undefined. Did you call \$t->root->to_jsfind(..) instead of \$t->to_jsfind(..) ?") unless (defined($file)); |
782 |
|
|
783 |
my $nr_keys = 0; |
my $nr_keys = 0; |
784 |
|
|
785 |
my ($k, $d, $s) = @$self; |
my ($k, $d, $s) = @$self; |
791 |
my $key = lc($k->[$i]); |
my $key = lc($k->[$i]); |
792 |
|
|
793 |
if ($key) { |
if ($key) { |
794 |
$key_xml .= qq{<k>$key</k>}; |
$key_xml .= '<k>'.$self->to_xml($key).'</k>'; |
795 |
$data_xml .= qq{<e>}; |
$data_xml .= '<e>'; |
796 |
#use Data::Dumper; |
#use Data::Dumper; |
797 |
#print Dumper($d->[$i]); |
#print Dumper($d->[$i]); |
798 |
foreach my $path (keys %{$d->[$i]}) { |
foreach my $path (keys %{$d->[$i]}) { |
799 |
$data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.($d->[$i]->{$path}->{'t'} || 'no title').'">'.$path.'</l>'; |
$data_xml .= '<l f="'.($d->[$i]->{$path}->{'f'} || 1).'" t="'.$self->to_xml($d->[$i]->{$path}->{'t'} || 'no title').'">'.$self->to_xml($path).'</l>'; |
800 |
$nr_keys++; |
$nr_keys++; |
801 |
} |
} |
802 |
$data_xml .= qq{</e>}; |
$data_xml .= '</e>'; |
803 |
} |
} |
804 |
|
|
805 |
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); |
$nr_keys += $s->[$i]->to_jsfind("$path/$file","$i") if ($s->[$i]); |
806 |
} |
} |
807 |
|
|
808 |
$key_xml .= "</n>"; |
$key_xml .= '</n>'; |
809 |
$data_xml .= "</d>"; |
$data_xml .= '</d>'; |
810 |
|
|
811 |
if (! -e $path) { |
if (! -e $path) { |
812 |
mkpath($path) || croak "can't create dir '$path': $!"; |
mkpath($path) || croak "can't create dir '$path': $!"; |