3 |
use strict; |
use strict; |
4 |
use warnings; |
use warnings; |
5 |
|
|
6 |
use Test::More tests => 5; |
use Test::More tests => 7; |
7 |
use blib; |
use blib; |
8 |
use jsFind; |
use jsFind; |
9 |
use Data::Dumper; |
use Data::Dumper; |
10 |
|
|
11 |
BEGIN { use_ok('jsFind'); } |
BEGIN { use_ok('jsFind'); } |
12 |
|
|
13 |
my $t = new jsFind B => 100; |
my $t = new jsFind B => 10; |
14 |
|
|
15 |
ok(-e "t/homer.txt", "Homer: The Odyssey preface"); |
ok(-e "t/homer.txt", "Homer: The Odyssey preface"); |
16 |
|
|
17 |
my $p = 0; |
my $line = 0; |
18 |
my $text = ''; |
my $text = ''; |
19 |
my %words; |
my %words_usage; |
20 |
my $max = 2; |
my $word_count = 0; |
21 |
|
my $max_words; |
22 |
|
#$max_words = 100; |
23 |
|
|
24 |
my $res; |
my $res; |
25 |
|
|
26 |
|
my $full_text; |
27 |
|
|
28 |
ok(open(U, "t/homer.txt"), "open homer.txt"); |
ok(open(U, "t/homer.txt"), "open homer.txt"); |
29 |
while(<U>) { |
while(<U>) { |
30 |
chomp; |
chomp; |
31 |
if (/^\s*$/) { |
$line++; |
32 |
$p++; |
next if (/^\s*$/); |
33 |
|
|
34 |
|
$full_text = "$line: "; |
35 |
|
|
36 |
|
my %usage; |
37 |
|
|
38 |
|
my @words = split(/\s+/,lc($_)); |
39 |
|
|
40 |
diag "paragraph $p"; |
foreach (@words) { |
41 |
|
$usage{$_}++; |
42 |
|
} |
43 |
|
|
44 |
my %usage; |
foreach my $word (@words) { |
|
foreach (split(/\s+/,lc($text))) { |
|
|
$usage{$_}++; |
|
|
} |
|
45 |
|
|
46 |
foreach my $word (keys %usage) { |
next if ($word eq ''); |
47 |
|
|
48 |
$words{$word} += $usage{$word}; |
$words_usage{"$word $line"} = $usage{$word}; |
49 |
|
|
50 |
$res->{$word}->{$p} = $usage{$word}; |
$res->{$word}->{$line} = $usage{$word}; |
51 |
|
|
52 |
$t->B_search( |
$t->B_search( |
53 |
Key => $word, |
Key => $word, |
54 |
Data => { "$p" => { |
Data => { "$line" => { |
55 |
t => "Ulyss paragraph $p", |
t => "Odyssey line $line", |
56 |
f => $usage{$word}, |
f => $usage{$word}, |
|
}, |
|
57 |
}, |
}, |
58 |
Insert => 1, |
}, |
59 |
Append => 1, |
Insert => 1, |
60 |
); |
Append => 1, |
61 |
} |
); |
62 |
|
|
63 |
$text = ''; |
$word_count++; |
64 |
|
|
65 |
last if ($max && $max == $p); |
$full_text .= "$word "; |
66 |
} else { |
|
|
$text .= $_; |
|
67 |
} |
} |
68 |
|
|
69 |
|
$full_text = "\n"; |
70 |
|
|
71 |
|
last if ($max_words && $word_count >= $max_words); |
72 |
} |
} |
73 |
|
|
74 |
my $test_data = Dumper($res); |
my $test_data = Dumper($res); |
75 |
$test_data =~ s/=>/:/gs; |
$test_data =~ s/=>/:/gs; |
76 |
$test_data =~ s/\$VAR1/var test_data/; |
$test_data =~ s/\$VAR1/var test_data/; |
77 |
open(JS, "> html/test_data.js") || die "can't open test_data.js: $!"; |
ok(open(JS, "> html/test_data.js"), "test_data.js"); |
78 |
print JS $test_data; |
print JS $test_data; |
79 |
close(JS); |
close(JS); |
80 |
|
|
81 |
ok($test_data, "test_data saved"); |
ok($test_data, "test_data saved"); |
82 |
|
|
83 |
#my $sum = 0; |
my $sum = 0; |
84 |
#foreach my $w (keys %words) { |
ok(open(TD, "> homer_freq.txt"), "homer_freq.txt"); |
85 |
# print STDERR "$w: $words{$w}\n"; |
foreach my $w (keys %words_usage) { |
86 |
# $sum += $words{$w}; |
print TD "$w: $words_usage{$w}\n"; |
87 |
#} |
$sum += $words_usage{$w}; |
88 |
|
} |
89 |
|
close(TD); |
90 |
|
diag "homer_freq.txt created"; |
91 |
|
|
92 |
my $total_words = scalar keys %words; |
if (open(T,"> homer_text.txt")) { |
93 |
|
print T $full_text; |
94 |
|
close(T); |
95 |
|
} |
96 |
|
diag "homer_text.txt created"; |
97 |
|
|
98 |
|
if (open(T,"> homer_words.txt")) { |
99 |
|
print T $t->to_string; |
100 |
|
close(T); |
101 |
|
} |
102 |
|
diag "homer_words.txt created"; |
103 |
|
|
104 |
|
my $total_words = scalar keys %words_usage; |
105 |
|
|
106 |
cmp_ok($t->to_jsfind("./html/homer"), '==', $total_words, " jsfind index with $total_words words"); |
cmp_ok($t->to_jsfind("./html/homer"), '==', $total_words, " jsfind index with $total_words words"); |
107 |
|
|
108 |
|
#print Dumper($t); |