17 |
my $p = 0; |
my $p = 0; |
18 |
my $text = ''; |
my $text = ''; |
19 |
my %words; |
my %words; |
20 |
my $max = 2; |
my $word_count = 0; |
21 |
|
my $max_words = 100; |
22 |
|
|
23 |
my $res; |
my $res; |
24 |
|
|
25 |
|
my $full_text; |
26 |
|
|
27 |
ok(open(U, "t/homer.txt"), "open homer.txt"); |
ok(open(U, "t/homer.txt"), "open homer.txt"); |
28 |
while(<U>) { |
while(<U>) { |
29 |
chomp; |
chomp; |
30 |
if (/^\s*$/) { |
if (/^\s*$/) { |
31 |
$p++; |
$p++; |
32 |
|
|
33 |
|
$full_text .= "\n" if ($full_text); |
34 |
|
$full_text = "paragraph $p\n\n"; |
35 |
|
|
36 |
diag "paragraph $p"; |
diag "paragraph $p"; |
37 |
|
|
38 |
my %usage; |
my %usage; |
39 |
foreach (split(/\s+/,lc($text))) { |
|
40 |
|
my @words = split(/\s+/,lc($text)); |
41 |
|
|
42 |
|
foreach (@words) { |
43 |
$usage{$_}++; |
$usage{$_}++; |
44 |
} |
} |
45 |
|
|
46 |
foreach my $word (keys %usage) { |
foreach my $word (@words) { |
47 |
|
|
48 |
$words{$word} += $usage{$word}; |
$words{$word} += $usage{$word}; |
49 |
|
|
59 |
Insert => 1, |
Insert => 1, |
60 |
Append => 1, |
Append => 1, |
61 |
); |
); |
62 |
|
|
63 |
|
$word_count++; |
64 |
|
|
65 |
|
$full_text .= "$word "; |
66 |
|
|
67 |
|
last if ($max_words && $word_count >= $max_words); |
68 |
} |
} |
69 |
|
|
70 |
$text = ''; |
$text = ''; |
71 |
|
|
72 |
last if ($max && $max == $p); |
last if ($max_words && $word_count >= $max_words); |
73 |
} else { |
} else { |
74 |
$text .= $_; |
$text .= $_; |
75 |
} |
} |
91 |
# $sum += $words{$w}; |
# $sum += $words{$w}; |
92 |
#} |
#} |
93 |
|
|
94 |
|
if (open(T,"> homer_text.txt")) { |
95 |
|
print T $full_text; |
96 |
|
close(T); |
97 |
|
} |
98 |
|
diag "homer_text.txt created"; |
99 |
|
|
100 |
|
if (open(T,"> homer_words.txt")) { |
101 |
|
print T $t->to_string; |
102 |
|
close(T); |
103 |
|
} |
104 |
|
diag "homer_words.txt created"; |
105 |
|
|
106 |
my $total_words = scalar keys %words; |
my $total_words = scalar keys %words; |
107 |
|
|
108 |
cmp_ok($t->to_jsfind("./html/homer"), '==', $total_words, " jsfind index with $total_words words"); |
cmp_ok($t->to_jsfind("./html/homer"), '==', $total_words, " jsfind index with $total_words words"); |