#!/usr/bin/perl

# Test script: feeds the words of a text file to SWISH::PlusPlus, one
# add() call per word occurrence, using 'homer' as the index directory.

use strict;
use warnings;

# Planned count: one use_ok() plus five ok() checks further down.
use Test::More tests => 6;
use blib;
use Data::Dumper;

BEGIN { use_ok('SWISH::PlusPlus'); }

# Direct class-method call rather than the indirect-object form
# "new SWISH::PlusPlus(...)", which Perl can mis-parse when a sub called
# "new" happens to be in scope.
my $i = SWISH::PlusPlus->new(
#    debug => 1,
    index_dir    => 'homer',
    meta_in_body => 1,
);

# Input file: first command-line argument, or the bundled sample text.
my $file = shift @ARGV || 't/homer.txt';

ok(-e $file, "reading input file $file");

my $line = 0;           # number of the input line being processed (1-based)
my $text = '';          # not used in the visible part of this script
my %words_usage;        # "$word $line" => occurrences of $word on that line
my $word_count = 0;     # total number of add() calls made so far
my $max_words;          # optional cap on $word_count; undef means no limit
#$max_words = 100;

my $res;                # $res->{$word}->{$line} = occurrences on that line

my $full_text = '';     # "<line>: <lower-cased words>\n" for every indexed line

# Three-argument open on a lexical handle: the file name can never be
# interpreted as an open mode, and the handle does not leak as a global.
my $opened     = open(my $in, '<', $file);
my $open_error = $opened ? '' : "$!";
ok($opened, "open $file") or diag "can't open $file: $open_error";

if ($opened) {
    while (my $row = <$in>) {
        chomp $row;
        $line++;
        next if $row =~ /^\s*$/;    # blank lines are counted but not indexed

        # Append (.=), do not assign: plain assignment here threw away the
        # text of every previously processed line.
        $full_text .= "$line: ";

        # Leading whitespace makes split() return an empty first field;
        # drop it so it is neither counted nor indexed.
        my @words = grep { $_ ne '' } split /\s+/, lc $row;

        # Per-line occurrence count of each word.
        my %usage;
        $usage{$_}++ for @words;

        foreach my $word (@words) {
            $words_usage{"$word $line"} = $usage{$word};
            $res->{$word}->{$line}      = $usage{$word};

            $i->add(
                path  => $word,
                title => $line,
                meta  => {
                    usage => $usage{$word},
                },
            );

            $word_count++;
            $full_text .= "$word ";
        }

        # Append here as well; assigning "\n" reduced the whole text to a
        # single newline.
        $full_text .= "\n";

        last if ($max_words && $word_count >= $max_words);
    }

    close($in);
}

diag "cwd ",$i->{'cwd'};

# Dump the word => line => count structure for later inspection.
my $test_data = Dumper($res);
my $dump_ok   = open(my $dump_fh, '>', 'homer/dump');
ok($dump_ok, "homer/dump");
if ($dump_ok) {
    print {$dump_fh} $test_data;
    # Buffered write errors only surface at close time.
    close($dump_fh) or diag "close homer/dump failed: $!";
}

ok($test_data, "test_data saved");

# Write one "<word> <line>: <count>" row per entry. Keys are sorted so the
# file content is reproducible between runs.
my $sum     = 0;    # sum of all per-line counts; not used in the visible code
my $freq_ok = open(my $freq_fh, '>', 'homer/freq.txt');
ok($freq_ok, "homer/freq.txt");
foreach my $w (sort keys %words_usage) {
    print {$freq_fh} "$w: $words_usage{$w}\n" if $freq_ok;
    $sum += $words_usage{$w};
}
if ($freq_ok) {
    close($freq_fh) or diag "close homer/freq.txt failed: $!";
    diag "homer/freq.txt created";
}

# Best-effort copy of the processed text; not one of the planned tests, so
# failure is only reported, never counted.
if (open(my $text_fh, '>', 'homer/text.txt')) {
    # $full_text can still be undef when nothing was read.
    print {$text_fh} (defined($full_text) ? $full_text : '');
    close($text_fh) or diag "close homer/text.txt failed: $!";
    diag "homer/text.txt created";
}
else {
    diag "can't write homer/text.txt: $!";
}

# Keys are "$word $line", so this counts distinct (word, line) pairs.
my $total_words = scalar keys %words_usage;

diag "total words: $total_words";

$i->finish_update;
