/[jsFind]/trunk/t/10homer.t
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/t/10homer.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 37 - (hide annotations)
Sat Oct 30 21:48:31 2004 UTC (19 years, 6 months ago) by dpavlin
File MIME type: application/x-troff
File size: 1820 byte(s)
The homer test can now index any text file supplied as the first (and only)
argument on the command line. This is useful for generating test data from
other sources. I don't recommend input files which are not plain 7-bit ASCII,
because the generated JavaScript array might have the wrong encoding for 8-bit
characters. This is a serious problem. However, since JavaScript comparison
and sort order are locale-dependent, it's much easier to use something like
Text::Unaccent on the input data than to fix the sort/comparison order (which
could also be done; see my js_locale project).

#!/usr/bin/perl

# 10homer.t - build a jsFind index from a plain-text file and dump test data.
#
# Usage: perl t/10homer.t [input-file]
#
# The input file (default: t/homer.txt) is read line by line. Every
# whitespace-separated, lower-cased word is passed to $t->B_search() together
# with the line number it occurs on and how often it occurs on that line.
#
# Files written (relative to the current directory):
#   html/test_data.js  - word => { line => count } structure as a JavaScript var
#   homer_freq.txt     - one "word line: count" entry per line
#   homer_text.txt     - the normalised text, each line prefixed by its number
#   homer_words.txt    - whatever $t->to_string returns
#   html/homer/        - output of $t->to_jsfind

use strict;
use warnings;

use Test::More tests => 7;
use blib;
use jsFind;
use Data::Dumper;

BEGIN { use_ok('jsFind'); }

# Write $content to $path, replacing any existing file.
# Returns 1 on success and 0 on failure (with $! describing the error).
# An explicit 0 is returned rather than a bare "return" because the result is
# used directly in argument lists, where an empty list would shift the
# following arguments into its place.
sub _write_file {
    my ($path, $content) = @_;

    open(my $fh, '>', $path) or return 0;
    print {$fh} $content     or return 0;

    # Buffered write errors only surface at close, so its result matters.
    return close($fh) ? 1 : 0;
}

my $t = jsFind->new(B => 200);

my $file = shift(@ARGV) || 't/homer.txt';

ok(-e $file, "reading input file $file");

my $line_no    = 0;     # number of the input line currently processed
my %words_usage;        # "word line" => occurrences of word on that line
my $word_count = 0;     # total number of words indexed so far
my $max_words;          # optional cap on indexed words (undef = no limit)
#$max_words = 100;

my $res       = {};     # word => { line => count }, dumped as JavaScript
my $full_text = '';     # normalised copy of everything that was indexed

# Three-argument open: the file name comes from the command line and must
# never be interpreted as a mode or a pipe.
my $opened = open(my $in, '<', $file);
ok($opened, "open $file");

while ($opened && defined(my $input = <$in>)) {
    chomp $input;
    $line_no++;
    next if $input =~ /^\s*$/;

    $full_text .= "$line_no: ";

    # Leading whitespace makes split() return an empty first field; drop it.
    my @words = grep { $_ ne '' } split(/\s+/, lc($input));

    # Per-line frequency of each word.
    my %usage;
    $usage{$_}++ for @words;

    foreach my $word (@words) {

        $words_usage{"$word $line_no"} = $usage{$word};

        $res->{$word}->{$line_no} = $usage{$word};

        $t->B_search(
            Key  => $word,
            Data => {
                "$line_no" => {
                    t => "Odyssey line $line_no",
                    f => $usage{$word},
                },
            },
            Insert => 1,
            Append => 1,
        );

        $word_count++;

        $full_text .= "$word ";
    }

    # Append (not assign) so the text of earlier lines is kept.
    $full_text .= "\n";

    last if ($max_words && $word_count >= $max_words);
}
close($in) if $opened;

# Turn the Data::Dumper output into a JavaScript variable declaration.
my $test_data = Dumper($res);
$test_data =~ s/=>/:/gs;
$test_data =~ s/\$VAR1/var test_data/;
ok(_write_file('html/test_data.js', $test_data), "test_data.js");

ok($test_data, "test_data saved");

my $freq_report = join '', map { "$_: $words_usage{$_}\n" } keys %words_usage;
ok(_write_file('homer_freq.txt', $freq_report), "homer_freq.txt");
diag "homer_freq.txt created";

# The next two files are debugging aids only, so a failure to write them is
# reported but does not count as a test failure.
if (_write_file('homer_text.txt', $full_text)) {
    diag "homer_text.txt created";
}
else {
    diag "homer_text.txt not written: $!";
}

if (_write_file('homer_words.txt', $t->to_string)) {
    diag "homer_words.txt created";
}
else {
    diag "homer_words.txt not written: $!";
}

# Number of distinct "word line" pairs that were indexed.
my $total_words = scalar keys %words_usage;

cmp_ok($t->to_jsfind(dir=>"./html/homer"), '==', $total_words, " jsfind index with $total_words words");

#print Dumper($t);

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26