/[jsFind]/trunk/t/10homer.t
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/t/10homer.t

Parent Directory Parent Directory | Revision Log Revision Log


Revision 37 - (show annotations)
Sat Oct 30 21:48:31 2004 UTC (19 years, 6 months ago) by dpavlin
File MIME type: application/x-troff
File size: 1820 byte(s)
The homer test can now index any text file supplied as the first (and only)
argument on the command line. This is useful for generating test data from other sources.
I don't recommend input files which are not plain 7-bit ASCII, because the
generated JavaScript array might have the wrong encoding for 8-bit characters.
This is a serious problem. However, since JavaScript comparison and sort order
are locale dependent, it's much easier to use something like Text::Unaccent
on the input data than to fix the sort/comparison order (which could also be done;
see my js_locale project).

#!/usr/bin/perl

# Index a plain-text file with jsFind, one key per word, and dump helper
# files describing what was indexed.
#
# Usage: 10homer.t [input_file]
#
# The input file defaults to t/homer.txt.  Files written:
#   html/test_data.js  - Data::Dumper output of word => { line => count },
#                        rewritten to look like a JavaScript assignment
#   homer_freq.txt     - "word line: count" for every word/line pair
#   homer_text.txt     - lower-cased words of every non-blank line,
#                        prefixed with the line number
#   homer_words.txt    - $t->to_string output
#   ./html/homer       - output directory passed to $t->to_jsfind

use strict;
use warnings;

use Test::More tests => 7;
use blib;
use jsFind;
use Data::Dumper;

BEGIN { use_ok('jsFind'); }

# Write $content to $path, replacing any existing file.
# Returns 1 when the file was opened, written and closed successfully,
# 0 otherwise ($! holds the reason).
sub write_file {
    my ($path, $content) = @_;

    open(my $fh, '>', $path) or return 0;
    my $printed = print {$fh} $content;
    my $closed  = close($fh);

    return ($printed && $closed) ? 1 : 0;
}

my $t = jsFind->new(B => 200);

my $file = shift(@ARGV) || 't/homer.txt';

ok(-e $file, "reading input file $file");

my $line = 0;
my %words_usage;            # "word line" => occurrences of word on that line
my $word_count = 0;
my $max_words;              # optional cap on indexed words; undef = no cap
#$max_words = 100;

my $res;                    # word => { line => occurrences }

my $full_text = '';

# Three-argument open: the file name may come from the command line and
# must never be interpreted as an open mode or a pipe.
my $opened     = open(my $in, '<', $file);
my $open_error = $!;
ok($opened, "open $file") or diag "cannot open $file: $open_error";

while ($opened && defined(my $row = <$in>)) {
    chomp $row;
    $line++;
    next if ($row =~ /^\s*$/);

    # Append (not assign), so that every processed line is kept.
    $full_text .= "$line: ";

    my @words = split(/\s+/, lc($row));

    # Per-line occurrence count of each word.
    my %usage;
    $usage{$_}++ foreach (@words);

    foreach my $word (@words) {

        # split() yields a leading empty field when the line starts
        # with whitespace.
        next if ($word eq '');

        $words_usage{"$word $line"} = $usage{$word};

        $res->{$word}->{$line} = $usage{$word};

        # NOTE(review): Insert/Append presumably create the key when it is
        # missing and add this line's data to an existing key - confirm
        # against the jsFind documentation.
        $t->B_search(
            Key  => $word,
            Data => {
                "$line" => {
                    t => "Odyssey line $line",
                    f => $usage{$word},
                },
            },
            Insert => 1,
            Append => 1,
        );

        $word_count++;

        $full_text .= "$word ";
    }

    $full_text .= "\n";

    last if ($max_words && $word_count >= $max_words);
}
close($in) if ($opened);

# Turn the Perl dump into something that looks like a JavaScript
# variable assignment.
my $test_data = Dumper($res);
$test_data =~ s/=>/:/gs;
$test_data =~ s/\$VAR1/var test_data/;
ok(write_file('html/test_data.js', $test_data), "test_data.js")
    or diag "html/test_data.js not written: $!";

ok($test_data, "test_data saved");

my $freq = join('', map { "$_: $words_usage{$_}\n" } keys %words_usage);
if (ok(write_file('homer_freq.txt', $freq), "homer_freq.txt")) {
    diag "homer_freq.txt created";
}
else {
    diag "homer_freq.txt not written: $!";
}

if (write_file('homer_text.txt', $full_text)) {
    diag "homer_text.txt created";
}
else {
    diag "homer_text.txt not written: $!";
}

# join() keeps the concatenating behaviour of print should to_string
# return a list.
if (write_file('homer_words.txt', join('', $t->to_string))) {
    diag "homer_words.txt created";
}
else {
    diag "homer_words.txt not written: $!";
}

# NOTE(review): this counts distinct word/line pairs, not distinct words,
# although the test description says "words" - confirm what to_jsfind
# is expected to return.
my $total_words = scalar keys %words_usage;

cmp_ok($t->to_jsfind(dir=>"./html/homer"), '==', $total_words, " jsfind index with $total_words words");

#print Dumper($t);

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26