/[stem-hr]/stem.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /stem.pl

Parent Directory | Revision Log


Revision 12 - (hide annotations)
Sat Jul 9 10:15:09 2005 UTC (18 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 942 byte(s)
moved stemmer into own pm package

1 dpavlin 12 #!/usr/bin/perl -w
2    
3     use lib '.';
4     require 'stem-hr.pm';
5    
# Run-wide tallies, filled in by the main input loop and reported at the end.
#   %rules      - rule number (the trailing digits of a stemmed line) => hits
#   %stem_words - stem text (everything before the rule number)       => hits
my (%rules, %stem_words);

#   $words - input lines processed (comment and empty lines excluded)
#   $stems - processed lines that ended in a rule number
my ($words, $stems) = (0, 0);
10    
11    
# Consistency-check state for the current group of input words.
#   $last_stem - reference stem of the group; '' means "none recorded yet".
#                The main loop also resets and assigns this directly.
#   $errors    - number of stems that disagreed with their group's reference.
my $last_stem = '';
my $errors    = 0;

# check_stem($stem)
#
# Compare $stem with the reference stem of the current group.
#   * A false $stem (undef, '' or '0') is ignored.
#   * If no reference is recorded yet, $stem becomes the reference.
#   * If a reference exists and differs, "ERROR==> " is printed (without a
#     newline, so it prefixes whatever is printed next) and $errors is
#     incremented.  The reference itself is left unchanged.
# The return value is not meaningful; callers use the sub for its side effects.
sub check_stem {
    my $candidate = shift;
    return unless $candidate;

    if (!$last_stem) {
        $last_stem = $candidate;
    }
    elsif ($last_stem ne $candidate) {
        print "ERROR==> ";
        $errors++;
    }
}
25    
# Main loop: read lines from the files named on the command line (or STDIN),
# run each one through the stemmer and print "original  result" pairs.
while (<>) {
    chomp;

    # Lines starting with '#' are skipped and not counted.
    next if (/^#/);

    # An empty line is echoed as a blank line and starts a new group:
    # the reference stem used by check_stem() is forgotten.
    if (/^$/) {
        print "\n";
        $last_stem = '';
        next;
    }

    $words++;
    my $orig = $_;

    # NOTE(review): the returned value is never used below; the code that
    # follows inspects $_ instead, so StemHr::stem presumably rewrites $_ in
    # place (as "stem<whitespace>rule-number") -- confirm against stem-hr.pm.
    my $stem = StemHr::stem($_);

    # Turn "stem<ws>NN" into "stem<TAB>NN"; success means a rule was applied.
    my $has_rule = s/^(.+)\s(\d+)$/$1\t$2/g;
    if (!$has_rule) {
        # No rule number: the line as it stands becomes the group's reference.
        $last_stem = $_;
    }
    else {
        my ($stem_text, $rule_nr) = ($1, $2);
        $rules{$rule_nr}++;
        $stems++;
        $stem_words{$stem_text}++;
        check_stem($stem_text);
    }

    printf("%-15s %s\n", $orig, $_);
}
# Final report, printed as '#'-prefixed lines after all input is processed.
my $nr_stems = keys(%stem_words);

# Distinct stems as a percentage of processed words.  Guard the division:
# with no word lines at all (empty input, or only comments/blank lines)
# $words is 0 and the unguarded expression dies with
# "Illegal division by zero" before any summary is printed.
my $size_pct = $words ? ($nr_stems * 100 / $words) : 0;

printf "\n# %d words, %d stems in %d ops, %.2f%% size [%d errors]\n",$words,$nr_stems,$stems,$size_pct,$errors;

# One line per distinct stem with the number of words that produced it.
# Sorted so the report is reproducible; raw hash order varies between runs.
foreach my $s (sort keys %stem_words) {
    print "#stem $stem_words{$s} $s\n";
}

# One line per rule number with its hit count (string-sorted, as before).
foreach my $r (sort keys %rules) {
    print "#rule $rules{$r} $r\n";
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26