Patch for stem-hr.pl, revision 2 -> 3.

What the hunks do:
  * remove the rm_a helper sub;
  * declare counters (%rules, %stem_words, $words, $stems) before the main loop;
  * echo empty input lines as empty output lines and skip them;
  * count which numbered rule fired and which distinct stems were produced;
  * print a summary line after all input has been processed.

NOTE(review): this patch was recovered from a copy whose newlines had been
collapsed. Line breaks and blank context lines were reconstructed to agree
with the hunk headers; indentation (tabs assumed) and whitespace inside
string literals could not be verified against the original file, so apply
with whitespace-tolerant matching (e.g. patch -l) if needed.

--- stem-hr.pl	2005/02/25 23:40:18	2
+++ stem-hr.pl	2005/02/26 00:01:42	3
@@ -18,10 +18,6 @@
 # - uzvici
 #

-sub rm_a {
-	s/a//g;
-}
-
 sub kgh {
 	my ($pre,$replace,$post) = @_;
 	$replace =~ s/[cè]/k/g;
@@ -35,8 +31,19 @@
 # suglasnici
 my $su = '[^aeiou]';

+my %rules;
+my %stem_words;
+my $words = 0;
+my $stems = 0;
+
 while(<>) {
 	chomp;
+	if (/^$/) {
+		print "\n";
+		next;
+	}
+
+	$words++;

 	my $orig = $_;

@@ -59,7 +66,16 @@
 		s/(o|e|a|u|om|em)$/ 7/g;
 	}

-	s/\s(\d+)$/\t$1/g;
+	if (s/^(.+)\s(\d+)$/$1\t$2/g) {
+		$rules{$2}++;
+		$stems++;
+		$stem_words{$1}++;
+	}
 	printf("%-15s %s\n",$orig,$_);
+
 }

+my $nr_stems = keys(%stem_words);
+# $words stays 0 for empty or all-blank input; guard the ratio so the summary does not die with a division by zero.
+printf "\n# %d words, %d stems in %d ops, %.2f%% size\n",$words,$nr_stems,$stems,($words ? $nr_stems*100/$words : 0);
+