--- Alternative.pm	2002/02/11 14:33:42	1.2
+++ Alternative.pm	2002/02/11 20:19:59	1.3
@@ -16,20 +16,20 @@
 	&load_affix
 );
 
-#my @affix_regexp;
-#my @affix_add;
-#my @affix_sub;
-
 my $debug=0;
 
-# stub
-
+#
+# make new instance of language, get args
+#
 sub new {
 	my $class = shift;
 	my $self = {};
 	bless($self, $class);
 	$self->{ARGS} = {@_};
 	$debug = $self->{ARGS}->{DEBUG};
+	@{$self->{affix_regexp}} = ();
+	@{$self->{affix_add}} = ();
+	@{$self->{affix_sub}} = ();
 	$self ? return $self : return undef;
 }
 
@@ -79,9 +79,9 @@
 sub nuke_s {
 	my $tmp = $_[0];
 	return if (!$tmp);
-	$tmp=~s/^ *//g;
-	$tmp=~s/ *$//g;
-	$tmp=~s/ *//g;
+	# $tmp=~s/^\s+//g;
+	# $tmp=~s/\s+$//g;
+	$tmp=~s/\s+//g;
 	return $tmp;
 }
 
@@ -93,6 +93,29 @@
 }
 
 #
+# function for reading raw findaffix output
+#
+
+sub load_findaffix {
+	my $self = shift;
+	my $filename = shift;
+
+	print STDERR "reading findaffix output $filename\n" if ($debug);
+
+	open (A,$filename) || die "Can't open findaffix output $filename: $!";
+	while(<A>) {
+		chomp;
+		my @line=split(m;/;,$_,4);
+		if ($#line > 2) {
+			push @{$self->{affix_regexp}},'.';
+			push @{$self->{affix_sub}},$line[0];
+			push @{$self->{affix_add}},$line[1];
+		}
+	}
+	return 1;
+}
+
+#
 # function which returns original word and all alternatives
 #
 
@@ -106,6 +129,7 @@
 		my $regexp = $self->{affix_regexp}[$i];
 		my $add = $self->{affix_add}[$i];
 		my $sub = $self->{affix_sub}[$i];
+		print STDERR "r:'$regexp'\t-'",$sub||'',"'\t+'",$add||'',"'\n" if ($debug);
 		next if length($word) < length($sub);
 		my $tmp_word = $word;
 		if ($sub) {
@@ -118,6 +142,7 @@
 		} else {
 			$tmp_word = $word.$add;
 		}
+		print STDERR "\t ?:$tmp_word\n" if ($debug);
 		if ($tmp_word =~ m/$regexp/ix) {
 			# print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
 			push @out,lc($tmp_word);
@@ -133,7 +158,7 @@
 
 =head1 NAME
 
-Alternative.pm - see all alternatives of a given word in a given language
+Alternative.pm - alternative spelling of a given word in a given language
 
 =head1 SYNOPSIS
 