/[Lingua-Spelling-Alternative]/Alternative.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /Alternative.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1.2 by dpavlin, Mon Feb 11 14:33:42 2002 UTC revision 1.4 by dpavlin, Tue Feb 12 12:41:31 2002 UTC
# Line 13  $VERSION = '0.01'; Line 13  $VERSION = '0.01';
13  #@EXPORT = qw();  #@EXPORT = qw();
14  @EXPORT_OK = qw(  @EXPORT_OK = qw(
15          &alternatives          &alternatives
         &load_affix  
16          );          );
17    
 #my @affix_regexp;  
 #my @affix_add;  
 #my @affix_sub;  
   
18  my $debug=0;  my $debug=0;
19    
20  # stub  #
21    # make new instance of language, get args
22    #
23  sub new {  sub new {
24          my $class = shift;          my $class = shift;
25          my $self = {};          my $self = {};
26          bless($self, $class);          bless($self, $class);
27          $self->{ARGS} = {@_};          $self->{ARGS} = {@_};
28          $debug = $self->{ARGS}->{DEBUG};          $debug = $self->{ARGS}->{DEBUG};
29            @{$self->{affix_regexp}} = ();
30            @{$self->{affix_add}} = ();
31            @{$self->{affix_sub}} = ();
32          $self ? return $self : return undef;          $self ? return $self : return undef;
33  }  }
34    
# Line 79  sub load_affix { Line 78  sub load_affix {
78                  sub nuke_s {                  sub nuke_s {
79                          my $tmp = $_[0];                          my $tmp = $_[0];
80                          return if (!$tmp);                          return if (!$tmp);
81                          $tmp=~s/^ *//g;                  #       $tmp=~s/^\s+//g;
82                          $tmp=~s/ *$//g;                  #       $tmp=~s/\s+$//g;
83                          $tmp=~s/ *//g;                          $tmp=~s/\s+//g;
84                          return $tmp;                          return $tmp;
85                  }                  }
86    
# Line 93  sub load_affix { Line 92  sub load_affix {
92  }  }
93    
94  #  #
95    # function for reading raw findaffix output
96    #
97    
98    sub load_findaffix {
99            my $self = shift;
100            my $filename = shift;
101    
102            print STDERR "reading findaffix output $filename\n" if ($debug);
103    
104            open (A,$filename) || die "Can't open findaffix output $filename: $!";
105            while(<A>) {
106                    chomp;
107                    my @line=split(m;/;,$_,4);
108                    if ($#line > 2) {
109                            push @{$self->{affix_regexp}},'.';
110                            push @{$self->{affix_sub}},$line[0];
111                            push @{$self->{affix_add}},$line[1];
112                    }
113            }
114            return 1;
115    }
116    
117    #
118  # function which returns original word and all alternatives  # function which returns original word and all alternatives
119  #  #
120    
# Line 106  sub alternatives { Line 128  sub alternatives {
128                          my $regexp = $self->{affix_regexp}[$i];                          my $regexp = $self->{affix_regexp}[$i];
129                          my $add = $self->{affix_add}[$i];                          my $add = $self->{affix_add}[$i];
130                          my $sub = $self->{affix_sub}[$i];                          my $sub = $self->{affix_sub}[$i];
131                            print STDERR "r:'$regexp'\t-'",$sub||'',"'\t+'",$add||'',"'\n" if ($debug);
132                          next if length($word) < length($sub);                          next if length($word) < length($sub);
133                          my $tmp_word = $word;                          my $tmp_word = $word;
134                          if ($sub) {                          if ($sub) {
# Line 118  sub alternatives { Line 141  sub alternatives {
141                          } else {                          } else {
142                                  $tmp_word = $word.$add;                                  $tmp_word = $word.$add;
143                          }                          }
144                            print STDERR "\t ?:$tmp_word\n" if ($debug);
145                          if ($tmp_word =~ m/$regexp/ix) {                          if ($tmp_word =~ m/$regexp/ix) {
146  #                               print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";  #                               print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
147                                  push @out,lc($tmp_word);                                  push @out,lc($tmp_word);
# Line 127  sub alternatives { Line 151  sub alternatives {
151          return @out;          return @out;
152  }  }
153    
154    #
155    # function which returns the minimal (shortest) word of all alternatives
156    #
157    
158    sub minimal {
159            my $self = shift;
160            my @out;
161            foreach my $word (@_) {
162                    my @alt = $self->alternatives($word);
163                    my $minimal = shift @alt;
164                    foreach (@alt) {
165                            $minimal=$_ if (length($_) < length($minimal));
166                    }
167                    push @out,$minimal;
168            }
169            return @out;
170    }
171    
172  ###############################################################################  ###############################################################################
173  1;  1;
174  __END__  __END__
175    
176  =head1 NAME  =head1 NAME
177    
178  Alternative.pm - see all alternatives of a given word in a given language  Alternative.pm - alternative spelling of a given word in a given language
179    
180  =head1 SYNOPSIS  =head1 SYNOPSIS
181    
# Line 155  entered in search engine) Line 197  entered in search engine)
197    
198  =item new  =item new
199    
200  The new() constructor (without parameters) create container for new  The new() constructor (without parameters) creates a container for a new language.
201  language.  The only parameter it supports is DEBUG, which turns on (some) debugging output.
 Only parametar it supports is DEBUG which turns on (some) debugging  
 output.  
202    
203  =item load_affix  =item load_affix
204    
205  Function load_affix loads ispell's affix file.  Function load_affix loads ispell's affix file for later usage.
206    
207    =item load_findaffix
208    
209    This function loads the output of the findaffix program from the ispell package.
210    This is a better idea (if you are creating an affix file for a particular language
211    yourself) because the affix file from ispell (which is created from data returned
212    by findaffix) is limited to 26 entries (because each entry is denoted by
213    a single character).
214    
215  =item alternatives  =item alternatives
216    
217  Function alternatives  Function alternatives returns all alternative spellings of the given
218    word(s). It will also return spellings which are not correct if there is
219    a rule like that in the affix file.
220    
221    =item minimal
222    
223    This function returns the shortest of all alternatives of the given word(s). It's
224    a poor man's version of normalization (because we don't know the grammar of
225    the particular language, just some spelling rules).
226    
227  =head1 PRIVATE METHODS  =head1 PRIVATE METHODS
228    

Legend:
Removed from v.1.2  
changed lines
  Added in v.1.4

  ViewVC Help
Powered by ViewVC 1.1.26