/[Lingua-Spelling-Alternative]/Alternative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /Alternative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.8 - (hide annotations)
Tue Nov 18 10:30:10 2003 UTC (20 years, 4 months ago) by dpavlin
Branch: MAIN
Changes since 1.7: +7 -6 lines
fixed example, better documentation

1 dpavlin 1.1 # Documentation and Copyright exist after __END__
2    
3     package Lingua::Spelling::Alternative;
4     require 5.001;
5    
6     use strict;
7     use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
8    
9     use Exporter;
10     $VERSION = '0.01';
11     @ISA = ('Exporter');
12    
13     #@EXPORT = qw();
14     @EXPORT_OK = qw(
15     &alternatives
16     );
17    
18 dpavlin 1.2 my $debug=0;
19 dpavlin 1.1
20 dpavlin 1.3 #
21     # make new instance of language, get args
22     #
23 dpavlin 1.1 sub new {
24     my $class = shift;
25     my $self = {};
26     bless($self, $class);
27     $self->{ARGS} = {@_};
28 dpavlin 1.2 $debug = $self->{ARGS}->{DEBUG};
29 dpavlin 1.3 @{$self->{affix_regexp}} = ();
30     @{$self->{affix_add}} = ();
31     @{$self->{affix_sub}} = ();
32 dpavlin 1.1 $self ? return $self : return undef;
33     }
34    
35    
36     #
37     # load affix file in internal structures
38     #
39    
40     sub load_affix {
41     my $self = shift;
42     my $filename = shift;
43    
44     my $suffixes=0;
45    
46     my ($regexp,$add,$sub);
47    
48     print STDERR "reading affix file $filename\n" if ($debug);
49    
50     open (A,$filename) || die "Can't open affix file $filename: $!";
51     while(<A>) {
52     chomp;
53     next if (/^#|^[\s\t\n\r]*$/);
54    
55     if (/^suffixes/i) {
56     $suffixes++;
57     next;
58     }
59    
60     next if (! $suffixes);
61    
62     if (/^flag[\s\t]+\*{0,1}(.):/i) {
63     undef $regexp;
64     undef $add;
65     undef $sub;
66     next;
67     }
68    
69     if (/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/) {
70     $regexp = $1;
71     $add = $2;
72     $sub = $3 if ($3 ne "-");
73     } elsif (/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/) {
74     $regexp = $1;
75     $sub = $2;
76     }
77    
78     sub nuke_s {
79     my $tmp = $_[0];
80     return if (!$tmp);
81 dpavlin 1.3 # $tmp=~s/^\s+//g;
82     # $tmp=~s/\s+$//g;
83     $tmp=~s/\s+//g;
84 dpavlin 1.1 return $tmp;
85     }
86    
87     push @{$self->{affix_regexp}},nuke_s($regexp);
88     push @{$self->{affix_add}},nuke_s($add);
89     push @{$self->{affix_sub}},nuke_s($sub);
90     }
91     return 1;
92     }
93    
94     #
95 dpavlin 1.3 # function for reading raw findaffix output
96     #
97    
98     sub load_findaffix {
99     my $self = shift;
100     my $filename = shift;
101    
102     print STDERR "reading findaffix output $filename\n" if ($debug);
103    
104     open (A,$filename) || die "Can't open findaffix output $filename: $!";
105     while(<A>) {
106     chomp;
107     my @line=split(m;/;,$_,4);
108     if ($#line > 2) {
109     push @{$self->{affix_regexp}},'.';
110     push @{$self->{affix_sub}},$line[0];
111     push @{$self->{affix_add}},$line[1];
112     }
113     }
114     return 1;
115     }
116    
117     #
118 dpavlin 1.1 # function which returns original word and all alternatives
119     #
120    
121     sub alternatives {
122     my $self = shift;
123     my @out;
124     foreach my $word (@_) {
125     push @out,$word; # save original word
126     next if (length($word) < 3); # cludge: preskoci kratke
127     for(my $i=0; $i<=$#{$self->{affix_regexp}}; $i++) {
128     my $regexp = $self->{affix_regexp}[$i];
129     my $add = $self->{affix_add}[$i];
130     my $sub = $self->{affix_sub}[$i];
131 dpavlin 1.3 print STDERR "r:'$regexp'\t-'",$sub||'',"'\t+'",$add||'',"'\n" if ($debug);
132 dpavlin 1.1 next if length($word) < length($sub);
133     my $tmp_word = $word;
134     if ($sub) {
135     next if ($word !~ m/$sub$/i);
136     if ($add) {
137     $tmp_word =~ s/$sub$/$add/i;
138     } else {
139     $tmp_word =~ s/$sub$//i;
140     }
141     } else {
142     $tmp_word = $word.$add;
143     }
144 dpavlin 1.3 print STDERR "\t ?:$tmp_word\n" if ($debug);
145 dpavlin 1.1 if ($tmp_word =~ m/$regexp/ix) {
146     # print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
147     push @out,lc($tmp_word);
148     }
149     }
150     }
151     return @out;
152     }
153    
154 dpavlin 1.4 #
155     # function which return minimal word of all alternatives
156     #
157    
158     sub minimal {
159     my $self = shift;
160     my @out;
161     foreach my $word (@_) {
162     my @alt = $self->alternatives($word);
163     my $minimal = shift @alt;
164     foreach (@alt) {
165     $minimal=$_ if (length($_) < length($minimal));
166     }
167     push @out,$minimal;
168     }
169     return @out;
170     }
171    
172 dpavlin 1.1 ###############################################################################
173     1;
174     __END__
175    
176     =head1 NAME
177    
178 dpavlin 1.3 Alternative.pm - alternative spelling of a given word in a given language
179 dpavlin 1.1
180     =head1 SYNOPSIS
181    
182 dpavlin 1.6 use Lingua::Spelling::Alternative;
183 dpavlin 1.1
184 dpavlin 1.8 my $en = new Lingua::Spelling::Alternative;
185     $en->load_affix('/usr/lib/ispell/default.aff') or die $!;
186     print join(" ",$en->alternatives("cars")),"\n";
187 dpavlin 1.1
188     =head1 DESCRIPTION
189    
190     This module is designed to return all valid forms of a given word
191     (for example when you want to see all possible forms of some word
192     entered in search engine)
193    
194     =head1 PUBLIC METHODS
195    
196     =over 4
197    
198     =item new
199    
200 dpavlin 1.4 The new() constructor (without parameters) create container for new language.
201     Only parametar it supports is DEBUG which turns on (some) debugging output.
202 dpavlin 1.1
203     =item load_affix
204    
205 dpavlin 1.5 Function load_affix() loads ispell's affix file for later usage.
206 dpavlin 1.4
207     =item load_findaffix
208    
209     This function loads output of findaffix program from ispell package.
210     This is better idea (if you are creating affix file for particular language
211 dpavlin 1.5 yourself or you can get your hands on one) because affix file from ispel
212     is limited to 26 entries (because each entry is denoted by single character).
213 dpavlin 1.1
214     =item alternatives
215    
216 dpavlin 1.8 Function alternatives return array of all alternative spellings of particular
217 dpavlin 1.4 word(s). It will also return spelling which are not correct if there is
218     rule like that in affix file.
219    
220     =item minimal
221    
222     This function returns minimal of all alternatives of a given word(s). It's
223     a poor man's version of normalize (because we don't know gramatic of
224     particular language, just some spelling rules).
225 dpavlin 1.1
226 dpavlin 1.7 =back
227    
228 dpavlin 1.1 =head1 PRIVATE METHODS
229    
230     Documented as being not documented.
231    
232     =head1 EXAMPLES
233    
234     Please see the test.pl program in distribution which exercises some
235     aspects of Alternative.pm.
236    
237     =head1 BUGS
238    
239 dpavlin 1.8 There are no known bugs. If you find any, please report it in CPAN's
240     request tracker at: http://rt.cpan.org/
241 dpavlin 1.1
242     =head1 CONTACT AND COPYRIGHT
243    
244 dpavlin 1.8 Copyright 2002-2003 Dobrica Pavlinusic (dpavlin@rot13.org). All
245 dpavlin 1.1 rights reserved. This program is free software; you can redistribute
246     it and/or modify it under the same terms as Perl itself.
247    
248     =cut

  ViewVC Help
Powered by ViewVC 1.1.26