/[Lingua-Spelling-Alternative]/Alternative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /Alternative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.6 - (hide annotations)
Wed Jul 3 08:06:06 2002 UTC (21 years, 9 months ago) by dpavlin
Branch: MAIN
Changes since 1.5: +1 -1 lines
added missing : in POD file (thanks to Mike Castle for reporting that)

1 dpavlin 1.1 # Documentation and Copyright exist after __END__
2    
3     package Lingua::Spelling::Alternative;
4     require 5.001;
5    
6     use strict;
7     use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
8    
9     use Exporter;
10     $VERSION = '0.01';
11     @ISA = ('Exporter');
12    
13     #@EXPORT = qw();
14     @EXPORT_OK = qw(
15     &alternatives
16     );
17    
18 dpavlin 1.2 my $debug=0;
19 dpavlin 1.1
20 dpavlin 1.3 #
21     # make new instance of language, get args
22     #
23 dpavlin 1.1 sub new {
24     my $class = shift;
25     my $self = {};
26     bless($self, $class);
27     $self->{ARGS} = {@_};
28 dpavlin 1.2 $debug = $self->{ARGS}->{DEBUG};
29 dpavlin 1.3 @{$self->{affix_regexp}} = ();
30     @{$self->{affix_add}} = ();
31     @{$self->{affix_sub}} = ();
32 dpavlin 1.1 $self ? return $self : return undef;
33     }
34    
35    
36     #
37     # load affix file in internal structures
38     #
39    
40     sub load_affix {
41     my $self = shift;
42     my $filename = shift;
43    
44     my $suffixes=0;
45    
46     my ($regexp,$add,$sub);
47    
48     print STDERR "reading affix file $filename\n" if ($debug);
49    
50     open (A,$filename) || die "Can't open affix file $filename: $!";
51     while(<A>) {
52     chomp;
53     next if (/^#|^[\s\t\n\r]*$/);
54    
55     if (/^suffixes/i) {
56     $suffixes++;
57     next;
58     }
59    
60     next if (! $suffixes);
61    
62     if (/^flag[\s\t]+\*{0,1}(.):/i) {
63     undef $regexp;
64     undef $add;
65     undef $sub;
66     next;
67     }
68    
69     if (/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/) {
70     $regexp = $1;
71     $add = $2;
72     $sub = $3 if ($3 ne "-");
73     } elsif (/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/) {
74     $regexp = $1;
75     $sub = $2;
76     }
77    
78     sub nuke_s {
79     my $tmp = $_[0];
80     return if (!$tmp);
81 dpavlin 1.3 # $tmp=~s/^\s+//g;
82     # $tmp=~s/\s+$//g;
83     $tmp=~s/\s+//g;
84 dpavlin 1.1 return $tmp;
85     }
86    
87     push @{$self->{affix_regexp}},nuke_s($regexp);
88     push @{$self->{affix_add}},nuke_s($add);
89     push @{$self->{affix_sub}},nuke_s($sub);
90     }
91     return 1;
92     }
93    
94     #
95 dpavlin 1.3 # function for reading raw findaffix output
96     #
97    
98     sub load_findaffix {
99     my $self = shift;
100     my $filename = shift;
101    
102     print STDERR "reading findaffix output $filename\n" if ($debug);
103    
104     open (A,$filename) || die "Can't open findaffix output $filename: $!";
105     while(<A>) {
106     chomp;
107     my @line=split(m;/;,$_,4);
108     if ($#line > 2) {
109     push @{$self->{affix_regexp}},'.';
110     push @{$self->{affix_sub}},$line[0];
111     push @{$self->{affix_add}},$line[1];
112     }
113     }
114     return 1;
115     }
116    
117     #
118 dpavlin 1.1 # function which returns original word and all alternatives
119     #
120    
121     sub alternatives {
122     my $self = shift;
123     my @out;
124     foreach my $word (@_) {
125     push @out,$word; # save original word
126     next if (length($word) < 3); # cludge: preskoci kratke
127     for(my $i=0; $i<=$#{$self->{affix_regexp}}; $i++) {
128     my $regexp = $self->{affix_regexp}[$i];
129     my $add = $self->{affix_add}[$i];
130     my $sub = $self->{affix_sub}[$i];
131 dpavlin 1.3 print STDERR "r:'$regexp'\t-'",$sub||'',"'\t+'",$add||'',"'\n" if ($debug);
132 dpavlin 1.1 next if length($word) < length($sub);
133     my $tmp_word = $word;
134     if ($sub) {
135     next if ($word !~ m/$sub$/i);
136     if ($add) {
137     $tmp_word =~ s/$sub$/$add/i;
138     } else {
139     $tmp_word =~ s/$sub$//i;
140     }
141     } else {
142     $tmp_word = $word.$add;
143     }
144 dpavlin 1.3 print STDERR "\t ?:$tmp_word\n" if ($debug);
145 dpavlin 1.1 if ($tmp_word =~ m/$regexp/ix) {
146     # print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
147     push @out,lc($tmp_word);
148     }
149     }
150     }
151     return @out;
152     }
153    
154 dpavlin 1.4 #
155     # function which return minimal word of all alternatives
156     #
157    
158     sub minimal {
159     my $self = shift;
160     my @out;
161     foreach my $word (@_) {
162     my @alt = $self->alternatives($word);
163     my $minimal = shift @alt;
164     foreach (@alt) {
165     $minimal=$_ if (length($_) < length($minimal));
166     }
167     push @out,$minimal;
168     }
169     return @out;
170     }
171    
172 dpavlin 1.1 ###############################################################################
173     1;
174     __END__
175    
176     =head1 NAME
177    
178 dpavlin 1.3 Alternative.pm - alternative spelling of a given word in a given language
179 dpavlin 1.1
180     =head1 SYNOPSIS
181    
182 dpavlin 1.6 use Lingua::Spelling::Alternative;
183 dpavlin 1.1
184     my $en = new Alternative;
185     $en->load_affix('/usr/lib/ispell/english.aff') or die $!;
186     print $en->alternatives("cars");
187    
188     =head1 DESCRIPTION
189    
190     This module is designed to return all valid forms of a given word
191     (for example when you want to see all possible forms of some word
192     entered in search engine)
193    
194     =head1 PUBLIC METHODS
195    
196     =over 4
197    
198     =item new
199    
200 dpavlin 1.4 The new() constructor (without parameters) create container for new language.
201     Only parametar it supports is DEBUG which turns on (some) debugging output.
202 dpavlin 1.1
203     =item load_affix
204    
205 dpavlin 1.5 Function load_affix() loads ispell's affix file for later usage.
206 dpavlin 1.4
207     =item load_findaffix
208    
209     This function loads output of findaffix program from ispell package.
210     This is better idea (if you are creating affix file for particular language
211 dpavlin 1.5 yourself or you can get your hands on one) because affix file from ispel
212     is limited to 26 entries (because each entry is denoted by single character).
213 dpavlin 1.1
214     =item alternatives
215    
216 dpavlin 1.4 Function alternatives return all alternative spellings of particular
217     word(s). It will also return spelling which are not correct if there is
218     rule like that in affix file.
219    
220     =item minimal
221    
222     This function returns minimal of all alternatives of a given word(s). It's
223     a poor man's version of normalize (because we don't know gramatic of
224     particular language, just some spelling rules).
225 dpavlin 1.1
226     =head1 PRIVATE METHODS
227    
228     Documented as being not documented.
229    
230     =head1 EXAMPLES
231    
232     Please see the test.pl program in distribution which exercises some
233     aspects of Alternative.pm.
234    
235     =head1 BUGS
236    
237     There are no known bugs.
238    
239     =head1 CONTACT AND COPYRIGHT
240    
241     Copyright 2002 Dobrica Pavlinusic (dpavlin@rot13.org). All
242     rights reserved. This program is free software; you can redistribute
243     it and/or modify it under the same terms as Perl itself.
244    
245     =cut

  ViewVC Help
Powered by ViewVC 1.1.26