/[Lingua-Spelling-Alternative]/Alternative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /Alternative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (show annotations)
Mon Feb 11 20:19:59 2002 UTC (22 years, 1 month ago) by dpavlin
Branch: MAIN
Changes since 1.2: +35 -10 lines
support for output from loadaffix

1 # Documentation and Copyright exist after __END__
2
3 package Lingua::Spelling::Alternative;
4 require 5.001;
5
6 use strict;
7 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
8
9 use Exporter;
10 $VERSION = '0.01';
11 @ISA = ('Exporter');
12
13 #@EXPORT = qw();
14 @EXPORT_OK = qw(
15 &alternatives
16 &load_affix
17 );
18
19 my $debug=0;
20
21 #
22 # make new instance of language, get args
23 #
24 sub new {
25 my $class = shift;
26 my $self = {};
27 bless($self, $class);
28 $self->{ARGS} = {@_};
29 $debug = $self->{ARGS}->{DEBUG};
30 @{$self->{affix_regexp}} = ();
31 @{$self->{affix_add}} = ();
32 @{$self->{affix_sub}} = ();
33 $self ? return $self : return undef;
34 }
35
36
37 #
38 # load affix file in internal structures
39 #
40
41 sub load_affix {
42 my $self = shift;
43 my $filename = shift;
44
45 my $suffixes=0;
46
47 my ($regexp,$add,$sub);
48
49 print STDERR "reading affix file $filename\n" if ($debug);
50
51 open (A,$filename) || die "Can't open affix file $filename: $!";
52 while(<A>) {
53 chomp;
54 next if (/^#|^[\s\t\n\r]*$/);
55
56 if (/^suffixes/i) {
57 $suffixes++;
58 next;
59 }
60
61 next if (! $suffixes);
62
63 if (/^flag[\s\t]+\*{0,1}(.):/i) {
64 undef $regexp;
65 undef $add;
66 undef $sub;
67 next;
68 }
69
70 if (/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/) {
71 $regexp = $1;
72 $add = $2;
73 $sub = $3 if ($3 ne "-");
74 } elsif (/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/) {
75 $regexp = $1;
76 $sub = $2;
77 }
78
79 sub nuke_s {
80 my $tmp = $_[0];
81 return if (!$tmp);
82 # $tmp=~s/^\s+//g;
83 # $tmp=~s/\s+$//g;
84 $tmp=~s/\s+//g;
85 return $tmp;
86 }
87
88 push @{$self->{affix_regexp}},nuke_s($regexp);
89 push @{$self->{affix_add}},nuke_s($add);
90 push @{$self->{affix_sub}},nuke_s($sub);
91 }
92 return 1;
93 }
94
95 #
96 # function for reading raw findaffix output
97 #
98
99 sub load_findaffix {
100 my $self = shift;
101 my $filename = shift;
102
103 print STDERR "reading findaffix output $filename\n" if ($debug);
104
105 open (A,$filename) || die "Can't open findaffix output $filename: $!";
106 while(<A>) {
107 chomp;
108 my @line=split(m;/;,$_,4);
109 if ($#line > 2) {
110 push @{$self->{affix_regexp}},'.';
111 push @{$self->{affix_sub}},$line[0];
112 push @{$self->{affix_add}},$line[1];
113 }
114 }
115 return 1;
116 }
117
118 #
119 # function which returns original word and all alternatives
120 #
121
122 sub alternatives {
123 my $self = shift;
124 my @out;
125 foreach my $word (@_) {
126 push @out,$word; # save original word
127 next if (length($word) < 3); # cludge: preskoci kratke
128 for(my $i=0; $i<=$#{$self->{affix_regexp}}; $i++) {
129 my $regexp = $self->{affix_regexp}[$i];
130 my $add = $self->{affix_add}[$i];
131 my $sub = $self->{affix_sub}[$i];
132 print STDERR "r:'$regexp'\t-'",$sub||'',"'\t+'",$add||'',"'\n" if ($debug);
133 next if length($word) < length($sub);
134 my $tmp_word = $word;
135 if ($sub) {
136 next if ($word !~ m/$sub$/i);
137 if ($add) {
138 $tmp_word =~ s/$sub$/$add/i;
139 } else {
140 $tmp_word =~ s/$sub$//i;
141 }
142 } else {
143 $tmp_word = $word.$add;
144 }
145 print STDERR "\t ?:$tmp_word\n" if ($debug);
146 if ($tmp_word =~ m/$regexp/ix) {
147 # print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
148 push @out,lc($tmp_word);
149 }
150 }
151 }
152 return @out;
153 }
154
155 ###############################################################################
156 1;
157 __END__
158
159 =head1 NAME
160
161 Alternative.pm - alternative spelling of a given word in a given language
162
163 =head1 SYNOPSIS
164
165 use Lingua::Spelling:Alternative;
166
167 my $en = new Alternative;
168 $en->load_affix('/usr/lib/ispell/english.aff') or die $!;
169 print $en->alternatives("cars");
170
171 =head1 DESCRIPTION
172
173 This module is designed to return all valid forms of a given word
174 (for example when you want to see all possible forms of some word
175 entered in search engine)
176
177 =head1 PUBLIC METHODS
178
179 =over 4
180
181 =item new
182
183 The new() constructor (without parameters) create container for new
184 language.
185 Only parametar it supports is DEBUG which turns on (some) debugging
186 output.
187
188 =item load_affix
189
190 Function load_affix loads ispell's affix file.
191
192 =item alternatives
193
194 Function alternatives
195
196 =head1 PRIVATE METHODS
197
198 Documented as being not documented.
199
200 =head1 EXAMPLES
201
202 Please see the test.pl program in distribution which exercises some
203 aspects of Alternative.pm.
204
205 =head1 BUGS
206
207 There are no known bugs.
208
209 =head1 CONTACT AND COPYRIGHT
210
211 Copyright 2002 Dobrica Pavlinusic (dpavlin@rot13.org). All
212 rights reserved. This program is free software; you can redistribute
213 it and/or modify it under the same terms as Perl itself.
214
215 =cut

  ViewVC Help
Powered by ViewVC 1.1.26