/[Lingua-Spelling-Alternative]/Alternative.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /Alternative.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations)
Mon Feb 11 20:19:59 2002 UTC (22 years, 2 months ago) by dpavlin
Branch: MAIN
Changes since 1.2: +35 -10 lines
support for output from loadaffix

1 dpavlin 1.1 # Documentation and Copyright exist after __END__
2    
3     package Lingua::Spelling::Alternative;
4     require 5.001;
5    
6     use strict;
7     use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
8    
9     use Exporter;
10     $VERSION = '0.01';
11     @ISA = ('Exporter');
12    
13     #@EXPORT = qw();
14     @EXPORT_OK = qw(
15     &alternatives
16     &load_affix
17     );
18    
19 dpavlin 1.2 my $debug=0;
20 dpavlin 1.1
21 dpavlin 1.3 #
22     # make new instance of language, get args
23     #
24 dpavlin 1.1 sub new {
25     my $class = shift;
26     my $self = {};
27     bless($self, $class);
28     $self->{ARGS} = {@_};
29 dpavlin 1.2 $debug = $self->{ARGS}->{DEBUG};
30 dpavlin 1.3 @{$self->{affix_regexp}} = ();
31     @{$self->{affix_add}} = ();
32     @{$self->{affix_sub}} = ();
33 dpavlin 1.1 $self ? return $self : return undef;
34     }
35    
36    
37     #
38     # load affix file in internal structures
39     #
40    
41     sub load_affix {
42     my $self = shift;
43     my $filename = shift;
44    
45     my $suffixes=0;
46    
47     my ($regexp,$add,$sub);
48    
49     print STDERR "reading affix file $filename\n" if ($debug);
50    
51     open (A,$filename) || die "Can't open affix file $filename: $!";
52     while(<A>) {
53     chomp;
54     next if (/^#|^[\s\t\n\r]*$/);
55    
56     if (/^suffixes/i) {
57     $suffixes++;
58     next;
59     }
60    
61     next if (! $suffixes);
62    
63     if (/^flag[\s\t]+\*{0,1}(.):/i) {
64     undef $regexp;
65     undef $add;
66     undef $sub;
67     next;
68     }
69    
70     if (/^[\s\t]*([^>#]+)>[\s\t]+-([^\,\s\t]+),([^\s\t]+)/) {
71     $regexp = $1;
72     $add = $2;
73     $sub = $3 if ($3 ne "-");
74     } elsif (/^[\s\t]*([^>#]+)>[\s\t]+([^\s\t\#]+)/) {
75     $regexp = $1;
76     $sub = $2;
77     }
78    
79     sub nuke_s {
80     my $tmp = $_[0];
81     return if (!$tmp);
82 dpavlin 1.3 # $tmp=~s/^\s+//g;
83     # $tmp=~s/\s+$//g;
84     $tmp=~s/\s+//g;
85 dpavlin 1.1 return $tmp;
86     }
87    
88     push @{$self->{affix_regexp}},nuke_s($regexp);
89     push @{$self->{affix_add}},nuke_s($add);
90     push @{$self->{affix_sub}},nuke_s($sub);
91     }
92     return 1;
93     }
94    
95     #
96 dpavlin 1.3 # function for reading raw findaffix output
97     #
98    
99     sub load_findaffix {
100     my $self = shift;
101     my $filename = shift;
102    
103     print STDERR "reading findaffix output $filename\n" if ($debug);
104    
105     open (A,$filename) || die "Can't open findaffix output $filename: $!";
106     while(<A>) {
107     chomp;
108     my @line=split(m;/;,$_,4);
109     if ($#line > 2) {
110     push @{$self->{affix_regexp}},'.';
111     push @{$self->{affix_sub}},$line[0];
112     push @{$self->{affix_add}},$line[1];
113     }
114     }
115     return 1;
116     }
117    
118     #
119 dpavlin 1.1 # function which returns original word and all alternatives
120     #
121    
122     sub alternatives {
123     my $self = shift;
124     my @out;
125     foreach my $word (@_) {
126     push @out,$word; # save original word
127     next if (length($word) < 3); # cludge: preskoci kratke
128     for(my $i=0; $i<=$#{$self->{affix_regexp}}; $i++) {
129     my $regexp = $self->{affix_regexp}[$i];
130     my $add = $self->{affix_add}[$i];
131     my $sub = $self->{affix_sub}[$i];
132 dpavlin 1.3 print STDERR "r:'$regexp'\t-'",$sub||'',"'\t+'",$add||'',"'\n" if ($debug);
133 dpavlin 1.1 next if length($word) < length($sub);
134     my $tmp_word = $word;
135     if ($sub) {
136     next if ($word !~ m/$sub$/i);
137     if ($add) {
138     $tmp_word =~ s/$sub$/$add/i;
139     } else {
140     $tmp_word =~ s/$sub$//i;
141     }
142     } else {
143     $tmp_word = $word.$add;
144     }
145 dpavlin 1.3 print STDERR "\t ?:$tmp_word\n" if ($debug);
146 dpavlin 1.1 if ($tmp_word =~ m/$regexp/ix) {
147     # print "$word -> $tmp_word\t-$sub, +$add, regexp: $regexp\n";
148     push @out,lc($tmp_word);
149     }
150     }
151     }
152     return @out;
153     }
154    
155     ###############################################################################
156     1;
157     __END__
158    
159     =head1 NAME
160    
161 dpavlin 1.3 Alternative.pm - alternative spelling of a given word in a given language
162 dpavlin 1.1
163     =head1 SYNOPSIS
164    
165     use Lingua::Spelling:Alternative;
166    
167     my $en = new Alternative;
168     $en->load_affix('/usr/lib/ispell/english.aff') or die $!;
169     print $en->alternatives("cars");
170    
171     =head1 DESCRIPTION
172    
173     This module is designed to return all valid forms of a given word
174     (for example when you want to see all possible forms of some word
175     entered in search engine)
176    
177     =head1 PUBLIC METHODS
178    
179     =over 4
180    
181     =item new
182    
183     The new() constructor (without parameters) create container for new
184     language.
185 dpavlin 1.2 Only parametar it supports is DEBUG which turns on (some) debugging
186     output.
187 dpavlin 1.1
188     =item load_affix
189    
190     Function load_affix loads ispell's affix file.
191    
192     =item alternatives
193    
194     Function alternatives
195    
196     =head1 PRIVATE METHODS
197    
198     Documented as being not documented.
199    
200     =head1 EXAMPLES
201    
202     Please see the test.pl program in distribution which exercises some
203     aspects of Alternative.pm.
204    
205     =head1 BUGS
206    
207     There are no known bugs.
208    
209     =head1 CONTACT AND COPYRIGHT
210    
211     Copyright 2002 Dobrica Pavlinusic (dpavlin@rot13.org). All
212     rights reserved. This program is free software; you can redistribute
213     it and/or modify it under the same terms as Perl itself.
214    
215     =cut

  ViewVC Help
Powered by ViewVC 1.1.26