/[wait]/trunk/lib/WAIT/Filter.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WAIT/Filter.pm

Parent Directory | Revision Log | View Patch

cvs-head/lib/WAIT/Filter.pm revision 41 by laperla, Mon Nov 13 20:25:49 2000 UTC trunk/lib/WAIT/Filter.pm revision 118 by dpavlin, Fri Jul 15 18:59:10 2005 UTC
# Line 16  package WAIT::Filter; Line 16  package WAIT::Filter;
16  require WAIT;  require WAIT;
17  use strict;  use strict;
18  use Carp;  use Carp;
19  use vars qw($VERSION @ISA @EXPORT_OK %STOP $SPLIT $AUTOLOAD);  use vars qw($VERSION @ISA @EXPORT_OK %STOP $SPLIT $UNAC $ICONV $AUTOLOAD);
20  use subs qw(grundform);  use subs qw(grundform);
21    
22    use Text::Unaccent;
23    use Text::Iconv;
24    
25  require Exporter;  require Exporter;
26    
27  @ISA = qw(Exporter);  @ISA = qw(Exporter);
# Line 47  $SPLIT = q[ Line 50  $SPLIT = q[
50                           }                           }
51            ];            ];
52    
53    $UNAC = q[
54            sub unac_CHARSET {
55                    map split(' ',unac_string('CHARSET', $_) || $_), @_;
56            }
57    ];
58    
59    my $iconv;
60    
61    $ICONV = q[
62            sub iconv_CHARSETfrom_CHARSETto {
63                    my $ic = $iconv->{'CHARSETfrom_CHARSETto'});
64                    $ic ||= $iconv->{'CHARSETfrom_CHARSETto'} = Text::Iconv->new('CHARSETfrom','CHARSETto');
65                    map split(' ',$ic->convert($_) || $_), @_;
66            }
67    ];
68                    
69    
70  sub AUTOLOAD {  sub AUTOLOAD {
71    my $func = $AUTOLOAD; $func =~ s/.*:://;    my $func = $AUTOLOAD; $func =~ s/.*:://;
72    
# Line 73  sub AUTOLOAD { Line 93  sub AUTOLOAD {
93      goto \&date;      goto \&date;
94    } elsif ($func eq 'decode_entities') {    } elsif ($func eq 'decode_entities') {
95      eval {require HTML::Entities;};      eval {require HTML::Entities;};
96      croak "You must have HTML::Entities to use 'date'"      croak "You must have HTML::Entities to use 'decode_entities'"
97        if $@ ne '';        if $@ ne '';
98      *decode_entities = HTML::Entities->can('decode_entities');      *decode_entities = HTML::Entities->can('decode_entities');
99      goto &decode_entities;      goto &decode_entities;
# Line 87  sub AUTOLOAD { Line 107  sub AUTOLOAD {
107        $s;        $s;
108      };      };
109      goto \&$func;      goto \&$func;
110      } elsif ($func =~ /unac_(.+)/) {
111        my $charset = $1;
112        my $unac = $UNAC;
113        $unac =~ s/CHARSET/$charset/g;
114    print "### $unac ###\n";
115        eval $unac;
116        if ($@ eq '') {
117         goto &$func;
118        }
119      } elsif ($func =~ /iconv_([^_]+)_([^_]+)/) {
120        my ($cf,$ct) = ($1,$2);
121        my $iconv = $ICONV;
122    print "### $cf -> $ct\n";
123        $iconv =~ s/CHARSETfrom/$cf/gs;
124        $iconv =~ s/CHARSETto/$ct/gs;
125    print "### $iconv ###\n";
126        eval $iconv;
127        if ($@ eq '') {
128         goto &$func;
129        }
130    }    }
131    Carp::confess "Class WAIT::Filter::$func not found";    Carp::confess "Class WAIT::Filter::$func not found";
132  }  }
# Line 97  while (<DATA>) { Line 137  while (<DATA>) {
137    next if /^\s*#/; # there's a comment    next if /^\s*#/; # there's a comment
138    $STOP{$_}++;    $STOP{$_}++;
139  }  }
140    close DATA;
141    
142  sub stop {  sub stop {
143    if (exists $STOP{$_[0]}) {    if (exists $STOP{$_[0]}) {
# Line 114  sub gdate { Line 155  sub gdate {
155  }  }
156    
157  1;  1;
158    
159  __DATA__  __DATA__
160  a  a
161  about  about
# Line 573  will Line 615  will
615  with  with
616  you  you
617  __END__  __END__
 # Below is the stub of documentation for your module. You better edit it!  
618    
619  =head1 NAME  =head1 NAME
620    
# Line 648  computes the 8 byte B<Phonix> code for I Line 689  computes the 8 byte B<Phonix> code for I
689    PY: 1990    PY: 1990
690    PM: OCT    PM: OCT
691    
692    =back
693    
694  =head1 ISO character case functions  =head1 ISO character case functions
695    
696  There are some additional functions which transpose some/most ISOlatin1  There are some additional functions which transpose some/most ISOlatin1
# Line 666  Here are the hardcoded characters which Line 709  Here are the hardcoded characters which
709    abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïñòóôõöøùúûüýß    abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïñòóôõöøùúûüýß
710    ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß    ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß
711    
712    =over 5
713    
714  =item C<$new = >B<isolc>C<($word)>  =item C<$new = >B<isolc>C<($word)>
715    
716  =item B<disolc>C<($word)>  =item B<disolc>C<($word)>
# Line 709  words. Split acts just like the perl spl Line 754  words. Split acts just like the perl spl
754  words from the list that are shorter than 2 characters (bytes), split3  words from the list that are shorter than 2 characters (bytes), split3
755  eliminates those shorter than 3 characters (bytes) and so on.  eliminates those shorter than 3 characters (bytes) and so on.
756    
757    =back
758    
759  =head1 AUTHOR  =head1 AUTHOR
760    
761  Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>  Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Legend:
Removed from v.41  
changed lines
  Added in v.118

  ViewVC Help
Powered by ViewVC 1.1.26