/[wait]/trunk/lib/WAIT/Filter.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/lib/WAIT/Filter.pm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

branches/CPAN/lib/WAIT/Filter.pm revision 19 by ulpfr, Tue May 9 11:29:45 2000 UTC trunk/lib/WAIT/Filter.pm revision 109 by dpavlin, Tue Jul 13 17:50:27 2004 UTC
# Line 31  require Exporter; Line 31  require Exporter;
31                  isouc disouc                  isouc disouc
32                  isotr disotr                  isotr disotr
33                  stop grundform                  stop grundform
34                  utf8iso                  utf8iso
35                 );                 );
36  # (most implemented in WAIT.xs)  # (most implemented in WAIT.xs)
37    
# Line 78  sub AUTOLOAD { Line 78  sub AUTOLOAD {
78      *decode_entities = HTML::Entities->can('decode_entities');      *decode_entities = HTML::Entities->can('decode_entities');
79      goto &decode_entities;      goto &decode_entities;
80    } elsif ($func =~ /^d?utf8iso$/) {    } elsif ($func =~ /^d?utf8iso$/) {
     require WAIT::Filter::utf8iso;  
     croak "Your perl version must at least be 5.00556 to use '$func'"  
         if $] < 5.00556;  
81      no strict 'refs';      no strict 'refs';
82      *$func = \&{"WAIT::Filter::utf8iso::$func"};      *$func = sub {
83      goto &utf8iso;        # Courtesy JHI
84          my $s = shift;
85          $s =~ s{([\xC0-\xDF])([\x80-\xBF])}
86                 {chr(ord($1)<<6&0xC0|ord($2)&0x3F)}eg;
87          $s;
88        };
89        goto \&$func;
90    }    }
91    Carp::confess "Class WAIT::Filter::$func not found";    Carp::confess "Class WAIT::Filter::$func not found";
92  }  }
# Line 94  while (<DATA>) { Line 97  while (<DATA>) {
97    next if /^\s*#/; # there's a comment    next if /^\s*#/; # there's a comment
98    $STOP{$_}++;    $STOP{$_}++;
99  }  }
100    close DATA;
101    
102  sub stop {  sub stop {
103    if (exists $STOP{$_[0]}) {    if (exists $STOP{$_[0]}) {
# Line 111  sub gdate { Line 115  sub gdate {
115  }  }
116    
117  1;  1;
118    
119  __DATA__  __DATA__
120  a  a
121  about  about
# Line 570  will Line 575  will
575  with  with
576  you  you
577  __END__  __END__
 # Below is the stub of documentation for your module. You better edit it!  
578    
579  =head1 NAME  =head1 NAME
580    
# Line 579  WAIT::Filter - Perl extension providing Line 583  WAIT::Filter - Perl extension providing
583  =head1 SYNOPSIS  =head1 SYNOPSIS
584    
585    use WAIT::Filter qw(Stem Soundex Phonix isolc disolc isouc disouc    use WAIT::Filter qw(Stem Soundex Phonix isolc disolc isouc disouc
586                        isotr disotr stop grundform utf8iso);                        isotr disotr stop grundform);
587    
588    $stem   = Stem($word);    $stem   = Stem($word);
589    $scode  = Soundex($word);    $scode  = Soundex($word);
# Line 645  computes the 8 byte B<Phonix> code for I Line 649  computes the 8 byte B<Phonix> code for I
649    PY: 1990    PY: 1990
650    PM: OCT    PM: OCT
651    
652    =back
653    
654  =head1 ISO character case functions  =head1 ISO character case functions
655    
656  There are some additional functions which transpose some/most ISOlatin1  There are some additional functions which transpose some/most ISOlatin1
# Line 663  Here are the hardcoded characters which Line 669  Here are the hardcoded characters which
669    abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïñòóôõöøùúûüýß    abcdefghijklmnopqrstuvwxyzàáâãäåæçèéêëìíîïñòóôõöøùúûüýß
670    ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß    ABCDEFGHIJKLMNOPQRSTUVWXYZÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÑÒÓÔÕÖØÙÚÛÜÝß
671    
672    =over 5
673    
674  =item C<$new = >B<isolc>C<($word)>  =item C<$new = >B<isolc>C<($word)>
675    
676  =item B<disolc>C<($word)>  =item B<disolc>C<($word)>
# Line 691  Calls Text::German::reduce Line 699  Calls Text::German::reduce
699    
700  =item C<$new = >B<utf8iso>C<($word)>  =item C<$new = >B<utf8iso>C<($word)>
701    
702  Convert UTF8 encoded strings to ISO-8859-1. WAIT currently is  Deprecated due to flux in perl versions between 5.005 and 5.8. The
703  internally based on the Latin1 character set, so if you process  function converts UTF8 encoded strings to ISO-8859-1. WAIT is
704    internally still based on the Latin1 character set, so if you process
705  anything in a different encoding, you should convert to Latin1 as the  anything in a different encoding, you should convert to Latin1 as the
706  first filter.  first filter or refrain from using the iso-latin-1 based filter
707    functions. It is recommended that you use your own converter based on
708    the perl version you're using.
709    
710  =item split, split2, split3, ...  =item split, split2, split3, ...
711    
# Line 703  words. Split acts just like the perl spl Line 714  words. Split acts just like the perl spl
714  words from the list that are shorter than 2 characters (bytes), split3  words from the list that are shorter than 2 characters (bytes), split3
715  eliminates those shorter than 3 characters (bytes) and so on.  eliminates those shorter than 3 characters (bytes) and so on.
716    
717    =back
718    
719  =head1 AUTHOR  =head1 AUTHOR
720    
721  Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>  Ulrich Pfeifer E<lt>F<pfeifer@ls6.informatik.uni-dortmund.de>E<gt>

Legend:
Removed from v.19  
changed lines
  Added in v.109

  ViewVC Help
Powered by ViewVC 1.1.26