40 |
my $out = ""; |
my $out = ""; |
41 |
while (my $fld = shift @_) { |
while (my $fld = shift @_) { |
42 |
if ($fld =~ s/\s*[,;\.!?'"<>\[\]]*\s+/ /g) { |
if ($fld =~ s/\s*[,;\.!?'"<>\[\]]*\s+/ /g) { |
43 |
|
$fld =~ s/></ /g; |
44 |
foreach my $w (split(/\s+/,$fld)) { |
foreach my $w (split(/\s+/,$fld)) { |
45 |
# FIX: this should be replaced by stemmer! |
# FIX: this should be replaced by stemmer! |
46 |
$out .= "W $w $nr\n"; |
# remove chars from beginning of word |
47 |
|
$w =~ s/^['"<(\[]//g; |
48 |
|
# remove chars from end of word |
49 |
|
$w =~ s/[,;\.!?'">)\]]$//g; |
50 |
|
# preglasi (umlauts) öÖäÄüÜ -> oe, ae, ue |
51 |
|
if ($w =~ m/[]/) { |
52 |
|
$out .= "W $w $nr\n"; |
53 |
|
$w =~ s/[]/oe/g; |
54 |
|
$w =~ s/[]/ae/g; |
55 |
|
$w =~ s/[]/ue/g; |
56 |
|
$out .= "W $w $nr\n"; |
57 |
|
} else { |
58 |
|
$out .= "W $w $nr\n"; |
59 |
|
} |
60 |
} |
} |
61 |
} else { |
} else { |
62 |
$out .= "W $fld $nr\n"; |
$out .= "W $fld $nr\n"; |