1 |
#!/usr/bin/perl -w |
#!/usr/bin/perl -w |
2 |
|
|
3 |
# indexer, Dobrica Pavlinusic <dpavlin@rot13.org> 2001-01-28 |
# indexer, Dobrica Pavlinusic <dpavlin@rot13.org> 2002-01-28 |
4 |
# options: -q quiet |
# options: -q quiet |
5 |
# -d debug |
# -d debug |
6 |
# -v verbose |
# -v verbose |
27 |
my $zakona=0; |
my $zakona=0; |
28 |
|
|
29 |
my $hr = new Lingua::Spelling::Alternative( DEBUG => $opts{d} ); |
my $hr = new Lingua::Spelling::Alternative( DEBUG => $opts{d} ); |
30 |
$hr->load_affix("$nn_dir/search/croatian.aff"); |
#$hr->load_affix("$nn_dir/search/croatian.aff"); |
31 |
|
$hr->load_findaffix("$nn_dir/prvih_50.txt"); |
32 |
|
|
33 |
|
|
34 |
#-------------------------------------------------------------------- |
#-------------------------------------------------------------------- |
74 |
$naslov_czs =~ tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/; |
$naslov_czs =~ tr/¹©ðÐèÈæƾ®/sSdDcCcCzZ/; |
75 |
$naslov_czs =~ tr/a-zA-Z/ /cs; # non a-z -> space |
$naslov_czs =~ tr/a-zA-Z/ /cs; # non a-z -> space |
76 |
$naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs))); |
$naslov_czs = join(" ",$hr->alternatives(split(/ /,$naslov_czs))); |
77 |
|
# $naslov_czs = $hr->minimal(split(/ /,$naslov_czs)); |
78 |
$sth->execute($br,$god,$nr,$aname,$naslov,$naslov_czs) || die $dbh->errstr(); |
$sth->execute($br,$god,$nr,$aname,$naslov,$naslov_czs) || die $dbh->errstr(); |
79 |
$naslov=""; |
$naslov=""; |
80 |
$nr=0; |
$nr=0; |
82 |
} |
} |
83 |
|
|
84 |
if ($sadrzaj) { |
if ($sadrzaj) { |
85 |
if (s/<a href="#([^"]+)">\s*(\d+)\.\s*<[^>]+>//i) { |
if (s/<a href="#([^"]+)">\s*(\S+)\.\s*<[^>]+>//i) { |
86 |
($aname,$nr) = ($1,$2); |
($aname,$nr) = ($1,$2); |
87 |
} elsif (s/<a href="Javascript:Mojdok\((\d+),(\d+),(\w+),(\d+)\)[^>]*>//i) { |
} elsif (s/<a href="Javascript:Mojdok\((\d+),(\d+),(\w+),(\d+)\)[^>]*>//i) { |
88 |
($nr,$aname) = ($3,$4); |
($nr,$aname) = ($3,$4); |