7 |
use lib '/home/dpavlin/stem-hr/'; |
use lib '/home/dpavlin/stem-hr/'; |
8 |
use StemHR; |
use StemHR; |
9 |
|
|
|
warn dump( StemHR->stem('kuæni') ); |
|
|
|
|
10 |
my $debug = shift @ARGV; |
my $debug = shift @ARGV; |
11 |
|
|
|
my $type = 'slogovi'; |
|
|
|
|
12 |
my $dbh = EPrints->dbh; |
my $dbh = EPrints->dbh; |
13 |
my $sth = $dbh->prepare(qq{ |
my $sth = $dbh->prepare(qq{ |
14 |
SELECT |
SELECT |
30 |
maximum_word_length => 15 ); |
maximum_word_length => 15 ); |
31 |
|
|
32 |
# use this encoding for any incoming text |
# use this encoding for any incoming text |
33 |
#$indexer->set_default_encoding( "utf8"); |
$indexer->set_default_encoding( "iso-8859-2" ); |
34 |
|
|
35 |
my $total = 0; |
my $total = 0; |
36 |
|
|
42 |
EPrints->lookup( 'abstract' ) |
EPrints->lookup( 'abstract' ) |
43 |
); |
); |
44 |
my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" ); |
my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" ); |
45 |
my $body; |
my $body = ''; |
46 |
foreach my $word ( @body ) { |
foreach my $word ( @body ) { |
47 |
# $body .= StemHR->stem( $word ) . ' '; |
$body .= StemHR->stem( $word ) . ' '; |
|
$body .= join(" ",EPrints::slogovi( $word )) . ' '; |
|
48 |
} |
} |
49 |
|
|
50 |
|
$body .= EPrints::slogovi( "$title $keywords $abstract" ); |
51 |
|
|
52 |
warn "body: $body\n" if $debug; |
warn "body: $body\n" if $debug; |
53 |
|
|
54 |
$indexer->index( $row->{id}, join(" ", @body, $body ) ); |
$indexer->index( $row->{id}, join(" ", @body, $body ) ); |