# Unified diff for EPrints/index.pl (old side stamped 2007/06/29 09:08:58,
# new side stamped 2007/06/29 14:53:14; the trailing "1" and "8" are
# presumably revision numbers -- TODO confirm against the VCS log).
#
# Changes visible in the hunks below:
#   * hunk 1 (-2,14 +2,11): removes `use DBI;` and the top-level
#     `warn dump( StemHR->stem(...) )` debugging statement.
#   * hunk 2 (-33,7 +30,7): changes the argument of
#     $indexer->set_default_encoding from "utf8" to "iso-8859-2".
#   * hunk 3 (-45,11 +42,13): initialises $body to '' instead of leaving it
#     undefined before the stemming loop appends to it, and appends the result
#     of EPrints::slogovi( "$title $keywords $abstract" ) to $body after the
#     loop.
#
# NOTE(review): the line breaks of this patch have been lost -- headers, hunk
# markers and hunk bodies all sit on one physical line -- so it has to be
# re-split into one diff line per source line before patch(1) can apply it.
# NOTE(review): the literal 'kuæni' in the removed warn line looks like
# ISO-8859-2 text rendered as Latin-1 -- confirm the encoding before reuse.
--- EPrints/index.pl 2007/06/29 09:08:58 1 +++ EPrints/index.pl 2007/06/29 14:53:14 8 @@ -2,14 +2,11 @@ use strict; use Semantic::API; -use DBI; use Data::Dump qw/dump/; use EPrints qw/_x/; use lib '/home/dpavlin/stem-hr/'; use StemHR; -warn dump( StemHR->stem('kuæni') ); - my $debug = shift @ARGV; my $dbh = EPrints->dbh; @@ -33,7 +30,7 @@ maximum_word_length => 15 ); # use this encoding for any incoming text -$indexer->set_default_encoding( "utf8"); +$indexer->set_default_encoding( "iso-8859-2" ); my $total = 0; @@ -45,11 +42,13 @@ EPrints->lookup( 'abstract' ) ); my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" ); - my $body; + my $body = ''; foreach my $word ( @body ) { $body .= StemHR->stem( $word ) . ' '; } + $body .= EPrints::slogovi( "$title $keywords $abstract" ); + warn "body: $body\n" if $debug; $indexer->index( $row->{id}, join(" ", @body, $body ) );