3 |
use strict; |
use strict; |
4 |
use Semantic::API; |
use Semantic::API; |
5 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
6 |
|
|
7 |
use EPrints qw/_x/; |
use EPrints qw/_x/; |
8 |
|
|
9 |
use lib '/home/dpavlin/stem-hr/'; |
use lib '/home/dpavlin/stem-hr/'; |
10 |
use StemHR; |
use StemHR; |
11 |
|
|
|
warn dump( StemHR->stem('kuæni') ); |
|
|
|
|
12 |
my $debug = shift @ARGV; |
my $debug = shift @ARGV; |
13 |
|
|
14 |
my $dbh = EPrints->dbh; |
my $dbh = EPrints->dbh; |
32 |
maximum_word_length => 15 ); |
maximum_word_length => 15 ); |
33 |
|
|
34 |
# use this encoding for any incoming text |
# use this encoding for any incoming text |
35 |
$indexer->set_default_encoding( "utf8"); |
$indexer->set_default_encoding( "iso-8859-2" ); |
36 |
|
|
37 |
my $total = 0; |
my $total = 0; |
38 |
|
|
44 |
EPrints->lookup( 'abstract' ) |
EPrints->lookup( 'abstract' ) |
45 |
); |
); |
46 |
my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" ); |
my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" ); |
47 |
my $body; |
my $body = ''; |
48 |
foreach my $word ( @body ) { |
foreach my $word ( @body ) { |
49 |
$body .= StemHR->stem( $word ) . ' '; |
$body .= StemHR->stem( $word ) . ' '; |
50 |
} |
} |
51 |
|
|
52 |
|
$body .= EPrints::slogovi( "$title $keywords $abstract" ); |
53 |
|
|
54 |
warn "body: $body\n" if $debug; |
warn "body: $body\n" if $debug; |
55 |
|
|
56 |
$indexer->index( $row->{id}, join(" ", @body, $body ) ); |
$body .= EPrints->fulltext_content; |
57 |
|
|
58 |
|
$indexer->index( $row->{id}, join(" ", @body, $body ) ); |
59 |
$total++; |
$total++; |
60 |
print STDERR _x( $row->{id}, " ", $row->{title} ), "\n"; |
print STDERR _x( $row->{id}, " ", $row->{title} ), "\n"; |
61 |
} |
} |