# Patch for EPrints/index.pl (the header suggests revision 13 -> 14).
#
# Hunk 1 (old line 10): adds the file-level flag `my $use_score = 0;`
# directly after `my $debug = shift @ARGV;`.
#
# Hunk 2 (old line 38, inside the `while ( ... fetchrow_hashref )` loop):
#   Removed: the code that split "title x3, keywords x2, abstract x1" into
#     words, ran each word through StemHR->stem, appended the result of
#     EPrints::slogovi( "$title $keywords $abstract" ), emitted the
#     `warn "body: ..."` debug line, appended EPrints->fulltext_content, and
#     indexed join(" ", @body, $body).
#   Added: a `$parts` hashref mapping part name => [ text, weight ] with
#     weights title=4, keywords=3, abstract=2 (the `content` entry, weight 1,
#     is commented out). Each part whose text is defined is passed through
#     EPrints->slogovi and appended to $body -- once, or `weight` times when
#     $use_score is true. Only $body is handed to $indexer->index.
#   The STDERR progress line now prints the running $total, the row id, the
#     _x()-processed title and length($body).
#
# NOTE(review): this copy of the patch is flattened onto one line; line
#   breaks, indentation and blank context lines are missing, so the hunk
#   line counts above cannot be satisfied and it will not apply as-is.
# NOTE(review): the loop iterates over `content`, but that key is commented
#   out of $parts, so the `defined` check skips it; full-text content is
#   therefore no longer indexed by the added code -- confirm this is intended.
# NOTE(review): the call changes from EPrints::slogovi(...) to
#   EPrints->slogovi(...); the arrow form passes the class name as the first
#   argument -- confirm slogovi is written to be called as a method.
# NOTE(review): parts are concatenated with no separator visible here;
#   presumably slogovi output ends in whitespace -- verify, otherwise the
#   last word of one part fuses with the first word of the next.
# NOTE(review): $use_score is initialised to 0 and nothing in this patch
#   sets it, so the weighted branch (a `map` used only for its side effect)
#   is not taken unless it is changed elsewhere.
# NOTE(review): the message says "bytes", but length() counts characters
#   when $body is a decoded string -- confirm which is meant.
--- EPrints/index.pl 2007/06/29 18:46:45 13 +++ EPrints/index.pl 2007/06/29 22:54:51 14 @@ -10,6 +10,7 @@ use StemHR; my $debug = shift @ARGV; +my $use_score = 0; my $dbh = EPrints->dbh; my $sth = $dbh->prepare(qq{ @@ -38,26 +39,32 @@ while (my $row = $sth->fetchrow_hashref ) { EPrints->id( $row->{id} ); - my ( $title, $keywords, $abstract ) = ( - _x( $row->{title} ), - EPrints->lookup( 'keywords' ), - EPrints->lookup( 'abstract' ) - ); - my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" ); - my $body = ''; - foreach my $word ( @body ) { - $body .= StemHR->stem( $word ) . ' '; - } - - $body .= EPrints::slogovi( "$title $keywords $abstract" ); + my $parts = { + title => [ _x( $row->{title} ), 4 ], + keywords => [ EPrints->lookup( 'keywords' ), 3 ], + abstract => [ EPrints->lookup( 'abstract' ), 2 ], +# content => [ EPrints->fulltext_content, 1 ], + }; - warn "body: $body\n" if $debug; + my $body = ''; - $body .= EPrints->fulltext_content; + foreach my $part ( qw/title keywords abstract content/ ) { + my $content = $parts->{$part}->[0]; + next unless defined $content; + +# $content = StemHR->stem( $content ); + $content = EPrints->slogovi( $content ); + + if ( $use_score ) { + map { $body .= $content } 1 .. $parts->{$part}->[1]; + } else { + $body .= $content; + } + } - $indexer->index( $row->{id}, join(" ", @body, $body ) ); + $indexer->index( $row->{id}, $body ); $total++; - print STDERR _x( $row->{id}, " ", $row->{title} ), "\n"; + print STDERR "$total: ", $row->{id}, " ", _x( $row->{title} ), " - ", length($body), " bytes\n"; }