--- EPrints/index.pl 2007/06/29 22:54:51 14 +++ EPrints/index.pl 2007/06/30 13:46:51 17 @@ -6,11 +6,12 @@ use EPrints qw/_x/; -use lib '/home/dpavlin/stem-hr/'; -use StemHR; - my $debug = shift @ARGV; -my $use_score = 0; +my $use = { + score => 1, + stem => 1, + slogovi => 1, +}; my $dbh = EPrints->dbh; my $sth = $dbh->prepare(qq{ @@ -18,6 +19,8 @@ archive_title.eprintid as id, title FROM archive_title +WHERE + lang = 'hr' }) || die $dbh->errstr(); $sth->execute() || die $sth->errstr(); @@ -38,28 +41,48 @@ my $total = 0; while (my $row = $sth->fetchrow_hashref ) { - EPrints->id( $row->{id} ); + my $id = $row->{id}; + EPrints->id( $id ); my $parts = { - title => [ _x( $row->{title} ), 4 ], - keywords => [ EPrints->lookup( 'keywords' ), 3 ], - abstract => [ EPrints->lookup( 'abstract' ), 2 ], + title => [ _x( $row->{title} ), 3 ], + keywords => [ EPrints->lookup( 'keywords' ), 2 ], + abstract => [ EPrints->lookup( 'abstract' ), 1 ], # content => [ EPrints->fulltext_content, 1 ], }; + my $skip = 0; + foreach my $part ( qw/title keywords abstract/ ) { + if ( ! $parts->{$part}->[0] ) { + warn "skipped $id doesn't have required part $part\n"; + $skip = 1; + last; + } + } + next if $skip; + my $body = ''; foreach my $part ( qw/title keywords abstract content/ ) { - my $content = $parts->{$part}->[0]; - next unless defined $content; - -# $content = StemHR->stem( $content ); - $content = EPrints->slogovi( $content ); + my $content = $parts->{$part}->[0] || next; + + if ( $use->{slogovi} ) { + $body .= ' ' . EPrints->slogovi( $content ); + } + + if ( $use->{stem} ) { + my $stem = EPrints->stem( $content ); + warn "stem of '$content' didn't return anything\n" unless $stem; + $content = $stem; + } - if ( $use_score ) { - map { $body .= $content } 1 .. $parts->{$part}->[1]; + + if ( $use->{score} ) { + map { $body .= "$content " } 1 .. $parts->{$part}->[1]; } else { - $body .= $content; + $body .= "$content "; } + + warn ">>> $body <<<\n" if $debug; } $indexer->index( $row->{id}, $body );