/[Semantic-Engine]/EPrints/index.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /EPrints/index.pl

Parent Directory | Revision Log


Revision 13 - (hide annotations)
Fri Jun 29 18:46:45 2007 UTC (16 years, 10 months ago) by dpavlin
File MIME type: text/plain
File size: 1496 byte(s)
added EPrints->fulltext_content
#!/usr/bin/perl -w

# Build a Semantic::API index from EPrints records.
#
# Reads every (eprintid, title) row from the archive_title table, combines
# the title with the record's keywords, abstract and full-text content, and
# feeds the result to a Semantic::API::Index stored in the SQLite database
# 'eprints.db' under the collection name 'EPrints'.
#
# Usage: index.pl [debug]
#   Any true first command-line argument turns on debug output (the text
#   built for each record is emitted with warn).

use strict;
use Semantic::API;
use Data::Dump qw/dump/;

use EPrints qw/_x/;

use lib '/home/dpavlin/stem-hr/';
use StemHR;

my $debug = shift @ARGV;

# Fetch the id and title of every record.
my $dbh = EPrints->dbh;
my $sth = $dbh->prepare(qq{
    SELECT
        archive_title.eprintid as id,
        title
    FROM archive_title
}) or die $dbh->errstr();
$sth->execute() or die $sth->errstr();

my $indexer = Semantic::API::Index->new(
    storage    => 'sqlite',
    database   => 'eprints.db',
    collection => 'EPrints',
);

$indexer->add_word_filters(
    minimum_length      => 3,
    too_many_numbers    => 10,
    maximum_word_length => 15,
);

# use this encoding for any incoming text
$indexer->set_default_encoding( "iso-8859-2" );

my $total = 0;

while ( my $record = $sth->fetchrow_hashref ) {
    my $id = $record->{id};

    # Select the current record; the lookup() and fulltext_content calls
    # below take no id argument, so they presumably rely on this -- TODO
    # confirm against the EPrints module.
    EPrints->id( $id );

    # NOTE: all three calls on the right-hand side run in list context.
    # Splitting this into separate scalar assignments would change the
    # calling context, so it is kept as one list assignment.
    my ( $title, $keywords, $abstract ) = (
        _x( $record->{title} ),
        EPrints->lookup( 'keywords' ),
        EPrints->lookup( 'abstract' ),
    );

    # The title appears three times and the keywords twice -- presumably to
    # give them more weight than the abstract in the index (TODO confirm).
    my @words = split( /\W*\s+\W*/,
        "$title $title $title $keywords $keywords $abstract" );

    # Stemmed form of every word, each followed by a single space.
    my $text = '';
    for my $term ( @words ) {
        $text .= StemHR->stem( $term ) . ' ';
    }

    # EPrints::slogovi is defined outside this file; its output is appended
    # as-is.
    $text .= EPrints::slogovi( "$title $keywords $abstract" );

    # Debug output is emitted before the full text is appended, so it shows
    # only the stems and the slogovi() output.
    if ( $debug ) {
        warn "body: $text\n";
    }

    $text .= EPrints->fulltext_content;

    # The indexed document is the unstemmed words followed by the text
    # assembled above, all joined with single spaces.
    $indexer->index( $id, join( " ", @words, $text ) );
    $total += 1;

    # Progress line on STDERR for each record.
    print STDERR _x( $id, " ", $record->{title} ), "\n";
}


print STDERR "\nNow adding $total items to the database...";
$indexer->finish();
print STDERR "done!\n";


Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26