/[Semantic-Engine]/EPrints/index.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /EPrints/index.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1 - (hide annotations)
Fri Jun 29 09:08:58 2007 UTC (16 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 1432 byte(s)
EPrints indexer and searcher for Semantic-Engine
at http://www.hirank.com/semantic-indexing-project/index.html

#!/usr/bin/perl

# EPrints indexer for Semantic-Engine.
#
# Reads every (eprintid, title) row from the archive_title table, fetches the
# keywords and abstract of each eprint through the EPrints helper module, and
# feeds the words of that text, followed by their StemHR stems, into a
# Semantic::API index stored in the SQLite database 'eprints.db'.
#
# Usage: index.pl [debug]
#   debug - any true first argument enables diagnostic output on STDERR

use strict;
use warnings;
use Semantic::API;
use DBI;
use Data::Dump qw/dump/;
use EPrints qw/_x/;
use lib '/home/dpavlin/stem-hr/';
use StemHR;

my $debug = shift @ARGV;

# Stemmer smoke test; only emitted in debug mode so that normal runs keep
# STDERR for progress output.
# NOTE(review): the literal was garbled in transit; 'kućni' is the
# reconstructed word. It is written here as UTF-8 bytes (no "use utf8") --
# confirm which encoding StemHR expects.
warn dump( StemHR->stem('kućni') ) if $debug;

my $dbh = EPrints->dbh;
my $sth = $dbh->prepare(qq{
    SELECT
        archive_title.eprintid as id,
        title
    FROM archive_title
}) or die $dbh->errstr();
$sth->execute() or die $sth->errstr();

my $indexer = Semantic::API::Index->new(
    storage    => 'sqlite',
    database   => 'eprints.db',
    collection => 'EPrints',
);

$indexer->add_word_filters(
    minimum_length      => 3,
    too_many_numbers    => 10,
    maximum_word_length => 15,
);

# use this encoding for any incoming text
$indexer->set_default_encoding("utf8");

my $total = 0;

while ( my $row = $sth->fetchrow_hashref ) {

    # Presumably selects the record that the lookup() calls below read from.
    EPrints->id( $row->{id} );

    # NOTE(review): these calls run in list context; if one of them ever
    # returns an empty list or several values, the values shift between
    # fields. Verify against the EPrints module.
    my ( $title, $keywords, $abstract ) = (
        _x( $row->{title} ),
        EPrints->lookup('keywords'),
        EPrints->lookup('abstract'),
    );

    # A missing field must not inject undef into the indexed text.
    foreach my $field ( $title, $keywords, $abstract ) {
        $field = '' unless defined $field;
    }

    # Title is repeated three times and keywords twice, presumably to give
    # them more weight than the abstract. split() yields a leading empty
    # field when the text starts with whitespace, so empty tokens are dropped.
    my @words = grep { length } split(
        /\W*\s+\W*/,
        "$title $title $title $keywords $keywords $abstract"
    );

    # stem() is called in scalar context, one word at a time.
    my $stems = join( ' ', map { scalar( StemHR->stem($_) ) } @words );

    warn "body: $stems\n" if $debug;

    # Index the original words followed by their stems.
    $indexer->index( $row->{id}, join( " ", @words, $stems ) );
    $total++;
    print STDERR _x( $row->{id}, " ", $row->{title} ), "\n";
}

print STDERR "\nNow adding $total items to the database...";
$indexer->finish();
print STDERR "done!\n";

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26