/[Semantic-Engine]/EPrints/index.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /EPrints/index.pl

Parent Directory | Revision Log


Revision 14 - (hide annotations)
Fri Jun 29 22:54:51 2007 UTC (16 years, 9 months ago) by dpavlin
File MIME type: text/plain
File size: 1656 byte(s)
code cleanup
#!/usr/bin/perl -w

# Build a Semantic::API index from EPrints records.
#
# For every row of the archive_title table the script gathers the record's
# title, keywords and abstract, passes each through EPrints->slogovi, joins
# the results into a single body of text and hands that body to the indexer
# under the record's eprint id.  Progress is reported on STDERR.
#
# Usage: index.pl [debug]

use strict;
use warnings;
use Semantic::API;
use Data::Dump qw/dump/;

use EPrints qw/_x/;

use lib '/home/dpavlin/stem-hr/';
use StemHR;

# First command-line argument.  It is read here but not consulted anywhere
# else in this script.
my $debug = shift @ARGV;

# When true, the text of each part is repeated as many times as the part's
# weight (see %weight_of) before being added to the indexed body.
my $use_score = 0;

# Parts of a record in the order they are appended to the body, and the
# weight attached to each one.  'content' (full text) stays in the list so
# that enabling it only requires uncommenting the lookup inside the loop.
my @part_order = qw/title keywords abstract content/;
my %weight_of  = (
    title    => 4,
    keywords => 3,
    abstract => 2,
    content  => 1,
);

my $dbh = EPrints->dbh;
my $sth = $dbh->prepare(qq{
    SELECT
        archive_title.eprintid as id,
        title
    FROM archive_title
}) or die $dbh->errstr();
$sth->execute() or die $sth->errstr();

my $indexer = Semantic::API::Index->new(
    storage    => 'sqlite',
    database   => 'eprints.db',
    collection => 'EPrints',
);

$indexer->add_word_filters(
    minimum_length      => 3,
    too_many_numbers    => 10,
    maximum_word_length => 15,
);

# use this encoding for any incoming text
$indexer->set_default_encoding( "iso-8859-2" );

my $total = 0;

while ( my $row = $sth->fetchrow_hashref ) {
    # Select the current record; the EPrints->lookup calls below take no id
    # of their own.
    EPrints->id( $row->{id} );

    # Take the first value each call returns via a list assignment.  Putting
    # the calls directly inside "[ text, weight ]" would let an empty return
    # list shift the weight into the text slot, indexing e.g. "3" as the
    # keywords of a record that has none.
    my ($title)    = _x( $row->{title} );
    my ($keywords) = EPrints->lookup( 'keywords' );
    my ($abstract) = EPrints->lookup( 'abstract' );
    # my ($content) = EPrints->fulltext_content;

    my %text_of = (
        title    => $title,
        keywords => $keywords,
        abstract => $abstract,
        # content  => $content,
    );

    my @chunks;
    foreach my $part ( @part_order ) {
        my $text = $text_of{$part};
        next unless defined $text;

        # $text = StemHR->stem( $text );
        $text = EPrints->slogovi( $text );

        push @chunks, $use_score ? ( $text ) x $weight_of{$part} : $text;
    }

    # Join with a space so the last word of one part cannot fuse with the
    # first word of the next (or with its own weighted repetition).
    # NOTE(review): assumes slogovi does not already end its output with
    # whitespace -- confirm; an extra space is harmless to a word indexer.
    my $body = join ' ', @chunks;

    $indexer->index( $row->{id}, $body );
    $total++;
    print STDERR "$total: ", $row->{id}, " ",
        ( defined $title ? $title : '' ),
        " - ", length($body), " bytes\n";
}

print STDERR "\nNow adding $total items to the database...";
$indexer->finish();
print STDERR "done!\n";

Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26