/[Semantic-Engine]/EPrints/index.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /EPrints/index.pl

Parent Directory | Revision Log


Revision 4 - (show annotations)
Fri Jun 29 09:52:53 2007 UTC (16 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 1502 byte(s)
added slogovi, which separates words into position-dependent, variable-length parts
(each running until the next aeiou)
#!/usr/bin/perl -w

# Build a Semantic::API index (collection 'EPrints', SQLite storage in
# eprints.db) from the title, keywords and abstract of every row returned
# from the archive_title table.
#
# Usage: index.pl [debug]
#   debug - any true first argument makes the script print the per-record
#           text produced by EPrints::slogovi() to STDERR.

use strict;
use warnings;
use Semantic::API;
use Data::Dump qw/dump/;
use EPrints qw/_x/;
use lib '/home/dpavlin/stem-hr/';
use StemHR;

# Start-up check: dump the stem of a single sample word to STDERR.
# NOTE(review): the literal is written here as UTF-8 text; confirm which
# encoding StemHR->stem() expects before relying on this output.
warn dump( StemHR->stem('kućni') );

my $debug = shift @ARGV;

my $dbh = EPrints->dbh;
my $sth = $dbh->prepare(qq{
    SELECT
        archive_title.eprintid as id,
        title
    FROM archive_title
}) or die "prepare failed: ", $dbh->errstr();
$sth->execute() or die "execute failed: ", $sth->errstr();

my $indexer = Semantic::API::Index->new(
    storage    => 'sqlite',
    database   => 'eprints.db',
    collection => 'EPrints',
);

# Word filters handed to the indexer.
$indexer->add_word_filters(
    minimum_length      => 3,
    too_many_numbers    => 10,
    maximum_word_length => 15,
);

# use this encoding for any incoming text
#$indexer->set_default_encoding( "utf8");

my $total = 0;

while ( my $row = $sth->fetchrow_hashref ) {
    EPrints->id( $row->{id} );

    # One list assignment per call: every call stays in list context, but an
    # empty (or multi-element) return from one call can no longer shift the
    # values of the following calls into the wrong variable.
    my ($title)    = _x( $row->{title} );
    my ($keywords) = EPrints->lookup('keywords');
    my ($abstract) = EPrints->lookup('abstract');

    # Missing fields become empty strings so interpolation below is clean.
    for my $field ( $title, $keywords, $abstract ) {
        $field = '' unless defined $field;
    }

    # Title appears three times, keywords twice, abstract once (presumably to
    # weight them in the index). The split consumes whitespace together with
    # any non-word characters touching it; grep drops the empty leading field
    # that split produces when the text starts with a separator.
    my @words = grep { length }
        split( /\W*\s+\W*/,
        "$title $title $title $keywords $keywords $abstract" );

    # Pieces returned by EPrints::slogovi() for every word, space separated.
    # (Alternative kept from the original: use StemHR->stem( $word ) instead.)
    my $syllables =
        join( ' ', map { join( ' ', EPrints::slogovi($_) ) } @words );

    warn "body: $syllables\n" if $debug;

    # Index the plain words followed by their slogovi() pieces.
    $indexer->index( $row->{id}, join( ' ', @words, $syllables ) );
    $total++;
    print STDERR _x( $row->{id}, " ", $row->{title} ), "\n";
}

# fetchrow_hashref returns undef both at end of data and on error; make sure
# a failed fetch does not silently produce a partial index.
die "fetch failed: ", $sth->errstr() if $sth->err;

print STDERR "\nNow adding $total items to the database...";
$indexer->finish();
print STDERR "done!\n";


Properties

Name Value
svn:executable

  ViewVC Help
Powered by ViewVC 1.1.26