/[Semantic-Engine]/EPrints/index.pl
This is a repository of my old source code, which isn't updated anymore. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /EPrints/index.pl

Parent Directory | Revision Log | View Patch

revision 4 by dpavlin, Fri Jun 29 09:52:53 2007 UTC revision 14 by dpavlin, Fri Jun 29 22:54:51 2007 UTC
# Line 3  Line 3 
3  use strict;  use strict;
4  use Semantic::API;  use Semantic::API;
5  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
6    
7  use EPrints qw/_x/;  use EPrints qw/_x/;
8    
9  use lib '/home/dpavlin/stem-hr/';  use lib '/home/dpavlin/stem-hr/';
10  use StemHR;  use StemHR;
11    
 warn dump( StemHR->stem('kućni') );  
   
12  my $debug = shift @ARGV;  my $debug = shift @ARGV;
13    my $use_score = 0;
 my $type = 'slogovi';  
14    
15  my $dbh = EPrints->dbh;  my $dbh = EPrints->dbh;
16  my $sth = $dbh->prepare(qq{  my $sth = $dbh->prepare(qq{
# Line 34  $indexer->add_word_filters( minimum_leng Line 33  $indexer->add_word_filters( minimum_leng
33                              maximum_word_length => 15 );                              maximum_word_length => 15 );
34    
35  # use this encoding for any incoming text  # use this encoding for any incoming text
36  #$indexer->set_default_encoding( "utf8");  $indexer->set_default_encoding( "iso-8859-2" );
37    
38  my $total = 0;  my $total = 0;
39    
40  while (my $row = $sth->fetchrow_hashref ) {  while (my $row = $sth->fetchrow_hashref ) {
41          EPrints->id( $row->{id} );          EPrints->id( $row->{id} );
42          my ( $title, $keywords, $abstract ) = (          my $parts = {
43                  _x( $row->{title} ),                  title => [ _x( $row->{title} ), 4 ],
44                  EPrints->lookup( 'keywords' ),                  keywords => [ EPrints->lookup( 'keywords' ), 3 ],
45                  EPrints->lookup( 'abstract' )                  abstract => [ EPrints->lookup( 'abstract' ), 2 ],
46          );  #               content => [ EPrints->fulltext_content, 1 ],
47          my @body = split( /\W*\s+\W*/, "$title $title $title $keywords $keywords $abstract" );          };
48          my $body;  
49          foreach my $word ( @body ) {          my $body = '';
50  #               $body .= StemHR->stem( $word ) . ' ';  
51                  $body .= join(" ",EPrints::slogovi( $word )) . ' ';          foreach my $part ( qw/title keywords abstract content/ ) {
52                    my $content = $parts->{$part}->[0];
53                    next unless defined $content;
54                    
55    #               $content = StemHR->stem( $content );
56                    $content = EPrints->slogovi( $content );
57    
58                    if ( $use_score ) {
59                            map { $body .= $content } 1 .. $parts->{$part}->[1];
60                    } else {
61                            $body .= $content;
62                    }
63          }          }
64    
65          warn "body: $body\n" if $debug;          $indexer->index( $row->{id}, $body );
   
     $indexer->index( $row->{id}, join(" ", @body, $body ) );  
66          $total++;          $total++;
67          print STDERR _x( $row->{id}, " ", $row->{title} ), "\n";          print STDERR "$total: ", $row->{id}, " ", _x( $row->{title} ), " - ", length($body), " bytes\n";
68  }  }
69    
70    

Legend:
Removed from v.4  
changed lines
  Added in v.14

  ViewVC Help
Powered by ViewVC 1.1.26