/[Semantic-Engine]/EPrints/EPrints.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /EPrints/EPrints.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 13 - (show annotations)
Fri Jun 29 18:46:45 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 2623 byte(s)
added EPrints->fulltext_content
1 # Dobrica Pavlinusic, <dpavlin@rot13.org> 06/28/07 23:28:21 CEST
2
3 package EPrints;
4
5 use Exporter 'import';
6 @EXPORT_OK = qw(_x slogovi);
7
8 use Encode qw/from_to decode_utf8 decode/;
9 use Data::Dump qw/dump/;
10 use DBI;
11 use URI::Escape;
12
13 use strict;
14 use warnings;
15
16 my $debug = 0;
17
18 my $connect = "DBI:mysql:dbname=eprints";
19 # path to eprints installation
20 my $eprints_archive = '/data/eprints2/archives/ffzg/documents/disk0/';
21
22 my $dbh = DBI->connect($connect,"dpavlin","") || die $DBI::errstr;
23
24 sub dbh {
25 my $self = shift;
26 return $dbh;
27 }
28
29 my $id;
30
31 sub id {
32 my $self = shift;
33 if ( defined( $_[0] ) ) {
34 $id = $_[0];
35 warn "# id = $id\n" if $debug;
36 }
37 return $id;
38 }
39
40 sub lookup {
41 my $self = shift;
42 my $field = shift;
43 my $table = shift;
44 my $where = '';
45
46 if ( ! $table ) {
47 $table = "archive_$field";
48 $where = " and lang = 'hr'";
49 }
50
51 my $sql = qq{
52 SELECT $field
53 FROM $table
54 WHERE eprintid = $id $where
55 };
56 warn "# sql: $sql\n" if $debug;
57 my @results = map { _x( $_->{$field} ) } @{ $dbh->selectall_arrayref($sql, { Slice => {} }) };
58
59 warn "# loookup( $field, $id ) = ", dump( @results ),$/ if $debug;
60 return join(" ", @results);
61 }
62
63 sub fulltext {
64 my $self = shift;
65 my $fulltext = EPrints->lookup( 'fileinfo', 'archive' );
66 $fulltext =~ s/\s+$//;
67 return split(/;/, $fulltext);
68 }
69
70 sub fulltext_content {
71 my $self = shift;
72
73 my $path = $eprints_archive;
74
75 my ( $type, $uri ) = EPrints->fulltext;
76 $uri =~ s!http://[^/]+/!!;
77 $uri = uri_unescape($uri);
78 if ( $uri =~ s|^(\d+)/|| ) {
79 my $nr = sprintf("%08d", $1);
80 $nr =~ s!(\d\d)!$1/!g;
81 $path .= "/$nr/$uri";
82 } else {
83 warn "can't find ID in $uri";
84 return;
85 }
86 $path =~ s!//+!/!g;
87 if ( -r $path ) {
88 print "+ $path ", -s $path, " bytes\n";
89 open(my $pdf, "pdftotext $path - | iconv -f utf-8 -t iso-8859-2 -c |") || die "can't open pdftotext $path: $!";
90 local $/;
91 my $content = <$pdf>;
92 print "\t>>", length( $content ), " text bytes\n";
93 close($pdf); # || die "can't close $path: $!";
94 return $content;
95 } else {
96 warn "ERROR: $path: $!\n";
97 }
98
99 }
100
101 sub _x {
102 my $v = join(" ", @_);
103 decode_utf8( $v );
104 from_to( $v, 'utf-8', 'iso-8859-2' );
105 warn "_x($v)\n" if $debug;
106 return "$v ";
107 }
108
109 sub slogovi {
110 my $text = shift;
111
112 my $count = 2;
113 my $out = '';
114
115 foreach my $w ( split(/\W*\s+\W*/, $text ) ) {
116 warn "w: $w\n" if $debug;
117 my @s;
118 while ( $w =~ s/^([^aeiou]*[aeiou])//i ) {
119 push @s, $1;
120 }
121 push @s, $w if $w;
122 warn "slogovi = ", dump( @s ), $/ if $debug;
123 foreach my $p ( 0 .. ( $#s - $count + 1 ) ) {
124 map { $out .= $s[ $p + $_ ] } 0 .. $count - 1;
125 $out .= ' ';
126 }
127 }
128 warn "$out\n" if $debug;
129 return $out;
130 }
131
132 1;

  ViewVC Help
Powered by ViewVC 1.1.26