/[Semantic-Engine]/EPrints/EPrints.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /EPrints/EPrints.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 14 - (hide annotations)
Fri Jun 29 22:54:51 2007 UTC (16 years, 10 months ago) by dpavlin
File size: 2642 byte(s)
code cleanup
1 dpavlin 1 # Dobrica Pavlinusic, <dpavlin@rot13.org> 06/28/07 23:28:21 CEST
2    
3     package EPrints;
4    
5     use Exporter 'import';
6 dpavlin 4 @EXPORT_OK = qw(_x slogovi);
7 dpavlin 1
8     use Encode qw/from_to decode_utf8 decode/;
9     use Data::Dump qw/dump/;
10 dpavlin 3 use DBI;
11 dpavlin 13 use URI::Escape;
12 dpavlin 1
13     use strict;
14     use warnings;
15    
16     my $debug = 0;
17    
18     my $connect = "DBI:mysql:dbname=eprints";
19 dpavlin 13 # path to eprints installation
20     my $eprints_archive = '/data/eprints2/archives/ffzg/documents/disk0/';
21 dpavlin 1
22     my $dbh = DBI->connect($connect,"dpavlin","") || die $DBI::errstr;
23    
24     sub dbh {
25     my $self = shift;
26     return $dbh;
27     }
28    
29     my $id;
30    
31     sub id {
32     my $self = shift;
33     if ( defined( $_[0] ) ) {
34     $id = $_[0];
35     warn "# id = $id\n" if $debug;
36     }
37     return $id;
38     }
39    
40     sub lookup {
41     my $self = shift;
42     my $field = shift;
43 dpavlin 11 my $table = shift;
44     my $where = '';
45 dpavlin 1
46 dpavlin 11 if ( ! $table ) {
47     $table = "archive_$field";
48     $where = " and lang = 'hr'";
49     }
50    
51 dpavlin 1 my $sql = qq{
52     SELECT $field
53 dpavlin 11 FROM $table
54     WHERE eprintid = $id $where
55 dpavlin 1 };
56     warn "# sql: $sql\n" if $debug;
57     my @results = map { _x( $_->{$field} ) } @{ $dbh->selectall_arrayref($sql, { Slice => {} }) };
58    
59     warn "# loookup( $field, $id ) = ", dump( @results ),$/ if $debug;
60     return join(" ", @results);
61     }
62    
63 dpavlin 12 sub fulltext {
64     my $self = shift;
65 dpavlin 13 my $fulltext = EPrints->lookup( 'fileinfo', 'archive' );
66     $fulltext =~ s/\s+$//;
67     return split(/;/, $fulltext);
68 dpavlin 12 }
69    
70 dpavlin 13 sub fulltext_content {
71     my $self = shift;
72    
73     my $path = $eprints_archive;
74    
75     my ( $type, $uri ) = EPrints->fulltext;
76     $uri =~ s!http://[^/]+/!!;
77     $uri = uri_unescape($uri);
78     if ( $uri =~ s|^(\d+)/|| ) {
79     my $nr = sprintf("%08d", $1);
80     $nr =~ s!(\d\d)!$1/!g;
81     $path .= "/$nr/$uri";
82     } else {
83     warn "can't find ID in $uri";
84     return;
85     }
86     $path =~ s!//+!/!g;
87     if ( -r $path ) {
88     print "+ $path ", -s $path, " bytes\n";
89     open(my $pdf, "pdftotext $path - | iconv -f utf-8 -t iso-8859-2 -c |") || die "can't open pdftotext $path: $!";
90     local $/;
91     my $content = <$pdf>;
92     print "\t>>", length( $content ), " text bytes\n";
93     close($pdf); # || die "can't close $path: $!";
94     return $content;
95     } else {
96     warn "ERROR: $path: $!\n";
97     }
98    
99     }
100    
101 dpavlin 1 sub _x {
102     my $v = join(" ", @_);
103     decode_utf8( $v );
104     from_to( $v, 'utf-8', 'iso-8859-2' );
105     warn "_x($v)\n" if $debug;
106     return "$v ";
107     }
108    
109 dpavlin 4 sub slogovi {
110 dpavlin 14 my $self = shift;
111 dpavlin 4 my $text = shift;
112 dpavlin 6
113     my $count = 2;
114     my $out = '';
115    
116 dpavlin 4 foreach my $w ( split(/\W*\s+\W*/, $text ) ) {
117 dpavlin 6 warn "w: $w\n" if $debug;
118     my @s;
119 dpavlin 4 while ( $w =~ s/^([^aeiou]*[aeiou])//i ) {
120 dpavlin 6 push @s, $1;
121 dpavlin 4 }
122 dpavlin 6 push @s, $w if $w;
123 dpavlin 4 warn "slogovi = ", dump( @s ), $/ if $debug;
124 dpavlin 6 foreach my $p ( 0 .. ( $#s - $count + 1 ) ) {
125     map { $out .= $s[ $p + $_ ] } 0 .. $count - 1;
126     $out .= ' ';
127     }
128 dpavlin 4 }
129 dpavlin 6 warn "$out\n" if $debug;
130     return $out;
131 dpavlin 4 }
132    
133 dpavlin 1 1;

  ViewVC Help
Powered by ViewVC 1.1.26