/[Semantic-Engine]/EPrints/EPrints.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /EPrints/EPrints.pm

Parent Directory | Revision Log | View Patch

revision 12 by dpavlin, Fri Jun 29 17:03:54 2007 UTC | revision 13 by dpavlin, Fri Jun 29 18:46:45 2007 UTC
# Line 8  use Exporter 'import'; Line 8  use Exporter 'import';
8  use Encode qw/from_to decode_utf8 decode/;  use Encode qw/from_to decode_utf8 decode/;
9  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
10  use DBI;  use DBI;
11    use URI::Escape;
12    
13  use strict;  use strict;
14  use warnings;  use warnings;
# Line 15  use warnings; Line 16  use warnings;
16  my $debug = 0;  my $debug = 0;
17    
18  my $connect = "DBI:mysql:dbname=eprints";  my $connect = "DBI:mysql:dbname=eprints";
19    # path to eprints installation
20    my $eprints_archive = '/data/eprints2/archives/ffzg/documents/disk0/';
21    
22  my $dbh = DBI->connect($connect,"dpavlin","") || die $DBI::errstr;  my $dbh = DBI->connect($connect,"dpavlin","") || die $DBI::errstr;
23    
# Line 59  sub lookup { Line 62  sub lookup {
62    
63  sub fulltext {  sub fulltext {
64          my $self = shift;          my $self = shift;
65          return split(/;/, EPrints->lookup( 'fileinfo', 'archive' ));          my $fulltext = EPrints->lookup( 'fileinfo', 'archive' );
66            $fulltext =~ s/\s+$//;
67            return split(/;/, $fulltext);
68    }
69    
70    sub fulltext_content {
71            my $self = shift;
72    
73            my $path = $eprints_archive;
74    
75            my ( $type, $uri ) = EPrints->fulltext;
76            $uri =~ s!http://[^/]+/!!;
77            $uri = uri_unescape($uri);
78            if ( $uri =~ s|^(\d+)/|| ) {
79                    my $nr = sprintf("%08d", $1);
80                    $nr =~ s!(\d\d)!$1/!g;
81                    $path .= "/$nr/$uri";
82            } else {
83                    warn "can't find ID in $uri";
84                    return;
85            }
86            $path =~ s!//+!/!g;
87            if ( -r $path ) {
88                    print "+ $path ", -s $path, " bytes\n";
89                    open(my $pdf, "pdftotext $path - | iconv -f utf-8 -t iso-8859-2 -c |") || die "can't open pdftotext $path: $!";
90                    local $/;
91                    my $content = <$pdf>;
92                    print "\t>>", length( $content ), " text bytes\n";
93                    close($pdf); # || die "can't close $path: $!";
94                    return $content;
95            } else {
96                    warn "ERROR: $path: $!\n";
97            }
98    
99  }  }
100    
101  sub _x {  sub _x {

Legend:
Removed from v.12  
changed lines
  Added in v.13

  ViewVC Help
Powered by ViewVC 1.1.26