14 |
|
|
15 |
package WAIT::Document::Ora; |
package WAIT::Document::Ora; |
16 |
@ISA = qw(WAIT::Document::Base); |
@ISA = qw(WAIT::Document::Base); |
17 |
require WAIT::Document::Base; |
use WAIT::Document::Base; |
18 |
|
|
19 |
use IO::File; |
use IO::File; |
20 |
|
use Encode; |
21 |
use strict; |
use strict; |
22 |
use Carp; |
use Carp; |
23 |
|
|
52 |
local($/) = undef; |
local($/) = undef; |
53 |
|
|
54 |
my $fh = IO::File->new(join('/',$self->{Dir},$file,'desc.html')); |
my $fh = IO::File->new(join('/',$self->{Dir},$file,'desc.html')); |
55 |
my $desc = $fh->getline(); |
my $desc = conv_getline($fh); |
56 |
$fh = IO::File->new(join('/',$self->{Dir},$file,'author.html')); |
$fh = IO::File->new(join('/',$self->{Dir},$file,'author.html')); |
57 |
my $author = $fh->getline() if $fh; |
my $author = conv_getline($fh) if $fh; |
58 |
$fh = IO::File->new(join('/',$self->{Dir},$file,'index.html')); |
$fh = IO::File->new(join('/',$self->{Dir},$file,'index.html')); |
59 |
my $index = $fh->getline() if $fh; |
my $index = conv_getline($fh) if $fh; |
60 |
$fh = IO::File->new(join('/',$self->{Dir},$file,'colophon.html')); |
$fh = IO::File->new(join('/',$self->{Dir},$file,'colophon.html')); |
61 |
my $colophon = $fh->getline() if $fh; |
my $colophon = conv_getline($fh) if $fh; |
62 |
return { |
return { |
63 |
desc => $desc, |
desc => $desc, |
64 |
author => $author, |
author => $author, |
67 |
}; |
}; |
68 |
} |
} |
69 |
|
|
70 |
|
# WAIT::Document::Ora::conv_getline |
71 |
|
sub conv_getline ($) { |
72 |
|
my($fh) = shift; |
73 |
|
local $/ = "\n"; |
74 |
|
my $firstline = <$fh>; |
75 |
|
my $src_enc; |
76 |
|
# \042 is double quote, \047 is single quote. I avoid single quotes |
77 |
|
# here just for easier copy and paste to the terminal (I need to |
78 |
|
# debug here frequently) |
79 |
|
if ($firstline =~ /<\?xml[^>]+encoding\s*=([\042\047])([\w\-]+)\1/) { |
80 |
|
$src_enc = $2; |
81 |
|
} else { |
82 |
|
$src_enc = "ISO-8859-1"; |
83 |
|
} |
84 |
|
seek $fh, 0, 0; |
85 |
|
undef $/; |
86 |
|
my $content = <$fh>; |
87 |
|
$content =~ s/\s+/ /gs; # eliminate TABs and CRs for easier debugging |
88 |
|
my $dcontent = Encode::decode($src_enc,$content); |
89 |
|
$dcontent; |
90 |
|
} |
91 |
|
|
92 |
sub FIRSTKEY { |
sub FIRSTKEY { |
93 |
my $self = shift; |
my $self = shift; |
94 |
$self->{fno} = 0; |
$self->{fno} = 0; |