4 |
# Author : Ulrich Pfeifer |
# Author : Ulrich Pfeifer |
5 |
# Created On : Mon Sep 16 19:04:37 1996 |
# Created On : Mon Sep 16 19:04:37 1996 |
6 |
# Last Modified By: Ulrich Pfeifer |
# Last Modified By: Ulrich Pfeifer |
7 |
# Last Modified On: Mon Dec 31 14:45:13 2001 |
# Last Modified On: Fri Jan 4 15:56:11 2002 |
8 |
# Language : CPerl |
# Language : CPerl |
9 |
# Update Count : 84 |
# Update Count : 86 |
10 |
# Status : Unknown, Use with caution! |
# Status : Unknown, Use with caution! |
11 |
# |
# |
12 |
# Copyright (c) 1996-1997, Ulrich Pfeifer |
# Copyright (c) 1996-1997, Ulrich Pfeifer |
13 |
# |
# |
14 |
|
|
15 |
package WAIT::Document::Ora; |
package WAIT::Document::Ora; |
16 |
@ISA = qw(WAIT::Document::Base); |
use base qw(WAIT::Document::Base); |
|
require WAIT::Document::Base; |
|
17 |
|
|
18 |
use IO::File; |
use IO::File; |
19 |
|
use Encode; |
20 |
use strict; |
use strict; |
21 |
use Carp; |
use Carp; |
22 |
|
|
26 |
my @files; |
my @files; |
27 |
|
|
28 |
opendir(DIR, $dir) or croak "Could not open '$dir': $!"; |
opendir(DIR, $dir) or croak "Could not open '$dir': $!"; |
29 |
for my $entry (readdir DIR) { |
DIRENT: for my $entry (readdir DIR) { |
30 |
if (-f "$dir/$entry/desc.html") { |
if (-f "$dir/$entry/desc.html") { |
31 |
|
my $index = "$dir/$entry/index.html"; |
32 |
|
open F, $index or Carp::confess("Could not open $index: $!"); |
33 |
|
local $/; |
34 |
|
my $content = <F>; |
35 |
|
next DIRENT unless $content =~ m|<div|s; |
36 |
push @files, $entry; |
push @files, $entry; |
37 |
} |
} |
38 |
} |
} |
39 |
|
closedir DIR; |
40 |
my $self = { |
my $self = { |
41 |
Dir => $dir, |
Dir => $dir, |
42 |
Files => \@files |
Files => \@files |
46 |
|
|
47 |
# FETCH - return all parts of one catalog record as a hash reference.
#
# NOTE(review): together with FIRSTKEY this looks like the tied-hash
# interface inherited from WAIT::Document::Base - confirm there.
#
# Arguments:
#   $id - name of the record's subdirectory below $self->{Dir}
#
# Returns a hash reference with one key per part: author, colophon, desc,
# index, inx, toc, translator and, only when
# oreilly_de_catalog::config::WITH_CHAPTER() is true, chapter.  Each value
# is what conv_getline returns for "$id/<part>.html"; the chapter part is
# read from "$id/chapter/index.html".  A part whose file cannot be opened
# yields the empty string.
sub FETCH {
  my $self = shift;
  my $id   = shift;

  # conv_getline localizes $/ itself; this is kept so that anything else
  # called from here still sees slurp mode, as before.
  local($/) = undef;

  my $ret = {};
  my @p = qw(
             author
             colophon
             desc
             index
             inx
             toc
             translator
            );
  push @p, "chapter" if oreilly_de_catalog::config::WITH_CHAPTER();
  for my $p (@p) {
    # The chapter part lives one directory deeper than the others.
    my $file = $p eq "chapter" ? "chapter/index" : $p;
    $ret->{$p} = $self->conv_getline("$id/$file.html");
  }
  return $ret;
}

# WAIT::Document::Ora::conv_getline
#
# Read "$self->{Dir}/$file" and return its content as a decoded character
# string with every run of whitespace collapsed to a single space.
#
# The source encoding is taken from an XML declaration on the first line
# (<?xml ... encoding="..."?>); without one, ISO-8859-1 is assumed.  A
# declared encoding that Encode does not know is reported with a warning
# and treated as ISO-8859-1 instead of aborting the caller.
#
# Arguments:
#   $file - path relative to $self->{Dir}
#
# Returns the decoded string, or "" when the file cannot be opened.
sub conv_getline {
  my($self) = shift;
  my($file) = shift;

  # An explicit read mode keeps the path from ever being interpreted as an
  # open() mode or pipe specification.
  my $fh = IO::File->new("$self->{Dir}/$file", "r") or return "";

  local $/ = "\n";
  my $firstline = <$fh>;
  $firstline = "" unless defined $firstline; # empty file

  my $src_enc = "ISO-8859-1";
  # \042 is double quote, \047 is single quote. I avoid single quotes
  # here just for easier copy and paste to the terminal (I need to
  # debug here frequently).  XML permits whitespace on both sides of
  # the equals sign, hence \s* before and after it.
  if ($firstline =~ /<\?xml[^>]+encoding\s*=\s*([\042\047])([\w\-]+)\1/) {
    $src_enc = $2;
  }
  unless (Encode::find_encoding($src_enc)) {
    warn "Unknown encoding '$src_enc' in $self->{Dir}/$file, assuming ISO-8859-1";
    $src_enc = "ISO-8859-1";
  }

  # Re-read the whole file, first line included, in slurp mode.
  seek $fh, 0, 0;
  undef $/;
  my $content = <$fh>;
  close $fh;
  $content = "" unless defined $content;

  $content =~ s/\s+/ /gs; # eliminate TABs and CRs for easier debugging
  my $dcontent = Encode::decode($src_enc,$content);
  unless (utf8::valid($dcontent)) {
    warn "utf8 says invalid";
  }
  unless (Encode::is_utf8($dcontent)) {
    warn "Encode says this isn't utf8";
  }
  return $dcontent;
}
102 |
|
|
103 |
sub FIRSTKEY { |
sub FIRSTKEY { |