1 |
# -*- Mode: Cperl -*- |
2 |
# Nroff.pm -- |
3 |
# ITIID : $ITI$ $Header $__Header$ |
4 |
# Author : Ulrich Pfeifer |
5 |
# Created On : Mon Sep 16 19:04:37 1996 |
6 |
# Last Modified By: Ulrich Pfeifer |
7 |
# Last Modified On: Fri Jan 4 15:56:11 2002 |
8 |
# Language : CPerl |
9 |
# Update Count : 86 |
10 |
# Status : Unknown, Use with caution! |
11 |
# |
12 |
# Copyright (c) 1996-1997, Ulrich Pfeifer |
13 |
# |
14 |
|
15 |
package WAIT::Document::Ora; |
16 |
use base qw(WAIT::Document::Base); |
17 |
|
18 |
use IO::File; |
19 |
use Encode; |
20 |
use strict; |
21 |
use Carp; |
22 |
|
23 |
sub TIEHASH { |
24 |
my $type = shift; |
25 |
my $dir = shift; |
26 |
my @files; |
27 |
|
28 |
opendir(DIR, $dir) or croak "Could not open '$dir': $!"; |
29 |
DIRENT: for my $entry (readdir DIR) { |
30 |
if (-f "$dir/$entry/desc.html") { |
31 |
my $index = "$dir/$entry/index.html"; |
32 |
open F, $index or Carp::confess("Could not open $index: $!"); |
33 |
local $/; |
34 |
my $content = <F>; |
35 |
next DIRENT unless $content =~ m|<div|s; |
36 |
push @files, $entry; |
37 |
} |
38 |
} |
39 |
closedir DIR; |
40 |
my $self = { |
41 |
Dir => $dir, |
42 |
Files => \@files |
43 |
}; |
44 |
bless $self, ref($type) || $type; |
45 |
} |
46 |
|
47 |
sub FETCH { |
48 |
my $self = shift; |
49 |
my $id = shift; |
50 |
|
51 |
local($/) = undef; |
52 |
|
53 |
my $ret = {}; |
54 |
my @p = qw( |
55 |
author |
56 |
colophon |
57 |
desc |
58 |
index |
59 |
inx |
60 |
toc |
61 |
translator |
62 |
); |
63 |
push @p, "chapter" if oreilly_de_catalog::config::WITH_CHAPTER(); |
64 |
for my $p (@p) { |
65 |
my $file = $p eq "chapter" ? "chapter/index" : $p; |
66 |
$ret->{$p} = $self->conv_getline("$id/$file.html"); |
67 |
} |
68 |
return $ret; |
69 |
} |
70 |
|
71 |
# WAIT::Document::Ora::conv_getline |
72 |
sub conv_getline { |
73 |
my($self) = shift; |
74 |
my($file) = shift; |
75 |
my $fh = IO::File->new("$self->{Dir}/$file") or return ""; |
76 |
|
77 |
local $/ = "\n"; |
78 |
my $firstline = <$fh>; |
79 |
my $src_enc; |
80 |
# \042 is double quote, \047 is single quote. I avoid single quotes |
81 |
# here just for easier copy and paste to the terminal (I need to |
82 |
# debug here frequently) |
83 |
if ($firstline =~ /<\?xml[^>]+encoding\s*=([\042\047])([\w\-]+)\1/) { |
84 |
$src_enc = $2; |
85 |
} else { |
86 |
$src_enc = "ISO-8859-1"; |
87 |
} |
88 |
seek $fh, 0, 0; |
89 |
undef $/; |
90 |
my $content = <$fh>; |
91 |
close $fh; |
92 |
$content =~ s/\s+/ /gs; # eliminate TABs and CRs for easier debugging |
93 |
my $dcontent = Encode::decode($src_enc,$content); |
94 |
unless (utf8::valid($dcontent)) { |
95 |
warn "utf8 says invalid"; |
96 |
} |
97 |
unless (Encode::is_utf8($dcontent)) { |
98 |
warn "Encode says this isn't utf8"; |
99 |
} |
100 |
$dcontent; |
101 |
} |
102 |
|
103 |
sub FIRSTKEY { |
104 |
my $self = shift; |
105 |
$self->{fno} = 0; |
106 |
$self->NEXTKEY; |
107 |
} |
108 |
|
109 |
sub NEXTKEY { |
110 |
my $self = shift; |
111 |
return undef if ($self->{fno}++ > @{$self->{Files}}); |
112 |
$self->{Files}->[$self->{fno}-1]; |
113 |
} |
114 |
|
115 |
sub close { |
116 |
my $self = shift; |
117 |
|
118 |
delete $self->{fno}; |
119 |
delete $self->{Files}; # no need at query time |
120 |
} |
121 |
|
122 |
1; |