1 |
#!/usr/bin/perl |
#!/usr/bin/perl |
2 |
# -*- Mode: Perl -*- |
# -*- Mode: Perl -*- |
3 |
# $Basename: HTML.pm $ |
# $Basename: HTML.pm $ |
4 |
# $Revision: 1.10 $ |
# $Revision: 1.11 $ |
5 |
# Author : Ulrich Pfeifer with Andreas König |
# Author : Ulrich Pfeifer with Andreas König |
6 |
# Created On : Sat Nov 1 1997 |
# Created On : Sat Nov 1 1997 |
7 |
# Last Modified By: Ulrich Pfeifer |
# Last Modified By: Ulrich Pfeifer |
110 |
sub my_parse ($) { |
sub my_parse ($) { |
111 |
my($s) = @_; |
my($s) = @_; |
112 |
my $ls; |
my $ls; |
113 |
|
if (oreilly_de_catalog::config::CHARACTER_STATS() ) { |
114 |
|
use charnames ":full"; |
115 |
|
my %seen; |
116 |
|
while ( $s =~ /([^\000-\177])/g ) { |
117 |
|
my $c = $1; |
118 |
|
$seen{$c}++; |
119 |
|
} |
120 |
|
for my $c (sort {$seen{$a} <=> $seen{$b} } keys %seen) { |
121 |
|
my $ord = ord $c; |
122 |
|
my $charname = charnames::viacode($ord); |
123 |
|
# printf "CS: %5d %4d %s %s\n", $ord, $seen{$c}, $c, $charname; |
124 |
|
printf "CS: %5x %4d %s\n", $ord, $seen{$c}, $charname||"[undef]"; |
125 |
|
} |
126 |
|
} |
127 |
|
if (oreilly_de_catalog::config::ENTITY_STATS() ) { |
128 |
|
use charnames ":full"; |
129 |
|
my %seen; |
130 |
|
while ( $s =~ /(&#(\d+|x[\da-f]+);?)/ig ) { |
131 |
|
my($full) = $1; |
132 |
|
$seen{$full}++; |
133 |
|
} |
134 |
|
for my $full (sort {$seen{$a} <=> $seen{$b} } keys %seen) { |
135 |
|
my($n) = $full =~ /&#(\d+|x[\da-f]+)/i; |
136 |
|
# die "full[$full]" unless defined $n; |
137 |
|
my $ord = $n =~ s/^x//i ? hex $n : $n; |
138 |
|
my $charname = charnames::viacode($ord); |
139 |
|
# printf "CS: %5d %4d %s %s\n", $ord, $seen{$c}, $c, $charname; |
140 |
|
printf "ES: %-12s %5x %4d %s\n", $full, $ord, $seen{$full}, $charname||"[undef]"; |
141 |
|
} |
142 |
|
} |
143 |
|
|
144 |
if (oreilly_de_catalog::config::ALLOW_LATIN_INTERMEDIATE()) { |
if (oreilly_de_catalog::config::ALLOW_LATIN_INTERMEDIATE()) { |
145 |
warn "Warning: this HTML::Parser has Unicode support on" |
warn "Warning: this HTML::Parser has Unicode support on" |
146 |
if HTML::Entities::UNICODE_SUPPORT(); |
if HTML::Entities::UNICODE_SUPPORT(); |
309 |
$s =~ s/\b\d+\.//g; # 7.vi Options 8.Enhanced Tags 9.nvi-New vi |
$s =~ s/\b\d+\.//g; # 7.vi Options 8.Enhanced Tags 9.nvi-New vi |
310 |
$doc{toc} = $s; |
$doc{toc} = $s; |
311 |
} else { |
} else { |
312 |
die "toc[$doc->{toc}] not parseable?"; |
warn "toc contains no \$s?"; |
313 |
} |
} |
314 |
} |
} |
315 |
if ($doc->{inx}) { |
if ($doc->{inx}) { |