1 |
#!/usr/bin/perl |
#!/usr/bin/perl |
2 |
# -*- Mode: Perl -*- |
# -*- Mode: Perl -*- |
3 |
# $Basename: HTML.pm $ |
# $Basename: HTML.pm $ |
4 |
# $Revision: 1.6 $ |
# $Revision: 1.7 $ |
5 |
# Author : Ulrich Pfeifer with Andreas König |
# Author : Ulrich Pfeifer with Andreas König |
6 |
# Created On : Sat Nov 1 1997 |
# Created On : Sat Nov 1 1997 |
7 |
# Last Modified By: Ulrich Pfeifer |
# Last Modified By: Ulrich Pfeifer |
21 |
use vars qw(@ISA); |
use vars qw(@ISA); |
22 |
@ISA = qw(WAIT::Parse::Base); |
@ISA = qw(WAIT::Parse::Base); |
23 |
|
|
24 |
|
|
25 |
|
=pod |
26 |
|
|
27 |
|
Text from 2002-03-05 is structured with <div> tags as follows: |
28 |
|
|
29 |
|
index.html: |
30 |
|
<div id="biblio"> BIBLIOGRAPHIC DATA |
31 |
|
<div id="short_desc"> SHORT DESCRIPTION |
32 |
|
|
33 |
|
desc.html: |
34 |
|
<div id="long_desc"> DETAILED DESCRIPTION |
35 |
|
|
36 |
|
author.html: |
37 |
|
<div id="author_bio"> BIOGRAPHY OF THE AUTHOR |
38 |
|
|
39 |
|
translator.html: |
40 |
|
<div id="translator_bio"> BIOGRAPHY OF THE TRANSLATOR |
41 |
|
|
42 |
|
=cut |
43 |
|
|
44 |
# Presumably a debug flag, off by default; where it is read is not visible in this excerpt.
my $debug = 0; |
my $debug = 0; |
45 |
# Lookup keyed by lowercase HTML element name; each value is a label
# ('text' or 'title'). Presumably the label names the field under which the
# element's content is collected -- confirm against the parser handlers.
# The h1/h2/h3 entries are commented out, so those elements have no entry.
my %is_text = ( |
my %is_text = ( |
46 |
p => 'text', |
p => 'text', |
47 |
|
a => 'text', # translator -- presumably the translator is marked up in an <a> element; confirm |
48 |
# h1 => 'text', |
# h1 => 'text', |
49 |
# h2 => 'text', |
# h2 => 'text', |
50 |
# h3 => 'text', |
# h3 => 'text', |
51 |
title => 'title', |
title => 'title', |
52 |
); |
); |
53 |
|
|
54 |
my $p = HTML::Parser->new( |
my $p = HTML::Parser->new( |