/[wait]/cvs-head/lib/WAIT/Parse/Ora.pm
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/lib/WAIT/Parse/Ora.pm

Parent Directory Parent Directory | Revision Log Revision Log


Revision 65 - (show annotations)
Wed Jan 23 12:22:54 2002 UTC (22 years, 3 months ago) by laperla
File size: 2377 byte(s)
- Zwischenzustand

1 #!/usr/bin/perl
2 # -*- Mode: Perl -*-
3 # $Basename: HTML.pm $
4 # $Revision: 1.4 $
5 # Author : Ulrich Pfeifer with Andreas König
6 # Created On : Sat Nov 1 1997
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Fri Jan 4 16:06:14 2002
9 # Language : CPerl
10 # Update Count : 14
11 # Status : Unknown, Use with caution!
12 #
13 # (C) Copyright 1997, Ulrich Pfeifer, all rights reserved.
14 #
15 #
16
17 package WAIT::Parse::Ora;
18 use HTML::Parser;
19 use strict;
20 use vars qw(@ISA);
21 @ISA = qw(WAIT::Parse::Base);
22
23 my $debug = 0;
24 my %text = (
25 p => 'text',
26 # h1 => 'text',
27 # h2 => 'text',
28 # h3 => 'text',
29 title => 'title',
30 );
31
32 my $p = HTML::Parser->new(
33 api_version => 3,
34 start_h => [\&handle_start, "tagname, attr"],
35 end_h => [\&handle_end, "tagname"],
36 text_h => [\&handle_text, "dtext"],
37 marked_sections => 1,
38 );
39 my %result;
40 my $text;
41 my $open;
42
43 sub handle_start {
44 my $tag = shift;
45
46 return unless $text{$tag};
47 $open++;
48 print ">" x $open, $tag, "\n" if $debug;
49 }
50
51 sub handle_end {
52 my $tag = shift;
53
54 return unless $text{$tag};
55 print "<" x $open, $tag, "\n" if $debug;
56 $open--;
57 $text =~ s/^\s+//;
58 $text =~ s/\s+$//;
59 $text =~ s/\s+/ /g;
60 $result{$text{$tag}} .= $text . ' ';
61 $text = '';
62 }
63
64
65 sub handle_text {
66 $text .= $_[0] if $open;
67 }
68
69 sub split {
70 my ($self, $doc) = @_;
71 my %doc = ( isbn => '', author => '', about => '', colophon => '' );
72 my $desc = $doc->{desc};
73 my $auth = $doc->{author};
74 my $colophon = $doc->{colophon};
75
76 if ($doc->{author}) {
77 %result = ();
78 $text = '';
79 $open = 0;
80 $p->parse($doc->{author});
81 $p->eof;
82 $doc{author} = $result{title};
83 $doc{author} =~ s/^By\s+//;
84 $doc{about} = $result{text};
85 }
86 if ($doc->{index}) {
87 $doc->{index} =~ /ISBN\s*([^<]+)/ and $doc{isbn} = $1;
88 }
89 if ($doc->{colophon}) {
90 %result = ();
91 $text = '';
92 $open = 0;
93 $p->parse($doc->{colophon});
94 $p->eof;
95 $doc{colophon} = $result{text};
96 }
97 %result = ();
98 $text = '';
99 $open = 0;
100
101 $p->parse($doc->{desc});
102 $p->eof;
103
104 $doc{text} = $result{text};
105 $doc{title} = $result{title};
106
107 return \%doc;
108 }
109
110 1;

Properties

Name Value
cvs2svn:cvs-rev 1.4

  ViewVC Help
Powered by ViewVC 1.1.26