/[wait]/cvs-head/lib/WAIT/Parse/Ora.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /cvs-head/lib/WAIT/Parse/Ora.pm

Parent Directory | Revision Log


Revision 58 - (show annotations)
Fri Jan 4 14:51:09 2002 UTC (22 years, 3 months ago) by ulpfr
File size: 1962 byte(s)
New parser based on HTML::Parser.

1 #!/usr/bin/perl
2 # -*- Mode: Perl -*-
3 # $Basename: HTML.pm $
4 # $Revision: 1.2 $
5 # Author : Ulrich Pfeifer with Andreas König
6 # Created On : Sat Nov 1 1997
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Fri Jan 4 15:44:34 2002
9 # Language : CPerl
10 # Update Count : 9
11 # Status : Unknown, Use with caution!
12 #
13 # (C) Copyright 1997, Ulrich Pfeifer, all rights reserved.
14 #
15 #
16
17 package WAIT::Parse::Ora;
18 use HTML::Parser;
19 use strict;
20 use vars qw(@ISA);
21 @ISA = qw(WAIT::Parse::Base);
22
# Set to a true value to print a trace of the tracked start/end tags.
my $debug = 0;

# Tags whose text content is collected, mapped to the key of %result the
# collected text is appended to.  (h1, h2 and h3 used to be listed with the
# value 'text' as well; they are currently disabled.)
my %text = (
    p     => 'text',
    title => 'title',
);

# State shared between the parser callbacks and split():
#   %result - text collected so far, keyed by the values of %text
#   $text   - text of the element(s) currently open
#   $open   - number of tracked elements currently open
my (%result, $text, $open);

# One parser instance is reused for every fragment.  The handlers receive
# the arguments named in their argspec strings, in that order.
my $p = HTML::Parser->new(
    api_version     => 3,
    start_h         => [ \&handle_start, "tagname, attr" ],
    end_h           => [ \&handle_end,   "tagname" ],
    text_h          => [ \&handle_text,  "dtext" ],
    marked_sections => 1,
);
42
# Start-tag callback.  The first argument is the tag name; any further
# arguments are ignored.  Tags without an entry in %text are skipped; for
# every other tag the nesting counter $open is incremented, which makes
# handle_text start collecting.
sub handle_start {
    my ($tagname) = @_;

    return if !$text{$tagname};

    ++$open;
    if ($debug) {
        print ">" x $open, $tagname, "\n";
    }
}
50
# End-tag callback.  The first argument is the tag name.
#
# For a tag listed in %text this closes one nesting level, normalises the
# whitespace of the text collected in $text (leading/trailing whitespace
# removed, inner runs collapsed to one space) and appends it, followed by a
# single space, to the %result entry named by %text.  $text is then emptied.
#
# Tags not listed in %text are ignored, and so is an end tag that arrives
# while no tracked element is open.
sub handle_end {
    my ($tag) = @_;

    my $field = $text{$tag};
    return unless $field;

    # A stray end tag (no matching start tag) must not push $open below
    # zero: a negative counter is true, so handle_text would start
    # collecting text that lies outside every tracked element.
    return unless $open and $open > 0;

    print "<" x $open, $tag, "\n" if $debug;
    $open--;

    $text =~ s/^\s+//;
    $text =~ s/\s+$//;
    $text =~ s/\s+/ /g;

    $result{$field} .= $text . ' ';
    $text = '';
}
63
64
# Text callback.  The first argument is the text chunk; it is appended to
# $text, but only while $open is true (i.e. inside a tracked element).
sub handle_text {
    my ($chunk) = @_;

    return unless $open;
    $text .= $chunk;
}
68
# Private helper: reset the shared parser state, run one HTML fragment
# through the shared parser and return a reference to a copy of what the
# callbacks collected in %result.  An undefined fragment is not passed to
# the parser and yields an empty hash.
sub _parse_fragment {
    my ($html) = @_;

    %result = ();
    $text   = '';
    $open   = 0;

    if (defined $html) {
        $p->parse($html);
        $p->eof;    # finish this document so the parser can be reused
    }

    return { %result };
}

# Split a document record into its fields.
#
# Arguments:
#   $self - invocant (not used here)
#   $doc  - hash reference; the HTML in its 'author' and 'desc' entries
#           is parsed
#
# Returns a reference to a new hash with these keys:
#   author - title text found in $doc->{author}
#   about  - paragraph text found in $doc->{author}
#   text   - paragraph text found in $doc->{desc}
#   title  - title text found in $doc->{desc}
# A value is undef when the corresponding fragment contained no such
# element.
sub split {
    my ($self, $doc) = @_;

    # Keep the original order: 'author' first, 'desc' last, so %result
    # holds the 'desc' data afterwards exactly as before.
    my $from_author = _parse_fragment($doc->{author});
    my $from_desc   = _parse_fragment($doc->{desc});

    my %doc = (
        author => $from_author->{title},
        about  => $from_author->{text},
        text   => $from_desc->{text},
        title  => $from_desc->{title},
    );

    return \%doc;
}
96
97 1;

Properties

Name Value
cvs2svn:cvs-rev 1.2

  ViewVC Help
Powered by ViewVC 1.1.26