/[wait]/trunk/lib/WAIT/Parse/HTML.pm
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/WAIT/Parse/HTML.pm

Parent Directory | Revision Log


Revision 118 - (show annotations)
Fri Jul 15 18:59:10 2005 UTC (18 years, 10 months ago) by dpavlin
File size: 1278 byte(s)
some rather old changes from 2004-05-28

1 #!/usr/bin/perl
2 # -*- Mode: Perl -*-
3 # $Basename: HTML.pm $
4 # $Revision: 1.2 $
5 # Author : Ulrich Pfeifer with Andreas König
6 # Created On : Sat Nov 1 1997
7 # Last Modified By: Ulrich Pfeifer
8 # Last Modified On: Wed Nov 5 16:48:17 1997
9 # Language : CPerl
10 # Update Count : 1
11 # Status : Unknown, Use with caution!
12 #
13 # (C) Copyright 1997, Ulrich Pfeifer, all rights reserved.
14 #
15 #
16
17 package WAIT::Parse::HTML;
18 use vars qw(@ISA);
19 require HTML::Parse;
20 require HTML::FormatText;
21 use HTML::Entities qw(decode_entities);
22 @ISA = qw(WAIT::Parse::Base);
23
24
# Split an HTML document into the attributes that get indexed.
#
# Arguments:
#   $self        - invocant (not used by this method)
#   $html_source - the HTML document as a single string
#
# Returns a hash reference with two keys:
#   text  - the document rendered to plain text by HTML::FormatText
#   title - the content of the first <title>, <h1>, <h2>, <h3> or <h4>
#           element found in the source, or 'no title' when no such
#           element exists or its content is empty
sub split {
    my ($self, $html_source) = @_;

    # Treat a missing document like an empty one instead of warning.
    $html_source = '' unless defined $html_source;

    # The first capture exists only so that \1 can demand the matching
    # closing tag; the second capture is the heading text itself.
    my (undef, $title)
        = $html_source =~ /<(title|h1|h2|h3|h4)[^>]*>(.*?)<\/\1\s*>/si;

    my $html      = HTML::Parse::parse_html($html_source);
    my $formatter = HTML::FormatText->new;

    # Render before the tree is released, and store the result in a
    # scalar so exactly one value ends up in the hash below.
    my $text = $formatter->format($html);

    # The parse tree links parents and children in both directions; on
    # HTML::Tree versions without weak references it is never freed
    # unless it is deleted explicitly.
    $html->delete;

    # Test definedness and length rather than truth so that a title
    # consisting of "0" is kept.
    my $has_title = defined $title && length $title;

    return {
        'text'  => $text,
        'title' => ($has_title ? $title : 'no title'),
    };
}
38
# Break an HTML document into tagged chunks so that the title can be
# told apart from the rest of the text.
#
# Arguments:
#   $self        - invocant (not used by this method)
#   $html_source - the HTML document as a single string
#
# Returns a flat list of three (tag hash, string) pairs:
#   {text  => 1}, everything up to and including the opening <title>
#   {title => 2}, the content of the title element
#   {text  => 1}, the closing </title> and everything after it
# Every string is passed through decode_entities.  When the document
# has no <title>...</title>, the whole document is returned as the
# first text chunk and the other two strings are empty.
sub tag {
    my ($self, $html_source) = @_;

    # Treat a missing document like an empty one instead of warning.
    $html_source = '' unless defined $html_source;

    # Carriage returns are mapped to newlines (works on the local copy).
    $html_source =~ tr/\r/\n/;

    # ".*" rather than ".+" at the end so that a document which stops
    # right after </title> still matches.
    my ($pre, $title, $body)
        = $html_source =~ /^(.*?<title\s*>)(.*?)(<\/title\s*>.*)/si;

    # Without a title element the match fails; keep the document as
    # plain text instead of silently dropping all of it.
    unless (defined $pre) {
        ($pre, $title, $body) = ($html_source, '', '');
    }

    return (
        {'text'  => 1}, decode_entities($pre),
        {'title' => 2}, decode_entities($title),
        {'text'  => 1}, decode_entities($body),
    );
}
57
58 1;

Properties

Name Value
cvs2svn:cvs-rev 1.1

  ViewVC Help
Powered by ViewVC 1.1.26