/[wait]/trunk/lib/WAIT/Parse/HTML.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/lib/WAIT/Parse/HTML.pm

Parent Directory | Revision Log


Revision 10 - (hide annotations)
Fri Apr 28 15:40:52 2000 UTC (24 years, 1 month ago) by ulpfr
Original Path: cvs-head/lib/WAIT/Parse/HTML.pm
File size: 1239 byte(s)
Initial revision

1 ulpfr 10 #!/usr/bin/perl
2     # -*- Mode: Perl -*-
3     # $Basename: HTML.pm $
4     # $Revision: 1.2 $
5     # Author : Ulrich Pfeifer with Andreas König
6     # Created On : Sat Nov 1 1997
7     # Last Modified By: Ulrich Pfeifer
8     # Last Modified On: Wed Nov 5 16:48:17 1997
9     # Language : CPerl
10     # Update Count : 1
11     # Status : Unknown, Use with caution!
12     #
13     # (C) Copyright 1997, Ulrich Pfeifer, all rights reserved.
14     #
15     #
16    
17     package WAIT::Parse::HTML;
18     use vars qw(@ISA);
19     require HTML::Parse;
20     require HTML::FormatText;
21     use HTML::Entities qw(decode_entities);
22     @ISA = qw(WAIT::Parse::Base);
23    
24    
# Split an HTML document into plain-text fields.
#
# Arguments:
#   $self        - invocant (not used by this method)
#   $html_source - the HTML document as a single string
#
# Returns a hash reference with two keys:
#   'text'  - the whole document rendered by HTML::FormatText
#   'title' - the content of the first <title> element rendered the same
#             way; a document without a <title> is treated as having an
#             empty title
sub split {
    my ($self, $html_source) = @_;

    # First <title>...</title> pair; case-insensitive, may span lines.
    my ($title) = $html_source =~ m{<title\s*>(.*?)</title\s*>}si;

    # No <title> in the document: render an empty fragment instead of
    # handing undef to the parser.
    $title = '' unless defined $title;

    my $formatter = HTML::FormatText->new;

    # Parse one HTML fragment, render it as text, then release the tree.
    # The explicit delete breaks the parent/child reference cycles so the
    # tree is freed even on Perls without weak references.
    my $render = sub {
        my ($fragment) = @_;
        my $tree  = HTML::Parse::parse_html($fragment);
        my $plain = $formatter->format($tree);
        $tree->delete;
        return $plain;
    };

    # Render in the same order as before: body text first, then title.
    my $text_field  = $render->($html_source);
    my $title_field = $render->($title);

    # Explicit return so the braces are unambiguously an anonymous hash.
    return {
        'text'  => $text_field,
        'title' => $title_field,
    };
}
37    
# Cut an HTML document into tagged segments around its <title> element.
#
# Arguments:
#   $self        - invocant (not used by this method)
#   $html_source - the HTML document as a single string
#
# Returns a flat list of (tag-hashref, string) pairs:
#   {'text'  => 1}, everything up to and including the opening <title> tag
#   {'title' => 1}, the content of the <title> element
#   {'text'  => 1}, the closing </title> tag and everything after it
# If the document contains no <title>...</title> pair, the whole document
# is returned as a single {'text' => 1} segment.
# Every string is passed through decode_entities before being returned.
sub tag {
    my ($self, $html_source) = @_;

    # Turn every carriage return into a newline (works on the local copy).
    $html_source =~ tr/\r/\n/;

    # Trailing .* (rather than .+) so a document that ends immediately
    # after </title> still matches.
    my ($pre, $title, $body)
        = $html_source =~ m{^(.*?<title\s*>)(.*?)(</title\s*>.*)}si;

    # No title element: keep the text instead of returning three undefs.
    unless (defined $pre) {
        return ({'text' => 1}, decode_entities($html_source));
    }

    return (
        {'text'  => 1}, decode_entities($pre),
        {'title' => 1}, decode_entities($title),
        {'text'  => 1}, decode_entities($body),
    );
}

Properties

Name Value
cvs2svn:cvs-rev 1.1

  ViewVC Help
Powered by ViewVC 1.1.26