1 |
ulpfr |
54 |
#!/usr/bin/perl |
2 |
|
|
# -*- Mode: Perl -*- |
3 |
|
|
# $Basename: HTML.pm $ |
4 |
|
|
# $Revision: 1.1 $ |
5 |
|
|
# Author : Ulrich Pfeifer with Andreas König |
6 |
|
|
# Created On : Sat Nov 1 1997 |
7 |
|
|
# Last Modified By: Ulrich Pfeifer |
8 |
|
|
# Last Modified On: Mon Dec 31 14:51:55 2001 |
9 |
|
|
# Language : CPerl |
10 |
|
|
# Update Count : 7 |
11 |
|
|
# Status : Unknown, Use with caution! |
12 |
|
|
# |
13 |
|
|
# (C) Copyright 1997, Ulrich Pfeifer, all rights reserved. |
14 |
|
|
# |
15 |
|
|
# |
16 |
|
|
|
17 |
|
|
package WAIT::Parse::Ora; |
18 |
|
|
use vars qw(@ISA); |
19 |
|
|
require HTML::Parse; |
20 |
|
|
require HTML::FormatText; |
21 |
|
|
use HTML::Entities qw(decode_entities); |
22 |
|
|
@ISA = qw(WAIT::Parse::Base); |
23 |
|
|
|
24 |
|
|
|
25 |
|
|
# Break a document into the plain-text fields that get indexed.
#
# Arguments:
#   $self - invocant (not used by this method)
#   $doc  - hash reference; its 'desc' and 'author' entries are read as
#           HTML source strings
#
# Returns a hash reference with three plain-text entries:
#   text   - the complete 'desc' markup rendered as text
#   title  - the content of the <title> element found in 'desc'
#   author - the content of the <title> element found in 'author'
#
# A missing field or a missing <title> element is treated as an empty
# string rather than being handed to the parser as undef.
sub split {
  my ($self, $doc) = @_;

  my $desc = defined $doc->{desc}   ? $doc->{desc}   : '';
  my $auth = defined $doc->{author} ? $doc->{author} : '';

  # Both captures stay undef when the markup carries no <title> element.
  my ($title)  = $desc =~ /<title\s*>(.*?)<\/title\s*>/si;
  my ($author) = $auth =~ /<title\s*>(.*?)<\/title\s*>/si;

  my $formatter = HTML::FormatText->new;

  # Render one HTML fragment as text and release its parse tree afterwards.
  # The tree nodes reference each other (parent <-> child), so the tree is
  # destroyed explicitly with delete() instead of relying on it going out
  # of scope.
  my $to_text = sub {
    my ($source) = @_;
    my $tree = HTML::Parse::parse_html(defined $source ? $source : '');
    my $text = $formatter->format($tree);
    $tree->delete;
    return $text;
  };

  # Same rendering order as before: full text, then title, then author.
  my $text_field   = $to_text->($desc);
  my $title_field  = $to_text->($title);
  my $author_field = $to_text->($author);

  return {
          'text',   $text_field,
          'title',  $title_field,
          'author', $author_field,
         };
}
41 |
|
|
|
42 |
|
|
# Cut the raw 'desc' markup of a document into tagged runs of text.
#
# Arguments:
#   $self - invocant (not used by this method)
#   $doc  - hash reference; its 'desc' entry is read as an HTML source string
#
# Returns a flat list of (tag-hash-reference, string) pairs.  When the
# markup contains a <title>...</title> element followed by further content
# the list is:
#   {'text'  => 1}, everything up to and including the opening <title> tag
#   {'title' => 1}, the title content
#   {'text'  => 1}, the closing </title> tag and everything after it
# Otherwise the whole document is returned as one {'text' => 1} run.
# HTML entities are decoded in every returned string.
sub tag {
  my ($self, $doc) = @_;

  my $html_source = defined $doc->{desc} ? $doc->{desc} : '';

  # Carriage returns become newlines; tr/// maps character for character,
  # so the length of the source is unchanged.
  $html_source =~ tr/\r/\n/;

  my ($pre,$title,$body)
    = $html_source =~ /^(.*?<title\s*>)(.*?)(<\/title\s*>.+)/si;

  # A failed match leaves all three captures undef.  Fall back to a single
  # text run instead of returning three undefined strings.
  unless (defined $pre) {
    return ({'text' => 1}, decode_entities($html_source));
  }

  return (
          {'text'  => 1}, decode_entities($pre),
          {'title' => 1}, decode_entities($title),
          {'text'  => 1}, decode_entities($body),
         );
}