25 |
default => 1, |
default => 1, |
26 |
); |
); |
27 |
|
|
28 |
|
has 'resolve_links' => ( |
29 |
|
is => 'rw', |
30 |
|
isa => 'Bool', |
31 |
|
default => 1, |
32 |
|
); |
33 |
|
|
34 |
#use String::TT qw/strip tt/; |
#use String::TT qw/strip tt/; |
35 |
|
|
36 |
#use pQuery; |
#use pQuery; |
37 |
use HTML::Query; |
use HTML::Query; |
38 |
use File::Slurp; |
use File::Slurp; |
39 |
use LWP::Simple (); |
use LWP::Simple qw/get/; |
40 |
use File::Path; |
use File::Path; |
41 |
use Data::Dump qw/dump/; |
use Data::Dump qw/dump/; |
42 |
|
use HTML::ResolveLink; |
43 |
|
|
44 |
sub path { |
sub path { |
45 |
my $self = shift; |
my $self = shift; |
52 |
my ( $self ) = @_; |
my ( $self ) = @_; |
53 |
|
|
54 |
my $path = $self->path; |
my $path = $self->path; |
55 |
|
my $body; |
56 |
|
|
57 |
if ( ! -e $path && $self->mirror ) { |
if ( ! -e $path && $self->mirror ) { |
58 |
|
|
63 |
my $url = $self->uri; |
my $url = $self->uri; |
64 |
warn ">> mirror $url -> $path\n"; |
warn ">> mirror $url -> $path\n"; |
65 |
|
|
66 |
LWP::Simple::mirror( $url, $path ) or die "can't mirror $url: $!"; |
$body = get( $url ) or die "can't mirror $url: $!"; |
67 |
|
|
68 |
|
if ( $self->resolve_links ) { |
69 |
|
my $resolver = HTML::ResolveLink->new( base => $url ); |
70 |
|
$body = $resolver->resolve( $body ); |
71 |
|
} |
72 |
|
|
73 |
|
write_file( $path, $body ); |
74 |
warn "WW mirror $url -> $path ", -s $path, " bytes\n"; |
warn "WW mirror $url -> $path ", -s $path, " bytes\n"; |
75 |
|
|
76 |
|
} else { |
77 |
|
$body = read_file( $path ); |
78 |
} |
} |
79 |
|
|
|
my $body = read_file( $path ); |
|
80 |
warn "# $path ", -s $path, " == ", length($body), "bytes"; |
warn "# $path ", -s $path, " == ", length($body), "bytes"; |
81 |
return $body; |
return $body; |
82 |
} |
} |
125 |
'body', |
'body', |
126 |
); |
); |
127 |
# warn dump( $dom->as_HTML ); |
# warn dump( $dom->as_HTML ); |
128 |
$body = $dom->as_HTML->[0]; |
# $body = $dom->as_HTML->[0]; |
129 |
|
|
130 |
warn "<< ", $self->uri, |
warn "<< ", $self->uri, |
131 |
" ", -s $self->path, |
" ", -s $self->path, |