194 |
die "no uri" unless ($uri); |
die "no uri" unless ($uri); |
195 |
die "feed is not a Grep::Model::Feed but ", ref $feed unless $feed->isa('Grep::Model::Feed'); |
die "feed is not a Grep::Model::Feed but ", ref $feed unless $feed->isa('Grep::Model::Feed'); |
196 |
|
|
|
# Debugging helper: dump $content to /tmp/grep/<file>.html.
#
#   $file    - base name of the dump; ".html" is appended here
#   $content - text written to the dump file
#
# Nothing is written unless /tmp/grep is writable. A failed open,
# print or close is fatal (die).
sub save_html {
    my ( $file, $content ) = @_;

    if ( -w '/tmp/grep' ) {
        my $path = "/tmp/grep/${file}.html";

        open( my $out, '>', $path ) or die "can't open $file: $!";
        print {$out} $content       or die "can't write to $file: $!";

        # buffered write errors only surface at close, so check it too
        close($out) or die "can't close $file: $!";
    }
}
|
|
|
|
197 |
my $mech = WWW::Mechanize->new(); |
my $mech = WWW::Mechanize->new(); |
198 |
|
|
199 |
$mech->get( $uri ); |
$mech->get( $uri ); |
200 |
|
|
201 |
save_html( 'get', $mech->content ); |
$self->save( 'get.html', $mech->content ); |
202 |
|
|
203 |
if ( $args->{submit_form} ) { |
if ( $args->{submit_form} ) { |
204 |
warn "submit form on $uri\n"; |
warn "submit form on $uri\n"; |
205 |
$mech->submit_form( %{ $args->{submit_form} } ) or die "can't submit form"; |
$mech->submit_form( %{ $args->{submit_form} } ) or die "can't submit form"; |
206 |
save_html( 'submit', $mech->content ); |
$self->save( 'submit.html', $mech->content ); |
207 |
} |
} |
208 |
|
|
209 |
warn "parse result page\n"; |
warn "parse result page\n"; |
221 |
( $_[0]->attr( $attr ) || '' ) eq $value; |
( $_[0]->attr( $attr ) || '' ) eq $value; |
222 |
}); |
}); |
223 |
|
|
224 |
die "can't find results wrapper <$el $attr=\"$value\">" unless ( $div ); |
if ( ! $div ) { |
225 |
|
warn "can't find results wrapper <$el $attr=\"$value\">"; |
226 |
|
return; |
227 |
|
} |
228 |
|
|
229 |
my $max = 5; |
my $max = 5; |
230 |
my $nr = 1; |
my $nr = 1; |
244 |
warn "fetching page: ",$a->as_text," from $page_uri\n"; |
warn "fetching page: ",$a->as_text," from $page_uri\n"; |
245 |
if ( $mech->follow_link( url => $a->attr('href') ) ) { |
if ( $mech->follow_link( url => $a->attr('href') ) ) { |
246 |
|
|
247 |
save_html( "page-${nr}", $mech->content ); |
$self->save( "page-${nr}.html", $mech->content ); |
248 |
|
|
249 |
my $page_tree = HTML::TreeBuilder->new or die "can't create page tree"; |
my $page_tree = HTML::TreeBuilder->new or die "can't create page tree"; |
250 |
$page_tree->parse( $mech->content ) or die "can't parse page at $page_uri"; |
$page_tree->parse( $mech->content ) or die "can't parse page at $page_uri"; |
282 |
|
|
283 |
} |
} |
284 |
|
|
285 |
|
=head2 save |
286 |
|
|
287 |
|
$self->save( 'name', $content );
288 |
|
|
289 |
|
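Saves C<$content> to the file C</tmp/grep/name> when the C</tmp/grep> directory is writable; otherwise does nothing. Dies if the file cannot be opened, written or closed.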
290 |
|
|
291 |
|
=cut |
292 |
|
|
293 |
|
sub save {
    my ( $self, $file, $content ) = @_;

    # dumps are optional: only written when the dump directory is writable
    if ( -w '/tmp/grep' ) {
        my $path = "/tmp/grep/$file";

        open( my $out, '>', $path ) or die "can't open $file: $!";
        print {$out} $content       or die "can't write to $file: $!";

        # buffered write errors only surface at close, so check it too
        close($out) or die "can't close $file: $!";

        Jifty->log->debug( "saved $file ", length($content), " bytes" );
    }
}
303 |
|
|
304 |
1; |
1; |