194 |
die "no uri" unless ($uri); |
die "no uri" unless ($uri); |
195 |
die "feed is not a Grep::Model::Feed but ", ref $feed unless $feed->isa('Grep::Model::Feed'); |
die "feed is not a Grep::Model::Feed but ", ref $feed unless $feed->isa('Grep::Model::Feed'); |
196 |
|
|
197 |
my $mech = WWW::Mechanize->new(); |
# Warn-compatible callback: passed as both the onwarn and onerror
# handler to WWW::Mechanize->new just below, so whatever Mechanize
# reports through those hooks is emitted with warn.
#
# Arguments: the message, given as a list of fragments (the same calling
#            convention as warn / Carp::carp).
# Returns:   nothing when there is no message text to report; otherwise
#            the result of warn.
sub mech_warn {
    # Handlers of this kind are called with a LIST of fragments; using
    # only the first element would truncate multi-part messages, so all
    # defined fragments are joined into one message.
    my $message = join '', grep { defined } @_;

    # Nothing to say (no arguments, or only undef / empty strings).
    return unless length $message;

    warn $message;
}
201 |
|
|
202 |
|
my $mech = WWW::Mechanize->new( |
203 |
|
cookie_jar => {}, |
204 |
|
onwarn => \&mech_warn, |
205 |
|
onerror => \&mech_warn, |
206 |
|
); |
207 |
|
|
208 |
$mech->get( $uri ); |
$mech->get( $uri ); |
209 |
|
|
210 |
$self->save( 'get.html', $mech->content ); |
$self->save( 'get.html', $mech->content ); |
211 |
|
|
212 |
if ( $args->{submit_form} ) { |
if ( my $form = $args->{submit_form} ) { |
213 |
warn "submit form on $uri\n"; |
warn "submit form on $uri with ", dump( $form ),"\n"; |
214 |
$mech->submit_form( %{ $args->{submit_form} } ) or die "can't submit form"; |
$mech->submit_form( %$form ) or die "can't submit form ", dump( $form ); |
215 |
$self->save( 'submit.html', $mech->content ); |
$self->save( 'submit.html', $mech->content ); |
216 |
} |
} |
217 |
|
|
258 |
my $page_tree = HTML::TreeBuilder->new or die "can't create page tree"; |
my $page_tree = HTML::TreeBuilder->new or die "can't create page tree"; |
259 |
$page_tree->parse( $mech->content ) or die "can't parse page at $page_uri"; |
$page_tree->parse( $mech->content ) or die "can't parse page at $page_uri"; |
260 |
|
|
261 |
my ( $el,$attr,$value ) = @{ $args->{scrape} }; |
( $el,$attr,$value ) = @{ $args->{scrape} }; |
262 |
my $div = $page_tree->look_down( '_tag', $el, sub { ( $_[0]->attr( $attr ) || '' ) eq $value } ); |
$div = $page_tree->look_down( '_tag', $el, sub { ( $_[0]->attr( $attr ) || '' ) eq $value } ); |
263 |
|
|
264 |
die "can't find <$el $attr=\"$value\">" unless ($div); |
die "can't find <$el $attr=\"$value\">" unless ($div); |
265 |
|
|