1 |
dpavlin |
1293 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
my $url = 'http://ovidsp.ovid.com/ovidweb.cgi?T=JS&NEWS=n&MODE=ovid&PAGE=main&D=psyh'; |
7 |
|
|
|
8 |
|
|
our $location = 'Croatia'; |
9 |
|
|
my $results_per_page = 100; # 5 10 25 50 100 |
10 |
|
|
|
11 |
|
|
use WWW::Mechanize; |
12 |
|
|
use Data::Dump qw(dump); |
13 |
|
|
use File::Path; |
14 |
|
|
|
15 |
|
|
our $mech = WWW::Mechanize->new( |
16 |
|
|
autocheck => 1, |
17 |
|
|
cookie_jar => undef, |
18 |
|
|
); |
19 |
|
|
|
20 |
|
|
our $step = 0; |
21 |
|
|
|
22 |
|
|
my $dir = '/tmp/ovid'; |
23 |
|
|
rmtree $dir if -e $dir; |
24 |
|
|
mkdir $dir; |
25 |
|
|
|
26 |
|
|
sub save_mech { |
27 |
|
|
my $path = shift; |
28 |
|
|
$step++; |
29 |
|
|
my $base_path = sprintf('%s/%04d', $dir,$step); |
30 |
|
|
$path ||= $base_path; |
31 |
|
|
$path .= $mech->{ct} =~ m{html}i ? '.html' : '.txt'; |
32 |
|
|
$mech->save_content( $path ); |
33 |
|
|
warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n"; |
34 |
|
|
open(my $dump, '>', "$base_path.dump.txt"); |
35 |
|
|
$mech->dump_all($dump); |
36 |
|
|
} |
37 |
|
|
|
38 |
|
|
warn "# get $url"; |
39 |
|
|
$mech->get( $url ); |
40 |
|
|
save_mech; |
41 |
|
|
|
42 |
|
|
warn "# multifield"; |
43 |
|
|
$mech->follow_link( url_regex => qr/multifield/ ); |
44 |
|
|
save_mech; |
45 |
|
|
|
46 |
|
|
warn "# search lo:$location"; |
47 |
|
|
$mech->submit_form( |
48 |
|
|
form_name => 'sfmultifield', |
49 |
|
|
fields => { |
50 |
|
|
'fields001' => 'lo', |
51 |
|
|
'textBox001' => $location, |
52 |
|
|
}, |
53 |
|
|
); |
54 |
|
|
save_mech; |
55 |
dpavlin |
1297 |
|
56 |
|
|
# $mech->form_id( 'nav-results' ); # XXX not supported by older WWW::Mechanize |
57 |
|
|
$mech->form_number(3); |
58 |
dpavlin |
1293 |
|
59 |
|
|
while (1) { |
60 |
|
|
|
61 |
|
|
my @records = $mech->find_all_inputs( |
62 |
|
|
type => 'radio', |
63 |
|
|
name => 'cmRecordSelect', |
64 |
|
|
); |
65 |
|
|
|
66 |
|
|
#warn '## records ', dump @records; |
67 |
|
|
my $range = $records[0]->{menu}->[1]->{value} || die "All on this page"; |
68 |
|
|
|
69 |
|
|
warn "submit_form save $range"; |
70 |
|
|
$mech->submit_form( |
71 |
|
|
fields => { |
72 |
|
|
'cmRecordSelect' => $range, |
73 |
|
|
'cmFields' => 'ALL', |
74 |
|
|
'cmFormat' => 'export', |
75 |
|
|
'saveStrategy' => 'on', |
76 |
|
|
'jumpstartLink' => 'on', |
77 |
|
|
}, |
78 |
|
|
button => 'submit:cmsave|1', |
79 |
|
|
); |
80 |
|
|
save_mech "$dir.$range"; |
81 |
|
|
|
82 |
|
|
$mech->back; |
83 |
|
|
|
84 |
|
|
my @next_page = $mech->find_all_submits( value_regex => qr/Next Page/i ); |
85 |
|
|
#warn "## next page ", dump @next_page; |
86 |
|
|
my $button = $next_page[-1]->{name} || die "next page button?"; |
87 |
|
|
warn "submit_form next page $button\n"; |
88 |
|
|
$mech->submit_form( |
89 |
|
|
fields => { |
90 |
|
|
results_per_page => $results_per_page, # FIXME doesn't work? |
91 |
|
|
}, |
92 |
|
|
button => $button, |
93 |
|
|
); |
94 |
|
|
save_mech; |
95 |
|
|
|
96 |
|
|
} |
97 |
|
|
|