/[webpac2]/trunk/bin/ovid-download-results.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/ovid-download-results.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1297 - (hide annotations)
Sat Sep 19 23:29:34 2009 UTC (14 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 2041 byte(s)
fix warning
1 dpavlin 1293 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
6     my $url = 'http://ovidsp.ovid.com/ovidweb.cgi?T=JS&NEWS=n&MODE=ovid&PAGE=main&D=psyh';
7    
8     our $location = 'Croatia';
9     my $results_per_page = 100; # 5 10 25 50 100
10    
11     use WWW::Mechanize;
12     use Data::Dump qw(dump);
13     use File::Path;
14    
15     our $mech = WWW::Mechanize->new(
16     autocheck => 1,
17     cookie_jar => undef,
18     );
19    
20     our $step = 0;
21    
22     my $dir = '/tmp/ovid';
23     rmtree $dir if -e $dir;
24     mkdir $dir;
25    
26     sub save_mech {
27     my $path = shift;
28     $step++;
29     my $base_path = sprintf('%s/%04d', $dir,$step);
30     $path ||= $base_path;
31     $path .= $mech->{ct} =~ m{html}i ? '.html' : '.txt';
32     $mech->save_content( $path );
33     warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n";
34     open(my $dump, '>', "$base_path.dump.txt");
35     $mech->dump_all($dump);
36     }
37    
38     warn "# get $url";
39     $mech->get( $url );
40     save_mech;
41    
42     warn "# multifield";
43     $mech->follow_link( url_regex => qr/multifield/ );
44     save_mech;
45    
46     warn "# search lo:$location";
47     $mech->submit_form(
48     form_name => 'sfmultifield',
49     fields => {
50     'fields001' => 'lo',
51     'textBox001' => $location,
52     },
53     );
54     save_mech;
55 dpavlin 1297
56     # $mech->form_id( 'nav-results' ); # XXX not supported by older WWW::Mechanize
57     $mech->form_number(3);
58 dpavlin 1293
59     while (1) {
60    
61     my @records = $mech->find_all_inputs(
62     type => 'radio',
63     name => 'cmRecordSelect',
64     );
65    
66     #warn '## records ', dump @records;
67     my $range = $records[0]->{menu}->[1]->{value} || die "All on this page";
68    
69     warn "submit_form save $range";
70     $mech->submit_form(
71     fields => {
72     'cmRecordSelect' => $range,
73     'cmFields' => 'ALL',
74     'cmFormat' => 'export',
75     'saveStrategy' => 'on',
76     'jumpstartLink' => 'on',
77     },
78     button => 'submit:cmsave|1',
79     );
80     save_mech "$dir.$range";
81    
82     $mech->back;
83    
84     my @next_page = $mech->find_all_submits( value_regex => qr/Next Page/i );
85     #warn "## next page ", dump @next_page;
86     my $button = $next_page[-1]->{name} || die "next page button?";
87     warn "submit_form next page $button\n";
88     $mech->submit_form(
89     fields => {
90     results_per_page => $results_per_page, # FIXME doesn't work?
91     },
92     button => $button,
93     );
94     save_mech;
95    
96     }
97    

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26