/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log


Revision 1282 - (hide annotations)
Fri Sep 18 17:46:04 2009 UTC (14 years, 7 months ago) by dpavlin
File MIME type: text/plain
File size: 1635 byte(s)
make dump of html details optional and $range_size configurable

1 dpavlin 1280 #!/usr/bin/perl
2    
3     use warnings;
4     use strict;
5    
# Baseline settings: the search expression submitted to the advanced search
# form and the number of records requested per export batch.
my ( $q, $range_size ) = ( 'TS=psychology AND AD=Croatia', 50 );

# Passing any command-line argument switches on $mech->dump_all after every
# saved page (see save_mech).
my $dump = 0;
$dump = 1 if @ARGV;

# Active settings: these override the baseline values above.
( $q, $range_size ) = ( 'AD=Croatia', 500 );
13    
14 dpavlin 1280 use WWW::Mechanize;
15     use Data::Dump qw/dump/;
16    
# Browser object shared by the whole script. autocheck is enabled and no
# cookie jar is supplied (cookie_jar is passed explicitly as undef).
my %mech_options = (
    autocheck  => 1,
    cookie_jar => undef,
);
my $mech = WWW::Mechanize->new(%mech_options);

# Number of pages saved so far; save_mech increments it and uses it to
# number the default /tmp/isi.NN.* file names.
our $step = 0;
23 dpavlin 1280
# Save the current response body to disk and report it on STDERR.
#
#   $mech - WWW::Mechanize object whose current content is saved
#   $path - optional destination file; when omitted (or false) a numbered
#           name /tmp/isi.NN.html or /tmp/isi.NN.txt is generated, picking
#           the extension by whether the content type matches /html/i
#
# Side effects: increments the file-level $step counter on every call,
# writes the file, emits one warn line with step, path, size and content
# type, and calls $mech->dump_all when the file-level $dump flag is true.
sub save_mech {
    my ( $mech, $path ) = @_;
    $step++;

    # Use the public ct accessor for both the extension test and the log
    # line instead of reading the object's internal hash slot.
    my $ct = $mech->ct;
    $ct = '' unless defined $ct;

    $path ||= sprintf( '/tmp/isi.%02d.%s', $step, $ct =~ m{html}i ? 'html' : 'txt' );
    $mech->save_content($path);

    # -s yields a false/undefined value for an empty or missing file;
    # report that as 0 rather than triggering an "uninitialized" warning.
    my $size = ( -s $path ) || 0;
    warn "# [$step] $path ", $size, " ", $ct;

    $mech->dump_all if $dump;
}
32    
# Step 1: load the entry page; every fetched page is archived via save_mech.
warn "# get session";
$mech->get('http://isiknowledge.com/?DestApp=WOS');
save_mech($mech);

# Step 2: follow the first link whose URL mentions AdvancedSearch.
warn "# advanced serach";
my $advanced_search_link = qr/AdvancedSearch/;
$mech->follow_link( url_regex => $advanced_search_link );
save_mech($mech);

warn "# cookie_jar ", dump( $mech->cookie_jar );

# Step 3: submit the search form with the configured query expression.
my %search_fields = ( 'value(input1)' => $q );
$mech->submit_form( fields => \%search_fields );
save_mech($mech);

# Step 4: follow the link to the result summary page.
warn "# summary";
my $summary_link = qr/summary/;
$mech->follow_link( url_regex => $summary_link );
save_mech($mech);
53    
# Export the search results in consecutive batches of $range_size records.
# Each pass submits the summary output form for records $from..$to, saves
# the confirmation page, follows the save_file link to fetch the plain-text
# export into /tmp/isi.$q.$from-$to.txt, and then steps back twice so the
# next pass starts again from the page holding the output form.
my $from = 1;

# The loop has no natural end condition; it is left via "last" below.
while (1) {

    # The range is taken to be inclusive at both ends (the output file is
    # named "$from-$to"), so a batch of $range_size records ends at
    # $from + $range_size - 1 and consecutive batches do not share a
    # boundary record.
    my $to = $from + $range_size - 1;

    $mech->submit_form(
        form_name => 'summary_output_form',
        fields    => {
            record_select_type => 'range',
            mark_from          => $from,
            mark_to            => $to,
            mark_id            => 'WOS',

            qo_fields => 'fullrecord',
            citedref  => 'citedref',

            save_options => 'plain_text',

            fields => 'Full',
            format => 'save',
        },
        button => 'save',
    );
    save_mech($mech);

    # Stop once the server rejects the request -- presumably because the
    # requested range lies past the last record (TODO confirm).
    last if $mech->content =~ m{invalid API call};

    warn "# save_file";
    $mech->follow_link( url_regex => qr/save_file/ );
    save_mech( $mech => "/tmp/isi.$q.$from-$to.txt" );

    $from += $range_size;

    # Undo the two navigations made in this pass (form submit, save_file).
    $mech->back;
    $mech->back;
    #save_mech $mech;

}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26