# Unified diff of trunk/bin/isi-download-results.pl, revision 1282 -> 1289.
#
# What the change below does:
#   1. Swaps the two query strings: $q is now declared as 'AD=Croatia' and
#      then overridden with 'TS=psychology AND AD=Croatia'; $range_size is
#      set to 500 directly at its declaration.
#   2. Declares $mech with `our` instead of `my`.
#   3. Moves the range-download loop into sub get_results. Its single
#      argument is interpolated into the warn message and into the
#      /tmp/isi.<arg>.<from>-<to>.txt path passed to save_mech. When the
#      response contains "invalid API call" it calls $mech->back and returns.
#   4. After the first get_results call, follows the CitationReport and
#      TotalCitingArticles links and calls get_results again with the
#      query suffixed by '.citing'.
#
# NOTE(review): line breaks were restored from a copy in which the whole
# patch had been flattened. Indentation (tabs assumed) and the positions of
# blank context lines are reconstructed so that they agree with the line
# counts in the @@ hunk headers (18/17 and 42/62) -- confirm against the
# repository before applying.
--- trunk/bin/isi-download-results.pl	2009/09/18 17:46:04	1282
+++ trunk/bin/isi-download-results.pl	2009/09/19 10:23:35	1289
@@ -3,18 +3,17 @@
 use warnings;
 use strict;
 
-my $q = 'TS=psychology AND AD=Croatia';
-my $range_size = 50;
+my $q = 'AD=Croatia';
+my $range_size = 500;
 
 my $dump = @ARGV ? 1 : 0;
 
-$q = 'AD=Croatia';
-$range_size = 500;
+$q = 'TS=psychology AND AD=Croatia';
 
 use WWW::Mechanize;
 use Data::Dump qw/dump/;
 
-my $mech = WWW::Mechanize->new(
+our $mech = WWW::Mechanize->new(
 	autocheck => 1,
 	cookie_jar => undef,
 );
@@ -51,42 +50,62 @@
 $mech->follow_link( url_regex => qr/summary/ );
 save_mech $mech;
 
-my $from = 1;
+sub get_results {
+	my $q = shift;
+	my $from = 1;
+
+	while ( 1 ) {
+
+		my $to = $from + $range_size;
+
+		$mech->submit_form(
+			form_name => 'summary_output_form',
+			fields => {
+				record_select_type => 'range',
+				mark_from => $from,
+				mark_to => $to,
+				mark_id => 'WOS',
+
+				qo_fields => 'fullrecord',
+				citedref => 'citedref',
+
+				save_options => 'plain_text',
+
+				fields => 'Full',
+				format => 'save',
+			},
+			button => 'save',
+		);
+		save_mech $mech;
+
+		if ( $mech->content =~ m{invalid API call} ) {
+			$mech->back;
+			return;
+		}
+
+		warn "# save_file $from - $to [$q]";
+		$mech->follow_link( url_regex => qr/save_file/ );
+		save_mech $mech => "/tmp/isi.$q.$from-$to.txt";
+
+		$from += $range_size;
+
+		$mech->back;
+		$mech->back;
+		#save_mech $mech;
 
-while ( $from ) {
+	} # while
 
-	my $to = $from + $range_size;
-
-	$mech->submit_form(
-		form_name => 'summary_output_form',
-		fields => {
-			record_select_type => 'range',
-			mark_from => $from,
-			mark_to => $to,
-			mark_id => 'WOS',
-
-			qo_fields => 'fullrecord',
-			citedref => 'citedref',
-
-			save_options => 'plain_text',
-
-			fields => 'Full',
-			format => 'save',
-		},
-		button => 'save',
-	);
-	save_mech $mech;
+}
 
-	last if $mech->content =~ m{invalid API call};
+get_results $q;
 
-	warn "# save_file";
-	$mech->follow_link( url_regex => qr/save_file/ );
-	save_mech $mech => "/tmp/isi.$q.$from-$to.txt";
+save_mech $mech;
+warn "# citations";
+$mech->follow_link( url_regex => qr/search_mode=CitationReport/ );
+save_mech $mech;
 
-	$from += $range_size;
+$mech->follow_link( url_regex => qr/search_mode=TotalCitingArticles/ );
+save_mech $mech;
 
-	$mech->back;
-	$mech->back;
-	#save_mech $mech;
+get_results $q . '.citing';
 
-}