/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log | View Patch

revision 1334 by dpavlin, Fri Jun 4 17:43:29 2010 UTC revision 1335 by dpavlin, Sat Jun 5 15:49:59 2010 UTC
# Line 12  use Text::Unaccent; Line 12  use Text::Unaccent;
12  # http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html  # http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html
13    
14  our $q = 'AD=Croatia';  our $q = 'AD=Croatia';
15    $q = 'CA=BRATKO, D';
16    
17  my $range_size = 500;  my $range_size = 500;
18  my $overlap    = 3; # between previous and this range  my $overlap    = 3; # between previous and this range
19    
20  my $results = 0;  my $results = 0;
21  my $citations = 0;  my $citations = 0;
22    
23  my $cites_by_year = 0;  my $cites_by_year = 0;
24    
25  my $max_cites = 5000; # ISI limit to get cites  my $max_cites = 5000; # ISI limit to get cites
26    
 if ( 0 ) {  
         $q = 'TS=psychology AND AD=Croatia';  
         $range_size = 50;  
         $overlap    = 0;  
         $max_cites  = 50;  
 }  
   
27  $q = unac_string( 'utf-8', join(' ', @ARGV) ) if @ARGV;  $q = unac_string( 'utf-8', join(' ', @ARGV) ) if @ARGV;
28    
29    warn "QUERY: $q\n";
30    
31  our $mech = WWW::Mechanize->new(  our $mech = WWW::Mechanize->new(
32          autocheck => 0, # it dies in reference download with it!          autocheck => 0, # it dies in reference download with it!
33          cookie_jar => undef,          cookie_jar => undef,
# Line 212  sub years { Line 211  sub years {
211          return $years;          return $years;
212  }  }
213    
214  search;  if ( $results ) {
215  years;          search;
216  get_results 'results' if $results;          years;
217            get_results;
218    }
219    
220  if ( $citations ) {  if ( $citations ) {
221    
222          citations;          citations;
223            years unless @ranges;
224    
225          do {          do {
226                  my $part;                  my $part;
# Line 232  if ( $citations ) { Line 234  if ( $citations ) {
234          } while ( @ranges );          } while ( @ranges );
235    
236  }  }
237    
238    if ( $q =~ m{CA=(.+)} ) {
239    
240            my $CA = $1;
241    
242            warn "# citated reference search";
243            $mech->follow_link( url_regex => qr/CitedReferenceSearch/ );
244            save_mech;
245    
246    
247            $mech->submit_form(
248                    form_name => 'WOS_CitedReferenceSearch_input_form',
249                    fields => {
250                            'value(input1)' => $CA,
251                    },
252            );
253    
254            my $page = 1;
255            my $records = $1 if $mech->content =~ m/(\d+)\s+records/;
256            warn "# found $records records\n";
257            my $last_span = 'fake';
258    
259            while (1) {
260                    save_mech "/tmp/isi.$q.citedref.$page";
261    
262                    $page++;
263    
264                    $mech->submit_form(
265                            form_name => 'summary_navigation',
266                            fields => {
267                                    'page' => $page,
268                            },
269                    );
270    
271                    $mech->form_name( 'summary_navigation' );
272                    last if $mech->value('page') < $page;
273    
274                    if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
275                            warn "span: $1\n";
276                            last if $2 == $records;
277                            last if $1 == $last_span;
278                            $last_span = $1;
279                    } elsif ( $page > 5 ) {
280                            warn "ARTIFICALLY LIMITED TO 5 PAGES WITHOUT VALID SPAN!";
281                            last;
282                    }
283    
284            }
285    
286    }
287    
288    warn "OVER\n";

Legend:
Removed from v.1334  
changed lines
  Added in v.1335

  ViewVC Help
Powered by ViewVC 1.1.26