/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log | View Patch

revision 1331 by dpavlin, Sun Apr 18 10:56:01 2010 UTC | revision 1335 by dpavlin, Sat Jun 5 15:49:59 2010 UTC
# Line 3  Line 3 
3  use warnings;  use warnings;
4  use strict;  use strict;
5    
6    use WWW::Mechanize;
7    use Data::Dump qw(dump);
8    use File::Path;
9    use Text::Unaccent;
10    
11  # Advanced search syntax:  # Advanced search syntax:
12  # http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html  # http://images.isiknowledge.com/WOK46/help/WOS/h_advanced_examples.html
13    
14  our $q = 'AD=Croatia';  our $q = 'AD=Croatia';
15    $q = 'CA=BRATKO, D';
16    
17  my $range_size = 500;  my $range_size = 500;
18  my $overlap    = 3; # between previous and this range  my $overlap    = 3; # between previous and this range
 my $skip_results = 1;  
 my $cites_by_year = 1;  
19    
20  my $max_cites = 5000; # ISI limit to get cites  my $results = 0;
21    my $citations = 0;
22    
23  if ( 0 ) {  my $cites_by_year = 0;
         $q = 'TS=psychology AND AD=Croatia';  
         $range_size = 50;  
         $overlap    = 0;  
         $max_cites  = 50;  
 }  
24    
25  $q = join(' ', @ARGV) if @ARGV;  my $max_cites = 5000; # ISI limit to get cites
26    
27  use WWW::Mechanize;  $q = unac_string( 'utf-8', join(' ', @ARGV) ) if @ARGV;
28  use Data::Dump qw(dump);  
29  use File::Path;  warn "QUERY: $q\n";
30    
31  our $mech = WWW::Mechanize->new(  our $mech = WWW::Mechanize->new(
32          autocheck => 1,          autocheck => 0, # it dies in reference download with it!
33          cookie_jar => undef,          cookie_jar => undef,
34  );  );
35    
# Line 210  sub years { Line 211  sub years {
211          return $years;          return $years;
212  }  }
213    
214  search;  if ( $results ) {
215  years;          search;
216  get_results unless $skip_results;          years;
217            get_results;
218    }
219    
220    if ( $citations ) {
221    
222  citations;          citations;
223            years unless @ranges;
224    
225  do {          do {
226          my $part;                  my $part;
227          if ( @ranges ) {                  if ( @ranges ) {
228                  $part .= $ranges[0]->[0] . '.';                          $part .= $ranges[0]->[0] . '.';
229                  search;                          search;
230                  citations;                          citations;
231          }                  }
232          $part .= 'citing';                  $part .= 'citing';
233          get_results $part;                  get_results $part;
234  } while ( @ranges );          } while ( @ranges );
235    
236    }
237    
238    if ( $q =~ m{CA=(.+)} ) {
239    
240            my $CA = $1;
241    
242            warn "# citated reference search";
243            $mech->follow_link( url_regex => qr/CitedReferenceSearch/ );
244            save_mech;
245    
246    
247            $mech->submit_form(
248                    form_name => 'WOS_CitedReferenceSearch_input_form',
249                    fields => {
250                            'value(input1)' => $CA,
251                    },
252            );
253    
254            my $page = 1;
255            my $records = $1 if $mech->content =~ m/(\d+)\s+records/;
256            warn "# found $records records\n";
257            my $last_span = 'fake';
258    
259            while (1) {
260                    save_mech "/tmp/isi.$q.citedref.$page";
261    
262                    $page++;
263    
264                    $mech->submit_form(
265                            form_name => 'summary_navigation',
266                            fields => {
267                                    'page' => $page,
268                            },
269                    );
270    
271                    $mech->form_name( 'summary_navigation' );
272                    last if $mech->value('page') < $page;
273    
274                    if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
275                            warn "span: $1\n";
276                            last if $2 == $records;
277                            last if $1 == $last_span;
278                            $last_span = $1;
279                    } elsif ( $page > 5 ) {
280                            warn "ARTIFICALLY LIMITED TO 5 PAGES WITHOUT VALID SPAN!";
281                            last;
282                    }
283    
284            }
285    
286    }
287    
288    warn "OVER\n";

Legend:
Removed from v.1331  
changed lines
  Added in v.1335

  ViewVC Help
Powered by ViewVC 1.1.26