/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log | View Patch

revision 1280 by dpavlin, Fri Sep 18 16:52:57 2009 UTC | revision 1282 by dpavlin, Fri Sep 18 17:46:04 2009 UTC
# Line 4  use warnings; Line 4  use warnings;
4  use strict;  use strict;
5    
6  my $q = 'TS=psychology AND AD=Croatia';  my $q = 'TS=psychology AND AD=Croatia';
7    my $range_size = 50;
8    
9    my $dump = @ARGV ? 1 : 0;
10    
11    $q = 'AD=Croatia';
12    $range_size = 500;
13    
14  use WWW::Mechanize;  use WWW::Mechanize;
15  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
 use File::Slurp;  
16    
17  my $mech = WWW::Mechanize->new(  my $mech = WWW::Mechanize->new(
18          autocheck => 1,          autocheck => 1,
19          cookie_jar => undef,          cookie_jar => undef,
20  );  );
21    
22  my $step = 1;  our $step = 0;
23    
24  sub save_mech {  sub save_mech {
25          my $mech = shift;          my ( $mech, $path ) = @_;
         my $path = "/tmp/isi.$step.html";  
         write_file $path, $mech->content;  
         warn "# step $step ", -s $path;  
         $mech->dump_all;  
26          $step++;          $step++;
27            $path ||= sprintf('/tmp/isi.%02d.%s', $step, $mech->{ct} =~ m{html}i ? 'html' : 'txt' );
28            $mech->save_content( $path );
29            warn "# [$step] $path ", -s $path, " ", $mech->ct;
30            $mech->dump_all if $dump;
31  }  }
32    
33  warn "# get session";  warn "# get session";
# Line 47  $mech->follow_link( url_regex => qr/summ Line 52  $mech->follow_link( url_regex => qr/summ
52  save_mech $mech;  save_mech $mech;
53    
54  my $from = 1;  my $from = 1;
 my $range_size = 10;  
55    
56  $mech->submit_form(  while ( $from ) {
         form_name => 'summary_output_form',  
         fields => {  
                 record_select_type => 'range',  
                 mark_from => $from,  
                 mark_to => $from += $range_size,  
                 mark_id => 'WOS',  
   
                 qo_fields => 'fullrecord',  
                 citedref => 'citedref',  
   
                 save_options => 'plain_text',  
   
                 fields => 'Full',  
                 format => 'save',  
         },  
         button => 'save',  
 );  
 save_mech $mech;  
57    
58  warn "# save_file";          my $to = $from + $range_size;
59  $mech->follow_link( url_regex => qr/save_file/ );  
60  save_mech $mech;          $mech->submit_form(
61                    form_name => 'summary_output_form',
62                    fields => {
63                            record_select_type => 'range',
64                            mark_from => $from,
65                            mark_to => $to,
66                            mark_id => 'WOS',
67    
68                            qo_fields => 'fullrecord',
69                            citedref => 'citedref',
70    
71                            save_options => 'plain_text',
72    
73                            fields => 'Full',
74                            format => 'save',
75                    },
76                    button => 'save',
77            );
78            save_mech $mech;
79    
80            last if $mech->content =~ m{invalid API call};
81    
82            warn "# save_file";
83            $mech->follow_link( url_regex => qr/save_file/ );
84            save_mech $mech => "/tmp/isi.$q.$from-$to.txt";
85    
86            $from += $range_size;
87    
88            $mech->back;
89            $mech->back;
90            #save_mech $mech;
91    
92    }

Legend:
Removed from v.1280  
changed lines
  Added in v.1282

  ViewVC Help
Powered by ViewVC 1.1.26