/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log | View Patch

revision 1280 by dpavlin, Fri Sep 18 16:52:57 2009 UTC | revision 1281 by dpavlin, Fri Sep 18 17:28:03 2009 UTC
# Line 7  my $q = 'TS=psychology AND AD=Croatia'; Line 7  my $q = 'TS=psychology AND AD=Croatia';
7    
8  use WWW::Mechanize;  use WWW::Mechanize;
9  use Data::Dump qw/dump/;  use Data::Dump qw/dump/;
 use File::Slurp;  
10    
11  my $mech = WWW::Mechanize->new(  my $mech = WWW::Mechanize->new(
12          autocheck => 1,          autocheck => 1,
13          cookie_jar => undef,          cookie_jar => undef,
14  );  );
15    
16  my $step = 1;  our $step = 0;
17    
18  sub save_mech {  sub save_mech {
19          my $mech = shift;          my ( $mech, $path ) = @_;
         my $path = "/tmp/isi.$step.html";  
         write_file $path, $mech->content;  
         warn "# step $step ", -s $path;  
         $mech->dump_all;  
20          $step++;          $step++;
21            $path ||= sprintf('/tmp/isi.%02d.%s', $step, $mech->{ct} =~ m{html}i ? 'html' : 'txt' );
22            $mech->save_content( $path );
23            warn "# [$step] $path ", -s $path, " ", $mech->ct;
24            $mech->dump_all;
25  }  }
26    
27  warn "# get session";  warn "# get session";
# Line 47  $mech->follow_link( url_regex => qr/summ Line 46  $mech->follow_link( url_regex => qr/summ
46  save_mech $mech;  save_mech $mech;
47    
48  my $from = 1;  my $from = 1;
49  my $range_size = 10;  my $range_size = 50;
50    
51  $mech->submit_form(  while ( $from ) {
         form_name => 'summary_output_form',  
         fields => {  
                 record_select_type => 'range',  
                 mark_from => $from,  
                 mark_to => $from += $range_size,  
                 mark_id => 'WOS',  
   
                 qo_fields => 'fullrecord',  
                 citedref => 'citedref',  
   
                 save_options => 'plain_text',  
   
                 fields => 'Full',  
                 format => 'save',  
         },  
         button => 'save',  
 );  
 save_mech $mech;  
52    
53  warn "# save_file";          my $to = $from + $range_size;
54  $mech->follow_link( url_regex => qr/save_file/ );  
55  save_mech $mech;          $mech->submit_form(
56                    form_name => 'summary_output_form',
57                    fields => {
58                            record_select_type => 'range',
59                            mark_from => $from,
60                            mark_to => $to,
61                            mark_id => 'WOS',
62    
63                            qo_fields => 'fullrecord',
64                            citedref => 'citedref',
65    
66                            save_options => 'plain_text',
67    
68                            fields => 'Full',
69                            format => 'save',
70                    },
71                    button => 'save',
72            );
73            save_mech $mech;
74    
75            last if $mech->content =~ m{invalid API call};
76    
77            warn "# save_file";
78            $mech->follow_link( url_regex => qr/save_file/ );
79            save_mech $mech => "/tmp/isi.$q.$from-$to.txt";
80    
81            $from += $range_size;
82    
83            $mech->back;
84            $mech->back;
85            #save_mech $mech;
86    
87    }

Legend:
Removed from v.1280  
changed lines
  Added in v.1281

  ViewVC Help
Powered by ViewVC 1.1.26