/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log


Revision 1289 - (hide annotations)
Sat Sep 19 10:23:35 2009 UTC (14 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 1994 byte(s)
download also citing articles
#!/usr/bin/perl

# Download Web of Science search results (and, afterwards, the articles
# citing them) by driving the web interface with WWW::Mechanize.
#
# Passing any command-line argument turns on dumping of every fetched page.

use strict;
use warnings;

use Data::Dump qw/dump/;
use WWW::Mechanize;

# Number of records requested per export batch.
my $range_size = 500;

# Any command-line argument at all enables $mech->dump_all in save_mech.
my $dump = scalar(@ARGV) > 0 ? 1 : 0;

# Advanced-search query to run.  The broader alternative is 'AD=Croatia'.
my $q = 'TS=psychology AND AD=Croatia';

# Shared user agent; autocheck makes failed requests die instead of
# returning silently.
our $mech = WWW::Mechanize->new(
    cookie_jar => undef,
    autocheck  => 1,
);

# Counter of pages saved so far, used by save_mech to number its files.
our $step = 0;

# save_mech( $mech [, $path] )
#
# Save the content currently held by $mech to a file and log what was saved.
#
#   $mech - object providing save_content, ct and dump_all (the script
#           passes its WWW::Mechanize instance)
#   $path - optional destination file; when omitted (or false) a numbered
#           name /tmp/isi.NN.html or /tmp/isi.NN.txt is built from the
#           file-level $step counter and the response content type
#
# Side effects: increments the file-level $step on every call, emits one
# warn() line with step, path, file size and content type, and calls
# $mech->dump_all when the file-level $dump flag is set.
sub save_mech {
    my ( $mech, $path ) = @_;

    $step++;

    # Go through the public ct() accessor rather than the object's internal
    # hash slot, and tolerate a response that has no content type so the
    # match and the log line below do not raise "uninitialized" warnings.
    my $ct = $mech->ct;
    $ct = '' unless defined $ct;

    $path ||= sprintf( '/tmp/isi.%02d.%s', $step, $ct =~ m{html}i ? 'html' : 'txt' );

    $mech->save_content( $path );
    warn "# [$step] $path ", ( -s $path ), " ", $ct;
    $mech->dump_all if $dump;
}

# Step 1: fetch the entry page, saving a copy of the response.
warn "# get session";
$mech->get( 'http://isiknowledge.com/?DestApp=WOS' );
save_mech( $mech );

# Step 2: follow the first link whose URL mentions AdvancedSearch.
warn "# advanced serach";
$mech->follow_link( url_regex => qr/AdvancedSearch/ );
save_mech( $mech );

warn "# cookie_jar ", dump( $mech->cookie_jar );

# Step 3: submit the query through the form's 'value(input1)' field.
my %query_fields = ( 'value(input1)' => $q );
$mech->submit_form( fields => \%query_fields );
save_mech( $mech );

# Step 4: follow the link to the result summary.
warn "# summary";
$mech->follow_link( url_regex => qr/summary/ );
save_mech( $mech );

# get_results( $q )
#
# Export the result list currently loaded in the file-level $mech, one
# batch of $range_size records at a time, saving each batch to
# /tmp/isi.$q.$from-$to.txt.
#
#   $q - label used only in the log line and in the output file names
#
# The loop has no record count to go by: it keeps requesting the next range
# until the response to the export form contains "invalid API call", then
# steps back one page and returns.  Requests are made through the autocheck
# user agent, so a failed request dies rather than returning.
sub get_results {
    my $q    = shift;
    my $from = 1;

    while ( 1 ) {

        # Batches must not overlap: with $from advancing by $range_size the
        # upper bound is $from + $range_size - 1 (1-500, 501-1000, ...).
        # The previous "$from + $range_size" asked for 1-501, 501-1001, ...
        # NOTE(review): assumes mark_from/mark_to are inclusive bounds, as
        # the "records X to Y" form suggests - confirm against the site.
        my $to = $from + $range_size - 1;

        $mech->submit_form(
            form_name => 'summary_output_form',
            fields    => {
                record_select_type => 'range',
                mark_from          => $from,
                mark_to            => $to,
                mark_id            => 'WOS',

                qo_fields => 'fullrecord',
                citedref  => 'citedref',

                save_options => 'plain_text',

                fields => 'Full',
                format => 'save',
            },
            button => 'save',
        );
        save_mech( $mech );

        # This response is the only end-of-results signal the loop has.
        if ( $mech->content =~ m{invalid API call} ) {
            $mech->back;
            return;
        }

        warn "# save_file $from - $to [$q]";
        $mech->follow_link( url_regex => qr/save_file/ );
        save_mech( $mech, "/tmp/isi.$q.$from-$to.txt" );

        $from += $range_size;

        # Two pages were visited for this batch (form response, then the
        # save_file link); step back over both before the next submit.
        $mech->back;
        $mech->back;
        #save_mech $mech;

    } # while

}

# Download every batch of the search results themselves.
get_results( $q );

save_mech( $mech );
warn "# citations";

# Follow the citation report link, then the link to the citing articles,
# saving a copy of each page along the way.
for my $link_re ( qr/search_mode=CitationReport/, qr/search_mode=TotalCitingArticles/ ) {
    $mech->follow_link( url_regex => $link_re );
    save_mech( $mech );
}

# Same batch download again, this time for the citing articles; the
# ".citing" suffix keeps the output files apart from the first run.
get_results( "$q.citing" );


Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26