/[webpac2]/trunk/bin/isi-download-results.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/isi-download-results.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1290 - (show annotations)
Sat Sep 19 12:43:03 2009 UTC (14 years, 8 months ago) by dpavlin
File MIME type: text/plain
File size: 2940 byte(s)
refactor into small mini-DSL at bottom of code
and added report about year breakdown of results

#!/usr/bin/perl

# Drives a WWW::Mechanize session: runs an advanced search, then saves the
# result records (and the records citing them) in fixed-size ranges.

use warnings;
use strict;

use WWW::Mechanize;
use Data::Dump qw/dump/;

# Search query; the second assignment overrides the first, which is kept
# as a ready-made alternative.
my $q = 'AD=Croatia';
$q = 'TS=psychology AND AD=Croatia';

# Number of records requested per export chunk.
my $range_size = 500;

# True when any command-line argument is given.
my $dump = @ARGV ? 1 : 0;

# Counter of saved pages; save_mech uses it to number the files it writes.
our $step = 0;

# Shared browser object.  autocheck makes failed requests die;
# cookie_jar is explicitly undef.
our $mech = WWW::Mechanize->new( autocheck => 1, cookie_jar => undef );
22
# Save the current response of a WWW::Mechanize object to disk for debugging.
#
# Arguments:
#   $mech - WWW::Mechanize object whose current content is saved
#   $path - optional destination file; defaults to /tmp/isi/NNNN.html when
#           the content type mentions html, /tmp/isi/NNNN.txt otherwise
#           (NNNN is the zero-padded global $step counter)
#
# Side effects: increments the global $step, creates /tmp/isi when missing,
# writes the content file plus /tmp/isi/NNNN.dump.txt (output of
# $mech->dump_all) and reports the step on STDERR.
#
# Dies when the directory or the dump file cannot be created or written.
sub save_mech {
    my ( $mech, $path ) = @_;
    $step++;

    my $dir = '/tmp/isi';
    unless ( -e $dir ) {
        mkdir $dir or die "can't create $dir: $!";
    }
    my $base_path = sprintf( '%s/%04d', $dir, $step );

    # Use the public accessor instead of reaching into the object's hash,
    # and tolerate a response without a content type.
    my $ct = $mech->ct;
    $ct = '' unless defined $ct;

    $path ||= $base_path . ( $ct =~ m{html}i ? '.html' : '.txt' );
    $mech->save_content($path);
    warn "# [$step] $path ", -s $path, " ", $ct, "\n";

    # Check the open: with an undefined handle dump_all would not write to
    # the dump file at all.
    my $dump_path = "$base_path.dump.txt";
    open( my $dump_fh, '>', $dump_path )
        or die "can't open $dump_path: $!";
    $mech->dump_all($dump_fh);
    close($dump_fh) or die "can't close $dump_path: $!";

    return;
}
34
# Open the entry page to establish a session, and keep a copy of it.
warn "# get session";
$mech->get('http://isiknowledge.com/?DestApp=WOS');
save_mech($mech);
38
# Run an advanced search and move to its summary page.
#
# Arguments:
#   query string placed into the form field 'value(input1)'
#
# Every page visited is stored with save_mech.
sub search {
    my ($query) = @_;

    warn "# advanced serach";
    $mech->follow_link( url_regex => qr/AdvancedSearch/ );
    save_mech($mech);

    warn "# cookie_jar ", dump( $mech->cookie_jar );

    # Submit the query through the search form.
    my %form_fields = ( 'value(input1)' => $query );
    $mech->submit_form( fields => \%form_fields );
    save_mech($mech);

    warn "# summary";
    $mech->follow_link( url_regex => qr/summary/ );
    save_mech($mech);
}
59
# Download the records of the current result set in chunks of $range_size.
#
# Arguments:
#   $q - label used in progress messages and in the name of every saved
#        chunk: /tmp/isi.$q.FROM-TO.txt
#
# Expects $mech to be on a page containing the form 'summary_output_form'.
# For each chunk the form is submitted, the link matching /save_file/ is
# followed and its content saved; then the browser goes back two pages so
# the form is available again.  The loop ends when a response contains
# "invalid API call" (presumably the range is past the last record --
# TODO confirm against the service).
sub get_results {
    my $q    = shift;
    my $from = 1;

    while (1) {

        # A chunk of $range_size records ends at $from + $range_size - 1.
        # Using $from + $range_size asked for one record too many and made
        # consecutive chunks overlap by one record (assumes mark_from and
        # mark_to are inclusive -- TODO confirm).
        my $to = $from + $range_size - 1;

        $mech->submit_form(
            form_name => 'summary_output_form',
            fields    => {
                record_select_type => 'range',
                mark_from          => $from,
                mark_to            => $to,
                mark_id            => 'WOS',

                qo_fields => 'fullrecord',
                citedref  => 'citedref',

                save_options => 'plain_text',

                fields => 'Full',
                format => 'save',
            },
            button => 'save',
        );
        save_mech($mech);

        if ( $mech->content =~ m{invalid API call} ) {
            # Return to the page with the form before leaving the loop.
            $mech->back;
            last;
        }

        warn "range $from - $to [$q]\n";
        $mech->follow_link( url_regex => qr/save_file/ );
        save_mech( $mech => "/tmp/isi.$q.$from-$to.txt" );

        $from += $range_size;

        # Two steps back: from the saved file and from the export page.
        $mech->back;
        $mech->back;
    }

    return;
}
106
107
# Navigate from the current page to the citation report and then to the
# list of citing articles, storing every page with save_mech.
sub citations {
    # Keep a copy of the page we start from.
    save_mech($mech);

    warn "# citation report";
    $mech->follow_link( url_regex => qr/search_mode=CitationReport/ );
    save_mech($mech);

    warn "view citing articles";
    $mech->follow_link( url_regex => qr/search_mode=TotalCitingArticles/ );
    save_mech($mech);
}
118
# Report the breakdown of the current result set by publication year.
#
# Takes the URL of the "more options" link, rewrites its ra_name parameter
# to PublicationYear, fetches that page and extracts "YYYY (count)" labels.
#
# Returns a list of [ year, count ] array references in page order.
# Dies when the "more options" link is missing or its URL carries no
# ra_name parameter.  The browser is moved back one page before returning.
sub years {
    # find_link returns undef when nothing matches; fail with a message
    # that names the page instead of dying on a method call on undef.
    my $more_link = $mech->find_link( text_regex => qr/more options/ )
        or die "can't find 'more options' link on ", $mech->uri, "\n";

    my $years_url = $more_link->url_abs;
    warn "## $years_url";
    $years_url =~ s{ra_name=\w+}{ra_name=PublicationYear} || die "ra_name";
    warn "# refine years (hidden by javascript)";

    # Debug aid: a hard-coded sample URL printed next to the generated one.
    warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n";
    $mech->get($years_url);
    save_mech($mech);

    # Consume the copy of the page match by match: every substitution
    # removes one "YYYY (count)" entry, so the loop ends when none is left.
    my $html = $mech->content;
    my @years;
    while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\)</label.+?value="PublicationYear_}{} ) {
        push @years, [ $1 => $2 ];
    }
    warn "# years ", dump @years;

    $mech->back;
    return @years;
}
137
# Mini-DSL driver: search, report years and download the matching records...
search($q);
years();
get_results($q);

# ...then do the same for the articles citing them.
citations();
years();
get_results( $q . '.citing' );
145

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26