19 |
|
|
20 |
my $results = 0; |
my $results = 0; |
21 |
my $citations = 0; |
my $citations = 0; |
22 |
|
my $cited_reference = 0; # html tables |
23 |
|
my $citing_articles = 1; # as many files as cited articles |
24 |
|
|
25 |
my $cites_by_year = 0; |
my $cites_by_year = 0; |
26 |
|
|
120 |
last; |
last; |
121 |
} |
} |
122 |
|
|
123 |
|
if ( $mech->content =~ m{Please wait while your request is processed} ) { |
124 |
|
warn "WARNING: processing request"; |
125 |
|
} |
126 |
|
|
127 |
|
|
128 |
my $path = "/tmp/isi.$q.$from-$to"; |
my $path = "/tmp/isi.$q.$from-$to"; |
129 |
$path .= '.' . $desc if $desc; |
$path .= '.' . $desc if $desc; |
217 |
return $years; |
return $years; |
218 |
} |
} |
219 |
|
|
220 |
|
|
221 |
|
# Page counter shared by the pagination code; starts at 1 and is only ever
# advanced by next_page().
our $page = 1;

# Request the following result page.
#
# Increments $page, submits the 'summary_navigation' form with that page
# number and stores the response through save_mech.  Afterwards the same
# form is selected again on the received page and its 'page' field is
# compared numerically with the requested number.
#
# Returns a true value when the field equals the requested page, a false
# value otherwise (after warning "no next_page").  $page stays incremented
# in both cases.
sub next_page {
    $page += 1;
    warn "next_page $page\n";

    my %navigation = ( 'page' => $page );
    $mech->submit_form(
        form_name => 'summary_navigation',
        fields    => \%navigation,
    );

    save_mech;

    # Select the navigation form of the page we just received so that
    # value() reads the page number reported by that form.
    $mech->form_name('summary_navigation');
    my $is_next_page = ( $mech->value('page') == $page );

    unless ($is_next_page) {
        warn "no next_page";
    }

    return $is_next_page;
}
240 |
|
|
241 |
if ( $results ) { |
if ( $results ) { |
242 |
search; |
search; |
243 |
years; |
years; |
262 |
|
|
263 |
} |
} |
264 |
|
|
265 |
if ( $q =~ m{CA=(.+)} ) { |
|
266 |
|
|
267 |
|
if ( $q =~ m{CA=(.+)} && $cited_reference ) { |
268 |
|
|
269 |
my $CA = $1; |
my $CA = $1; |
270 |
|
|
288 |
while (1) { |
while (1) { |
289 |
save_mech "/tmp/isi.$q.citedref.$page"; |
save_mech "/tmp/isi.$q.citedref.$page"; |
290 |
|
|
291 |
$page++; |
last unless next_page(); |
|
|
|
|
$mech->submit_form( |
|
|
form_name => 'summary_navigation', |
|
|
fields => { |
|
|
'page' => $page, |
|
|
}, |
|
|
); |
|
|
|
|
|
$mech->form_name( 'summary_navigation' ); |
|
|
last if $mech->value('page') < $page; |
|
292 |
|
|
293 |
if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) { |
if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) { |
294 |
warn "span: $1\n"; |
warn "span: $1\n"; |
304 |
|
|
305 |
} |
} |
306 |
|
|
307 |
|
# Citing-articles export: runs only when the query contains "CA=..." and
# the $citing_articles switch is on.  After the search, every link whose
# URL matches CitingArticles.do is followed on each result page, and
# get_results is called with $q temporarily set to
# "<original query>.citing_article.<n>".
if ( $q =~ m{CA=(.+)} && $citing_articles ) {

    search;

    my $orig_q = $q;    # restored once all pages have been walked
    my $nr     = 0;     # running number of followed links, across pages

    while (1) {

        my @citing_links =
            $mech->find_all_links( url_regex => qr/CitingArticles.do/ );

        for my $link (@citing_links) {
            $nr += 1;
            warn "link $nr\n";

            $mech->get( $link->url );
            save_mech;

            $q = join '.', $orig_q, 'citing_article', $nr;
            get_results;

            # step back twice in the browser history before the next link
            $mech->back for 1 .. 2;

            #last if $nr > 3; # FIXME only for development
        }

        # the current page is always processed before asking for the next
        last unless next_page();
    }

    $q = $orig_q;
}
333 |
|
|
334 |
warn "OVER\n"; |
warn "OVER\n"; |