# Unified diff of trunk/bin/isi-download-results.pl (old side stamped
# 2010/06/05 "1335", new side stamped 2010/06/29 "1337"; the trailing
# numbers look like revision numbers -- TODO confirm).
#
# What the hunks change:
#   * hunk 1: adds two flags, $cited_reference (0) and $citing_articles (1).
#   * hunk 2: warns when the fetched content contains
#     "Please wait while your request is processed".
#   * hunk 3: adds the package variable $page and sub next_page, which
#     increments $page, submits the 'summary_navigation' form with that page,
#     calls save_mech, and returns true only if the form's 'page' field
#     equals $page afterwards.
#   * hunk 4: the existing CA= section now also requires $cited_reference.
#   * hunk 5: the inline paging code of that section is replaced by a call
#     to next_page().
#   * hunk 6: adds a second CA= section, gated on $citing_articles, that
#     visits every link matching CitingArticles.do, sets $q to
#     "<orig>.citing_article.<nr>" before calling get_results, goes back
#     twice, pages with next_page, and finally restores $q.
#
# NOTE(review): this text was recovered from a copy in which the whole diff
# had been collapsed onto one line.  Line breaks were restored from the
# +/-/context markers; indentation inside the hunks is reconstructed, not
# original.  Blank context lines appear to have been dropped by the
# flattening (e.g. the first hunk announces 6 old lines but shows only 3
# context lines) and are NOT re-invented here, so the @@ line counts may
# exceed the lines shown and the patch may not apply as-is.
--- trunk/bin/isi-download-results.pl 2010/06/05 15:49:59 1335
+++ trunk/bin/isi-download-results.pl 2010/06/29 18:52:19 1337
@@ -19,6 +19,8 @@
 my $results = 0;
 my $citations = 0;
+my $cited_reference = 0; # html tables
+my $citing_articles = 1; # as many files as cited articles
 my $cites_by_year = 0;
@@ -118,6 +120,10 @@
         last;
     }
+    if ( $mech->content =~ m{Please wait while your request is processed} ) {
+        warn "WARNING: processing request";
+    }
+
     my $path = "/tmp/isi.$q.$from-$to";
     $path .= '.' . $desc if $desc;
@@ -211,6 +217,27 @@
     return $years;
 }
+
+our $page = 1;
+sub next_page {
+    $page++;
+    warn "next_page $page\n";
+
+    $mech->submit_form(
+        form_name => 'summary_navigation',
+        fields => {
+            'page' => $page,
+        },
+    );
+
+    save_mech;
+
+    $mech->form_name( 'summary_navigation' );
+    my $is_next_page = $mech->value('page') == $page;
+    warn "no next_page" unless $is_next_page;
+    return $is_next_page;
+}
+
 if ( $results ) {
     search;
     years;
@@ -235,7 +262,9 @@
 }
-if ( $q =~ m{CA=(.+)} ) {
+
+
+if ( $q =~ m{CA=(.+)} && $cited_reference ) {
     my $CA = $1;
@@ -259,17 +288,7 @@
     while (1) {
         save_mech "/tmp/isi.$q.citedref.$page";
-        $page++;
-
-        $mech->submit_form(
-            form_name => 'summary_navigation',
-            fields => {
-                'page' => $page,
-            },
-        );
-
-        $mech->form_name( 'summary_navigation' );
-        last if $mech->value('page') < $page;
+        last unless next_page();
         if ( $mech->content =~ m/(\d+\s*-\s*(\d+))/ ) {
             warn "span: $1\n";
@@ -285,4 +304,31 @@
 }
+if ( $q =~ m{CA=(.+)} && $citing_articles ) {
+
+    search;
+
+    my $orig_q = $q;
+    my $nr = 0;
+
+    do {
+
+        foreach my $link ( $mech->find_all_links( url_regex => qr/CitingArticles.do/ ) ) {
+            $nr++;
+            warn "link $nr\n";
+            $mech->get( $link->url );
+            save_mech;
+            $q = $orig_q . '.citing_article.' . $nr;
+            get_results;
+            $mech->back;
+            $mech->back;
+
+            #last if $nr > 3; # FIXME only for development
+        }
+
+    } while next_page;
+
+    $q = $orig_q;
+}
+
 warn "OVER\n";