--- trunk/bin/isi-download-results.pl 2009/09/19 12:43:03 1290 +++ trunk/bin/isi-download-results.pl 2009/09/19 15:05:12 1291 @@ -3,7 +3,7 @@ use warnings; use strict; -my $q = 'AD=Croatia'; +our $q = 'AD=Croatia'; my $range_size = 500; my $dump = @ARGV ? 1 : 0; @@ -11,7 +11,8 @@ $q = 'TS=psychology AND AD=Croatia'; use WWW::Mechanize; -use Data::Dump qw/dump/; +use Data::Dump qw(dump); +use File::Path; our $mech = WWW::Mechanize->new( autocheck => 1, @@ -20,11 +21,14 @@ our $step = 0; +my $dir = '/tmp/isi/'; +rmtree $dir if -e $dir; +mkdir $dir; + sub save_mech { my ( $mech, $path ) = @_; $step++; - mkdir '/tmp/isi/' unless -e '/tmp/isi'; - my $base_path = sprintf('/tmp/isi/%04d', $step); + my $base_path = sprintf('%s/%04d', $dir,$step); $path ||= $base_path . ( $mech->{ct} =~ m{html}i ? '.html' : '.txt' ); $mech->save_content( $path ); warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n"; @@ -37,8 +41,6 @@ save_mech $mech; sub search { - my $q = shift; - warn "# advanced serach"; $mech->follow_link( url_regex => qr/AdvancedSearch/ ); save_mech $mech; @@ -58,7 +60,6 @@ } sub get_results { - my $q = shift; my $from = 1; while ( 1 ) { @@ -117,29 +118,39 @@ } sub years { - my $years_url = $mech->find_link( text_regex => qr/more options/ )->url_abs; + my $years_url = $mech->find_link( text_regex => qr/more options/ ); + if ( ! $years_url ) { + warn "W: can't find years\n"; + return; + } + $years_url = $years_url->url_abs; warn "## $years_url"; - $years_url =~ s{ra_name=\w+}{ra_name=PublicationYear} || die "ra_name"; + if ( $years_url !~ s{ra_name=\w+}{ra_name=PublicationYear} ) { + warn "W: no ra_name\n"; + return; + } warn "# refine years (hidden by javascript)"; warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n"; $mech->get( $years_url ); save_mech $mech; my $html = $mech->content; - my @years; + my $years; while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\) $2 ]; + $years->{$1} = $2; } - warn "# years ",dump @years; + warn "# years ",dump $years; $mech->back; - return @years; + return $years; } -search $q; +search; years; -get_results $q; +get_results; + citations; years; -get_results $q . '.citing'; +$q .= '.citing'; +get_results;