/[webpac2]/trunk/bin/isi-download-results.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Diff of /trunk/bin/isi-download-results.pl

Parent Directory | Revision Log | View Patch

revision 1289 by dpavlin, Sat Sep 19 10:23:35 2009 UTC | revision 1290 by dpavlin, Sat Sep 19 12:43:03 2009 UTC
# Line 23  our $step = 0; Line 23  our $step = 0;
23  sub save_mech {  sub save_mech {
24          my ( $mech, $path ) = @_;          my ( $mech, $path ) = @_;
25          $step++;          $step++;
26          $path ||= sprintf('/tmp/isi.%02d.%s', $step, $mech->{ct} =~ m{html}i ? 'html' : 'txt' );          mkdir '/tmp/isi/' unless -e '/tmp/isi';
27            my $base_path = sprintf('/tmp/isi/%04d', $step);
28            $path ||= $base_path . ( $mech->{ct} =~ m{html}i ? '.html' : '.txt' );
29          $mech->save_content( $path );          $mech->save_content( $path );
30          warn "# [$step] $path ", -s $path, " ", $mech->ct;          warn "# [$step] $path ", -s $path, " ", $mech->ct, "\n";
31          $mech->dump_all if $dump;          open(my $dump, '>', "$base_path.dump.txt");
32            $mech->dump_all($dump);
33  }  }
34    
35  warn "# get session";  warn "# get session";
36  $mech->get( 'http://isiknowledge.com/?DestApp=WOS' );  $mech->get( 'http://isiknowledge.com/?DestApp=WOS' );
37  save_mech $mech;  save_mech $mech;
38    
39  warn "# advanced serach";  sub search {
40  $mech->follow_link( url_regex => qr/AdvancedSearch/ );          my $q = shift;
 save_mech $mech;  
   
 warn "# cookie_jar ", dump $mech->cookie_jar;  
41    
42  $mech->submit_form(          warn "# advanced serach";
43          fields => {          $mech->follow_link( url_regex => qr/AdvancedSearch/ );
44                  'value(input1)' => $q,          save_mech $mech;
45          }  
46  );          warn "# cookie_jar ", dump $mech->cookie_jar;
47  save_mech $mech;  
48            $mech->submit_form(
49                    fields => {
50                            'value(input1)' => $q,
51                    }
52            );
53            save_mech $mech;
54    
55  warn "# summary";          warn "# summary";
56  $mech->follow_link( url_regex => qr/summary/ );          $mech->follow_link( url_regex => qr/summary/ );
57  save_mech $mech;          save_mech $mech;
58    }
59    
60  sub get_results {  sub get_results {
61          my $q = shift;          my $q = shift;
# Line 80  sub get_results { Line 87  sub get_results {
87    
88                  if ( $mech->content =~ m{invalid API call} ) {                  if ( $mech->content =~ m{invalid API call} ) {
89                          $mech->back;                          $mech->back;
90                          return;                          last;
91                  }                  }
92    
93                  warn "# save_file $from - $to [$q]";                  warn "range $from - $to [$q]\n";
94                  $mech->follow_link( url_regex => qr/save_file/ );                  $mech->follow_link( url_regex => qr/save_file/ );
95                  save_mech $mech => "/tmp/isi.$q.$from-$to.txt";                  save_mech $mech => "/tmp/isi.$q.$from-$to.txt";
96    
# Line 93  sub get_results { Line 100  sub get_results {
100                  $mech->back;                  $mech->back;
101                  #save_mech $mech;                  #save_mech $mech;
102    
103          } # while          }
104    
105  }  }
106    
 get_results $q;  
107    
108  save_mech $mech;  sub citations {
109  warn "# citations";          save_mech $mech;
110  $mech->follow_link( url_regex => qr/search_mode=CitationReport/ );          warn "# citation report";
111  save_mech $mech;          $mech->follow_link( url_regex => qr/search_mode=CitationReport/ );
112            save_mech $mech;
113    
114            warn "view citing articles";
115            $mech->follow_link( url_regex => qr/search_mode=TotalCitingArticles/ );
116            save_mech $mech;
117    }
118    
119  $mech->follow_link( url_regex => qr/search_mode=TotalCitingArticles/ );  sub years {
120  save_mech $mech;          my $years_url = $mech->find_link( text_regex => qr/more options/ )->url_abs;
121            warn "## $years_url";
122            $years_url =~ s{ra_name=\w+}{ra_name=PublicationYear} || die "ra_name";
123            warn "# refine years (hidden by javascript)";
124            warn "http://apps.isiknowledge.com/RAMore.do?product=WOS&search_mode=TotalCitingArticles&SID=T1o6bChdN9PGP1LN1Nh&qid=3&ra_mode=more&ra_name=PublicationYear&db_id=WOS&viewType=raMore\n$years_url\n";
125            $mech->get( $years_url );
126            save_mech $mech;
127    
128            my $html = $mech->content;
129            my @years;
130            while ( $html =~ s{>(\d\d\d\d)\s\((\d+)\)</label.+?value="PublicationYear_}{} ) {
131                    push @years, [ $1 => $2 ];
132            }
133            warn "# years ",dump @years;
134            $mech->back;
135            return @years;
136    }
137    
138    search $q;
139    years;
140    get_results $q;
141    
142    citations;
143    years;
144  get_results $q . '.citing';  get_results $q . '.citing';
145    

Legend:
Removed from v.1289  
changed lines
  Added in v.1290

  ViewVC Help
Powered by ViewVC 1.1.26