Parent Directory | Revision Log
Crawl the first page of a Google Groups archive
#!/usr/bin/perl
use warnings;
use strict;

# Crawl the first topics page of a Google Groups archive and write every
# message reachable from it to STDOUT as an mbox stream.  Progress output
# (thread titles and one dot per fetched message) goes to STDERR.
#
# Usage: pass the group name as the first argument; without an argument
# the group defaults to 'angular'.

my $group = @ARGV ? shift @ARGV : 'angular';

use WWW::Mechanize;
use Data::Dump qw(dump);

my $mech = WWW::Mechanize->new();

$mech->get( "http://groups.google.com/group/$group/topics?gvc=2" );

# The list of thread links is built once, before the loop body runs, so
# navigating away from the topics page inside the loop does not affect it.
foreach my $link ( $mech->find_all_links( url_regex => qr/browse_thread/ ) ) {

    # Links without any text would otherwise trigger an
    # "uninitialized value" warning.
    my $title = $link->text;
    $title = '' unless defined $title;
    print STDERR "# ", $title;

    $mech->follow_link( url => $link->url );

    # Each message in the thread exposes a "dmode=source" link; appending
    # '&output=gplain' to it requests the message in plain form.
    foreach my $m_link ( $mech->find_all_links( url_regex => qr/dmode=source/ ) ) {
        $mech->get( $m_link->url . '&output=gplain' );
        print STDERR ".";

        my $msg = $mech->content;
        $msg =~ s/\r//gs;       # drop carriage returns
        $msg =~ s/^\s+//s;      # drop leading whitespace before the first line

        # mbox readers treat every line starting with "From " as a message
        # separator, so quote such lines inside the message (mboxrd style:
        # already-quoted ">From " lines get one more '>').
        $msg =~ s/^(>*From )/>$1/mg;

        # Separator line followed by the message itself.
        print "From $group\@googlegroups.com " . localtime() . "\n$msg\n";

        # Return to the thread page before handling the next message.
        $mech->back;
    }

    print STDERR "\n";

    # Return to the topics page so the next follow_link() is looked up there.
    $mech->back;
}
Name | Value |
---|---|
svn:executable | * |
ViewVC Help | |
Powered by ViewVC 1.1.26 |