Parent Directory | Revision Log
crawl first page of google groups archive
#!/usr/bin/perl
use warnings;
use strict;

# Crawl the first topics page of a Google Groups archive and print every
# message found there to STDOUT, each preceded by an mbox-style "From "
# separator line. Progress is reported on STDERR: one "# <topic title>"
# line per thread, followed by one "." per fetched message.
#
# Usage: <script> [group]
#   group - name of the group to crawl; defaults to 'angular'.
my $group = @ARGV ? shift @ARGV : 'angular';

use WWW::Mechanize;
use Data::Dump qw(dump);

my $mech = WWW::Mechanize->new();

$mech->get( "http://groups.google.com/group/$group/topics?gvc=2" );

# Both link lists are collected before the loop body navigates away, and
# every follow_link/get is paired with back() so the next iteration starts
# from the page the links were taken from.
foreach my $link ( $mech->find_all_links( url_regex => qr/browse_thread/ ) ) {
    # A link without text would otherwise trigger an "uninitialized" warning.
    my $title = $link->text;
    $title = '' unless defined $title;
    print STDERR "# ", $title;

    $mech->follow_link( url => $link->url );

    foreach my $m_link ( $mech->find_all_links( url_regex => qr/dmode=source/ ) ) {
        # NOTE(review): output=gplain presumably returns the raw message
        # source as plain text -- confirm against the live service.
        $mech->get( $m_link->url . '&output=gplain' );
        print STDERR ".";

        my $msg = $mech->content;
        $msg =~ s/\r//gs;      # drop carriage returns
        $msg =~ s/^\s+//s;     # strip whitespace before the first line

        # Escape lines starting with "From " so an mbox reader does not
        # mistake them for the start of the next message.
        $msg =~ s/^From />From /mg;

        # localtime() is in scalar context here, giving a timestamp string.
        print "From $group\@googlegroups.com " . localtime() . "\n$msg\n";

        $mech->back;    # return to the thread page
    }

    print STDERR "\n";
    $mech->back;        # return to the topics listing
}
Name | Value |
---|---|
svn:executable | * |
ViewVC Help | |
Powered by ViewVC 1.1.26 |