/[scripts]/trunk/google-groups2mbox.pl
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/google-groups2mbox.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 140 - (show annotations)
Fri Nov 26 14:37:09 2010 UTC (13 years, 5 months ago) by dpavlin
File MIME type: text/plain
File size: 712 byte(s)
crawl first page of google groups archive

#!/usr/bin/perl
use warnings;
use strict;

# google-groups2mbox.pl - crawl the first page of a Google Groups archive
# and write every message found there to STDOUT as one mbox stream.
#
# Usage: google-groups2mbox.pl [group] > archive.mbox
#
# The group name is the optional first argument; it defaults to 'angular'.
# Progress goes to STDERR: "# <thread title>" followed by one dot per
# downloaded message.

my $group = @ARGV ? shift @ARGV : 'angular';

use WWW::Mechanize;
use Data::Dump qw(dump);

my $mech = WWW::Mechanize->new();

# Topic listing for the group; only this first page is crawled.
$mech->get( "http://groups.google.com/group/$group/topics?gvc=2" );

# The list of thread links is captured once, before any navigation, so the
# inner requests do not disturb the iteration.
foreach my $link ( $mech->find_all_links( url_regex => qr/browse_thread/ ) ) {
    # Links without any text return undef; avoid an "uninitialized" warning.
    my $title = $link->text;
    print STDERR "# ", defined $title ? $title : '';

    $mech->follow_link( url => $link->url );

    # Every message in the thread page exposes a "dmode=source" link; adding
    # output=gplain to that URL requests the message as plain text.
    foreach my $m_link ( $mech->find_all_links( url_regex => qr/dmode=source/ ) ) {
        $mech->get( $m_link->url . '&output=gplain' );
        print STDERR ".";
        print mbox_entry( $group, $mech->content );
        $mech->back;    # return to the thread page
    }

    print STDERR "\n";
    $mech->back;        # return to the topic listing
}

# mbox_entry( $list, $msg )
#
# Turn one downloaded message into an mbox entry and return it as a string.
#   $list - group name, used to build the envelope sender address
#   $msg  - message text as fetched (undef is treated as empty)
#
# Carriage returns and leading whitespace are removed. Any line starting
# with "From " (optionally preceded by ">" characters) gets one more ">"
# prepended (mboxrd quoting), so a reader cannot mistake a body line for
# the start of the next message. The entry begins with a "From " separator
# line carrying the current local time and ends with an extra newline.
sub mbox_entry {
    my ( $list, $msg ) = @_;
    $msg = '' unless defined $msg;

    $msg =~ s/\r//g;
    $msg =~ s/^\s+//;
    $msg =~ s/^(>*From )/>$1/mg;

    # localtime() is in scalar context here, yielding a ctime-style date.
    return "From $list\@googlegroups.com " . localtime() . "\n$msg\n";
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26