/[scripts]/trunk/google-groups2mbox.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/google-groups2mbox.pl

Parent Directory | Revision Log


Revision 140 - (hide annotations)
Fri Nov 26 14:37:09 2010 UTC (13 years, 4 months ago) by dpavlin
File MIME type: text/plain
File size: 712 byte(s)
crawl first page of Google Groups archive

#!/usr/bin/perl
use warnings;
use strict;

# Crawl the first topics page of a Google Groups archive and print every
# message reachable from it to STDOUT as a single mbox stream.
#
# Progress is reported on STDERR: one "# <thread title>" line per thread,
# followed by one "." for each message fetched from that thread.
#
# Usage: google-groups2mbox.pl [group] > archive.mbox

# Group to crawl. The first command-line argument, when given, overrides the
# built-in default, so running the script without arguments behaves as before.
my $group = @ARGV ? shift @ARGV : 'angular';

use WWW::Mechanize;
use Data::Dump qw(dump);    # dump() is not called anywhere below

# Make HTTP failures fatal explicitly instead of relying on the module's
# default: a partially fetched archive should fail loudly, not silently.
my $mech = WWW::Mechanize->new( autocheck => 1 );

$mech->get( "http://groups.google.com/group/$group/topics?gvc=2" );

# Each find_all_links() list is computed once, before its loop starts, so
# navigating away inside the loop body does not affect the iteration.
foreach my $link ( $mech->find_all_links( url_regex => qr/browse_thread/ ) ) {
    # Links without text (e.g. image-only links) return undef; avoid the
    # "uninitialized value" warning that would otherwise clutter STDERR.
    my $title = $link->text;
    print STDERR "# ", defined $title ? $title : '';

    # follow_link() looks the URL up on the *current* page, which is why the
    # topics page has to be restored with back() at the end of each iteration.
    $mech->follow_link( url => $link->url );

    foreach my $m_link ( $mech->find_all_links( url_regex => qr/dmode=source/ ) ) {
        # Request the message source as plain text.
        $mech->get( $m_link->url . '&output=gplain' );
        print STDERR ".";

        my $msg = $mech->content;
        $msg =~ s/\r//gs;      # normalise CRLF line endings to LF
        $msg =~ s/^\s+//s;     # drop whitespace preceding the first header

        # mbox readers treat every line starting with "From " as the start of
        # a new message. Quote such lines (mboxrd style: prepend one ">", also
        # to already-quoted ">From " lines so the quoting is reversible). The
        # first line of the message is deliberately left untouched.
        $msg =~ s/(?<=\n)(>*From )/>$1/g;

        # Guarantee the message ends with a newline so that the separator
        # printed below always yields a blank line before the next "From ".
        $msg .= "\n" unless $msg =~ /\n\z/;

        # Envelope line: sender placeholder plus the fetch time in the
        # ctime-like format produced by localtime() in scalar context.
        print "From $group\@googlegroups.com " . scalar( localtime() ) . "\n$msg\n";

        # Return to the thread page before handling the next message link.
        $mech->back;
    }

    print STDERR "\n";

    # Return to the topics page so the next follow_link() can find its link.
    $mech->back;
}

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26