1 |
#!/usr/bin/perl |
2 |
|
# CouchDB bulk document API reference (this script reads /<db>/_all_docs):
# http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API
4 |
|
5 |
use warnings; |
6 |
use strict; |
7 |
|
8 |
use IO::Socket::INET; |
9 |
use Storable qw(); |
10 |
use JSON; |
11 |
use Data::Dump qw(dump); |
12 |
use Time::HiRes qw(time); |
13 |
use File::Path qw(make_path remove_tree); |
14 |
use MongoDB; |
15 |
|
16 |
# Database name: first command-line argument, falling back to 'pxelator'
# when the argument is missing or false ('' or '0').
my $name = shift @ARGV;
$name = 'pxelator' unless $name;

# MongoDB side: connect with the driver defaults, open the database called
# $name and take a handle on its "audit" collection.
my $conn  = MongoDB::Connection->new;
my $db    = $conn->get_database($name);
my $audit = $db->get_collection('audit');
21 |
|
22 |
# Open a TCP connection to the CouchDB HTTP port.
#
# Arguments (both optional, so existing no-argument calls keep working):
#   $host - peer address, defaults to '10.60.0.91'
#   $port - peer port,    defaults to 5984
#
# Returns the connected IO::Socket::INET handle.
# Dies, naming the target and the reason, if the connection cannot be made.
sub couchdb_socket {
    my ( $host, $port ) = @_;
    $host = '10.60.0.91' unless defined $host;
    $port = 5984         unless defined $port;

    my $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp',
    );

    # IO::Socket::INET puts its descriptive failure message (which step
    # failed and why) in $@; $! only carries the bare errno, so prefer $@.
    die "can't connect to CouchDB at $host:$port: " . ( $@ || $! )
        unless $sock;

    return $sock;
}
29 |
|
30 |
# Read one blank-line-terminated chunk from a handle.  The script uses this
# to consume the HTTP response headers, which end at the first empty line.
#
#   $sock - readable filehandle or socket
#
# Returns every line read, up to and including the blank terminator line,
# joined into one string.  Returns undef if the handle was already at
# end-of-file.  If the stream ends before a blank line is seen, whatever was
# read so far is returned.
sub get_chunk {
    my $sock = shift;
    my $chunk;

    # Read into a lexical on purpose: a bare while(<$sock>) assigns to the
    # global $_ without localizing it and would clobber the caller's $_.
    while ( defined( my $line = <$sock> ) ) {
        $chunk .= $line;
        last if $line =~ /^[\n\r]+$/;
    }

    # warn "# $sock\n$chunk\n";
    return $chunk;
}
40 |
|
41 |
# Main: stream every document of CouchDB database $name into the MongoDB
# collection behind $audit, printing a progress line every 1000 documents.

my $sock = couchdb_socket;

# Ask for all documents with their bodies included.
# NOTE(review): HTTP/1.0 is presumably used so the body arrives un-chunked
# and can be read line by line -- confirm against the CouchDB server in use.
print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";

# Consume the response headers and refuse to parse an error body as data.
my $headers = get_chunk($sock);
die "no response from CouchDB for /$name/_all_docs"
    unless defined $headers;
my ($status) = $headers =~ m{\AHTTP/\d+\.\d+\s+(\d{3})};
die "unexpected response from CouchDB for /$name/_all_docs:\n$headers"
    unless defined $status && $status == 200;

# The first body line is expected to carry "total_rows":N.
my $first_line = <$sock>;
die "empty response body from CouchDB for /$name/_all_docs"
    unless defined $first_line;

# Capture just the number; the old in-place substitution left a trailing
# newline (and the whole line when total_rows was absent) in $total.
my ($total) = $first_line =~ /total_rows\D+(\d+)/;
unless ( defined $total ) {
    warn "can't find total_rows in: $first_line";
    $total = 0;    # unknown total: percentages are reported as 0
}

$| = 1;    # unbuffered STDOUT so the \r-terminated progress line shows up
print "# $name total: $total\n";

my $start_t = time();
my $count   = 0;

# Each remaining line is expected to hold one row, e.g.
# {"id":"...","key":"...","value":{...},"doc":{...}},
while ( defined( my $row = <$sock> ) ) {

    # Lines without an "id" (such as the closing "]}") are not rows.
    unless ( $row =~ /"id":"[^"]+"/ ) {
        warn "UNKNOWN: $row";
        next;
    }

    $row =~ s/,[\r\n]+$//;    # cleanup JSON: drop the row separator
    my $json = from_json($row);
    $audit->insert( $json->{doc} );

    # Progress report only on every 1000th document.
    next if ++$count % 1000;

    my $elapsed = time() - $start_t;
    printf " %d/%d %.2f%% %.2f/s\r", $count, $total,
        ( $total   ? ( $count / $total ) * 100 : 0 ),
        ( $elapsed ? $count / $elapsed         : 0 );
}

# Final summary on its own line so the last \r progress line is not left
# dangling and counts that are not a multiple of 1000 are still reported.
printf " %d/%d done in %.2fs\n", $count, $total, time() - $start_t;

close $sock;
71 |
|