1 |
#!/usr/bin/perl |
2 |
|
3 |
# http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API |
4 |
|
5 |
use warnings; |
6 |
use strict; |
7 |
|
8 |
use IO::Socket::INET; |
9 |
use Storable qw(); |
10 |
use JSON; |
11 |
use Data::Dump qw(dump); |
12 |
use Time::HiRes qw(time); |
13 |
use File::Path qw(make_path remove_tree); |
14 |
|
15 |
# Name of the CouchDB database to dump; it is also the last component of
# the output directory.
my $name = 'pxelator';

# Number of documents per shard; this value is also embedded in every
# shard file name.
my $shard_size = 5_000;

# Output directory. It is wiped and recreated so that every run starts
# from an empty directory.
my $path = '/tmp/sack/' . $name;
remove_tree($path);
make_path($path);
22 |
|
23 |
|
24 |
# Open a TCP connection to the CouchDB server.
#
# Arguments (both optional, so a call without arguments behaves as before):
#   $host - peer address, defaults to 10.60.0.91
#   $port - peer port, defaults to 5984
# Returns the connected IO::Socket::INET handle.
# Dies with the target address and the reason when the connection fails.
sub couchdb_socket {
    my ( $host, $port ) = @_;
    $host = '10.60.0.91' unless defined $host;
    $port = 5984         unless defined $port;

    my $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp',
    );

    # IO::Socket::INET reports constructor failures in $@; $! is only used
    # as a fallback because it is not set for every kind of failure.
    die "can't connect to CouchDB at $host:$port: " . ( $@ || $! )
        unless $sock;

    return $sock;
}
31 |
|
32 |
# Read lines from a handle up to and including the first blank line
# (a line consisting only of CR/LF characters). In this script that is
# the HTTP response header block.
#
# Arguments:
#   $sock - readable handle
# Returns everything that was read, terminating blank line included, as
# one string; undef when the handle yields no lines at all. The handle is
# left positioned at the first line after the blank line.
sub get_chunk {
    my $sock = shift;
    my $chunk;

    # A lexical line variable is used on purpose: while(<$fh>) assigns to
    # the global $_ without localising it and would clobber the caller's $_.
    while ( defined( my $line = <$sock> ) ) {
        $chunk .= $line;
        last if $line =~ /^[\n\r]+$/;
    }

#   warn "# $sock\n$chunk\n";
    return $chunk;
}
42 |
|
43 |
# Connect and ask for every document in the database, bodies included.
my $sock = couchdb_socket();

print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n"
    or die "can't send request to CouchDB: $!";

# Consume the response headers and stop early on anything but a 200;
# an error body would otherwise be misread as the total_rows line.
my $headers = get_chunk($sock);
die "unexpected response from CouchDB: "
    . ( defined $headers ? $headers : '(no response)' )
    unless defined $headers && $headers =~ m{\AHTTP/\d+\.\d+\s+200\b};

# The first body line is expected to carry the total_rows counter.
my $first_line = <$sock>;
die "no response body from CouchDB" unless defined $first_line;

# Capture the number instead of rewriting the line in place, so a line
# without total_rows is reported rather than silently used as the total.
my ($total) = $first_line =~ /total_rows\D+(\d+)/
    or die "can't find total_rows in: $first_line";
warn "# total: $total\n";

# State shared with save_shard and the read loop.
our $shard    = [];        # documents collected for the shard being built
our $shard_nr = 0;         # number of shards written so far
my $t_start     = time();  # start time, used for rate and ETA
my $total_bytes = 0;       # summed on-disk size of all shards
57 |
|
58 |
# Write the documents collected in $shard to a Storable file, report
# progress, and start a new empty shard.
#
# Takes no arguments and works on file-level state:
#   reads   $path, $shard_size, $total, $t_start
#   updates $shard (reset to []), $shard_nr (incremented), $total_bytes
# The file is named "<path>/<offset>.<shard_size>", where <offset> is the
# zero-padded shard number multiplied by the shard size.
# Emits one progress line through warn.
# Dies when Storable cannot write the file.
sub save_shard {
    my $shard_path = sprintf("%s/%06d.%d", $path, $shard_nr++ * $shard_size, $shard_size);

    # Storable::store croaks when handed a non-reference, which is what
    # $shard holds if no document was ever collected; store an empty shard.
    $shard ||= [];

    # store() signals I/O problems by returning undef instead of dying.
    Storable::store( $shard, $shard_path )
        or die "can't store shard $shard_path: $!";

    my $dt  = time() - $t_start;
    my $pos = $shard_nr * $shard_size;    # nominal number of documents written

    # The last shard is usually only partly filled, so the nominal position
    # can overshoot the total; clamp it to keep percentage and ETA sane.
    my $known_total = $total > 0;
    $pos = $total if $known_total && $pos > $total;

    # Every division is guarded: $dt can be 0 and $total can be 0.
    my $rec_s   = $dt > 0      ? $pos / $dt        : 0;
    my $percent = $known_total ? $pos * 100 / $total : 0;
    my $end_t   = ( $known_total && $rec_s > 0 ) ? ( $total / $rec_s ) - $dt : 0;
    $end_t = 0 if $end_t < 0;

    # Named differently from the file-level $shard_size (documents per
    # shard) to avoid shadowing it; this one is the file size in bytes.
    my $shard_bytes = -s $shard_path;
    $shard_bytes = 0 unless defined $shard_bytes;

    warn sprintf "shard %s %d bytes\t%8.2f%% %8.2f/s ETA %d:%02ds\n"
        , $shard_path, $shard_bytes
        , $percent
        , $rec_s
        , $end_t / 60, $end_t % 60
    ;

    $total_bytes += $shard_bytes;

    $shard = [];
}
79 |
|
80 |
# Read the rest of the response line by line. Every line containing an
# "id" key is treated as one JSON row (assumes the server emits one row
# per line); its {doc} member is collected into the current shard.
# A lexical line variable keeps the global $_ out of the sub calls below.
while ( defined( my $line = <$sock> ) ) {
    if ( $line =~ /"id":"([^"]+)"/ ) {

        $line =~ s/,[\r\n]+$//; # cleanup JSON
        my $row = from_json($line);
        push @$shard, $row->{doc};

        # Compare the element count, not the last index ($#): a shard must
        # hold exactly $shard_size documents, the size its file name states.
        save_shard() if @$shard >= $shard_size;
    }
    else {
        warn "UNKNOWN: $line";
    }
}

# Flush the remaining documents. Nothing is written when no documents are
# pending, which also avoids handing an undefined shard to Storable.
save_shard() if $shard && @$shard;

# $total_bytes stays undefined when no shard was written at all.
my $sharded_bytes = defined $total_bytes ? $total_bytes : 0;
warn "sharded $path $sharded_bytes bytes\n";