1 |
dpavlin |
201 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
|
|
use warnings; |
4 |
|
|
use strict; |
5 |
|
|
|
6 |
|
|
use IO::Socket::INET; |
7 |
|
|
use Storable qw(); |
8 |
|
|
use JSON; |
9 |
|
|
use Data::Dump qw(dump); |
10 |
|
|
|
11 |
|
|
# Name of the CouchDB database to dump; also used as the name of the
# output sub-directory.
my $name = 'pxelator';

# Shard size used when naming shard files (offset and size are both part
# of the file name).
my $shard_size = 1000;

# Shards are written below /tmp/sack/<name>.  Create both directory levels,
# tolerating directories that already exist but failing loudly on any other
# error instead of letting a later write fail with a less helpful message.
my $path = '/tmp/sack';
for my $dir ( $path, "$path/$name" ) {
    next if mkdir $dir;
    my $error = $!;    # remember the mkdir error before -d overwrites $!
    die "can't create directory $dir: $error\n" unless -d $dir;
}
$path .= '/' . $name;
19 |
|
|
|
20 |
|
|
|
21 |
|
|
# Open a new TCP connection to the CouchDB server at 10.60.0.91:5984.
#
# Returns the connected IO::Socket::INET handle.
# Dies if the connection cannot be established.  IO::Socket::INET reports
# the reason for a failed constructor in $@, so that is what gets included
# in the message (together with the peer address for context).
sub couchdb_socket {
    my $socket = IO::Socket::INET->new(
        PeerAddr => '10.60.0.91',
        PeerPort => 5984,
        Proto    => 'tcp',
    ) or die "can't connect to CouchDB at 10.60.0.91:5984: $@\n";
    return $socket;
}
28 |
|
|
|
29 |
|
|
# Read lines from a handle up to and including the first line that consists
# solely of CR/LF characters (a blank line), or until EOF.
#
# Parameters:
#   $sock - readable filehandle (a socket in this script)
#
# Returns everything that was read as a single string, or undef when the
# handle was already at EOF.
#
# A lexical line variable is used on purpose: a bare while(<$sock>) would
# assign to the global $_ and clobber the caller's $_.
sub get_chunk {
    my ($sock) = @_;
    my $chunk;
    while ( defined( my $line = <$sock> ) ) {
        $chunk .= $line;
        last if $line =~ /^[\n\r]+$/;
    }
    # warn "# $sock\n$chunk\n";
    return $chunk;
}
39 |
|
|
|
40 |
|
|
# Two independent connections: $sock carries the _all_docs listing,
# $docs is used for the per-document requests.
my ( $sock, $docs ) = ( couchdb_socket(), couchdb_socket() );

# Ask for the list of all documents in the database.
print {$sock} "GET /$name/_all_docs HTTP/1.0\r\n\r\n";

# Consume everything up to and including the first blank line of the reply.
get_chunk($sock);

# The next line is read and only reported, not parsed.
# NOTE(review): presumably this is the line carrying the total row count -
# confirm against the server's actual output.
my $total = readline $sock;
warn "# skip total: $total\n";

# State shared with save_shard: the documents collected so far and the
# index of the next shard file to write.
our ( $shard, $shard_nr );
$shard_nr = 0;
52 |
|
|
|
53 |
|
|
# Write the documents collected in $shard to the next shard file and start
# a new, empty shard.
#
# The file is created below $path and named "<offset>.<size>", where offset
# is the shard index multiplied by $shard_size (zero padded to six digits)
# and size is $shard_size.  $shard_nr is incremented for every file written.
#
# Does nothing when no documents have been collected: Storable::store
# croaks when handed undef instead of a reference, and an empty shard file
# carries no data.
# Dies when Storable::store reports an I/O error (it returns undef then).
sub save_shard {
    return unless $shard && @$shard;

    my $shard_path = sprintf( "%s/%06d.%d", $path, $shard_nr++ * $shard_size, $shard_size );
    Storable::store( $shard, $shard_path )
        or die "can't store shard $shard_path: $!\n";
    warn "shard $shard_path ", -s $shard_path, " bytes\n";
    $shard = [];
    return;
}
59 |
|
|
|
60 |
|
|
# Walk the remaining lines of the _all_docs reply.  Every line that carries
# an "id" field triggers a fetch of that document over the second
# connection; the decoded document is appended to the current shard.
# Lines without an id are only reported.
while ( defined( my $row = <$sock> ) ) {
    if ( $row =~ /"id":"([^"]+)"/ ) {
        my $id = $1;    # copy before any other regex can reset $1
        # warn "get $id\n";
        print $docs "GET /$name/$id HTTP/1.0\r\nConnection: Keep-Alive\r\n\r\n";

        # Skip everything up to and including the blank line of the reply.
        get_chunk($docs);

        # The document is expected on the single line that follows; read it
        # in explicit scalar context rather than relying on from_json's
        # prototype to avoid slurping the handle.
        my $body = <$docs>;
        die "no document body received for $id\n" unless defined $body;

        push @$shard, from_json($body);

        # Flush once the shard holds exactly $shard_size documents, so the
        # contents agree with the offset/size encoded in the file name.
        # (Comparing the last index against $shard_size would let the shard
        # grow to $shard_size + 1 documents.)
        save_shard() if @$shard >= $shard_size;
    }
    else {
        warn "UNKNOWN: $row";
    }
}

# Flush whatever is left over; skip when nothing is pending.
save_shard() if $shard && @$shard;