1 |
dpavlin |
201 |
#!/usr/bin/perl |
2 |
|
|
|
3 |
dpavlin |
242 |
# http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API |
4 |
|
|
|
5 |
dpavlin |
256 |
use lib 'lib'; |
6 |
|
|
use Sack::Shard; |
7 |
dpavlin |
201 |
|
8 |
|
|
use IO::Socket::INET; |
9 |
|
|
use JSON; |
10 |
|
|
use Data::Dump qw(dump); |
11 |
dpavlin |
256 |
use autodie; |
12 |
dpavlin |
201 |
|
13 |
|
|
# CouchDB database to read: interpolated into the _all_docs request URL
# and passed as the first argument to Sack::Shard::create.
my $name = 'pxelator'; |
14 |
dpavlin |
238 |
# Third argument to Sack::Shard::create; presumably the number of
# documents per shard -- TODO confirm against Sack::Shard.
my $shard_size = 5000; |
15 |
dpavlin |
201 |
|
16 |
|
|
|
17 |
|
|
# Open a TCP connection to the CouchDB server.
#
# Arguments (both optional, positional):
#   $host - peer address, defaults to '10.60.0.91'
#   $port - peer port, defaults to 5984
#
# Returns the connected IO::Socket::INET handle.
# Dies with a descriptive message if the connection cannot be established.
sub couchdb_socket {
	my ( $host, $port ) = @_;
	$host = '10.60.0.91' unless defined $host;
	$port = 5984 unless defined $port;

	my $sock = IO::Socket::INET->new(
		PeerAddr => $host,
		PeerPort => $port,
		Proto    => 'tcp',
	);

	# IO::Socket::INET reports constructor failures in $@; $! on its own
	# can be empty or left over from an unrelated earlier call.
	die "can't connect to CouchDB at $host:$port: ", ( $@ || $! ), "\n"
		unless $sock;

	return $sock;
}
24 |
|
|
|
25 |
|
|
# Read lines from a handle up to and including the first line that consists
# only of CR/LF characters, and return everything read as a single string.
# The caller uses this to consume the header block of an HTTP response.
#
# Arguments:
#   $sock - readable handle (socket or any other filehandle)
#
# Returns the accumulated text, or undef if the handle was already at
# end-of-file. If no blank line is seen, everything up to EOF is returned.
sub get_chunk {
	my $sock = shift;
	my $chunk;

	# Lexical line variable on purpose: while(<$fh>) assigns to the global
	# $_ without localizing it, which would clobber the caller's $_.
	while ( defined( my $line = <$sock> ) ) {
		$chunk .= $line;
		last if $line =~ /^[\n\r]+$/;
	}

#	warn "# $sock\n$chunk\n";
	return $chunk;
}
35 |
|
|
|
36 |
|
|
# Main flow: fetch every document of the CouchDB database $name through
# _all_docs?include_docs=true and hand each document to Sack::Shard.
my $sock = couchdb_socket;

# The response body is read line by line until the server closes the
# connection.
print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";

# Consume the response headers and make sure the request succeeded before
# trying to interpret the body.
my $headers = get_chunk($sock);
die "no HTTP response from CouchDB for $name\n" unless defined $headers;
die "unexpected HTTP response for $name: $headers"
	unless $headers =~ m{^HTTP/\d+\.\d+ 200\b};

# The first body line is expected to carry total_rows followed by the row
# count. Capture just the digits so no trailing newline is left in $total,
# and stop instead of passing an unparsed line on as the total.
my $first_line = <$sock>;
die "empty response body for $name\n" unless defined $first_line;
my ($total) = $first_line =~ /total_rows\D+(\d+)/
	or die "can't find total_rows in: $first_line";
warn "# $name total: $total\n";

Sack::Shard::create( $name, $total, $shard_size );

# Every remaining line that contains an "id" key is treated as one row:
# one JSON object, optionally followed by a comma.
while ( defined( my $row = <$sock> ) ) {
	if ( $row =~ /"id":"([^"]+)"/ ) {

		$row =~ s/,[\r\n]+$//; # cleanup JSON
		my $json = from_json( $row );
		Sack::Shard::add( $json->{doc} );
	} else {
		# Anything without an "id" key is reported and skipped.
		warn "UNKNOWN: $row";
	}
}
Sack::Shard::finish();
59 |
dpavlin |
207 |
|