/[Sack]/trunk/bin/couchdb2shards.pl
This is a repository of my old source code, which is no longer updated. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/bin/couchdb2shards.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 201 - (show annotations)
Sun Nov 8 20:36:23 2009 UTC (14 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 1267 byte(s)
dump CouchDB into 1000 record shards

1 #!/usr/bin/perl
2
3 use warnings;
4 use strict;
5
6 use IO::Socket::INET;
7 use Storable qw();
8 use JSON;
9 use Data::Dump qw(dump);
10
# Name of the CouchDB database to dump; also used as the output sub-directory.
my $name = 'pxelator';
# Number of documents stored in each shard file.
my $shard_size = 1000;

# Shards are written below /tmp/sack/<database name>.
# Each directory level is created only if it is missing, and a real mkdir
# failure (permissions, read-only filesystem, ...) is reported right away
# instead of surfacing later as a Storable write error.
my $path = '/tmp/sack';
-d $path or mkdir $path or die "can't create $path: $!";
$path .= '/' . $name;
-d $path or mkdir $path or die "can't create $path: $!";
20
# Open a new TCP connection to the CouchDB server.
#
# Arguments (both optional, defaults match the original hard-coded values):
#   $host - peer address, default '10.60.0.91'
#   $port - peer port, default 5984
#
# Returns the connected IO::Socket::INET object.
# Dies if the connection cannot be established.
sub couchdb_socket {
    my ( $host, $port ) = @_;
    $host = '10.60.0.91' unless defined $host;
    $port = 5984         unless defined $port;

    # IO::Socket::INET reports constructor failures in $@ ($! is not
    # guaranteed to be meaningful, e.g. for name-resolution errors).
    my $sock = IO::Socket::INET->new(
        PeerAddr => $host,
        PeerPort => $port,
        Proto    => 'tcp',
    ) or die "can't connect to CouchDB at $host:$port: $@";

    return $sock;
}
28
# Read lines from a handle up to and including the first line that consists
# solely of CR/LF characters (the blank line ending an HTTP header block).
#
# Arguments:
#   $sock - readable filehandle
#
# Returns everything that was read, concatenated into one string, or undef
# if the handle was already at EOF.
#
# A lexical line variable is used so the caller's $_ is left untouched; this
# sub is called from inside the caller's own line-reading loop.
sub get_chunk {
    my $sock = shift;
    my $chunk;
    while ( defined( my $line = <$sock> ) ) {
        $chunk .= $line;
        last if $line =~ /^[\n\r]+$/;
    }
    # warn "# $sock\n$chunk\n";
    return $chunk;
}
39
# Two connections: $sock streams the _all_docs listing, $docs is used to
# fetch the individual documents while the listing is still being read.
my $sock = couchdb_socket;
my $docs = couchdb_socket;

print $sock "GET /$name/_all_docs HTTP/1.0\r\n\r\n";

# Consume the response headers and make sure the listing request succeeded
# before trying to parse the body.
my $headers = get_chunk($sock);
die "no response from CouchDB for /$name/_all_docs\n" unless defined $headers;
die "unexpected response for /$name/_all_docs: $headers"
    unless $headers =~ m{\AHTTP/\d+\.\d+ 200\b};

# The first body line is skipped and only logged; the per-document rows
# follow on the subsequent lines.
my $total = <$sock>;
die "empty body for /$name/_all_docs\n" unless defined $total;
warn "# skip total: $total\n";

# Current shard (array ref of decoded documents) and number of shards
# written so far; both are shared with save_shard.
our $shard;
our $shard_nr = 0;
52
# Write the documents collected in $shard to the next shard file and start
# a new, empty shard.
#
# The file is named "<path>/<offset>.<shard_size>", where offset is the
# zero-padded index of the shard's first record ($shard_nr * $shard_size).
#
# Does nothing when there are no collected documents, so no empty shard
# files are written and Storable is never handed an undefined value.
# Dies if the shard cannot be written.
sub save_shard {
    return unless $shard && @$shard;

    my $shard_path = sprintf("%s/%06d.%d", $path, $shard_nr++ * $shard_size, $shard_size);

    # Storable::store returns undef on I/O errors instead of dying.
    Storable::store( $shard, $shard_path )
        or die "can't store $shard_path: $!";

    warn "shard $shard_path ", -s $shard_path, " bytes\n";
    $shard = [];
}
59
# Walk the _all_docs listing line by line, fetch every referenced document
# over the second connection and collect the decoded documents into shards
# of $shard_size records each.
#
# NOTE(review): the document id is interpolated into the request path
# without URL-escaping -- ids containing spaces or other reserved characters
# would produce a malformed request; confirm ids are URL-safe.
while ( defined( my $line = <$sock> ) ) {
    if ( $line =~ /"id":"([^"]+)"/ ) {
        # Copy the capture at once; later regex matches would overwrite $1.
        my $id = $1;
        # warn "get $id\n";
        print $docs "GET /$name/$id HTTP/1.0\r\nConnection: Keep-Alive\r\n\r\n";

        # Skip the response headers; the document body is read as one line.
        get_chunk($docs);
        my $body = <$docs>;
        die "no document body received for $id\n" unless defined $body;

        push @$shard, from_json($body);

        # Compare the element count, not the last index: the previous
        # "$#{ $shard } == $shard_size" test fired only after
        # $shard_size + 1 records had been collected.
        save_shard() if @$shard >= $shard_size;
    }
    else {
        warn "UNKNOWN: $line";
    }
}

# Flush the final, partially filled shard (if any).
save_shard() if $shard && @$shard;

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26