/[Sack]/trunk/bin/couchdb2shards.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /trunk/bin/couchdb2shards.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 243 - (hide annotations)
Thu Nov 26 18:14:43 2009 UTC (14 years, 6 months ago) by dpavlin
File MIME type: text/plain
File size: 1727 byte(s)
take just document from json response

1 dpavlin 201 #!/usr/bin/perl
2    
3 dpavlin 242 # http://wiki.apache.org/couchdb/HTTP_Bulk_Document_API
4    
5 dpavlin 201 use warnings;
6     use strict;
7    
8     use IO::Socket::INET;
9     use Storable qw();
10     use JSON;
11     use Data::Dump qw(dump);
12 dpavlin 206 use Time::HiRes qw(time);
13 dpavlin 207 use File::Path qw(make_path remove_tree);
14 dpavlin 201
15     my $name = 'pxelator';
16 dpavlin 238 my $shard_size = 5000;
17 dpavlin 201
18    
19 dpavlin 207 my $path = "/tmp/sack/$name";
20     remove_tree $path;
21     make_path $path;
22 dpavlin 201
23    
24     sub couchdb_socket {
25     IO::Socket::INET->new(
26     PeerAddr => '10.60.0.91',
27     PeerPort => 5984,
28     Proto => 'tcp',
29     ) || die $!;
30     }
31    
32     sub get_chunk {
33     my $sock = shift;
34     my $chunk;
35     while(<$sock>) {
36     $chunk .= $_;
37     last if /^[\n\r]+$/;
38     }
39     # warn "# $sock\n$chunk\n";
40     return $chunk;
41     }
42    
43     my $sock = couchdb_socket;
44    
45 dpavlin 242 print $sock "GET /$name/_all_docs?include_docs=true HTTP/1.0\r\n\r\n";
46 dpavlin 201
47     get_chunk($sock);
48    
49     my $total = <$sock>;
50 dpavlin 206 $total =~ s{^.*total_rows\D+(\d+).+$}{$1};
51 dpavlin 242 warn "# total: $total\n";
52 dpavlin 201
53     our $shard;
54     our $shard_nr = 0;
55 dpavlin 206 my $t_start = time();
56 dpavlin 207 my $total_bytes;
57 dpavlin 201
58     sub save_shard {
59     my $shard_path = sprintf("%s/%06d.%d", $path, $shard_nr++ * $shard_size, $shard_size);
60     Storable::store( $shard, $shard_path );
61 dpavlin 206
62     my $dt = time() - $t_start;
63     my $pos = $shard_nr * $shard_size;
64     my $rec_s = $pos / $dt;
65 dpavlin 207 my $end_t = ( $total / $rec_s ) - $dt;
66     my $shard_size = -s $shard_path;
67 dpavlin 206
68 dpavlin 207 warn sprintf "shard %s %d bytes\t%8.2f%% %8.2f/s ETA %d:%02ds\n"
69     , $shard_path, $shard_size
70 dpavlin 206 , $pos * 100 / $total
71     , $rec_s,
72 dpavlin 207 , $end_t / 60, $end_t % 60
73 dpavlin 206 ;
74    
75 dpavlin 207 $total_bytes += $shard_size;
76    
77 dpavlin 201 $shard = [];
78     }
79    
80     while(<$sock>) {
81     if ( /"id":"([^"]+)"/ ) {
82 dpavlin 242
83     s/,[\r\n]+$//; # cleanup JSON
84     my $json = from_json( $_ );
85 dpavlin 243 push @$shard, $json->{doc};
86 dpavlin 201 save_shard if $#{ $shard } == $shard_size;
87     } else {
88     warn "UNKNOWN: $_";
89     }
90     }
91     save_shard;
92 dpavlin 207
93     warn "sharded $path $total_bytes bytes\n";

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.26