/[Sack]/trunk/lib/Sack/Shards.pm
This is a repository of my old source code, which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /trunk/lib/Sack/Shards.pm

Parent Directory | Revision Log


Revision 186 - (show annotations)
Sun Nov 8 13:21:01 2009 UTC (14 years, 7 months ago) by dpavlin
File size: 1327 byte(s)
shard input data into 1000-record shards

1 package Sack::Shards;
2
3 use warnings;
4 use strict;
5
6 use Storable;
7 use Data::Dump qw(dump);
8
9 use lib '/srv/Sack/lib';
10 use Sack;
11
12 use lib "/srv/webpac2/lib/";
13 use WebPAC::Input::ISI;
14
# Disable parsing of subfields in WebPAC::Input::ISI by clearing its
# package-level setting before any input object is created.
undef $WebPAC::Input::ISI::subfields;
16
# new( limit => $records_per_shard, %input_args )
#
# Constructor. Opens the input through WebPAC::Input::ISI and immediately
# walks it in steps of `limit`, calling shard() for every step.
#
# Arguments (key/value list):
#   limit - required, positive integer: number of record positions per shard.
#           It is removed from the argument hash before the rest is used.
#   ...   - every remaining pair is handed unchanged to
#           WebPAC::Input::ISI->new and also stays in the object hash.
#
# Returns the blessed object. Dies if `limit` is missing, false, or not a
# positive integer.
sub new {
    my $class = shift;
    my $args  = {@_};
    my $self  = bless $args, $class;

    my $limit = delete $args->{limit};
    die "no shard size limit?" unless $limit;

    # A negative or non-numeric limit would pass the truth test above but
    # never move $offset past the input size, so the loop below would spin
    # forever. Reject anything that is not a plain positive integer.
    die "shard size limit must be a positive integer, got '$limit'"
        unless $limit =~ m/\A[1-9][0-9]*\z/;

    # Fully qualified so the call cannot be confused with perl's builtin dump.
    duration( 'sharding', Data::Dump::dump($args) );

    $self->{input} = WebPAC::Input::ISI->new(%$args);

    duration( "got ", $self->size, " records" );

    # NOTE(review): pos is set here but not read anywhere in this file.
    $self->{pos} = 1;

    # NOTE(review): offsets start at 0 and the bound is inclusive, so when the
    # input size is an exact multiple of $limit one more shard starting at
    # position == size is built. Whether that is needed depends on the index
    # base of fetch_rec, which is not visible here -- confirm.
    my $offset = 0;
    while ( $offset <= $self->size ) {
        $self->shard( $offset, $limit );
        $offset += $limit;
    }

    return $self;
}
39
# Accessor: returns whatever is stored under the object's `input` key.
sub input {
    my ($self) = @_;
    return $self->{input};
}
# Returns the result of calling size() on the stored input object.
sub size {
    my ($self) = @_;
    return $self->{input}->size;
}
42
# shard( $offset, $limit )
#
# Builds (or loads from cache) one shard: the results of $limit consecutive
# fetch_rec() calls on the input, starting at position $offset.
#
# The shard is cached with Storable in /tmp/sack.shard.<range>.<limit>, where
# <range> is "<offset>-<offset+limit-1>", each number zero-padded to the
# number of digits in the input size. If that file already exists it is
# loaded and returned without touching the input.
#
# Returns an array reference with the fetched records.
#
# NOTE(review): the cache path depends only on range and limit, not on which
# input was opened, so a file left behind by a run over different input with
# the same range and limit would be reused -- confirm this is acceptable.
# NOTE(review): the cache lives at a predictable path under /tmp and is read
# back with Storable, whose documentation warns against retrieving data from
# untrusted sources.
# NOTE(review): all $limit positions are requested even when they run past
# the input size; whatever fetch_rec returns for them ends up in the shard.
sub shard {
    my ( $self, $offset, $limit ) = @_;

    my $width = length $self->size;
    my $range = sprintf "%0${width}d-%0${width}d", $offset, $offset + $limit - 1;

    my $path = "/tmp/sack.shard.$range.$limit";

    if ( -e $path ) {
        warn "retrive $path ", -s $path, " bytes\n";

        # A damaged or unreadable cache file should not abort the run:
        # fall through and rebuild the shard from the input instead.
        my $cached = eval { retrieve($path) };
        return $cached if defined $cached;
        warn "can't retrieve $path, rebuilding: ", $@ || $!, "\n";
    }

    # Start from an empty array so the shard is always an array reference,
    # even if no record gets pushed.
    my $data = [];
    my $pos  = $offset;

    foreach ( 1 .. $limit ) {
        push @$data, $self->input->fetch_rec( $pos++ );
    }

    warn "shard $range / ", $#{$data}, "\n";

    # Write to a private temporary name and rename into place, so a run that
    # is interrupted mid-write never leaves a truncated file at $path for a
    # later run to pick up.
    my $tmp = "$path.$$.tmp";
    if ( store( $data, $tmp ) && rename( $tmp, $path ) ) {
        warn "store $path ", -s $path, " bytes\n";
    }
    else {
        # Caching is only an optimisation: report and carry on.
        warn "can't store $path: $!\n";
        unlink $tmp;
    }

    duration( "shard", $path );

    return $data;
}
72
73 1;

  ViewVC Help
Powered by ViewVC 1.1.26