1 |
package Sack::Shards; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use Storable; |
7 |
use Data::Dump qw(dump); |
8 |
|
9 |
use lib '/srv/Sack/lib'; |
10 |
use Sack; |
11 |
|
12 |
use lib "/srv/webpac2/lib/"; |
13 |
use WebPAC::Input::ISI; |
14 |
|
15 |
# Clear the package-level $subfields setting of WebPAC::Input::ISI at load
# time, so that the input module does not parse subfields for this package.
$WebPAC::Input::ISI::subfields = undef; # disable parsing of subfields
16 |
|
17 |
# Constructor: opens the ISI input and walks over it in fixed-size shards.
#
# Arguments are given as a flat key/value list:
#   limit - number of records per shard; required, must be a positive
#           integer. It is removed from the argument hash before the
#           remaining pairs are forwarded to WebPAC::Input::ISI->new.
#
# The argument hash itself is blessed and becomes the object.
# Dies when limit is missing or is not a positive integer.
# Returns the new object after shard() has been called for every offset
# from 0 up to and including the input size, in steps of limit.
sub new {
    my $class = shift;
    my $args  = {@_};
    my $self  = bless $args, $class;

    my $limit = delete $args->{limit} || die "no shard size limit?";

    # A negative or non-numeric limit never moves $offset past the input
    # size, so the loop below would not terminate; a fractional limit would
    # produce non-integer offsets. Reject both up front.
    die "shard size limit must be a positive integer, got '$limit'"
        unless $limit =~ m/\A[0-9]+\z/ && $limit > 0;

    duration 'sharding',dump $args;

    $self->{input} = WebPAC::Input::ISI->new( %$args );

    duration "got ", $self->size, " records";

    $self->{pos} = 1;
    my $offset = 0;

    # NOTE(review): the first shard starts at offset 0 while pos starts at 1;
    # confirm which record numbering fetch_rec expects.
    while ( $offset <= $self->size ) {
        $self->shard( $offset, $limit );
        $offset += $limit;
    }

    return $self;
}
39 |
|
40 |
# Accessor: returns whatever is stored under the object's "input" key.
sub input {
    my ($self) = @_;
    return $self->{input};
}
41 |
# Returns the result of calling ->size on the object stored under "input".
sub size {
    my ($self) = @_;
    my $reader = $self->{input};
    return $reader->size;
}
42 |
|
43 |
# Return one shard of the input as an array reference, backed by an on-disk
# Storable cache.
#
#   $offset - position passed to the first fetch_rec call of this shard
#   $limit  - number of fetch_rec calls made for this shard
#
# The cache file is /tmp/sack.shard.<first>-<last>.<limit>, where <first> and
# <last> are zero-padded to the number of digits in the input size. If that
# file exists and can be read it is returned as-is; otherwise the records are
# fetched, stored to the file and returned.
sub shard {
    my ( $self, $offset, $limit ) = @_;

    my $r_len = length $self->size;
    my $range = sprintf "%0${r_len}d-%0${r_len}d", $offset, $offset + $limit - 1;

    # NOTE(review): the cache key is built from the range and limit only, not
    # from the input itself, and lives under a fixed name in /tmp. A leftover
    # file with the same range and limit is returned whatever input it came
    # from, and Storable data from a shared directory should not be trusted.
    my $path = "/tmp/sack.shard.$range.$limit";

    if ( -e $path ) {
        warn "retrive $path ", -s $path, " bytes\n";

        # retrieve croaks on a damaged file and returns undef on I/O errors;
        # in both cases fall through and rebuild the shard instead of failing
        # on the same broken cache file on every run.
        my $cached = eval { retrieve $path };
        return $cached if defined $cached;
        warn "ignoring unreadable shard $path: ", ( $@ || $! ), "\n";
    }

    my $pos = $offset;

    # Always an array reference, even when fetch_rec yields nothing.
    my $data = [];

    foreach ( 1 .. $limit ) {
        push @$data, $self->input->fetch_rec( $pos++ );
    }

    # $#{ $data } is the last index of the shard, i.e. element count minus one.
    warn "shard $range / ", $#{ $data }, "\n";

    # store returns undef on I/O errors; report that instead of ignoring it.
    store( $data, $path )
        or warn "store $path failed: $!\n";
    warn "store $path ", -s $path, " bytes\n";

    duration "shard", $path;

    return $data;
}
72 |
|
73 |
1; |