1 |
package Sack::Merge; |
2 |
|
3 |
use warnings; |
4 |
use strict; |
5 |
|
6 |
use Digest::MD5 qw(md5); |
7 |
use Time::HiRes qw(time); |
8 |
use Data::Dump qw(dump); |
9 |
|
10 |
our $out; |
11 |
sub out { $out } |
12 |
|
13 |
our $nr = 0; |
14 |
our $md5_nr; |
15 |
our $digest_fh; |
16 |
our @digest_offset; |
17 |
|
18 |
sub clean { |
19 |
undef $out; |
20 |
} |
21 |
|
22 |
sub add { |
23 |
my ( $self, $new ) = @_; |
24 |
|
25 |
my $t_merge = time(); |
26 |
|
27 |
my $tick = 0; |
28 |
|
29 |
my $missing; |
30 |
|
31 |
foreach my $k1 ( keys %$new ) { |
32 |
|
33 |
foreach my $k2 ( keys %{ $new->{$k1} } ) { |
34 |
|
35 |
my $n = delete $new->{$k1}->{$k2}; |
36 |
|
37 |
if ( $k1 =~ m{#} ) { |
38 |
my $md5 = md5 $k2; |
39 |
if ( defined $md5_nr->{$md5} ) { |
40 |
$k2 = $md5_nr->{$md5}; |
41 |
} else { |
42 |
open( $digest_fh, '>', '/tmp/sack.digest' ) unless $digest_fh; |
43 |
$digest_offset[ $nr ] = tell( $digest_fh ); |
44 |
print $digest_fh "$k2\n"; |
45 |
|
46 |
$k2 = $md5_nr->{$md5} = $nr; |
47 |
$nr++; |
48 |
} |
49 |
} |
50 |
|
51 |
my $ref = ref $out->{$k1}->{$k2}; |
52 |
|
53 |
if ( ! defined $out->{$k1}->{$k2} ) { |
54 |
$out->{$k1}->{$k2} = $n; |
55 |
} elsif ( $k1 =~ m{\+} ) { |
56 |
# warn "## agregate $k1 $k2"; |
57 |
$out->{$k1}->{$k2} += $n; |
58 |
} elsif ( $ref eq 'ARRAY' ) { |
59 |
if ( ref $n eq 'ARRAY' ) { |
60 |
push @{ $out->{$k1}->{$k2} }, $_ foreach @$n; |
61 |
} else { |
62 |
push @{ $out->{$k1}->{$k2} }, $n; |
63 |
} |
64 |
} elsif ( $ref eq '' ) { |
65 |
$out->{$k1}->{$k2} = [ $out->{$k1}->{$k2}, $n ]; |
66 |
} else { |
67 |
die "can't merge $k2 [$ref] from ",dump($n), " into ", dump($out->{$k1}->{$k2}); |
68 |
} |
69 |
|
70 |
if ( $tick++ % 1000 == 0 ) { |
71 |
print STDERR "."; |
72 |
} elsif ( $tick % 10000 == 0 ) { |
73 |
print STDERR $tick; |
74 |
} |
75 |
} |
76 |
} |
77 |
|
78 |
$t_merge = time - $t_merge; |
79 |
warn sprintf "\nmerged %d in %.4fs\n", $tick, $t_merge; |
80 |
|
81 |
return $tick; |
82 |
} |
83 |
|
84 |
1; |