1 |
package Sack::Digest; |
2 |
|
3 |
=head1 NAME |
4 |
|
5 |
Sack::Digest - turn long fields into integers and back |
6 |
|
7 |
=cut |
8 |
|
9 |
use warnings; |
10 |
use strict; |
11 |
|
12 |
use BerkeleyDB; |
13 |
use Digest::MD5 qw(md5); |
14 |
use Data::Dump qw/dump/; |
15 |
|
16 |
|
17 |
our $debug = 0; |
18 |
our $port = '?'; |
19 |
|
20 |
sub new { |
21 |
my $class = shift; |
22 |
my $self = bless {@_}, $class; |
23 |
die "no port" unless defined $self->{port}; |
24 |
$port = $self->{port}; |
25 |
|
26 |
$self->clean if $self->{clean}; |
27 |
|
28 |
my $path = "/dev/shm/sack.$port"; |
29 |
|
30 |
|
31 |
$self->{db_md5_nr} ||= tie my %md5_nr, 'BerkeleyDB::Btree', |
32 |
-Filename => "$path.md5_nr", |
33 |
# -Cachesize => 700_000_000, |
34 |
-Flags => DB_CREATE |
35 |
|| die "$path.md5_nr $!"; |
36 |
|
37 |
$self->{md5_nr} = \%md5_nr; |
38 |
|
39 |
|
40 |
$self->{db_nr_md5} ||= tie my @nr_md5, 'BerkeleyDB::Recno', |
41 |
-Filename => "$path.nr_md5", |
42 |
# -Cachesize => 700_000_000, |
43 |
-Flags => DB_CREATE, |
44 |
|| die "$path.nr_md5 $!"; |
45 |
|
46 |
$self->{nr_md5} = \@nr_md5; |
47 |
|
48 |
|
49 |
$self->{db_md5} ||= tie my %md5, 'BerkeleyDB::Btree', |
50 |
-Filename => "$path.md5", |
51 |
-Flags => DB_CREATE, |
52 |
|| die "$path.md5 $!"; |
53 |
|
54 |
$self->{md5} = \%md5; |
55 |
|
56 |
warn "[$port] BDB open $path\n"; |
57 |
|
58 |
return $self; |
59 |
} |
60 |
|
61 |
|
62 |
sub close { |
63 |
my $self = shift; |
64 |
my $error = 0; |
65 |
foreach ( qw( md5 md5_nr nr_md5 ) ) { |
66 |
my $db = delete $self->{"db_$_"} || next; |
67 |
warn "[$port] close $_\n"; |
68 |
$error += $db->db_close; |
69 |
} |
70 |
return not $error; |
71 |
} |
72 |
|
73 |
|
74 |
sub clean { |
75 |
my $self = shift; |
76 |
$self->close; |
77 |
foreach ( glob "/dev/shm/sack.$port.*" ) { |
78 |
warn "[$port] clean $_ ", -s $_, " bytes\n"; |
79 |
unlink $_ || warn "[$port] ERROR can't remove $_:$!"; |
80 |
} |
81 |
1; |
82 |
} |
83 |
|
84 |
|
85 |
our $seq = 0; |
86 |
|
87 |
sub to_int { |
88 |
my ( $self, $full ) = @_; |
89 |
my $nr; |
90 |
|
91 |
my $md5 = md5 $full; |
92 |
|
93 |
if ( my $nr = $self->{md5_nr}->{ $md5 } ) { |
94 |
return $nr; |
95 |
} else { |
96 |
$seq++; |
97 |
$self->{md5} ->{ $md5 } = $full; |
98 |
$self->{md5_nr}->{ $md5 } = $seq; |
99 |
$self->{nr_md5}->[ $seq ] = $md5; |
100 |
return $seq; |
101 |
} |
102 |
} |
103 |
|
104 |
|
105 |
sub from_int { |
106 |
my ( $self, $d ) = @_; |
107 |
my $v = $self->{nr_md5}->[ $d ]; |
108 |
$v = $self->{md5}->{ $v } if defined $v; |
109 |
# warn "## from_int $d = $v\n"; |
110 |
defined $v ? $v : $d; |
111 |
} |
112 |
|
113 |
sub undigest_out { |
114 |
my ( $self, $out ) = @_; |
115 |
|
116 |
foreach my $k1 ( grep { m/#/ } keys %$out ) { |
117 |
my @k2 = keys %{ $out->{$k1} }; |
118 |
foreach my $k2 ( @k2 ) { |
119 |
my $v = delete $out->{$k1}->{$k2}; |
120 |
# warn "## k2 $k2 = $v"; |
121 |
$out->{$k1}->{ $self->from_int($k2) } = $v; |
122 |
} |
123 |
} |
124 |
|
125 |
return $out; |
126 |
} |
127 |
|
128 |
|
129 |
sub undigest_node_k_v { |
130 |
my ( $self, $node, $k, $v ) = @_; |
131 |
$self->from_int( $v ); |
132 |
} |
133 |
|
134 |
|
135 |
sub sync { |
136 |
my $self = shift; |
137 |
warn "[$port] sync"; |
138 |
|
139 |
return |
140 |
not $self->{db_md5_nr}->db_sync |
141 |
+ $self->{db_nr_md5}->db_sync |
142 |
+ $self->{db_md5}->db_sync |
143 |
; |
144 |
} |
145 |
|
146 |
|
147 |
sub info { |
148 |
my $self = shift; |
149 |
my $info; |
150 |
$info->{$_} = $self->{"db_$_"}->db_stat foreach qw( md5_nr nr_md5 md5 ); |
151 |
warn "[$port] BDB info [$seq] ", dump $info; |
152 |
return $info; |
153 |
} |
154 |
|
155 |
1; |