1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# Compare data in two different databases |
4 |
# |
5 |
# 2003-08-07 Dobrica Pavlinusic |
6 |
# |
7 |
# somewhat based on ideas from
8 |
# DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com |
9 |
|
10 |
use strict; |
11 |
use Getopt::Long; |
12 |
use DBI; |
13 |
use Data::Dumper; |
14 |
use Pg::Scheme; |
15 |
|
16 |
# Autoflush the currently selected output handle (STDOUT), presumably so
# that the generated SQL and the STDERR diagnostics appear in the order
# they were produced.
$| = 1;

# Command line switches. $masterfile and $slavefile are accepted here but
# are not read anywhere else in the visible part of the script.
my ($debug, $verbose) = (0, 0);
my ($help, $masterhost, $masterport, $masteruser, $masterpassword,
    $slavehost, $slaveport, $slaveuser, $slavepassword,
    $masterfile,
    $slavefile,
);
my $tables;

# GetOptions returns false when it met an unknown or malformed option
# (it has already warned about it on STDERR by then).
my $result = GetOptions(
    "debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help,
    "masterhost=s" => \$masterhost, "masterport=i" => \$masterport,
    "masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword,
    "masterfile=s" => \$masterfile,
    "slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport,
    "slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword,
    "slavefile=s" => \$slavefile,
    "tables=s" => \$tables,
);

# Show the usage text on --help, on options that could not be parsed, or
# when fewer than two database names are left on the command line.
if (defined($help) || !$result || (scalar(@ARGV) < 2)) {
    print "Usage: $0 [options] masterdb slavedb
Options:
	--masterhost=hostname --masterport=port
	--masteruser=username --masterpassword=string
	--masterfile=filename
	--slavehost=hostname --slaveport=port
	--slaveuser=username --slavepassword=string
	--slavefile=filename
	--table[s]=table[,table...]
";
    # --help and missing database names keep the historical exit status 0;
    # only options that failed to parse are reported as an error.
    exit($result ? 0 : 1);
}
51 |
|
52 |
# Database names are the first two positional arguments; the literal
# fallbacks only apply when an argument is empty or "0".
my $master = $ARGV[0] || "master";
my $slave = $ARGV[1] || "slave";

# Build the connection strings that are appended to "DBI:Pg:" below.
my $minfo = "dbname=$master";
$minfo .= " host=$masterhost" if (defined($masterhost));
$minfo .= " port=$masterport" if (defined($masterport));

my $sinfo = "dbname=$slave";
$sinfo .= " host=$slavehost" if (defined($slavehost));
$sinfo .= " port=$slaveport" if (defined($slaveport));

# Diagnostics go to STDERR: STDOUT carries the generated SQL.
print STDERR "Master connection is $minfo\n" if ($debug);
print STDERR "Slave connection is $sinfo\n" if ($debug);

# PrintError is off, so a failed connect has to be reported here.
my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 0 });
if (! $mdbh) {
    my $msg = "Can't connect to master database $master";
    $msg .= " on $masterhost" if ($masterhost);
    $msg .= ": $DBI::errstr" if (defined($DBI::errstr));
    print STDERR "$msg\n";
    exit 1;
}
my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 0 });
if (! $sdbh) {
    my $msg = "Can't connect to slave database $slave";
    $msg .= " on $slavehost" if ($slavehost);
    $msg .= ": $DBI::errstr" if (defined($DBI::errstr));
    print STDERR "$msg\n";
    exit 1;
}

# These three variables are declared here but not used in the visible code.
my ($diff_shema, $diff_data) = (0, 0);

my $sql;
84 |
|
85 |
# Print one SQL statement to STDERR as a single line prefixed with
# "DEBUG: SQL: ". Does nothing unless $debug is set.
#   $_[0] - SQL text; the caller's copy is not modified
sub debug_sql {
    unless ($debug) {
        return;
    }
    my ($statement) = @_;
    # turn line breaks into blanks, then squeeze runs of whitespace
    $statement =~ s/[\n\r]/ /gs;
    $statement =~ s/\s\s+/ /g;
    print STDERR "DEBUG: SQL: $statement\n";
}
92 |
|
93 |
# Dump one fetched row to STDERR. Does nothing unless $debug is set.
#   $row  - hash reference keyed by column name, or a false value when
#           there is no row
#   @cols - column names to print, in this order
# The whole line goes to STDERR (STDOUT carries the generated SQL), and only
# undefined values are shown as "null", so 0 and '' are printed as they are.
sub debug_row {
    return if (! $debug);
    my ($row, @cols) = @_;
    if (! $row) {
        print STDERR "DEBUG: ROW data is undef!\n";
        return;
    }
    my $line = "DEBUG: ROW: [" . scalar(@cols) . "] ";
    foreach my $col (@cols) {
        my $val = defined($row->{$col}) ? $row->{$col} : "null";
        $line .= "$col:$val ";
    }
    print STDERR "$line\n";
}
113 |
|
114 |
# Print the arguments to STDERR prefixed with "DEBUG: ".
# Does nothing unless $debug is set; no newline is appended.
sub debug {
    unless ($debug) {
        return;
    }
    my @message = @_;
    print STDERR "DEBUG: ", @message;
}
118 |
|
119 |
# --debug implies --verbose
$verbose = 1 if ($debug);

# Schema helper objects for both databases (direct method call instead of
# the indirect "new Class" syntax).
my $mscheme = Pg::Scheme->new( 'dbh' => $mdbh, 'DEBUG' => 0 )
    or die "can't query master schema";
my $sscheme = Pg::Scheme->new( 'dbh' => $sdbh, 'DEBUG' => 0 )
    or die "can't query slave schema";

# which tables to compare? The list is taken from the master schema and
# the --tables value (possibly undef) is handed to list_tables as is.
my @tables = $mscheme->list_tables($tables);

debug("Comparing tables: ".join(", ",@tables)."\n");

# start transaction
print "begin work;\n";

# disable active triggers on slave database
my @triggers = $sscheme->get_triggers();

foreach my $tr (@triggers) {
    # quote() escapes the name, so a trigger name containing a quote
    # character cannot break the generated statement
    print "update pg_trigger set tgenabled = false where tgname=", $sdbh->quote($tr), ";\n";
}

my $cols;            # declared but not used in the visible code
my $diff_total = 0;  # differences found over all tables
143 |
|
144 |
# Compare one table at a time. This loop stays open here; its body
# continues below and is closed after the per-table summary.
foreach my $table (@tables) {

    my $sth;    # declared but not used in the visible code

    # Column lists, all taken from the master schema.

    # all columns (used for the select and for insert statements)
    my @cols = @{ $mscheme->cols($table) };

    # columns compared by a=b
    my @cols_notnull = @{ $mscheme->cols_notnull($table) };

    # columns compared by a=b or a is null and b is null
    my @cols_null = @{ $mscheme->cols_null($table) };

    # primary key columns
    my @cols_pk = @{ $mscheme->cols_pk($table) };

    # columns to compare (not in primary key)
    my @cols_cmp = @{ $mscheme->cols_notpk($table) };

    my @cols_skip;    # skipped columns
    my @cols_test;    # all columns to test (without skipped)

    foreach my $row (@{ $mscheme->pg_attribute($table) }) {
        # attname | format_type | attnotnull | atthasdef | attnum

        # FIXME: do something with attributes which shouldn't be compared
        # (date, time, datetime, timestamp). Both lists are only filled
        # here; the visible code reads @cols_skip for the debug line and
        # never reads @cols_test.
        if ($row->{format_type} =~ /(date)|(time)/i) {
            push @cols_skip, $row->{attname};
        } else {
            push @cols_test, $row->{attname};
        }
    }

    if ($debug) {
        print STDERR "DEBUG: table $table not null cols: (",join(", ",@cols_notnull),")";
        print STDERR " - null cols: (",join(", ",@cols_null),")" if (@cols_null);
        print STDERR " - skip cols: (",join(", ",@cols_skip),")" if (@cols_skip);
        print STDERR "\n";
    }

    # Without a primary key, every column is treated as part of the key.
    if (! @cols_pk) {
        print STDERR "can't find PK rows for table '$table' using all\n";
        @cols_pk = @cols;
    }

    # Informational output goes to STDERR like every other verbose
    # message: STDOUT must contain nothing but the generated SQL.
    if ($verbose) {
        print STDERR "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n";
    }

    # Same select for both databases; the order by clause is appended later.
    my $msql = "select ".join(",",@cols)." from $table";
    my $ssql = $msql;
204 |
|
205 |
# Build a where clause with one "col=?" placeholder per column, joined by
# " and ". The result starts with " where "; with no columns it is just
# " where ".
sub sql_where {
    my @conditions = map { "$_=?" } @_;
    return " where " . join(" and ", @conditions);
}
215 |
|
216 |
# Build an order by clause that sorts ascending on every column given, in
# argument order. The result starts with " order by "; with no columns it
# is just " order by ".
sub sql_order {
    my @terms = map { "$_ asc" } @_;
    return " order by " . join(", ", @terms);
}
226 |
|
227 |
# Both selects are sorted on the key columns so the two result sets can be
# walked side by side.
my $order = sql_order(@cols_pk);
$msql .= $order;
$ssql .= $order;

debug_sql($msql);

# The handles were opened with PrintError off, so say what failed instead
# of dying with a bare "Died".
my $msth = $mdbh->prepare($msql)
    or die "can't prepare on master: $msql: " . ($mdbh->errstr || "unknown error");
$msth->execute()
    or die "can't execute on master: $msql: " . ($msth->errstr || "unknown error");

my $ssth = $sdbh->prepare($ssql)
    or die "can't prepare on slave: $ssql: " . ($sdbh->errstr || "unknown error");
$ssth->execute()
    or die "can't execute on slave: $ssql: " . ($ssth->errstr || "unknown error");

my $diff_row = 0;    # differences found in this table

my ($mrow, $srow);   # current row of each side (hash reference or undef)

# States of $have_mrow / $have_srow:
#   NO_ROW    - that result set is exhausted
#   FETCH_ROW - the current row was consumed, fetch the next one
#   HAVE_ROW  - a fetched row is waiting to be compared
use constant NO_ROW => 0;
use constant FETCH_ROW => 1;
use constant HAVE_ROW => 2;
my ($have_mrow, $have_srow) = (FETCH_ROW, FETCH_ROW);

# Runs until both result sets are exhausted. This loop stays open here;
# its body continues below.
while ($have_mrow != NO_ROW || $have_srow != NO_ROW) {

    debug("have mrow: $have_mrow srow: $have_srow\n");
251 |
|
252 |
# Return the values of the given columns from a row, in column order.
#   $row  - hash reference keyed by column name (dies with "need row" if
#           it is missing)
#   @cols - column names (dies with "need cols" if the list is empty)
# Every column passed in is used; taking just one argument with shift
# would drop all but the first column of a multi-column key.
sub pk_val {
    my $row = shift || die "need row";
    my @cols = @_;
    die "need cols" unless (@cols);
    return map { $row->{$_} } @cols;
}
261 |
|
262 |
# Fetch the next master row if the previous one was consumed; an exhausted
# result set turns the state into NO_ROW.
if ($have_mrow == FETCH_ROW) {
    debug("fetch row from master: $msql\n");
    $mrow = $msth->fetchrow_hashref();
    debug_row($mrow, @cols);
    $have_mrow = $mrow ? HAVE_ROW : NO_ROW;
}

# Same for the slave side.
if ($have_srow == FETCH_ROW) {
    debug("fetch row from slave: $ssql\n");
    $srow = $ssth->fetchrow_hashref();
    debug_row($srow, @cols);
    $have_srow = $srow ? HAVE_ROW : NO_ROW;
}

debug("have mrow: $have_mrow srow: $have_srow\n");
289 |
|
290 |
# insert into slave database |
291 |
# Build an insert statement for one row.
#   $dbh   - database handle, used only for quote()
#   $table - table name
#   $row   - hash reference keyed by column name
#   @cols  - columns to insert, in this order
# Returns the statement without a trailing semicolon; the statement is also
# passed to debug_sql().
sub sql_insert {
    my ($dbh, $table, $row, @cols) = @_;
    die "need dbh" unless ($dbh);
    die "need table as argument" unless ($table);
    die "need row data" unless ($row);

    my $column_list = join(",", @cols);
    my $value_list = join(",", map { $dbh->quote($row->{$_}) } @cols);

    my $sql = "insert into $table ($column_list) values ($value_list)";
    debug_sql($sql);
    return $sql;
}
307 |
|
308 |
# delete from slave database |
309 |
# Build a delete statement that removes the row identified by its key.
#   $dbh     - database handle, used only for quote()
#   $table   - table name
#   $row     - hash reference keyed by column name
#   @cols_pk - key columns that identify the row
# Returns the statement without a trailing semicolon; the statement is also
# passed to debug_sql().
#
# The conditions are assembled directly rather than by substituting quoted
# values into "?" placeholders, so a key value that contains "?" cannot
# corrupt the statement. An undefined key value becomes "col is null",
# because "col=NULL" never matches a row.
sub sql_delete {
    my $dbh = shift @_ || die "need dbh";
    my $table = shift @_ || die "need table as argument";
    my $row = shift @_ || die "need row as argument";
    my @cols_pk = @_;

    # never emit a delete whose where clause is empty
    die "need key columns for table $table" unless (@cols_pk);

    my @conditions;
    foreach my $col (@cols_pk) {
        die "can't find value in row for col $col" unless (exists $row->{$col});
        if (defined($row->{$col})) {
            push @conditions, "$col=" . $dbh->quote($row->{$col});
        } else {
            push @conditions, "$col is null";
        }
    }

    my $sql = "delete from $table where " . join(" and ", @conditions);
    debug_sql($sql);
    return $sql;
}
326 |
|
327 |
# update row in slave database |
328 |
# Build an update statement that sets one column of the row identified by
# its key.
#   $dbh     - database handle, used only for quote()
#   $table   - table name
#   $col     - column to update; the new value is $row->{$col}
#   $row     - hash reference keyed by column name
#   @cols_pk - key columns that identify the row
# Returns the statement without a trailing semicolon; the statement is also
# passed to debug_sql().
#
# The conditions are assembled directly rather than by substituting quoted
# values into "?" placeholders, so a key value that contains "?" cannot
# corrupt the statement. An undefined key value becomes "col is null",
# because "col=NULL" never matches a row.
sub sql_update {
    my $dbh = shift @_ || die "need dbh";
    my $table = shift @_ || die "need table as argument";
    my $col = shift @_ || die "need col to update";
    my $row = shift @_ || die "need row";
    my @cols_pk = @_;

    # never emit an update whose where clause is empty
    die "need key columns for table $table" unless (@cols_pk);

    my @conditions;
    foreach my $key (@cols_pk) {
        die "can't find value in row for col $key" unless (exists $row->{$key});
        if (defined($row->{$key})) {
            push @conditions, "$key=" . $dbh->quote($row->{$key});
        } else {
            push @conditions, "$key is null";
        }
    }

    my $sql = "update $table set $col=" . $dbh->quote($row->{$col})
        . " where " . join(" and ", @conditions);
    debug_sql($sql);
    return $sql;
}
345 |
# master   slave
#   1   =    1      compare the non-key columns
#   1   <    2      insert mrow
#   2   >    1      delete srow
#   1   =  undef    insert mrow
# undef =    1      delete srow

use Scalar::Util qw(looks_like_number);

# Cache: table name => { column name => 1 } for columns whose catalogue
# type looks numeric.
our %numeric_cols_of;

# Return a hash reference of the numeric columns of a table, looked up
# once per table from the master schema.
# NOTE(review): assumes format_type holds PostgreSQL type names such as
# "integer" or "numeric(10,2)" - confirm against Pg::Scheme.
sub numeric_cols {
    my $table = shift || die "need table as argument";
    if (! $numeric_cols_of{$table}) {
        my %numeric;
        foreach my $attr (@{ $mscheme->pg_attribute($table) }) {
            next unless (defined($attr->{format_type}));
            $numeric{ $attr->{attname} } = 1
                if ($attr->{format_type} =~ /^(?:smallint|integer|bigint|numeric|real|double precision|oid)\b/i);
        }
        $numeric_cols_of{$table} = \%numeric;
    }
    return $numeric_cols_of{$table};
}

# Order two key values the way the "order by ... asc" of the selects does:
# returns -1, 0 or 1. Undefined values sort last (PostgreSQL's default for
# ascending order). Values of numeric columns are compared as numbers, so
# that 9 sorts before 10; everything else is compared as a string.
# NOTE(review): string order can still differ from the database collation.
sub pk_cmp {
    my ($mval, $sval, $numeric) = @_;
    return 0 if (!defined($mval) && !defined($sval));
    return 1 if (!defined($mval));
    return -1 if (!defined($sval));
    if ($numeric && looks_like_number($mval) && looks_like_number($sval)) {
        return $mval <=> $sval;
    }
    return $mval cmp $sval;
}

# < 0: master row sorts first (missing on slave)
# > 0: slave row sorts first (extra on slave)
#   0: same key, or nothing left to compare
my $key_order = 0;

if ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) {
    # the first key column that differs decides the order
    my $numeric = numeric_cols($table);
    foreach my $col (@cols_pk) {
        $key_order = pk_cmp($mrow->{$col}, $srow->{$col}, $numeric->{$col});
        last if ($key_order != 0);
    }
} elsif ($have_srow == HAVE_ROW) {
    $key_order = 1;     # master exhausted: the slave row is extra
} elsif ($have_mrow == HAVE_ROW) {
    $key_order = -1;    # slave exhausted: the master row is missing
}

if ($key_order > 0) {
    $diff_row++;
    print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose);
    print sql_delete($sdbh,$table,$srow,@cols_pk),";\n";
    $have_srow = FETCH_ROW;
} elsif ($key_order < 0) {
    $diff_row++;
    print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose);
    print sql_insert($mdbh,$table,$mrow,@cols),";\n";
    $have_mrow = FETCH_ROW;
} elsif ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) {
    # same key on both sides: check non-key cols for row
    foreach my $col (@cols_cmp) {
        my ($mval, $sval) = ($mrow->{$col}, $srow->{$col});
        # NULL equals only NULL; NULL and '' are different values
        next if (!defined($mval) && !defined($sval));
        next if (defined($mval) && defined($sval) && $mval eq $sval);
        $diff_row++;
        if ($verbose) {
            my ($mshow, $sshow) = map { defined($_) ? "'$_'" : "NULL" } ($mval, $sval);
            print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] $mshow != $sshow\n";
        }
        print sql_update($mdbh,$table,$col,$mrow,@cols_pk),";\n";
    }
    $have_mrow = FETCH_ROW;
    $have_srow = FETCH_ROW;
}
}   # closes the row loop (while) opened earlier in the file

print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0);
$diff_total += $diff_row;
}   # closes the table loop (foreach) opened earlier in the file
392 |
|
393 |
# Overall summary on STDERR (verbose only).
if ($verbose) {
    if ($diff_total == 0) {
        # terminate the line like every other message does
        print STDERR "databases are same\n";
    } elsif ($diff_total > 0) {
        print STDERR "$diff_total differences in all tables\n";
    } else {
        # the counter is only ever incremented, so this is a sanity check
        die "this shouldn't happend. please report a bug!";
    }
}

# enable triggers again on slave
foreach my $tr (@triggers) {
    # quote() escapes the name, so a trigger name containing a quote
    # character cannot break the generated statement
    print "update pg_trigger set tgenabled = true where tgname=", $sdbh->quote($tr), ";\n";
}
# end transaction
print "commit;\n";

$mdbh->disconnect();
$sdbh->disconnect();