1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# Compare data in two different databases |
4 |
# |
5 |
# 2003-08-07 Dobrica Pavlinusic |
6 |
# |
7 |
# somewhat based on ides from |
8 |
# DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com |
9 |
|
10 |
use strict; |
11 |
use Getopt::Long; |
12 |
use DBI; |
13 |
use Data::Dumper; |
14 |
use Pg::Scheme; |
15 |
|
16 |
$| = 1; |
17 |
|
18 |
my ($debug,$verbose) = (0,0); |
19 |
my ($help,$masterhost,$masterport,$masteruser,$masterpassword, |
20 |
$slavehost,$slaveport,$slaveuser,$slavepassword, |
21 |
$masterfile, |
22 |
$slavefile, |
23 |
); |
24 |
my $tables; |
25 |
|
26 |
my $result = GetOptions( |
27 |
"debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help, |
28 |
"masterhost=s" => \$masterhost, "masterport=i" => \$masterport, |
29 |
"masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword, |
30 |
"masterfile=s" => \$masterfile, |
31 |
"slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport, |
32 |
"slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword, |
33 |
"slavefile=s" => \$slavefile, |
34 |
"tables=s" => \$tables, |
35 |
); |
36 |
|
37 |
if (defined($help) || (scalar(@ARGV) < 2)) { |
38 |
print "Usage: $0 [options] masterdb slavedb |
39 |
Options: |
40 |
--masterhost=hostname --masterport=port |
41 |
--masteruser=username --masterpassword=string |
42 |
--masterfile=filename |
43 |
--slavehost=hostname --slaveport=port |
44 |
--slaveuser=username --slavepassword=string |
45 |
--slavefile=filename |
46 |
--tables[s]=table[,table...] |
47 |
"; |
48 |
# exit ((scalar(@ARGV) < 2)? 1:0); |
49 |
exit; |
50 |
} |
51 |
|
52 |
my $master = $ARGV[0] || "master"; |
53 |
my $slave = $ARGV[1] || "slave"; |
54 |
|
55 |
my $minfo = "dbname=$master"; |
56 |
$minfo = "$minfo host=$masterhost" if (defined($masterhost)); |
57 |
$minfo = "$minfo port=$masterport" if (defined($masterport)); |
58 |
|
59 |
my $sinfo = "dbname=$slave"; |
60 |
$sinfo = "$sinfo host=$slavehost" if (defined($slavehost)); |
61 |
$sinfo = "$sinfo port=$slaveport" if (defined($slaveport)); |
62 |
|
63 |
print "Master connection is $minfo\n" if ($debug); |
64 |
print "Slave connection is $sinfo\n" if ($debug); |
65 |
|
66 |
my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 1 }); |
67 |
my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 1 }); |
68 |
|
69 |
my ($diff_shema,$diff_data) = (0,0); |
70 |
|
71 |
my $sql; |
72 |
|
73 |
sub debug_sql { |
74 |
return if (! $debug); |
75 |
my $sql = shift; |
76 |
$sql =~ s/[\n\r]/ /gs; |
77 |
$sql =~ s/\s\s+/ /g; |
78 |
print STDERR "DEBUG: SQL: $sql\n"; |
79 |
} |
80 |
|
81 |
sub debug_row { |
82 |
return if (! $debug); |
83 |
my $row = shift; |
84 |
my @cols = @_; |
85 |
if (! $row) { |
86 |
print STDERR "DEBUG: ROW data is undef!\n"; |
87 |
return; |
88 |
} |
89 |
print STDERR "DEBUG: ROW: [",$#cols+1,"] "; |
90 |
foreach my $col (@cols) { |
91 |
print STDERR "$col:"; |
92 |
if ($row->{$col}) { |
93 |
print $row->{$col}; |
94 |
} else { |
95 |
print "null"; |
96 |
} |
97 |
print " "; |
98 |
} |
99 |
print STDERR "\n"; |
100 |
} |
101 |
|
102 |
sub debug { |
103 |
return if (!$debug); |
104 |
print STDERR "DEBUG: ",@_; |
105 |
} |
106 |
|
107 |
$verbose = 1 if ($debug); |
108 |
|
109 |
# init object for scheme in master database |
110 |
my $mscheme = new Pg::Scheme( 'dbh' => $mdbh ) || die "can't query schema"; |
111 |
|
112 |
# which tables to compare? |
113 |
|
114 |
my @tables = $mscheme->list_tables($tables); |
115 |
|
116 |
debug "Comparing tables: ".join(", ",@tables)."\n"; |
117 |
|
118 |
my $cols; |
119 |
my $diff_total = 0; |
120 |
|
121 |
foreach my $table (@tables) { |
122 |
|
123 |
my $sth; |
124 |
|
125 |
# diff schema |
126 |
|
127 |
my @cols_notnull;# colums compared by a=b |
128 |
my @cols_null; # colums compared by a=b or a is null and b is null |
129 |
my @cols_skip; # skipped columns |
130 |
my @cols_test; # all colums to test (without skipped) |
131 |
my @cols; # all colums (for insert) |
132 |
|
133 |
|
134 |
foreach my $row ($mscheme->explain_table($table)) { |
135 |
# attname | format_type | attnotnull | atthasdef | attnum |
136 |
|
137 |
push @cols,$row->{attname}; |
138 |
|
139 |
# FIXME: do something with attributes which shouldn't be compared |
140 |
# (date, time, datetime, timestamp) |
141 |
if ($row->{format_type} =~ /(date)|(time)/i) { |
142 |
push @cols_skip,$row->{attname}; |
143 |
next; |
144 |
} |
145 |
|
146 |
push @cols_test,$row->{attname}; |
147 |
|
148 |
if ($row->{attnotnull}) { |
149 |
push @cols_notnull,$row->{attname}; |
150 |
} else { |
151 |
push @cols_null,$row->{attname}; |
152 |
} |
153 |
} |
154 |
|
155 |
if ($debug) { |
156 |
print STDERR "DEBUG: table $table not null cols: (",join(", ",@cols_notnull),")"; |
157 |
print STDERR " - null cols: (",join(", ",@cols_null),")" if (@cols_null); |
158 |
print STDERR " - skip cols: (",join(", ",@cols_skip),")" if (@cols_skip); |
159 |
print STDERR "\n"; |
160 |
} |
161 |
|
162 |
# diff data |
163 |
|
164 |
my @cols_pk; # columns which are primary key |
165 |
my %in_pk; |
166 |
|
167 |
$sql=" |
168 |
SELECT |
169 |
i.indexrelid as indexrelid, i.indrelid as indrelid, |
170 |
count(a.attname) as cols_in_pk |
171 |
FROM |
172 |
pg_catalog.pg_class c, |
173 |
pg_catalog.pg_index i, |
174 |
pg_catalog.pg_attribute a |
175 |
WHERE |
176 |
c.oid = i.indrelid |
177 |
and i.indisunique |
178 |
and c.relname = '$table' |
179 |
and a.attrelid = i.indexrelid |
180 |
GROUP BY |
181 |
i.indexrelid, i.indrelid, c.relname, i.indisprimary, i.indisunique |
182 |
ORDER BY |
183 |
cols_in_pk ASC, i.indisprimary DESC, i.indisunique DESC, c.relname DESC |
184 |
"; |
185 |
debug_sql($sql); |
186 |
$sth = $mdbh->prepare($sql); |
187 |
$sth->execute() || die; |
188 |
my $row = $sth->fetchrow_hashref(); |
189 |
if ($row) { |
190 |
$sql=" |
191 |
select a1.attname as attname from pg_attribute a1, pg_attribute a2 where a1.attrelid = ".$row->{indexrelid}." and a2.attrelid=".$row->{indrelid}." and a1.attname = a2.attname and a2.attnotnull"; |
192 |
|
193 |
debug_sql($sql); |
194 |
my $sth2 = $mdbh->prepare($sql); |
195 |
$sth2->execute() || die; |
196 |
@cols_pk = (); |
197 |
while (my $row2 = $sth2->fetchrow_hashref()) { |
198 |
push @cols_pk,$row2->{attname}; |
199 |
$in_pk{$row2->{attname}}++; |
200 |
} |
201 |
|
202 |
} |
203 |
if (! @cols_pk) { |
204 |
print STDERR "can't find PK rows for table '$table' using all\n"; |
205 |
@cols_pk = @cols; |
206 |
} |
207 |
|
208 |
my @cols_cmp; # columns to compare |
209 |
|
210 |
foreach my $col (@cols_test) { |
211 |
push @cols_cmp,$col if (! $in_pk{$col}); |
212 |
} |
213 |
|
214 |
if ($verbose) { |
215 |
print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n"; |
216 |
} |
217 |
|
218 |
# diff data |
219 |
|
220 |
my $msql = "select ".join(",",@cols)." from $table"; |
221 |
my $ssql = $msql; |
222 |
|
223 |
sub sql_where { |
224 |
my @cols = @_; |
225 |
my $and = ""; |
226 |
my $where = " where "; |
227 |
foreach my $col (@cols) { |
228 |
$where .= "$and$col=?"; |
229 |
$and = " and "; |
230 |
} |
231 |
return $where; |
232 |
} |
233 |
|
234 |
sub sql_order { |
235 |
my @cols = @_; |
236 |
my $order = " order by "; |
237 |
my $comma = ""; |
238 |
foreach my $col (@cols) { |
239 |
$order .= "$comma$col asc"; |
240 |
$comma = ", "; |
241 |
} |
242 |
return $order; |
243 |
} |
244 |
|
245 |
my $order = sql_order(@cols_pk); |
246 |
$msql .= $order; |
247 |
$ssql .= $order; |
248 |
|
249 |
debug_sql($msql); |
250 |
|
251 |
my $msth = $mdbh->prepare($msql) || die; |
252 |
$msth->execute() || die; |
253 |
|
254 |
my $ssth = $sdbh->prepare($ssql) || die; |
255 |
$ssth->execute() || die; |
256 |
|
257 |
my $diff_row = 0; |
258 |
|
259 |
my ($mrow,$srow); |
260 |
# have_* |
261 |
use constant NO_ROW => 0; |
262 |
use constant FETCH_ROW => 1; |
263 |
use constant HAVE_ROW => 2; |
264 |
my ($have_mrow,$have_srow) = (FETCH_ROW,FETCH_ROW); |
265 |
|
266 |
while ($have_mrow != NO_ROW || $have_srow != NO_ROW) { |
267 |
|
268 |
debug "have mrow: $have_mrow srow: $have_srow\n"; |
269 |
|
270 |
sub pk_val { |
271 |
my $row = shift || die "need row"; |
272 |
my @cols = shift || die "need cols"; |
273 |
my @val; |
274 |
foreach my $col (@cols) { |
275 |
push @val,$row->{$col}; |
276 |
} |
277 |
return @val; |
278 |
} |
279 |
|
280 |
# fetch row from master |
281 |
if ($have_mrow == FETCH_ROW) { |
282 |
debug "fetch row from master: $msql\n"; |
283 |
$mrow = $msth->fetchrow_hashref(); |
284 |
debug_row($mrow,@cols); |
285 |
|
286 |
if ($mrow) { |
287 |
# fill-in primary key values |
288 |
$have_mrow = HAVE_ROW; |
289 |
} else { |
290 |
$have_mrow = NO_ROW; |
291 |
} |
292 |
} |
293 |
|
294 |
# fetch row from slave |
295 |
if ($have_srow == FETCH_ROW) { |
296 |
debug "fetch row from slave: $ssql\n"; |
297 |
$srow = $ssth->fetchrow_hashref(); |
298 |
debug_row($srow,@cols); |
299 |
if ($srow) { |
300 |
$have_srow = HAVE_ROW; |
301 |
} else { |
302 |
$have_srow = NO_ROW; |
303 |
} |
304 |
} |
305 |
|
306 |
debug "have mrow: $have_mrow srow: $have_srow\n"; |
307 |
|
308 |
# insert into slave database |
309 |
sub sql_insert { |
310 |
my $table = shift @_ || die "need table as argument"; |
311 |
my $row = shift @_ || die "need row data"; |
312 |
my @cols = @_; |
313 |
|
314 |
my $sql = "insert into $table (".join(",",@cols).") values ("; |
315 |
my $comma = ""; |
316 |
foreach my $col (@cols) { |
317 |
$sql .= $comma.$mdbh->quote($row->{$col}); |
318 |
$comma = ","; |
319 |
} |
320 |
$sql.=")"; |
321 |
debug_sql($sql); |
322 |
return $sql; |
323 |
} |
324 |
|
325 |
# delete from slave database |
326 |
sub sql_delete { |
327 |
my $table = shift @_ || die "need table as argument"; |
328 |
my $row = shift @_ || die "need row as argument"; |
329 |
my @cols_pk = @_; |
330 |
|
331 |
my $where = sql_where(@cols_pk); |
332 |
|
333 |
my $sql = "delete from $table "; |
334 |
foreach my $col (@cols_pk) { |
335 |
my $val = $sdbh->quote($row->{$col}) || die "can't find value in row for col $col"; |
336 |
$where =~ s/\?/$val/; |
337 |
} |
338 |
$sql .= $where; |
339 |
debug_sql($sql); |
340 |
return $sql; |
341 |
} |
342 |
|
343 |
# update row in slave database |
344 |
sub sql_update { |
345 |
my $table = shift @_ || die "need table as argument"; |
346 |
my $col = shift @_ || die "need col to update"; |
347 |
my $val = shift @_ || die "need new val"; |
348 |
my @cols_pk = @_ || die "need pk idenitifier"; |
349 |
|
350 |
my $sql = "udate $table set $col=".$mdbh->quote($val); |
351 |
debug_sql($sql); |
352 |
return $sql; |
353 |
} |
354 |
# master slave |
355 |
# 1 = 1 test |
356 |
# 1 < 2 insert mrow |
357 |
# 2 > 1 delete srow |
358 |
# 1 = undef insert mrow |
359 |
# undef = 1 delete srow |
360 |
|
361 |
my $pk_same = 1; |
362 |
|
363 |
# check key cols for row |
364 |
foreach my $col (@cols_pk) { |
365 |
if ( ($have_mrow == NO_ROW && $have_srow == HAVE_ROW) || |
366 |
($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} gt $srow->{$col}) ) { |
367 |
$diff_row++; |
368 |
$pk_same = 0; |
369 |
print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose); |
370 |
print sql_delete($table,$srow,@cols_pk),";\n"; |
371 |
$have_srow = FETCH_ROW; |
372 |
last; |
373 |
} elsif ( ($have_mrow == HAVE_ROW && $have_srow == NO_ROW) || |
374 |
($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} lt $srow->{$col}) ) { |
375 |
$diff_row++; |
376 |
$pk_same = 0; |
377 |
print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose); |
378 |
print sql_insert($table,$mrow,@cols),";\n"; |
379 |
$have_mrow = FETCH_ROW; |
380 |
last; |
381 |
} |
382 |
} |
383 |
|
384 |
if ($pk_same && $have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) { |
385 |
# check non-key cols for row |
386 |
foreach my $col (@cols_cmp) { |
387 |
if ($mrow->{$col} ne $srow->{$col}) { |
388 |
$diff_row++; |
389 |
print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose); |
390 |
print sql_update($table,$col,$mrow->{$col},@cols_pk),";\n"; |
391 |
} |
392 |
} |
393 |
$have_mrow = FETCH_ROW; |
394 |
$have_srow = FETCH_ROW; |
395 |
} |
396 |
} |
397 |
|
398 |
print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0); |
399 |
$diff_total += $diff_row; |
400 |
} |
401 |
|
402 |
if ($verbose) { |
403 |
if ($diff_total == 0) { |
404 |
print STDERR "databases are same"; |
405 |
} elsif ($diff_total > 0) { |
406 |
print STDERR "$diff_total differences in all tables\n"; |
407 |
} else { |
408 |
die "this shouldn't happend. please report a bug!"; |
409 |
} |
410 |
} |
411 |
|
412 |
$mdbh->disconnect(); |
413 |
$sdbh->disconnect(); |