1 |
#!/usr/bin/perl -w |
2 |
|
3 |
# Compare data in two different databases |
4 |
# |
5 |
# 2003-08-07 Dobrica Pavlinusic |
6 |
# |
7 |
# somewhat based on ides from |
8 |
# DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com |
9 |
|
10 |
use strict; |
11 |
use Getopt::Long; |
12 |
use DBI; |
13 |
use Data::Dumper; |
14 |
|
15 |
$| = 1; |
16 |
|
17 |
my ($debug,$verbose) = (0,0); |
18 |
my ($help,$masterhost,$masterport,$masteruser,$masterpassword, |
19 |
$slavehost,$slaveport,$slaveuser,$slavepassword, |
20 |
$masterfile, |
21 |
$slavefile, |
22 |
); |
23 |
my $tables; |
24 |
|
25 |
my $result = GetOptions( |
26 |
"debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help, |
27 |
"masterhost=s" => \$masterhost, "masterport=i" => \$masterport, |
28 |
"masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword, |
29 |
"masterfile=s" => \$masterfile, |
30 |
"slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport, |
31 |
"slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword, |
32 |
"slavefile=s" => \$slavefile, |
33 |
"tables=s" => \$tables, |
34 |
); |
35 |
|
36 |
if (defined($help) || (scalar(@ARGV) < 2)) { |
37 |
print "Usage: $0 [options] masterdb slavedb |
38 |
Options: |
39 |
--masterhost=hostname --masterport=port |
40 |
--masteruser=username --masterpassword=string |
41 |
--masterfile=filename |
42 |
--slavehost=hostname --slaveport=port |
43 |
--slaveuser=username --slavepassword=string |
44 |
--slavefile=filename |
45 |
--tables[s]=table[,table...] |
46 |
"; |
47 |
# exit ((scalar(@ARGV) < 2)? 1:0); |
48 |
exit; |
49 |
} |
50 |
|
51 |
my $master = $ARGV[0] || "master"; |
52 |
my $slave = $ARGV[1] || "slave"; |
53 |
|
54 |
my $minfo = "dbname=$master"; |
55 |
$minfo = "$minfo host=$masterhost" if (defined($masterhost)); |
56 |
$minfo = "$minfo port=$masterport" if (defined($masterport)); |
57 |
|
58 |
my $sinfo = "dbname=$slave"; |
59 |
$sinfo = "$sinfo host=$slavehost" if (defined($slavehost)); |
60 |
$sinfo = "$sinfo port=$slaveport" if (defined($slaveport)); |
61 |
|
62 |
print "Master connection is $minfo\n" if ($debug); |
63 |
print "Slave connection is $sinfo\n" if ($debug); |
64 |
|
65 |
my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 1 }); |
66 |
my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 1 }); |
67 |
|
68 |
my ($diff_shema,$diff_data) = (0,0); |
69 |
|
70 |
my $sql; |
71 |
|
72 |
sub debug_sql { |
73 |
return if (! $debug); |
74 |
my $sql = shift; |
75 |
$sql =~ s/[\n\r]/ /gs; |
76 |
$sql =~ s/\s\s+/ /g; |
77 |
print STDERR "SQL: $sql\n"; |
78 |
} |
79 |
|
80 |
sub debug_row { |
81 |
return if (! $debug); |
82 |
my $row = shift; |
83 |
my @cols = @_; |
84 |
if (! $row) { |
85 |
print STDERR "ROW data is undef!\n"; |
86 |
return; |
87 |
} |
88 |
print STDERR "ROW: [",$#cols+1,"] "; |
89 |
foreach my $col (@cols) { |
90 |
print STDERR "$col:"; |
91 |
if ($row->{$col}) { |
92 |
print $row->{$col}; |
93 |
} else { |
94 |
print "null"; |
95 |
} |
96 |
print " "; |
97 |
} |
98 |
print STDERR "\n"; |
99 |
} |
100 |
|
101 |
$verbose = 1 if ($debug); |
102 |
|
103 |
# which tables to compare? |
104 |
|
105 |
my @tables; |
106 |
if ($tables) { |
107 |
@tables = split(/,/,$tables); |
108 |
} else { |
109 |
# take all tables |
110 |
#$sql="select tablename from pg_tables where tablename not like 'pg_%' and tablename not like '_rserv_%'"; |
111 |
# show tables (based on psql \dt) |
112 |
$sql = " |
113 |
SELECT c.relname as table |
114 |
FROM pg_catalog.pg_class c |
115 |
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace |
116 |
WHERE c.relkind = 'r' |
117 |
AND n.nspname NOT IN ('pg_catalog', 'pg_toast') |
118 |
AND pg_catalog.pg_table_is_visible(c.oid) |
119 |
and c.relname not like '_rserv_%' |
120 |
"; |
121 |
my $sth = $mdbh->prepare($sql); |
122 |
$sth->execute() || die; |
123 |
while(my $row = $sth->fetchrow_hashref()) { |
124 |
push @tables,$row->{table}; |
125 |
} |
126 |
} |
127 |
|
128 |
print "Comparing tables: ",join(", ",@tables),"\n" if ($debug); |
129 |
|
130 |
my $cols; |
131 |
my $diff_total = 0; |
132 |
|
133 |
foreach my $table (@tables) { |
134 |
|
135 |
my ($sth); |
136 |
|
137 |
# find table oid |
138 |
$sql = " |
139 |
SELECT c.oid, n.nspname, c.relname |
140 |
FROM pg_catalog.pg_class c |
141 |
LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace |
142 |
WHERE pg_catalog.pg_table_is_visible(c.oid) |
143 |
AND c.relname = '$table' |
144 |
ORDER BY 2, 3 |
145 |
"; |
146 |
|
147 |
$sth = $mdbh->prepare($sql); |
148 |
$sth->execute() || die; |
149 |
my $row = $sth->fetchrow_hashref(); |
150 |
if (! $row) { |
151 |
print STDERR "Can't find OID of table '$table'\n"; |
152 |
exit 1; |
153 |
} |
154 |
|
155 |
my $oid = $row->{oid}; |
156 |
|
157 |
# diff schema |
158 |
|
159 |
my @cols_notnull;# colums compared by a=b |
160 |
my @cols_null; # colums compared by a=b or a is null and b is null |
161 |
my @cols_skip; # skipped columns |
162 |
my @cols_test; # all colums to test (without skipped) |
163 |
my @cols; # all colums (for insert) |
164 |
|
165 |
my $sql=" |
166 |
SELECT a.attname, |
167 |
pg_catalog.format_type(a.atttypid, a.atttypmod), |
168 |
a.attnotnull, a.atthasdef, a.attnum |
169 |
FROM pg_catalog.pg_attribute a |
170 |
WHERE a.attrelid = $oid AND a.attnum > 0 AND NOT a.attisdropped |
171 |
ORDER BY a.attnum |
172 |
"; |
173 |
|
174 |
$sth = $mdbh->prepare($sql); |
175 |
$sth->execute() || die; |
176 |
while(my $row = $sth->fetchrow_hashref()) { |
177 |
# attname | format_type | attnotnull | atthasdef | attnum |
178 |
|
179 |
push @cols,$row->{attname}; |
180 |
|
181 |
# FIXME: do something with attributes which shouldn't be compared |
182 |
# (date, time, datetime, timestamp) |
183 |
if ($row->{format_type} =~ /(date)|(time)/i) { |
184 |
push @cols_skip,$row->{attname}; |
185 |
next; |
186 |
} |
187 |
|
188 |
push @cols_test,$row->{attname}; |
189 |
|
190 |
if ($row->{attnotnull}) { |
191 |
push @cols_notnull,$row->{attname}; |
192 |
} else { |
193 |
push @cols_null,$row->{attname}; |
194 |
} |
195 |
} |
196 |
$sth->finish(); |
197 |
|
198 |
if ($debug) { |
199 |
print STDERR "table $table [$oid] not null: (",join(", ",@cols_notnull),")"; |
200 |
print STDERR " - null: (",join(", ",@cols_null),")" if (@cols_null); |
201 |
print STDERR " - skip: (",join(", ",@cols_skip),")" if (@cols_skip); |
202 |
print STDERR "\n"; |
203 |
} |
204 |
|
205 |
# diff data |
206 |
|
207 |
my @cols_pk; # columns which are primary key |
208 |
my %in_pk; |
209 |
|
210 |
$sql=" |
211 |
SELECT |
212 |
i.indexrelid as indexrelid, i.indrelid as indrelid, |
213 |
count(a.attname) as cols_in_pk |
214 |
FROM |
215 |
pg_catalog.pg_class c, |
216 |
pg_catalog.pg_index i, |
217 |
pg_catalog.pg_attribute a |
218 |
WHERE |
219 |
c.oid = i.indrelid |
220 |
and i.indisunique |
221 |
and c.relname = '$table' |
222 |
and a.attrelid = i.indexrelid |
223 |
GROUP BY |
224 |
i.indexrelid, i.indrelid, c.relname, i.indisprimary, i.indisunique |
225 |
ORDER BY |
226 |
cols_in_pk ASC, i.indisprimary DESC, i.indisunique DESC, c.relname DESC |
227 |
"; |
228 |
debug_sql($sql); |
229 |
$sth = $mdbh->prepare($sql); |
230 |
$sth->execute() || die; |
231 |
$row = $sth->fetchrow_hashref(); |
232 |
if ($row) { |
233 |
$sql=" |
234 |
select a1.attname as attname from pg_attribute a1, pg_attribute a2 where a1.attrelid = ".$row->{indexrelid}." and a2.attrelid=".$row->{indrelid}." and a1.attname = a2.attname and a2.attnotnull"; |
235 |
|
236 |
debug_sql($sql); |
237 |
my $sth2 = $mdbh->prepare($sql); |
238 |
$sth2->execute() || die; |
239 |
@cols_pk = (); |
240 |
while (my $row2 = $sth2->fetchrow_hashref()) { |
241 |
push @cols_pk,$row2->{attname}; |
242 |
$in_pk{$row2->{attname}}++; |
243 |
} |
244 |
|
245 |
} |
246 |
if (! @cols_pk) { |
247 |
print STDERR "can't find PK rows for table '$table' using all\n"; |
248 |
@cols_pk = @cols; |
249 |
} |
250 |
|
251 |
my @cols_cmp; # columns to compare |
252 |
|
253 |
foreach my $col (@cols_test) { |
254 |
push @cols_cmp,$col if (! $in_pk{$col}); |
255 |
} |
256 |
|
257 |
if ($verbose) { |
258 |
print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n"; |
259 |
} |
260 |
|
261 |
# diff data |
262 |
|
263 |
my $msql = "select ".join(",",@cols)." from $table"; |
264 |
my $ssql = $msql; |
265 |
|
266 |
sub sql_where { |
267 |
my @cols = @_; |
268 |
my $and = ""; |
269 |
my $where = " where "; |
270 |
foreach my $col (@cols) { |
271 |
$where .= "$and$col=?"; |
272 |
$and = " and "; |
273 |
} |
274 |
return $where; |
275 |
} |
276 |
|
277 |
sub sql_order { |
278 |
my @cols = @_; |
279 |
my $order = " order by "; |
280 |
my $comma = ""; |
281 |
foreach my $col (@cols) { |
282 |
$order .= "$comma$col asc"; |
283 |
$comma = ", "; |
284 |
} |
285 |
return $order; |
286 |
} |
287 |
|
288 |
my $order = sql_order(@cols_pk); |
289 |
$msql .= $order; |
290 |
$ssql .= sql_where(@cols_pk) . $order; |
291 |
|
292 |
debug_sql($msql); |
293 |
debug_sql($ssql); |
294 |
|
295 |
my $msth = $mdbh->prepare($msql) || die; |
296 |
$msth->execute() || die; |
297 |
|
298 |
my $ssth = $sdbh->prepare($ssql) || die; |
299 |
|
300 |
my $diff_row = 0; |
301 |
|
302 |
my ($mrow,$srow); |
303 |
my ($have_mrow,$have_srow) = (0,0); |
304 |
my @pk_val; |
305 |
|
306 |
my $more_rows = 1; |
307 |
while (!$have_mrow || !$have_srow) { |
308 |
# fetch row from master |
309 |
if (!$have_mrow) { |
310 |
print "fetch row from master [$more_rows]: $msql\n" if ($debug); |
311 |
$mrow = $msth->fetchrow_hashref(); |
312 |
debug_row($mrow,@cols); |
313 |
|
314 |
if ($mrow) { |
315 |
# fill-in primary key values |
316 |
@pk_val = (); |
317 |
foreach my $col (@cols_pk) { |
318 |
push @pk_val,$mrow->{$col}; |
319 |
} |
320 |
$have_mrow = 1; |
321 |
} else { |
322 |
$have_mrow = 0; |
323 |
} |
324 |
} |
325 |
|
326 |
# fetch row from slave |
327 |
if (!$have_srow) { |
328 |
print "fetch row from slave [$more_rows]: $ssql\n" if ($debug); |
329 |
$ssth->execute(@pk_val) || die; |
330 |
$srow = $ssth->fetchrow_hashref(); |
331 |
debug_row($srow,@cols); |
332 |
if ($srow) { |
333 |
$have_srow = 1; |
334 |
} else { |
335 |
$have_srow = 0; |
336 |
} |
337 |
} |
338 |
|
339 |
# end of this table? |
340 |
if (!$have_mrow && !$have_srow) { |
341 |
last; |
342 |
} |
343 |
|
344 |
# insert into slave database |
345 |
sub sql_insert { |
346 |
my $table = shift @_ || die "need table as argument"; |
347 |
my $row = shift @_ || die "need row data"; |
348 |
my @cols = @_; |
349 |
|
350 |
my $sql = "insert into $table (".join(",",@cols).") values ("; |
351 |
my $comma = ""; |
352 |
foreach my $col (@cols) { |
353 |
$sql .= $comma.$mdbh->quote($row->{$col}); |
354 |
$comma = ","; |
355 |
} |
356 |
$sql.=")"; |
357 |
debug_sql($sql); |
358 |
return $sql; |
359 |
} |
360 |
|
361 |
# delete from slave database |
362 |
sub sql_delete { |
363 |
my $table = shift @_ || die "need table as argument"; |
364 |
my $row = shift @_ || die "need row as argument"; |
365 |
my @cols_pk = @_; |
366 |
|
367 |
my $where = sql_where(@cols_pk); |
368 |
|
369 |
my $sql = "delete from $table "; |
370 |
foreach my $col (@cols_pk) { |
371 |
my $val = $sdbh->quote($row->{$col}) || die "can't find value in row for col $col"; |
372 |
$where =~ s/\?/$val/; |
373 |
} |
374 |
$sql .= $where; |
375 |
debug_sql($sql); |
376 |
return $sql; |
377 |
} |
378 |
|
379 |
# master slave |
380 |
# 1 = 1 test |
381 |
# 1 < 2 insert mrow |
382 |
# 2 > 1 delete srow |
383 |
# 1 = undef insert mrow |
384 |
# undef = 1 delete srow |
385 |
|
386 |
my $pk_same = 1; |
387 |
|
388 |
# check key cols for row |
389 |
foreach my $col (@cols_pk) { |
390 |
if ( (!$mrow && $srow) || ($mrow && $srow && ($mrow->{$col} gt $srow->{$col})) ) { |
391 |
$diff_row++; |
392 |
$pk_same = 0; |
393 |
print sql_delete($table,$srow,@cols_pk),"\n"; |
394 |
$have_srow = 0; # fetch new slave row |
395 |
last; |
396 |
} elsif ( ($mrow && !$srow) || ($mrow && $srow && ($mrow->{$col} lt $srow->{$col})) ) { |
397 |
$diff_row++; |
398 |
$pk_same = 0; |
399 |
print sql_insert($table,$mrow,@cols),"\n"; |
400 |
$have_mrow = 0; |
401 |
last; |
402 |
} |
403 |
} |
404 |
|
405 |
if ($pk_same) { |
406 |
# check non-key cols for row |
407 |
foreach my $col (@cols_cmp) { |
408 |
if ($mrow->{$col} ne $srow->{$col}) { |
409 |
$diff_row++; |
410 |
print STDERR "DIFF in table '$table' row ($col): [".join(",",@pk_val)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose); |
411 |
print sql_delete($table,$srow,@cols_pk),"\n"; |
412 |
print sql_insert($table,$mrow,@cols),"\n"; |
413 |
} |
414 |
} |
415 |
} |
416 |
} |
417 |
|
418 |
print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0); |
419 |
$diff_total += $diff_row; |
420 |
} |
421 |
|
422 |
print STDERR "$diff_total differences in all tables\n" if ($verbose && $diff_total > 0); |
423 |
|
424 |
$mdbh->disconnect(); |
425 |
$sdbh->disconnect(); |