1 |
dpavlin |
1.1 |
#!/usr/bin/perl -w |
2 |
|
|
|
3 |
|
|
# Compare data in two different databases |
4 |
|
|
# |
5 |
|
|
# 2003-08-07 Dobrica Pavlinusic |
6 |
|
|
# |
7 |
|
|
# somewhat based on ides from |
8 |
|
|
# DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com |
9 |
|
|
|
10 |
|
|
use strict; |
11 |
|
|
use Getopt::Long; |
12 |
|
|
use DBI; |
13 |
|
|
use Data::Dumper; |
14 |
dpavlin |
1.4 |
use Pg::Scheme; |
15 |
dpavlin |
1.1 |
|
16 |
|
|
$| = 1; |
17 |
|
|
|
18 |
|
|
my ($debug,$verbose) = (0,0); |
19 |
|
|
my ($help,$masterhost,$masterport,$masteruser,$masterpassword, |
20 |
|
|
$slavehost,$slaveport,$slaveuser,$slavepassword, |
21 |
|
|
$masterfile, |
22 |
|
|
$slavefile, |
23 |
|
|
); |
24 |
|
|
my $tables; |
25 |
|
|
|
26 |
|
|
my $result = GetOptions( |
27 |
|
|
"debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help, |
28 |
|
|
"masterhost=s" => \$masterhost, "masterport=i" => \$masterport, |
29 |
|
|
"masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword, |
30 |
|
|
"masterfile=s" => \$masterfile, |
31 |
|
|
"slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport, |
32 |
|
|
"slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword, |
33 |
|
|
"slavefile=s" => \$slavefile, |
34 |
|
|
"tables=s" => \$tables, |
35 |
|
|
); |
36 |
|
|
|
37 |
|
|
if (defined($help) || (scalar(@ARGV) < 2)) { |
38 |
|
|
print "Usage: $0 [options] masterdb slavedb |
39 |
|
|
Options: |
40 |
|
|
--masterhost=hostname --masterport=port |
41 |
|
|
--masteruser=username --masterpassword=string |
42 |
|
|
--masterfile=filename |
43 |
|
|
--slavehost=hostname --slaveport=port |
44 |
|
|
--slaveuser=username --slavepassword=string |
45 |
|
|
--slavefile=filename |
46 |
|
|
--tables[s]=table[,table...] |
47 |
|
|
"; |
48 |
|
|
# exit ((scalar(@ARGV) < 2)? 1:0); |
49 |
|
|
exit; |
50 |
|
|
} |
51 |
|
|
|
52 |
|
|
my $master = $ARGV[0] || "master"; |
53 |
|
|
my $slave = $ARGV[1] || "slave"; |
54 |
|
|
|
55 |
|
|
my $minfo = "dbname=$master"; |
56 |
|
|
$minfo = "$minfo host=$masterhost" if (defined($masterhost)); |
57 |
|
|
$minfo = "$minfo port=$masterport" if (defined($masterport)); |
58 |
|
|
|
59 |
|
|
my $sinfo = "dbname=$slave"; |
60 |
|
|
$sinfo = "$sinfo host=$slavehost" if (defined($slavehost)); |
61 |
|
|
$sinfo = "$sinfo port=$slaveport" if (defined($slaveport)); |
62 |
|
|
|
63 |
|
|
print "Master connection is $minfo\n" if ($debug); |
64 |
|
|
print "Slave connection is $sinfo\n" if ($debug); |
65 |
|
|
|
66 |
dpavlin |
1.7 |
my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 0 } ); |
67 |
|
|
if (! $mdbh) { |
68 |
|
|
print "Can't connect to master database $master"; |
69 |
|
|
print "on $masterhost" if ($masterhost); |
70 |
|
|
print "\n"; |
71 |
|
|
exit 1; |
72 |
|
|
} |
73 |
|
|
my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 0 }); |
74 |
|
|
if (! $sdbh) { |
75 |
|
|
print "Can't connect to slave database $slave"; |
76 |
|
|
print "on $slavehost" if ($slavehost); |
77 |
|
|
print "\n"; |
78 |
|
|
exit 1; |
79 |
|
|
} |
80 |
dpavlin |
1.1 |
|
81 |
|
|
my ($diff_shema,$diff_data) = (0,0); |
82 |
|
|
|
83 |
|
|
my $sql; |
84 |
|
|
|
85 |
|
|
sub debug_sql { |
86 |
|
|
return if (! $debug); |
87 |
|
|
my $sql = shift; |
88 |
|
|
$sql =~ s/[\n\r]/ /gs; |
89 |
|
|
$sql =~ s/\s\s+/ /g; |
90 |
dpavlin |
1.2 |
print STDERR "DEBUG: SQL: $sql\n"; |
91 |
dpavlin |
1.1 |
} |
92 |
|
|
|
93 |
|
|
sub debug_row { |
94 |
|
|
return if (! $debug); |
95 |
|
|
my $row = shift; |
96 |
|
|
my @cols = @_; |
97 |
|
|
if (! $row) { |
98 |
dpavlin |
1.2 |
print STDERR "DEBUG: ROW data is undef!\n"; |
99 |
dpavlin |
1.1 |
return; |
100 |
|
|
} |
101 |
dpavlin |
1.2 |
print STDERR "DEBUG: ROW: [",$#cols+1,"] "; |
102 |
dpavlin |
1.1 |
foreach my $col (@cols) { |
103 |
|
|
print STDERR "$col:"; |
104 |
|
|
if ($row->{$col}) { |
105 |
|
|
print $row->{$col}; |
106 |
|
|
} else { |
107 |
|
|
print "null"; |
108 |
|
|
} |
109 |
|
|
print " "; |
110 |
|
|
} |
111 |
|
|
print STDERR "\n"; |
112 |
|
|
} |
113 |
|
|
|
114 |
dpavlin |
1.2 |
sub debug { |
115 |
|
|
return if (!$debug); |
116 |
|
|
print STDERR "DEBUG: ",@_; |
117 |
|
|
} |
118 |
|
|
|
119 |
dpavlin |
1.1 |
$verbose = 1 if ($debug); |
120 |
|
|
|
121 |
dpavlin |
1.4 |
# init object for scheme in master database |
122 |
dpavlin |
1.5 |
my $mscheme = new Pg::Scheme( 'dbh' => $mdbh, 'DEBUG' => 0 ) || die "can't query schema"; |
123 |
dpavlin |
1.4 |
|
124 |
dpavlin |
1.1 |
# which tables to compare? |
125 |
|
|
|
126 |
dpavlin |
1.4 |
my @tables = $mscheme->list_tables($tables); |
127 |
dpavlin |
1.1 |
|
128 |
dpavlin |
1.2 |
debug "Comparing tables: ".join(", ",@tables)."\n"; |
129 |
dpavlin |
1.1 |
|
130 |
|
|
my $cols; |
131 |
|
|
my $diff_total = 0; |
132 |
|
|
|
133 |
|
|
foreach my $table (@tables) { |
134 |
|
|
|
135 |
dpavlin |
1.4 |
my $sth; |
136 |
dpavlin |
1.1 |
|
137 |
|
|
# diff schema |
138 |
|
|
|
139 |
dpavlin |
1.5 |
# all colums (for insert) |
140 |
|
|
my @cols = @{$mscheme->cols($table)}; |
141 |
|
|
|
142 |
|
|
# colums compared by a=b |
143 |
|
|
my @cols_notnull = @{$mscheme->cols_notnull($table)}; |
144 |
|
|
|
145 |
|
|
# colums compared by a=b or a is null and b is null |
146 |
|
|
my @cols_null = @{$mscheme->cols_null($table)}; |
147 |
|
|
|
148 |
|
|
# primary key columns |
149 |
|
|
my @cols_pk = @{$mscheme->cols_pk($table)}; |
150 |
|
|
|
151 |
|
|
# columns to compare (not in primary key) |
152 |
|
|
my @cols_cmp = @{$mscheme->cols_notpk($table)}; |
153 |
|
|
|
154 |
dpavlin |
1.1 |
my @cols_skip; # skipped columns |
155 |
|
|
my @cols_test; # all colums to test (without skipped) |
156 |
|
|
|
157 |
dpavlin |
1.5 |
foreach my $row (@{$mscheme->pg_attribute($table)}) { |
158 |
dpavlin |
1.1 |
# attname | format_type | attnotnull | atthasdef | attnum |
159 |
|
|
|
160 |
|
|
# FIXME: do something with attributes which shouldn't be compared |
161 |
|
|
# (date, time, datetime, timestamp) |
162 |
|
|
if ($row->{format_type} =~ /(date)|(time)/i) { |
163 |
|
|
push @cols_skip,$row->{attname}; |
164 |
dpavlin |
1.5 |
} else { |
165 |
|
|
push @cols_test,$row->{attname}; |
166 |
dpavlin |
1.1 |
} |
167 |
|
|
|
168 |
|
|
} |
169 |
|
|
|
170 |
|
|
if ($debug) { |
171 |
dpavlin |
1.4 |
print STDERR "DEBUG: table $table not null cols: (",join(", ",@cols_notnull),")"; |
172 |
|
|
print STDERR " - null cols: (",join(", ",@cols_null),")" if (@cols_null); |
173 |
|
|
print STDERR " - skip cols: (",join(", ",@cols_skip),")" if (@cols_skip); |
174 |
dpavlin |
1.1 |
print STDERR "\n"; |
175 |
|
|
} |
176 |
|
|
|
177 |
|
|
# diff data |
178 |
|
|
|
179 |
|
|
if (! @cols_pk) { |
180 |
|
|
print STDERR "can't find PK rows for table '$table' using all\n"; |
181 |
|
|
@cols_pk = @cols; |
182 |
|
|
} |
183 |
|
|
|
184 |
|
|
|
185 |
|
|
if ($verbose) { |
186 |
|
|
print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n"; |
187 |
|
|
} |
188 |
|
|
|
189 |
|
|
# diff data |
190 |
|
|
|
191 |
|
|
my $msql = "select ".join(",",@cols)." from $table"; |
192 |
|
|
my $ssql = $msql; |
193 |
|
|
|
194 |
|
|
sub sql_where { |
195 |
|
|
my @cols = @_; |
196 |
|
|
my $and = ""; |
197 |
|
|
my $where = " where "; |
198 |
|
|
foreach my $col (@cols) { |
199 |
|
|
$where .= "$and$col=?"; |
200 |
|
|
$and = " and "; |
201 |
|
|
} |
202 |
|
|
return $where; |
203 |
|
|
} |
204 |
|
|
|
205 |
|
|
sub sql_order { |
206 |
|
|
my @cols = @_; |
207 |
|
|
my $order = " order by "; |
208 |
|
|
my $comma = ""; |
209 |
|
|
foreach my $col (@cols) { |
210 |
|
|
$order .= "$comma$col asc"; |
211 |
|
|
$comma = ", "; |
212 |
|
|
} |
213 |
|
|
return $order; |
214 |
|
|
} |
215 |
|
|
|
216 |
|
|
my $order = sql_order(@cols_pk); |
217 |
|
|
$msql .= $order; |
218 |
dpavlin |
1.2 |
$ssql .= $order; |
219 |
dpavlin |
1.1 |
|
220 |
|
|
debug_sql($msql); |
221 |
|
|
|
222 |
|
|
my $msth = $mdbh->prepare($msql) || die; |
223 |
|
|
$msth->execute() || die; |
224 |
|
|
|
225 |
|
|
my $ssth = $sdbh->prepare($ssql) || die; |
226 |
dpavlin |
1.2 |
$ssth->execute() || die; |
227 |
dpavlin |
1.1 |
|
228 |
|
|
my $diff_row = 0; |
229 |
|
|
|
230 |
|
|
my ($mrow,$srow); |
231 |
dpavlin |
1.2 |
# have_* |
232 |
|
|
use constant NO_ROW => 0; |
233 |
|
|
use constant FETCH_ROW => 1; |
234 |
|
|
use constant HAVE_ROW => 2; |
235 |
|
|
my ($have_mrow,$have_srow) = (FETCH_ROW,FETCH_ROW); |
236 |
|
|
|
237 |
|
|
while ($have_mrow != NO_ROW || $have_srow != NO_ROW) { |
238 |
|
|
|
239 |
|
|
debug "have mrow: $have_mrow srow: $have_srow\n"; |
240 |
|
|
|
241 |
|
|
sub pk_val { |
242 |
|
|
my $row = shift || die "need row"; |
243 |
|
|
my @cols = shift || die "need cols"; |
244 |
|
|
my @val; |
245 |
|
|
foreach my $col (@cols) { |
246 |
|
|
push @val,$row->{$col}; |
247 |
|
|
} |
248 |
|
|
return @val; |
249 |
|
|
} |
250 |
dpavlin |
1.1 |
|
251 |
|
|
# fetch row from master |
252 |
dpavlin |
1.2 |
if ($have_mrow == FETCH_ROW) { |
253 |
|
|
debug "fetch row from master: $msql\n"; |
254 |
dpavlin |
1.1 |
$mrow = $msth->fetchrow_hashref(); |
255 |
|
|
debug_row($mrow,@cols); |
256 |
|
|
|
257 |
|
|
if ($mrow) { |
258 |
|
|
# fill-in primary key values |
259 |
dpavlin |
1.2 |
$have_mrow = HAVE_ROW; |
260 |
dpavlin |
1.1 |
} else { |
261 |
dpavlin |
1.2 |
$have_mrow = NO_ROW; |
262 |
dpavlin |
1.1 |
} |
263 |
|
|
} |
264 |
|
|
|
265 |
|
|
# fetch row from slave |
266 |
dpavlin |
1.2 |
if ($have_srow == FETCH_ROW) { |
267 |
|
|
debug "fetch row from slave: $ssql\n"; |
268 |
dpavlin |
1.1 |
$srow = $ssth->fetchrow_hashref(); |
269 |
|
|
debug_row($srow,@cols); |
270 |
|
|
if ($srow) { |
271 |
dpavlin |
1.2 |
$have_srow = HAVE_ROW; |
272 |
dpavlin |
1.1 |
} else { |
273 |
dpavlin |
1.2 |
$have_srow = NO_ROW; |
274 |
dpavlin |
1.1 |
} |
275 |
|
|
} |
276 |
|
|
|
277 |
dpavlin |
1.2 |
debug "have mrow: $have_mrow srow: $have_srow\n"; |
278 |
dpavlin |
1.1 |
|
279 |
|
|
# insert into slave database |
280 |
|
|
sub sql_insert { |
281 |
dpavlin |
1.6 |
my $dbh = shift @_ || die "need dbh"; |
282 |
dpavlin |
1.1 |
my $table = shift @_ || die "need table as argument"; |
283 |
|
|
my $row = shift @_ || die "need row data"; |
284 |
|
|
my @cols = @_; |
285 |
|
|
|
286 |
|
|
my $sql = "insert into $table (".join(",",@cols).") values ("; |
287 |
|
|
my $comma = ""; |
288 |
|
|
foreach my $col (@cols) { |
289 |
dpavlin |
1.6 |
$sql .= $comma.$dbh->quote($row->{$col}); |
290 |
dpavlin |
1.1 |
$comma = ","; |
291 |
|
|
} |
292 |
|
|
$sql.=")"; |
293 |
|
|
debug_sql($sql); |
294 |
|
|
return $sql; |
295 |
|
|
} |
296 |
|
|
|
297 |
|
|
# delete from slave database |
298 |
|
|
sub sql_delete { |
299 |
dpavlin |
1.6 |
my $dbh = shift @_ || die "need dbh"; |
300 |
dpavlin |
1.1 |
my $table = shift @_ || die "need table as argument"; |
301 |
|
|
my $row = shift @_ || die "need row as argument"; |
302 |
|
|
my @cols_pk = @_; |
303 |
|
|
|
304 |
|
|
my $where = sql_where(@cols_pk); |
305 |
|
|
|
306 |
dpavlin |
1.6 |
my $sql = "delete from $table"; |
307 |
dpavlin |
1.1 |
foreach my $col (@cols_pk) { |
308 |
dpavlin |
1.6 |
my $val = $dbh->quote($row->{$col}) || die "can't find value in row for col $col"; |
309 |
dpavlin |
1.1 |
$where =~ s/\?/$val/; |
310 |
|
|
} |
311 |
|
|
$sql .= $where; |
312 |
|
|
debug_sql($sql); |
313 |
|
|
return $sql; |
314 |
|
|
} |
315 |
|
|
|
316 |
dpavlin |
1.3 |
# update row in slave database |
317 |
|
|
sub sql_update { |
318 |
dpavlin |
1.6 |
my $dbh = shift @_ || die "need dbh"; |
319 |
dpavlin |
1.3 |
my $table = shift @_ || die "need table as argument"; |
320 |
|
|
my $col = shift @_ || die "need col to update"; |
321 |
dpavlin |
1.6 |
my $row = shift @_ || die "need row"; |
322 |
|
|
my @cols_pk = @_; |
323 |
dpavlin |
1.3 |
|
324 |
dpavlin |
1.6 |
my $sql = "update $table set $col=".$dbh->quote($row->{$col}); |
325 |
|
|
my $where = sql_where(@cols_pk); |
326 |
|
|
foreach my $col (@cols_pk) { |
327 |
|
|
my $val = $dbh->quote($row->{$col}) || die "can't find value in row for col $col"; |
328 |
|
|
$where =~ s/\?/$val/; |
329 |
|
|
} |
330 |
|
|
$sql .= $where; |
331 |
dpavlin |
1.3 |
debug_sql($sql); |
332 |
|
|
return $sql; |
333 |
|
|
} |
334 |
dpavlin |
1.1 |
# master slave |
335 |
|
|
# 1 = 1 test |
336 |
|
|
# 1 < 2 insert mrow |
337 |
|
|
# 2 > 1 delete srow |
338 |
|
|
# 1 = undef insert mrow |
339 |
|
|
# undef = 1 delete srow |
340 |
|
|
|
341 |
|
|
my $pk_same = 1; |
342 |
|
|
|
343 |
|
|
# check key cols for row |
344 |
|
|
foreach my $col (@cols_pk) { |
345 |
dpavlin |
1.2 |
if ( ($have_mrow == NO_ROW && $have_srow == HAVE_ROW) || |
346 |
|
|
($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} gt $srow->{$col}) ) { |
347 |
dpavlin |
1.1 |
$diff_row++; |
348 |
|
|
$pk_same = 0; |
349 |
dpavlin |
1.3 |
print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose); |
350 |
dpavlin |
1.6 |
print sql_delete($sdbh,$table,$srow,@cols_pk),";\n"; |
351 |
dpavlin |
1.2 |
$have_srow = FETCH_ROW; |
352 |
dpavlin |
1.1 |
last; |
353 |
dpavlin |
1.2 |
} elsif ( ($have_mrow == HAVE_ROW && $have_srow == NO_ROW) || |
354 |
|
|
($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} lt $srow->{$col}) ) { |
355 |
dpavlin |
1.1 |
$diff_row++; |
356 |
|
|
$pk_same = 0; |
357 |
dpavlin |
1.3 |
print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose); |
358 |
dpavlin |
1.6 |
print sql_insert($mdbh,$table,$mrow,@cols),";\n"; |
359 |
dpavlin |
1.2 |
$have_mrow = FETCH_ROW; |
360 |
dpavlin |
1.1 |
last; |
361 |
|
|
} |
362 |
|
|
} |
363 |
|
|
|
364 |
dpavlin |
1.2 |
if ($pk_same && $have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) { |
365 |
dpavlin |
1.1 |
# check non-key cols for row |
366 |
|
|
foreach my $col (@cols_cmp) { |
367 |
|
|
if ($mrow->{$col} ne $srow->{$col}) { |
368 |
|
|
$diff_row++; |
369 |
dpavlin |
1.2 |
print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose); |
370 |
dpavlin |
1.6 |
print sql_update($mdbh,$table,$col,$mrow,@cols_pk),";\n"; |
371 |
dpavlin |
1.1 |
} |
372 |
|
|
} |
373 |
dpavlin |
1.3 |
$have_mrow = FETCH_ROW; |
374 |
|
|
$have_srow = FETCH_ROW; |
375 |
dpavlin |
1.1 |
} |
376 |
|
|
} |
377 |
|
|
|
378 |
|
|
print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0); |
379 |
|
|
$diff_total += $diff_row; |
380 |
|
|
} |
381 |
|
|
|
382 |
dpavlin |
1.4 |
if ($verbose) { |
383 |
|
|
if ($diff_total == 0) { |
384 |
|
|
print STDERR "databases are same"; |
385 |
|
|
} elsif ($diff_total > 0) { |
386 |
|
|
print STDERR "$diff_total differences in all tables\n"; |
387 |
|
|
} else { |
388 |
|
|
die "this shouldn't happend. please report a bug!"; |
389 |
|
|
} |
390 |
|
|
} |
391 |
dpavlin |
1.1 |
|
392 |
|
|
$mdbh->disconnect(); |
393 |
|
|
$sdbh->disconnect(); |