/[pgdiff]/pgdiff
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /pgdiff

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.3 - (hide annotations)
Tue Aug 12 12:00:07 2003 UTC (20 years, 7 months ago) by dpavlin
Branch: MAIN
Changes since 1.2: +18 -7 lines
fixes, add update for diff rows (stupid overlook, really...)

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     # Compare data in two different databases
4     #
5     # 2003-08-07 Dobrica Pavlinusic
6     #
7     # somewhat based on ides from
8     # DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com
9    
10     use strict;
11     use Getopt::Long;
12     use DBI;
13     use Data::Dumper;
14    
15     $| = 1;
16    
17     my ($debug,$verbose) = (0,0);
18     my ($help,$masterhost,$masterport,$masteruser,$masterpassword,
19     $slavehost,$slaveport,$slaveuser,$slavepassword,
20     $masterfile,
21     $slavefile,
22     );
23     my $tables;
24    
25     my $result = GetOptions(
26     "debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help,
27     "masterhost=s" => \$masterhost, "masterport=i" => \$masterport,
28     "masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword,
29     "masterfile=s" => \$masterfile,
30     "slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport,
31     "slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword,
32     "slavefile=s" => \$slavefile,
33     "tables=s" => \$tables,
34     );
35    
36     if (defined($help) || (scalar(@ARGV) < 2)) {
37     print "Usage: $0 [options] masterdb slavedb
38     Options:
39     --masterhost=hostname --masterport=port
40     --masteruser=username --masterpassword=string
41     --masterfile=filename
42     --slavehost=hostname --slaveport=port
43     --slaveuser=username --slavepassword=string
44     --slavefile=filename
45     --tables[s]=table[,table...]
46     ";
47     # exit ((scalar(@ARGV) < 2)? 1:0);
48     exit;
49     }
50    
51     my $master = $ARGV[0] || "master";
52     my $slave = $ARGV[1] || "slave";
53    
54     my $minfo = "dbname=$master";
55     $minfo = "$minfo host=$masterhost" if (defined($masterhost));
56     $minfo = "$minfo port=$masterport" if (defined($masterport));
57    
58     my $sinfo = "dbname=$slave";
59     $sinfo = "$sinfo host=$slavehost" if (defined($slavehost));
60     $sinfo = "$sinfo port=$slaveport" if (defined($slaveport));
61    
62     print "Master connection is $minfo\n" if ($debug);
63     print "Slave connection is $sinfo\n" if ($debug);
64    
65     my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 1 });
66     my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 1 });
67    
68     my ($diff_shema,$diff_data) = (0,0);
69    
70     my $sql;
71    
72     sub debug_sql {
73     return if (! $debug);
74     my $sql = shift;
75     $sql =~ s/[\n\r]/ /gs;
76     $sql =~ s/\s\s+/ /g;
77 dpavlin 1.2 print STDERR "DEBUG: SQL: $sql\n";
78 dpavlin 1.1 }
79    
80     sub debug_row {
81     return if (! $debug);
82     my $row = shift;
83     my @cols = @_;
84     if (! $row) {
85 dpavlin 1.2 print STDERR "DEBUG: ROW data is undef!\n";
86 dpavlin 1.1 return;
87     }
88 dpavlin 1.2 print STDERR "DEBUG: ROW: [",$#cols+1,"] ";
89 dpavlin 1.1 foreach my $col (@cols) {
90     print STDERR "$col:";
91     if ($row->{$col}) {
92     print $row->{$col};
93     } else {
94     print "null";
95     }
96     print " ";
97     }
98     print STDERR "\n";
99     }
100    
101 dpavlin 1.2 sub debug {
102     return if (!$debug);
103     print STDERR "DEBUG: ",@_;
104     }
105    
106 dpavlin 1.1 $verbose = 1 if ($debug);
107    
108     # which tables to compare?
109    
110     my @tables;
111     if ($tables) {
112     @tables = split(/,/,$tables);
113     } else {
114     # take all tables
115     #$sql="select tablename from pg_tables where tablename not like 'pg_%' and tablename not like '_rserv_%'";
116     # show tables (based on psql \dt)
117     $sql = "
118     SELECT c.relname as table
119     FROM pg_catalog.pg_class c
120     LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
121     WHERE c.relkind = 'r'
122     AND n.nspname NOT IN ('pg_catalog', 'pg_toast')
123     AND pg_catalog.pg_table_is_visible(c.oid)
124     and c.relname not like '_rserv_%'
125     ";
126 dpavlin 1.2 foreach my $table (@tables) {
127     $sql .= " and c.relname like '$table'";
128     }
129 dpavlin 1.1 my $sth = $mdbh->prepare($sql);
130     $sth->execute() || die;
131     while(my $row = $sth->fetchrow_hashref()) {
132     push @tables,$row->{table};
133     }
134     }
135    
136 dpavlin 1.2 debug "Comparing tables: ".join(", ",@tables)."\n";
137 dpavlin 1.1
138     my $cols;
139     my $diff_total = 0;
140    
141     foreach my $table (@tables) {
142    
143     my ($sth);
144    
145     # find table oid
146     $sql = "
147     SELECT c.oid, n.nspname, c.relname
148     FROM pg_catalog.pg_class c
149     LEFT JOIN pg_catalog.pg_namespace n ON n.oid = c.relnamespace
150     WHERE pg_catalog.pg_table_is_visible(c.oid)
151     AND c.relname = '$table'
152     ORDER BY 2, 3
153     ";
154    
155     $sth = $mdbh->prepare($sql);
156     $sth->execute() || die;
157     my $row = $sth->fetchrow_hashref();
158     if (! $row) {
159     print STDERR "Can't find OID of table '$table'\n";
160     exit 1;
161     }
162    
163     my $oid = $row->{oid};
164    
165     # diff schema
166    
167     my @cols_notnull;# colums compared by a=b
168     my @cols_null; # colums compared by a=b or a is null and b is null
169     my @cols_skip; # skipped columns
170     my @cols_test; # all colums to test (without skipped)
171     my @cols; # all colums (for insert)
172    
173     my $sql="
174     SELECT a.attname,
175     pg_catalog.format_type(a.atttypid, a.atttypmod),
176     a.attnotnull, a.atthasdef, a.attnum
177     FROM pg_catalog.pg_attribute a
178     WHERE a.attrelid = $oid AND a.attnum > 0 AND NOT a.attisdropped
179     ORDER BY a.attnum
180     ";
181    
182     $sth = $mdbh->prepare($sql);
183     $sth->execute() || die;
184     while(my $row = $sth->fetchrow_hashref()) {
185     # attname | format_type | attnotnull | atthasdef | attnum
186    
187     push @cols,$row->{attname};
188    
189     # FIXME: do something with attributes which shouldn't be compared
190     # (date, time, datetime, timestamp)
191     if ($row->{format_type} =~ /(date)|(time)/i) {
192     push @cols_skip,$row->{attname};
193     next;
194     }
195    
196     push @cols_test,$row->{attname};
197    
198     if ($row->{attnotnull}) {
199     push @cols_notnull,$row->{attname};
200     } else {
201     push @cols_null,$row->{attname};
202     }
203     }
204     $sth->finish();
205    
206     if ($debug) {
207 dpavlin 1.2 print STDERR "DEBUG: table $table [$oid] not null: (",join(", ",@cols_notnull),")";
208 dpavlin 1.1 print STDERR " - null: (",join(", ",@cols_null),")" if (@cols_null);
209     print STDERR " - skip: (",join(", ",@cols_skip),")" if (@cols_skip);
210     print STDERR "\n";
211     }
212    
213     # diff data
214    
215     my @cols_pk; # columns which are primary key
216     my %in_pk;
217    
218     $sql="
219     SELECT
220     i.indexrelid as indexrelid, i.indrelid as indrelid,
221     count(a.attname) as cols_in_pk
222     FROM
223     pg_catalog.pg_class c,
224     pg_catalog.pg_index i,
225     pg_catalog.pg_attribute a
226     WHERE
227     c.oid = i.indrelid
228     and i.indisunique
229     and c.relname = '$table'
230     and a.attrelid = i.indexrelid
231     GROUP BY
232     i.indexrelid, i.indrelid, c.relname, i.indisprimary, i.indisunique
233     ORDER BY
234     cols_in_pk ASC, i.indisprimary DESC, i.indisunique DESC, c.relname DESC
235     ";
236     debug_sql($sql);
237     $sth = $mdbh->prepare($sql);
238     $sth->execute() || die;
239     $row = $sth->fetchrow_hashref();
240     if ($row) {
241     $sql="
242     select a1.attname as attname from pg_attribute a1, pg_attribute a2 where a1.attrelid = ".$row->{indexrelid}." and a2.attrelid=".$row->{indrelid}." and a1.attname = a2.attname and a2.attnotnull";
243    
244     debug_sql($sql);
245     my $sth2 = $mdbh->prepare($sql);
246     $sth2->execute() || die;
247     @cols_pk = ();
248     while (my $row2 = $sth2->fetchrow_hashref()) {
249     push @cols_pk,$row2->{attname};
250     $in_pk{$row2->{attname}}++;
251     }
252    
253     }
254     if (! @cols_pk) {
255     print STDERR "can't find PK rows for table '$table' using all\n";
256     @cols_pk = @cols;
257     }
258    
259     my @cols_cmp; # columns to compare
260    
261     foreach my $col (@cols_test) {
262     push @cols_cmp,$col if (! $in_pk{$col});
263     }
264    
265     if ($verbose) {
266     print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n";
267     }
268    
269     # diff data
270    
271     my $msql = "select ".join(",",@cols)." from $table";
272     my $ssql = $msql;
273    
274     sub sql_where {
275     my @cols = @_;
276     my $and = "";
277     my $where = " where ";
278     foreach my $col (@cols) {
279     $where .= "$and$col=?";
280     $and = " and ";
281     }
282     return $where;
283     }
284    
285     sub sql_order {
286     my @cols = @_;
287     my $order = " order by ";
288     my $comma = "";
289     foreach my $col (@cols) {
290     $order .= "$comma$col asc";
291     $comma = ", ";
292     }
293     return $order;
294     }
295    
296     my $order = sql_order(@cols_pk);
297     $msql .= $order;
298 dpavlin 1.2 $ssql .= $order;
299 dpavlin 1.1
300     debug_sql($msql);
301    
302     my $msth = $mdbh->prepare($msql) || die;
303     $msth->execute() || die;
304    
305     my $ssth = $sdbh->prepare($ssql) || die;
306 dpavlin 1.2 $ssth->execute() || die;
307 dpavlin 1.1
308     my $diff_row = 0;
309    
310     my ($mrow,$srow);
311 dpavlin 1.2 # have_*
312     use constant NO_ROW => 0;
313     use constant FETCH_ROW => 1;
314     use constant HAVE_ROW => 2;
315     my ($have_mrow,$have_srow) = (FETCH_ROW,FETCH_ROW);
316    
317     while ($have_mrow != NO_ROW || $have_srow != NO_ROW) {
318    
319     debug "have mrow: $have_mrow srow: $have_srow\n";
320    
321     sub pk_val {
322     my $row = shift || die "need row";
323     my @cols = shift || die "need cols";
324     my @val;
325     foreach my $col (@cols) {
326     push @val,$row->{$col};
327     }
328     return @val;
329     }
330 dpavlin 1.1
331     # fetch row from master
332 dpavlin 1.2 if ($have_mrow == FETCH_ROW) {
333     debug "fetch row from master: $msql\n";
334 dpavlin 1.1 $mrow = $msth->fetchrow_hashref();
335     debug_row($mrow,@cols);
336    
337     if ($mrow) {
338     # fill-in primary key values
339 dpavlin 1.2 $have_mrow = HAVE_ROW;
340 dpavlin 1.1 } else {
341 dpavlin 1.2 $have_mrow = NO_ROW;
342 dpavlin 1.1 }
343     }
344    
345     # fetch row from slave
346 dpavlin 1.2 if ($have_srow == FETCH_ROW) {
347     debug "fetch row from slave: $ssql\n";
348 dpavlin 1.1 $srow = $ssth->fetchrow_hashref();
349     debug_row($srow,@cols);
350     if ($srow) {
351 dpavlin 1.2 $have_srow = HAVE_ROW;
352 dpavlin 1.1 } else {
353 dpavlin 1.2 $have_srow = NO_ROW;
354 dpavlin 1.1 }
355     }
356    
357 dpavlin 1.2 debug "have mrow: $have_mrow srow: $have_srow\n";
358 dpavlin 1.1
359     # insert into slave database
360     sub sql_insert {
361     my $table = shift @_ || die "need table as argument";
362     my $row = shift @_ || die "need row data";
363     my @cols = @_;
364    
365     my $sql = "insert into $table (".join(",",@cols).") values (";
366     my $comma = "";
367     foreach my $col (@cols) {
368     $sql .= $comma.$mdbh->quote($row->{$col});
369     $comma = ",";
370     }
371     $sql.=")";
372     debug_sql($sql);
373     return $sql;
374     }
375    
376     # delete from slave database
377     sub sql_delete {
378     my $table = shift @_ || die "need table as argument";
379     my $row = shift @_ || die "need row as argument";
380     my @cols_pk = @_;
381    
382     my $where = sql_where(@cols_pk);
383    
384     my $sql = "delete from $table ";
385     foreach my $col (@cols_pk) {
386     my $val = $sdbh->quote($row->{$col}) || die "can't find value in row for col $col";
387     $where =~ s/\?/$val/;
388     }
389     $sql .= $where;
390     debug_sql($sql);
391     return $sql;
392     }
393    
394 dpavlin 1.3 # update row in slave database
395     sub sql_update {
396     my $table = shift @_ || die "need table as argument";
397     my $col = shift @_ || die "need col to update";
398     my $val = shift @_ || die "need new val";
399     my @cols_pk = @_ || die "need pk idenitifier";
400    
401     my $sql = "udate $table set $col=".$mdbh->quote($val);
402     debug_sql($sql);
403     return $sql;
404     }
405 dpavlin 1.1 # master slave
406     # 1 = 1 test
407     # 1 < 2 insert mrow
408     # 2 > 1 delete srow
409     # 1 = undef insert mrow
410     # undef = 1 delete srow
411    
412     my $pk_same = 1;
413    
414     # check key cols for row
415     foreach my $col (@cols_pk) {
416 dpavlin 1.2 if ( ($have_mrow == NO_ROW && $have_srow == HAVE_ROW) ||
417     ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} gt $srow->{$col}) ) {
418 dpavlin 1.1 $diff_row++;
419     $pk_same = 0;
420 dpavlin 1.3 print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose);
421     print sql_delete($table,$srow,@cols_pk),";\n";
422 dpavlin 1.2 $have_srow = FETCH_ROW;
423 dpavlin 1.1 last;
424 dpavlin 1.2 } elsif ( ($have_mrow == HAVE_ROW && $have_srow == NO_ROW) ||
425     ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} lt $srow->{$col}) ) {
426 dpavlin 1.1 $diff_row++;
427     $pk_same = 0;
428 dpavlin 1.3 print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose);
429     print sql_insert($table,$mrow,@cols),";\n";
430 dpavlin 1.2 $have_mrow = FETCH_ROW;
431 dpavlin 1.1 last;
432     }
433     }
434    
435 dpavlin 1.2 if ($pk_same && $have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) {
436 dpavlin 1.1 # check non-key cols for row
437     foreach my $col (@cols_cmp) {
438     if ($mrow->{$col} ne $srow->{$col}) {
439     $diff_row++;
440 dpavlin 1.2 print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose);
441 dpavlin 1.3 print sql_update($table,$col,$mrow->{$col},@cols_pk),";\n";
442 dpavlin 1.1 }
443     }
444 dpavlin 1.3 $have_mrow = FETCH_ROW;
445     $have_srow = FETCH_ROW;
446 dpavlin 1.1 }
447     }
448    
449     print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0);
450     $diff_total += $diff_row;
451     }
452    
453     print STDERR "$diff_total differences in all tables\n" if ($verbose && $diff_total > 0);
454    
455     $mdbh->disconnect();
456     $sdbh->disconnect();

  ViewVC Help
Powered by ViewVC 1.1.26