/[pgdiff]/pgdiff
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Annotation of /pgdiff

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.4 - (hide annotations)
Tue Aug 12 18:45:04 2003 UTC (20 years, 8 months ago) by dpavlin
Branch: MAIN
Changes since 1.3: +20 -63 lines
begin more to module, more verbose output

1 dpavlin 1.1 #!/usr/bin/perl -w
2    
3     # Compare data in two different databases
4     #
5     # 2003-08-07 Dobrica Pavlinusic
6     #
7     # somewhat based on ides from
8     # DataDiff 0.1 by Jon D. Frisby, http://www.mrjoy.com
9    
10     use strict;
11     use Getopt::Long;
12     use DBI;
13     use Data::Dumper;
14 dpavlin 1.4 use Pg::Scheme;
15 dpavlin 1.1
16     $| = 1;
17    
18     my ($debug,$verbose) = (0,0);
19     my ($help,$masterhost,$masterport,$masteruser,$masterpassword,
20     $slavehost,$slaveport,$slaveuser,$slavepassword,
21     $masterfile,
22     $slavefile,
23     );
24     my $tables;
25    
26     my $result = GetOptions(
27     "debug!" => \$debug, "verbose!" => \$verbose, "help" => \$help,
28     "masterhost=s" => \$masterhost, "masterport=i" => \$masterport,
29     "masteruser=s" => \$masteruser, "masterpassword=s" => \$masterpassword,
30     "masterfile=s" => \$masterfile,
31     "slavehost=s" => \$slavehost, "slaveport=i" => \$slaveport,
32     "slaveuser=s" => \$slaveuser, "slavepassword=s" => \$slavepassword,
33     "slavefile=s" => \$slavefile,
34     "tables=s" => \$tables,
35     );
36    
37     if (defined($help) || (scalar(@ARGV) < 2)) {
38     print "Usage: $0 [options] masterdb slavedb
39     Options:
40     --masterhost=hostname --masterport=port
41     --masteruser=username --masterpassword=string
42     --masterfile=filename
43     --slavehost=hostname --slaveport=port
44     --slaveuser=username --slavepassword=string
45     --slavefile=filename
46     --tables[s]=table[,table...]
47     ";
48     # exit ((scalar(@ARGV) < 2)? 1:0);
49     exit;
50     }
51    
52     my $master = $ARGV[0] || "master";
53     my $slave = $ARGV[1] || "slave";
54    
55     my $minfo = "dbname=$master";
56     $minfo = "$minfo host=$masterhost" if (defined($masterhost));
57     $minfo = "$minfo port=$masterport" if (defined($masterport));
58    
59     my $sinfo = "dbname=$slave";
60     $sinfo = "$sinfo host=$slavehost" if (defined($slavehost));
61     $sinfo = "$sinfo port=$slaveport" if (defined($slaveport));
62    
63     print "Master connection is $minfo\n" if ($debug);
64     print "Slave connection is $sinfo\n" if ($debug);
65    
66     my $mdbh = DBI->connect("DBI:Pg:$minfo", $masteruser, $masterpassword, { PrintError => 1 });
67     my $sdbh = DBI->connect("DBI:Pg:$sinfo", $slaveuser, $slavepassword, { PrintError => 1 });
68    
69     my ($diff_shema,$diff_data) = (0,0);
70    
71     my $sql;
72    
73     sub debug_sql {
74     return if (! $debug);
75     my $sql = shift;
76     $sql =~ s/[\n\r]/ /gs;
77     $sql =~ s/\s\s+/ /g;
78 dpavlin 1.2 print STDERR "DEBUG: SQL: $sql\n";
79 dpavlin 1.1 }
80    
81     sub debug_row {
82     return if (! $debug);
83     my $row = shift;
84     my @cols = @_;
85     if (! $row) {
86 dpavlin 1.2 print STDERR "DEBUG: ROW data is undef!\n";
87 dpavlin 1.1 return;
88     }
89 dpavlin 1.2 print STDERR "DEBUG: ROW: [",$#cols+1,"] ";
90 dpavlin 1.1 foreach my $col (@cols) {
91     print STDERR "$col:";
92     if ($row->{$col}) {
93     print $row->{$col};
94     } else {
95     print "null";
96     }
97     print " ";
98     }
99     print STDERR "\n";
100     }
101    
102 dpavlin 1.2 sub debug {
103     return if (!$debug);
104     print STDERR "DEBUG: ",@_;
105     }
106    
107 dpavlin 1.1 $verbose = 1 if ($debug);
108    
109 dpavlin 1.4 # init object for scheme in master database
110     my $mscheme = new Pg::Scheme( 'dbh' => $mdbh ) || die "can't query schema";
111    
112 dpavlin 1.1 # which tables to compare?
113    
114 dpavlin 1.4 my @tables = $mscheme->list_tables($tables);
115 dpavlin 1.1
116 dpavlin 1.2 debug "Comparing tables: ".join(", ",@tables)."\n";
117 dpavlin 1.1
118     my $cols;
119     my $diff_total = 0;
120    
121     foreach my $table (@tables) {
122    
123 dpavlin 1.4 my $sth;
124 dpavlin 1.1
125     # diff schema
126    
127     my @cols_notnull;# colums compared by a=b
128     my @cols_null; # colums compared by a=b or a is null and b is null
129     my @cols_skip; # skipped columns
130     my @cols_test; # all colums to test (without skipped)
131     my @cols; # all colums (for insert)
132    
133    
134 dpavlin 1.4 foreach my $row ($mscheme->explain_table($table)) {
135 dpavlin 1.1 # attname | format_type | attnotnull | atthasdef | attnum
136    
137     push @cols,$row->{attname};
138    
139     # FIXME: do something with attributes which shouldn't be compared
140     # (date, time, datetime, timestamp)
141     if ($row->{format_type} =~ /(date)|(time)/i) {
142     push @cols_skip,$row->{attname};
143     next;
144     }
145    
146     push @cols_test,$row->{attname};
147    
148     if ($row->{attnotnull}) {
149     push @cols_notnull,$row->{attname};
150     } else {
151     push @cols_null,$row->{attname};
152     }
153     }
154    
155     if ($debug) {
156 dpavlin 1.4 print STDERR "DEBUG: table $table not null cols: (",join(", ",@cols_notnull),")";
157     print STDERR " - null cols: (",join(", ",@cols_null),")" if (@cols_null);
158     print STDERR " - skip cols: (",join(", ",@cols_skip),")" if (@cols_skip);
159 dpavlin 1.1 print STDERR "\n";
160     }
161    
162     # diff data
163    
164     my @cols_pk; # columns which are primary key
165     my %in_pk;
166    
167     $sql="
168     SELECT
169     i.indexrelid as indexrelid, i.indrelid as indrelid,
170     count(a.attname) as cols_in_pk
171     FROM
172     pg_catalog.pg_class c,
173     pg_catalog.pg_index i,
174     pg_catalog.pg_attribute a
175     WHERE
176     c.oid = i.indrelid
177     and i.indisunique
178     and c.relname = '$table'
179     and a.attrelid = i.indexrelid
180     GROUP BY
181     i.indexrelid, i.indrelid, c.relname, i.indisprimary, i.indisunique
182     ORDER BY
183     cols_in_pk ASC, i.indisprimary DESC, i.indisunique DESC, c.relname DESC
184     ";
185     debug_sql($sql);
186     $sth = $mdbh->prepare($sql);
187     $sth->execute() || die;
188 dpavlin 1.4 my $row = $sth->fetchrow_hashref();
189 dpavlin 1.1 if ($row) {
190     $sql="
191     select a1.attname as attname from pg_attribute a1, pg_attribute a2 where a1.attrelid = ".$row->{indexrelid}." and a2.attrelid=".$row->{indrelid}." and a1.attname = a2.attname and a2.attnotnull";
192    
193     debug_sql($sql);
194     my $sth2 = $mdbh->prepare($sql);
195     $sth2->execute() || die;
196     @cols_pk = ();
197     while (my $row2 = $sth2->fetchrow_hashref()) {
198     push @cols_pk,$row2->{attname};
199     $in_pk{$row2->{attname}}++;
200     }
201    
202     }
203     if (! @cols_pk) {
204     print STDERR "can't find PK rows for table '$table' using all\n";
205     @cols_pk = @cols;
206     }
207    
208     my @cols_cmp; # columns to compare
209    
210     foreach my $col (@cols_test) {
211     push @cols_cmp,$col if (! $in_pk{$col});
212     }
213    
214     if ($verbose) {
215     print "table '$table' using for key: (",join(", ",@cols_pk),") to compare cols: (",join(", ",@cols_cmp),")\n";
216     }
217    
218     # diff data
219    
220     my $msql = "select ".join(",",@cols)." from $table";
221     my $ssql = $msql;
222    
223     sub sql_where {
224     my @cols = @_;
225     my $and = "";
226     my $where = " where ";
227     foreach my $col (@cols) {
228     $where .= "$and$col=?";
229     $and = " and ";
230     }
231     return $where;
232     }
233    
234     sub sql_order {
235     my @cols = @_;
236     my $order = " order by ";
237     my $comma = "";
238     foreach my $col (@cols) {
239     $order .= "$comma$col asc";
240     $comma = ", ";
241     }
242     return $order;
243     }
244    
245     my $order = sql_order(@cols_pk);
246     $msql .= $order;
247 dpavlin 1.2 $ssql .= $order;
248 dpavlin 1.1
249     debug_sql($msql);
250    
251     my $msth = $mdbh->prepare($msql) || die;
252     $msth->execute() || die;
253    
254     my $ssth = $sdbh->prepare($ssql) || die;
255 dpavlin 1.2 $ssth->execute() || die;
256 dpavlin 1.1
257     my $diff_row = 0;
258    
259     my ($mrow,$srow);
260 dpavlin 1.2 # have_*
261     use constant NO_ROW => 0;
262     use constant FETCH_ROW => 1;
263     use constant HAVE_ROW => 2;
264     my ($have_mrow,$have_srow) = (FETCH_ROW,FETCH_ROW);
265    
266     while ($have_mrow != NO_ROW || $have_srow != NO_ROW) {
267    
268     debug "have mrow: $have_mrow srow: $have_srow\n";
269    
270     sub pk_val {
271     my $row = shift || die "need row";
272     my @cols = shift || die "need cols";
273     my @val;
274     foreach my $col (@cols) {
275     push @val,$row->{$col};
276     }
277     return @val;
278     }
279 dpavlin 1.1
280     # fetch row from master
281 dpavlin 1.2 if ($have_mrow == FETCH_ROW) {
282     debug "fetch row from master: $msql\n";
283 dpavlin 1.1 $mrow = $msth->fetchrow_hashref();
284     debug_row($mrow,@cols);
285    
286     if ($mrow) {
287     # fill-in primary key values
288 dpavlin 1.2 $have_mrow = HAVE_ROW;
289 dpavlin 1.1 } else {
290 dpavlin 1.2 $have_mrow = NO_ROW;
291 dpavlin 1.1 }
292     }
293    
294     # fetch row from slave
295 dpavlin 1.2 if ($have_srow == FETCH_ROW) {
296     debug "fetch row from slave: $ssql\n";
297 dpavlin 1.1 $srow = $ssth->fetchrow_hashref();
298     debug_row($srow,@cols);
299     if ($srow) {
300 dpavlin 1.2 $have_srow = HAVE_ROW;
301 dpavlin 1.1 } else {
302 dpavlin 1.2 $have_srow = NO_ROW;
303 dpavlin 1.1 }
304     }
305    
306 dpavlin 1.2 debug "have mrow: $have_mrow srow: $have_srow\n";
307 dpavlin 1.1
308     # insert into slave database
309     sub sql_insert {
310     my $table = shift @_ || die "need table as argument";
311     my $row = shift @_ || die "need row data";
312     my @cols = @_;
313    
314     my $sql = "insert into $table (".join(",",@cols).") values (";
315     my $comma = "";
316     foreach my $col (@cols) {
317     $sql .= $comma.$mdbh->quote($row->{$col});
318     $comma = ",";
319     }
320     $sql.=")";
321     debug_sql($sql);
322     return $sql;
323     }
324    
325     # delete from slave database
326     sub sql_delete {
327     my $table = shift @_ || die "need table as argument";
328     my $row = shift @_ || die "need row as argument";
329     my @cols_pk = @_;
330    
331     my $where = sql_where(@cols_pk);
332    
333     my $sql = "delete from $table ";
334     foreach my $col (@cols_pk) {
335     my $val = $sdbh->quote($row->{$col}) || die "can't find value in row for col $col";
336     $where =~ s/\?/$val/;
337     }
338     $sql .= $where;
339     debug_sql($sql);
340     return $sql;
341     }
342    
343 dpavlin 1.3 # update row in slave database
344     sub sql_update {
345     my $table = shift @_ || die "need table as argument";
346     my $col = shift @_ || die "need col to update";
347     my $val = shift @_ || die "need new val";
348     my @cols_pk = @_ || die "need pk idenitifier";
349    
350     my $sql = "udate $table set $col=".$mdbh->quote($val);
351     debug_sql($sql);
352     return $sql;
353     }
354 dpavlin 1.1 # master slave
355     # 1 = 1 test
356     # 1 < 2 insert mrow
357     # 2 > 1 delete srow
358     # 1 = undef insert mrow
359     # undef = 1 delete srow
360    
361     my $pk_same = 1;
362    
363     # check key cols for row
364     foreach my $col (@cols_pk) {
365 dpavlin 1.2 if ( ($have_mrow == NO_ROW && $have_srow == HAVE_ROW) ||
366     ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} gt $srow->{$col}) ) {
367 dpavlin 1.1 $diff_row++;
368     $pk_same = 0;
369 dpavlin 1.3 print STDERR "EXTRA row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($srow,@cols_pk)).")\n" if ($verbose);
370     print sql_delete($table,$srow,@cols_pk),";\n";
371 dpavlin 1.2 $have_srow = FETCH_ROW;
372 dpavlin 1.1 last;
373 dpavlin 1.2 } elsif ( ($have_mrow == HAVE_ROW && $have_srow == NO_ROW) ||
374     ($have_mrow == HAVE_ROW && $have_srow == HAVE_ROW && $mrow->{$col} lt $srow->{$col}) ) {
375 dpavlin 1.1 $diff_row++;
376     $pk_same = 0;
377 dpavlin 1.3 print STDERR "MISSING row in table '$table' pk: [".join(",",@cols_pk)."] value (".join(",",pk_val($mrow,@cols_pk)).")\n" if ($verbose);
378     print sql_insert($table,$mrow,@cols),";\n";
379 dpavlin 1.2 $have_mrow = FETCH_ROW;
380 dpavlin 1.1 last;
381     }
382     }
383    
384 dpavlin 1.2 if ($pk_same && $have_mrow == HAVE_ROW && $have_srow == HAVE_ROW) {
385 dpavlin 1.1 # check non-key cols for row
386     foreach my $col (@cols_cmp) {
387     if ($mrow->{$col} ne $srow->{$col}) {
388     $diff_row++;
389 dpavlin 1.2 print STDERR "DIFF in table '$table' row ($col): [".join(",",@cols_pk)."] '$mrow->{$col}' != '$srow->{$col}'\n" if ($verbose);
390 dpavlin 1.3 print sql_update($table,$col,$mrow->{$col},@cols_pk),";\n";
391 dpavlin 1.1 }
392     }
393 dpavlin 1.3 $have_mrow = FETCH_ROW;
394     $have_srow = FETCH_ROW;
395 dpavlin 1.1 }
396     }
397    
398     print STDERR "$diff_row differences in table $table\n" if ($verbose && $diff_row > 0);
399     $diff_total += $diff_row;
400     }
401    
402 dpavlin 1.4 if ($verbose) {
403     if ($diff_total == 0) {
404     print STDERR "databases are same";
405     } elsif ($diff_total > 0) {
406     print STDERR "$diff_total differences in all tables\n";
407     } else {
408     die "this shouldn't happend. please report a bug!";
409     }
410     }
411 dpavlin 1.1
412     $mdbh->disconnect();
413     $sdbh->disconnect();

  ViewVC Help
Powered by ViewVC 1.1.26