--- psinib.pl 2003/01/04 12:14:54 1.2 +++ psinib.pl 2003/10/12 21:46:42 1.15 @@ -12,7 +12,7 @@ # # # usage: -# $ backup.pl mountscript +# $ psinib.pl mountscript use strict 'vars'; use Data::Dumper; @@ -22,22 +22,26 @@ use Filesys::SmbClient; #use Taint; use Fcntl qw(LOCK_EX LOCK_NB); +use Digest::MD5; +use File::Basename; +use Getopt::Long; # configuration my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S'; # strftime format for logfile my $DIR_TIME_FMT = '%Y%m%d'; # strftime format for backup dir my $LOG = '/var/log/backup.log'; # add path here... -$LOG = '/tmp/backup.log'; +#$LOG = '/tmp/backup.log'; # store backups in which directory -my $BACKUP_DEST = '/data/isis_backup'; +my $BACKUP_DEST = '/backup/isis_backup'; +#my $BACKUP_DEST = '/tmp/backup/'; # files to ignore in backup my @ignore = ('.md5sum', '.backupignore', 'backupignore.txt'); # open log -open(L, "> $LOG") || die "can't open log $LOG: $!"; +open(L, ">> $LOG") || die "can't open log $LOG: $!"; select((select(L), $|=1)[0]); # flush output # make a lock on logfile @@ -48,13 +52,19 @@ sleep 1; redo if ++$c < 10; # no response for 10 sec, bail out - print STDERR "can't take lock on $LOG -- another $0 running?\n"; + xlog("ABORT","can't take lock on $LOG -- another $0 running?"); exit 1; } # taint path: nmblookup should be there! $ENV{'PATH'} = "/usr/bin:/bin"; +my $use_ping = 1; # deault: use ping to verify that host is up + +my $result = GetOptions( + "ping!" => \$use_ping, "backupdest!" => \$BACKUP_DEST, +); + my $mounts = shift @ARGV || 'mountscript'; # die "usage: $0 mountscript"; @@ -62,13 +72,19 @@ my @in_backup; # shares which are backeduped this run -my $p = new Net::Ping->new(); +my $ping; +if ($use_ping) { + $ping = new Net::Ping->new("tcp", 2); + # ping will try tcp connect to netbios-ssn (139) + $ping->{port_num} = getservbyname("netbios-ssn", "tcp"); +} my $backup_ok = 0; my $smb; my %smb_atime; my %smb_mtime; +my %file_md5; open(M, $mounts) || die "can't open $mounts: $!"; while() { @@ -76,7 +92,7 @@ next if !/^\s*smbmount\s/; my (undef,$share,undef,$opt) = split(/\s+/,$_,4); - my ($user,$passwd,$workgroup); + my ($user,$passwd,$workgroup,$ip); foreach (split(/,/,$opt)) { my ($n,$v) = split(/=/,$_,2); @@ -92,20 +108,41 @@ } } elsif ($n =~ m#workgroup#i) { $workgroup = $v; + } elsif ($n =~ m#ip#i) { + $ip = $v; } } push @in_backup,$share; + + my ($host,$dir,$date_dir) = share2host_dir($share); + my $bl = "$BACKUP_DEST/$host/$dir/latest"; # latest backup + my $bc = "$BACKUP_DEST/$host/$dir/$date_dir"; # current one + my $real_bl; + if (-l $bl) { + $real_bl=readlink($bl) || die "can't read link $bl: $!"; + $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); + if (-l $bc && $real_bl eq $bc) { + print "$share allready backuped...\n"; + $backup_ok++; + next; + } + + } + + print "working on $share\n"; - my $ip = get_ip($share); + # try to nmblookup IP + $ip = get_ip($share) if (! $ip); if ($ip) { xlog($share,"IP is $ip"); - if ($p->ping($ip)) { - snap_share($share,$user,$passwd,$workgroup); - $backup_ok++; + if (($use_ping && $ping->ping($ip)) || 1) { + if (snap_share($share,$user,$passwd,$workgroup)) { + $backup_ok++; + } } } } @@ -117,6 +154,7 @@ #------------------------------------------------------------------------- + # get IP number from share sub get_ip { my $share = shift; @@ -129,6 +167,8 @@ } } + +# write entry to screen and log sub xlog { my $share = shift; my $t = strftime $LOG_TIME_FMT, localtime; @@ -137,16 +177,14 @@ print L "$t $share\t$m\n"; } -sub snap_share { +# dump warn and dies into log +BEGIN { $SIG{'__WARN__'} = sub { xlog('WARN',$_[0]) ; warn $_[0] } } +BEGIN { $SIG{'__DIE__'} = sub { xlog('DIE',$_[0]) ; die $_[0] } } - my $share = shift; - - my %param = ( debug => 0 ); - - $param{username} = shift; - $param{password} = shift; - $param{workgroup} = shift; +# split share name to host, dir and currnet date dir +sub share2host_dir { + my $share = shift; my ($host,$dir); if ($share =~ m#//([^/]+)/(.+)$#) { ($host,$dir) = ($1,$2); @@ -157,8 +195,22 @@ print "Can't parse share $share into host and directory!\n"; return; } + return ($host,$dir,strftime $DIR_TIME_FMT, localtime); +} - my $date_dir = strftime $DIR_TIME_FMT, localtime; + +# make a snapshot of a share +sub snap_share { + + my $share = shift; + + my %param = ( debug => 0 ); + + $param{username} = shift || warn "can't find username for share $share"; + $param{password} = shift || warn "can't find passwod for share $share"; + $param{workgroup} = shift || warn "can't find workgroup for share $share"; + + my ($host,$dir,$date_dir) = share2host_dir($share); # latest backup directory my $bl = "$BACKUP_DEST/$host/$dir/latest"; @@ -166,16 +218,30 @@ my $bc = "$BACKUP_DEST/$host/$dir/$date_dir"; my $real_bl; - if (-e $bl) { + if (-l $bl) { $real_bl=readlink($bl) || die "can't read link $bl: $!"; $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); } else { - print "no old backup, this is first run...\n"; + print "no old backup, trying to find last backup, "; + if (opendir(BL_DIR, "$BACKUP_DEST/$host/$dir")) { + my @bl_dirs = sort grep { !/^\./ && -d "$BACKUP_DEST/$host/$dir/$_" } readdir(BL_DIR); + closedir(BL_DIR); + $real_bl=pop @bl_dirs; + print "using $real_bl as latest...\n"; + $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); + if ($real_bl eq $bc) { + xlog($share,"latest from today (possible partial backup)"); + rename $real_bl,$real_bl.".partial" || warn "can't reaname partial backup: $!"; + $real_bl .= ".partial"; + } + } else { + print "this is first run...\n"; + } } - if (-e $bc && $real_bl && $real_bl eq $bc) { + if (-l $bc && $real_bl && $real_bl eq $bc) { print "$share allready backuped...\n"; - return; + return 1; } die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST); @@ -202,14 +268,13 @@ my %file_size; my %file_atime; my %file_mtime; - my %file_md5; + #my %file_md5; + %file_md5 = (); my @smb_files; my %smb_size; #my %smb_atime; #my %smb_mtime; - my %smb_md5; - sub norm_dir { my $foo = shift; @@ -225,24 +290,24 @@ my $di = 0; while ($di <= $#dirs && $real_bl) { my $d=$dirs[$di++]; - opendir(DIR,"$bl/$d") || warn "opendir($bl/$d): $!\n"; + opendir(DIR,"$real_bl/$d") || warn "opendir($real_bl/$d): $!\n"; # read .backupignore if exists - if (-f "$bl/$d/.backupignore") { - open(I,"$bl/$d/.backupignore"); + if (-f "$real_bl/$d/.backupignore") { + open(I,"$real_bl/$d/.backupignore"); while() { chomp; push @ignore,norm_dir("$d/$_"); } close(I); -print STDERR "ignore: ",join("|",@ignore),"\n"; - link "$bl/$d/.backupignore","$bc/$d/.backupignore" || - warn "can't copy $bl/$d/.backupignore to current backup dir: $!\n"; +#print STDERR "ignore: ",join("|",@ignore),"\n"; + link "$real_bl/$d/.backupignore","$bc/$d/.backupignore" || + warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n"; } # read .md5sum if exists - if (-f "$bl/$d/.md5sum") { - open(I,"$bl/$d/.md5sum"); + if (-f "$real_bl/$d/.md5sum") { + open(I,"$real_bl/$d/.md5sum"); while() { chomp; my ($md5,$f) = split(/\s+/,$_,2); @@ -256,7 +321,7 @@ next if ($f eq '.'); next if ($f eq '..'); my $pr = norm_dir("$d/$f"); # path relative - my $pf = norm_dir("$d/$f","$bl/"); # path full + my $pf = norm_dir("$d/$f","$real_bl/"); # path full if (grep(/^\Q$pr\E$/,@ignore) == 0) { if (-f $pf) { push @files,$pr; @@ -266,7 +331,7 @@ } elsif (-d $pf) { push @dirs,$pr; } else { - print STDERR "unknown type: $pf\n"; + print STDERR "not file or directory: $pf\n"; } } else { print STDERR "ignored: $pr\n"; @@ -274,7 +339,8 @@ } } - xlog($share,($#files+1)." files and ".($#dirs+1)." dirs on local disk before backup"); + # local dir always include / + xlog($share,($#files+1)." files and ".($#dirs)." dirs on local disk before backup"); # read smb filesystem @@ -285,9 +351,16 @@ $di = 0; while ($di <= $#smb_dirs) { - my $d=$smb_dirs[$di++]; + my $d=$smb_dirs[$di]; my $pf = norm_dir($d,"smb:$share/"); # path full - my $D = $smb->opendir($pf) || warn "smb->opendir($pf): $!\n"; + my $D = $smb->opendir($pf); + if (! $D) { + xlog($share,"FATAL: $share [$pf]: $!"); + # remove failing dir + delete $smb_dirs[$di]; + return 0; # failed + } + $di++; my @clutter = $smb->readdir_struct($D); foreach my $item (@clutter) { @@ -305,7 +378,7 @@ } elsif ($item->[0] == main::SMBC_DIR) { push @smb_dirs,$pr; } else { - print STDERR "unknown type: $pf\n"; + print STDERR "not file or directory [",$item->[0],"]: $pf\n"; } } else { print STDERR "smb ignored: $pr\n"; @@ -313,7 +386,7 @@ } } - xlog($share,($#smb_files+1)." files and ".($#smb_dirs+1)." dirs on remote share"); + xlog($share,($#smb_files+1)." files and ".($#smb_dirs)." dirs on remote share"); # sync dirs my $lc = List::Compare->new(\@dirs, \@smb_dirs); @@ -345,25 +418,30 @@ foreach my $f (@_) { #print "smb_copy $from/$f -> $to/$f\n"; - if (! open(F,"> $to/$f")) { - print STDERR "can't open new file $to/$f: $!\n"; - next; - } + my $md5 = Digest::MD5->new; my $fd = $smb->open("$from/$f"); if (! $fd) { - print STDERR "can't open smb file $from/$f: $!\n"; + xlog("WARNING","can't open smb file $from/$f: $!"); + next; + } + + if (! open(F,"> $to/$f")) { + xlog("WARNING","can't open new file $to/$f: $!"); next; } while (defined(my $b=$smb->read($fd,4096))) { print F $b; $l += length($b); + $md5->add($b); } $smb->close($fd); close(F); + $file_md5{$f} = $md5->hexdigest; + # FIX: this fails with -T my ($a,$m) = ($smb->stat("$from/$f"))[10,11]; utime $a, $m, "$to/$f" || @@ -417,12 +495,13 @@ xlog($share,"$transfer bytes transfered..."); foreach (@ln_files) { - link "$bl/$_","$bc/$_" || warn "link $bl/$_ -> $bc/$_: $!\n"; + link "$real_bl/$_","$bc/$_" || warn "link $real_bl/$_ -> $bc/$_: $!\n"; } # remove files foreach (sort @files2erase) { unlink "$bc/$_" || warn "unlink $_: $!\n"; + delete $file_md5{$_}; } # remove not needed dirs (after files) @@ -430,14 +509,196 @@ rmdir "$bc/$_" || warn "rmdir $_: $!\n"; } - - # FIX: create .md5sum + # remove old .md5sum + foreach (sort @dirs) { + unlink "$bc/$_/.md5sum" if (-e "$bc/$_/.md5sum"); + } + + # erase stale entries in .md5sum + my @md5_files = keys %file_md5; + $lc = List::Compare->new(\@md5_files, \@smb_files); + foreach my $file ($lc->get_Lonly) { + xlog("NOTICE","removing stale '$file' from .md5sum"); + delete $file_md5{$file}; + } + + # create .md5sum + my $last_dir = ''; + my $md5; + foreach my $f (sort { $file_md5{$a} cmp $file_md5{$b} } keys %file_md5) { + my $dir = dirname($f); + my $file = basename($f); +#print "$f -- $dir / $file<--\n"; + if ($dir ne $last_dir) { + close($md5) if ($md5); + open($md5, ">> $bc/$dir/.md5sum") || warn "can't create $bc/$dir/.md5sum: $!"; + $last_dir = $dir; +#print STDERR "writing $last_dir/.md5sum\n"; + } + print $md5 $file_md5{$f}," $file\n"; + } + close($md5) if ($md5); # create leatest link +#print "ln -s $bc $real_bl\n"; + if (-l $bl) { + unlink $bl || warn "can't remove old latest symlink $bl: $!\n"; + } symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n"; + # FIX: sanity check -- remove for speedup + xlog($share,"failed to create latest symlink $bl -> $bc...") if (readlink($bl) ne $bc || ! -l $bl); + xlog($share,"backup completed..."); -} + return 1; +} +__END__ #------------------------------------------------------------------------- + +=head1 NAME + +psinib - Perl Snapshot Is Not Incremental Backup + +=head1 SYNOPSIS + +./psinib.pl + +=head1 DESCRIPTION + +This script in current version support just backup of Samba (or Micro$oft +Winblowz) shares to central disk space. Central disk space is organized in +multiple directories named after: + +=over 4 + +=item * +server which is sharing files to be backed up + +=item * +name of share on server + +=item * +dated directory named like standard ISO date format (YYYYMMDD). + +=back + +In each dated directory you will find I of all files on +exported share on that particular date. + +You can also use symlink I which will lead you to +last completed backup. After that you can use some other backup +software to transfer I to tape, CD-ROM or some other media. + +=head2 Design considerations + +Since taking of share snapshot every day requires a lot of disk space and +network bandwidth, B uses several techniques to keep disk usage and +network traffic at acceptable level: + +=over 3 + +=item - usage of hard-links to provide same files in each snapshot (as opposed +to have multiple copies of same file) + +=item - usage of file size, atime and mtime to find changes of files without +transferring whole file over network (just share browsing is transfered +over network) + +=item - usage of C<.md5sum> files (compatible with command-line utility +C) to keep file between snapshots hard-linked + +=back + +=head1 CONFIGURATION + +This section is not yet written. + +=head1 HACKS, TRICKS, BUGS and LIMITATIONS + +This chapter will have all content that doesn't fit anywhere else. + +=head2 Can snapshots be more frequent than daily? + +There is not real reason why you can't take snapshot more often than +once a day. Actually, if you are using B to backup Windows +workstations you already know that they tend to come-and-go during the day +(reboots probably ;-), so running B several times a day increases +your chance of having up-to-date backup (B will not make multiple +snapshots for same day, nor will it update snapshot for current day if +it already exists). + +However, changing B to produce snapshots which are, for example, hourly +is a simple change of C<$DIR_TIME_FMT> which is currently set to +C<'%Y%m%d'> (see I documentation for explanation of that +format). If you change that to C<'%Y%m%d-%H> you can have hourly snapshots +(if your network is fast enough, that is...). Also, some of messages in +program will sound strange, but other than that it should work. +I. + +=head2 Do I really need to share every directory which I want to snapshot? + +Actually, no. Due to usage of C module, you can also +specify sub-directory inside your share that you want to backup. This feature +is most useful if you want to use administrative shares (but, have in mind +that you have to enter your Win administrator password in unencrypted file on +disk to do that) like this: + + smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win + +After that you will get directories with snapshots like: + + server/c_WinNT_fonts/yyyymmdd/.... + +=head2 Won't I run out of disk space? + +Of course you will... Snapshots and logfiles will eventually fill-up your disk. +However, you can do two things to stop that: + +=head3 Clean snapshort older than x days + +You can add following command to your C crontab: + + find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \; + +I assume that C is directory in which are your snapshots +and that you don't want to keep snapshots older than 11 days (that's +C<-mtime +11> part of command). + +=head3 Rotate your logs + +I will leave that to you. I relay on GNU/Debian's C to do it for me. + +=head2 What are I directories? + +If there isn't I symlink in snapshot directory, it's preatty safe to +assume that previous backup from that day failed. So, that directory will +be renamed to I and snapshot will be performed again, +linking same files (other alternative would be to erase that dir and find +second-oldest directory, but this seemed like more correct approach). + +=head2 I can't connect to any share + +Please verify that nmblookup (which is part of samba package) is in /bin or +/usr/bin. Also verify that nmblookup returns IP address for your server +using: + + $ nmblookup tvhouse + querying tvhouse on 192.168.34.255 + 192.168.34.30 tvhouse<00> + +If you don't get any output, your samba might not listen to correct interface +(see interfaces in smb.conf). + +=head1 AUTHOR + +Dobrica Pavlinusic + +L + +=head1 LICENSE + +This product is licensed under GNU Public License (GPL) v2 or later. + +=cut