--- psinib.pl 2003/01/04 11:42:56 1.1 +++ psinib.pl 2003/01/04 15:59:14 1.4 @@ -12,7 +12,7 @@ # # # usage: -# $ backup.pl mountscript +# $ psinib.pl mountscript use strict 'vars'; use Data::Dumper; @@ -21,6 +21,9 @@ use List::Compare; use Filesys::SmbClient; #use Taint; +use Fcntl qw(LOCK_EX LOCK_NB); +use Digest::MD5; +use File::Basename; # configuration my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S'; # strftime format for logfile @@ -39,6 +42,18 @@ open(L, "> $LOG") || die "can't open log $LOG: $!"; select((select(L), $|=1)[0]); # flush output +# make a lock on logfile + +my $c = 0; +{ + flock L, LOCK_EX | LOCK_NB and last; + sleep 1; + redo if ++$c < 10; + # no response for 10 sec, bail out + print STDERR "can't take lock on $LOG -- another $0 running?\n"; + exit 1; +} + # taint path: nmblookup should be there! $ENV{'PATH'} = "/usr/bin:/bin"; @@ -56,6 +71,7 @@ my $smb; my %smb_atime; my %smb_mtime; +my %file_md5; open(M, $mounts) || die "can't open $mounts: $!"; while() { @@ -84,8 +100,26 @@ push @in_backup,$share; + + my ($host,$dir,$date_dir) = share2host_dir($share); + my $bl = "$BACKUP_DEST/$host/$dir/latest"; # latest backup + my $bc = "$BACKUP_DEST/$host/$dir/$date_dir"; # current one + my $real_bl; + if (-e $bl) { + $real_bl=readlink($bl) || die "can't read link $bl: $!"; + $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/"); + if (-e $bc && $real_bl eq $bc) { + print "$share allready backuped...\n"; + $backup_ok++; + next; + } + + } + + print "working on $share\n"; + my $ip = get_ip($share); if ($ip) { @@ -104,6 +138,7 @@ #------------------------------------------------------------------------- + # get IP number from share sub get_ip { my $share = shift; @@ -116,6 +151,8 @@ } } + +# write entry to screen and log sub xlog { my $share = shift; my $t = strftime $LOG_TIME_FMT, localtime; @@ -124,16 +161,10 @@ print L "$t $share\t$m\n"; } -sub snap_share { +# split share name to host, dir and currnet date dir +sub share2host_dir { my $share 
= shift; - - my %param = ( debug => 0 ); - - $param{username} = shift; - $param{password} = shift; - $param{workgroup} = shift; - my ($host,$dir); if ($share =~ m#//([^/]+)/(.+)$#) { ($host,$dir) = ($1,$2); @@ -144,8 +175,22 @@ print "Can't parse share $share into host and directory!\n"; return; } + return ($host,$dir,strftime $DIR_TIME_FMT, localtime); +} + + +# make a snapshot of a share +sub snap_share { + + my $share = shift; - my $date_dir = strftime $DIR_TIME_FMT, localtime; + my %param = ( debug => 0 ); + + $param{username} = shift; + $param{password} = shift; + $param{workgroup} = shift; + + my ($host,$dir,$date_dir) = share2host_dir($share); # latest backup directory my $bl = "$BACKUP_DEST/$host/$dir/latest"; @@ -189,14 +234,12 @@ my %file_size; my %file_atime; my %file_mtime; - my %file_md5; + #my %file_md5; my @smb_files; my %smb_size; #my %smb_atime; #my %smb_mtime; - my %smb_md5; - sub norm_dir { my $foo = shift; @@ -337,6 +380,8 @@ next; } + my $md5 = Digest::MD5->new; + my $fd = $smb->open("$from/$f"); if (! 
$fd) { print STDERR "can't open smb file $from/$f: $!\n"; @@ -346,11 +391,14 @@ while (defined(my $b=$smb->read($fd,4096))) { print F $b; $l += length($b); + $md5->add($b); } $smb->close($fd); close(F); + $file_md5{$f} = $md5->hexdigest; + # FIX: this fails with -T my ($a,$m) = ($smb->stat("$from/$f"))[10,11]; utime $a, $m, "$to/$f" || @@ -417,14 +465,141 @@ rmdir "$bc/$_" || warn "rmdir $_: $!\n"; } - - # FIX: create .md5sum + # remove old .md5sum + foreach (sort @dirs) { + unlink "$bc/$_/.md5sum" if (-e "$bc/$_/.md5sum"); + } + + # create .md5sum + my $last_dir = ''; + my $md5; + foreach my $f (sort { $file_md5{$a}<=>$file_md5{$b} } keys %file_md5) { + my $dir = dirname($f); + my $file = basename($f); +print "$f -- $dir / $file<--\n"; + if ($dir ne $last_dir) { + close($md5) if ($md5); + open($md5, ">> $bc/$dir/.md5sum") || warn "can't create $bc/$dir/.md5sum: $!"; + $last_dir = $dir; +print STDERR "writing $last_dir/.md5sum\n"; + } + print $md5 $file_md5{$f}," $file\n"; + } + close($md5); # create leatest link - symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n"; +# symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n"; xlog($share,"backup completed..."); } +__END__ #------------------------------------------------------------------------- + +=head1 NAME + +psinib - Perl Snapshot Is Not Incremental Backup + +=head1 SYNOPSIS + +./psinib.pl + +=head1 DESCRIPTION + +The current version of this script supports only backup of Samba (or Micro$oft +Winblowz) shares to a central disk space. The central disk space is organized in +multiple directories named after: + +=over 4 + +=item * +server which is sharing files to be backed up + +=item * +name of share on server + +=item * +dated directory named like standard ISO date format (YYYYMMDD). + +=back + +In each dated directory you will find a I<snapshot> of all files on the +exported share on that particular date. + +You can also use the symlink I<latest> which will lead you to the +last completed backup. 
After that you can use some other backup +software to transfer I<snapshot> to tape, CD-ROM or some other media. + +=head2 Design considerations + +Since taking a snapshot of a share every day requires a lot of disk space and +network bandwidth, B<psinib> uses several techniques to keep disk usage and +network traffic at an acceptable level: + +=over 3 + +=item - usage of hard-links to provide the same files in each snapshot (as opposed +to having multiple copies of the same file) + +=item - usage of file size, atime and mtime to find changed files without +transferring the whole file over the network (only share browsing is transferred +over the network) + +=item - usage of C<.md5sum> files (compatible with the command-line utility +C<md5sum>) to keep files hard-linked between snapshots + +=back + +=head1 CONFIGURATION + +This section is not yet written. + +=head1 HACKS, TRICKS, BUGS and LIMITATIONS + +This chapter will have all content that doesn't fit anywhere else. + +=head2 Can snapshots be more frequent than daily? + +There is no real reason why you can't take a snapshot more often than +once a day. Actually, if you are using B<psinib> to back up Windows +workstations you already know that they tend to come and go during the day +(reboots probably ;-), so running B<psinib> several times a day increases +your chance of having an up-to-date backup (B<psinib> will not make multiple +snapshots for the same day, nor will it update the snapshot for the current day if +it already exists). + +However, changing B<psinib> to produce snapshots which are, for example, hourly +is a simple change of C<$DIR_TIME_FMT> which is currently set to +C<'%Y%m%d'> (see I<strftime> documentation for explanation of that +format). If you change that to C<'%Y%m%d-%H'> you can have hourly snapshots +(if your network is fast enough, that is...). Also, some of the messages in +the program will sound strange, but other than that it should work. +I. + +=head2 Do I really need to share every directory which I want to snapshot? + +Actually, no. 
Due to usage of the C<Filesys::SmbClient> module, you can also +specify a sub-directory inside your share that you want to back up. This feature +is most useful if you want to use administrative shares (but keep in mind +that you have to enter your Win administrator password in an unencrypted file on +disk to do that) like this: + + smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win + +After that you will get directories with snapshots like: + + server/c_WinNT_fonts/yyyymmdd/.... + + +=head1 AUTHOR + +Dobrica Pavlinusic + +L + +=head1 LICENSE + +This product is licensed under GNU General Public License (GPL) v2 or later. + +=cut