/[psinib]/psinib.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /psinib.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.14 - (show annotations)
Sun Oct 12 18:47:35 2003 UTC (20 years, 5 months ago) by dpavlin
Branch: MAIN
Changes since 1.13: +14 -4 lines
File MIME type: text/plain
command-line options

1 #!/usr/bin/perl -w
2 #
3 # psinib - Perl Snapshot Is Not Incremental Backup
4 #
5 # written by Dobrica Pavlinusic <dpavlin@rot13.org> 2003-01-03
6 # released under GPL v2 or later.
7 #
8 # Backup SMB directories using file produced by LinNeighbourhood (or some
9 # other program [vi :-)] which produces file in format:
10 #
11 # smbmount service mountpoint options
12 #
13 #
14 # usage:
15 # $ psinib.pl mountscript
16
17 use strict 'vars';
18 use Data::Dumper;
19 use Net::Ping;
20 use POSIX qw(strftime);
21 use List::Compare;
22 use Filesys::SmbClient;
23 #use Taint;
24 use Fcntl qw(LOCK_EX LOCK_NB);
25 use Digest::MD5;
26 use File::Basename;
27 use Getopt::Long;
28
# configuration
my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S';   # strftime format for logfile
my $DIR_TIME_FMT = '%Y%m%d';              # strftime format for backup dir

my $LOG = '/var/log/backup.log';          # add path here...
#$LOG = '/tmp/backup.log';

# store backups in which directory
my $BACKUP_DEST = '/backup/isis_backup';
#my $BACKUP_DEST = '/tmp/backup/';

# files to ignore in backup
my @ignore = ('.md5sum', '.backupignore', 'backupignore.txt');

# open log (bareword handle L is what xlog() writes to)
open(L, '>>', $LOG) || die "can't open log $LOG: $!";
select((select(L), $|=1)[0]);   # flush output

# make a lock on logfile: retry once a second, give up after 10 tries

my $c = 0;
{
    flock L, LOCK_EX | LOCK_NB and last;
    sleep 1;
    redo if ++$c < 10;
    # no response for 10 sec, bail out
    xlog("ABORT","can't take lock on $LOG -- another $0 running?");
    exit 1;
}

# taint path: nmblookup should be there!
$ENV{'PATH'} = "/usr/bin:/bin";

my $use_ping = 1;   # default: use ping to verify that host is up

# --ping / --noping toggles the reachability check,
# --backupdest=DIR overrides the destination directory (takes a value;
# a negatable boolean here would overwrite the path with 1 or 0)
my $result = GetOptions(
    "ping!"        => \$use_ping,
    "backupdest=s" => \$BACKUP_DEST,
);
die "usage: $0 [--noping] [--backupdest=dir] [mountscript]\n" if (! $result);

my $mounts = shift @ARGV ||
    'mountscript';
#   die "usage: $0 mountscript";


my @in_backup;      # shares which are backed up in this run

my $ping;
if ($use_ping) {
    $ping = Net::Ping->new("tcp", 2);
    # ping will try tcp connect to netbios-ssn (139)
    $ping->{port_num} = getservbyname("netbios-ssn", "tcp") || 139;
}

my $backup_ok = 0;

# shared with snap_share() and smb_copy()
my $smb;
my %smb_atime;
my %smb_mtime;
my %file_md5;

open(my $mounts_fh, '<', $mounts) || die "can't open $mounts: $!";
while (my $line = <$mounts_fh>) {
    chomp $line;
    next if ($line !~ /^\s*smbmount\s/);

    # split(' ') skips leading whitespace, so indented lines parse the
    # same way as unindented ones: smbmount service mountpoint options
    my (undef,$share,undef,$opt) = split(' ',$line,4);
    next if (! defined $share);
    $opt = '' if (! defined $opt);

    my ($user,$passwd,$workgroup,$ip);

    foreach my $pair (split(/,/,$opt)) {
        my ($n,$v) = split(/=/,$pair,2);
        next if (! defined $n || ! defined $v);
        if ($n =~ m/username/i) {
            # smbmount accepts user, user%password, user/workgroup
            # and user/workgroup%password
            if ($v =~ m#^(.+)/(.+)%(.+)$#) {
                ($user,$workgroup,$passwd) = ($1,$2,$3);
            } elsif ($v =~ m#^(.+)/(.+)$#) {
                ($user,$workgroup) = ($1,$2);
            } elsif ($v =~ m#^(.+)%(.+)$#) {
                ($user,$passwd) = ($1,$2);
            } else {
                $user = $v;
            }
        } elsif ($n =~ m#workgroup#i) {
            $workgroup = $v;
        } elsif ($n =~ m#ip#i) {
            $ip = $v;
        }
    }

    push @in_backup,$share;

    my ($host,$dir,$date_dir) = share2host_dir($share);
    if (! defined $host) {
        # share2host_dir already told the user; count it as failed
        xlog($share,"can't parse share into host and directory, skipping");
        next;
    }

    my $bl = "$BACKUP_DEST/$host/$dir/latest";      # latest backup
    my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";   # current one
    if (-l $bl) {
        my $real_bl = readlink($bl) || die "can't read link $bl: $!";
        $real_bl = "$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
        # latest already points at today's snapshot directory
        if (-d $bc && $real_bl eq $bc) {
            print "$share allready backuped...\n";
            $backup_ok++;
            next;
        }
    }

    print "working on $share\n";

    # try to nmblookup IP
    $ip = get_ip($share) if (! $ip);
    if (! $ip) {
        xlog($share,"can't find IP address, skipping");
        next;
    }
    xlog($share,"IP is $ip");

    # honour the ping result unless the user asked for --noping
    if ($use_ping && ! $ping->ping($ip)) {
        xlog($share,"host $ip is not responding, skipping");
        next;
    }

    if (snap_share($share,$user,$passwd,$workgroup)) {
        $backup_ok++;
    }
}
close($mounts_fh);

# avoid division by zero when the mount script lists no shares
my $total   = scalar @in_backup;
my $percent = $total ? int($backup_ok*100/$total) : 0;
xlog("","$backup_ok backups completed of total $total this time ($percent %)");

1;
154
155 #-------------------------------------------------------------------------
156
157
# Get IP number of the host named in a share by asking nmblookup.
#
# Parameters:
#   $share - share in the form //host/directory[/sub/dir...]
#
# Returns the first dotted-quad address which nmblookup prints in front of
# the host name, or nothing (undef in scalar context, empty list in list
# context) if the share has no host part, nmblookup can't be started or it
# doesn't report an address.
sub get_ip {
    my $share = shift;

    # no host part: nothing to look up, so don't run nmblookup at all
    return unless (defined $share && $share =~ m#//([^/]+)/#);
    my $host = $1;

    # list form of pipe open: host name is passed as a single argument
    # and never interpreted by a shell
    my $pid = open(my $nmb, '-|', 'nmblookup', $host);
    return unless ($pid);
    my $answer = do { local $/; <$nmb> };
    close($nmb);

    # host name is quoted so that dots and dashes in it match literally
    if (defined $answer && $answer =~ m/(\d+\.\d+\.\d+\.\d+)\s+\Q$host\E/i) {
        return $1;
    }
    return;
}
169
170
# Write one entry to screen (STDERR) and to the log file (handle L).
#
# Parameters:
#   first  - share (or other tag) which is written in front of the message
#   second - message text; a false value is replaced by '[no log entry]'
#
# The log line is prefixed with the current local time formatted using
# $LOG_TIME_FMT. Returns the result of the print to the log file.
sub xlog {
    my ($tag, $text) = @_;

    my $stamp = strftime($LOG_TIME_FMT, localtime);
    $text = '[no log entry]' unless ($text);

    print STDERR $text, "\n";
    return print L "$stamp $tag\t$text\n";
}
179
# Dump warnings and fatal errors into the log (via xlog) before passing them
# on. Handlers are installed at compile time so that they are in place
# before the main program starts running.
BEGIN {
    $SIG{'__WARN__'} = sub {
        xlog('WARN', $_[0]);
        warn $_[0];
    };
    $SIG{'__DIE__'} = sub {
        # the hook is also called for exceptions raised inside eval
        # (ours or in loaded modules); those may be caught and are not
        # fatal, so pass them on without a DIE entry in the log
        die $_[0] if ($^S);
        xlog('DIE', $_[0]);
        die $_[0];
    };
}
183
184
# Split share name into host, directory and current date directory.
#
# Parameters:
#   $share - share in the form //host/directory[/sub/dir...]
#
# Returns list (host, dir, date_dir) where every non-word character in dir
# is replaced by an underscore (leading and trailing underscores removed)
# and date_dir is the current local time formatted using $DIR_TIME_FMT.
# If the share can't be parsed a message is printed and nothing is returned.
sub share2host_dir {
    my ($share) = @_;

    unless ($share =~ m#//([^/]+)/(.+)$#) {
        print "Can't parse share $share into host and directory!\n";
        return;
    }
    my ($host, $dir) = ($1, $2);

    # make directory part safe to use as a single path component
    for ($dir) {
        s/\W/_/g;
        s/^_+//;
        s/_+$//;
    }

    my $today = strftime($DIR_TIME_FMT, localtime);
    return ($host, $dir, $today);
}
200
201
# Normalise a path: collapse repeated slashes and strip leading and trailing
# slashes. If a (true) prefix is given it is prepended verbatim.
sub norm_dir {
    my $foo = shift;
    my $prefix = shift;
    $foo =~ s#//+#/#g;
    $foo =~ s#/+$##g;
    $foo =~ s#^/+##g;
    return $prefix.$foo if ($prefix);
    return $foo;
}

# Copy files from SMB share to local directory.
#
# Parameters:
#   $smb  - Filesys::SmbClient object
#   $from - source prefix (smb:// URL of share)
#   $to   - destination directory
#   rest  - paths (relative to $from and $to) of files to copy
#
# For every copied file the MD5 sum is stored in %file_md5 (keyed by relative
# path) and atime/mtime of the local copy are set from the remote file.
# Files which can't be opened or written are logged and skipped.
# Returns number of bytes transferred.
sub smb_copy {
    my $smb = shift;

    my $from = shift;
    my $to = shift;

    my $bytes = 0;

    foreach my $f (@_) {
        my $fd = $smb->open("$from/$f");
        if (! $fd) {
            xlog("WARNING","can't open smb file $from/$f: $!");
            next;
        }

        my $out;
        if (! open($out, '>', "$to/$f")) {
            xlog("WARNING","can't open new file $to/$f: $!");
            $smb->close($fd);   # don't leak the remote handle
            next;
        }
        binmode($out);

        my $md5 = Digest::MD5->new;
        while (defined(my $buf = $smb->read($fd,4096))) {
            print $out $buf;
            $bytes += length($buf);
            $md5->add($buf);
        }

        $smb->close($fd);
        # buffered write errors (disk full...) show up at close
        if (! close($out)) {
            xlog("WARNING","can't write new file $to/$f: $!");
            next;
        }

        $file_md5{$f} = $md5->hexdigest;

        # FIX: this fails with -T
        my ($atime,$mtime) = ($smb->stat("$from/$f"))[10,11];
        utime($atime, $mtime, "$to/$f")
            or warn "can't update utime on $to/$f: $!\n";
    }
    return $bytes;
}

# Make a snapshot of a share.
#
# Parameters:
#   $share     - share in the form //host/directory[/sub/dir...]
#   $username  - user to connect as     (warning is issued if missing)
#   $password  - password of that user  (warning is issued if missing)
#   $workgroup - workgroup of that user (warning is issued if missing)
#
# Snapshot is created in $BACKUP_DEST/host/dir/date_dir. Files which have
# same size, atime and mtime as in the latest snapshot are hard-linked from
# it, all others are copied from the share. At the end .md5sum files are
# written and symlink "latest" is pointed to the new snapshot.
#
# Returns 1 if snapshot is completed (or already exists for today) and 0 if
# share can't be parsed or a directory on it can't be read. Dies if
# BACKUP_DEST doesn't exist, if backup directories can't be created or if
# SMB client can't be created.
sub snap_share {

    my $share = shift;

    my %param = ( debug => 0 );

    # pass only credentials which we really have ("shift || warn" would
    # store return value of warn as username/password/workgroup)
    my %missing = (
        username  => "can't find username for share $share",
        password  => "can't find passwod for share $share",
        workgroup => "can't find workgroup for share $share",
    );
    foreach my $key (qw(username password workgroup)) {
        my $value = shift;
        if (defined $value && length $value) {
            $param{$key} = $value;
        } else {
            warn $missing{$key};
        }
    }

    my ($host,$dir,$date_dir) = share2host_dir($share);
    if (! defined $host) {
        xlog($share,"can't parse share into host and directory, skipping");
        return 0;
    }

    my $base = "$BACKUP_DEST/$host/$dir";
    # latest backup directory
    my $bl = "$base/latest";
    # current backup directory
    my $bc = "$base/$date_dir";

    my $real_bl;
    if (-l $bl) {
        $real_bl = readlink($bl) || die "can't read link $bl: $!";
        $real_bl = "$base/$real_bl" if (substr($real_bl,0,1) ne "/");
    } else {
        print "no old backup, trying to find last backup, ";
        my @bl_dirs;
        if (opendir(my $bl_dir, $base)) {
            @bl_dirs = sort grep { !/^\./ && -d "$base/$_" } readdir($bl_dir);
            closedir($bl_dir);
        }
        if (@bl_dirs) {
            $real_bl = pop @bl_dirs;
            print "using $real_bl as latest...\n";
            $real_bl = "$base/$real_bl";
            if ($real_bl eq $bc) {
                xlog($share,"latest from today (possible partial backup)");
                rename($real_bl, "$real_bl.partial")
                    or warn "can't reaname partial backup: $!";
                $real_bl .= ".partial";
            }
        } else {
            # no directory for this share or no snapshots in it
            print "this is first run...\n";
        }
    }

    # latest already points at today's snapshot directory
    if (-d $bc && $real_bl && $real_bl eq $bc) {
        print "$share allready backuped...\n";
        return 1;
    }

    die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST);

    if (! -e "$BACKUP_DEST/$host") {
        mkdir("$BACKUP_DEST/$host")
            or die "can't make dir for host $host, $BACKUP_DEST/$host: $!";
        print "created host directory $BACKUP_DEST/$host...\n";
    }

    if (! -e $base) {
        mkdir($base)
            or die "can't make dir for share $share, $BACKUP_DEST/$host/$dir $!";
        print "created dir for share $share, $BACKUP_DEST/$host/$dir...\n";
    }

    # directory can be left over from failed run which couldn't be renamed
    if (! -d $bc) {
        mkdir($bc) or die "can't make dir for current backup $bc: $!";
    }

    my $transfer = 0;   # bytes transfered over network

    # paths to ignore for this share only: global defaults plus whatever
    # .backupignore files of this share add
    my %skip = map { $_ => 1 } @ignore;

    # this will store all available files and sizes
    my @dirs = ( "/" );
    my @files;
    my %file_size;
    my %file_atime;
    my %file_mtime;
    %file_md5 = ();

    # read local filesystem (latest snapshot), parent directories first
    my $di = 0;
    while ($real_bl && $di <= $#dirs) {
        my $d = $dirs[$di++];
        my $opened = opendir(my $dh, "$real_bl/$d");
        warn "opendir($real_bl/$d): $!\n" if (! $opened);

        # read .backupignore if exists
        if (-f "$real_bl/$d/.backupignore") {
            if (open(my $in, '<', "$real_bl/$d/.backupignore")) {
                while (my $line = <$in>) {
                    chomp $line;
                    next if ($line eq '');  # would ignore directory itself
                    $skip{ norm_dir("$d/$line") } = 1;
                }
                close($in);
            } else {
                warn "can't read $real_bl/$d/.backupignore: $!\n";
            }
            link("$real_bl/$d/.backupignore","$bc/$d/.backupignore")
                or warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n";
        }

        # read .md5sum if exists; it has file names relative to directory
        # in which it is, while %file_md5 is keyed by path relative to share
        if (-f "$real_bl/$d/.md5sum") {
            if (open(my $in, '<', "$real_bl/$d/.md5sum")) {
                while (my $line = <$in>) {
                    chomp $line;
                    my ($sum,$name) = split(/\s+/,$line,2);
                    next unless (defined $name && length $name);
                    $file_md5{ norm_dir("$d/$name") } = $sum;
                }
                close($in);
            } else {
                warn "can't read $real_bl/$d/.md5sum: $!\n";
            }
        }

        next if (! $opened);
        my @clutter = readdir($dh);
        closedir($dh);

        foreach my $f (@clutter) {
            next if ($f eq '.' || $f eq '..');
            my $pr = norm_dir("$d/$f");                 # path relative
            my $pf = norm_dir("$d/$f","$real_bl/");     # path full
            if ($skip{$pr}) {
                print STDERR "ignored: $pr\n";
            } elsif (-f $pf) {
                my @st = stat($pf);     # one stat for all three values
                push @files,$pr;
                $file_size{$pr}  = $st[7];
                $file_atime{$pr} = $st[8];
                $file_mtime{$pr} = $st[9];
            } elsif (-d $pf) {
                push @dirs,$pr;
            } else {
                print STDERR "not file or directory: $pf\n";
            }
        }
    }

    # local dir always include /
    xlog($share,($#files+1)." files and ".($#dirs)." dirs on local disk before backup");

    # read smb filesystem

    my $as_user  = defined $param{username}  ? $param{username}  : '';
    my $as_group = defined $param{workgroup} ? $param{workgroup} : '';
    xlog($share,"smb to $share as $as_user/$as_group");

    # FIX: how to aviod creation of ~/.smb/smb.conf ?
    $smb = Filesys::SmbClient->new(%param) or die "SmbClient :$!\n";

    my @smb_dirs = ( "/" );
    my @smb_files;
    my %smb_size;

    $di = 0;
    while ($di <= $#smb_dirs) {
        my $d = $smb_dirs[$di++];
        my $url = norm_dir($d,"smb:$share/");   # path full
        my $D = $smb->opendir($url);
        if (! $D) {
            xlog($share,"FATAL: $share [$url]: $!");
            return 0;   # failed
        }

        my @clutter = $smb->readdir_struct($D);
        $smb->closedir($D);

        foreach my $item (@clutter) {
            my $f = $item->[1];
            next if ($f eq '.' || $f eq '..');
            my $pr = norm_dir("$d/$f");                 # path relative
            my $pf = norm_dir("$d/$f","smb:$share/");   # path full
            if ($skip{$pr}) {
                print STDERR "smb ignored: $pr\n";
            } elsif ($item->[0] == main::SMBC_FILE) {
                # one network round-trip instead of three
                my @st = $smb->stat($pf);
                push @smb_files,$pr;
                $smb_size{$pr}  = $st[7];
                $smb_atime{$pr} = $st[10];
                $smb_mtime{$pr} = $st[11];
            } elsif ($item->[0] == main::SMBC_DIR) {
                push @smb_dirs,$pr;
            } else {
                print STDERR "not file or directory [",$item->[0],"]: $pf\n";
            }
        }
    }

    xlog($share,($#smb_files+1)." files and ".($#smb_dirs)." dirs on remote share");

    # sync dirs
    my $lc = List::Compare->new(\@dirs, \@smb_dirs);

    my @dirs2erase = $lc->get_Lonly;
    my @dirs2create = $lc->get_Ronly;
    xlog($share,($#dirs2erase+1)." dirs to erase and ".($#dirs2create+1)." dirs to create");

    # create new dirs (sorted, so parents come before children); root of
    # snapshot already exists
    foreach my $d (sort @smb_dirs) {
        next if (-d "$bc/$d");
        mkdir("$bc/$d") or warn "mkdir $d: $!\n";
    }

    # sync files
    $lc = List::Compare->new(\@files, \@smb_files);

    my @files2erase = $lc->get_Lonly;
    my @files2create = $lc->get_Ronly;
    xlog($share,($#files2erase+1)." files to erase and ".($#files2create+1)." files to create");

    # copy new files
    foreach my $f (@files2create) {
        $transfer += smb_copy($smb,"smb:$share",$bc,$f);
    }

    my $size_sync = 0;
    my $atime_sync = 0;
    my $mtime_sync = 0;
    my @sync_files;
    my @ln_files;

    foreach my $f ($lc->get_intersection) {

        # explicit flag: file name itself can be false ("0")
        my $changed = 0;

        if ($file_size{$f} != $smb_size{$f}) {
            $changed = 1;
            $size_sync++;
        }
        if ($file_atime{$f} != $smb_atime{$f}) {
            $changed = 1;
            $atime_sync++;
        }
        if ($file_mtime{$f} != $smb_mtime{$f}) {
            $changed = 1;
            $mtime_sync++;
        }

        if ($changed) {
            push @sync_files, $f;
        } else {
            push @ln_files, $f;
        }
    }

    xlog($share,($#sync_files+1)." files will be updated (diff: $size_sync size, $atime_sync atime, $mtime_sync mtime), ".($#ln_files+1)." will be linked.");

    foreach my $f (@sync_files) {
        $transfer += smb_copy($smb,"smb:$share",$bc,$f);
    }

    xlog($share,"$transfer bytes transfered...");

    foreach my $f (@ln_files) {
        link("$real_bl/$f","$bc/$f")
            or warn "link $real_bl/$f -> $bc/$f: $!\n";
    }

    # remove files (usually they don't exist in new snapshot at all)
    foreach my $f (sort @files2erase) {
        if (-e "$bc/$f" || -l "$bc/$f") {
            unlink("$bc/$f") or warn "unlink $f: $!\n";
        }
        delete $file_md5{$f};
    }

    # remove not needed dirs (after files), children before parents
    foreach my $d (reverse sort @dirs2erase) {
        next if (! -d "$bc/$d");
        rmdir("$bc/$d") or warn "rmdir $d: $!\n";
    }

    # remove old .md5sum, new ones are appended to below
    foreach my $d (sort @dirs) {
        next if (! -e "$bc/$d/.md5sum");
        unlink("$bc/$d/.md5sum") or warn "unlink $bc/$d/.md5sum: $!\n";
    }

    # erase stale entries in .md5sum
    my @md5_files = keys %file_md5;
    $lc = List::Compare->new(\@md5_files, \@smb_files);
    foreach my $file ($lc->get_Lonly) {
        xlog("NOTICE","removing stale '$file' from .md5sum");
        delete $file_md5{$file};
    }

    # create .md5sum: one file per directory, opened once
    my %sums_in;
    foreach my $f (keys %file_md5) {
        push @{ $sums_in{ dirname($f) } }, $f;
    }
    foreach my $sum_dir (sort keys %sums_in) {
        my $out;
        if (! open($out, '>>', "$bc/$sum_dir/.md5sum")) {
            warn "can't create $bc/$sum_dir/.md5sum: $!";
            next;
        }
        foreach my $f (sort @{ $sums_in{$sum_dir} }) {
            # md5sum format: sum, two spaces, file name
            print $out $file_md5{$f}, "  ", basename($f), "\n";
        }
        close($out) or warn "can't write $bc/$sum_dir/.md5sum: $!";
    }

    # create latest link
    if (-l $bl) {
        unlink($bl) or warn "can't remove old latest symlink $bl: $!\n";
    }
    symlink($bc,$bl) or warn "can't create latest symlink $bl -> $bc: $!\n";

    # FIX: sanity check -- remove for speedup
    if (! -l $bl || readlink($bl) ne $bc) {
        xlog($share,"failed to create latest symlink $bl -> $bc...");
    }

    xlog($share,"backup completed...");

    return 1;
}
556 __END__
557 #-------------------------------------------------------------------------
558
559
560 =head1 NAME
561
562 psinib - Perl Snapshot Is Not Incremental Backup
563
564 =head1 SYNOPSIS
565
566 ./psinib.pl [--noping] [mountscript]
567
568 =head1 DESCRIPTION
569
570 The current version of this script supports only backup of Samba (or Micro$oft
571 Winblowz) shares to central disk space. Central disk space is organized in
572 multiple directories named after:
573
574 =over 4
575
576 =item *
577 server which is sharing files to be backed up
578
579 =item *
580 name of share on server
581
582 =item *
583 dated directory named like standard ISO date format (YYYYMMDD).
584
585 =back
586
587 In each dated directory you will find I<snapshot> of all files on
588 exported share on that particular date.
589
590 You can also use symlink I<latest> which will lead you to
591 last completed backup. After that you can use some other backup
592 software to transfer I<snapshot> to tape, CD-ROM or some other media.
593
594 =head2 Design considerations
595
596 Since taking of share snapshot every day requires a lot of disk space and
597 network bandwidth, B<psinib> uses several techniques to keep disk usage and
598 network traffic at acceptable level:
599
600 =over 3
601
602 =item - usage of hard-links to provide same files in each snapshot (as opposed
603 to have multiple copies of same file)
604
605 =item - usage of file size, atime and mtime to find changes of files without
606 transferring whole file over network (just share browsing is transferred
607 over network)
608
609 =item - usage of C<.md5sum> files (compatible with command-line utility
610 C<md5sum>) to keep file between snapshots hard-linked
611
612 =back
613
614 =head1 CONFIGURATION
615
616 This section is not yet written.
617
618 =head1 HACKS, TRICKS, BUGS and LIMITATIONS
619
620 This chapter will have all content that doesn't fit anywhere else.
621
622 =head2 Can snapshots be more frequent than daily?
623
624 There is no real reason why you can't take snapshot more often than
625 once a day. Actually, if you are using B<psinib> to backup Windows
626 workstations you already know that they tend to come-and-go during the day
627 (reboots probably ;-), so running B<psinib> several times a day increases
628 your chance of having up-to-date backup (B<psinib> will not make multiple
629 snapshots for same day, nor will it update snapshot for current day if
630 it already exists).
631
632 However, changing B<psinib> to produce snapshots which are, for example, hourly
633 is a simple change of C<$DIR_TIME_FMT> which is currently set to
634 C<'%Y%m%d'> (see I<strftime> documentation for explanation of that
635 format). If you change that to C<'%Y%m%d-%H'> you can have hourly snapshots
636 (if your network is fast enough, that is...). Also, some of messages in
637 program will sound strange, but other than that it should work.
638 I<You have been warned>.
639
640 =head2 Do I really need to share every directory which I want to snapshot?
641
642 Actually, no. Due to usage of C<Filesys::SmbClient> module, you can also
643 specify sub-directory inside your share that you want to backup. This feature
644 is most useful if you want to use administrative shares (but, have in mind
645 that you have to enter your Win administrator password in unencrypted file on
646 disk to do that) like this:
647
648 smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win
649
650 After that you will get directories with snapshots like:
651
652 server/c__WinNT_fonts/yyyymmdd/....
653
654 =head2 Won't I run out of disk space?
655
656 Of course you will... Snapshots and logfiles will eventually fill-up your disk.
657 However, you can do two things to stop that:
658
659 =head3 Clean snapshots older than x days
660
661 You can add following command to your C<root> crontab:
662
663 find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \;
664
665 I assume that C</backup/isis_backup> is directory in which are your snapshots
666 and that you don't want to keep snapshots older than 11 days (that's
667 C<-mtime +11> part of command).
668
669 =head3 Rotate your logs
670
671 I will leave that to you. I rely on GNU/Debian's C<logrotate> to do it for me.
672
673 =head2 What are I<YYYYMMDD.partial> directories?
674
675 If there isn't I<latest> symlink in snapshot directory, it's pretty safe to
676 assume that previous backup from that day failed. So, that directory will
677 be renamed to I<YYYYMMDD.partial> and snapshot will be performed again,
678 linking same files (other alternative would be to erase that dir and find
679 second-oldest directory, but this seemed like more correct approach).
680
681 =head1 AUTHOR
682
683 Dobrica Pavlinusic <dpavlin@rot13.org>
684
685 L<http://www.rot13.org/~dpavlin/>
686
687 =head1 LICENSE
688
689 This product is licensed under GNU Public License (GPL) v2 or later.
690
691 =cut

  ViewVC Help
Powered by ViewVC 1.1.26