/[psinib]/psinib.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /psinib.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.13 - (show annotations)
Sun Oct 12 17:44:21 2003 UTC (20 years, 5 months ago) by dpavlin
Branch: MAIN
Changes since 1.12: +12 -2 lines
File MIME type: text/plain
remove stale entries in .md5sum

1 #!/usr/bin/perl -w
2 #
3 # psinib - Perl Snapshot Is Not Incremental Backup
4 #
5 # written by Dobrica Pavlinusic <dpavlin@rot13.org> 2003-01-03
6 # released under GPL v2 or later.
7 #
8 # Backup SMB directories using file produced by LinNeighbourhood (or some
9 # other program [vi :-)] which produces file in format:
10 #
11 # smbmount service mountpoint options
12 #
13 #
14 # usage:
15 # $ psinib.pl mountscript
16
17 use strict 'vars';
18 use Data::Dumper;
19 use Net::Ping;
20 use POSIX qw(strftime);
21 use List::Compare;
22 use Filesys::SmbClient;
23 #use Taint;
24 use Fcntl qw(LOCK_EX LOCK_NB);
25 use Digest::MD5;
26 use File::Basename;
27
28 # configuration
29 my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S'; # strftime format for logfile
30 my $DIR_TIME_FMT = '%Y%m%d'; # strftime format for backup dir
31
32 my $LOG = '/var/log/backup.log'; # add path here...
33 #$LOG = '/tmp/backup.log';
34
35 # store backups in which directory
36 my $BACKUP_DEST = '/backup/isis_backup';
37 #my $BACKUP_DEST = '/tmp/backup/';
38
39 # files to ignore in backup
40 my @ignore = ('.md5sum', '.backupignore', 'backupignore.txt');
41
42 # open log
43 open(L, ">> $LOG") || die "can't open log $LOG: $!";
44 select((select(L), $|=1)[0]); # flush output
45
46 # make a lock on logfile
47
48 my $c = 0;
49 {
50 flock L, LOCK_EX | LOCK_NB and last;
51 sleep 1;
52 redo if ++$c < 10;
53 # no response for 10 sec, bail out
54 xlog("ABORT","can't take lock on $LOG -- another $0 running?");
55 exit 1;
56 }
57
58 # taint path: nmblookup should be there!
59 $ENV{'PATH'} = "/usr/bin:/bin";
60
61 my $mounts = shift @ARGV ||
62 'mountscript';
63 # die "usage: $0 mountscript";
64
65
66 my @in_backup; # shares which are backeduped this run
67
68 my $p = new Net::Ping->new("tcp", 2);
69 # ping will try tcp connect to netbios-ssn (139)
70 $p->{port_num} = getservbyname("netbios-ssn", "tcp");
71
72 my $backup_ok = 0;
73
74 my $smb;
75 my %smb_atime;
76 my %smb_mtime;
77 my %file_md5;
78
79 open(M, $mounts) || die "can't open $mounts: $!";
80 while(<M>) {
81 chomp;
82 next if !/^\s*smbmount\s/;
83 my (undef,$share,undef,$opt) = split(/\s+/,$_,4);
84
85 my ($user,$passwd,$workgroup,$ip);
86
87 foreach (split(/,/,$opt)) {
88 my ($n,$v) = split(/=/,$_,2);
89 if ($n =~ m/username/i) {
90 if ($v =~ m#^(.+)/(.+)%(.+)$#) {
91 ($user,$passwd,$workgroup) = ($1,$2,$3);
92 } elsif ($v =~ m#^(.+)/(.+)$#) {
93 ($user,$workgroup) = ($1,$2);
94 } elsif ($v =~ m#^(.+)%(.+)$#) {
95 ($user,$passwd) = ($1,$2);
96 } else {
97 $user = $v;
98 }
99 } elsif ($n =~ m#workgroup#i) {
100 $workgroup = $v;
101 } elsif ($n =~ m#ip#i) {
102 $ip = $v;
103 }
104 }
105
106 push @in_backup,$share;
107
108
109 my ($host,$dir,$date_dir) = share2host_dir($share);
110 my $bl = "$BACKUP_DEST/$host/$dir/latest"; # latest backup
111 my $bc = "$BACKUP_DEST/$host/$dir/$date_dir"; # current one
112 my $real_bl;
113 if (-l $bl) {
114 $real_bl=readlink($bl) || die "can't read link $bl: $!";
115 $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
116 if (-l $bc && $real_bl eq $bc) {
117 print "$share allready backuped...\n";
118 $backup_ok++;
119 next;
120 }
121
122 }
123
124
125 print "working on $share\n";
126
127 # try to nmblookup IP
128 $ip = get_ip($share) if (! $ip);
129
130 if ($ip) {
131 xlog($share,"IP is $ip");
132 if ($p->ping($ip)) {
133 if (snap_share($share,$user,$passwd,$workgroup)) {
134 $backup_ok++;
135 }
136 }
137 }
138 }
139 close(M);
140
141 xlog("","$backup_ok backups completed of total ".($#in_backup+1)." this time (".int($backup_ok*100/($#in_backup+1))." %)");
142
143 1;
144
145 #-------------------------------------------------------------------------
146
147
148 # get IP number from share
149 sub get_ip {
150 my $share = shift;
151
152 my $host = $1 if ($share =~ m#//([^/]+)/#);
153
154 my $ip = `nmblookup $host`;
155 if ($ip =~ m/(\d+\.\d+\.\d+\.\d+)\s$host/i) {
156 return $1;
157 }
158 }
159
160
161 # write entry to screen and log
162 sub xlog {
163 my $share = shift;
164 my $t = strftime $LOG_TIME_FMT, localtime;
165 my $m = shift || '[no log entry]';
166 print STDERR $m,"\n";
167 print L "$t $share\t$m\n";
168 }
169
170 # dump warn and dies into log
171 BEGIN { $SIG{'__WARN__'} = sub { xlog('WARN',$_[0]) ; warn $_[0] } }
172 BEGIN { $SIG{'__DIE__'} = sub { xlog('DIE',$_[0]) ; die $_[0] } }
173
174
175 # split share name to host, dir and currnet date dir
176 sub share2host_dir {
177 my $share = shift;
178 my ($host,$dir);
179 if ($share =~ m#//([^/]+)/(.+)$#) {
180 ($host,$dir) = ($1,$2);
181 $dir =~ s/\W/_/g;
182 $dir =~ s/^_+//;
183 $dir =~ s/_+$//;
184 } else {
185 print "Can't parse share $share into host and directory!\n";
186 return;
187 }
188 return ($host,$dir,strftime $DIR_TIME_FMT, localtime);
189 }
190
191
192 # make a snapshot of a share
193 sub snap_share {
194
195 my $share = shift;
196
197 my %param = ( debug => 0 );
198
199 $param{username} = shift || warn "can't find username for share $share";
200 $param{password} = shift || warn "can't find passwod for share $share";
201 $param{workgroup} = shift || warn "can't find workgroup for share $share";
202
203 my ($host,$dir,$date_dir) = share2host_dir($share);
204
205 # latest backup directory
206 my $bl = "$BACKUP_DEST/$host/$dir/latest";
207 # current backup directory
208 my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";
209
210 my $real_bl;
211 if (-l $bl) {
212 $real_bl=readlink($bl) || die "can't read link $bl: $!";
213 $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
214 } else {
215 print "no old backup, trying to find last backup, ";
216 if (opendir(BL_DIR, "$BACKUP_DEST/$host/$dir")) {
217 my @bl_dirs = sort grep { !/^\./ && -d "$BACKUP_DEST/$host/$dir/$_" } readdir(BL_DIR);
218 closedir(BL_DIR);
219 $real_bl=pop @bl_dirs;
220 print "using $real_bl as latest...\n";
221 $real_bl="$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
222 if ($real_bl eq $bc) {
223 xlog($share,"latest from today (possible partial backup)");
224 rename $real_bl,$real_bl.".partial" || warn "can't reaname partial backup: $!";
225 $real_bl .= ".partial";
226 }
227 } else {
228 print "this is first run...\n";
229 }
230 }
231
232 if (-l $bc && $real_bl && $real_bl eq $bc) {
233 print "$share allready backuped...\n";
234 return 1;
235 }
236
237 die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST);
238
239 if (! -e "$BACKUP_DEST/$host") {
240 mkdir "$BACKUP_DEST/$host" || die "can't make dir for host $host, $BACKUP_DEST/$host: $!";
241 print "created host directory $BACKUP_DEST/$host...\n";
242 }
243
244 if (! -e "$BACKUP_DEST/$host/$dir") {
245 mkdir "$BACKUP_DEST/$host/$dir" || die "can't make dir for share $share, $BACKUP_DEST/$host/$dir $!";
246 print "created dir for share $share, $BACKUP_DEST/$host/$dir...\n";
247 }
248
249 mkdir $bc || die "can't make dir for current backup $bc: $!";
250
251 my @dirs = ( "/" );
252 my @smb_dirs = ( "/" );
253
254 my $transfer = 0; # bytes transfered over network
255
256 # this will store all available files and sizes
257 my @files;
258 my %file_size;
259 my %file_atime;
260 my %file_mtime;
261 #my %file_md5;
262 %file_md5 = ();
263
264 my @smb_files;
265 my %smb_size;
266 #my %smb_atime;
267 #my %smb_mtime;
268
269 sub norm_dir {
270 my $foo = shift;
271 my $prefix = shift;
272 $foo =~ s#//+#/#g;
273 $foo =~ s#/+$##g;
274 $foo =~ s#^/+##g;
275 return $prefix.$foo if ($prefix);
276 return $foo;
277 }
278
279 # read local filesystem
280 my $di = 0;
281 while ($di <= $#dirs && $real_bl) {
282 my $d=$dirs[$di++];
283 opendir(DIR,"$real_bl/$d") || warn "opendir($real_bl/$d): $!\n";
284
285 # read .backupignore if exists
286 if (-f "$real_bl/$d/.backupignore") {
287 open(I,"$real_bl/$d/.backupignore");
288 while(<I>) {
289 chomp;
290 push @ignore,norm_dir("$d/$_");
291 }
292 close(I);
293 #print STDERR "ignore: ",join("|",@ignore),"\n";
294 link "$real_bl/$d/.backupignore","$bc/$d/.backupignore" ||
295 warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n";
296 }
297
298 # read .md5sum if exists
299 if (-f "$real_bl/$d/.md5sum") {
300 open(I,"$real_bl/$d/.md5sum");
301 while(<I>) {
302 chomp;
303 my ($md5,$f) = split(/\s+/,$_,2);
304 $file_md5{$f}=$md5;
305 }
306 close(I);
307 }
308
309 my @clutter = readdir(DIR);
310 foreach my $f (@clutter) {
311 next if ($f eq '.');
312 next if ($f eq '..');
313 my $pr = norm_dir("$d/$f"); # path relative
314 my $pf = norm_dir("$d/$f","$real_bl/"); # path full
315 if (grep(/^\Q$pr\E$/,@ignore) == 0) {
316 if (-f $pf) {
317 push @files,$pr;
318 $file_size{$pr}=(stat($pf))[7];
319 $file_atime{$pr}=(stat($pf))[8];
320 $file_mtime{$pr}=(stat($pf))[9];
321 } elsif (-d $pf) {
322 push @dirs,$pr;
323 } else {
324 print STDERR "not file or directory: $pf\n";
325 }
326 } else {
327 print STDERR "ignored: $pr\n";
328 }
329 }
330 }
331
332 # local dir always include /
333 xlog($share,($#files+1)." files and ".($#dirs)." dirs on local disk before backup");
334
335 # read smb filesystem
336
337 xlog($share,"smb to $share as $param{username}/$param{workgroup}");
338
339 # FIX: how to aviod creation of ~/.smb/smb.conf ?
340 $smb = new Filesys::SmbClient(%param) || die "SmbClient :$!\n";
341
342 $di = 0;
343 while ($di <= $#smb_dirs) {
344 my $d=$smb_dirs[$di];
345 my $pf = norm_dir($d,"smb:$share/"); # path full
346 my $D = $smb->opendir($pf);
347 if (! $D) {
348 xlog($share,"FATAL: $share [$pf]: $!");
349 # remove failing dir
350 delete $smb_dirs[$di];
351 return 0; # failed
352 }
353 $di++;
354
355 my @clutter = $smb->readdir_struct($D);
356 foreach my $item (@clutter) {
357 my $f = $item->[1];
358 next if ($f eq '.');
359 next if ($f eq '..');
360 my $pr = norm_dir("$d/$f"); # path relative
361 my $pf = norm_dir("$d/$f","smb:$share/"); # path full
362 if (grep(/^\Q$pr\E$/,@ignore) == 0) {
363 if ($item->[0] == main::SMBC_FILE) {
364 push @smb_files,$pr;
365 $smb_size{$pr}=($smb->stat($pf))[7];
366 $smb_atime{$pr}=($smb->stat($pf))[10];
367 $smb_mtime{$pr}=($smb->stat($pf))[11];
368 } elsif ($item->[0] == main::SMBC_DIR) {
369 push @smb_dirs,$pr;
370 } else {
371 print STDERR "not file or directory [",$item->[0],"]: $pf\n";
372 }
373 } else {
374 print STDERR "smb ignored: $pr\n";
375 }
376 }
377 }
378
379 xlog($share,($#smb_files+1)." files and ".($#smb_dirs)." dirs on remote share");
380
381 # sync dirs
382 my $lc = List::Compare->new(\@dirs, \@smb_dirs);
383
384 my @dirs2erase = $lc->get_Lonly;
385 my @dirs2create = $lc->get_Ronly;
386 xlog($share,($#dirs2erase+1)." dirs to erase and ".($#dirs2create+1)." dirs to create");
387
388 # create new dirs
389 foreach (sort @smb_dirs) {
390 mkdir "$bc/$_" || warn "mkdir $_: $!\n";
391 }
392
393 # sync files
394 $lc = List::Compare->new(\@files, \@smb_files);
395
396 my @files2erase = $lc->get_Lonly;
397 my @files2create = $lc->get_Ronly;
398 xlog($share,($#files2erase+1)." files to erase and ".($#files2create+1)." files to create");
399
400 sub smb_copy {
401 my $smb = shift;
402
403 my $from = shift;
404 my $to = shift;
405
406
407 my $l = 0;
408
409 foreach my $f (@_) {
410 #print "smb_copy $from/$f -> $to/$f\n";
411 my $md5 = Digest::MD5->new;
412
413 my $fd = $smb->open("$from/$f");
414 if (! $fd) {
415 xlog("WARNING","can't open smb file $from/$f: $!");
416 next;
417 }
418
419 if (! open(F,"> $to/$f")) {
420 xlog("WARNING","can't open new file $to/$f: $!");
421 next;
422 }
423
424 while (defined(my $b=$smb->read($fd,4096))) {
425 print F $b;
426 $l += length($b);
427 $md5->add($b);
428 }
429
430 $smb->close($fd);
431 close(F);
432
433 $file_md5{$f} = $md5->hexdigest;
434
435 # FIX: this fails with -T
436 my ($a,$m) = ($smb->stat("$from/$f"))[10,11];
437 utime $a, $m, "$to/$f" ||
438 warn "can't update utime on $to/$f: $!\n";
439
440 }
441 return $l;
442 }
443
444 # copy new files
445 foreach (@files2create) {
446 $transfer += smb_copy($smb,"smb:$share",$bc,$_);
447 }
448
449 my $size_sync = 0;
450 my $atime_sync = 0;
451 my $mtime_sync = 0;
452 my @sync_files;
453 my @ln_files;
454
455 foreach ($lc->get_intersection) {
456
457 my $f;
458
459 if ($file_size{$_} != $smb_size{$_}) {
460 $f=$_;
461 $size_sync++;
462 }
463 if ($file_atime{$_} != $smb_atime{$_}) {
464 $f=$_;
465 $atime_sync++;
466 }
467 if ($file_mtime{$_} != $smb_mtime{$_}) {
468 $f=$_;
469 $mtime_sync++;
470 }
471
472 if ($f) {
473 push @sync_files, $f;
474 } else {
475 push @ln_files, $_;
476 }
477 }
478
479 xlog($share,($#sync_files+1)." files will be updated (diff: $size_sync size, $atime_sync atime, $mtime_sync mtime), ".($#ln_files+1)." will be linked.");
480
481 foreach (@sync_files) {
482 $transfer += smb_copy($smb,"smb:$share",$bc,$_);
483 }
484
485 xlog($share,"$transfer bytes transfered...");
486
487 foreach (@ln_files) {
488 link "$real_bl/$_","$bc/$_" || warn "link $real_bl/$_ -> $bc/$_: $!\n";
489 }
490
491 # remove files
492 foreach (sort @files2erase) {
493 unlink "$bc/$_" || warn "unlink $_: $!\n";
494 delete $file_md5{$_};
495 }
496
497 # remove not needed dirs (after files)
498 foreach (sort @dirs2erase) {
499 rmdir "$bc/$_" || warn "rmdir $_: $!\n";
500 }
501
502 # remove old .md5sum
503 foreach (sort @dirs) {
504 unlink "$bc/$_/.md5sum" if (-e "$bc/$_/.md5sum");
505 }
506
507 # erase stale entries in .md5sum
508 my @md5_files = keys %file_md5;
509 $lc = List::Compare->new(\@md5_files, \@smb_files);
510 foreach my $file ($lc->get_Lonly) {
511 xlog("NOTICE","removing stale '$file' from .md5sum");
512 delete $file_md5{$file};
513 }
514
515 # create .md5sum
516 my $last_dir = '';
517 my $md5;
518 foreach my $f (sort { $file_md5{$a} cmp $file_md5{$b} } keys %file_md5) {
519 my $dir = dirname($f);
520 my $file = basename($f);
521 #print "$f -- $dir / $file<--\n";
522 if ($dir ne $last_dir) {
523 close($md5) if ($md5);
524 open($md5, ">> $bc/$dir/.md5sum") || warn "can't create $bc/$dir/.md5sum: $!";
525 $last_dir = $dir;
526 #print STDERR "writing $last_dir/.md5sum\n";
527 }
528 print $md5 $file_md5{$f}," $file\n";
529 }
530 close($md5) if ($md5);
531
532 # create leatest link
533 #print "ln -s $bc $real_bl\n";
534 if (-l $bl) {
535 unlink $bl || warn "can't remove old latest symlink $bl: $!\n";
536 }
537 symlink $bc,$bl || warn "can't create latest symlink $bl -> $bc: $!\n";
538
539 # FIX: sanity check -- remove for speedup
540 xlog($share,"failed to create latest symlink $bl -> $bc...") if (readlink($bl) ne $bc || ! -l $bl);
541
542 xlog($share,"backup completed...");
543
544 return 1;
545 }
546 __END__
547 #-------------------------------------------------------------------------
548
549
550 =head1 NAME
551
552 psinib - Perl Snapshot Is Not Incremental Backup
553
554 =head1 SYNOPSIS
555
556 ./psinib.pl
557
558 =head1 DESCRIPTION
559
560 This script in current version support just backup of Samba (or Micro$oft
561 Winblowz) shares to central disk space. Central disk space is organized in
562 multiple directories named after:
563
564 =over 4
565
566 =item *
567 server which is sharing files to be backed up
568
569 =item *
570 name of share on server
571
572 =item *
573 dated directory named like standard ISO date format (YYYYMMDD).
574
575 =back
576
577 In each dated directory you will find I<snapshot> of all files on
578 exported share on that particular date.
579
580 You can also use symlink I<latest> which will lead you to
581 last completed backup. After that you can use some other backup
582 software to transfer I<snapshot> to tape, CD-ROM or some other media.
583
584 =head2 Design considerations
585
586 Since taking of share snapshot every day requires a lot of disk space and
587 network bandwidth, B<psinib> uses several techniques to keep disk usage and
588 network traffic at acceptable level:
589
590 =over 3
591
592 =item - usage of hard-links to provide same files in each snapshot (as opposed
593 to have multiple copies of same file)
594
595 =item - usage of file size, atime and mtime to find changes of files without
596 transferring whole file over network (just share browsing is transfered
597 over network)
598
599 =item - usage of C<.md5sum> files (compatible with command-line utility
600 C<md5sum>) to keep file between snapshots hard-linked
601
602 =back
603
604 =head1 CONFIGURATION
605
606 This section is not yet written.
607
608 =head1 HACKS, TRICKS, BUGS and LIMITATIONS
609
610 This chapter will have all content that doesn't fit anywhere else.
611
612 =head2 Can snapshots be more frequent than daily?
613
614 There is not real reason why you can't take snapshot more often than
615 once a day. Actually, if you are using B<psinib> to backup Windows
616 workstations you already know that they tend to come-and-go during the day
617 (reboots probably ;-), so running B<psinib> several times a day increases
618 your chance of having up-to-date backup (B<psinib> will not make multiple
619 snapshots for same day, nor will it update snapshot for current day if
620 it already exists).
621
622 However, changing B<psinib> to produce snapshots which are, for example, hourly
623 is a simple change of C<$DIR_TIME_FMT> which is currently set to
624 C<'%Y%m%d'> (see I<strftime> documentation for explanation of that
625 format). If you change that to C<'%Y%m%d-%H> you can have hourly snapshots
626 (if your network is fast enough, that is...). Also, some of messages in
627 program will sound strange, but other than that it should work.
628 I<You have been warned>.
629
630 =head2 Do I really need to share every directory which I want to snapshot?
631
632 Actually, no. Due to usage of C<Filesys::SmbClient> module, you can also
633 specify sub-directory inside your share that you want to backup. This feature
634 is most useful if you want to use administrative shares (but, have in mind
635 that you have to enter your Win administrator password in unencrypted file on
636 disk to do that) like this:
637
638 smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win
639
640 After that you will get directories with snapshots like:
641
642 server/c_WinNT_fonts/yyyymmdd/....
643
644 =head2 Won't I run out of disk space?
645
646 Of course you will... Snapshots and logfiles will eventually fill-up your disk.
647 However, you can do two things to stop that:
648
649 =head3 Clean snapshort older than x days
650
651 You can add following command to your C<root> crontab:
652
653 find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \;
654
655 I assume that C</backup/isis_backup> is directory in which are your snapshots
656 and that you don't want to keep snapshots older than 11 days (that's
657 C<-mtime +11> part of command).
658
659 =head3 Rotate your logs
660
661 I will leave that to you. I relay on GNU/Debian's C<logrotate> to do it for me.
662
663 =head2 What are I<YYYYMMDD.partial> directories?
664
665 If there isn't I<latest> symlink in snapshot directory, it's preatty safe to
666 assume that previous backup from that day failed. So, that directory will
667 be renamed to I<YYYYMMDD.partial> and snapshot will be performed again,
668 linking same files (other alternative would be to erase that dir and find
669 second-oldest directory, but this seemed like more correct approach).
670
671 =head1 AUTHOR
672
673 Dobrica Pavlinusic <dpavlin@rot13.org>
674
675 L<http://www.rot13.org/~dpavlin/>
676
677 =head1 LICENSE
678
679 This product is licensed under GNU Public License (GPL) v2 or later.
680
681 =cut

  ViewVC Help
Powered by ViewVC 1.1.26