/[psinib]/psinib.pl
This is repository of my old source code which isn't updated any more. Go to git.rot13.org for current projects!
ViewVC logotype

Contents of /psinib.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 1.7 - (show annotations)
Fri Jan 31 22:31:41 2003 UTC (21 years, 2 months ago) by dpavlin
Branch: MAIN
Changes since 1.6: +50 -15 lines
File MIME type: text/plain
Lot of changes:
- ping using tcp connect to port 139
- save warn and die in log
- use newest directory as latest (or partial) if latest symlink doesn't exist
- actually CREATE latest symlink

1 #!/usr/bin/perl -w
2 #
3 # psinib - Perl Snapshot Is Not Incremental Backup
4 #
5 # written by Dobrica Pavlinusic <dpavlin@rot13.org> 2003-01-03
6 # released under GPL v2 or later.
7 #
8 # Backup SMB directories using file produced by LinNeighbourhood (or some
9 # other program [vi :-)] which produces file in format:
10 #
11 # smbmount service mountpoint options
12 #
13 #
14 # usage:
15 # $ psinib.pl mountscript
16
17 use strict 'vars';
18 use Data::Dumper;
19 use Net::Ping;
20 use POSIX qw(strftime);
21 use List::Compare;
22 use Filesys::SmbClient;
23 #use Taint;
24 use Fcntl qw(LOCK_EX LOCK_NB);
25 use Digest::MD5;
26 use File::Basename;
27
# configuration
my $LOG_TIME_FMT = '%Y-%m-%d %H:%M:%S';    # strftime format for logfile
my $DIR_TIME_FMT = '%Y%m%d';               # strftime format for backup dir

my $LOG = '/var/log/backup.log';           # add path here...
#$LOG = '/tmp/backup.log';

# store backups in which directory
my $BACKUP_DEST = '/backup/isis_backup';

# files to ignore in backup
my @ignore = ('.md5sum', '.backupignore', 'backupignore.txt');

# open log; the bareword handle L is what xlog() and the warn/die hooks
# print to, so it has to keep this name. Three-argument open keeps the
# file name from ever being read as an open mode.
open(L, '>>', $LOG) || die "can't open log $LOG: $!";
select((select(L), $|=1)[0]);    # flush output

# make a lock on logfile: try once a second, give up after 10 attempts

my $c = 0;
{
    last if flock(L, LOCK_EX | LOCK_NB);
    sleep 1;
    redo if ++$c < 10;
    # no response for 10 sec, bail out
    print STDERR "can't take lock on $LOG -- another $0 running?\n";
    exit 1;
}

# taint path: nmblookup should be there!
$ENV{'PATH'} = "/usr/bin:/bin";

my $mounts = shift @ARGV ||
    'mountscript';
#    die "usage: $0 mountscript";


my @in_backup;    # shares which are backed up this run

# plain class-method call; the old "new Net::Ping->new(...)" mixed
# indirect-object syntax with ->new, which is ambiguous to parse
my $p = Net::Ping->new("tcp", 2);
# ping will try tcp connect to netbios-ssn (139); fall back to the
# well-known port number when the services database has no such entry
$p->{port_num} = getservbyname("netbios-ssn", "tcp") || 139;

my $backup_ok = 0;

# file-level state shared between snap_share() and smb_copy()
my $smb;
my %smb_atime;
my %smb_mtime;
my %file_md5;
77
# Walk the mount script: every "smbmount service mountpoint options" line
# names one share. Shares whose latest snapshot is already today's are
# counted as done; all others are looked up, pinged and snapshotted.
open(my $mounts_fh, '<', $mounts) || die "can't open $mounts: $!";
while (my $line = <$mounts_fh>) {
    chomp $line;
    next if $line !~ /^\s*smbmount\s/;

    # strip leading blanks first: split() would otherwise produce an empty
    # first field and shift every column by one
    $line =~ s/^\s+//;
    my (undef,$share,undef,$opt) = split(/\s+/,$line,4);
    next unless defined $share && length $share;

    my ($user,$passwd,$workgroup);

    # options column is optional; $n may carry a leading "-o ", which is why
    # the option names are matched unanchored
    foreach my $pair (split(/,/, defined $opt ? $opt : '')) {
        my ($n,$v) = split(/=/,$pair,2);
        next unless defined $n && defined $v;
        if ($n =~ m/username/i) {
            # accepted forms: user/workgroup%password, user/workgroup,
            # user%password and plain user
            if ($v =~ m#^(.+)/(.+)%(.+)$#) {
                ($user,$workgroup,$passwd) = ($1,$2,$3);
            } elsif ($v =~ m#^(.+)/(.+)$#) {
                ($user,$workgroup) = ($1,$2);
            } elsif ($v =~ m#^(.+)%(.+)$#) {
                ($user,$passwd) = ($1,$2);
            } else {
                $user = $v;
            }
        } elsif ($n =~ m#password#i) {
            $passwd = $v;
        } elsif ($n =~ m#workgroup#i) {
            $workgroup = $v;
        }
    }

    push @in_backup,$share;

    my ($host,$dir,$date_dir) = share2host_dir($share);
    # share2host_dir() has already reported an unparseable share; it stays
    # in @in_backup so the summary shows it as not completed
    next unless defined $host;

    my $bl = "$BACKUP_DEST/$host/$dir/latest";       # latest backup
    my $bc = "$BACKUP_DEST/$host/$dir/$date_dir";    # current one
    if (-e $bl) {
        my $real_bl = readlink($bl) || die "can't read link $bl: $!";
        $real_bl = "$BACKUP_DEST/$host/$dir/$real_bl" if (substr($real_bl,0,1) ne "/");
        if (-e $bc && $real_bl eq $bc) {
            print "$share allready backuped...\n";
            $backup_ok++;
            next;
        }
    }

    print "working on $share\n";

    my $ip = get_ip($share);

    if ($ip) {
        xlog($share,"IP is $ip");
        if ($p->ping($ip)) {
            snap_share($share,$user,$passwd,$workgroup);
            $backup_ok++;
        }
    }
}
close($mounts_fh);

# a mount script without any smbmount line must not divide by zero
my $total   = scalar @in_backup;
my $percent = $total ? int($backup_ok*100/$total) : 0;
xlog("","$backup_ok backups completed of total $total this time ($percent %)");

1;
140
141 #-------------------------------------------------------------------------
142
143
# get IP number from share
#
# Takes a share name of the form //host/dir and returns the dotted-quad
# address nmblookup prints for that host. Returns nothing (undef in scalar
# context) when the share has no //host/ part, when nmblookup can't be
# started, or when its output has no address line for the host.
sub get_ip {
    my $share = shift;

    return unless defined $share && $share =~ m#//([^/]+)/#;
    my $host = $1;

    # list-form pipe open runs nmblookup without a shell, so nothing in the
    # host name taken from the mount script can be interpreted as a command
    my $nmb;
    if (!open($nmb, '-|', 'nmblookup', $host)) {
        warn "can't run nmblookup: $!\n";
        return;
    }
    my $answer = do { local $/; <$nmb> };
    close($nmb);

    # \Q..\E: dots in a host name must match literally
    if (defined $answer && $answer =~ m/(\d+\.\d+\.\d+\.\d+)\s\Q$host\E/i) {
        return $1;
    }
    return;
}
155
156
# write entry to screen and log
#
# First argument is the share (or tag) the entry belongs to, second the
# message; an empty or missing message is replaced by a placeholder. The
# message goes to STDERR, the timestamped entry to the log handle L.
sub xlog {
    my ($share, $m) = @_;
    my $t = strftime($LOG_TIME_FMT, localtime);
    $m ||= '[no log entry]';
    print STDERR "$m\n";
    print {*L} "$t $share\t$m\n";
}
165
# dump warn and dies into log
BEGIN {
    # every warning is logged, then re-issued so it still reaches STDERR
    $SIG{'__WARN__'} = sub {
        xlog('WARN', $_[0]);
        warn $_[0];
    };

    # perl runs the __DIE__ hook for exceptions raised inside eval as well
    # (and while compiling, when $^S is undefined); those do not end the
    # script, so they are passed on untouched instead of being logged as DIE
    $SIG{'__DIE__'} = sub {
        die @_ if !defined $^S || $^S;
        xlog('DIE', $_[0]);
        die $_[0];
    };
}
169
170
# split share name to host, dir and current date dir
#
# For a share written as //host/path returns (host, dir, date_dir): dir is
# the path with every non-word character turned into "_" and the
# underscores at both ends trimmed, date_dir is today's date formatted with
# $DIR_TIME_FMT. Prints a message and returns nothing when the share does
# not have that form.
sub share2host_dir {
    my ($share) = @_;

    unless ($share =~ m#//([^/]+)/(.+)$#) {
        print "Can't parse share $share into host and directory!\n";
        return;
    }

    my ($host, $dir) = ($1, $2);
    for ($dir) {
        s/\W/_/g;
        s/^_+//;
        s/_+$//;
    }

    my $date_dir = strftime($DIR_TIME_FMT, localtime);
    return ($host, $dir, $date_dir);
}
186
187
# make a snapshot of a share
#
# Arguments: share (//host/dir), username, password, workgroup.
#
# Builds $BACKUP_DEST/host/dir/<date> from the remote share: files that are
# new, or whose size, atime or mtime differ from the previous snapshot, are
# copied over SMB; unchanged files are hard-linked to the previous
# snapshot. Per-directory .md5sum files are written and the "latest"
# symlink is pointed at the new snapshot. Dies when the backup root is
# missing, a needed directory can't be created or the SMB client can't be
# set up; other problems are reported with warn. Returns nothing.
sub snap_share {

    my $share = shift;

    my %param = ( debug => 0 );

    $param{username}  = shift;
    $param{password}  = shift;
    $param{workgroup} = shift;

    my ($host,$dir,$date_dir) = share2host_dir($share);
    # share2host_dir() has already reported the unparseable share
    return unless defined $host;

    # these file-level variables are shared with smb_copy(); empty them so
    # nothing collected for a previous share ends up in this snapshot
    %file_md5  = ();
    %smb_atime = ();
    %smb_mtime = ();

    # .backupignore entries belong to one share only: extend a private copy
    # instead of the file-level list
    my @skip = @ignore;

    my $base = "$BACKUP_DEST/$host/$dir";
    # latest backup directory
    my $bl = "$base/latest";
    # current backup directory
    my $bc = "$base/$date_dir";

    my $real_bl;
    if (-e $bl) {
        $real_bl = readlink($bl) || die "can't read link $bl: $!";
        $real_bl = "$base/$real_bl" if (substr($real_bl,0,1) ne "/");
    } else {
        print "no old backup, trying to find last backup, ";
        if (opendir(my $bl_dir, $base)) {
            my @bl_dirs = sort grep { !/^\./ && -d "$base/$_" } readdir($bl_dir);
            closedir($bl_dir);
            my $newest = pop @bl_dirs;
            if (defined $newest) {
                print "using $newest as latest...\n";
                $real_bl = "$base/$newest";
                if ($real_bl eq $bc) {
                    xlog($share,"latest from today (possible partial backup)");
                    if (!rename($real_bl, "$real_bl.partial")) {
                        # without the rename the new snapshot would be
                        # linked against itself
                        warn "can't reaname partial backup: $!";
                        return;
                    }
                    $real_bl .= ".partial";
                }
            } else {
                # directory exists but holds no snapshot yet
                print "this is first run...\n";
            }
        } else {
            print "this is first run...\n";
        }
    }

    if (-e $bc && $real_bl && $real_bl eq $bc) {
        print "$share allready backuped...\n";
        return;
    }

    die "You should really create BACKUP_DEST [$BACKUP_DEST] by hand! " if (!-e $BACKUP_DEST);

    if (! -e "$BACKUP_DEST/$host") {
        mkdir("$BACKUP_DEST/$host") or die "can't make dir for host $host, $BACKUP_DEST/$host: $!";
        print "created host directory $BACKUP_DEST/$host...\n";
    }

    if (! -e $base) {
        mkdir($base) or die "can't make dir for share $share, $base $!";
        print "created dir for share $share, $base...\n";
    }

    # a directory left over from an interrupted run is reused
    -d $bc or mkdir($bc) or die "can't make dir for current backup $bc: $!";

    my @dirs     = ( "/" );
    my @smb_dirs = ( "/" );

    my $transfer = 0;    # bytes transfered over network

    # this will store all available files and sizes
    my @files;
    my %file_size;
    my %file_atime;
    my %file_mtime;

    my @smb_files;
    my %smb_size;

    # directories of the previous snapshot that carry a .backupignore; the
    # files are linked once the new directory tree exists
    my @ignore_dirs;

    # read local filesystem
    my $di = 0;
    while ($di <= $#dirs && $real_bl) {
        my $d = $dirs[$di++];

        my $local_dir;
        if (!opendir($local_dir, "$real_bl/$d")) {
            warn "opendir($real_bl/$d): $!\n";
            next;
        }
        my @clutter = readdir($local_dir);
        closedir($local_dir);

        # read .backupignore if exists
        if (-f "$real_bl/$d/.backupignore") {
            if (open(my $ign, '<', "$real_bl/$d/.backupignore")) {
                while (my $entry = <$ign>) {
                    chomp $entry;
                    next unless length $entry;
                    push @skip, norm_dir("$d/$entry");
                }
                close($ign);
            } else {
                warn "can't read $real_bl/$d/.backupignore: $!\n";
            }
            print STDERR "ignore: ",join("|",@skip),"\n";
            push @ignore_dirs, $d;
        }

        # read .md5sum if exists; names in it are relative to this
        # directory, the hash is keyed by path relative to the share
        if (-f "$real_bl/$d/.md5sum") {
            if (open(my $sums, '<', "$real_bl/$d/.md5sum")) {
                while (my $sum_line = <$sums>) {
                    chomp $sum_line;
                    my ($md5,$f) = split(/\s+/,$sum_line,2);
                    next unless defined $f && length $f;
                    $file_md5{ norm_dir("$d/$f") } = $md5;
                }
                close($sums);
            } else {
                warn "can't read $real_bl/$d/.md5sum: $!\n";
            }
        }

        foreach my $f (@clutter) {
            next if ($f eq '.' || $f eq '..');
            my $pr = norm_dir("$d/$f");                 # path relative
            my $pf = norm_dir("$d/$f","$real_bl/");     # path full
            if (grep(/^\Q$pr\E$/,@skip)) {
                print STDERR "ignored: $pr\n";
                next;
            }
            if (-f $pf) {
                my @st = stat($pf);    # one stat instead of three
                push @files,$pr;
                $file_size{$pr}  = $st[7];
                $file_atime{$pr} = $st[8];
                $file_mtime{$pr} = $st[9];
            } elsif (-d $pf) {
                push @dirs,$pr;
            } else {
                print STDERR "unknown type: $pf\n";
            }
        }
    }

    xlog($share,($#files+1)." files and ".($#dirs+1)." dirs on local disk before backup");

    # read smb filesystem

    my ($as_user,$as_group) = map { defined $_ ? $_ : '' } @param{qw(username workgroup)};
    xlog($share,"smb to $share as $as_user/$as_group");

    # FIX: how to aviod creation of ~/.smb/smb.conf ?
    $smb = Filesys::SmbClient->new(%param) or die "SmbClient :$!\n";

    $di = 0;
    while ($di <= $#smb_dirs) {
        my $d = $smb_dirs[$di++];
        my $dir_url = norm_dir($d,"smb:$share/");    # path full
        my $D = $smb->opendir($dir_url);
        if (!$D) {
            warn "smb->opendir($dir_url): $!\n";
            if ($d eq "/") {
                # an unreadable share must not be recorded as an empty but
                # complete snapshot; rmdir only removes $bc when it is empty
                xlog($share,"can't read share, snapshot skipped");
                rmdir $bc;
                return;
            }
            next;
        }

        my @clutter = $smb->readdir_struct($D);
        $smb->closedir($D);

        foreach my $item (@clutter) {
            my $f = $item->[1];
            next if ($f eq '.' || $f eq '..');
            my $pr = norm_dir("$d/$f");                    # path relative
            my $pf = norm_dir("$d/$f","smb:$share/");      # path full
            if (grep(/^\Q$pr\E$/,@skip)) {
                print STDERR "smb ignored: $pr\n";
                next;
            }
            if ($item->[0] == main::SMBC_FILE) {
                # every stat is a network round trip: do it once
                my @st = $smb->stat($pf);
                push @smb_files,$pr;
                $smb_size{$pr}  = $st[7];
                $smb_atime{$pr} = $st[10];
                $smb_mtime{$pr} = $st[11];
            } elsif ($item->[0] == main::SMBC_DIR) {
                push @smb_dirs,$pr;
            } else {
                print STDERR "unknown type: $pf\n";
            }
        }
    }

    xlog($share,($#smb_files+1)." files and ".($#smb_dirs+1)." dirs on remote share");

    # sync dirs
    my $lc = List::Compare->new(\@dirs, \@smb_dirs);

    my @dirs2erase  = $lc->get_Lonly;
    my @dirs2create = $lc->get_Ronly;
    xlog($share,($#dirs2erase+1)." dirs to erase and ".($#dirs2create+1)." dirs to create");

    # create new dirs (sorted, so parents come before their children)
    foreach my $d (sort @smb_dirs) {
        next if -d "$bc/$d";
        mkdir("$bc/$d") or warn "mkdir $d: $!\n";
    }

    # carry .backupignore files over now that their directories exist
    foreach my $d (@ignore_dirs) {
        next if !-d "$bc/$d" || -e "$bc/$d/.backupignore";
        link("$real_bl/$d/.backupignore","$bc/$d/.backupignore") or
            warn "can't copy $real_bl/$d/.backupignore to current backup dir: $!\n";
    }

    # sync files
    $lc = List::Compare->new(\@files, \@smb_files);

    my @files2erase  = $lc->get_Lonly;
    my @files2create = $lc->get_Ronly;
    xlog($share,($#files2erase+1)." files to erase and ".($#files2create+1)." files to create");

    # copy new files
    $transfer += smb_copy($smb,"smb:$share",$bc,@files2create);

    my $size_sync  = 0;
    my $atime_sync = 0;
    my $mtime_sync = 0;
    my @sync_files;
    my @ln_files;

    foreach my $pr ($lc->get_intersection) {

        # explicit flag: testing the file name for truth would treat a
        # changed file called "0" as unchanged
        my $changed = 0;

        if ($file_size{$pr} != $smb_size{$pr}) {
            $changed = 1;
            $size_sync++;
        }
        if ($file_atime{$pr} != $smb_atime{$pr}) {
            $changed = 1;
            $atime_sync++;
        }
        if ($file_mtime{$pr} != $smb_mtime{$pr}) {
            $changed = 1;
            $mtime_sync++;
        }

        if ($changed) {
            push @sync_files, $pr;
        } else {
            push @ln_files, $pr;
        }
    }

    xlog($share,($#sync_files+1)." files will be updated (diff: $size_sync size, $atime_sync atime, $mtime_sync mtime), ".($#ln_files+1)." will be linked.");

    $transfer += smb_copy($smb,"smb:$share",$bc,@sync_files);

    xlog($share,"$transfer bytes transfered...");

    foreach my $pr (@ln_files) {
        link("$real_bl/$pr","$bc/$pr") or warn "link $real_bl/$pr -> $bc/$pr: $!\n";
    }

    # remove files (only present when $bc is reused from an earlier run)
    foreach my $pr (sort @files2erase) {
        next unless -e "$bc/$pr";
        unlink("$bc/$pr") or warn "unlink $pr: $!\n";
    }

    # remove not needed dirs (after files), children before parents
    foreach my $d (reverse sort @dirs2erase) {
        next unless -d "$bc/$d";
        rmdir("$bc/$d") or warn "rmdir $d: $!\n";
    }

    # remove old .md5sum from every directory of the new snapshot
    foreach my $d (sort @smb_dirs) {
        next unless -e "$bc/$d/.md5sum";
        unlink("$bc/$d/.md5sum") or warn "unlink $d/.md5sum: $!\n";
    }

    # create .md5sum: one file per directory, listing only files that are
    # really part of this snapshot, as "<md5>  <name>" lines
    my %sums_in;
    foreach my $f (sort keys %file_md5) {
        next unless -f "$bc/$f";
        push @{ $sums_in{ dirname($f) } }, $file_md5{$f}."  ".basename($f)."\n";
    }
    foreach my $d (sort keys %sums_in) {
        if (open(my $out, '>', "$bc/$d/.md5sum")) {
            print $out @{ $sums_in{$d} };
            close($out) or warn "can't create $bc/$d/.md5sum: $!";
        } else {
            warn "can't create $bc/$d/.md5sum: $!";
        }
    }

    # create latest link; -l also catches a dangling symlink, which -e
    # reports as missing but which would still block symlink()
    if (-l $bl || -e $bl) {
        unlink($bl) or warn "can't remove old latest symlink $bl: $!\n";
    }
    symlink($bc,$bl) or warn "can't create latest symlink $bl -> $bc: $!\n";

    # FIX: sanity check -- remove for speedup
    my $now_latest = readlink($bl);
    xlog($share,"failed to create latest symlink...")
        if (!defined $now_latest || $now_latest ne $bc || ! -e $bl);

    xlog($share,"backup completed...");
    return;
}

# normalise a path: collapse repeated slashes and strip the slashes at both
# ends; when a (true) prefix is given it is put in front of the result
sub norm_dir {
    my ($path, $prefix) = @_;
    $path =~ s#//+#/#g;
    $path =~ s#/+$##;
    $path =~ s#^/+##;
    return $prefix ? $prefix.$path : $path;
}

# copy files from a share into a local directory
#
# Arguments: SMB client object, source URL prefix, destination directory,
# then the list of paths (relative to both) to copy. For every copied file
# the MD5 sum is stored in %file_md5 and atime/mtime are set from the
# remote file. Files that can't be opened are reported and skipped.
# Returns the number of bytes read from the share.
sub smb_copy {
    my ($smb, $from, $to, @names) = @_;

    my $bytes = 0;

    foreach my $f (@names) {
        # open the remote side first, so a failure does not leave an empty
        # local file behind
        my $fd = $smb->open("$from/$f");
        if (! $fd) {
            print STDERR "can't open smb file $from/$f: $!\n";
            next;
        }

        # never write through an existing name: it may be a hard link into
        # an older snapshot, and truncating it would change that snapshot
        unlink("$to/$f") if (-e "$to/$f" || -l "$to/$f");

        my $out;
        if (! open($out, '>', "$to/$f")) {
            print STDERR "can't open new file $to/$f: $!\n";
            $smb->close($fd);
            next;
        }
        binmode($out);

        my $md5 = Digest::MD5->new;

        while (defined(my $chunk = $smb->read($fd,4096))) {
            last unless length $chunk;    # guard against an endless empty read
            print $out $chunk;
            $bytes += length($chunk);
            $md5->add($chunk);
        }

        $smb->close($fd);
        close($out) or warn "can't finish writing $to/$f: $!\n";

        $file_md5{$f} = $md5->hexdigest;

        # FIX: this fails with -T
        my ($atime,$mtime) = ($smb->stat("$from/$f"))[10,11];
        if (defined $atime && defined $mtime) {
            utime($atime, $mtime, "$to/$f") or
                warn "can't update utime on $to/$f: $!\n";
        } else {
            warn "can't update utime on $to/$f: no times from smb stat\n";
        }
    }
    return $bytes;
}
522
523 __END__
524 #-------------------------------------------------------------------------
525
526
527 =head1 NAME
528
529 psinib - Perl Snapshot Is Not Incremental Backup
530
531 =head1 SYNOPSIS
532
533 ./psinib.pl
534
535 =head1 DESCRIPTION
536
537 The current version of this script supports only backup of Samba (or Micro$oft
538 Winblowz) shares to central disk space. Central disk space is organized in
539 multiple directories named after:
540
541 =over 4
542
543 =item *
544 server which is sharing files to be backed up
545
546 =item *
547 name of share on server
548
549 =item *
550 dated directory named like standard ISO date format (YYYYMMDD).
551
552 =back
553
554 In each dated directory you will find I<snapshot> of all files on
555 exported share on that particular date.
556
557 You can also use symlink I<latest> which will lead you to
558 last completed backup. After that you can use some other backup
559 software to transfer I<snapshot> to tape, CD-ROM or some other media.
560
561 =head2 Design considerations
562
563 Since taking of share snapshot every day requires a lot of disk space and
564 network bandwidth, B<psinib> uses several techniques to keep disk usage and
565 network traffic at acceptable level:
566
567 =over 3
568
569 =item - usage of hard-links to provide same files in each snapshot (as opposed
570 to have multiple copies of same file)
571
572 =item - usage of file size, atime and mtime to find changes of files without
573 transferring whole file over network (just share browsing is transferred
574 over network)
575
576 =item - usage of C<.md5sum> files (compatible with command-line utility
577 C<md5sum>) to keep file between snapshots hard-linked
578
579 =back
580
581 =head1 CONFIGURATION
582
583 This section is not yet written.
584
585 =head1 HACKS, TRICKS, BUGS and LIMITATIONS
586
587 This chapter will have all content that doesn't fit anywhere else.
588
589 =head2 Can snapshots be more frequent than daily?
590
591 There is no real reason why you can't take snapshots more often than
592 once a day. Actually, if you are using B<psinib> to backup Windows
593 workstations you already know that they tend to come-and-go during the day
594 (reboots probably ;-), so running B<psinib> several times a day increases
595 your chance of having up-to-date backup (B<psinib> will not make multiple
596 snapshots for same day, nor will it update snapshot for current day if
597 it already exists).
598
599 However, changing B<psinib> to produce snapshots which are, for example, hourly
600 is a simple change of C<$DIR_TIME_FMT> which is currently set to
601 C<'%Y%m%d'> (see I<strftime> documentation for explanation of that
602 format). If you change that to C<'%Y%m%d-%H'> you can have hourly snapshots
603 (if your network is fast enough, that is...). Also, some of messages in
604 program will sound strange, but other than that it should work.
605 I<You have been warned>.
606
607 =head2 Do I really need to share every directory which I want to snapshot?
608
609 Actually, no. Due to usage of C<Filesys::SmbClient> module, you can also
610 specify sub-directory inside your share that you want to backup. This feature
611 is most useful if you want to use administrative shares (but, have in mind
612 that you have to enter your Win administrator password in unencrypted file on
613 disk to do that) like this:
614
615 smbmount //server/c$/WinNT/fonts /mnt -o username=administrator%win
616
617 After that you will get directories with snapshots like:
618
619 server/c__WinNT_fonts/yyyymmdd/....
620
621 =head2 Won't I run out of disk space?
622
623 Of course you will... Snapshots and logfiles will eventually fill-up your disk.
624 However, you can do two things to stop that:
625
626 =head3 Clean snapshots older than x days
627
628 You can add following command to your C<root> crontab:
629
630 find /backup/isis_backup -type d -mindepth 3 -maxdepth 3 -mtime +11 -exec rm -Rf {} \;
631
632 I assume that C</backup/isis_backup> is directory in which are your snapshots
633 and that you don't want to keep snapshots older than 11 days (that's
634 C<-mtime +11> part of command).
635
636 =head3 Rotate your logs
637
638 I will leave that to you. I rely on GNU/Debian's C<logrotate> to do it for me.
639
640 =head2 What are I<YYYYMMDD.partial> directories?
641
642 If there is no I<latest> symlink in snapshot directory, it's pretty safe to
643 assume that previous backup from that day failed. So, that directory will
644 be renamed to I<YYYYMMDD.partial> and snapshot will be performed again,
645 linking same files (other alternative would be to erase that dir and find
646 second-newest directory, but this seemed like more correct approach).
647
648 =head1 AUTHOR
649
650 Dobrica Pavlinusic <dpavlin@rot13.org>
651
652 L<http://www.rot13.org/~dpavlin/>
653
654 =head1 LICENSE
655
656 This product is licensed under GNU General Public License (GPL) v2 or later.
657
658 =cut

  ViewVC Help
Powered by ViewVC 1.1.26