f8f8c26f324fe5cb4d830d553a086e8f0668f141
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Backup modes understood by the program: "full" archives everything, "incr"
# archives changes since the most recent backup of any mode, and "diff"
# archives changes since the most recent full backup.
Modes = [
    "full",
    "incr",
    "diff",
]
17
class Epoch:
    """A backup period: a time unit, a multiplier, a mode and a keep quota.

    An epoch whose unit is None has no period of its own: it has no time
    delta and is always considered ripe.
    """

    # Length of every supported unit. "month" and "year" are fixed
    # approximations of 31 and 365 days.
    units = {
        "hour": datetime.timedelta(hours=1),
        "day": datetime.timedelta(days=1),
        "week": datetime.timedelta(weeks=1),
        "month": datetime.timedelta(days=31),
        "year": datetime.timedelta(days=365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        """Store the epoch properties; the exclude list starts out empty."""
        self.unit, self.mult = unit, mult
        self.mode, self.numkeeps = mode, numkeeps
        self.excludes = []

    def __repr__(self):
        template = ("[unit: {0!r}, mult:{1!r}, mode: {2!r}, "
                    "numkeeps: {3!r}, excludes: {4!r}]")
        return template.format(self.unit, self.mult, self.mode,
                               self.numkeeps, self.excludes)

    def getTimeDelta(self):
        """Return the period length as a timedelta, or None without a unit."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    def isRipe(self, oldest, now):
        """Tell whether a backup of this epoch is due.

        The epoch is ripe when it has no unit, when at least the full time
        delta passed between `oldest` and `now`, or when the calendar field
        belonging to the unit (hour, day of month, ISO week number, month,
        year) differs between both dates by at least `mult`.
        """
        if self.unit is None:
            return True

        elapsed = now - oldest
        factor = self.mult
        if elapsed >= self.getTimeDelta():
            return True

        # Pick the calendar field that corresponds to the unit.
        calendar_field = {
            "hour": lambda moment: moment.hour,
            "day": lambda moment: moment.day,
            "week": lambda moment: moment.isocalendar()[1],
            "month": lambda moment: moment.month,
            "year": lambda moment: moment.year,
        }.get(self.unit)
        if calendar_field is None:
            return None

        return abs(calendar_field(now) - calendar_field(oldest)) >= factor

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse "<unit>" or "<mult>*<unit>" and return (mult, unit).

        Raises ValueError for a malformed string, a non-integer or
        non-positive multiplier, or a unit that is not in Epoch.units.
        """
        parts = [piece.strip() for piece in deltastr.split("*")]
        if len(parts) > 2:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        mult = int(parts[0]) if len(parts) == 2 else 1
        unit = parts[-1]

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))
        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
91
92
93
class FileSet:
    """A named group of directories together with its exclude patterns."""

    def __init__(self, name, dirs, excludes):
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        pieces = [
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        ]
        return "".join(pieces)
105
106
class Backup:
    """One backup on disk, described by its date, epoch name and mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name "YYYYMMDD-HHMM-epoch-mode".

        Raises ValueError when the name does not consist of exactly four
        dash-separated parts, when the mode is not listed in Modes, or when
        the date digits do not form a valid date.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = (int(strdate[lo:hi])
                            for lo, hi in ((0, 4), (4, 6), (6, 8)))
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        when = datetime.datetime(year, month, day, hour, minute)

        return Backup(when, epoch, mode)

    def __repr__(self):
        return "".join(["[date: ", self.date.ctime(),
                        ", epoch: ", self.epoch,
                        ", mode: ", self.mode, "]"])

    def colAlignedString(self):
        """Return date, age, epoch and mode as one column-aligned line.

        The age is given in hours up to 48 hours and in days beyond that.
        """
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds() / 3600
        agestr = "(%s h)" % int(hours) if hours <= 48 else "(%s d)" % age.days
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %7s %10s %4s" % (stamp, agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns the regex match object (truthy) or None.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
155
156
157
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Set the defaults; only the unit-less epoch 'sporadic' exists."""
        self.backupdir = None
        self.format = self.formats[0]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        return "[backupdir: " + self.backupdir + \
            ", format: " + self.format + \
            ", tarbin: " + self.tarbin + \
            ", excludes: " + repr(self.excludes) + \
            ", epochs: " + repr(self.epochs) + \
            ", sets: " + repr(self.sets) + "]"

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [name for name in epochs if epochs[name].unit is not None]
        realepochs.sort(key=lambda name: epochs[name].getTimeDelta(),
                        reverse=True)
        return realepochs

    def _read_global(self, config, sec):
        """Read the options of the global section `sec`.

        Raises Config.ReadError for an unknown option, a backup directory or
        tar binary that does not exist, or an unsupported format.
        """
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read an "epoch <name>" section and register the epoch.

        A name that equals one of Epoch.units implies that unit; any other
        name obtains its unit and multiplier from the 'timedelta' option.
        Raises Config.ReadError for a redefined epoch, an invalid option
        value, an unknown option or a missing 'numkeeps'.
        """
        name = sec[6:].strip()
        epoch = Epoch()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError(
                        "Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timedelta":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " + \
                        "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    epoch.mult, epoch.unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    # The exception gets its own name so that it cannot
                    # clobber the epoch object being built.
                    raise Config.ReadError(
                        "Invalid timedelta '{0}': {1}".format(td, str(err)))

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError(
                "No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read a "set <name>" section and append the resulting FileSet."""
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]

    def read(self, filename):
        """Read configuration from file.

        Afterwards `checksum` holds the SHA-1 hex digest of the file and
        `lastchecksum` the content of the checksum file in the backup
        directory, or None when that file cannot be read. Raises
        Config.ReadError when the file is missing or its content is invalid.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError(
                    "Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Recall the checksum stored in the backup directory, if there is one
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
330
331
class BackupManager:
    """List, create and prune backups"""

    def __init__(self, conffn):
        """Read the configuration file `conffn`."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in backupdir"""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on the configuration and list of old
        backups. Returns the epoch name, or None if no backup is due."""

        # Find the longest epoch for which we would like to make a backup.
        # 'latest' is carried over from one epoch to the next, hence a backup
        # of a longer epoch also counts for every shorter epoch.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        The archive is named after the file set and the configured format.
        `excludes` and the excludes of the file set are passed to tar as
        --exclude patterns; if `since` is given, it is passed with -N. A
        non-zero exit status of tar is logged as an error and not raised.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the exclude patterns, then those of the file set
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin, "-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        try:
            # Change tarp's stdout and stderr to non-blocking
            for s in [tarp.stdout, tarp.stderr]:
                fd = s.fileno()
                fl = fcntl.fcntl(fd, fcntl.F_GETFL)
                fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

            # Read stdout and stderr of tarp while it is running. A read on a
            # non-blocking pipe may yield None, which is treated as no data.
            errmsg = b""
            while tarp.poll() is None:
                rd, _, _ = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
                if tarp.stdout in rd:
                    line = tarp.stdout.readline() or b""
                    logging.debug(line.decode(errors="replace").rstrip("\n"))
                if tarp.stderr in rd:
                    errmsg += tarp.stderr.read() or b""

            # Get the remaining output of tarp
            for line in tarp.stdout.readlines():
                logging.debug(line.decode(errors="replace").rstrip())
            errmsg += tarp.stderr.read() or b""

            # Get return code of tarp
            rett = tarp.wait()
        finally:
            tarp.stdout.close()
            tarp.stderr.close()

        if rett != 0:
            for line in errmsg.decode(errors="replace").split("\n"):
                logfile.error(line)
            logfile.error(self.conf.tarbin + " returned with exit status " +
                          str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise. A non-full
        mode falls back to "full" as long as no full backup exists."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so make a full backup instead.
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name with a random
        # hexadecimal suffix; it gets its final name once it is complete.
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file of this backup
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files

        Marked for removal are all directories in backupdir which are not
        named like a backup and, for every epoch with a unit, all backups
        beyond the 'numkeeps' most recent ones. The entries are listed and
        removed only if the user confirms.
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Collect the outdated backups of every epoch
        backups = self.listOldBackups()
        for e in self.conf.getRealEpochsSorted():
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            old = newestfirst[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in old]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in removeDirs else "[ ] "
            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d
            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user `question` and return the answer as typed. If the
        console log level is above INFO, return "y" without asking."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
580
581
def printUsage():
    """Print --help text"""
    prog = sys.argv[0]
    usage = (
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch, which",
        " can be 'sporadic' or one of the configured epochs",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    )
    print("\n".join(usage))
607
608
609
class LogConf:
    """Encapsulates logging configuration"""

    # Console handler writing to stderr; its level acts as the verbosity of
    # the program.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system

        The root logger lets every record pass and hands it to the console
        handler, which starts at level WARNING. The logger "backuplog" is
        set to level DEBUG.
        """
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        console = cls.con
        console.setLevel(logging.WARNING)
        root.addHandler(console)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
626
627
628 if __name__ == "__main__":
629
630 LogConf.setup()
631
632 conffn = "/etc/shbackup.conf"
633 cmd = "list"
634 mode = None
635 epoch = None
636
637 i = 0
638 while i < len(sys.argv)-1:
639 i += 1
640 opt = sys.argv[i]
641
642 if opt in ["-h", "--help"]:
643 printUsage()
644 exit(0)
645
646 elif opt in ["-c", "--conf"]:
647 i += 1
648 conffn = sys.argv[i]
649
650 elif opt in ["-V", "--version"]:
651 print("shbackup " + __version__)
652 exit(0)
653
654 elif opt in ["-v", "--verbose"]:
655 LogConf.con.setLevel(logging.INFO)
656
657 elif opt in ["--verbosity"]:
658 i += 1
659 level = sys.argv[i]
660 numlevel = getattr(logging, level.upper(), None)
661 if not isinstance(numlevel, int):
662 raise ValueError('Invalid verbosity level: %s' % level)
663 LogConf.con.setLevel(numlevel)
664
665 elif opt in ["-m", "--mode"]:
666 i += 1
667 mode = sys.argv[i]
668 if not mode in Modes:
669 logging.error("Unknown mode '" + mode + "'.")
670 exit(1)
671
672 elif opt in ["-e", "--epoch"]:
673 i += 1
674 epoch = sys.argv[i]
675
676 elif opt in ["backup", "list", "prune"]:
677 cmd = opt
678
679 else:
680 logging.error("Unknown option: " + opt)
681 exit(1)
682
683 try:
684 man = BackupManager(conffn)
685
686 logging.debug("Config: " + str(man.conf))
687
688 if epoch!=None and not epoch in man.conf.epochs.keys():
689 logging.error("Unknown epoch '" + epoch + "'.")
690 exit(1)
691
692 if cmd == "backup":
693 man.backup(epoch, mode)
694
695 if cmd == "list":
696 for b in sorted(man.listOldBackups(), key=lambda b: b.date):
697 print(b.colAlignedString())
698
699 if cmd == "prune":
700 man.prune()
701
702 except (Config.ReadError, configparser.Error) as e:
703 logging.error("Error: " + e.message)
704
705
706
707