#!/usr/bin/python3
# Origin: shbackup.py as added by the commit
#   From: Stefan Huber
#   Date: Sun, 13 May 2012 16:22:37 +0000 (+0200)
#   Subject: Initial commit
#   X-Git-Tag: v1.0~21
#   X-Git-Url: https://git.sthu.org/?p=sitarba.git;a=commitdiff_plain;h=391d27ec791eed92fa8baa8a398245bf15684680;ds=inline
#   Commit: 391d27ec791eed92fa8baa8a398245bf15684680 (new file, mode 100755)
"""Stefan Huber's simplistic backup solution."""

import configparser
import datetime
import hashlib
import os
import random
import re
import shutil
import subprocess
import sys


class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadException(Exception):
        """An exception raised when reading configurations."""

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option; the first one is the
    # default.
    formats = ["tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Set up defaults: keep no backups of any epoch, all modes 'full'."""
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in Epoch}
        self.epochmodes = {k: "full" for k in Epoch}
        self.exclpatterns = []
        self.sets = []
        # SHA-1 of the configuration file as read by read().
        self.checksum = None
        # Checksum stored in the destination directory, or None if that file
        # could not be read.
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Recognized sections are 'destination' (required; options 'directory'
        and 'format'), 'history' (options 'keep<epoch>' and 'mode<epoch>'),
        'input' (options starting with 'exclude') and any number of
        'set <name>' sections whose options all start with 'dir'.

        Besides filling in the attributes, the SHA-1 checksum of the
        configuration file is computed and the checksum saved in the
        destination directory (if any) is loaded.

        Raises:
            Config.ReadException: on a missing required section, an unknown
                section or option, or an invalid 'format' or mode value.
        """
        config = configparser.RawConfigParser()
        # A missing file is silently skipped by configparser and is reported
        # below as a missing 'destination' section.
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException(
                    "Section '" + reqsec + "' is missing.")

        self.directory = config.get("destination", "directory")

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in Epoch:
                        raise Config.ReadException(
                            "Invalid option 'keep" + epoch + "'.")
                    self.epochkeeps[epoch] = config.getint("history", opt)
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in Epoch:
                        raise Config.ReadException(
                            "Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadException("Invalid mode given.")
                else:
                    raise Config.ReadException(
                        "Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if not opt.startswith("exclude"):
                    raise Config.ReadException(
                        "Invalid option '" + opt + "'.")
                self.exclpatterns.append(config.get("input", opt))

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadException("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadException(
                        "Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum remembered in the destination directory, if any.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None


# Valid backup modes.
Mode = ["full", "incr", "diff"]

# Backup epochs and the time span each of them covers.
Epoch = {
    "hour": datetime.timedelta(0, 3600),
    "day": datetime.timedelta(1),
    "week": datetime.timedelta(7),
    "month": datetime.timedelta(30),
    "year": datetime.timedelta(365),
}


class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Directory names look like '20120513-1622-day-full'.
    _dirpattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns the regex match object (truthy) if the name has the form
        produced by getDirName(), otherwise None.
        """
        return Backup._dirpattern.match(dirname)


class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Read the configuration file conffn.

        Raises:
            Config.ReadException: if the configuration is invalid.
        """
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory.

        Returns a list of entry names (not full paths) that are directories.
        """
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups.

        Every directory in the destination whose name looks like a backup
        directory is parsed into a Backup object.

        Raises:
            ValueError: if such a directory name carries an unknown epoch or
                mode, or an impossible date.
        """
        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(
                int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Epochs are visited from the longest to the shortest time span; epochs
        with a keep count of 0 are skipped. The first epoch whose time span
        has elapsed since the latest backup seen so far is returned. A backup
        of a longer epoch therefore also counts for all shorter epochs.

        Returns the epoch name, or None if no backup is to be made.
        """
        # Find the longest epoch for which we would like to make a backup
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[e], e) for e in Epoch),
                                  reverse=True):
            # We do not make backups of that epoch
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        Runs /bin/tar to write '<targetdir>/<fileset.name>.<format>' from the
        directories of the fileset, applying the configured exclude patterns.
        If since is given, it is passed to tar via '-N' so that only files
        newer than that date are stored. The output of tar is echoed while
        it runs; a non-zero exit status is reported but not raised.
        """
        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tarargs: ", tarargs)

        # stderr is merged into stdout: draining two separate pipes one after
        # the other can deadlock as soon as tar fills the pipe that is not
        # being read.
        tarp = subprocess.Popen(tarargs,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        try:
            # Iterating ends at EOF, i.e. when tar has closed its output.
            for line in tarp.stdout:
                # File names need not be valid UTF-8; never fail on them.
                print(line.decode(errors="replace"), end="")
        finally:
            tarp.stdout.close()
            rett = tarp.wait()

        if rett != 0:
            # The messages of tar have already been echoed above.
            print(tarpath + " returned with exit status " + str(rett) + ".")

    def backup(self):
        """Make a new backup, if necessary.

        Determines the desired epoch and the configured mode, falls back to a
        full backup if no full backup exists yet, archives all file sets into
        a temporary directory and finally renames it to its proper name.
        After a full backup the checksum of the configuration file is saved
        in the destination directory.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()
        epoch = self.getDesiredEpoch(oldbackups, now)

        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup
        mode = self.conf.epochmodes[epoch]
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: incremental and differential backups
        # have nothing to refer to, so really make a full one.
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory. The random suffix keeps the name from
        # matching the backup directory pattern until the backup is complete.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Candidates for removal are all directories in the destination that
        are not backup directories, plus, per epoch, all backups beyond the
        configured number of most recent ones. The candidates are listed and
        only removed after the user answers 'y'.
        """
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        backups = self.listOldBackups()
        for e in Epoch:
            # Newest first; everything after the first `keep` entries goes.
            byepoch = sorted((b for b in backups if b.epoch == e),
                             key=lambda b: b.date)[::-1]
            keep = self.conf.epochkeeps[e]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                     for b in byepoch[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print("  " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y,N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))


def main(argv=None):
    """Run a backup followed by pruning.

    The configuration file is taken from the first command line argument and
    defaults to 'shbackup.conf'. Returns the process exit status.
    """
    if argv is None:
        argv = sys.argv

    conffn = "shbackup.conf"
    if len(argv) > 1:
        conffn = argv[1]

    try:
        man = BackupManager(conffn)
    except Config.ReadException as err:
        print("Error reading configuration '" + conffn + "': " + str(err),
              file=sys.stderr)
        return 1

    man.backup()
    man.prune()
    return 0


if __name__ == "__main__":
    sys.exit(main())