#!/usr/bin/python3
"""A simple backup solution."""

__version__ = "0.1"
__author__ = "Stefan Huber"

import configparser
import datetime
import fcntl  # no longer used by this module; kept so the import set is unchanged
import hashlib
import logging
import os
import random
import re
import select
import shutil
import subprocess
import sys

# Valid backup modes, as used in configuration files and directory names.
Modes = ["full", "incr", "diff"]


class Epoch:
    """A backup epoch: how often to back up, in which mode, how many to keep."""

    # Length of one unit of each standard epoch.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        """Create an epoch.

        unit     -- key of Epoch.units, or None for an epoch without period
        mult     -- multiplier applied to the unit
        mode     -- one of Modes
        numkeeps -- number of backups of this epoch to keep, or None
        """
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return "[unit: " + repr(self.unit) + \
            ", mult:" + repr(self.mult) + \
            ", mode: " + repr(self.mode) + \
            ", numkeeps: " + repr(self.numkeeps) + \
            ", excludes: " + repr(self.excludes) + "]"

    def getTimeDelta(self):
        """Return mult times the unit length, or None if no unit is set."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    def isRipe(self, oldest, now):
        """Tell whether a backup made at 'oldest' is due for renewal at 'now'.

        An epoch without unit is always ripe. Otherwise the epoch is ripe if
        the full time delta has elapsed, or if the calendar field belonging
        to the unit (hour, day of month, ISO week, month, year) differs by at
        least 'mult' between both dates.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        mult = self.mult
        if self.unit == "hour":
            return abs(now.hour - oldest.hour) >= mult
        elif self.unit == "day":
            return abs(now.day - oldest.day) >= mult
        elif self.unit == "week":
            return abs(now.isocalendar()[1] - oldest.isocalendar()[1]) >= mult
        elif self.unit == "month":
            return abs(now.month - oldest.month) >= mult
        elif self.unit == "year":
            return abs(now.year - oldest.year) >= mult

        # Not reached for units listed in Epoch.units.
        return False

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse 'unit' or 'mult*unit' and return the tuple (mult, unit).

        Raises ValueError on a malformed string, an unknown unit, a
        non-integer multiplier or a non-positive multiplier.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        unit = None
        mult = 1

        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit


class FileSet:
    """A fileset has a name and a list of directories."""

    def __init__(self, name, dirs, excludes):
        self.name = name
        self.dirs = dirs
        self.excludes = excludes

    def __repr__(self):
        return "[name: " + self.name + \
            ", dirs: " + str(self.dirs) + \
            ", excludes: " + str(self.excludes) + "]"


class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Directory names look like YYYYMMDD-HHMM-epoch-mode.
    _dirPattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name built by getDirName().

        Raises ValueError if the name does not consist of four parts
        separated by '-', if the mode is unknown or if the date is invalid.
        """
        [strdate, strtime, epoch, mode] = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime(int(strdate[0:4]),
                                 int(strdate[4:6]),
                                 int(strdate[6:8]),
                                 int(strtime[0:2]),
                                 int(strtime[2:4]))
        return Backup(date, epoch, mode)

    def __repr__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Return date, age, epoch and mode as one column-aligned line.

        The age is given in hours up to 48 hours and in days beyond that.
        """
        age = datetime.datetime.now() - self.date
        total_hours = age.total_seconds() / 3600
        if total_hours <= 48:
            agestr = "(%s h)" % int(total_hours)
        else:
            agestr = "(%s d)" % age.days
        return "%16s %7s %10s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), agestr,
            self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns a match object (true) or None (false).
        """
        return Backup._dirPattern.match(dirname)


class Config:
    """Encapsules the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    # Supported archive formats; the first one is the default.
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = None
        self.format = self.formats[0]
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        # The epoch 'sporadic' has no unit and always exists.
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        return "[directory: {0}, format: {1}, excludes: {2!r}, " \
            "epochs: {3!r}, sets: {4!r}]".format(
                self.directory, self.format, self.excludes,
                self.epochs, self.sets)

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs

    def _read_destination(self, config, sec):
        """Read options 'directory' and 'format' of the destination section."""
        for opt in config.options(sec):
            if opt == "directory":
                self.directory = config.get(sec, opt)
                if not os.path.isdir(self.directory):
                    raise Config.ReadError(
                        "Directory '{0}' does not exist.".format(self.directory))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_global(self, config, sec):
        """Read the exclude patterns of the global section."""
        for opt in config.options(sec):
            if opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read a section 'epoch NAME' and register the epoch under NAME.

        If NAME is a key of Epoch.units then this unit is used and option
        'timedelta' is rejected. Option 'numkeeps' is mandatory.
        """
        name = sec[6:].strip()
        epoch = Epoch()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError(
                        "Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timedelta":
                if name in Epoch.units:
                    raise Config.ReadError(
                        "The time delta of a standard epoch " +
                        "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    epoch.mult, epoch.unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid timedelta '{0}': {1}".format(td, str(err)))

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError(
                "No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read a section 'set NAME' and append the file set to self.sets."""
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]

    def read(self, filename):
        """Read configuration from file

        Besides parsing all sections, this stores the SHA-1 checksum of the
        config file in self.checksum and the checksum saved in the
        destination directory, if readable, in self.lastchecksum.

        Raises Config.ReadError on invalid configurations; errors of
        configparser are passed on.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError(
                    "Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "destination":
                self._read_destination(config, sec)
            elif sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.directory is None:
            raise Config.ReadError("No destination directory set.")

        # Compute checksum of config file
        m = hashlib.sha1()
        with open(filename, 'rb') as f:
            m.update(f.read())
        self.checksum = m.hexdigest()

        # Checksum of the config used for the last full backup, if any
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None


class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Read the configuration from file conffn."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on self.configuration and list of old
        backups. Returns the name of the epoch or None if no backup is due."""

        # Find the longest epoch for which we would like the make a backup.
        # 'latest' is carried over from longer to shorter epochs, so a backup
        # of a longer epoch also counts for all shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # Get dates of backups of that epoch
            dates = [b.date for b in backups if b.epoch == e]

            # If there are any, determine the latest
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        fileset   -- the FileSet to archive
        targetdir -- directory in which the archive file is created
        excludes  -- exclude patterns added to those of the fileset
        since     -- if not None, passed to tar via option -N

        The file list printed by tar is logged on debug level. If tar exits
        with a non-zero status then its error output is logged as errors to
        the logger 'backuplog'.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the exclude patterns
        for pat in excludes:
            taropts += ["--exclude", pat]

        # Add exclude patterns from fileset
        for pat in fileset.excludes:
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [tarpath] + ["-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))

        errmsg = b""
        pending = b""
        with subprocess.Popen(tarargs, bufsize=-1,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as tarp:
            # Read both pipes through their file descriptors until both have
            # reached end of file. select() tells which descriptor has data,
            # so os.read() does not block and neither pipe can fill up while
            # we wait on the other one.
            outfd = tarp.stdout.fileno()
            errfd = tarp.stderr.fileno()
            openfds = [outfd, errfd]

            while openfds:
                rd, _, _ = select.select(openfds, [], [])
                for fd in rd:
                    chunk = os.read(fd, 65536)
                    if not chunk:
                        # End of file on this pipe
                        openfds.remove(fd)
                    elif fd == errfd:
                        errmsg += chunk
                    else:
                        # Log complete lines, keep the unfinished remainder
                        pending += chunk
                        lines = pending.split(b"\n")
                        pending = lines.pop()
                        for line in lines:
                            logging.debug(
                                line.decode(errors="replace").rstrip())

            # Last line of stdout without trailing newline
            if pending:
                logging.debug(pending.decode(errors="replace").rstrip())

            # Get return code of tarp
            rett = tarp.wait()

        if rett != 0:
            # File names need not be valid UTF-8, so never fail on decoding
            for line in errmsg.decode(errors="replace").split("\n"):
                logfile.error(line)
            logfile.error(tarpath + " returned with exit status " +
                          str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If a non-full backup is requested but no full backup exists then a
        full backup is made instead."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a differential or
        # incremental backup could be relative to.
        if mode != "full" and len(oldfullbackups) == 0:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory. A random suffix is appended to the
        # name while the backup runs; '%x' requires an integer.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.randrange(int(2e16)))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file of this backup
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files

        Directories in the destination directory whose names are no backup
        names are removed, as well as, per epoch with a unit, all backups
        except for the 'numkeeps' most recent ones."""

        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Per epoch, all backups except the numkeeps most recent ones
        backups = self.listOldBackups()
        for e in self.conf.getRealEpochsSorted():
            ofepoch = sorted([b for b in backups if b.epoch == e],
                             key=lambda b: b.date, reverse=True)
            old = ofepoch[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in old]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if len(removeDirs) == 0:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as err:
                    logging.error("Error when removing '%s': %s" % (d, err.strerror))

    def ask_user_yesno(self, question):
        """Ask the user the given question and return the answer.

        The user is only asked if the console log level is INFO or lower;
        otherwise "y" is returned."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"


def printUsage():
    """Print --help text"""

    print("shbackup - a simple backup solution.")
    print("")
    print("Usage:")
    print(" " + sys.argv[0] + " {options} [cmd]")
    print(" " + sys.argv[0] + " --help")
    print("")
    print("Commands:")
    print(" backup make a new backup, if necessary")
    print(" list list all backups (default)")
    print(" prune prune outdated/old backups")
    print("")
    print("Options:")
    print(" -h, --help print this usage text")
    print(" -c, --conf use given configuration file")
    print(" default: /etc/shbackup.conf")
    print(" -e, --epoch force to create backup for given epoch:")
    print(" year, month, week, day, hour, sporadic")
    print(" -m, --mode override mode: full, diff, or incr")
    print(" -v, --verbose be more verbose and interact with user")
    print(" --verbosity LEVEL set verbosity to LEVEL, which can be")
    print(" error, warning, info, debug")
    print(" -V, --version print version info")


class LogConf:
    """Encapsulates logging configuration"""

    # Console handler; its level also decides whether the user is asked
    # questions, see BackupManager.ask_user_yesno().
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system"""
        conlog = logging.getLogger()
        conlog.setLevel(logging.DEBUG)

        cls.con.setLevel(logging.WARNING)
        conlog.addHandler(cls.con)

        fillog = logging.getLogger("backuplog")
        fillog.setLevel(logging.DEBUG)


def main(argv=None):
    """Run the command line interface and return the exit status.

    argv -- argument list including the program name; defaults to sys.argv

    Returns 0 on success and 1 on invalid arguments or configurations.
    """
    if argv is None:
        argv = sys.argv

    LogConf.setup()

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options which consume the following argument as their value
    valueopts = ("-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch")

    args = iter(argv[1:])
    for opt in args:
        if opt in ("-h", "--help"):
            printUsage()
            return 0

        elif opt in ("-V", "--version"):
            print("shbackup " + __version__)
            return 0

        elif opt in ("-v", "--verbose"):
            LogConf.con.setLevel(logging.INFO)

        elif opt in valueopts:
            value = next(args, None)
            if value is None:
                logging.error("Option '" + opt + "' requires an argument.")
                return 1

            if opt in ("-c", "--conf"):
                conffn = value
            elif opt == "--verbosity":
                numlevel = getattr(logging, value.upper(), None)
                if not isinstance(numlevel, int):
                    logging.error('Invalid verbosity level: %s' % value)
                    return 1
                LogConf.con.setLevel(numlevel)
            elif opt in ("-m", "--mode"):
                mode = value
                if mode not in Modes:
                    logging.error("Unknown mode '" + mode + "'.")
                    return 1
            else:
                epoch = value

        elif opt in ("backup", "list", "prune"):
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)
        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            return 1

        if cmd == "backup":
            man.backup(epoch, mode)

        if cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        if cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as err:
        logging.error("Error: " + err.message)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())