#!/usr/bin/python3
"""A simple backup solution."""

# Reconstructed from a gitweb blobdiff of sitarba.git
# (blob e57e250, commit a585f31); diff markers were stripped.

__version__ = "2.0"
__author__ = "Stefan Huber"

import configparser
import datetime
import fcntl
import hashlib
import logging
import os
import random
import re
import select
import shutil
import subprocess
import sys


# The backup modes understood by the program.
Modes = ["full", "incr", "diff"]


class Epoch:
    """A backup epoch: a time span, a default mode and a retention count.

    An epoch whose unit is None has no time span; isRipe() always returns
    True for it.
    """

    # Length of one unit of each supported time span.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365)}

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return "[unit: " + repr(self.unit) + \
            ", mult:" + repr(self.mult) + \
            ", mode: " + repr(self.mode) + \
            ", numkeeps: " + repr(self.numkeeps) + \
            ", excludes: " + repr(self.excludes) + "]"

    def getTimeDelta(self):
        """Return mult times the unit's timedelta, or None without a unit."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    def isRipe(self, oldest, now):
        """Tell whether a backup of this epoch is due.

        Returns True if the epoch has no unit, if now - oldest is at least
        getTimeDelta(), or if the calendar field matching the unit (hour,
        day, ISO week, month, year) differs by at least mult between oldest
        and now.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        mult = self.mult
        if self.unit == "hour":
            return abs(now.hour - oldest.hour) >= mult
        elif self.unit == "day":
            return abs(now.day - oldest.day) >= mult
        elif self.unit == "week":
            return abs(now.isocalendar()[1] - oldest.isocalendar()[1]) >= mult
        elif self.unit == "month":
            return abs(now.month - oldest.month) >= mult
        elif self.unit == "year":
            return abs(now.year - oldest.year) >= mult

        return None

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse "UNIT" or "MULT*UNIT" and return the tuple (mult, unit).

        Raises ValueError if the format is invalid, the unit is not a key
        of Epoch.units, or the multiplier is not a positive integer.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        mult = 1
        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit


class FileSet:
    """A fileset has a name and a list of directories."""

    def __init__(self, name, dirs, excludes):
        self.name = name
        self.dirs = dirs
        self.excludes = excludes

    def __repr__(self):
        return "[name: " + self.name + \
            ", dirs: " + str(self.dirs) + \
            ", excludes: " + str(self.excludes) + "]"


class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Compiled once; matches "YYYYMMDD-HHMM-<epoch>-<mode>".
    _dirpattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name made by getDirName().

        Raises ValueError if the name does not consist of exactly four
        "-"-separated parts, if the mode is not in Modes, or if the date
        and time digits do not form a valid datetime.
        """
        [strdate, strtime, epoch, mode] = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime(
            int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
            int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __repr__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Return date, age, epoch and mode as one column-aligned line.

        The age is shown in hours up to 48 hours, and in days beyond.
        """
        age = datetime.datetime.now() - self.date
        total_hours = age.total_seconds() / 3600
        if total_hours <= 48:
            agestr = "(%s h)" % int(total_hours)
        else:
            agestr = "(%s d)" % age.days
        return "%16s %7s %10s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), agestr,
            self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns the regex match object (truthy) if dirname has the form
        "YYYYMMDD-HHMM-<epoch>-<mode>", and None otherwise.
        """
        return Backup._dirpattern.match(dirname)


class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            # Pass the text on so that str(exc) is not empty.
            super().__init__(value)
            self.value = value
            self.message = value

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        # str() keeps this usable before backupdir has been set.
        return "[backupdir: " + str(self.backupdir) + \
            ", format: " + self.format + \
            ", tarbin: " + self.tarbin + \
            ", excludes: " + repr(self.excludes) + \
            ", epochs: " + repr(self.epochs) + \
            ", sets: " + repr(self.sets) + "]"

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs

    def _read_global(self, config, sec):
        """Read the options of the global section into this object.

        Raises Config.ReadError on unknown options or invalid values.
        """
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read an "epoch NAME" section and register it in self.epochs.

        If NAME is a key of Epoch.units it becomes the epoch's unit; other
        epochs get their unit and multiplier from the "timespan" option.
        Raises Config.ReadError on duplicate epochs, unknown options,
        invalid values or a missing numkeeps.
        """
        name = sec[6:].strip()
        epoch = Epoch()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " +
                                           "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    epoch.mult, epoch.unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid timespan '{0}': {1}".format(td, str(err)))

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read a "set NAME" section and append a FileSet to self.sets.

        Options starting with "dir" are directories, options starting with
        "exclude" are exclude patterns; anything else raises
        Config.ReadError.
        """
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]

    def read(self, filename):
        """Read configuration from file.

        Also stores the SHA-1 of the config file in self.checksum and the
        checksum saved in the backup directory, if readable, in
        self.lastchecksum. Raises Config.ReadError on invalid configuration.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError(
                    "Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        m = hashlib.sha1()
        with open(filename, 'rb') as f:
            m.update(f.read())
        self.checksum = m.hexdigest()

        # Checksum stored by the last full backup, if there is one
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None


class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in backupdir"""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on self.configuration and list of old
        backups. Returns the epoch name, or None if no backup is due."""

        # Find the longest epoch for which we would like the make a backup.
        # 'latest' deliberately carries over from longer to shorter epochs.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # Dates of the backups of that epoch
            dates = [b.date for b in backups if b.epoch == e]

            # If there are any, determine the latest
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        Runs the configured tar binary. tar's stdout is passed on via
        logging.debug(); if tar exits with a non-zero status, its stderr
        and the exit status are logged as errors to the 'backuplog' logger.
        If since is given, it is passed to tar via -N.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, then those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin] + ["-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))

        # The context manager closes both pipes, even on errors.
        with subprocess.Popen(tarargs, bufsize=-1,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as tarp:

            # Change tarp's stdout and stderr to non-blocking
            for s in [tarp.stdout, tarp.stderr]:
                fd = s.fileno()
                fl = fcntl.fcntl(fd, fcntl.F_GETFL)
                fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

            # Read stdout and stderr of tarp. Reads on non-blocking pipes
            # may yield None or b"", so only non-empty data is used.
            errmsg = b""
            while tarp.poll() is None:
                rd, _, _ = select.select(
                    [tarp.stdout, tarp.stderr], [], [], 0.05)
                if tarp.stdout in rd:
                    line = tarp.stdout.readline()
                    if line:
                        # File names need not be valid UTF-8
                        logging.debug(
                            line.decode(errors="replace").rstrip("\n"))
                if tarp.stderr in rd:
                    chunk = tarp.stderr.read()
                    if chunk:
                        errmsg += chunk

            # Get the remaining output of tarp
            for l in tarp.stdout.readlines():
                logging.debug(l.decode(errors="replace").rstrip())
            rest = tarp.stderr.read()
            if rest:
                errmsg += rest

            # Get return code of tarp
            rett = tarp.wait()

        if rett != 0:
            for l in errmsg.decode(errors="replace").split("\n"):
                logfile.error(l)
            logfile.error(self.conf.tarbin + " returned with exit status " +
                          str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If no full backup exists yet, a full backup is made regardless of
        the requested mode.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so fall back to a full backup.
        if mode != "full" and len(oldfullbackups) == 0:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name. The random
        # suffix must be an integer, as %x does not accept floats.
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(56))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log before the directory is renamed
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Marks for removal every directory in backupdir whose name is not a
        backup name, and, for each epoch with a unit, all but the numkeeps
        most recent backups. Removes them after confirmation.
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Add the backups exceeding numkeeps of their epoch
        backups = self.listOldBackups()
        for e in self.conf.getRealEpochsSorted():
            newestfirst = sorted([b for b in backups if b.epoch == e],
                                 key=lambda b: b.date, reverse=True)
            old = newestfirst[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in old]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if len(removeDirs) == 0:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user the question if the console log level is INFO or
        lower; return "y" without asking otherwise."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"


def printUsage():
    """Print --help text"""

    print("shbackup - a simple backup solution.")
    print("")
    print("Usage:")
    print("  " + sys.argv[0] + " {options} [cmd]")
    print("  " + sys.argv[0] + " --help")
    print("")
    print("Commands:")
    print("  backup                  make a new backup, if necessary")
    print("  list                    list all backups (default)")
    print("  prune                   prune outdated/old backups")
    print("")
    print("Options:")
    print("  -h, --help              print this usage text")
    print("  -c, --conf FILE         use given configuration file")
    print("                          default: /etc/shbackup.conf")
    print("  -e, --epoch EPOCH       force to create backup for given epoch, which")
    print("                          can be 'sporadic' or one of the configured epochs")
    print("  -m, --mode MODE         override mode: full, diff, or incr")
    print("  -v, --verbose           be more verbose and interact with user")
    print("  --verbosity LEVEL       set verbosity to LEVEL, which can be")
    print("                          error, warning, info, debug")
    print("  -V, --version           print version info")


class LogConf:
    """Encapsulates logging configuration"""

    # Console handler; its level also decides whether the user is asked
    # for confirmation (see BackupManager.ask_user_yesno).
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system"""
        conlog = logging.getLogger()
        conlog.setLevel(logging.DEBUG)

        cls.con.setLevel(logging.WARNING)
        conlog.addHandler(cls.con)

        fillog = logging.getLogger("backuplog")
        fillog.setLevel(logging.DEBUG)


def main(argv=None):
    """Parse the command line and run the requested command.

    argv is the argument list without the program name; sys.argv[1:] is
    used if it is None. Returns the process exit status.
    """
    LogConf.setup()

    args = sys.argv[1:] if argv is None else list(argv)

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options which consume the following argument
    withvalue = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]

    i = 0
    while i < len(args):
        opt = args[i]
        i += 1

        value = None
        if opt in withvalue:
            if i >= len(args):
                logging.error("Option '" + opt + "' requires an argument.")
                return 1
            value = args[i]
            i += 1

        if opt in ["-h", "--help"]:
            printUsage()
            return 0

        elif opt in ["-c", "--conf"]:
            conffn = value

        elif opt in ["-V", "--version"]:
            print("shbackup " + __version__)
            return 0

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            numlevel = getattr(logging, value.upper(), None)
            # bool is excluded as it is a subclass of int
            if isinstance(numlevel, bool) or not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: %s" % value)
                return 1
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = value
            if mode not in Modes:
                logging.error("Unknown mode '" + mode + "'.")
                return 1

        elif opt in ["-e", "--epoch"]:
            epoch = value

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            return 1

        if cmd == "backup":
            man.backup(epoch, mode)

        if cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        if cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())