X-Git-Url: https://git.sthu.org/?p=sitarba.git;a=blobdiff_plain;f=shbackup;h=e57e2508500fe22a966357413cb6d398c73a9f20;hp=351aef95d877e3f0d095289ab582e7619ef21811;hb=49e5f540ea30ec5a28a98dc560a34cfcace55d2f;hpb=dc452a7d1f4a5c28017febc8b4bbfc29e50709bd diff --git a/shbackup b/shbackup index 351aef9..e57e250 100755 --- a/shbackup +++ b/shbackup @@ -1,26 +1,107 @@ #!/usr/bin/python3 -"""Stefan Huber's simplistic backup solution.""" +"""A simple backup solution.""" + +__version__ = "2.0" +__author__ = "Stefan Huber" import datetime import os, shutil, sys import configparser import hashlib -import subprocess +import subprocess, fcntl, select import random, re +import logging + + +Modes = ["full", "incr", "diff"] +class Epoch: + + units = { + "hour" : datetime.timedelta(0, 3600), + "day" : datetime.timedelta(1), + "week" : datetime.timedelta(7), + "month" : datetime.timedelta(31), + "year" : datetime.timedelta(365) } + + def __init__(self, unit=None, mult=1, mode="full", numkeeps=None): + self.unit = unit + self.mult = mult + self.mode = mode + self.numkeeps = numkeeps + self.excludes = [] + + def __repr__(self): + return "[unit: " + repr(self.unit) + \ + ", mult:" + repr(self.mult) + \ + ", mode: " + repr(self.mode) + \ + ", numkeeps: " + repr(self.numkeeps) + \ + ", excludes: " + repr(self.excludes) + "]" + + def getTimeDelta(self): + if self.unit == None: + return None + return self.mult*Epoch.units[self.unit] + + def isRipe(self, oldest, now): + + if self.unit==None: + return True + + delta = now-oldest + mult = self.mult + + if delta >= self.getTimeDelta(): + return True + + if self.unit == "hour": + return abs(now.hour - oldest.hour) >= mult + elif self.unit == "day": + return abs(now.day - oldest.day) >= mult + elif self.unit == "week": + return abs(now.isocalendar()[1] - oldest.isocalendar()[1]) >= mult + elif self.unit == "month": + return abs(now.month - oldest.month) >= mult + elif self.unit == "year": + return abs(now.year - oldest.year) >= mult -Mode = ["full", "incr", "diff"] + return None + + + @staticmethod + def parseTimedelta( deltastr ): + tokens = [ s.strip() for s in deltastr.split("*") ] + unit = None + mult = 1 + if len(tokens) == 1: + unit = tokens[0] + elif len(tokens) == 2: + mult = int(tokens[0]) + unit = tokens[1] + else: + raise ValueError("Invalid format: '{0}'".format(deltastr)) -RealEpoch = { \ - "hour" : datetime.timedelta(0, 3600), \ - "day" : datetime.timedelta(1), \ - "week" : datetime.timedelta(7), \ - "month" : datetime.timedelta(30), \ - "year" : datetime.timedelta(365) } + if not unit in Epoch.units: + raise ValueError("Unknown unit '{0}'".format(unit)) -Epoch = dict(RealEpoch, **{ \ - "sporadic" : datetime.timedelta(0,0) \ - }) + if mult <= 0: + raise ValueError("Non-positive factor '{0}' given.".format(mult)) + + return mult, unit + + + +class FileSet: + """A fileset has a name and a list of directories.""" + def __init__(self, name, dirs, excludes): + self.name = name + self.dirs = dirs + self.excludes = excludes + + def __repr__(self): + return "[name: " + self.name + \ + ", dirs: " + str(self.dirs) + \ + ", excludes: " + str(self.excludes) + "]" class Backup: @@ -30,15 +111,13 @@ class Backup: self.date = date self.epoch = epoch self.mode = mode + self.excludes = [] @staticmethod def fromDirName(dirname): [strdate, strtime, epoch, mode] = dirname.split("-") - if not epoch in Epoch.keys(): - raise ValueError("Invalid epoch: " + epoch) - - if not mode in Mode: + if not mode in Modes: raise ValueError("Invalid mode: " + mode) date = datetime.datetime(int(strdate[0:4]), @@ -47,14 +126,21 @@ class Backup: return Backup(date, epoch, mode) - def __str__(self): + def __repr__(self): return "[date: " + self.date.ctime() + \ ", epoch: " + self.epoch + \ ", mode: " + self.mode + "]" def colAlignedString(self): - return "%16s %8s %4s" % ( \ - self.date.strftime("%Y-%m-%d %H:%M"), self.epoch, self.mode) + age = datetime.datetime.now() - self.date + total_hours = age.total_seconds()/3600 + if total_hours <= 48: + agestr = "(%s h)" % int(total_hours) + else: + agestr = "(%s d)" % age.days + return "%16s %7s %10s %4s" % ( + self.date.strftime("%Y-%m-%d %H:%M"), agestr, + self.epoch, self.mode) @staticmethod def getDirName(date, epoch, mode): @@ -78,14 +164,6 @@ class Config: self.value = value self.message = value - class FileSet: - """A fileset has a name and a list of directories.""" - def __init__(self, name, dirs): - self.name = name - self.dirs = dirs - - def __str__(self): - return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]" formats = ["tar", "tar.gz", "tar.bz2", "tar.xz" ] @@ -93,22 +171,115 @@ class Config: checksumfn = "checksum" def __init__(self): - self.directory = "/media/backup" - self.format = self.formats[0] - self.epochkeeps = { k : 0 for k in RealEpoch.keys() } - self.epochmodes = { k : "full" for k in RealEpoch.keys() } - self.exclpatterns = [] + self.backupdir = None + self.format = self.formats[1] + self.tarbin = "/bin/tar" + self.excludes = [] self.sets = [] self.checksum = None self.lastchecksum = None + self.epochs = Epochs = { "sporadic" : Epoch() } - def __str__(self): - return "[directory: " + self.directory + \ + + def __repr__(self): + return "[backupdir: " + self.backupdir + \ ", format: " + self.format + \ - ", keeps: " + str(self.epochkeeps) + \ - ", modes: " + str(self.epochmodes) + \ - ", exclpatterns: " + str(self.exclpatterns) + \ - ", sets: " + str([str(s) for s in self.sets]) + "]" + ", tarbin: " + self.tarbin + \ + ", excludes: " + repr(self.excludes) + \ + ", epochs: " + repr(self.epochs) + \ + ", sets: " + repr(self.sets) + "]" + + def getRealEpochsSorted(self): + """Return all epochs with have a non-None unit, sorted by + Epoch.getTimeDelta(), starting with the longest dela.""" + epochs = self.epochs + realepochs = [ e for e in epochs.keys() if epochs[e].unit != None ] + deltakey = lambda e: epochs[e].getTimeDelta() + realepochs.sort(key=deltakey, reverse=True) + return realepochs + + + def _read_global(self, config, sec): + for opt in config.options(sec): + if opt=="backupdir": + self.backupdir = config.get(sec, opt) + if not os.path.isdir(self.backupdir): + raise Config.ReadError("Backupdir '{0}' does not exist.".format(self.backupdir)) + elif opt=="format": + self.format = config.get(sec, opt) + if not self.format in Config.formats: + raise Config.ReadError("Invalid 'format' given.") + elif opt=="tarbin": + self.tarbin = config.get(sec, opt) + if not os.path.isfile(self.tarbin): + raise Config.ReadError("Tar binary '{0}' does not exist.".format(self.tarbin)) + elif opt.startswith("exclude"): + self.excludes += [ config.get(sec, opt) ] + else: + raise Config.ReadError("Unknown option '{0}'.".format(opt)) + + + def _read_epoch(self, config, sec): + name = sec[6:].strip() + e = Epoch() + if name in self.epochs: + raise Config.ReadError("Epoch '{0}' already defined.".format(name)) + if name in Epoch.units: + e.unit = name + + for opt in config.options(sec): + if opt=="numkeeps": + try: + e.numkeeps = int(config.getint(sec, opt)) + except ValueError: + raise Config.ReadError("Invalid integer given for '{0}'.".format(opt)) + if e.numkeeps <= 0: + raise Config.ReadError("Non-positive numkeeps '{0}' given.".format(e.numkeeps)) + + elif opt=="mode": + e.mode = config.get(sec, opt) + if not e.mode in Modes: + raise Config.ReadError("Invalid mode '{0}'.".format(e.mode)) + + elif opt=="timespan": + if name in Epoch.units: + raise Config.ReadError("The time delta of a standard epoch " + \ + "is not supposed to be redefined. ") + td = config.get(sec,opt) + try: + mult, unit = Epoch.parseTimedelta(td) + e.unit = unit + e.mult = mult + except ValueError as e: + raise Config.ReadError("Invalid timespan '{0}': {1}".format(td, str(e))) + + elif opt.startswith("exclude"): + e.excludes += [config.get(sec, opt)] + + else: + raise Config.ReadError("Unknown option '" + opt + "'.") + + if e.numkeeps == None: + raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name)) + + self.epochs[name] = e + + + def _read_set(self, config, sec): + name = sec[4:].strip() + dirs = [] + excludes = [] + + for opt in config.options(sec): + if opt.startswith("dir"): + dirs += [config.get(sec, opt)] + elif opt.startswith("exclude"): + excludes += [config.get(sec,opt)] + else: + raise Config.ReadError("Unknown option '" + opt + "'.") + + self.sets += [FileSet(name, dirs, excludes)] + def read(self, filename): """Read configuration from file""" @@ -119,62 +290,28 @@ class Config: config = configparser.RawConfigParser() config.read(filename) - for reqsec in ["destination"]: + for reqsec in ["global"]: if not config.has_section(reqsec): - raise Config.ReadError("Section '" + reqsec + "' is missing.") - - self.directory = config.get("destination", "directory") - if not os.path.isdir(self.directory): - raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory)) - - self.format = config.get("destination", "format") - if not self.format in Config.formats: - raise Config.ReadError("Invalid 'format' given.") - - - if config.has_section("history"): - for opt in config.options("history"): - if opt.startswith("keep"): - epoch = opt[4:] - if not epoch in RealEpoch.keys(): - raise Config.ReadError("Invalid option 'keep" + epoch + "'.") - try: - self.epochkeeps[epoch] = int(config.getint("history", opt)) - except ValueError: - raise Config.ReadError("Invalid integer given for '" + opt + "'.") - elif opt.startswith("mode"): - epoch = opt[4:] - if not epoch in RealEpoch.keys(): - raise Config.ReadError("Invalid option 'mode" + epoch + "'.") - self.epochmodes[epoch] = config.get("history", opt) - if not self.epochmodes[epoch] in Mode: - raise Config.ReadError("Invalid mode given.") - else: - raise Config.ReadError("Invalid option '" + opt + "'.") - - if config.has_section("input"): - for opt in config.options("input"): - if opt.startswith("exclude"): - self.exclpatterns += [ config.get("input", opt) ] - else: - raise Config.ReadError("Invalid option '" + opt + "'.") + raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.") for sec in config.sections(): - if sec in ["destination", "history", "input"]: - continue + + if sec=="global": + self._read_global(config, sec) + + elif sec.startswith("epoch "): + self._read_epoch(config, sec) + elif sec.startswith("set "): - name = sec[4:].strip() - dirs = [] - - for opt in config.options(sec): - if not opt.startswith("dir"): - raise Config.ReadError("Unknown option '" + opt + "'.") - else: - dirs += [config.get(sec, opt)] - self.sets += [Config.FileSet(name, dirs)] + self._read_set(config, sec) + else: raise Config.ReadError("Unknown section '" + sec + "'.") + if self.backupdir == None: + raise Config.ReadError("No backup directory set.") + + # Compute checksum of config file m = hashlib.sha1() f = open(filename, 'rb') @@ -185,7 +322,7 @@ class Config: f.close() try: - f = open(os.path.join(self.directory, self.checksumfn), 'r') + f = open(os.path.join(self.backupdir, self.checksumfn), 'r') self.lastchecksum = f.read().strip() f.close() except IOError: @@ -195,17 +332,16 @@ class Config: class BackupManager: """List and create backups""" - def __init__(self, conffn, alwaysyes): + def __init__(self, conffn): self.conf = Config() - self.alwaysyes = alwaysyes self.conf.read(conffn) def listAllDirs(self): - """List all dirs in destination directory""" + """List all dirs in backupdir""" # Get all entries - basedir = self.conf.directory + basedir = self.conf.backupdir dirs = os.listdir(basedir) # Filter directories return [ d for d in dirs if os.path.isdir(os.path.join(basedir, d)) ] @@ -222,14 +358,14 @@ class BackupManager: return backups - def getDesiredEpoch(self, backups, now): + def getDesiredEpochs(self, backups, now): """Get desired epoch based on self.configuration and list of old backups""" # Find the longest epoch for which we would like the make a backup latest = datetime.datetime(1900, 1, 1) - for timespan, e in reversed(sorted( [ (Epoch[e], e) for e in RealEpoch ] )): - # We make backups of that epoch - if self.conf.epochkeeps[e] == 0: + for e in self.conf.getRealEpochsSorted(): + epoch = self.conf.epochs[e] + if epoch.numkeeps <= 0: continue # Get backups of that epoch @@ -240,8 +376,7 @@ class BackupManager: if len(byepoch) > 0: latest = max(latest, byepoch[-1].date ) - # the latest backup is too old - if now-latest > timespan: + if epoch.isRipe(latest, now): return e # No backup is to be made @@ -249,28 +384,63 @@ class BackupManager: - def backupFileSet(self, fileset, targetdir, since=None): + def backupFileSet(self, fileset, targetdir, excludes, since=None): """Create an archive for given fileset at given target directory.""" - print("Running file set: " + fileset.name) - tarpath = "/bin/tar" - fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format + logfile = logging.getLogger('backuplog') + logfile.info("Running file set: " + fileset.name) - taropts = ["-cpva"] + fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format + taropts = [] + # Add the since date, if given if since != None: taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")] - for pat in self.conf.exclpatterns: + # Add the exclude patterns + for pat in excludes: taropts += ["--exclude", pat] - tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs - #print("tarargs: ", tarargs) - tarp = subprocess.Popen( tarargs ) + #Add exclude patterns from fileset + for pat in fileset.excludes: + taropts += ["--exclude", pat] + # Adding directories to backup + taropts += ["-C", "/"] + [ "./" + d.lstrip("/") for d in fileset.dirs] + + # Launch the tar process + tarargs = [self.conf.tarbin] + ["-cpvaf", fsfn] + taropts + logfile.debug("tar call: " + " ".join(tarargs)) + tarp = subprocess.Popen( tarargs, bufsize=-1, \ + stdout=subprocess.PIPE, stderr=subprocess.PIPE ) + + # Change tarp's stdout and stderr to non-blocking + for s in [tarp.stdout, tarp.stderr]: + fd = s.fileno() + fl = fcntl.fcntl(fd, fcntl.F_GETFL) + fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK) + + # Read stdout and stderr of tarp + errmsg = b"" + while tarp.poll() == None: + rd,wr,ex = select.select([tarp.stdout, tarp.stderr], [], [], 0.05) + if tarp.stdout in rd: + logging.debug( tarp.stdout.readline()[:-1].decode() ) + if tarp.stderr in rd: + errmsg += tarp.stderr.read() + + # Get the remainging output of tarp + for l in tarp.stdout.readlines(): + logging.debug(l.decode().rstrip()) + errmsg += tarp.stderr.read() + + # Get return code of tarp rett = tarp.wait() if rett != 0: - print(tarpath + " returned with exit status " + str(rett) + ":") + for l in errmsg.decode().split("\n"): + logfile.error(l) + logfile.error(self.conf.tarbin + " returned with exit status " + \ + str(rett) + ".") def backup(self, epoch=None, mode=None): @@ -283,48 +453,63 @@ class BackupManager: # Get epoch of backup if epoch == None: - epoch = self.getDesiredEpoch(oldbackups, now) + epoch = self.getDesiredEpochs(oldbackups, now) if epoch == None: - print("No backup planned.") + logging.info("No backup planned.") return # Get mode of backup if mode == None: - mode = self.conf.epochmodes[epoch] - print("Making a backup. Epoch: " + epoch + ", mode: " + mode) + mode = self.conf.epochs[epoch].mode + logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode) oldfullbackups = [ b for b in oldbackups if b.mode == "full" ] # No old full backups existing if mode != "full" and len(oldfullbackups)==0: - print("No full backups existing. Making a full backup.") + logging.info("No full backups existing. Making a full backup.") # Checksum changed -> self.config file changed - if self.conf.checksum != self.conf.lastchecksum: - print("Config file changed since last time.") - if mode != "full": - print("** Warning: full backup recommended!") + if self.conf.checksum != self.conf.lastchecksum and mode != "full": + logging.warning("Full backup recommended as config file has changed.") - # Create new target directory - basedir = self.conf.directory - dirname = Backup.getDirName(now, epoch, mode) - tmpdirname = dirname + ("-%x" % (random.random()*2e16) ) - targetdir = os.path.join(basedir, tmpdirname) - os.mkdir( targetdir ) # If we have a full backup, we backup everything since = None - - # Get latest full backup time if mode == "diff": since = sorted(oldfullbackups, key=lambda b: b.date)[-1].date - # Get latest backup time elif mode == "incr": since = sorted(oldbackups, key=lambda b: b.date)[-1].date + if since != None: + logging.debug("Making backup relative to " + since.ctime()) + + yesno = self.ask_user_yesno("Proceed? [Y, n] ") + if yesno == "n": + return + + # Create new backup directory + basedir = self.conf.backupdir + dirname = Backup.getDirName(now, epoch, mode) + tmpdirname = dirname + ("-%x" % (random.random()*2e16) ) + targetdir = os.path.join(basedir, tmpdirname) + os.mkdir( targetdir ) + + + # Add file logger + logfile = logging.getLogger("backuplog") + fil = logging.FileHandler( os.path.join(targetdir, "log") ) + fil.setLevel(logging.DEBUG) + logfile.addHandler(fil) + + logfile.info("Started: " + now.ctime()) + # Backup all file sets for s in self.conf.sets: - self.backupFileSet(s, targetdir, since) + excludes = self.conf.excludes + self.conf.epochs[epoch].excludes + self.backupFileSet(s, targetdir, excludes, since) + + logfile.info("Stopped: " + datetime.datetime.now().ctime()) # Rename backup directory to final name os.rename( targetdir, os.path.join(basedir, dirname) ) @@ -336,10 +521,11 @@ class BackupManager: f.close() + def prune(self): """Prune old backup files""" - allDirs = self.listAllDirs() + allDirs = sorted(self.listAllDirs()) # Collect all directories not matching backup name removeDirs = [ d for d in allDirs if not Backup.isBackupDir(d) ] @@ -347,45 +533,51 @@ class BackupManager: backups = self.listOldBackups() keepdirs = [] byepoch = { e : list(sorted( [ b for b in backups if b.epoch == e ], \ - key=lambda b : b.date, reverse=True)) for e in RealEpoch } + key=lambda b : b.date, reverse=True)) for e in self.conf.getRealEpochsSorted() } for e in byepoch: - keep = self.conf.epochkeeps[e] - old = byepoch[e][keep:] + epoch = self.conf.epochs[e] + old = byepoch[e][epoch.numkeeps:] removeDirs += [ Backup.getDirName(b.date, b.epoch, b.mode) for b in old] - print("List of stale/outdated entries:") + logging.info("List of stale/outdated entries:") for d in allDirs: + msg = "" if d in removeDirs: - print("[*] ", end="") + msg = "[*] " else: - print("[ ] ", end="") + msg = "[ ] " if Backup.isBackupDir(d): - print( Backup.fromDirName(d).colAlignedString()) + msg += Backup.fromDirName(d).colAlignedString() else: - print(d) + msg += d + + logging.info(msg) # Check that dirs to be removed is in list of all dirs for d in removeDirs: assert( d in allDirs ) if len(removeDirs) == 0: - print("No stale/outdated entries to remove.") + logging.info("No stale/outdated entries to remove.") return - basedir = self.conf.directory - yesno = self.ask_user_yesno("Remove entries marked by '*'?") + basedir = self.conf.backupdir + yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ") if yesno == "y": for d in removeDirs: - shutil.rmtree(os.path.join(basedir, d)) + try: + shutil.rmtree(os.path.join(basedir, d)) + except OSError as e: + logging.error("Error when removing '%s': %s" % (d,e.strerror) ) + def ask_user_yesno(self, question): - if self.alwaysyes: - print(question + " y") - return "y" + if LogConf.con.level <= logging.INFO: + return input(question) else: - return input(question + " [y, N] ") + return "y" def printUsage(): @@ -404,21 +596,44 @@ def printUsage(): print("") print("Options:") print(" -h, --help print this usage text") - print(" -c, --conf use given configuration file") + print(" -c, --conf FILE use given configuration file") print(" default: /etc/shbackup.conf") - print(" -e, --epoch force to create backup for given epoch:") - print(" year, month, week, day, hour, sporadic") - print(" -m, --mode override mode: full, diff, or incr") - print(" -y, --yes always assume 'yes' when user is asked") + print(" -e, --epoch EPOCH force to create backup for given epoch, which") + print(" can be 'sporadic' or one of the configured epochs") + print(" -m, --mode MODE override mode: full, diff, or incr") + print(" -v, --verbose be more verbose and interact with user") + print(" --verbosity LEVEL set verbosity to LEVEL, which can be") + print(" error, warning, info, debug") + print(" -V, --version print version info") + + + +class LogConf: + """Encapsulates logging configuration""" + + con = logging.StreamHandler(sys.stderr) + + @classmethod + def setup(cls): + """Setup logging system""" + conlog = logging.getLogger() + conlog.setLevel(logging.DEBUG) + + cls.con.setLevel(logging.WARNING) + conlog.addHandler(cls.con) + + fillog = logging.getLogger("backuplog") + fillog.setLevel(logging.DEBUG) if __name__ == "__main__": + LogConf.setup() + conffn = "/etc/shbackup.conf" cmd = "list" mode = None epoch = None - yes = False i = 0 while i < len(sys.argv)-1: @@ -433,33 +648,47 @@ if __name__ == "__main__": i += 1 conffn = sys.argv[i] - elif opt in ["-y", "--yes"]: - yes = True + elif opt in ["-V", "--version"]: + print("shbackup " + __version__) + exit(0) + + elif opt in ["-v", "--verbose"]: + LogConf.con.setLevel(logging.INFO) + + elif opt in ["--verbosity"]: + i += 1 + level = sys.argv[i] + numlevel = getattr(logging, level.upper(), None) + if not isinstance(numlevel, int): + raise ValueError('Invalid verbosity level: %s' % level) + LogConf.con.setLevel(numlevel) elif opt in ["-m", "--mode"]: i += 1 mode = sys.argv[i] - if not mode in Mode: - print("Unknown mode '" + mode + "'.") + if not mode in Modes: + logging.error("Unknown mode '" + mode + "'.") exit(1) elif opt in ["-e", "--epoch"]: i += 1 epoch = sys.argv[i] - if not epoch in Epoch: - print("Unknown epoch '" + epoch + "'.") - exit(1) - elif opt in ["backup", "list", "prune"]: cmd = opt else: - print("Unknown option: " + opt) + logging.error("Unknown option: " + opt) exit(1) try: - man = BackupManager(conffn, yes) + man = BackupManager(conffn) + + logging.debug("Config: " + str(man.conf)) + + if epoch!=None and not epoch in man.conf.epochs.keys(): + logging.error("Unknown epoch '" + epoch + "'.") + exit(1) if cmd == "backup": man.backup(epoch, mode) @@ -471,8 +700,8 @@ if __name__ == "__main__": if cmd == "prune": man.prune() - except (Config.ReadError, configparser.DuplicateOptionError) as e: - print("Error reading config file: " + e.message) + except (Config.ReadError, configparser.Error) as e: + logging.error("Error: " + e.message)