+#!/usr/bin/python3
+"""A simple backup solution."""
+
+__version__ = "2.0"
+__author__ = "Stefan Huber"
+
+import datetime
+import os, shutil, sys
+import configparser
+import hashlib
+import subprocess, fcntl, select
+import random, re
+import logging
+
+
# Backup modes accepted on the command line and in the configuration:
# "full" archives everything, "diff" archives changes since the latest full
# backup and "incr" archives changes since the latest backup of any mode.
Modes = ["full", "incr", "diff"]
+
class Epoch:
    """A backup epoch: an optional time span plus mode and retention count.

    Attributes:
        unit: key into ``Epoch.units`` or None for an epoch without time span.
        mult: positive multiplier applied to ``unit``.
        mode: backup mode used for this epoch.
        numkeeps: number of backups to keep, or None if not configured.
        excludes: list of exclude patterns specific to this epoch.
    """

    # Nominal duration of one unit; month and year are upper-bound estimates.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return "[unit: " + repr(self.unit) + \
            ", mult:" + repr(self.mult) + \
            ", mode: " + repr(self.mode) + \
            ", numkeeps: " + repr(self.numkeeps) + \
            ", excludes: " + repr(self.excludes) + "]"

    def getTimeDelta(self):
        """Return ``mult`` times the unit's duration, or None without a unit.

        Raises KeyError if ``unit`` is not a key of ``Epoch.units``.
        """
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    @staticmethod
    def _unitIndex(unit, moment):
        """Return a monotonically increasing index of the calendar unit
        (hour, day, ISO week, month or year) that contains ``moment``."""
        if unit == "hour":
            return moment.toordinal() * 24 + moment.hour
        if unit == "day":
            return moment.toordinal()
        if unit == "week":
            # Ordinal of the Monday of that week; all Mondays are congruent
            # modulo 7, so the floor division numbers the weeks consecutively.
            return (moment.toordinal() - moment.weekday()) // 7
        if unit == "month":
            return moment.year * 12 + moment.month
        return moment.year

    def isRipe(self, oldest, now):
        """Tell whether a new backup of this epoch is due.

        ``oldest`` is the timestamp of the reference backup and ``now`` the
        current time. An epoch without unit is always ripe. Otherwise the
        epoch is ripe if the full time delta has elapsed, or if at least
        ``mult`` calendar-unit boundaries lie between the two timestamps.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        # Compare running unit indices rather than wrapped calendar fields:
        # e.g. the hour field jumps from 23 to 0 at midnight, which would
        # otherwise look like a difference of 23 hours.
        passed = abs(self._unitIndex(self.unit, now) -
                     self._unitIndex(self.unit, oldest))
        return passed >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse ``"unit"`` or ``"mult * unit"`` and return ``(mult, unit)``.

        Raises ValueError on a malformed string, an unknown unit, a
        non-integer factor or a non-positive factor.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        mult = 1
        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
+
+
+
class FileSet:
    """A fileset has a name, a list of directories and a list of excludes.

    The three constructor arguments are stored unchanged as the attributes
    ``name``, ``dirs`` and ``excludes``.
    """

    def __init__(self, name, dirs, excludes):
        self.name = name
        self.dirs = dirs
        self.excludes = excludes

    def __repr__(self):
        # str.format() instead of "+" so a non-string name cannot raise.
        return "[name: {0}, dirs: {1}, excludes: {2}]".format(
            self.name, self.dirs, self.excludes)
+
+
class Backup:
    """A single backup has a date, an epoch and a mode.

    A backup is stored in a directory named ``YYYYMMDD-HHMM-<epoch>-<mode>``;
    the static helpers convert between that name and the properties.
    """

    # Pattern of a backup directory name, compiled once for all calls.
    _dirNamePattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    # strftime/strptime format of the timestamp part of a directory name.
    _dateFormat = "%Y%m%d-%H%M"

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a backup directory name.

        Raises ValueError if the name does not consist of four
        dash-separated parts, if the mode is unknown or if the timestamp is
        not a valid date and time.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime.strptime(strdate + "-" + strtime,
                                          Backup._dateFormat)
        return Backup(date, epoch, mode)

    def __repr__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Return date, age, epoch and mode as one column-aligned line.

        The age is given in hours up to 48 hours and in days beyond that.
        """
        age = datetime.datetime.now() - self.date
        total_hours = age.total_seconds() / 3600
        if total_hours <= 48:
            agestr = "(%s h)" % int(total_hours)
        else:
            agestr = "(%s d)" % age.days
        return "%16s %7s %10s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), agestr,
            self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime(Backup._dateFormat) + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns a match object (truthy) if the name has the shape of a
        backup directory name and None otherwise.
        """
        return Backup._dirNamePattern.match(dirname)
+
+
+
class Config:
    """Encapsulates the configuration for the backup program.

    The configuration is read from an INI-style file with a mandatory
    ``[global]`` section and optional ``[epoch NAME]`` and ``[set NAME]``
    sections.
    """

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            # Pass the text on so that str(err) yields the message as well.
            super().__init__(value)
            self.value = value
            self.message = value

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        # The epoch without time span is always available.
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        # str.format() so that an unset (None) backupdir cannot raise.
        return ("[backupdir: {0}, format: {1}, tarbin: {2}, excludes: {3!r}"
                ", epochs: {4!r}, sets: {5!r}]").format(
                    self.backupdir, self.format, self.tarbin,
                    self.excludes, self.epochs, self.sets)

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [name for name, ep in epochs.items()
                      if ep.unit is not None]
        realepochs.sort(key=lambda name: epochs[name].getTimeDelta(),
                        reverse=True)
        return realepochs

    def _read_global(self, config, sec):
        """Read the options of the global section ``sec``.

        Raises Config.ReadError on an unknown option, a non-existing backup
        directory or tar binary, or an unsupported archive format.
        """
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read the epoch section ``sec`` and register the epoch.

        The epoch name is the section name without its ``epoch `` prefix. An
        epoch named like a standard unit gets that unit as its time span.
        Raises Config.ReadError on invalid or missing settings.
        """
        name = sec[6:].strip()
        epoch = Epoch()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt)) from err
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " + \
                        "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    epoch.mult, epoch.unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid timespan '{0}': {1}".format(td, str(err))) from err

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read the file set section ``sec`` and append a FileSet.

        Options starting with ``dir`` add directories, options starting
        with ``exclude`` add exclude patterns. Raises Config.ReadError on any
        other option.
        """
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]

    def read(self, filename):
        """Read configuration from file.

        Besides parsing all sections this computes the SHA-1 checksum of the
        config file and loads the checksum saved in the backup directory, if
        there is one. Raises Config.ReadError on invalid configuration.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # A missing or unreadable checksum file simply means "none saved".
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
+
+
class BackupManager:
    """List, create and prune backups as described by a configuration file."""

    def __init__(self, conffn):
        """Read the configuration from file ``conffn``.

        Raises whatever Config.read() raises on an invalid configuration.
        """
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in backupdir"""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    @staticmethod
    def _parseBackupDir(dirname):
        """Return the Backup for ``dirname`` or None if it is not a valid
        backup directory name. Names that look like a backup but cannot be
        parsed (unknown mode, impossible date) are reported by a warning."""
        if not Backup.isBackupDir(dirname):
            return None
        try:
            return Backup.fromDirName(dirname)
        except ValueError as err:
            logging.warning("Ignoring directory '%s': %s", dirname, err)
            return None

    def listOldBackups(self):
        """Returns a list of old backups."""
        parsed = (self._parseBackupDir(d) for d in self.listAllDirs())
        return [b for b in parsed if b is not None]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        The epochs are visited from the longest to the shortest time span
        and the name of the first ripe one is returned. The date of the
        latest backup is carried over from longer to shorter epochs, so a
        backup of a longer epoch also counts for the shorter ones. Returns
        None if no backup is to be made.
        """
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # If there are backups of that epoch, account for the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        ``excludes`` are exclude patterns in addition to those of the file
        set; ``since`` restricts the archive to files newer than that date.
        The file list printed by tar is logged at debug level; the error
        output of tar is logged if tar fails. Returns True if tar exited
        with status 0 and False otherwise.
        """
        import tempfile

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, then those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        tarargs = [self.conf.tarbin, "-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))

        # stderr goes to a temporary file so that stdout can be read line by
        # line with plain blocking reads and neither pipe can fill up.
        with tempfile.TemporaryFile() as errfile:
            try:
                tarp = subprocess.Popen(tarargs, stdout=subprocess.PIPE,
                                        stderr=errfile)
            except OSError as err:
                logfile.error("Cannot run " + self.conf.tarbin + ": " + str(err))
                return False

            # Leaving the block closes the pipe and waits for tar to exit.
            with tarp:
                for line in tarp.stdout:
                    # File names need not be valid UTF-8.
                    logging.debug(line.decode(errors="replace").rstrip("\n"))

            rett = tarp.returncode
            errfile.seek(0)
            errmsg = errfile.read()

        if rett != 0:
            for l in errmsg.decode(errors="replace").split("\n"):
                logfile.error(l)
            logfile.error(self.conf.tarbin + " returned with exit status " + \
                str(rett) + ".")
        return rett == 0

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        A non-full mode falls back to a full backup when no full backup
        exists yet. The archives are written to a temporary directory which
        is renamed to its final name once all file sets have been processed.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing to be relative to
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # A full backup archives everything; the other modes are relative
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        # Default answer is yes: only an explicit "n..." aborts
        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno.strip().lower().startswith("n"):
            return

        # Create new backup directory under a temporary name, which does not
        # match the backup directory pattern
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger for the duration of this backup
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)
        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            failed = []
            for s in self.conf.sets:
                if self.backupFileSet(s, targetdir, excludes, since) is False:
                    failed.append(s.name)
            if failed:
                logfile.error("File sets with errors: " + ", ".join(failed))

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Directories whose name is not a backup directory name are stale.
        Of every epoch with a time span only the ``numkeeps`` most recent
        backups are kept. The entries are listed and, after confirmation,
        the marked ones are removed.
        """
        allDirs = sorted(self.listAllDirs())
        parsed = {d: self._parseBackupDir(d) for d in allDirs}

        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Add the backups beyond the numkeeps most recent ones of each epoch
        for e in self.conf.getRealEpochsSorted():
            numkeeps = self.conf.epochs[e].numkeeps
            ofepoch = sorted(
                (d for d in allDirs
                 if parsed[d] is not None and parsed[d].epoch == e),
                key=lambda d: parsed[d].date, reverse=True)
            removeDirs += ofepoch[numkeeps:]

        marked = set(removeDirs)
        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in marked else "[ ] "
            if parsed[d] is not None:
                msg += parsed[d].colAlignedString()
            else:
                msg += d
            logging.info(msg)

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        # Default answer is no: only an explicit "y..." removes anything
        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno.strip().lower().startswith("y"):
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user ``question`` and return the answer as typed.

        The user is only asked if the console log level is INFO or more
        verbose; otherwise "y" is returned. An empty string is returned if
        standard input is exhausted, which callers treat as their default.
        """
        if LogConf.con.level <= logging.INFO:
            try:
                return input(question)
            except EOFError:
                return ""
        return "y"
+
+
def printUsage():
    """Print --help text to standard output.

    The program name shown in the usage lines is taken from sys.argv[0].
    """
    prog = sys.argv[0]

    # (left column, description); an empty left column continues the
    # description of the row above.
    commands = [
        ("backup", "make a new backup, if necessary"),
        ("list", "list all backups (default)"),
        ("prune", "prune outdated/old backups"),
    ]
    options = [
        ("-h, --help", "print this usage text"),
        ("-c, --conf FILE", "use given configuration file"),
        ("", "default: /etc/shbackup.conf"),
        ("-e, --epoch EPOCH", "force to create backup for given epoch, which"),
        ("", "can be 'sporadic' or one of the configured epochs"),
        ("-m, --mode MODE", "override mode: full, diff, or incr"),
        ("-v, --verbose", "be more verbose and interact with user"),
        ("--verbosity LEVEL", "set verbosity to LEVEL, which can be"),
        ("", "error, warning, info, debug"),
        ("-V, --version", "print version info"),
    ]

    def table(rows):
        # Pad the left column to a fixed width so all descriptions line up.
        return ["  {0:<20}{1}".format(left, right) for left, right in rows]

    lines = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        "  " + prog + " {options} [cmd]",
        "  " + prog + " --help",
        "",
        "Commands:",
    ]
    lines += table(commands)
    lines += ["", "Options:"]
    lines += table(options)
    print("\n".join(lines))
+
+
+
class LogConf:
    """Encapsulates logging configuration"""

    # Console handler writing to stderr; its level is the verbosity of the
    # program and is adjusted by the command line options.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system.

        The root logger passes everything on to the console handler, which
        itself starts at level WARNING. The logger "backuplog" accepts all
        levels as well.
        """
        conlog = logging.getLogger()
        conlog.setLevel(logging.DEBUG)

        cls.con.setLevel(logging.WARNING)
        conlog.addHandler(cls.con)

        fillog = logging.getLogger("backuplog")
        fillog.setLevel(logging.DEBUG)
+
+
def parseArgs(argv):
    """Parse the command line ``argv`` (including the program name).

    Returns a dict with the keys ``conffn``, ``cmd``, ``mode``, ``epoch``
    and ``verbosity``. ``cmd`` is one of "backup", "list", "prune", "help"
    or "version"; parsing stops at the first help or version option.
    ``verbosity`` is a numeric logging level or None if not given.

    Raises ValueError with a printable message on an unknown option, an
    option without its argument, an unknown mode or verbosity level.
    """
    opts = {
        "conffn": "/etc/shbackup.conf",
        "cmd": "list",
        "mode": None,
        "epoch": None,
        "verbosity": None,
    }
    args = iter(argv[1:])

    def value(opt):
        # Fetch the argument of option opt
        try:
            return next(args)
        except StopIteration:
            raise ValueError("Option '" + opt + "' requires an argument.") from None

    for opt in args:
        if opt in ["-h", "--help"]:
            opts["cmd"] = "help"
            return opts

        elif opt in ["-c", "--conf"]:
            opts["conffn"] = value(opt)

        elif opt in ["-V", "--version"]:
            opts["cmd"] = "version"
            return opts

        elif opt in ["-v", "--verbose"]:
            opts["verbosity"] = logging.INFO

        elif opt in ["--verbosity"]:
            level = value(opt)
            numlevel = getattr(logging, level.upper(), None)
            if not isinstance(numlevel, int):
                raise ValueError('Invalid verbosity level: %s' % level)
            opts["verbosity"] = numlevel

        elif opt in ["-m", "--mode"]:
            mode = value(opt)
            if mode not in Modes:
                raise ValueError("Unknown mode '" + mode + "'.")
            opts["mode"] = mode

        elif opt in ["-e", "--epoch"]:
            opts["epoch"] = value(opt)

        elif opt in ["backup", "list", "prune"]:
            opts["cmd"] = opt

        else:
            raise ValueError("Unknown option: " + opt)

    return opts


def main(argv=None):
    """Run the program with command line ``argv`` (default: sys.argv).

    Exits with status 1 on a usage error, an unknown epoch or an invalid
    configuration, and with status 0 after printing help or version.
    """
    LogConf.setup()

    try:
        opts = parseArgs(sys.argv if argv is None else argv)
    except ValueError as err:
        logging.error(str(err))
        sys.exit(1)

    if opts["verbosity"] is not None:
        LogConf.con.setLevel(opts["verbosity"])

    if opts["cmd"] == "help":
        printUsage()
        sys.exit(0)

    if opts["cmd"] == "version":
        print("shbackup " + __version__)
        sys.exit(0)

    epoch = opts["epoch"]
    try:
        man = BackupManager(opts["conffn"])

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            sys.exit(1)

        if opts["cmd"] == "backup":
            man.backup(epoch, opts["mode"])

        elif opts["cmd"] == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif opts["cmd"] == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        # Report the failure to the calling process as well
        sys.exit(1)


if __name__ == "__main__":
    main()
+
+
+
+