rename shbackup -> sitarba
[sitarba.git] / sitarba
diff --git a/sitarba b/sitarba
new file mode 100755 (executable)
index 0000000..e57e250
--- /dev/null
+++ b/sitarba
@@ -0,0 +1,708 @@
+#!/usr/bin/python3
+"""A simple backup solution."""
+
+__version__ = "2.0"
+__author__ = "Stefan Huber"
+
+import datetime
+import os, shutil, sys
+import configparser
+import hashlib
+import subprocess, fcntl, select
+import random, re
+import logging
+
+
+# Valid backup modes. Backup directory names, the 'mode' option of an epoch
+# section and the --mode command line option are all checked against this list.
+Modes = ["full", "incr", "diff"]
+
+class Epoch:
+    """A backup epoch: a time unit with a multiplier, the backup mode to
+    use, the number of backups to keep and a list of exclude patterns."""
+
+    # Length of a single unit, keyed by unit name
+    units = {
+                "hour" : datetime.timedelta(hours=1),
+                "day" : datetime.timedelta(days=1),
+                "week" : datetime.timedelta(weeks=1),
+                "month" : datetime.timedelta(days=31),
+                "year" : datetime.timedelta(days=365) }
+
+    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
+        self.excludes = []
+        self.numkeeps = numkeeps
+        self.mode = mode
+        self.mult = mult
+        self.unit = unit
+
+    def __repr__(self):
+        template = "[unit: {0!r}, mult:{1!r}, mode: {2!r}, " \
+                "numkeeps: {3!r}, excludes: {4!r}]"
+        return template.format(self.unit, self.mult, self.mode,
+                self.numkeeps, self.excludes)
+
+    def getTimeDelta(self):
+        """Return mult times the length of the unit, or None if the epoch
+        has no unit."""
+        if self.unit is None:
+            return None
+        return self.mult*Epoch.units[self.unit]
+
+    def isRipe(self, oldest, now):
+        """Tell whether a backup dated 'oldest' is due again at time 'now'.
+
+        An epoch without unit is always ripe. Otherwise the epoch is ripe
+        if the full time delta has elapsed, or if the calendar field that
+        belongs to the unit (hour, day of month, ISO week number, month or
+        year) of both dates differs by at least mult."""
+
+        unit = self.unit
+        if unit is None:
+            return True
+
+        if now - oldest >= self.getTimeDelta():
+            return True
+
+        # Calendar field to compare for each unit
+        fields = {
+                "hour" : lambda d: d.hour,
+                "day" : lambda d: d.day,
+                "week" : lambda d: d.isocalendar()[1],
+                "month" : lambda d: d.month,
+                "year" : lambda d: d.year }
+
+        if unit not in fields:
+            return None
+
+        field = fields[unit]
+        return abs(field(now) - field(oldest)) >= self.mult
+
+
+    @staticmethod
+    def parseTimedelta( deltastr ):
+        """Parse a string of the form 'UNIT' or 'MULT * UNIT' and return
+        the tuple (mult, unit). Raises ValueError on malformed input, on
+        an unknown unit and on a non-positive multiplier."""
+        parts = [ p.strip() for p in deltastr.split("*") ]
+        if len(parts) > 2:
+            raise ValueError("Invalid format: '{0}'".format(deltastr))
+
+        if len(parts) == 2:
+            mult = int(parts[0])
+            unit = parts[1]
+        else:
+            mult = 1
+            unit = parts[0]
+
+        if unit not in Epoch.units:
+            raise ValueError("Unknown unit '{0}'".format(unit))
+
+        if mult <= 0:
+            raise ValueError("Non-positive factor '{0}' given.".format(mult))
+
+        return mult, unit
+
+
+
+class FileSet:
+    """A named group of directories together with the exclude patterns
+    that apply to it."""
+
+    def __init__(self, name, dirs, excludes):
+        self.name, self.dirs, self.excludes = name, dirs, excludes
+
+    def __repr__(self):
+        pieces = [ "[name: ", self.name,
+                ", dirs: ", str(self.dirs),
+                ", excludes: ", str(self.excludes), "]" ]
+        return "".join(pieces)
+
+
+class Backup:
+    """One backup, described by its date, the name of its epoch and its
+    mode."""
+
+    def __init__(self, date, epoch, mode):
+        self.excludes = []
+        self.mode = mode
+        self.epoch = epoch
+        self.date = date
+
+    @staticmethod
+    def fromDirName(dirname):
+        """Create a Backup from a directory name of the form
+        YYYYMMDD-HHMM-EPOCH-MODE. Raises ValueError if the name does not
+        consist of four dash-separated parts, if the mode is not in Modes
+        or if the date fields are not valid."""
+        strdate, strtime, epoch, mode = dirname.split("-")
+
+        if mode not in Modes:
+            raise ValueError("Invalid mode: " + mode)
+
+        year, month, day = int(strdate[:4]), int(strdate[4:6]), int(strdate[6:8])
+        hour, minute = int(strtime[:2]), int(strtime[2:4])
+        when = datetime.datetime(year, month, day, hour, minute)
+
+        return Backup(when, epoch, mode)
+
+    def __repr__(self):
+        pieces = [ "[date: ", self.date.ctime(),
+                ", epoch: ", self.epoch,
+                ", mode: ", self.mode, "]" ]
+        return "".join(pieces)
+
+    def colAlignedString(self):
+        """Return date, age, epoch and mode as one column-aligned line.
+        The age is given in hours up to 48 hours and in days beyond."""
+        age = datetime.datetime.now() - self.date
+        hours = age.total_seconds()/3600
+        if hours > 48:
+            agestr = "(%s d)" % age.days
+        else:
+            agestr = "(%s h)" % int(hours)
+        stamp = self.date.strftime("%Y-%m-%d %H:%M")
+        return "%16s  %7s  %10s  %4s" % (stamp, agestr, self.epoch, self.mode)
+
+    @staticmethod
+    def getDirName(date, epoch, mode):
+        """Get directory name of backup by given properties."""
+        return "-".join([ date.strftime("%Y%m%d-%H%M"), epoch, mode ])
+
+    @staticmethod
+    def isBackupDir(dirname):
+        """Is directory a backup directory? Returns the match object of
+        the name pattern, or None if the name does not match."""
+        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
+
+
+
+class Config:
+    """Encapsulates the configuration for the backup program."""
+
+    class ReadError(RuntimeError):
+        """An exception raised when reading configurations. The error text
+        is available as 'value' and as 'message'."""
+        def __init__(self, value):
+            super().__init__(value)
+            self.value = value
+            self.message = value
+
+
+    # Accepted values of the 'format' option
+    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz" ]
+
+    # Filename where checksum of config is saved
+    checksumfn = "checksum"
+
+    def __init__(self):
+        self.backupdir = None
+        self.format = self.formats[1]
+        self.tarbin = "/bin/tar"
+        self.excludes = []
+        self.sets = []
+        self.checksum = None
+        self.lastchecksum = None
+        # The epoch "sporadic" has no unit and is always defined
+        self.epochs = { "sporadic" : Epoch() }
+
+
+    def __repr__(self):
+        return "[backupdir: " + self.backupdir + \
+                ", format: " + self.format + \
+                ", tarbin: " + self.tarbin + \
+                ", excludes: " + repr(self.excludes) + \
+                ", epochs: " + repr(self.epochs) + \
+                ", sets: " + repr(self.sets) + "]"
+
+    def getRealEpochsSorted(self):
+        """Return the names of all epochs which have a non-None unit, sorted
+        by Epoch.getTimeDelta(), starting with the longest delta."""
+        epochs = self.epochs
+        realepochs = [ e for e in epochs if epochs[e].unit is not None ]
+        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
+        return realepochs
+
+
+    def _read_global(self, config, sec):
+        """Read the options of the global section 'sec' from the parser
+        'config'. Raises Config.ReadError on an unknown option, an invalid
+        format, a missing backup directory or a missing tar binary."""
+        for opt in config.options(sec):
+            if opt=="backupdir":
+                self.backupdir = config.get(sec, opt)
+                if not os.path.isdir(self.backupdir):
+                    raise Config.ReadError("Backupdir '{0}' does not exist.".format(self.backupdir))
+            elif opt=="format":
+                self.format = config.get(sec, opt)
+                if self.format not in Config.formats:
+                    raise Config.ReadError("Invalid 'format' given.")
+            elif opt=="tarbin":
+                self.tarbin = config.get(sec, opt)
+                if not os.path.isfile(self.tarbin):
+                    raise Config.ReadError("Tar binary '{0}' does not exist.".format(self.tarbin))
+            elif opt.startswith("exclude"):
+                self.excludes += [ config.get(sec, opt) ]
+            else:
+                raise Config.ReadError("Unknown option '{0}'.".format(opt))
+
+
+    def _read_epoch(self, config, sec):
+        """Read the epoch section 'sec' (named 'epoch NAME') from the parser
+        'config' and register the epoch under NAME. Raises Config.ReadError
+        if the epoch is already defined, if an option is unknown or invalid,
+        or if numkeeps is missing."""
+        name = sec[6:].strip()
+        if name in self.epochs:
+            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
+
+        epoch = Epoch()
+        # An epoch named like a standard unit gets that unit
+        if name in Epoch.units:
+            epoch.unit = name
+
+        for opt in config.options(sec):
+            if opt=="numkeeps":
+                try:
+                    epoch.numkeeps = config.getint(sec, opt)
+                except ValueError:
+                    raise Config.ReadError("Invalid integer given for '{0}'.".format(opt))
+                if epoch.numkeeps <= 0:
+                    raise Config.ReadError("Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))
+
+            elif opt=="mode":
+                epoch.mode = config.get(sec, opt)
+                if epoch.mode not in Modes:
+                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))
+
+            elif opt=="timespan":
+                if name in Epoch.units:
+                    raise Config.ReadError("The time delta of a standard epoch " + \
+                            "is not supposed to be redefined. ")
+                td = config.get(sec,opt)
+                try:
+                    epoch.mult, epoch.unit = Epoch.parseTimedelta(td)
+                except ValueError as err:
+                    # The exception has a name of its own, so that it does
+                    # not shadow the epoch object
+                    raise Config.ReadError("Invalid timespan '{0}': {1}".format(td, str(err)))
+
+            elif opt.startswith("exclude"):
+                epoch.excludes += [config.get(sec, opt)]
+
+            else:
+                raise Config.ReadError("Unknown option '" + opt + "'.")
+
+        if epoch.numkeeps is None:
+            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))
+
+        self.epochs[name] = epoch
+
+
+    def _read_set(self, config, sec):
+        """Read the file set section 'sec' (named 'set NAME') from the
+        parser 'config' and append a FileSet to self.sets. Raises
+        Config.ReadError on an unknown option."""
+        name = sec[4:].strip()
+        dirs = []
+        excludes = []
+
+        for opt in config.options(sec):
+            if opt.startswith("dir"):
+                dirs += [config.get(sec, opt)]
+            elif opt.startswith("exclude"):
+                excludes += [config.get(sec,opt)]
+            else:
+                raise Config.ReadError("Unknown option '" + opt + "'.")
+
+        self.sets += [FileSet(name, dirs, excludes)]
+
+
+    def read(self, filename):
+        """Read configuration from file. Also computes the SHA-1 checksum of
+        the file and recalls the checksum saved in the backup directory, if
+        any. Raises Config.ReadError on invalid configurations; errors of
+        configparser are passed on."""
+
+        if not os.path.isfile(filename):
+            raise Config.ReadError("Cannot read config file '" + filename + "'.")
+
+        config = configparser.RawConfigParser()
+        config.read(filename)
+
+        for reqsec in ["global"]:
+            if not config.has_section(reqsec):
+                raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.")
+
+        for sec in config.sections():
+
+            if sec=="global":
+                self._read_global(config, sec)
+
+            elif sec.startswith("epoch "):
+                self._read_epoch(config, sec)
+
+            elif sec.startswith("set "):
+                self._read_set(config, sec)
+
+            else:
+                raise Config.ReadError("Unknown section '" + sec + "'.")
+
+        if self.backupdir is None:
+            raise Config.ReadError("No backup directory set.")
+
+
+        # Compute checksum of config file
+        with open(filename, 'rb') as f:
+            self.checksum = hashlib.sha1(f.read()).hexdigest()
+
+        # Recall the checksum saved in the backup directory. The file is
+        # closed even if reading it fails.
+        try:
+            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
+                self.lastchecksum = f.read().strip()
+        except IOError:
+            self.lastchecksum = None
+
+
+class BackupManager:
+    """List, create and prune backups"""
+
+    def __init__(self, conffn):
+        """Read the configuration from the file 'conffn'. Exceptions raised
+        by Config.read() are passed on."""
+        self.conf = Config()
+        self.conf.read(conffn)
+
+
+    def listAllDirs(self):
+        """List all dirs in backupdir"""
+
+        basedir = self.conf.backupdir
+        # Get all entries and keep the directories only
+        return [ d for d in os.listdir(basedir) \
+                if os.path.isdir(os.path.join(basedir, d)) ]
+
+
+    def listOldBackups(self):
+        """Returns a list of old backups, i.e., a Backup object for every
+        directory in backupdir whose name looks like a backup directory."""
+
+        return [ Backup.fromDirName(d) for d in self.listAllDirs() \
+                if Backup.isBackupDir(d) ]
+
+
+    def getDesiredEpochs(self, backups, now):
+        """Get desired epoch based on self.configuration and list of old
+        backups. Returns the name of the epoch, or None if no backup is to
+        be made."""
+
+        # Find the longest epoch for which we would like the make a backup
+        latest = datetime.datetime(1900, 1, 1)
+        for e in self.conf.getRealEpochsSorted():
+            epoch = self.conf.epochs[e]
+            if epoch.numkeeps <= 0:
+                continue
+
+            # Dates of the backups of that epoch
+            dates = [ b.date for b in backups if b.epoch == e ]
+
+            # If there are any, determine the latest. Note that 'latest'
+            # is carried over from the longer epochs handled before.
+            if dates:
+                latest = max(latest, max(dates))
+
+            if epoch.isRipe(latest, now):
+                return e
+
+        # No backup is to be made
+        return None
+
+
+
+    def backupFileSet(self, fileset, targetdir, excludes, since=None):
+        """Create an archive for given fileset at given target directory.
+
+        'excludes' is a list of exclude patterns used in addition to those
+        of the fileset. If 'since' is given, it is passed to tar with -N.
+        A non-zero exit status of tar is logged as error together with the
+        output tar wrote to stderr."""
+
+        logfile = logging.getLogger('backuplog')
+        logfile.info("Running file set: " + fileset.name)
+
+        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
+        taropts = []
+
+        # Add the since date, if given
+        if since is not None:
+            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]
+
+        # Add the given exclude patterns and those from the fileset
+        for pat in list(excludes) + list(fileset.excludes):
+            taropts += ["--exclude", pat]
+
+        # Adding directories to backup
+        taropts += ["-C", "/"] + [ "./" + d.lstrip("/") for d in fileset.dirs]
+
+        # Launch the tar process
+        tarargs = [self.conf.tarbin] + ["-cpvaf", fsfn] + taropts
+        logfile.debug("tar call: " + " ".join(tarargs))
+        tarp = subprocess.Popen( tarargs, bufsize=-1, \
+                stdout=subprocess.PIPE, stderr=subprocess.PIPE )
+
+        # Change tarp's stdout and stderr to non-blocking
+        for s in [tarp.stdout, tarp.stderr]:
+            fd = s.fileno()
+            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
+            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)
+
+        # Read stdout and stderr of tarp. The output of tar on stdout goes
+        # to the root logger. File names need not be valid UTF-8, hence
+        # undecodable bytes are replaced instead of raising an exception.
+        errmsg = b""
+        while tarp.poll() is None:
+            rd,wr,ex = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
+            if tarp.stdout in rd:
+                line = tarp.stdout.readline()
+                if line:
+                    logging.debug( line.rstrip(b"\n").decode(errors="replace") )
+            if tarp.stderr in rd:
+                # A non-blocking read may give None if nothing is available
+                errmsg += tarp.stderr.read() or b""
+
+        # Get the remaining output of tarp
+        for l in tarp.stdout.readlines():
+            logging.debug(l.decode(errors="replace").rstrip())
+        errmsg += tarp.stderr.read() or b""
+
+        # Get return code of tarp
+        rett = tarp.wait()
+        if rett != 0:
+            for l in errmsg.decode(errors="replace").split("\n"):
+                logfile.error(l)
+            logfile.error(self.conf.tarbin + " returned with exit status " + \
+                   str(rett) + ".")
+
+
+    def backup(self, epoch=None, mode=None):
+        """Make a new backup, if necessary. If epoch is None then determine
+        desired epoch automatically. Use given epoch otherwise. If mode is None
+        then use mode for given epoch. Use given mode otherwise. If there is
+        no full backup yet, a full backup is made regardless of the mode."""
+
+        now = datetime.datetime.now()
+        oldbackups = self.listOldBackups()
+
+        # Get epoch of backup
+        if epoch is None:
+            epoch = self.getDesiredEpochs(oldbackups, now)
+        if epoch is None:
+            logging.info("No backup planned.")
+            return
+
+        # Get mode of backup
+        if mode is None:
+            mode = self.conf.epochs[epoch].mode
+        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)
+
+        oldfullbackups = [ b for b in oldbackups if b.mode == "full" ]
+
+        # No old full backups existing: there is nothing a diff or incr
+        # backup could be relative to, so actually switch to a full backup
+        if mode != "full" and len(oldfullbackups)==0:
+            logging.info("No full backups existing. Making a full backup.")
+            mode = "full"
+
+        # Checksum changed -> self.config file changed
+        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
+            logging.warning("Full backup recommended as config file has changed.")
+
+
+        # If we have a full backup, we backup everything. A diff backup is
+        # relative to the latest full backup, an incr backup is relative to
+        # the latest backup of any mode.
+        since = None
+        if mode == "diff":
+            since = max(b.date for b in oldfullbackups)
+        elif mode == "incr":
+            since = max(b.date for b in oldbackups)
+
+        if since is not None:
+            logging.debug("Making backup relative to " + since.ctime())
+
+        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
+        if yesno == "n":
+            return
+
+        # Create new backup directory under a temporary name. Note that %x
+        # requires an integer.
+        basedir = self.conf.backupdir
+        dirname = Backup.getDirName(now, epoch, mode)
+        tmpdirname = dirname + ("-%x" % int(random.random()*2e16) )
+        targetdir = os.path.join(basedir, tmpdirname)
+        os.mkdir( targetdir )
+
+
+        # Add file logger
+        logfile = logging.getLogger("backuplog")
+        fil = logging.FileHandler( os.path.join(targetdir, "log") )
+        fil.setLevel(logging.DEBUG)
+        logfile.addHandler(fil)
+
+        try:
+            logfile.info("Started: " + now.ctime())
+
+            # Backup all file sets
+            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
+            for s in self.conf.sets:
+                self.backupFileSet(s, targetdir, excludes, since)
+
+            logfile.info("Stopped: " + datetime.datetime.now().ctime())
+        finally:
+            # Detach and close the file logger, such that it neither leaks
+            # nor receives the messages of later backups
+            logfile.removeHandler(fil)
+            fil.close()
+
+        # Rename backup directory to final name
+        os.rename( targetdir, os.path.join(basedir, dirname) )
+
+        # We made a full backup -- recall checksum of config
+        if mode == "full":
+            with open( os.path.join(basedir, self.conf.checksumfn), "w") as f:
+                f.write( self.conf.checksum )
+
+
+
+    def prune(self):
+        """Prune old backup files. Directories in backupdir which are not
+        named like a backup and, for every epoch with a unit, all but the
+        numkeeps most recent backups are listed and, after confirmation by
+        the user, removed."""
+
+        allDirs = sorted(self.listAllDirs())
+
+        # Parse every backup directory once, keyed by its directory name
+        backupDirs = [ d for d in allDirs if Backup.isBackupDir(d) ]
+        backups = { d : Backup.fromDirName(d) for d in backupDirs }
+
+        # Collect all directories not matching backup name
+        removeDirs = [ d for d in allDirs if d not in backups ]
+
+        # For each epoch, the backups beyond the numkeeps most recent ones
+        # are outdated. The names come from the directory listing itself,
+        # hence every entry of removeDirs is an existing directory.
+        for e in self.conf.getRealEpochsSorted():
+            epoch = self.conf.epochs[e]
+            ofepoch = sorted( [ d for d in backupDirs if backups[d].epoch == e ], \
+                    key=lambda d : backups[d].date, reverse=True)
+            removeDirs += ofepoch[epoch.numkeeps:]
+
+        marked = set(removeDirs)
+
+        logging.info("List of stale/outdated entries:")
+        for d in allDirs:
+            if d in marked:
+                msg = "[*]  "
+            else:
+                msg = "[ ]  "
+
+            if d in backups:
+                msg += backups[d].colAlignedString()
+            else:
+                msg += d
+
+            logging.info(msg)
+
+        if len(removeDirs) == 0:
+            logging.info("No stale/outdated entries to remove.")
+            return
+
+        basedir = self.conf.backupdir
+        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
+        if yesno == "y":
+            for d in removeDirs:
+                try:
+                    shutil.rmtree(os.path.join(basedir, d))
+                except OSError as e:
+                    logging.error("Error when removing '%s': %s" % (d,e.strerror) )
+
+
+    def ask_user_yesno(self, question):
+        """Ask the user the given question and return the answer. The user
+        is only asked if the console log level is INFO or lower; otherwise
+        "y" is returned."""
+        if LogConf.con.level <= logging.INFO:
+            return input(question)
+        else:
+            return "y"
+
+
+def printUsage():
+    """Print --help text"""
+
+    prog = sys.argv[0]
+    usage = [
+        "shbackup - a simple backup solution.",
+        "",
+        "Usage:",
+        "  " + prog + " {options} [cmd]",
+        "  " + prog + " --help",
+        "",
+        "Commands:",
+        "  backup                     make a new backup, if necessary",
+        "  list                       list all backups (default)",
+        "  prune                      prune outdated/old backups",
+        "",
+        "Options:",
+        "  -h, --help                 print this usage text",
+        "  -c, --conf FILE            use given configuration file",
+        "                             default: /etc/shbackup.conf",
+        "  -e, --epoch EPOCH          force to create backup for given epoch, which",
+        "                             can be 'sporadic' or one of the configured epochs",
+        "  -m, --mode MODE            override mode: full, diff, or incr",
+        "  -v, --verbose              be more verbose and interact with user",
+        "  --verbosity LEVEL          set verbosity to LEVEL, which can be",
+        "                             error, warning, info, debug",
+        "  -V, --version              print version info",
+    ]
+
+    for line in usage:
+        print(line)
+
+
+
+class LogConf:
+    """Holds the console log handler and sets up the loggers."""
+
+    # Console handler writing to stderr; its level is changed by the
+    # verbosity options
+    con = logging.StreamHandler(sys.stderr)
+
+    @classmethod
+    def setup(cls):
+        """Let the root logger pass everything to the console handler,
+        which shows warnings and above, and set the level of the logger
+        "backuplog" to DEBUG."""
+        root = logging.getLogger()
+        root.setLevel(logging.DEBUG)
+
+        console = cls.con
+        console.setLevel(logging.WARNING)
+        root.addHandler(console)
+
+        logging.getLogger("backuplog").setLevel(logging.DEBUG)
+
+
+if __name__ == "__main__":
+
+    # Parse the command line, run the requested command and exit with
+    # status 1 on any usage or configuration error.
+
+    LogConf.setup()
+
+    conffn = "/etc/shbackup.conf"
+    cmd = "list"
+    mode = None
+    epoch = None
+
+    # Options which consume the following command line argument
+    valueopts = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]
+
+    i = 0
+    while i < len(sys.argv)-1:
+        i += 1
+        opt = sys.argv[i]
+
+        # Fetch the argument of the option, and complain if it is missing
+        # instead of running past the end of sys.argv
+        optarg = None
+        if opt in valueopts:
+            i += 1
+            if i >= len(sys.argv):
+                logging.error("Option '" + opt + "' requires an argument.")
+                sys.exit(1)
+            optarg = sys.argv[i]
+
+        if opt in ["-h", "--help"]:
+            printUsage()
+            sys.exit(0)
+
+        elif opt in ["-c", "--conf"]:
+            conffn = optarg
+
+        elif opt in ["-V", "--version"]:
+            print("shbackup " + __version__)
+            sys.exit(0)
+
+        elif opt in ["-v", "--verbose"]:
+            LogConf.con.setLevel(logging.INFO)
+
+        elif opt in ["--verbosity"]:
+            level = optarg
+            numlevel = getattr(logging, level.upper(), None)
+            if not isinstance(numlevel, int):
+                logging.error("Invalid verbosity level: %s" % level)
+                sys.exit(1)
+            LogConf.con.setLevel(numlevel)
+
+        elif opt in ["-m", "--mode"]:
+            mode = optarg
+            if mode not in Modes:
+                logging.error("Unknown mode '" + mode + "'.")
+                sys.exit(1)
+
+        elif opt in ["-e", "--epoch"]:
+            epoch = optarg
+
+        elif opt in ["backup", "list", "prune"]:
+            cmd = opt
+
+        else:
+            logging.error("Unknown option: " + opt)
+            sys.exit(1)
+
+    try:
+        man = BackupManager(conffn)
+
+        logging.debug("Config: " + str(man.conf))
+
+        if epoch is not None and epoch not in man.conf.epochs:
+            logging.error("Unknown epoch '" + epoch + "'.")
+            sys.exit(1)
+
+        if cmd == "backup":
+            man.backup(epoch, mode)
+
+        if cmd == "list":
+            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
+                print(b.colAlignedString())
+
+        if cmd == "prune":
+            man.prune()
+
+    except (Config.ReadError, configparser.Error) as e:
+        # A broken configuration is a failure: report it in the exit status
+        logging.error("Error: " + e.message)
+        sys.exit(1)
+
+
+
+