Make epochs configurable with exclude patterns
[sitarba.git] / shbackup
index 0cfc4c74f3a03f5a0d2c846ef7c191f11600770f..e97dbf5182790944bfd2eb1c26987d6e8503ae2e 100755 (executable)
--- a/shbackup
+++ b/shbackup
@@ -1,5 +1,5 @@
 #!/usr/bin/python3
-"""Stefan Huber's simplistic backup solution."""
+"""A simple backup solution."""
 
 __version__ = "0.1"
 __author__ = "Stefan Huber"
@@ -13,18 +13,95 @@ import random, re
 import logging
 
 
-Mode = ["full", "incr", "diff"]
+Modes = ["full", "incr", "diff"]
 
-RealEpoch = { \
-        "hour" : datetime.timedelta(0, 3600), \
-        "day" : datetime.timedelta(1), \
-        "week" : datetime.timedelta(7), \
-        "month" : datetime.timedelta(30), \
-        "year" : datetime.timedelta(365) }
+class Epoch:
 
-Epoch = dict(RealEpoch, **{ \
-        "sporadic" : datetime.timedelta(0,0) \
-        })
+    units = {
+                "hour" : datetime.timedelta(0, 3600),
+                "day" : datetime.timedelta(1),
+                "week" : datetime.timedelta(7),
+                "month" : datetime.timedelta(31),
+                "year" : datetime.timedelta(365) }
+
+    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
+        self.unit = unit
+        self.mult = mult
+        self.mode = mode
+        self.numkeeps = numkeeps
+        self.excludes = []
+
+    def __repr__(self):
+        return "[unit: " + repr(self.unit) + \
+                ", mult:" + repr(self.mult) + \
+                ", mode: " + repr(self.mode) + \
+                ", numkeeps: " + repr(self.numkeeps) + \
+                ", excludes: " + repr(self.excludes) + "]"
+
+    def getTimeDelta(self):
+        if self.unit == None:
+            return None
+        return self.mult*Epoch.units[self.unit]
+
+    def isRipe(self, oldest, now):
+
+        if self.unit==None:
+            return True
+
+        delta = now-oldest
+        mult = self.mult
+
+        if delta >= self.getTimeDelta():
+            return True
+
+        if self.unit == "hour":
+            return abs(now.hour - oldest.hour) >= mult
+        elif self.unit == "day":
+            return abs(now.day - oldest.day) >= mult
+        elif self.unit == "week":
+            return abs(now.isocalendar()[1] - oldest.isocalendar()[1]) >= mult
+        elif self.unit == "month":
+            return abs(now.month - oldest.month) >= mult
+        elif self.unit == "year":
+            return abs(now.year - oldest.year) >= mult
+
+        return None
+
+
+    @staticmethod
+    def parseTimedelta( deltastr ):
+        tokens = [ s.strip() for s in deltastr.split("*") ]
+        unit = None
+        mult = 1
+        if len(tokens) == 1:
+            unit = tokens[0]
+        elif len(tokens) == 2:
+            mult = int(tokens[0])
+            unit = tokens[1]
+        else:
+            raise ValueError("Invalid format: '{0}'".format(deltastr))
+
+        if not unit in Epoch.units:
+            raise ValueError("Unknown unit '{0}'".format(unit))
+
+        if mult <= 0:
+            raise ValueError("Non-positive factor '{0}' given.".format(mult))
+
+        return mult, unit
+
+
+
+class FileSet:
+    """A fileset has a name, a list of directories and a list of exclude patterns."""
+    def __init__(self, name, dirs, excludes):
+        self.name = name
+        self.dirs = dirs
+        self.excludes = excludes
+
+    def __repr__(self):
+        return "[name: " + self.name + \
+                ", dirs: " + str(self.dirs) + \
+                ", excludes: " + str(self.excludes) + "]"
 
 
 class Backup:
@@ -34,15 +111,13 @@ class Backup:
         self.date = date
         self.epoch = epoch
         self.mode = mode
+        self.excludes = []
 
     @staticmethod
     def fromDirName(dirname):
             [strdate, strtime, epoch, mode] = dirname.split("-")
 
-            if not epoch in Epoch.keys():
-                raise ValueError("Invalid epoch: " + epoch)
-
-            if not mode in Mode:
+            if not mode in Modes:
                 raise ValueError("Invalid mode: " + mode)
 
             date = datetime.datetime(int(strdate[0:4]),
@@ -51,7 +126,7 @@ class Backup:
 
             return Backup(date, epoch, mode)
 
-    def __str__(self):
+    def __repr__(self):
         return "[date: " + self.date.ctime() + \
                 ", epoch: " + self.epoch + \
                 ", mode: " + self.mode + "]"
@@ -63,8 +138,9 @@ class Backup:
             agestr = "(%s h)" % int(total_hours)
         else:
             agestr = "(%s d)" % age.days
-        return "%16s  %7s  %8s  %4s" % ( \
-                self.date.strftime("%Y-%m-%d %H:%M"), agestr, self.epoch, self.mode)
+        return "%16s  %7s  %10s  %4s" % (
+                self.date.strftime("%Y-%m-%d %H:%M"), agestr,
+                self.epoch, self.mode)
 
     @staticmethod
     def getDirName(date, epoch, mode):
@@ -88,14 +164,6 @@ class Config:
             self.value = value
             self.message = value
 
-    class FileSet:
-        """A fileset has a name and a list of directories."""
-        def __init__(self, name, dirs):
-            self.name = name
-            self.dirs = dirs
-
-        def __str__(self):
-            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"
 
     formats = ["tar", "tar.gz", "tar.bz2", "tar.xz" ]
 
@@ -103,22 +171,115 @@ class Config:
     checksumfn = "checksum"
 
     def __init__(self):
-        self.directory = "/media/backup"
+        self.directory = None
         self.format = self.formats[0]
-        self.epochkeeps = { k : 0 for k in RealEpoch.keys() }
-        self.epochmodes = { k : "full" for k in RealEpoch.keys() }
-        self.exclpatterns = []
+        self.excludes = []
         self.sets = []
         self.checksum = None
         self.lastchecksum = None
+        self.epochs = Epochs = { "sporadic" : Epoch() }
+
 
-    def __str__(self):
+    def __repr__(self):
         return "[directory: " + self.directory + \
                                  ", format: " + self.format + \
-                                 ", keeps: " + str(self.epochkeeps) + \
-                                 ", modes: " + str(self.epochmodes) + \
-                                 ", exclpatterns: " + str(self.exclpatterns) + \
-                                 ", sets: " + str([str(s) for s in self.sets]) + "]"
+                                 ", excludes: " + repr(self.excludes) + \
+                  ", epochs: " + repr(self.epochs) + \
+                                 ", sets: " + repr(self.sets) + "]"
+
+    def getRealEpochsSorted(self):
+        """Return the names of all epochs which have a non-None unit, sorted
+        by Epoch.getTimeDelta(), starting with the longest delta."""
+        epochs = self.epochs
+        realepochs = [ e for e in epochs.keys() if epochs[e].unit != None ]
+        deltakey = lambda e: epochs[e].getTimeDelta()
+        realepochs.sort(key=deltakey, reverse=True)
+        return realepochs
+
+
+    def _read_destination(self, config, sec):
+        for opt in config.options(sec):
+            if opt=="directory":
+                self.directory = config.get(sec, opt)
+                if not os.path.isdir(self.directory):
+                    raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))
+            elif opt=="format":
+                self.format = config.get(sec, opt)
+                if not self.format in Config.formats:
+                    raise Config.ReadError("Invalid 'format' given.")
+            else:
+                raise Config.ReadError("Unknown option '{0}'.".format(opt))
+
+
+    def _read_global(self, config, sec):
+        for opt in config.options(sec):
+            if opt.startswith("exclude"):
+                self.excludes += [ config.get(sec, opt) ]
+            else:
+                raise Config.ReadError("Unknown option '{0}'.".format(opt))
+
+
+    def _read_epoch(self, config, sec):
+        name = sec[6:].strip()
+        e = Epoch()
+        if name in self.epochs:
+            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
+        if name in Epoch.units:
+            e.unit = name
+
+        for opt in config.options(sec):
+            if opt=="numkeeps":
+                try:
+                    e.numkeeps = int(config.getint(sec, opt))
+                except ValueError:
+                    raise Config.ReadError("Invalid integer given for '{0}'.".format(opt))
+                if e.numkeeps <= 0:
+                    raise Config.ReadError("Non-positive numkeeps '{0}' given.".format(e.numkeeps))
+
+            elif opt=="mode":
+                e.mode = config.get(sec, opt)
+                if not e.mode in Modes:
+                    raise Config.ReadError("Invalid mode '{0}'.".format(e.mode))
+
+            elif opt=="timedelta":
+                if name in Epoch.units:
+                    raise Config.ReadError("The time delta of a standard epoch " + \
+                            "is not supposed to be redefined. ")
+                td = config.get(sec,opt)
+                try:
+                    mult, unit = Epoch.parseTimedelta(td)
+                    e.unit = unit
+                    e.mult = mult
+                except ValueError as e:
+                    raise Config.ReadError("Invalid timedelta '{0}': {1}".format(td, str(e)))
+
+            elif opt.startswith("exclude"):
+                e.excludes += [config.get(sec, opt)]
+
+            else:
+                raise Config.ReadError("Unknown option '" + opt + "'.")
+
+        if e.numkeeps == None:
+            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))
+
+        self.epochs[name] = e
+
+
+    def _read_set(self, config, sec):
+        name = sec[4:].strip()
+        dirs = []
+        excludes = []
+
+        for opt in config.options(sec):
+            if opt.startswith("dir"):
+                dirs += [config.get(sec, opt)]
+            elif opt.startswith("exclude"):
+                excludes += [config.get(sec,opt)]
+            else:
+                raise Config.ReadError("Unknown option '" + opt + "'.")
+
+        self.sets += [FileSet(name, dirs, excludes)]
+
 
     def read(self, filename):
         """Read configuration from file"""
@@ -131,60 +292,29 @@ class Config:
 
         for reqsec in ["destination"]:
             if not config.has_section(reqsec):
-                raise Config.ReadError("Section '" + reqsec + "' is missing.")
-
-        self.directory = config.get("destination", "directory")
-        if not os.path.isdir(self.directory):
-            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))
-
-        self.format = config.get("destination", "format")
-        if not self.format in Config.formats:
-            raise Config.ReadError("Invalid 'format' given.")
-
-
-        if config.has_section("history"):
-            for opt in config.options("history"):
-                if opt.startswith("keep"):
-                    epoch = opt[4:]
-                    if not epoch in RealEpoch.keys():
-                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
-                    try:
-                        self.epochkeeps[epoch] = int(config.getint("history", opt))
-                    except ValueError:
-                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
-                elif opt.startswith("mode"):
-                    epoch = opt[4:]
-                    if not epoch in RealEpoch.keys():
-                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
-                    self.epochmodes[epoch] = config.get("history", opt)
-                    if not self.epochmodes[epoch] in Mode:
-                        raise Config.ReadError("Invalid mode given.")
-                else:
-                    raise Config.ReadError("Invalid option '" + opt + "'.")
-
-        if config.has_section("input"):
-            for opt in config.options("input"):
-                if opt.startswith("exclude"):
-                    self.exclpatterns += [ config.get("input", opt) ]
-                else:
-                    raise Config.ReadError("Invalid option '" + opt + "'.")
+                raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.")
 
         for sec in config.sections():
-            if sec in ["destination", "history", "input"]:
-                continue
+
+            if sec=="destination":
+                self._read_destination(config, sec)
+
+            elif sec=="global":
+                self._read_global(config, sec)
+
+            elif sec.startswith("epoch "):
+                self._read_epoch(config, sec)
+
             elif sec.startswith("set "):
-                name = sec[4:].strip()
-                dirs = []
-
-                for opt in config.options(sec):
-                    if not opt.startswith("dir"):
-                        raise Config.ReadError("Unknown option '" + opt + "'.")
-                    else:
-                        dirs += [config.get(sec, opt)]
-                self.sets += [Config.FileSet(name, dirs)]
+                self._read_set(config, sec)
+
             else:
                 raise Config.ReadError("Unknown section '" + sec + "'.")
 
+        if self.directory == None:
+            raise Config.ReadError("No destination directory set.")
+
+
         # Compute checksum of config file
         m = hashlib.sha1()
         f = open(filename, 'rb')
@@ -231,14 +361,14 @@ class BackupManager:
         return backups
 
 
-    def getDesiredEpoch(self, backups, now):
+    def getDesiredEpochs(self, backups, now):
         """Get desired epoch based on self.configuration and list of old backups"""
 
         # Find the longest epoch for which we would like the make a backup
         latest = datetime.datetime(1900, 1, 1)
-        for timespan, e in reversed(sorted( [ (Epoch[e], e) for e in RealEpoch ] )):
-            # We make backups of that epoch
-            if self.conf.epochkeeps[e] == 0:
+        for e in self.conf.getRealEpochsSorted():
+            epoch = self.conf.epochs[e]
+            if epoch.numkeeps <= 0:
                 continue
 
             # Get backups of that epoch
@@ -249,8 +379,7 @@ class BackupManager:
             if len(byepoch) > 0:
                 latest = max(latest, byepoch[-1].date )
 
-            # the latest backup is too old
-            if now-latest > timespan:
+            if epoch.isRipe(latest, now):
                 return e
 
         # No backup is to be made
@@ -258,7 +387,7 @@ class BackupManager:
 
 
 
-    def backupFileSet(self, fileset, targetdir, since=None):
+    def backupFileSet(self, fileset, targetdir, excludes, since=None):
         """Create an archive for given fileset at given target directory."""
 
         logfile = logging.getLogger('backuplog')
@@ -274,7 +403,11 @@ class BackupManager:
             taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]
 
         # Add the exclude patterns
-        for pat in self.conf.exclpatterns:
+        for pat in excludes:
+            taropts += ["--exclude", pat]
+
+        # Add the exclude patterns of the fileset
+        for pat in fileset.excludes:
             taropts += ["--exclude", pat]
 
         # Adding directories to backup
@@ -324,15 +457,15 @@ class BackupManager:
 
         # Get epoch of backup
         if epoch == None:
-            epoch = self.getDesiredEpoch(oldbackups, now)
+            epoch = self.getDesiredEpochs(oldbackups, now)
         if epoch == None:
             logging.info("No backup planned.")
             return
 
         # Get mode of backup
         if mode == None:
-            mode = self.conf.epochmodes[epoch]
-        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)
+            mode = self.conf.epochs[epoch].mode
+        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)
 
         oldfullbackups = [ b for b in oldbackups if b.mode == "full" ]
 
@@ -377,7 +510,8 @@ class BackupManager:
 
         # Backup all file sets
         for s in self.conf.sets:
-            self.backupFileSet(s, targetdir, since)
+            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
+            self.backupFileSet(s, targetdir, excludes, since)
 
         logfile.info("Stopped: " + datetime.datetime.now().ctime())
 
@@ -403,10 +537,10 @@ class BackupManager:
         backups = self.listOldBackups()
         keepdirs = []
         byepoch = { e : list(sorted( [ b for b in backups if b.epoch == e ], \
-                key=lambda b : b.date, reverse=True)) for e in RealEpoch }
+                key=lambda b : b.date, reverse=True)) for e in self.conf.getRealEpochsSorted() }
         for e in byepoch:
-            keep = self.conf.epochkeeps[e]
-            old = byepoch[e][keep:]
+            epoch = self.conf.epochs[e]
+            old = byepoch[e][epoch.numkeeps:]
             removeDirs += [ Backup.getDirName(b.date, b.epoch, b.mode) for b in old]
 
 
@@ -536,17 +670,13 @@ if __name__ == "__main__":
         elif opt in ["-m", "--mode"]:
             i += 1
             mode = sys.argv[i]
-            if not mode in Mode:
+            if not mode in Modes:
                 logging.error("Unknown mode '" + mode + "'.")
                 exit(1)
 
         elif opt in ["-e", "--epoch"]:
             i += 1
             epoch = sys.argv[i]
-            if not epoch in Epoch:
-                logging.error("Unknown epoch '" + epoch + "'.")
-                exit(1)
-
 
         elif opt in ["backup", "list", "prune"]:
             cmd = opt
@@ -558,6 +688,12 @@ if __name__ == "__main__":
     try:
         man = BackupManager(conffn)
 
+        logging.debug("Config: " + str(man.conf))
+
+        if epoch!=None and not epoch in man.conf.epochs.keys():
+            logging.error("Unknown epoch '" + epoch + "'.")
+            exit(1)
+
         if cmd == "backup":
             man.backup(epoch, mode)
 
@@ -568,8 +704,8 @@ if __name__ == "__main__":
         if cmd == "prune":
             man.prune()
 
-    except (Config.ReadError, configparser.DuplicateOptionError) as e:
-        logging.error("Error reading config file: " + e.message)
+    except (Config.ReadError, configparser.Error) as e:
+        logging.error("Error: " + e.message)