Initial commit
author Stefan Huber <shuber2@gmx.at>
Sun, 13 May 2012 16:22:37 +0000 (18:22 +0200)
committer Stefan Huber <shuber2@gmx.at>
Sun, 13 May 2012 16:22:37 +0000 (18:22 +0200)
shbackup.py [new file with mode: 0755]

diff --git a/shbackup.py b/shbackup.py
new file mode 100755 (executable)
index 0000000..0aedc6f
--- /dev/null
@@ -0,0 +1,366 @@
+#!/usr/bin/python3
+"""Stefan Huber's simplistic backup solution."""
+
+import datetime
+import os, shutil, sys
+import configparser
+import hashlib
+import subprocess
+import random, re
+
+
+class Config:
+    """Encapsulates the configuration for the backup program.
+
+    Attributes initialised by __init__() and filled in by read():
+      directory     -- destination directory holding the backups
+      format        -- archive format, one of Config.formats
+      epochkeeps    -- per epoch name: number of backups to keep
+      epochmodes    -- per epoch name: backup mode, one of Mode
+      exclpatterns  -- exclude patterns from the [input] section
+      sets          -- list of Config.FileSet objects
+      checksum      -- SHA-1 hex digest of the config file
+      lastchecksum  -- content of the checksum file in the destination
+                       directory, or None if it cannot be read
+    """
+
+    class ReadException(Exception):
+        """An exception raised when reading configurations."""
+        pass
+
+    class FileSet:
+        """A fileset has a name and a list of directories."""
+        def __init__(self, name, dirs):
+            self.name = name
+            self.dirs = dirs
+
+        def __str__(self):
+            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"
+
+    # Archive formats accepted for the 'format' option
+    formats = ["tar.gz", "tar.bz2", "tar.xz"]
+
+    # Filename where checksum of config is saved
+    checksumfn = "checksum"
+
+    def __init__(self):
+        """Set up defaults: keep nothing, and use mode 'full' for every epoch."""
+        self.directory = "/media/backup"
+        self.format = self.formats[0]
+        self.epochkeeps = { k : 0 for k in Epoch.keys() }
+        self.epochmodes = { k : "full" for k in Epoch.keys() }
+        self.exclpatterns = []
+        self.sets = []
+        self.checksum = None
+        self.lastchecksum = None
+
+    def __str__(self):
+        return ("[directory: " + self.directory +
+                ", format: " + self.format +
+                ", keeps: " + str(self.epochkeeps) +
+                ", modes: " + str(self.epochmodes) +
+                ", exclpatterns: " + str(self.exclpatterns) +
+                ", sets: " + str([str(s) for s in self.sets]) + "]")
+
+    def read(self, filename):
+        """Read configuration from file.
+
+        Recognised sections are 'destination' (required), 'history',
+        'input' and any number of 'set <name>' sections.
+
+        Raises Config.ReadException if the file cannot be read, if the
+        'destination' section is missing, or if an unknown section, option
+        or value is found. Errors raised by configparser itself (e.g. for
+        a missing 'directory' option) propagate unchanged.
+        """
+
+        config = configparser.RawConfigParser()
+        # RawConfigParser.read() silently skips files it cannot open and
+        # returns the list of files it did parse -- report that explicitly
+        # rather than complaining about a missing section later on.
+        if not config.read(filename):
+            raise Config.ReadException("Cannot read config file '" + \
+                    str(filename) + "'.")
+
+        for reqsec in ["destination"]:
+            if not config.has_section(reqsec):
+                raise Config.ReadException("Section '" + reqsec + "' is missing.")
+
+        self.directory = config.get("destination", "directory")
+
+        self.format = config.get("destination", "format")
+        if self.format not in Config.formats:
+            raise Config.ReadException("Invalid 'format' given.")
+
+        if config.has_section("history"):
+            for opt in config.options("history"):
+                # Options look like 'keep<epoch>' or 'mode<epoch>'
+                if opt.startswith("keep"):
+                    epoch = opt[4:]
+                    if epoch not in Epoch:
+                        raise Config.ReadException("Invalid option 'keep" + epoch + "'.")
+                    self.epochkeeps[epoch] = config.getint("history", opt)
+                elif opt.startswith("mode"):
+                    epoch = opt[4:]
+                    if epoch not in Epoch:
+                        raise Config.ReadException("Invalid option 'mode" + epoch + "'.")
+                    self.epochmodes[epoch] = config.get("history", opt)
+                    if self.epochmodes[epoch] not in Mode:
+                        raise Config.ReadException("Invalid mode given.")
+                else:
+                    raise Config.ReadException("Invalid option '" + opt + "'.")
+
+        if config.has_section("input"):
+            for opt in config.options("input"):
+                if opt.startswith("exclude"):
+                    self.exclpatterns.append(config.get("input", opt))
+                else:
+                    raise Config.ReadException("Invalid option '" + opt + "'.")
+
+        for sec in config.sections():
+            if sec in ["destination", "history", "input"]:
+                continue
+            elif sec.startswith("set "):
+                # Section '[set <name>]' lists the directories of one file set
+                name = sec[4:].strip()
+                dirs = []
+
+                for opt in config.options(sec):
+                    if not opt.startswith("dir"):
+                        raise Config.ReadException("Unknown option '" + opt + "'.")
+                    dirs.append(config.get(sec, opt))
+                self.sets.append(Config.FileSet(name, dirs))
+            else:
+                raise Config.ReadException("Unknown section '" + sec + "'.")
+
+        # Compute checksum of config file
+        with open(filename, 'rb') as f:
+            self.checksum = hashlib.sha1(f.read()).hexdigest()
+
+        # Recall the checksum stored in the destination directory, if any
+        try:
+            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
+                self.lastchecksum = f.read().strip()
+        except IOError:
+            self.lastchecksum = None
+
+
+# Valid backup modes: full, incremental and differential.
+Mode = ["full", "incr", "diff"]
+
+# Time span associated with each epoch, keyed by epoch name.
+Epoch = {
+    "hour": datetime.timedelta(hours=1),
+    "day": datetime.timedelta(days=1),
+    "week": datetime.timedelta(days=7),
+    "month": datetime.timedelta(days=30),
+    "year": datetime.timedelta(days=365),
+}
+
+class Backup:
+    """One backup, described by its date, its epoch and its mode."""
+
+    def __init__(self, date, epoch, mode):
+        """Store the datetime, the epoch name and the mode name."""
+        self.date, self.epoch, self.mode = date, epoch, mode
+
+    def __str__(self):
+        parts = ["[date: ", self.date.ctime(),
+                 ", epoch: ", self.epoch,
+                 ", mode: ", self.mode, "]"]
+        return "".join(parts)
+
+    @staticmethod
+    def getDirName(date, epoch, mode):
+        """Build the directory name '<YYYYmmdd>-<HHMM>-<epoch>-<mode>'."""
+        stamp = date.strftime("%Y%m%d-%H%M")
+        return "-".join([stamp, epoch, mode])
+
+    @staticmethod
+    def isBackupDir(dirname):
+        """Return a match object if dirname looks like a backup directory
+        name as produced by getDirName(), and None otherwise."""
+        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
+
+
+class BackupManager:
+    """List, create and prune backups in the configured destination.
+
+    Each backup lives in its own sub-directory of the destination
+    directory, named by Backup.getDirName(). While a backup is being
+    written its directory carries an additional random suffix, so that it
+    is not recognised as a finished backup.
+    """
+
+    def __init__(self, conffn):
+        """Read the configuration from the file named conffn."""
+        self.conf = Config()
+        self.conf.read(conffn)
+
+
+    def listAllDirs(self):
+        """List the names of all directories in the destination directory."""
+
+        basedir = self.conf.directory
+        return [ d for d in os.listdir(basedir) \
+                if os.path.isdir(os.path.join(basedir, d)) ]
+
+    def listOldBackups(self):
+        """Return a list of Backup objects, one per existing backup directory.
+
+        Raises ValueError if a directory name carries an unknown epoch or
+        mode, or an impossible date.
+        """
+
+        backups = []
+
+        for entry in filter(Backup.isBackupDir, self.listAllDirs()):
+            # Backup.isBackupDir() guarantees exactly four '-'-separated parts
+            strdate, strtime, epoch, mode = entry.split("-")
+
+            if epoch not in Epoch:
+                raise ValueError("Invalid epoch: " + epoch)
+
+            if mode not in Mode:
+                raise ValueError("Invalid mode: " + mode)
+
+            date = datetime.datetime(int(strdate[0:4]),
+                    int(strdate[4:6]), int(strdate[6:8]),
+                    int(strtime[0:2]), int(strtime[2:4]))
+            backups.append(Backup(date, epoch, mode))
+
+        return backups
+
+
+    def getDesiredEpoch(self, backups, now):
+        """Get desired epoch based on configuration and list of old backups.
+
+        Epochs are visited from the longest time span to the shortest. The
+        first epoch whose latest backup -- also counting backups of longer
+        epochs -- is older than the epoch's time span is returned. Returns
+        None if no backup is due.
+        """
+
+        # Find the longest epoch for which we would like to make a backup
+        latest = datetime.datetime(1900, 1, 1)
+        bylength = sorted([ (Epoch[name], name) for name in Epoch ], reverse=True)
+        for timespan, e in bylength:
+            # Epochs of which no backups are kept are skipped
+            if self.conf.epochkeeps[e] == 0:
+                continue
+
+            # Get dates of the backups of that epoch
+            dates = [ b.date for b in backups if b.epoch == e ]
+
+            # If there are any, determine the latest
+            if dates:
+                latest = max(latest, max(dates))
+
+            # The latest backup is too old
+            if now - latest > timespan:
+                return e
+
+        # No backup is to be made
+        return None
+
+
+    def backupFileSet(self, fileset, targetdir, since=None):
+        """Create an archive for given fileset at given target directory.
+
+        fileset   -- object with attributes 'name' and 'dirs'
+        targetdir -- directory in which '<name>.<format>' is created
+        since     -- if not None, a datetime handed to tar's -N option, so
+                     that only newer files are archived
+
+        The output of tar is echoed while tar is running. A non-zero exit
+        status of tar is reported on stdout; no exception is raised for it.
+        """
+
+        print("Running file set: " + fileset.name)
+        tarpath = "/bin/tar"
+        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
+
+        taropts = ["-cpva"]
+
+        if since is not None:
+            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]
+
+        for pat in self.conf.exclpatterns:
+            taropts += ["--exclude", pat]
+
+        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
+        print("tarargs: ", tarargs)
+
+        # Merge stderr into stdout: draining two pipes one after the other
+        # can deadlock as soon as the pipe not being read fills up.
+        tarp = subprocess.Popen( tarargs, \
+                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+
+        # Iterating the pipe yields lines until tar closes it, i.e. until
+        # tar has terminated. File names need not be valid UTF-8.
+        for l in tarp.stdout:
+            print(l.decode(errors="replace"), end="")
+        tarp.stdout.close()
+
+        rett = tarp.wait()
+        if rett != 0:
+            # The diagnostics of tar have already been echoed above
+            print(tarpath + " returned with exit status " + str(rett) + ".")
+
+
+    def backup(self):
+        """Make a new backup, if necessary.
+
+        A mode other than 'full' is turned into 'full' if no full backup
+        exists yet, as there would be nothing to base the backup on.
+        """
+
+        now = datetime.datetime.now()
+        oldbackups = self.listOldBackups()
+        epoch = self.getDesiredEpoch(oldbackups, now)
+
+        if epoch is None:
+            print("No backup planned.")
+            return
+
+        # Get mode of backup
+        mode = self.conf.epochmodes[epoch]
+        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)
+
+        oldfullbackups = [ b for b in oldbackups if b.mode == "full" ]
+
+        # No old full backups existing
+        if mode != "full" and not oldfullbackups:
+            print("No full backups existing. Making a full backup.")
+            mode = "full"
+
+        # Checksum changed -> config file changed
+        if self.conf.checksum != self.conf.lastchecksum:
+            print("Config file changed since last time.")
+            if mode != "full":
+                print("** Warning: full backup recommended!")
+
+        # Create new target directory. The random suffix keeps the name
+        # from matching Backup.isBackupDir() until the backup is complete.
+        # Note that '%x' requires an integer.
+        basedir = self.conf.directory
+        dirname = Backup.getDirName(now, epoch, mode)
+        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
+        targetdir = os.path.join(basedir, tmpdirname)
+        os.mkdir( targetdir )
+
+        # If we have a full backup, we backup everything
+        since = None
+
+        # Get latest full backup time
+        if mode == "diff":
+            since = max(b.date for b in oldfullbackups)
+        # Get latest backup time
+        elif mode == "incr":
+            since = max(b.date for b in oldbackups)
+
+        # Backup all file sets
+        for s in self.conf.sets:
+            self.backupFileSet(s, targetdir, since)
+
+        # Rename backup directory to final name
+        os.rename( targetdir, os.path.join(basedir, dirname) )
+
+        # We made a full backup -- recall checksum of config
+        if mode == "full":
+            with open( os.path.join(basedir, self.conf.checksumfn), "w") as f:
+                f.write( self.conf.checksum )
+
+
+    def prune(self):
+        """Prune old backup files.
+
+        Candidates for removal are all directories in the destination that
+        are not backup directories, plus, for every epoch, all backups
+        beyond the configured number of most recent ones. The candidates
+        are listed and removed only after the user confirmed with 'y'.
+        """
+
+        # Directories that are no backups at all
+        dirs = [ d for d in self.listAllDirs() if not Backup.isBackupDir(d) ]
+
+        backups = self.listOldBackups()
+        for e in Epoch:
+            # Newest first, so that the first 'keep' entries survive
+            newestfirst = list(reversed(sorted( \
+                    [ b for b in backups if b.epoch == e ], \
+                    key=lambda b: b.date)))
+            keep = self.conf.epochkeeps[e]
+            dirs += [ Backup.getDirName(b.date, b.epoch, b.mode) \
+                    for b in newestfirst[keep:] ]
+
+        if not dirs:
+            print("No stale/outdated entries to remove.")
+            return
+
+        print("List of stale/outdated entries:")
+        for d in dirs:
+            print("  " + d)
+
+        basedir = self.conf.directory
+        yesno = input("Remove listed entries? [y,N] ")
+        if yesno == "y":
+            for d in dirs:
+                shutil.rmtree(os.path.join(basedir,d))
+
+
+if __name__ == "__main__":
+
+    # The config file is the first command line argument, if there is
+    # one, and "shbackup.conf" in the current directory otherwise.
+    conffn = sys.argv[1] if len(sys.argv) > 1 else "shbackup.conf"
+
+    # Make a backup if one is due, then offer to remove outdated entries
+    manager = BackupManager(conffn)
+    manager.backup()
+    manager.prune()
+
+
+
+