Initial commit
[sitarba.git] / shbackup.py
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
class Config:
    """Encapsulates the configuration for the backup program.

    A fresh instance carries defaults (destination "/media/backup", the first
    entry of `formats`, zero backups kept per epoch, mode "full" for every
    epoch, no exclude patterns, no file sets).  Call `read()` to load the
    actual values from a configuration file.
    """

    class ReadException(Exception):
        """An exception raised when reading configurations."""

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        # Destination directory of all backups
        self.directory = "/media/backup"
        # Archive format, one of `formats`
        self.format = self.formats[0]
        # Number of backups to keep, per epoch name
        self.epochkeeps = {k: 0 for k in Epoch.keys()}
        # Backup mode to use, per epoch name
        self.epochmodes = {k: "full" for k in Epoch.keys()}
        # Exclude patterns collected from the 'input' section
        self.exclpatterns = []
        # List of Config.FileSet objects
        self.sets = []
        # SHA-1 hex digest of the configuration file just read
        self.checksum = None
        # Digest stored in the destination directory, or None if unavailable
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Recognized sections:
          * 'destination' (required): options 'directory' and 'format'.
          * 'history' (optional): 'keep<epoch>' (integer) and 'mode<epoch>'.
          * 'input' (optional): options starting with 'exclude'.
          * 'set <name>': options starting with 'dir'; each such section
            becomes one Config.FileSet appended to `self.sets`.

        Afterwards `self.checksum` holds the SHA-1 hex digest of the file and
        `self.lastchecksum` the digest stored in the destination directory
        (None if that file cannot be opened).

        Raises:
            Config.ReadException: on a missing 'destination' section, an
                invalid format or mode, or an unknown section or option.
        """
        config = configparser.RawConfigParser()
        # RawConfigParser.read() silently skips files it cannot open, so an
        # unreadable file is reported below as a missing section.
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException(
                    "Section '" + reqsec + "' is missing.")

        self.directory = config.get("destination", "directory")

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in Epoch:
                        raise Config.ReadException(
                            "Invalid option 'keep" + epoch + "'.")
                    self.epochkeeps[epoch] = config.getint("history", opt)
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in Epoch:
                        raise Config.ReadException(
                            "Invalid option 'mode" + epoch + "'.")
                    mode = config.get("history", opt)
                    # Validate before storing so an invalid mode never ends
                    # up in epochmodes.
                    if mode not in Mode:
                        raise Config.ReadException("Invalid mode given.")
                    self.epochmodes[epoch] = mode
                else:
                    raise Config.ReadException(
                        "Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadException(
                        "Invalid option '" + opt + "'.")

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadException(
                            "Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadException(
                    "Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum stored in the destination directory, if any.
        # The context manager closes the handle even when reading fails.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
124
125
# Names of the supported backup modes.
Mode = ["full", "incr", "diff"]

# Backup epochs, mapped to the time span each one covers.
Epoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(weeks=1),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}
133
class Backup:
    """One backup, described by its date, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    def __str__(self):
        pieces = ("[date: ", self.date.ctime(),
                  ", epoch: ", self.epoch,
                  ", mode: ", self.mode, "]")
        return "".join(pieces)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Build the directory name of a backup with the given properties.

        The name has the form '<YYYYmmdd>-<HHMM>-<epoch>-<mode>'.
        """
        stamp = date.strftime("%Y%m%d-%H%M")
        return "-".join((stamp, epoch, mode))

    @staticmethod
    def isBackupDir(dirname):
        """Tell whether dirname looks like a backup directory name.

        Returns the regex match object (truthy) when it does, else None.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
157
158
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Load the configuration from the file named conffn."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory.

        Returns a lazy iterator over the names (not full paths) of the
        entries of the destination directory that are directories.
        """
        basedir = self.conf.directory
        return filter(lambda d: os.path.isdir(os.path.join(basedir, d)),
                      os.listdir(basedir))

    def listOldBackups(self):
        """Returns a list of old backups.

        Every directory whose name matches Backup.isBackupDir is parsed into
        a Backup object.

        Raises:
            ValueError: if a matching directory names an unknown epoch or
                mode, or encodes an invalid date.
        """
        backups = []

        for entry in filter(Backup.isBackupDir, self.listAllDirs()):
            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(
                int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Epochs are visited from the longest time span to the shortest.  The
        first epoch that is enabled (keep count not 0) and whose most recent
        backup -- counting backups of all longer enabled epochs as well -- is
        older than the epoch's time span is returned.

        Returns the epoch name, or None if no backup is to be made.
        """
        latest = datetime.datetime(1900, 1, 1)
        bylength = sorted(((Epoch[name], name) for name in Epoch),
                          reverse=True)

        for timespan, e in bylength:
            # Skip epochs for which no backups are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # `latest` deliberately carries over from longer epochs: a backup
            # of a longer epoch also satisfies the shorter ones.
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        Runs /bin/tar to write '<targetdir>/<fileset.name>.<format>' from
        fileset.dirs, applying the configured exclude patterns.  If `since`
        is given, it is passed to tar via '-N'.  The output of tar is echoed
        line by line, and a non-zero exit status is reported on stdout.
        """
        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tarargs: ", tarargs)

        # stderr is merged into stdout so that a single pipe has to be
        # drained; reading two pipes one after the other can deadlock once
        # the unread one fills up.
        with subprocess.Popen(tarargs, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT) as tarp:
            for line in tarp.stdout:
                # File names need not be valid UTF-8; never abort on them.
                print(line.decode(errors="replace"), end="")
            rett = tarp.wait()

        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ".")

    @staticmethod
    def _chooseMode(mode, oldfullbackups):
        """Return the mode to really use; force 'full' if no full backup exists."""
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            return "full"
        return mode

    @staticmethod
    def _tmpDirName(dirname):
        """Return dirname extended by a random '-<hex>' suffix.

        The extra part keeps the unfinished directory from matching
        Backup.isBackupDir.  '%x' needs an integer argument on Python 3.
        """
        return dirname + ("-%x" % random.getrandbits(64))

    def backup(self):
        """Make a new backup, if necessary.

        The archives are written to a temporary directory which is renamed to
        its final name once all file sets are done.  After a full backup the
        checksum of the configuration file is stored in the destination.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()
        epoch = self.getDesiredEpoch(oldbackups, now)

        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup
        mode = self.conf.epochmodes[epoch]
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # Without an old full backup there is nothing a diff/incr backup
        # could refer to, so fall back to a full backup.
        mode = self._chooseMode(mode, oldfullbackups)

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, self._tmpDirName(dirname))
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Candidates for removal are all directories in the destination that
        are not backup directories, plus, per epoch, every backup beyond the
        configured number of most recent ones to keep.  The candidates are
        listed and removed only if the user answers 'y'.
        """
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        backups = self.listOldBackups()
        for e in Epoch:
            keep = self.conf.epochkeeps[e]
            # Newest first, so that the first `keep` entries survive
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date)[::-1]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                     for b in newestfirst[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print("  " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y,N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
351
352
if __name__ == "__main__":

    # Take the configuration file from the first command line argument,
    # falling back to "shbackup.conf" when none is given.
    conffn = sys.argv[1] if len(sys.argv) > 1 else "shbackup.conf"

    # Make a backup if one is due, then offer to remove outdated entries.
    man = BackupManager(conffn)
    man.backup()
    man.prune()
361 man.backup()
362 man.prune()
363
364
365
366