restructuring code
[sitarba.git] / shbackup.py
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Backup modes a configuration may select for an epoch.
Mode = ["full", "incr", "diff"]

# Nominal duration of every backup epoch, keyed by the epoch's name.
Epoch = {
    "hour": datetime.timedelta(seconds=3600),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}
19
class Backup:
    """A single backup has a date, an epoch and a mode.

    date is a datetime.datetime (it is formatted with ctime()/strftime()),
    epoch and mode are plain strings.
    """

    # Backup directories are named "YYYYmmdd-HHMM-<epoch>-<mode>".
    # The pattern is compiled once instead of on every call, and it is
    # anchored with \Z rather than $ because $ also matches in front of a
    # trailing newline, which would let a malformed name slip through.
    _dirNamePattern = re.compile(r'\d{8}-\d{4}-\w+-\w+\Z')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns True if dirname has the form produced by getDirName(),
        False otherwise. Only the shape of the name is checked, not whether
        epoch and mode are known values.
        """
        return Backup._dirNamePattern.match(dirname) is not None
43
44
45
46
class Config:
    """Encapsulates the configuration for the backup program.

    Attributes:
      directory     destination directory holding the backups
      format        archive format, one of Config.formats
      epochkeeps    number of backups to keep, per epoch name
      epochmodes    backup mode, per epoch name
      exclpatterns  list of exclude patterns from the [input] section
      sets          list of Config.FileSet, one per "[set <name>]" section
      checksum      SHA-1 hex digest of the config file that was read
      lastchecksum  digest stored in the destination directory, or None
    """

    class ReadException(Exception):
        """An exception raised when reading configurations."""

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Accepted values of the 'format' option; the first one is the default.
    formats = ["tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in Epoch.keys()}
        self.epochmodes = {k: "full" for k in Epoch.keys()}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Raises Config.ReadException if the file does not exist, cannot be
        parsed, lacks a required section or option, or contains an unknown
        section, an unknown option or an invalid value.
        """
        if not os.path.isfile(filename):
            raise Config.ReadException("No file '" + filename + "'.")

        config = configparser.RawConfigParser()
        try:
            config.read(filename)
            self._readDestination(config)
            self._readHistory(config)
            self._readInput(config)
            self._readSets(config)
        except configparser.Error as e:
            # Syntax errors and missing options are reported by configparser
            # with its own exception types; callers only expect
            # ReadException, so translate them.
            raise Config.ReadException(str(e))

        self._readChecksums(filename)

    def _readDestination(self, config):
        """Read the mandatory [destination] section."""
        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException(
                    "Section '" + reqsec + "' is missing.")

        self.directory = config.get("destination", "directory")

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")

    def _readHistory(self, config):
        """Read the optional [history] section (keep<epoch>, mode<epoch>)."""
        if not config.has_section("history"):
            return

        for opt in config.options("history"):
            prefix, epoch = opt[:4], opt[4:]
            if prefix not in ("keep", "mode"):
                raise Config.ReadException("Invalid option '" + opt + "'.")
            if epoch not in Epoch:
                raise Config.ReadException(
                    "Invalid option '" + prefix + epoch + "'.")

            if prefix == "keep":
                try:
                    keep = config.getint("history", opt)
                except ValueError:
                    raise Config.ReadException(
                        "Invalid value for '" + opt + "': integer expected.")
                # A negative count would be used as a negative slice index
                # when pruning, so reject it here.
                if keep < 0:
                    raise Config.ReadException(
                        "Invalid value for '" + opt + "': must not be negative.")
                self.epochkeeps[epoch] = keep
            else:
                mode = config.get("history", opt)
                if mode not in Mode:
                    raise Config.ReadException("Invalid mode given.")
                self.epochmodes[epoch] = mode

    def _readInput(self, config):
        """Read the optional [input] section (exclude* patterns)."""
        if not config.has_section("input"):
            return

        for opt in config.options("input"):
            if not opt.startswith("exclude"):
                raise Config.ReadException("Invalid option '" + opt + "'.")
            self.exclpatterns.append(config.get("input", opt))

    def _readSets(self, config):
        """Read every "[set <name>]" section into a FileSet."""
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadException("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadException(
                        "Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))

    def _readChecksums(self, filename):
        """Compute the config file's checksum and load the stored one."""
        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # The stored checksum is absent until a checksum file has been
        # written to the destination directory; that is not an error.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
161
162
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Load the configuration from the file conffn.

        Raises Config.ReadException if the configuration cannot be read.
        """
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups.

        Every directory in the destination whose name looks like a backup
        directory is parsed into a Backup. Raises ValueError if such a name
        carries an unknown epoch or mode, or an impossible date.
        """
        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(
                int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the name of the longest epoch for which a backup is due at
        time now, or None if no backup is to be made.
        """
        # Epochs are visited from the longest to the shortest. 'latest' is
        # deliberately not reset between epochs, so a backup of a longer
        # epoch also counts as a recent backup for all shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in reversed(sorted([(Epoch[e], e) for e in Epoch])):
            # We do not make backups of that epoch
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, account for the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        fileset    the Config.FileSet to archive
        targetdir  directory in which "<set name>.<format>" is created
        since      if not None, only files newer than this datetime are
                   archived (passed to tar via -N)

        The output of tar is echoed to stdout. A non-zero exit status of
        tar is reported but does not raise.
        """
        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tarargs: ", tarargs)

        # stderr is merged into stdout so that a single pipe has to be
        # drained. Reading two pipes one after the other can deadlock as
        # soon as the pipe that is not being read fills up.
        tarp = subprocess.Popen(tarargs, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        try:
            for raw in tarp.stdout:
                # File names need not be valid in the default encoding.
                print(raw.decode(errors="replace"), end="")
        finally:
            tarp.stdout.close()
            rett = tarp.wait()

        if rett != 0:
            # tar's own messages have already been echoed above.
            print(tarpath + " returned with exit status " + str(rett) + ".")

    def backup(self):
        """Make a new backup, if necessary"""
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()
        epoch = self.getDesiredEpoch(oldbackups, now)

        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup
        mode = self.conf.epochmodes[epoch]
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: an incremental or differential
        # backup has nothing to refer to, so really switch to a full one.
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory. The archives are written into a
        # temporary directory whose name does not match the backup naming
        # scheme; it only gets its final name once all file sets are done.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Lists every directory in the destination that is either not named
        like a backup or exceeds the number of backups to keep for its
        epoch, and removes them after interactive confirmation.
        """
        # Collect all directories not matching backup name
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        # Get all directories which are outdated: per epoch, everything
        # beyond the 'keep' most recent backups.
        backups = self.listOldBackups()
        for e in Epoch:
            newestfirst = sorted([b for b in backups if b.epoch == e],
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                     for b in newestfirst[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print(" " + d)

        basedir = self.conf.directory
        try:
            yesno = input("Remove listed entries? [y, N] ")
        except EOFError:
            # No terminal to answer the question: keep everything.
            print()
            yesno = ""

        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
356
357
def printUsage():
    """Print --help text"""
    prog = sys.argv[0]

    print("shbackup - a simple backup solution.")
    print("")
    print("Usage:")
    # The optional argument is enclosed in a matching pair of brackets.
    print(" " + prog + " [-C <configfile>]")
    print(" " + prog + " --help")
    print("")
    print("Options:")
    print(" -C <configfile> default: /etc/shbackup.conf")
369
370
def main(argv):
    """Run shbackup with the command line argv (argv[0] is the program name).

    Parses the options, then makes a backup if one is due and prunes
    old backups. Returns the process exit status: 0 on success, 1 on a
    usage error or if the configuration could not be read.
    """
    conffn = "/etc/shbackup.conf"

    args = iter(argv[1:])
    for opt in args:
        if opt in ("-h", "--help"):
            printUsage()
            return 0

        elif opt in ("-C", "--config"):
            # The option takes the following argument as its value.
            conffn = next(args, None)
            if conffn is None:
                print("Option " + opt + " requires an argument.")
                return 1

        else:
            print("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)
        man.backup()
        man.prune()

    except Config.ReadException as e:
        print("Error reading config file: ", end="")
        for a in e.args:
            print(a, end=" ")
        print()
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
403
404
405
406