f9f700719f4bc8c950fea2099062c57dbd0ce68a
[sitarba.git] / shbackup.py
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
class Config:
    """Encapsulates the configuration for the backup program.

    The configuration is an INI-style file with these sections:

    * ``[destination]`` (required): ``directory`` and ``format``.
    * ``[history]`` (optional): ``keep<epoch>`` and ``mode<epoch>`` options.
    * ``[input]`` (optional): ``exclude*`` options holding exclude patterns.
    * ``[set <name>]`` (any number): ``dir*`` options listing directories.
    """

    class ReadException(Exception):
        """An exception raised when reading configurations."""
        pass

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Initialise the configuration with its default values."""
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in Epoch.keys()}
        self.epochmodes = {k: "full" for k in Epoch.keys()}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Besides the options themselves, the SHA-1 checksum of the config
        file is stored in ``self.checksum`` and the checksum saved in the
        destination directory (if any) in ``self.lastchecksum``.

        Raises:
            Config.ReadException: if the file does not exist, cannot be
                parsed, lacks a required section or option, or contains an
                unknown or invalid section, option or value.
        """

        if not os.path.isfile(filename):
            raise Config.ReadException("No file '" + filename + "'.")

        config = configparser.RawConfigParser()
        try:
            config.read(filename)
            self._applySections(config)
        except configparser.Error as err:
            # Parse errors and missing options must reach the caller as
            # ReadException, the only exception type it handles.
            raise Config.ReadException(str(err)) from err

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # The checksum file only exists once a full backup has been made.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None

    def _applySections(self, config):
        """Validate the parsed config and copy its values into self."""

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException(
                    "Section '" + reqsec + "' is missing.")

        self.directory = config.get("destination", "directory")

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                # Both prefixes are four characters; the rest is the epoch.
                epoch = opt[4:]
                if opt.startswith("keep"):
                    if epoch not in Epoch.keys():
                        raise Config.ReadException(
                            "Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError as err:
                        raise Config.ReadException(
                            "Invalid value for option '" + opt + "'.") from err
                elif opt.startswith("mode"):
                    if epoch not in Epoch.keys():
                        raise Config.ReadException(
                            "Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadException("Invalid mode given.")
                else:
                    raise Config.ReadException(
                        "Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if not opt.startswith("exclude"):
                    raise Config.ReadException(
                        "Invalid option '" + opt + "'.")
                self.exclpatterns.append(config.get("input", opt))

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadException("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadException(
                        "Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))
126
127
# Names of the supported backup modes.
Mode = ["full", "incr", "diff"]

# Maps each epoch name to the time span it covers.
Epoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(weeks=1),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}
135
class Backup:
    """Describes one backup by its date, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    def __str__(self):
        fields = [
            "date: " + self.date.ctime(),
            "epoch: " + self.epoch,
            "mode: " + self.mode,
        ]
        return "[" + ", ".join(fields) + "]"

    @staticmethod
    def getDirName(date, epoch, mode):
        """Build the directory name used for a backup with these properties."""
        stamp = date.strftime("%Y%m%d-%H%M")
        return "-".join([stamp, epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Return a match object if dirname looks like a backup directory.

        The result is None when the name does not have the form
        <8 digits>-<4 digits>-<word>-<word>.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
159
160
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Load the configuration from the file named conffn.

        Raises:
            Config.ReadException: if the configuration cannot be read.
        """
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        # Keep directories only; plain files such as the checksum file
        # are not backups.
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups.

        Raises:
            ValueError: if a backup directory name carries an unknown epoch
                or mode, or an impossible date.
        """

        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch.keys():
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(
                int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the longest epoch whose most recent backup is older than
        the epoch's time span, or None if no backup is due.
        """

        # 'latest' is carried over from longer to shorter epochs on purpose:
        # a recent backup of a longer epoch also covers the shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in reversed(sorted((Epoch[k], k) for k in Epoch)):
            # Skip epochs for which no backups are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        Args:
            fileset: object with a ``name`` and a list of ``dirs``.
            targetdir: directory in which the archive is created.
            since: if not None, only files newer than this datetime are
                archived (passed to tar via ``-N``).

        The output of tar is echoed while it runs. A non-zero exit status
        is reported on stdout; no exception is raised for it.
        """

        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tarargs: ", tarargs)

        # stderr is merged into stdout so that a single pipe is drained:
        # reading two pipes one after the other can deadlock as soon as the
        # pipe not being read fills up.
        with subprocess.Popen(tarargs, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT) as tarp:
            for raw in tarp.stdout:
                # File names need not be valid UTF-8; never crash on them.
                print(raw.decode(errors="replace"), end="")
            rett = tarp.wait()

        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ".")

    def backup(self):
        """Make a new backup, if necessary"""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()
        epoch = self.getDesiredEpoch(oldbackups, now)

        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup
        mode = self.conf.epochmodes[epoch]
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # Without a full backup there is nothing a diff/incr backup could
        # refer to, so fall back to a full one.
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory. The random suffix keeps the name
        # from matching the backup directory pattern until the backup is
        # complete and the directory is renamed.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        if mode == "diff":
            # Everything changed since the latest full backup
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            # Everything changed since the latest backup of any mode
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Lists all directories that are not backups, plus the backups
        exceeding the configured number to keep per epoch, and removes them
        after interactive confirmation.
        """

        # Collect all directories not matching backup name
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        # Get all directories which are outdated
        backups = self.listOldBackups()
        for e in Epoch:
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                     for b in newestfirst[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print(" " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y, N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
354
355
def printUsage():
    """Print --help text"""

    prog = sys.argv[0]
    print("shbackup - a simple backup solution.")
    print("")
    print("Usage:")
    # The closing bracket of the optional -C argument was missing.
    print(" " + prog + " [-C <configfile>]")
    print(" " + prog + " --help")
    print("")
    print("Options:")
    print(" -C <configfile> default: /etc/shbackup.conf")
367
368
def parseArgs(argv):
    """Parse the command line and return the config file name to use.

    Args:
        argv: full argument vector, including the program name at index 0.

    Returns:
        The file name given with -C/--config (the last one wins), or
        "/etc/shbackup.conf" if the option is absent.

    Exits with status 0 after printing the usage for -h/--help, and with
    status 1 on an unknown option or when -C/--config lacks its argument.
    """

    conffn = "/etc/shbackup.conf"

    i = 1
    while i < len(argv):
        opt = argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-C", "--config"]:
            # Guard against -C being the last argument.
            if i + 1 >= len(argv):
                print("Option " + opt + " requires an argument.")
                sys.exit(1)
            conffn = argv[i + 1]
            i += 2

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

    return conffn


if __name__ == "__main__":

    conffn = parseArgs(sys.argv)

    try:
        man = BackupManager(conffn)
        man.backup()
        man.prune()

    except Config.ReadException as e:
        print("Error reading config file: ", end="")
        for a in e.args:
            print(a, end=" ")
        print()
        # Signal the failure to the calling shell or cron job.
        sys.exit(1)
401
402
403
404