ask before making backup
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Modes a backup can be made in.
Mode = ["full", "incr", "diff"]

# Regularly recurring epochs, each mapped to its nominal length.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epochs a backup may belong to: the recurring ones plus "sporadic",
# which has a length of zero.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
24
25
class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Shape of a backup directory name: YYYYMMDD-HHMM-<epoch>-<mode>.
    # Compiled once; isBackupDir() applies it with fullmatch().
    _dirNamePattern = re.compile(r'\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+')

    def __init__(self, date, epoch, mode):
        """Store the date, the epoch name and the mode name of the backup."""
        self.date = date
        self.epoch = epoch
        self.mode = mode

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name as built by getDirName().

        Raises ValueError if dirname does not consist of exactly four
        dash-separated fields, if the epoch or the mode is unknown, or if
        the date and time fields do not form a valid date.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)

        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime(
            int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
            int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Return date, epoch and mode as right-aligned columns."""
        return "%16s %8s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns True if dirname has the shape YYYYMMDD-HHMM-<epoch>-<mode>
        and False otherwise. Only the shape is checked; whether epoch and
        mode are known names is verified by fromDirName().
        """
        # fullmatch() instead of '^...$': '$' would also accept a name that
        # ends in a newline, which fromDirName() rejects afterwards.
        return Backup._dirNamePattern.fullmatch(dirname) is not None
69
70
71
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            # Pass the text on to RuntimeError as well, so that str(exc)
            # and tracebacks show it instead of an empty message.
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Set up defaults: keep no backups of any epoch, mode 'full'."""
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Recognized sections:
          [destination]  required; options 'directory' and 'format'
          [history]      optional; options 'keep<epoch>' and 'mode<epoch>'
          [input]        optional; options starting with 'exclude'
          [set <name>]   any number; options starting with 'dir'

        Afterwards self.checksum holds the SHA-1 hex digest of the config
        file, and self.lastchecksum the digest stored in the checksum file
        of the destination directory (None if that file cannot be read).

        Raises Config.ReadError if the file does not exist or its content
        is invalid. Exceptions raised by configparser while parsing the
        file are passed through unchanged.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # Report a missing mandatory option as ReadError; config.get()
        # would raise configparser.NoOptionError instead.
        for reqopt in ["directory", "format"]:
            if not config.has_option("destination", reqopt):
                raise Config.ReadError("Option '" + reqopt +
                                       "' is missing in section 'destination'.")

        self.directory = config.get("destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    epochmode = config.get("history", opt)
                    if epochmode not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                    self.epochmodes[epoch] = epochmode
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Fetch the checksum saved in the destination directory, if any
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
193
194
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn, alwaysyes):
        """Read the configuration from file conffn.

        If alwaysyes is true, questions to the user are answered with
        'y' automatically instead of being asked.
        """
        self.conf = Config()
        self.alwaysyes = alwaysyes
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        # Keep only those entries that are directories
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""

        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on self.configuration and list of old backups

        Returns the name of the longest epoch that is due at time now, or
        None if no backup is to be made.
        """

        # Find the longest epoch for which we would like to make a backup.
        # 'latest' is carried over from longer to shorter epochs, so a
        # backup of a longer epoch also counts for the shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[e], e) for e in RealEpoch), reverse=True):
            # Skip epochs for which no backups are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        The archive is written by /bin/tar to
        <targetdir>/<fileset name>.<configured format>. If since is given,
        it is passed to tar via -N. A non-zero exit status of tar is
        reported on stdout only.
        """

        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        tarp = subprocess.Popen(tarargs)

        rett = tarp.wait()
        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ":")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup. Modes are configured for the recurring epochs
        # only, so a "sporadic" backup without explicit mode is a full one.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so actually switch to full mode.
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            print("Making backup relative to ", since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory under a temporary name. The random
        # suffix must be an integer: '%x' rejects floats on Python >= 3.5.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files

        Marked for removal are all directories in the destination directory
        that are not named like a backup, and for every recurring epoch all
        but the newest 'keep<epoch>' backups. Backups of epoch "sporadic"
        are never marked. The marked entries are removed only after the
        user confirmed with 'y'.

        Raises RuntimeError if an entry marked for removal is not among the
        listed directories.
        """

        allDirs = self.listAllDirs()
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Per recurring epoch, everything beyond the newest 'keep' backups
        # is outdated
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted([b for b in backups if b.epoch == e],
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in newestfirst[keep:]]

        print("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                print("[*] ", end="")
            else:
                print("[ ] ", end="")

            if Backup.isBackupDir(d):
                print(Backup.fromDirName(d).colAlignedString())
            else:
                print(d)

        # Check that dirs to be removed are in list of all dirs. This guards
        # rmtree() below, hence no assert, which python -O would strip.
        unknown = [d for d in removeDirs if d not in allDirs]
        if unknown:
            raise RuntimeError("Entries to remove are not in backup directory: "
                               + ", ".join(unknown))

        if not removeDirs:
            print("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                shutil.rmtree(os.path.join(basedir, d))

    def ask_user_yesno(self, question):
        """Ask the user a yes/no question and return the answer.

        With alwaysyes set, the question is printed and answered by "y"
        without waiting for input. Otherwise the answer typed by the user
        is returned, stripped of surrounding whitespace and in lower case.
        """
        if self.alwaysyes:
            print(question + " y")
            return "y"
        return input(question).strip().lower()
395
396
def printUsage():
    """Print --help text"""

    prog = sys.argv[0]
    usage = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -y, --yes always assume 'yes' when user is asked",
    ]
    # One write for the whole text; the output is the same as printing
    # each line on its own.
    print("\n".join(usage))
419
420
def main(argv):
    """Parse the command line given in argv and run the requested command.

    argv[0] is the program name, the remaining entries are options and
    the command, as described by printUsage(). Returns the exit status of
    the program: 0 on success, 1 on an invalid command line or if the
    config file could not be read.
    """

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None
    yes = False

    # Options that take the following argument as their value
    valueopts = ["-c", "--conf", "-m", "--mode", "-e", "--epoch"]

    i = 0
    while i < len(argv) - 1:
        i += 1
        opt = argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            return 0

        elif opt in valueopts:
            # Without this check, an option given as the last argument
            # would raise an IndexError when fetching its value.
            if i + 1 >= len(argv):
                print("Option '" + opt + "' requires an argument.")
                return 1
            i += 1
            value = argv[i]

            if opt in ["-c", "--conf"]:
                conffn = value
            elif opt in ["-m", "--mode"]:
                if value not in Mode:
                    print("Unknown mode '" + value + "'.")
                    return 1
                mode = value
            else:
                if value not in Epoch:
                    print("Unknown epoch '" + value + "'.")
                    return 1
                epoch = value

        elif opt in ["-y", "--yes"]:
            yes = True

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn, yes)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.DuplicateOptionError) as e:
        print("Error reading config file: " + e.message)
        # Signal the failure to the caller of the program, too
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
482
483
484
485