Add option -y, prettier output
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Backup modes understood by the program.
Mode = ["full", "incr", "diff"]

# Recurring epochs, mapped to the time span one backup of that epoch covers.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epochs: the recurring ones plus "sporadic", which has a zero time span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
24
25
class Backup:
    """One backup, described by its timestamp, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name of the form made by getDirName().

        Raises ValueError if the name does not consist of exactly four
        dash-separated fields, if the epoch or mode is unknown, or if the
        date/time digits do not form a valid timestamp.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)
        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        # Fixed-width fields: YYYYMMDD and HHMM.
        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        stamp = datetime.datetime(year, month, day, hour, minute)

        return Backup(stamp, epoch, mode)

    def __str__(self):
        pieces = ["[date: ", self.date.ctime(),
                  ", epoch: ", self.epoch,
                  ", mode: ", self.mode, "]"]
        return "".join(pieces)

    def colAlignedString(self):
        """Return date, epoch and mode as right-aligned columns."""
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %8s %4s" % (stamp, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Compose the directory name for a backup with the given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Tell whether dirname looks like a backup directory.

        Returns the regex match object (truthy) on success, None otherwise.
        Only the shape of the name is checked, not the epoch or mode values.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
69
70
71
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations.

        The error text is available as both `value` and `message`.
        """

        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option.
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        # Number of backups to keep, and backup mode, per recurring epoch.
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        # SHA-1 of the config file just read / of the one stored in the
        # destination directory (None if not available).
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    @staticmethod
    def _requiredOption(config, section, option):
        """Return a mandatory option, reporting its absence as ReadError."""
        try:
            return config.get(section, option)
        except configparser.NoOptionError as err:
            raise Config.ReadError("Option '" + option + "' is missing in section '"
                                   + section + "'.") from err

    def read(self, filename):
        """Read configuration from file.

        Fills in destination directory and format, the per-epoch keep counts
        and modes from section 'history', exclude patterns from section
        'input' and one FileSet per 'set <name>' section. Exclude patterns and
        file sets are appended to the ones already present. Finally the SHA-1
        checksum of the file is computed and the checksum stored in the
        destination directory (if any) is loaded.

        Raises:
            Config.ReadError: the file does not exist, a required section or
                option is missing, or an option/section is unknown or has an
                invalid value.

        Errors raised by configparser while parsing the file itself (for
        example DuplicateOptionError) are passed through unchanged.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # Both destination options are mandatory; a missing one must surface
        # as ReadError rather than as a raw configparser.NoOptionError.
        self.directory = self._requiredOption(config, "destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = self._requiredOption(config, "destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        keep = config.getint("history", opt)
                    except ValueError as err:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.") from err
                    # A negative count would be used as a slice index when
                    # pruning and select the wrong backups.
                    if keep < 0:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                    self.epochkeeps[epoch] = keep
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns += [config.get("input", opt)]
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs += [config.get(sec, opt)]
                self.sets += [Config.FileSet(name, dirs)]
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum remembered in the destination directory; it is
        # simply absent before the first full backup.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
193
194
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn, alwaysyes):
        """Load the configuration from conffn.

        alwaysyes makes ask_user_yesno() answer 'y' without prompting.
        Errors from Config.read() are passed through.
        """
        self.conf = Config()
        self.alwaysyes = alwaysyes
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Epochs are examined from the longest to the shortest. Returns the
        first epoch whose most recent backup is older than the epoch's time
        span, or None if no backup is due.
        """
        # `latest` is carried over between iterations on purpose: a backup of
        # a longer epoch also counts as a recent backup for shorter epochs.
        latest = datetime.datetime(1900, 1, 1)
        longestFirst = sorted(((RealEpoch[name], name) for name in RealEpoch),
                              reverse=True)
        for timespan, e in longestFirst:
            # Epochs with a keep count of zero are not backed up at all.
            if self.conf.epochkeeps[e] == 0:
                continue

            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        Runs /bin/tar; the archive is named after the file set with the
        configured format as extension. If since is given, it is passed to
        tar's -N option. A non-zero exit status of tar is reported on
        standard output but does not abort the backup.
        """
        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        tarp = subprocess.Popen(tarargs)

        rett = tarp.wait()
        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ":")

    @staticmethod
    def _effectiveMode(mode, oldfullbackups):
        """Fall back to a full backup when there is no full backup to build on."""
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            return "full"
        return mode

    @staticmethod
    def _tempDirName(dirname):
        """Append a random hexadecimal suffix to dirname."""
        # %x accepts integers only, so the random float must be truncated.
        return dirname + ("-%x" % int(random.random() * 2e16))

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        A 'diff' or 'incr' backup is turned into a full backup when no full
        backup exists yet. The archives are written into a temporary
        directory which is renamed to its final name at the end.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup. Only recurring epochs have a configured mode;
        # any other epoch (i.e. "sporadic") defaults to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]
        mode = self._effectiveMode(mode, oldfullbackups)

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, self._tempDirName(dirname))
        os.mkdir(targetdir)

        # A full backup takes everything; 'diff' is relative to the latest
        # full backup, 'incr' to the latest backup of any mode. Both lists
        # are non-empty here because of the fallback above.
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Marks for removal every directory in the destination that is not
        named like a backup, and for each recurring epoch every backup beyond
        the configured number of most recent ones. The marked entries are
        listed and removed after the user confirmed.
        """
        allDirs = self.listAllDirs()

        # Pair every backup with its real directory name, so that exactly the
        # listed directories are removed later on.
        named = [(d, Backup.fromDirName(d)) for d in allDirs
                 if Backup.isBackupDir(d)]
        byName = dict(named)

        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if d not in byName]

        # Per epoch, everything after the `keep` newest backups is outdated.
        for e in RealEpoch:
            keep = self.conf.epochkeeps[e]
            newestFirst = sorted((p for p in named if p[1].epoch == e),
                                 key=lambda p: p[1].date, reverse=True)
            removeDirs += [d for d, _ in newestFirst[keep:]]

        marked = set(removeDirs)
        print("List of stale/outdated entries:")
        for d in allDirs:
            flag = "[*] " if d in marked else "[ ] "
            label = byName[d].colAlignedString() if d in byName else d
            print(flag + label)

        if not removeDirs:
            print("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'?")
        if yesno.strip().lower() in ("y", "yes"):
            for d in removeDirs:
                shutil.rmtree(os.path.join(basedir, d))

    def ask_user_yesno(self, question):
        """Ask a yes/no question and return the answer as typed.

        Returns "y" without prompting when the manager was created with
        alwaysyes set.
        """
        if self.alwaysyes:
            print(question + " y")
            return "y"
        return input(question + " [y, N] ")
389
390
def printUsage():
    """Print --help text

    The command and option tables are laid out in two columns whose width is
    computed from the longest entry, so descriptions and their continuation
    lines start in the same column.
    """
    prog = sys.argv[0]

    # (left column, description lines); extra lines continue the description.
    commands = [
        ("backup", ["make a new backup, if necessary"]),
        ("list", ["list all backups (default)"]),
        ("prune", ["prune outdated/old backups"]),
    ]
    options = [
        ("-h, --help", ["print this usage text"]),
        ("-c, --conf <configfile>", ["use given configuration file",
                                     "default: /etc/shbackup.conf"]),
        ("-e, --epoch <epoch>", ["force to create backup for given epoch:",
                                 "year, month, week, day, hour, sporadic"]),
        ("-m, --mode <mode>", ["override mode: full, diff, or incr"]),
        ("-y, --yes", ["always assume 'yes' when user is asked"]),
    ]

    def table(entries):
        """Format entries as indented rows with an aligned second column."""
        width = max(len(left) for left, _ in entries) + 2
        rows = []
        for left, texts in entries:
            for n, text in enumerate(texts):
                first = left if n == 0 else ""
                rows.append("  " + first.ljust(width) + text)
        return rows

    lines = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        "  " + prog + " {options} [cmd]",
        "  " + prog + " --help",
        "",
        "Commands:",
    ]
    lines += table(commands)
    lines += ["", "Options:"]
    lines += table(options)

    print("\n".join(lines))
413
414
def parseArgs(argv):
    """Parse the command line given as argv (including the program name).

    Returns the tuple (conffn, cmd, mode, epoch, yes). On -h/--help the usage
    text is printed and the program exits with status 0. An unknown option,
    an unknown mode or epoch, or an option lacking its argument prints a
    message and exits with status 1.
    """
    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None
    yes = False

    def argumentOf(opt, i):
        """Return the argument following option opt at index i."""
        if i + 1 >= len(argv):
            print("Option '" + opt + "' requires an argument.")
            sys.exit(1)
        return argv[i + 1]

    i = 1
    while i < len(argv):
        opt = argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = argumentOf(opt, i)
            i += 1

        elif opt in ["-y", "--yes"]:
            yes = True

        elif opt in ["-m", "--mode"]:
            mode = argumentOf(opt, i)
            i += 1
            if mode not in Mode:
                print("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = argumentOf(opt, i)
            i += 1
            if epoch not in Epoch:
                print("Unknown epoch '" + epoch + "'.")
                sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

        i += 1

    return conffn, cmd, mode, epoch, yes


def main(argv):
    """Run the command selected on the command line.

    Returns the process exit status: 0 on success, 1 if the configuration
    file could not be read.
    """
    conffn, cmd, mode, epoch, yes = parseArgs(argv)

    try:
        man = BackupManager(conffn, yes)

        if cmd == "backup":
            man.backup(epoch, mode)

        if cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        if cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.DuplicateOptionError) as e:
        print("Error reading config file: " + e.message)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
476
477
478
479