write log file and print tar's stderr afterwards
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Supported backup modes (see BackupManager.backup for how each is handled).
Mode = ["full", "incr", "diff"]

# Epochs that correspond to a real time span between two backups.
RealEpoch = {
    "hour": datetime.timedelta(seconds=3600),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epochs: the real ones plus "sporadic", which has a zero time span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta(0, 0)
24
25
class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Shape of a backup directory name: YYYYMMDD-HHMM-<epoch>-<mode>.
    # Compiled once here rather than on every isBackupDir() call.
    _dirNamePattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name as built by getDirName().

        Raises ValueError if the name does not consist of four
        dash-separated parts, if epoch or mode are unknown, or if the
        date/time part cannot be converted to a datetime.
        """
        parts = dirname.split("-")
        if len(parts) != 4:
            raise ValueError("Invalid backup directory name: " + dirname)
        strdate, strtime, epoch, mode = parts

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)

        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime(
            int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
            int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Return date, epoch and mode as one fixed-width line."""
        return "%16s %8s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Only the shape of the name is checked; epoch and mode are not
        validated here. Returns a bool.
        """
        return Backup._dirNamePattern.match(dirname) is not None
69
70
71
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    @staticmethod
    def _getRequired(config, section, option):
        """Return a mandatory option; raise ReadError if it is missing."""
        try:
            return config.get(section, option)
        except configparser.NoOptionError:
            raise Config.ReadError("Option '" + option + "' is missing in section '"
                                   + section + "'.")

    def read(self, filename):
        """Read configuration from file.

        Fills in destination directory, archive format, history settings,
        exclude patterns and file sets, then computes the SHA-1 checksum of
        the config file and loads the checksum stored in the destination
        directory (None if that file cannot be read).

        Raises Config.ReadError if the file is missing or its content is
        invalid.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # A missing mandatory option is reported as ReadError, the only
        # Config-specific error callers are expected to handle.
        self.directory = Config._getRequired(config, "destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = Config._getRequired(config, "destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns += [config.get("input", opt)]
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        # Every remaining section has to be a file set named "set <name>"
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs += [config.get(sec, opt)]
                self.sets += [Config.FileSet(name, dirs)]
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum stored in the destination directory, if any
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
193
194
class BackupManager:
    """List and create backups"""

    # Path of the tar executable used to create the archives
    tarpath = "/bin/tar"

    def __init__(self, conffn, alwaysyes):
        """Read the configuration from conffn. If alwaysyes is true, every
        yes/no question is answered with 'y' without asking the user."""
        self.conf = Config()
        self.alwaysyes = alwaysyes
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        # Keep only those entries that are directories
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""

        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on self.configuration and list of old backups.

        Returns the name of the epoch for which a backup is due, or None if
        no backup is to be made.
        """

        # Find the longest epoch for which we would like to make a backup.
        # 'latest' is not reset between iterations, so the date of a backup
        # of a longer epoch also counts for all shorter epochs.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[e], e) for e in RealEpoch), reverse=True):
            # Skip epochs of which no backups are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, log, since=None):
        """Create an archive for given fileset at given target directory.

        fileset   -- object with 'name' and 'dirs' attributes
        targetdir -- directory in which the archive is created
        log       -- writable text stream receiving the tar call and, on
                     failure, the exit status message
        since     -- if given, passed to tar via -N
        """

        print("Running file set: " + fileset.name)
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [self.tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tar call: " + " ".join(tarargs), file=log)
        tarp = subprocess.Popen(tarargs, stderr=subprocess.PIPE)

        # communicate() drains stderr while waiting; a plain wait() could
        # block forever once the pipe buffer is full.
        _, errdata = tarp.communicate()
        rett = tarp.returncode
        if rett != 0:
            # The pipe delivers bytes, sys.stderr expects text
            sys.stderr.write(errdata.decode(errors="replace"))
            msg = self.tarpath + " returned with exit status " + str(rett) + "."
            print(msg)
            print(msg, file=log)

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup. Epochs without a configured mode (i.e.
        # "sporadic") fall back to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing to be relative to
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(oldfullbackups, key=lambda b: b.date).date
        elif mode == "incr":
            since = max(oldbackups, key=lambda b: b.date).date

        if since is not None:
            print("Making backup relative to ", since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory under a temporary name
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        # %x requires an integer
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        with open(os.path.join(targetdir, "log.log"), 'w') as log:
            print("Started: " + now.ctime(), file=log)

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, log, since)

            print("Stopped: " + datetime.datetime.now().ctime(), file=log)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files"""

        allDirs = self.listAllDirs()
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Per real epoch, everything beyond the newest 'keep' backups is old
        backups = self.listOldBackups()
        for e in RealEpoch:
            keep = self.conf.epochkeeps[e]
            byepoch = sorted([b for b in backups if b.epoch == e],
                             key=lambda b: b.date, reverse=True)
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in byepoch[keep:]]

        print("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                print("[*] ", end="")
            else:
                print("[ ] ", end="")

            if Backup.isBackupDir(d):
                print(Backup.fromDirName(d).colAlignedString())
            else:
                print(d)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            print("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                shutil.rmtree(os.path.join(basedir, d))

    def ask_user_yesno(self, question):
        """Ask the user a question and return the answer. If alwaysyes is
        set, print the question and return "y" without asking."""
        if self.alwaysyes:
            print(question + " y")
            return "y"
        else:
            return input(question)
406
407
def printUsage(file=None):
    """Print --help text

    file -- stream to write to; None (the default) means the current
            sys.stdout.
    """

    prog = sys.argv[0]

    def row(left, right):
        # Two-column row; descriptions start in the same column everywhere
        return ("  %-24s %s" % (left, right)).rstrip()

    lines = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        "  " + prog + " {options} [cmd]",
        "  " + prog + " --help",
        "",
        "Commands:",
        row("backup", "make a new backup, if necessary"),
        row("list", "list all backups (default)"),
        row("prune", "prune outdated/old backups"),
        "",
        "Options:",
        row("-h, --help", "print this usage text"),
        row("-c, --conf <configfile>", "use given configuration file"),
        row("", "default: /etc/shbackup.conf"),
        row("-e, --epoch <epoch>", "force to create backup for given epoch:"),
        row("", "year, month, week, day, hour, sporadic"),
        row("-m, --mode <mode>", "override mode: full, diff, or incr"),
        row("-y, --yes", "always assume 'yes' when user is asked"),
    ]
    print("\n".join(lines), file=file)
430
431
def parseArgs(argv):
    """Parse the command line given as argv (including the program name).

    Returns the tuple (conffn, cmd, mode, epoch, yes). Exits with status 0
    after printing the usage text for -h/--help, and with status 1 on an
    unknown option, an unknown mode or epoch, or an option that lacks its
    argument.
    """

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None
    yes = False

    args = iter(argv[1:])

    def optionValue(opt):
        # Fetch the argument belonging to opt; it must not be missing
        value = next(args, None)
        if value is None:
            print("Missing argument for option '" + opt + "'.")
            sys.exit(1)
        return value

    for opt in args:
        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = optionValue(opt)

        elif opt in ["-y", "--yes"]:
            yes = True

        elif opt in ["-m", "--mode"]:
            mode = optionValue(opt)
            if mode not in Mode:
                print("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = optionValue(opt)
            if epoch not in Epoch:
                print("Unknown epoch '" + epoch + "'.")
                sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

    return conffn, cmd, mode, epoch, yes


if __name__ == "__main__":

    conffn, cmd, mode, epoch, yes = parseArgs(sys.argv)

    try:
        man = BackupManager(conffn, yes)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.DuplicateOptionError) as e:
        print("Error reading config file: " + e.message)
        # Signal the failure to the calling shell or cron job
        sys.exit(1)
493
494
495
496