41658696b6097a9272b3e49a0f7e291ec515e004
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess
12 import random, re
13 import logging
14
15
# Backup modes accepted on the command line and in the configuration.
Mode = ["full", "incr", "diff"]

# Recurring epochs and the time span that has to elapse before another
# backup of that epoch is due.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# Every epoch a backup may carry: the recurring ones plus "sporadic", which
# has a zero time span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta(0)
28
29
class Backup:
    """A single backup has a date, an epoch and a mode.

    A backup is identified by a directory name of the form
    ``YYYYMMDD-HHMM-<epoch>-<mode>``.
    """

    # Used with fullmatch(), so the whole name has to fit the pattern.
    # ASCII digits only: r'\d' would also accept other Unicode digits, which
    # getDirName() never produces.
    _dirNamePattern = re.compile(r'([0-9]{8})-([0-9]{4})-(\w+)-(\w+)')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a backup directory name.

        Raises ValueError if the name is malformed, names an unknown epoch
        or mode, or encodes an impossible date.
        """
        m = Backup._dirNamePattern.fullmatch(dirname)
        if m is None:
            raise ValueError("Invalid backup directory name: " + dirname)
        strdate, strtime, epoch, mode = m.groups()

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)

        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        # datetime() itself raises ValueError for out-of-range fields.
        date = datetime.datetime(
            int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
            int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Get a fixed-width, column-aligned description of the backup."""
        return "%16s %8s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Only the shape of the name is checked; whether epoch and mode are
        known values is verified by fromDirName().
        """
        return Backup._dirNamePattern.fullmatch(dirname) is not None
73
74
75
class Config:
    """Encapsules the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            # Initialise the base class too, so that str(e) carries the text.
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    @staticmethod
    def _getRequired(config, section, option):
        """Get a mandatory option; raise ReadError if it is absent."""
        try:
            return config.get(section, option)
        except configparser.NoOptionError:
            raise Config.ReadError("Option '" + option + \
                "' is missing in section '" + section + "'.")

    def read(self, filename):
        """Read configuration from file.

        Fills in the destination, history, input and file set settings, the
        SHA-1 checksum of the config file itself, and the checksum stored in
        the destination directory (None if that file cannot be read).

        Raises Config.ReadError on a missing file, missing required
        sections or options, and unknown or invalid entries.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        self.directory = Config._getRequired(config, "destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = Config._getRequired(config, "destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                # Options are named keep<epoch> resp. mode<epoch>
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        # All remaining sections have to be file sets, named "set <name>"
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Checksum saved in the destination directory, if there is any
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
197
198
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Read the configuration file conffn; Config.read() may raise
        Config.ReadError."""
        self.conf = Config()
        self.conf.read(conffn)


    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]


    def listOldBackups(self):
        """Returns a list of old backups."""

        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]


    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on self.configuration and list of old backups"""

        # Find the longest epoch for which we would like the make a backup.
        # 'latest' deliberately carries over from longer to shorter epochs, so
        # a recent backup of a longer epoch also counts for the shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[e], e) for e in RealEpoch), reverse=True):
            # We make no backups of that epoch
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None


    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        If since is given, tar is called with -N and only picks up files
        newer than that date. tar's file listing is logged at debug level to
        the root logger; a non-zero exit status and tar's stderr are logged
        as errors to the 'backuplog' logger.
        """
        # Local import: tempfile is not among the module-level imports.
        import tempfile

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        logfile.debug("tar call: " + " ".join(tarargs))

        # stderr goes to a temporary file instead of a pipe: a pipe that is
        # not drained while we read stdout would block tar once it is full.
        with tempfile.TemporaryFile() as errfile:
            tarp = subprocess.Popen(tarargs, bufsize=-1,
                                    stdout=subprocess.PIPE, stderr=errfile)

            # Output stdout of tar until it closes the stream. File names
            # need not be valid UTF-8, hence errors="replace".
            for l in tarp.stdout:
                logging.debug(l.decode(errors="replace").rstrip())
            tarp.stdout.close()

            rett = tarp.wait()
            if rett != 0:
                errfile.seek(0)
                for l in errfile:
                    logfile.error(l.decode(errors="replace").strip())
                logfile.error(tarpath + " returned with exit status " + str(rett) + ".")


    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        A non-full mode falls back to "full" if no full backup exists yet."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup. Modes are only configured for the recurring
        # epochs; any other epoch ("sporadic") defaults to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing, so there is nothing to be relative to
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory. It gets a random suffix while the
        # backup is running and is renamed to its final name afterwards.
        # (The suffix has to be an int: "%x" rejects floats.)
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.randrange(int(2e16)))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file of this backup
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)


    def prune(self):
        """Prune old backup files

        Entries to be removed are all directories not named like a backup
        and, per recurring epoch, all backups beyond the configured number of
        most recent ones. Removal happens after ask_user_yesno() returns "y".
        """

        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Per epoch, everything beyond the newest 'keep' backups is outdated
        backups = self.listOldBackups()
        for e in RealEpoch:
            keep = self.conf.epochkeeps[e]
            byepoch = sorted((b for b in backups if b.epoch == e),
                             key=lambda b: b.date, reverse=True)
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in byepoch[keep:]]

        marked = set(removeDirs)

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in marked:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs. The names of
        # outdated backups were rebuilt from parsed values; never delete
        # anything that was not actually found in the destination directory.
        existing = set(allDirs)
        for d in removeDirs:
            if d not in existing:
                raise RuntimeError("Refusing to prune: '" + d + \
                    "' is not a directory of the backup destination.")

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                shutil.rmtree(os.path.join(basedir, d))

    def ask_user_yesno(self, question):
        """Ask the user a yes/no question and return the answer.

        The user is only asked if the console log level is INFO or more
        verbose; otherwise "y" is returned without asking. The answer is
        stripped and lower-cased.
        """
        if LogConf.con.level <= logging.INFO:
            return input(question).strip().lower()
        else:
            return "y"
425
426
def printUsage():
    """Write the usage text shown for --help to standard output."""

    prog = sys.argv[0]
    usage = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " warning, info, debug",
        " -V, --version print version info",
    ]
    for line in usage:
        print(line)
452
453
454
class LogConf:
    """Holds the console log handler and wires up the loggers."""

    # Console handler writing to stderr; its level is adjusted by the
    # verbosity options and consulted by BackupManager.ask_user_yesno().
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Attach the console handler to the root logger.

        Both the root logger and the "backuplog" logger let everything from
        DEBUG upwards through; the console handler itself starts out at
        WARNING.
        """
        cls.con.setLevel(logging.WARNING)

        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        root.addHandler(cls.con)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
471
472
if __name__ == "__main__":

    LogConf.setup()

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options which consume the following command line argument
    valueopts = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]

    args = iter(sys.argv[1:])
    for opt in args:

        value = None
        if opt in valueopts:
            value = next(args, None)
            if value is None:
                logging.error("Option '" + opt + "' requires an argument.")
                sys.exit(1)

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = value

        elif opt in ["-V", "--version"]:
            print("shbackup " + __version__)
            sys.exit(0)

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            # Level names are looked up as constants of the logging module
            numlevel = getattr(logging, value.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: " + value)
                sys.exit(1)
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = value
            if mode not in Mode:
                logging.error("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = value
            if epoch not in Epoch:
                logging.error("Unknown epoch '" + epoch + "'.")
                sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        # Both exception types carry the error text in 'message'
        logging.error("Error reading config file: " + e.message)
        sys.exit(1)
547
548
549
550