adding logging system, added --version
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess
12 import random, re
13 import logging
14
15
# Supported backup modes: "full" archives everything, "incr" is taken
# relative to the most recent backup and "diff" relative to the most
# recent full backup.
Mode = ["full", "incr", "diff"]

# Epochs that correspond to a real time span between two backups.
RealEpoch = {
    "hour": datetime.timedelta(0, 3600),
    "day": datetime.timedelta(1),
    "week": datetime.timedelta(7),
    "month": datetime.timedelta(30),
    "year": datetime.timedelta(365),
}

# All epochs a backup may be labelled with: the real ones plus "sporadic",
# which has a zero time span.
Epoch = dict(RealEpoch, sporadic=datetime.timedelta(0, 0))
28
29
class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Shape of a backup directory name: YYYYMMDD-HHMM-<epoch>-<mode>.
    # Compiled once instead of on every isBackupDir() call.
    _DIRNAME_RE = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name.

        Raises ValueError if the name does not have the shape
        YYYYMMDD-HHMM-<epoch>-<mode>, if the epoch or mode is unknown, or
        if the digits do not form a valid date and time.
        """
        if not Backup._DIRNAME_RE.match(dirname):
            raise ValueError("Invalid backup directory name: " + dirname)

        strdate, strtime, epoch, mode = dirname.split("-")

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)

        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        # datetime() itself raises ValueError for out-of-range fields.
        date = datetime.datetime(
            int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
            int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Return date, epoch and mode as fixed-width columns."""
        return "%16s %8s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Return True if dirname has the shape of a backup directory name.

        Only the shape is checked; the epoch, mode and date are validated
        by fromDirName().
        """
        return Backup._DIRNAME_RE.match(dirname) is not None
73
74
75
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            # Initialise the base class so str(exc) carries the message.
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    @staticmethod
    def _getRequired(config, section, option):
        """Return a mandatory option, raising ReadError if it is absent."""
        try:
            return config.get(section, option)
        except configparser.NoOptionError as err:
            raise Config.ReadError(
                "Option '" + option + "' is missing in section '"
                + section + "'.") from err

    def read(self, filename):
        """Read configuration from file.

        Fills in the destination, history, input and file-set settings,
        computes the SHA-1 checksum of the config file and loads the
        checksum stored in the destination directory, if any.

        Raises Config.ReadError if the file is missing or its content is
        invalid.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        self.directory = self._getRequired(config, "destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError(
                "Directory '{0}' does not exist.".format(self.directory))

        self.format = self._getRequired(config, "destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                # Options are named keep<epoch> or mode<epoch>.
                kind, epoch = opt[:4], opt[4:]
                if kind not in ("keep", "mode"):
                    raise Config.ReadError("Invalid option '" + opt + "'.")
                if epoch not in RealEpoch:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

                if kind == "keep":
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError(
                            "Invalid integer given for '" + opt + "'.")
                else:
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadError("Invalid mode given.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if not opt.startswith("exclude"):
                    raise Config.ReadError("Invalid option '" + opt + "'.")
                self.exclpatterns.append(config.get("input", opt))

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadError("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadError("Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum stored in the destination directory; an
        # unreadable or missing file leaves lastchecksum as None.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
197
198
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Load the configuration from the file conffn."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups.

        Directories that look like backups but cannot be parsed (unknown
        epoch or mode, impossible date) are skipped with a warning.
        """
        backups = []
        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue
            try:
                backups.append(Backup.fromDirName(entry))
            except ValueError as err:
                logging.warning("Ignoring directory '%s': %s", entry, err)
        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the longest epoch whose most recent backup is older than
        the epoch's time span, or None if no backup is due.
        """
        # Walk the epochs from the longest to the shortest time span. The
        # date of the latest backup carries over to the shorter epochs, so
        # a backup of a longer epoch also counts for the shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[e], e) for e in RealEpoch),
                                  reverse=True):
            # Epochs of which no backups are kept are never scheduled
            if self.conf.epochkeeps[e] == 0:
                continue

            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        If since is given, it is passed to tar's -N option. The output of
        tar is logged at debug level; on a non-zero exit status its error
        output and the status are logged as errors.
        """
        logger = logging.getLogger('backup')

        logger.info("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        logger.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # communicate() drains both pipes at once; reading only stdout
        # could block forever once tar fills the stderr pipe.
        out, err = tarp.communicate()

        # File names need not be valid UTF-8, so never fail on decoding.
        for line in out.decode(errors="replace").splitlines():
            logger.debug(line.rstrip())

        rett = tarp.returncode
        if rett != 0:
            for line in err.decode(errors="replace").splitlines():
                logger.error(line.strip())
            logger.error(tarpath + " returned with exit status " + str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup. Modes are configured for real epochs only,
        # so any other epoch (i.e. "sporadic") falls back to "full".
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing to be relative to
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory under a temporary name; it gets its
        # final name only after all file sets have been processed.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        # %x needs an integer; formatting a float raises TypeError.
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        logger = logging.getLogger('backup')
        ch = logging.FileHandler(os.path.join(targetdir, "log"))
        ch.setLevel(logging.INFO)
        logger.addHandler(ch)
        try:
            logger.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, since)

            logger.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file so that the handler does not
            # outlive this backup.
            logger.removeHandler(ch)
            ch.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Marks for removal every directory that does not have a backup
        directory name and, per real epoch, every backup beyond the
        configured number of newest ones to keep. The directories are
        removed after confirmation through ask_user_yesno().
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Per epoch, everything beyond the newest `keep` backups is outdated
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in newestfirst[keep:]]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in removeDirs else "[ ] "

            if Backup.isBackupDir(d):
                try:
                    msg += Backup.fromDirName(d).colAlignedString()
                except ValueError:
                    # Looks like a backup but cannot be parsed
                    msg += d
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs. This guards
        # rmtree() below, so it must also hold when asserts are disabled.
        unknown = [d for d in removeDirs if d not in allDirs]
        if unknown:
            raise RuntimeError("Refusing to prune, entries not found in "
                               "destination: " + ", ".join(unknown))

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                shutil.rmtree(os.path.join(basedir, d))

    def ask_user_yesno(self, question):
        """Ask the user a yes/no question.

        The user is only asked if the root logger is enabled for INFO;
        otherwise "y" is returned without asking. The answer is reduced to
        its first character in lower case, so "N" and "no" both give "n";
        an empty answer gives "".
        """
        if logging.getLogger().isEnabledFor(logging.INFO):
            return input(question).strip().lower()[:1]
        return "y"
423
424
def printUsage():
    """Print --help text"""

    # The program name is shown the way the script was invoked.
    prog = sys.argv[0]

    lines = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " warning, info, debug",
        " -V, --version print version info",
    ]
    for line in lines:
        print(line)
450
451
def main(argv=None):
    """Parse the command line and run the requested command.

    argv is the full argument vector including the program name; it
    defaults to sys.argv. Returns the process exit status: 0 on success,
    1 on a command line error or a configuration error.
    """
    if argv is None:
        argv = sys.argv

    logging.basicConfig(format='%(message)s')
    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options that consume the following argument as their value
    valueopts = ("-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch")

    args = iter(argv[1:])
    for opt in args:
        if opt in ("-h", "--help"):
            printUsage()
            return 0

        if opt in ("-V", "--version"):
            print("shbackup " + __version__)
            return 0

        if opt in ("-v", "--verbose"):
            logging.getLogger().setLevel(logging.INFO)
            continue

        if opt in ("backup", "list", "prune"):
            cmd = opt
            continue

        if opt not in valueopts:
            logging.error("Unknown option: " + opt)
            return 1

        # Report a missing value instead of running past the end of argv
        value = next(args, None)
        if value is None:
            logging.error("Option '" + opt + "' requires an argument.")
            return 1

        if opt in ("-c", "--conf"):
            conffn = value

        elif opt == "--verbosity":
            # Level names are looked up as attributes of the logging
            # module; anything that is not an integer is not a level.
            numlevel = getattr(logging, value.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: %s" % value)
                return 1
            logging.getLogger().setLevel(numlevel)

        elif opt in ("-m", "--mode"):
            mode = value
            if mode not in Mode:
                logging.error("Unknown mode '" + mode + "'.")
                return 1

        else:
            epoch = value
            if epoch not in Epoch:
                logging.error("Unknown epoch '" + epoch + "'.")
                return 1

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)
        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())
        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.DuplicateOptionError) as e:
        logging.error("Error reading config file: " + e.message)
        # Let the caller (e.g. cron) see that nothing was done
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
525
526
527
528