tarp's output: clean up code
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Backup modes understood by the program.
Mode = ["full", "incr", "diff"]

# Epochs that correspond to an actual time span between two backups.
RealEpoch = {
    "hour": datetime.timedelta(0, 3600),
    "day": datetime.timedelta(1),
    "week": datetime.timedelta(7),
    "month": datetime.timedelta(30),
    "year": datetime.timedelta(365),
}

# All epochs: the real ones plus "sporadic", which has a zero time span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta(0, 0)
28
29
class Backup:
    """Describes one backup by its date, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name of the form
        YYYYMMDD-HHMM-epoch-mode. Raises ValueError if the name does not
        consist of four dash-separated parts, or if epoch, mode or the date
        digits are invalid."""
        strdate, strtime, epoch, mode = dirname.split("-")

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)
        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        return Backup(datetime.datetime(year, month, day, hour, minute), epoch, mode)

    def __str__(self):
        pieces = ["[date: ", self.date.ctime(),
                  ", epoch: ", self.epoch,
                  ", mode: ", self.mode, "]"]
        return "".join(pieces)

    def colAlignedString(self):
        """Return date, epoch and mode as right-aligned columns."""
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %8s %4s" % (stamp, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Compose the directory name for a backup with given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Tell whether dirname looks like a backup directory name. Returns
        the regex match object (truthy) or None."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
73
74
75
class Config:
    """Encapsulates the configuration for the backup program.

    A configuration consists of the destination directory and archive format,
    per-epoch keep counts and modes, a list of exclude patterns and a list of
    file sets. It also holds the SHA-1 checksum of the config file and the
    checksum stored in the destination directory, if there is one."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Raises Config.ReadError if the file is missing, if a required section
        or option is missing, or if a section or option is unknown or has an
        invalid value. Errors raised by configparser while parsing the file
        itself are passed on unchanged."""

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # A missing option is reported as ReadError like every other
        # configuration problem instead of escaping as NoOptionError
        for reqopt in ["directory", "format"]:
            if not config.has_option("destination", reqopt):
                raise Config.ReadError("Option '" + reqopt + \
                        "' is missing in section 'destination'.")

        self.directory = config.get("destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    # Validate before storing, so that an invalid value
                    # never ends up in the configuration
                    epochmode = config.get("history", opt)
                    if epochmode not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                    self.epochmodes[epoch] = epochmode
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        # Every remaining section has to describe a file set
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Get the checksum saved in the destination directory; there is none
        # if the file cannot be opened
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
197
198
class BackupManager:
    """List, create and prune backups in the configured destination
    directory."""

    def __init__(self, conffn):
        """Read the configuration from the file conffn."""
        self.conf = Config()
        self.conf.read(conffn)


    def listAllDirs(self):
        """List the names of all directories in the destination directory."""

        basedir = self.conf.directory
        return [ d for d in os.listdir(basedir) \
                if os.path.isdir(os.path.join(basedir, d)) ]


    def listOldBackups(self):
        """Returns a list of old backups, i.e., a Backup object for every
        directory whose name looks like a backup directory."""

        return [ Backup.fromDirName(d) for d in self.listAllDirs() \
                if Backup.isBackupDir(d) ]


    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on self.conf and list of old backups.
        Returns None if no backup is to be made."""

        # Find the longest epoch for which we would like to make a backup
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in reversed(sorted( [ (Epoch[e], e) for e in RealEpoch ] )):
            # We do not make backups of that epoch
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest.
            # 'latest' is carried over from the longer epochs on purpose of
            # the original logic: it only ever grows.
            dates = [ b.date for b in backups if b.epoch == e ]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None


    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        The archive is written by /bin/tar to '<targetdir>/<fileset
        name>.<format>'. If since is given, it is handed to tar via -N.
        A non-zero exit status of tar is logged as error to 'backuplog';
        no exception is raised for it."""

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the exclude patterns
        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + [ "./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [tarpath] + ["-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen( tarargs, bufsize=-1, \
                stdout=subprocess.PIPE, stderr=subprocess.PIPE )

        # Change tarp's stdout and stderr to non-blocking
        for s in [tarp.stdout, tarp.stderr]:
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Read stdout and stderr of tarp while it is running
        errmsg = b""
        while tarp.poll() is None:
            rd,wr,ex = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
            if tarp.stdout in rd:
                line = tarp.stdout.readline()
                # Nothing is returned at end of file; do not log that
                if line:
                    logging.debug( line[:-1].decode() )
            if tarp.stderr in rd:
                # Guard against read() handing back None instead of bytes
                errmsg += tarp.stderr.read() or b""

        # Get the remaining output of tarp
        for l in tarp.stdout.readlines():
            logging.debug(l.decode().rstrip())
        errmsg += tarp.stderr.read() or b""

        # Get return code of tarp
        rett = tarp.wait()
        if rett != 0:
            for l in errmsg.decode().split("\n"):
                logfile.error(l)
            logfile.error(tarpath + " returned with exit status " + str(rett) + ".")


    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If a non-full mode is requested but no full backup exists, a full
        backup is made instead."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup. Only the real epochs have a configured mode;
        # any other epoch (i.e. "sporadic") falls back to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [ b for b in oldbackups if b.mode == "full" ]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so switch to a full backup
        if mode != "full" and len(oldfullbackups) == 0:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything. Otherwise 'since'
        # is the date of the latest full backup (diff) or of the latest
        # backup of any mode (incr).
        since = None
        if mode == "diff":
            since = max( b.date for b in oldfullbackups )
        elif mode == "incr":
            since = max( b.date for b in oldbackups )

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory under a temporary name. %x requires an
        # integer, hence the conversion of the random float.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random()*2e16) )
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir( targetdir )

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler( os.path.join(targetdir, "log") )
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file of this backup, so that it is
            # neither leaked nor written to by a later backup
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename( targetdir, os.path.join(basedir, dirname) )

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open( os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write( self.conf.checksum )


    def prune(self):
        """Prune old backup files.

        Marked for removal are all directories in the destination directory
        that do not look like a backup directory and, for every real epoch,
        all but the newest 'keep' backups of that epoch. The entries are
        removed after the user confirmed."""

        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [ d for d in allDirs if not Backup.isBackupDir(d) ]

        # Collect the backups exceeding the number of backups kept per epoch
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted( [ b for b in backups if b.epoch == e ], \
                    key=lambda b : b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            removeDirs += [ Backup.getDirName(b.date, b.epoch, b.mode) \
                    for b in newestfirst[keep:] ]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs. This guards
        # a recursive delete, so it must not be dropped when running with -O.
        for d in removeDirs:
            if d not in allDirs:
                raise AssertionError("'" + d + "' is not in destination directory.")

        if len(removeDirs) == 0:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d,e.strerror) )


    def ask_user_yesno(self, question):
        """Ask the user the given question and return the answer as typed.
        The user is only asked if the console log level is INFO or lower;
        otherwise "y" is returned without asking."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
445
446
def printUsage():
    """Write the usage text (--help) to standard output."""

    prog = sys.argv[0]
    usage = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    ]
    for line in usage:
        print(line)
472
473
474
class LogConf:
    """Keeps the console log handler and configures the loggers."""

    # Handler writing log records to the console (stderr)
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Configure the root logger with the console handler and set the
        level of the 'backuplog' logger."""
        console = cls.con
        console.setLevel(logging.WARNING)

        # The loggers let everything pass; filtering is left to the handlers
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        root.addHandler(console)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
491
492
# Command-line entry point: parse the options, then run the chosen command
# (backup, list or prune) on the configured destination directory.
if __name__ == "__main__":

    LogConf.setup()

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options that take the following command-line word as their value
    valueopts = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]

    i = 0
    while i < len(sys.argv)-1:
        i += 1
        opt = sys.argv[i]

        # Fetch the value of the option here, so that a missing value is
        # reported as an error instead of raising an IndexError
        optarg = None
        if opt in valueopts:
            i += 1
            if i >= len(sys.argv):
                logging.error("Option '" + opt + "' requires an argument.")
                sys.exit(1)
            optarg = sys.argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = optarg

        elif opt in ["-V", "--version"]:
            print("shbackup " + __version__)
            sys.exit(0)

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            # The level names are looked up as attributes of the logging module
            numlevel = getattr(logging, optarg.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level '" + optarg + "'.")
                sys.exit(1)
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = optarg
            if mode not in Mode:
                logging.error("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = optarg
            if epoch not in Epoch:
                logging.error("Unknown epoch '" + epoch + "'.")
                sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    # configparser.Error is the base class of all configparser exceptions,
    # including DuplicateOptionError, and carries a 'message' attribute
    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error reading config file: " + e.message)
        sys.exit(1)
567
568
569
570