Fix blocking bug of tar's output
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl
12 import random, re
13 import logging
14
15
# Supported backup modes: full, incremental, differential.
Mode = ["full", "incr", "diff"]

# Epochs that correspond to an actual time span between two backups.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epochs: the real ones plus "sporadic", which has a zero time span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
28
29
class Backup:
    """One backup, described by its timestamp, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name of the form
        YYYYMMDD-HHMM-epoch-mode.

        Raises ValueError if the name does not have exactly four
        dash-separated parts, or names an unknown epoch or mode, or encodes
        an invalid date.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)
        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        stamp = datetime.datetime(year, month, day, hour, minute)

        return Backup(stamp, epoch, mode)

    def __str__(self):
        pieces = ["[date: ", self.date.ctime(),
                  ", epoch: ", self.epoch,
                  ", mode: ", self.mode, "]"]
        return "".join(pieces)

    def colAlignedString(self):
        """Return date, epoch and mode as right-aligned columns."""
        when = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %8s %4s" % (when, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Return the directory name used for a backup with these properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Return a match object if dirname looks like a backup directory,
        None otherwise."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
73
74
75
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    @staticmethod
    def _getRequired(config, section, option):
        """Return a mandatory option; raise ReadError if it is missing."""
        try:
            return config.get(section, option)
        except configparser.NoOptionError:
            raise Config.ReadError("Option '" + option + "' is missing in " +
                                   "section '" + section + "'.")

    def read(self, filename):
        """Read configuration from file.

        Fills in the destination directory and archive format, the per-epoch
        keep counts and modes, the exclude patterns and the file sets. Also
        computes the SHA-1 checksum of the config file and loads the checksum
        stored in the destination directory, if there is one.

        Raises Config.ReadError if the file is missing or its content is
        invalid. Parser errors of configparser itself are not translated.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # A missing mandatory option is reported as ReadError rather than
        # leaking configparser.NoOptionError to the caller.
        self.directory = self._getRequired(config, "destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = self._getRequired(config, "destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                # Options are named keep<epoch> or mode<epoch>.
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        # Every remaining section must be a file set named "set <name>".
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum stored in the destination directory; it is
        # simply absent if that file cannot be read.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
197
198
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Load the configuration from the file conffn."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the name of the longest epoch whose latest backup is older
        than the epoch's time span, or None if no backup is due.
        """

        # Walk from the longest to the shortest epoch. 'latest' accumulates
        # over the iterations, so a backup of a longer epoch also counts as
        # a backup of every shorter one.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[e], e) for e in RealEpoch),
                                  reverse=True):
            # Epochs with a keep count of zero are not backed up at all
            if self.conf.epochkeeps[e] == 0:
                continue

            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        fileset   -- object with a 'name' and a list of 'dirs'
        targetdir -- directory in which '<name>.<format>' is created
        since     -- if given, only files newer than this datetime are
                     archived (tar option -N)

        A non-zero exit status of tar is logged as an error, together with
        everything tar wrote to stderr; no exception is raised for it.
        """
        import select

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        # Launch the tar process
        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        logfile.debug("tar call: " + " ".join(tarargs))

        errchunks = []
        pending = b""
        with subprocess.Popen(tarargs, bufsize=0, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as tarp:
            # Drain both pipes as data arrives. Waiting in select() avoids
            # both the deadlock of reading only one pipe while tar blocks on
            # the other, and busy-polling while tar is silent.
            outfd = tarp.stdout.fileno()
            openfds = {outfd, tarp.stderr.fileno()}
            while openfds:
                ready, _, _ = select.select(list(openfds), [], [])
                for fd in ready:
                    chunk = os.read(fd, 65536)
                    if not chunk:
                        # End of file on this pipe
                        openfds.discard(fd)
                    elif fd == outfd:
                        # Log complete lines, keep the unfinished rest
                        pending += chunk
                        lines = pending.split(b"\n")
                        pending = lines.pop()
                        for line in lines:
                            logging.debug(line.decode(errors="replace"))
                    else:
                        errchunks.append(chunk)

            rett = tarp.wait()

        if pending:
            logging.debug(pending.decode(errors="replace"))

        if rett != 0:
            errmsg = b"".join(errchunks).decode(errors="replace")
            for line in errmsg.splitlines():
                logfile.error(line.strip())
            logfile.error(tarpath + " returned with exit status " + str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup. Only real epochs have a configured mode; any
        # other epoch (i.e. "sporadic") falls back to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to.
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory. The random suffix keeps the name from
        # matching the backup-directory pattern until the backup is complete.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)
        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach the handler so that later log messages do not end up
            # in this backup's log file, and release the file.
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Candidates for removal are all directories in the destination that
        are not named like a backup, plus, per real epoch, all backups beyond
        the configured keep count (newest are kept). The user is asked for
        confirmation before anything is removed.
        """

        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Add the outdated backups of every real epoch
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in newestfirst[keep:]]

        marked = set(removeDirs)

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in marked else "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user a yes/no question and return the answer.

        The user is only asked if the console log level is INFO or lower;
        otherwise "y" is returned. The answer is returned stripped and
        lower-cased, so "N" and " n " both count as "n".
        """
        if LogConf.con.level <= logging.INFO:
            return input(question).strip().lower()
        else:
            return "y"
449
450
def printUsage():
    """Write the usage text shown for --help to standard output."""

    prog = sys.argv[0]
    usage = (
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    )
    print("\n".join(usage))
476
477
478
class LogConf:
    """Holds the logging setup of the program."""

    # Console handler writing to stderr; its level controls the verbosity.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Configure the root logger, the console handler and the
        'backuplog' logger."""
        cls.con.setLevel(logging.WARNING)

        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        root.addHandler(cls.con)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
495
496
if __name__ == "__main__":
    # Command line entry point: parse the options, then run one of the
    # commands backup, list (default) or prune. Exits with status 1 on any
    # usage or configuration error.

    LogConf.setup()

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options that consume the following command line argument
    optsWithValue = ("-c", "--conf", "--verbosity", "-m", "--mode",
                     "-e", "--epoch")

    args = iter(sys.argv[1:])
    for opt in args:

        if opt in ("-h", "--help"):
            printUsage()
            sys.exit(0)

        elif opt in ("-V", "--version"):
            print("shbackup " + __version__)
            sys.exit(0)

        elif opt in ("-v", "--verbose"):
            LogConf.con.setLevel(logging.INFO)

        elif opt in optsWithValue:
            value = next(args, None)
            if value is None:
                logging.error("Option '" + opt + "' requires an argument.")
                sys.exit(1)

            if opt in ("-c", "--conf"):
                conffn = value

            elif opt == "--verbosity":
                # Map the level name onto the constant of the logging module
                numlevel = getattr(logging, value.upper(), None)
                if not isinstance(numlevel, int):
                    logging.error("Invalid verbosity level: %s" % value)
                    sys.exit(1)
                LogConf.con.setLevel(numlevel)

            elif opt in ("-m", "--mode"):
                mode = value
                if mode not in Mode:
                    logging.error("Unknown mode '" + mode + "'.")
                    sys.exit(1)

            else:
                epoch = value
                if epoch not in Epoch:
                    logging.error("Unknown epoch '" + epoch + "'.")
                    sys.exit(1)

        elif opt in ("backup", "list", "prune"):
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        # configparser.Error is the base class of all parser errors and
        # carries a 'message' attribute just like Config.ReadError.
        logging.error("Error reading config file: " + e.message)
        sys.exit(1)
571
572
573
574