Using select.select on tarp's output
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# The backup modes known to the program.
Mode = ["full", "incr", "diff"]

# Epochs that have a real time span, mapped to that span.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(weeks=1),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epochs: the real ones plus the zero-length "sporadic" epoch.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
28
29
class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Backup directories are named YYYYMMDD-HHMM-<epoch>-<mode>. The pattern
    # is compiled once and anchored with \Z (not $) so that a name with a
    # trailing newline is rejected instead of failing later in fromDirName().
    _dirNamePattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+\Z')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name as made by getDirName().

        Raises ValueError if the name does not consist of exactly four
        dash-separated parts, if the epoch or mode is unknown, or if the
        date and time parts do not form a valid date.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)

        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime(
            int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
            int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __str__(self):
        return "[date: " + self.date.ctime() + \
            ", epoch: " + self.epoch + \
            ", mode: " + self.mode + "]"

    def colAlignedString(self):
        """Get a one-line description with date, epoch and mode in columns."""
        return "%16s %8s %4s" % (
            self.date.strftime("%Y-%m-%d %H:%M"), self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Only the shape of the name is checked; returns True or False.
        """
        return Backup._dirNamePattern.match(dirname) is not None
73
74
75
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            # Initialize the base class so that str(e) carries the message.
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Fills in the destination directory and format, the per-epoch keep
        counts and modes, the exclude patterns and the file sets. It also
        stores the SHA-1 checksum of the config file and the checksum last
        saved in the destination directory (None if it cannot be read).

        Raises Config.ReadError if the file does not exist or if a section,
        option or value is missing or invalid.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # Report missing options as ReadError rather than letting
        # configparser.NoOptionError escape to the caller.
        for reqopt in ["directory", "format"]:
            if not config.has_option("destination", reqopt):
                raise Config.ReadError("Option '" + reqopt + \
                    "' is missing in section 'destination'.")

        self.directory = config.get("destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                    try:
                        self.epochkeeps[epoch] = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                    epochmode = config.get("history", opt)
                    if epochmode not in Mode:
                        raise Config.ReadError("Invalid mode given.")
                    self.epochmodes[epoch] = epochmode
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadError("Invalid option '" + opt + "'.")

        # Every remaining section has to be a file set named "set <name>"
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadError("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum saved in the destination directory, if any
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
197
198
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        # Keep only those entries which are directories
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""

        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the name of the longest epoch which is due, or None if no
        backup is to be made.
        """

        # Go from the longest to the shortest epoch. 'latest' is carried over
        # between epochs, so a backup of a longer epoch also counts as a
        # recent backup for all shorter epochs.
        latest = datetime.datetime(1900, 1, 1)
        spans = sorted(((Epoch[name], name) for name in RealEpoch), reverse=True)
        for timespan, e in spans:
            # We do not make backups of that epoch
            if self.conf.epochkeeps[e] == 0:
                continue

            # Get dates of the backups of that epoch
            dates = [b.date for b in backups if b.epoch == e]

            # If there are any, determine the latest
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        If since is given, it is passed to tar with -N. The standard output
        of tar is logged at debug level. If tar returns a non-zero exit
        status, its standard error and the exit status are logged as errors.
        """

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the exclude patterns
        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [tarpath, "-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Change tarp's stdout and stderr to non-blocking
        for s in (tarp.stdout, tarp.stderr):
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        def readlineNonBlocking(stream):
            """Read a line nonblocking. Returns b'' if nothing read."""
            try:
                # A non-blocking stream may yield None instead of bytes
                return stream.readline() or b''
            except (IOError, OSError):
                # Only I/O errors are ignored, so KeyboardInterrupt passes
                return b''

        # Read stdout and stderr of tarp
        errmsg = b""
        while tarp.poll() is None:
            rd, _, _ = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)

            if tarp.stdout in rd:
                l = readlineNonBlocking(tarp.stdout)
                if l != b"":
                    # Strip the newline only: a partial line must not lose
                    # its last character. File names may hold bytes which
                    # cannot be decoded, hence errors="replace".
                    logging.debug(l.decode(errors="replace").rstrip("\n"))

            if tarp.stderr in rd:
                errmsg += readlineNonBlocking(tarp.stderr)

        # Get the remaining output of tarp
        for l in tarp.stdout.readlines():
            logging.debug(l.decode(errors="replace").rstrip())
        errmsg += tarp.stderr.read() or b""

        tarp.stdout.close()
        tarp.stderr.close()

        # Get return code of tarp
        rett = tarp.wait()
        if rett != 0:
            for l in errmsg.decode(errors="replace").split("\n"):
                logfile.error(l)
            logfile.error(tarpath + " returned with exit status " + str(rett) + ".")

    @staticmethod
    def _planBackup(mode, oldbackups):
        """Get the mode to use and the date the backup is relative to.

        Returns (mode, since). The mode becomes "full" if there is no old
        full backup. since is None for a full backup, the date of the latest
        full backup for "diff" and the date of the latest backup for "incr".
        """

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        return mode, since

    @staticmethod
    def _tmpDirName(dirname):
        """Get a temporary directory name: dirname plus a random hex suffix."""
        # %x needs an integer; a float raises TypeError on current Python 3.
        return dirname + ("-%x" % random.getrandbits(64))

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup. There is no configured mode for the epoch
        # "sporadic", so a full backup is made in that case.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        mode, since = self._planBackup(mode, oldbackups)

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory under a temporary name
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, self._tmpDirName(dirname))
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file of this backup
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Directories not named like a backup and, for each real epoch, the
        backups beyond the configured keep count are listed and, after the
        user confirmed, removed.
        """

        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Collect the backups beyond the keep count, newest are kept
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted([b for b in backups if b.epoch == e],
                                 key=lambda b: b.date, reverse=True)
            old = newestfirst[self.conf.epochkeeps[e]:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode) for b in old]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user a question and return the answer.

        The user is only asked if the console log level is INFO or lower;
        otherwise "y" is returned.
        """
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
459
460
def printUsage():
    """Print the usage text shown for --help to standard output."""

    prog = sys.argv[0]
    usage = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    ]

    # One print call per line of the usage text
    for line in usage:
        print(line)
486
487
488
class LogConf:
    """Holds the logging configuration of the program"""

    # Console handler writing to stderr; created once with the class
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Configure the root logger, the console handler and the backup log"""
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        # The console shows warnings and worse until the level is changed
        console = cls.con
        console.setLevel(logging.WARNING)
        root.addHandler(console)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
503 fillog = logging.getLogger("backuplog")
504 fillog.setLevel(logging.DEBUG)
505
506
def main(argv=None):
    """Parse the command line and run the requested command.

    argv is the full argument list including the program name and defaults
    to sys.argv. Returns the exit status: 0 on success, 1 if the command
    line is invalid or the configuration file cannot be read.
    """

    if argv is None:
        argv = sys.argv

    LogConf.setup()

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # The options and their arguments are taken from the same iterator
    args = iter(argv[1:])

    def optarg(opt):
        """Get the argument of option opt; exit with an error if missing."""
        try:
            return next(args)
        except StopIteration:
            logging.error("Option '" + opt + "' requires an argument.")
            sys.exit(1)

    for opt in args:

        if opt in ["-h", "--help"]:
            printUsage()
            return 0

        elif opt in ["-c", "--conf"]:
            conffn = optarg(opt)

        elif opt in ["-V", "--version"]:
            print("shbackup " + __version__)
            return 0

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            level = optarg(opt)
            numlevel = getattr(logging, level.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: %s" % level)
                return 1
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = optarg(opt)
            if mode not in Mode:
                logging.error("Unknown mode '" + mode + "'.")
                return 1

        elif opt in ["-e", "--epoch"]:
            epoch = optarg(opt)
            if epoch not in Epoch:
                logging.error("Unknown epoch '" + epoch + "'.")
                return 1

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.DuplicateOptionError) as e:
        logging.error("Error reading config file: " + e.message)
        # A configuration that cannot be read is a failure
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
581
582
583
584