Adding MIT License
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Supported backup modes. As used by BackupManager.backup(): "full" archives
# everything, "diff" archives files changed since the latest full backup and
# "incr" archives files changed since the latest backup of any mode.
Modes = ["full", "incr", "diff"]
17
class Epoch:
    """A backup epoch: a time span plus per-epoch backup settings.

    Attributes:
        unit: key of Epoch.units, or None for an epoch without a time span.
        mult: multiplier applied to the unit's time delta.
        mode: backup mode used for backups of this epoch.
        numkeeps: number of backups of this epoch that are kept when pruning.
        excludes: list of exclude patterns specific to this epoch.
    """

    # Nominal length of each supported unit
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365)}

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return "[unit: " + repr(self.unit) + \
            ", mult:" + repr(self.mult) + \
            ", mode: " + repr(self.mode) + \
            ", numkeeps: " + repr(self.numkeeps) + \
            ", excludes: " + repr(self.excludes) + "]"

    def getTimeDelta(self):
        """Return mult times the unit's time delta, or None if unit is None.

        Raises KeyError if unit is not a key of Epoch.units.
        """
        if self.unit is None:
            return None
        return self.mult*Epoch.units[self.unit]

    def _boundariesCrossed(self, oldest, now):
        """Return how many calendar boundaries of self.unit lie between the
        datetimes oldest and now (negative if now is before oldest)."""
        unit = self.unit

        if unit == "hour":
            first = oldest.replace(minute=0, second=0, microsecond=0)
            last = now.replace(minute=0, second=0, microsecond=0)
            return (last - first) // Epoch.units["hour"]

        if unit == "day":
            return (now.date() - oldest.date()).days

        if unit == "week":
            # Compare the Mondays of both weeks, so year changes are harmless
            first = oldest.date() - datetime.timedelta(oldest.weekday())
            last = now.date() - datetime.timedelta(now.weekday())
            return (last - first).days // 7

        if unit == "month":
            return 12*(now.year - oldest.year) + now.month - oldest.month

        if unit == "year":
            return now.year - oldest.year

        raise KeyError(unit)

    def isRipe(self, oldest, now):
        """Return True if a backup of this epoch is due at datetime now, given
        that the reference backup was made at datetime oldest.

        An epoch without unit is always ripe. Otherwise the epoch is ripe if
        either the full time delta has elapsed, or at least mult calendar
        boundaries of the unit (e.g. midnights for "day") have been crossed.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        # Count real boundaries instead of comparing raw calendar fields:
        # abs(now.day - oldest.day) is 30 from Jan 31 to Feb 1, which would
        # make any multiple of a day look ripe after a single day.
        return self._boundariesCrossed(oldest, now) >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse "<unit>" or "<mult> * <unit>" and return (mult, unit).

        Raises ValueError on a malformed string, a non-integer or
        non-positive multiplier, or a unit which is not in Epoch.units.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        unit = None
        mult = 1
        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
91
92
93
class FileSet:
    """A named group of directories together with its exclude patterns."""

    def __init__(self, name, dirs, excludes):
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        pieces = [
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        ]
        return "".join(pieces)
105
106
class Backup:
    """Describes one backup by its date, the name of its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name as built by getDirName().

        Raises ValueError if the name does not consist of exactly four
        '-'-separated parts, if the mode is unknown or if the date and time
        parts do not form a valid datetime.
        """
        datepart, timepart, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = datepart[0:4], datepart[4:6], datepart[6:8]
        hour, minute = timepart[0:2], timepart[2:4]
        when = datetime.datetime(int(year), int(month), int(day),
                                 int(hour), int(minute))
        return Backup(when, epoch, mode)

    def __repr__(self):
        return "".join(["[date: ", self.date.ctime(),
                        ", epoch: ", self.epoch,
                        ", mode: ", self.mode, "]"])

    def colAlignedString(self):
        """Return date, age, epoch and mode as one column-aligned line. The
        age is given in hours up to 48 hours and in days beyond that."""
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds()/3600
        agestr = "(%s h)" % int(hours) if hours <= 48 else "(%s d)" % age.days
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %7s %10s %4s" % (stamp, agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Return the directory name of a backup with the given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Return a match object if dirname looks like a backup directory
        name, and None otherwise."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
155
156
157
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            # Initialise the base class as well, so that str(exc) and
            # tracebacks show the message instead of an empty string.
            super().__init__(value)
            self.value = value
            self.message = value

    # Supported archive formats; used as file name extension of the archives
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        # The "sporadic" epoch has no time span and is always available
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        # Formatting (rather than concatenating) keeps repr() usable even
        # before a backup directory has been configured.
        return "[backupdir: {0}, format: {1}, tarbin: {2}".format(
                self.backupdir, self.format, self.tarbin) + \
            ", excludes: " + repr(self.excludes) + \
            ", epochs: " + repr(self.epochs) + \
            ", sets: " + repr(self.sets) + "]"

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs

    def _read_global(self, config, sec):
        """Read the options of the global section sec from config.

        Raises Config.ReadError on unknown options or invalid values.
        """
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read the epoch section sec ("epoch <name>") from config and add
        the resulting Epoch to self.epochs.

        Raises Config.ReadError if the epoch is already defined, on unknown
        options, on invalid values or if numkeeps is missing.
        """
        name = sec[6:].strip()
        epoch = Epoch()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        # An epoch named like a unit gets that unit as its time span
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " +
                                           "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    epoch.mult, epoch.unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid timespan '{0}': {1}".format(td, str(err)))

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read the file set section sec ("set <name>") from config and
        append the resulting FileSet to self.sets.

        Raises Config.ReadError on unknown options.
        """
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]

    def read(self, filename):
        """Read configuration from file.

        Besides the settings, the SHA1 checksum of the file is stored in
        self.checksum, and the checksum saved in the backup directory (if
        any) in self.lastchecksum.

        Raises Config.ReadError if the file does not exist or its content is
        invalid; errors of configparser are passed on.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Fetch the checksum saved in the backup directory; a missing or
        # unreadable file simply means that there is none.
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
330
331
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Read the configuration from the file conffn. Errors raised by
        Config.read() are passed on."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in backupdir"""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on self.conf and the list of old backups.

        Returns the name of the longest epoch which is ripe at datetime now,
        or None if no backup is to be made.
        """
        # Find the longest epoch for which we would like to make a backup.
        # latest is deliberately carried over from longer to shorter epochs.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        excludes is a list of exclude patterns which is used in addition to
        those of the file set. If the datetime since is given, it is passed
        to tar by means of the -N option. A non-zero exit status of tar is
        logged as error, together with tar's stderr output.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns and those from the file set
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin] + ["-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Change tarp's stdout and stderr to non-blocking
        for s in [tarp.stdout, tarp.stderr]:
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Read stdout and stderr of tarp. File names need not be valid UTF-8,
        # hence undecodable bytes are replaced instead of raising an error. A
        # non-blocking read may yield None, which is treated as "no data".
        errmsg = b""
        while tarp.poll() is None:
            rd, wr, ex = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
            if tarp.stdout in rd:
                line = tarp.stdout.readline() or b""
                logging.debug(line.decode(errors="replace").rstrip("\n"))
            if tarp.stderr in rd:
                errmsg += tarp.stderr.read() or b""

        # Get the remaining output of tarp
        for l in tarp.stdout.readlines():
            logging.debug(l.decode(errors="replace").rstrip())
        errmsg += tarp.stderr.read() or b""

        # Get return code of tarp
        rett = tarp.wait()
        if rett != 0:
            for l in errmsg.decode(errors="replace").split("\n"):
                logfile.error(l)
            logfile.error(self.conf.tarbin + " returned with exit status " +
                          str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so actually switch to a full backup.
        if mode != "full" and len(oldfullbackups) == 0:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name. The random
        # number has to be an integer, as %x does not accept floats.
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random()*2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the log file of this backup, so that it is
            # neither leaked nor written to by later backups.
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Entries to be removed are all directories in the backup directory
        which do not look like backup directories, and for each epoch with a
        unit all but the numkeeps newest backups. They are removed after
        confirmation by ask_user_yesno().
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Collect the outdated backups of each epoch
        backups = self.listOldBackups()
        for e in self.conf.getRealEpochsSorted():
            newestfirst = sorted([b for b in backups if b.epoch == e],
                                 key=lambda b: b.date, reverse=True)
            old = newestfirst[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode) for b in old]

        stale = set(removeDirs)

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in stale else "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if len(removeDirs) == 0:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user the given question and return the answer, if the
        console log level is INFO or more verbose. Return "y" otherwise."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
581
582
def printUsage():
    """Write the usage text (as shown for --help) to standard output."""
    prog = sys.argv[0]
    lines = (
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch, which",
        " can be 'sporadic' or one of the configured epochs",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    )
    for line in lines:
        print(line)
608
609
610
class LogConf:
    """Holds the console log handler and sets up the loggers."""

    # Console handler; its level decides what is shown on stderr
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Configure the root logger with the console handler (showing
        warnings and above) and let the "backuplog" logger pass everything."""
        cls.con.setLevel(logging.WARNING)

        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        root.addHandler(cls.con)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
627
628
if __name__ == "__main__":
    # Command line entry point: parse the options, read the configuration
    # and run the requested command. The exit status is 0 on success and 1
    # on invalid command lines or configuration errors.

    LogConf.setup()

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options which take the following command line word as their value
    valueopts = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]

    i = 0
    while i < len(sys.argv)-1:
        i += 1
        opt = sys.argv[i]

        # Fetch the value of the option, if it takes one, and complain about
        # a missing value instead of running past the end of sys.argv.
        value = None
        if opt in valueopts:
            if i+1 >= len(sys.argv):
                logging.error("Option '" + opt + "' requires an argument.")
                sys.exit(1)
            i += 1
            value = sys.argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = value

        elif opt in ["-V", "--version"]:
            print("shbackup " + __version__)
            sys.exit(0)

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            # Map the level name to the numeric level of the logging module
            numlevel = getattr(logging, value.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: " + value)
                sys.exit(1)
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = value
            if mode not in Modes:
                logging.error("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = value

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            sys.exit(1)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        # Report the failure to the caller as well
        sys.exit(1)
705
706
707
708