Formatting
[sitarba.git] / sitarba
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "2.0"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Names of the supported backup modes. A mode given on the command line, in
# the configuration, or encoded in a backup directory name must be one of these.
Modes = [
    "full",
    "incr",
    "diff",
]
17
18
class Options:
    """Process-wide runtime switches, stored as class attributes."""

    # When True, callers are expected to report what they would do and then
    # stop before changing anything.
    dryrun = False
21
22
class Epoch:
    """A backup epoch: a period after which a new backup becomes due.

    Attributes:
        unit: key of ``Epoch.units`` naming the base period, or None for an
            epoch without a period (such an epoch is always ripe).
        mult: number of ``unit`` periods that make up one epoch.
        mode: backup mode used for this epoch.
        numkeeps: number of backups to keep, or None if unset.
        excludes: exclude patterns specific to this epoch.
    """

    # Nominal length of each unit. Months and years are approximated by their
    # longest/common length; isRipe() additionally looks at calendar
    # boundaries, so a backup does not have to wait for the full nominal span.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return ("[unit: " + repr(self.unit) +
                ", mult:" + repr(self.mult) +
                ", mode: " + repr(self.mode) +
                ", numkeeps: " + repr(self.numkeeps) +
                ", excludes: " + repr(self.excludes) + "]")

    def getTimeDelta(self):
        """Return the nominal epoch length as a timedelta, or None if the
        epoch has no unit. Raises KeyError for a unit not in Epoch.units."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    @staticmethod
    def _boundariesCrossed(unit, oldest, now):
        """Return how many calendar boundaries of `unit` lie between the
        datetimes `oldest` and `now` (negative if `now` precedes `oldest`),
        or None for an unknown unit."""
        if unit == "hour":
            start = oldest.replace(minute=0, second=0, microsecond=0)
            end = now.replace(minute=0, second=0, microsecond=0)
            return (end - start) // datetime.timedelta(hours=1)
        if unit == "day":
            return (now.date() - oldest.date()).days
        if unit == "week":
            # Compare the Mondays of both weeks (ISO weeks start on Monday).
            startMonday = oldest.date() - datetime.timedelta(oldest.weekday())
            endMonday = now.date() - datetime.timedelta(now.weekday())
            return (endMonday - startMonday).days // 7
        if unit == "month":
            return (now.year - oldest.year) * 12 + (now.month - oldest.month)
        if unit == "year":
            return now.year - oldest.year
        return None

    def isRipe(self, oldest, now):
        """Tell whether a backup of this epoch is due.

        `oldest` is the datetime of the latest relevant backup, `now` the
        current datetime. The epoch is ripe if it has no unit, if at least
        the nominal time span has elapsed, or if at least `mult` calendar
        boundaries of the unit (hour, day, ISO week, month, year) have been
        crossed.

        The boundary count is computed on the full calendar position rather
        than on the bare hour/day/week/month number, so wrap-arounds such as
        23h -> 1h or December -> January are counted as 2 and 1 instead of
        22 and 11.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        crossed = Epoch._boundariesCrossed(self.unit, oldest, now)
        if crossed is None:
            return None
        # abs() keeps the historical behaviour for a clock that went backwards.
        return abs(crossed) >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse "UNIT" or "MULT * UNIT" and return the tuple (mult, unit).

        Raises ValueError if the format is wrong, the multiplier is not a
        positive integer, or the unit is not a key of Epoch.units.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        mult = 1
        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
96
97
98
class FileSet:
    """Named collection of directories together with its exclude patterns."""

    def __init__(self, name, dirs, excludes):
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        # Assemble the pieces in order; name must be a string.
        pieces = [
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        ]
        return "".join(pieces)
110
111
class Backup:
    """One backup, described by its creation date, epoch name and mode."""

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name of the form
        YYYYMMDD-HHMM-EPOCH-MODE. Raises ValueError if the name does not have
        four dash-separated parts, the mode is unknown, or the date is
        invalid."""
        datepart, timepart, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year = int(datepart[0:4])
        month = int(datepart[4:6])
        day = int(datepart[6:8])
        hour = int(timepart[0:2])
        minute = int(timepart[2:4])

        return Backup(datetime.datetime(year, month, day, hour, minute),
                      epoch, mode)

    def __repr__(self):
        return "".join(["[date: ", self.date.ctime(),
                        ", epoch: ", self.epoch,
                        ", mode: ", self.mode, "]"])

    def colAlignedString(self):
        """Return a one-line, column-aligned description: date, age, epoch
        and mode. The age is given in hours up to 48 hours, in days beyond."""
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds() / 3600
        agestr = ("(%s h)" % int(hours)) if hours <= 48 else ("(%s d)" % age.days)
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %7s %10s %4s" % (stamp, agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Compose the directory name for a backup with these properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Return a match object if dirname looks like a backup directory
        name, None otherwise."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
160
161
162
class Config:
    """Encapsulates the configuration for the backup program.

    The configuration file is an INI file with a mandatory "global" section
    and any number of "epoch NAME" and "set NAME" sections.
    """

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            # Hand the text to RuntimeError too, so str(exc) is not empty.
            super().__init__(value)
            self.value = value
            self.message = value

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        return ("[backupdir: " + self.backupdir +
                ", format: " + self.format +
                ", tarbin: " + self.tarbin +
                ", excludes: " + repr(self.excludes) +
                ", epochs: " + repr(self.epochs) +
                ", sets: " + repr(self.sets) + "]")

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs

    def _read_global(self, config, sec):
        """Read the options of the global section `sec` from `config`.

        Raises Config.ReadError on an unknown option, a missing backup
        directory or tar binary, or an unsupported archive format.
        """
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes.append(config.get(sec, opt))
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read the section `sec` (named "epoch NAME") from `config` and
        register the epoch under NAME.

        An epoch named like a standard unit uses that unit and must not
        define a timespan; other epochs get unit and multiplier from the
        "timespan" option. Raises Config.ReadError on a duplicate or invalid
        name, an invalid or unknown option, or a missing numkeeps.
        """
        name = sec[6:].strip()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        if not re.match(r'^\w+$', name):
            # Parenthesised so that format() applies to the whole message.
            raise Config.ReadError(
                ("Epoch name '{0}' does not only "
                 "comprise alphanumeric characters.").format(name))

        epoch = Epoch()
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError(
                        "Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError(
                        "The time delta of a standard epoch " +
                        "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    mult, unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid timespan '{0}': {1}".format(td, str(err)))
                epoch.unit = unit
                epoch.mult = mult

            elif opt.startswith("exclude"):
                epoch.excludes.append(config.get(sec, opt))

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError(
                "No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read the section `sec` (named "set NAME") from `config` and append
        the resulting FileSet to self.sets.

        Options starting with "dir" add directories, options starting with
        "exclude" add exclude patterns. Raises Config.ReadError on an invalid
        name or an unknown option.
        """
        name = sec[4:].strip()
        if not re.match(r'^\w+$', name):
            # Parenthesised so that format() applies to the whole message.
            raise Config.ReadError(
                ("Set name '{0}' does not only "
                 "comprise alphanumeric characters.").format(name))

        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs.append(config.get(sec, opt))
            elif opt.startswith("exclude"):
                excludes.append(config.get(sec, opt))
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets.append(FileSet(name, dirs, excludes))

    def read(self, filename):
        """Read configuration from file.

        Besides parsing all sections this stores the SHA-1 checksum of the
        file in self.checksum and the checksum saved in the backup directory
        (None if it cannot be read) in self.lastchecksum. Raises
        Config.ReadError for a missing file, missing or unknown sections, or
        any invalid section content.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError(
                    "Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum remembered in the backup directory, if any
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
344
345
class BackupManager:
    """List, create and prune backups."""

    def __init__(self, conffn):
        """Load the configuration from the file `conffn`."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List the names of all directories in backupdir."""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listExistingBackups(self):
        """Return a list of Backup objects, one per directory in backupdir
        whose name looks like a backup directory."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get the desired epoch based on the configuration and the list of
        old backups, or None if no backup is to be made.

        Epochs are tried from the longest to the shortest; the first one
        that is ripe wins.
        """
        # `latest` is deliberately carried over from longer to shorter
        # epochs: a backup made for a longer epoch also counts for the
        # shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # Dates of the backups of that epoch
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for the given fileset in the target directory.

        `excludes` are exclude patterns applied in addition to those of the
        fileset. If `since` is given, it is passed to tar with -N. The output
        of tar is logged; a non-zero exit status is reported as an error to
        the "backuplog" logger but does not raise.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Tar is verbose if sitarba is verbose
        if LogConf.con.level <= logging.DEBUG:
            taropts += ["--verbose"]

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, then those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Directories are given relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin, "-cpaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))

        # The context manager closes both pipes once we are done.
        with subprocess.Popen(tarargs, bufsize=-1,
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as tarp:

            # Change tarp's stdout and stderr to non-blocking
            for s in (tarp.stdout, tarp.stderr):
                fd = s.fileno()
                fl = fcntl.fcntl(fd, fcntl.F_GETFL)
                fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

            # Read stdout and stderr of tarp while it is running
            errmsg = b""
            while tarp.poll() is None:
                rd, _, _ = select.select([tarp.stdout, tarp.stderr],
                                         [], [], 0.05)
                if tarp.stdout in rd:
                    logging.debug(tarp.stdout.readline()[:-1].decode())
                if tarp.stderr in rd:
                    # A non-blocking read may yield None instead of bytes.
                    errmsg += tarp.stderr.read() or b""

            # Get the remaining output of tarp
            for l in tarp.stdout.readlines():
                logging.debug(l.decode().rstrip())
            errmsg += tarp.stderr.read() or b""

            # Get return code of tarp
            rett = tarp.wait()

        if rett != 0:
            for l in errmsg.decode().split("\n"):
                logfile.error(l)
            logfile.error(self.conf.tarbin + " returned with exit status " +
                          str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary.

        If epoch is None then determine the desired epoch automatically; use
        the given epoch otherwise. If mode is None then use the mode of the
        epoch; use the given mode otherwise. A non-full mode is turned into
        "full" when no full backup exists yet, since there would be nothing
        to be relative to.

        The archives are written to a temporary directory inside backupdir
        that is renamed to its final name once all file sets are done.
        """
        now = datetime.datetime.now()
        oldbackups = self.listExistingBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: actually switch to a full backup
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # A full backup includes everything; the other modes are relative to
        # the latest full backup (diff) or the latest backup at all (incr).
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        if Options.dryrun:
            return

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name. The random
        # number must be converted to int: %x does not accept a float.
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger for the duration of this backup
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)
        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the handler so the log file is not kept open
            # and later messages do not end up in this backup's log.
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- remember checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup directories.

        Marked for removal are all directories in backupdir that are not
        backup directories, and, per epoch with a unit, all backups beyond
        the newest `numkeeps` ones. The list is logged; the directories are
        only removed if not in dry-run mode and the user answers "y".
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories that are removed
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Get all backups
        backups = self.listExistingBackups()

        # Per epoch: sort backups newest first; everything after the first
        # numkeeps entries is outdated.
        for e in self.conf.getRealEpochsSorted():
            newestFirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            old = newestFirst[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in old]

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in removeDirs else "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Internal sanity check: dirs to be removed are in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        if Options.dryrun:
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user `question` and return the typed answer if the console
        log level is INFO or more verbose; return "y" without asking
        otherwise."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        return "y"
608
609
def printUsage():
    """Write the --help text to standard output."""

    prog = sys.argv[0]
    usage = [
        "sitarba - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf FILE use given configuration file",
        " default: /etc/sitarba.conf",
        " -e, --epoch EPOCH force to create backup for given epoch, which",
        " can be 'sporadic' or one of the configured epochs",
        " -m, --mode MODE override mode: full, diff, or incr",
        " -n, --dry-run don't do anything, just tell what would be done",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    ]
    # A single print of the joined lines emits the same text as one print
    # per line.
    print("\n".join(usage))
636
637
638
class LogConf:
    """Holds the console log handler and configures the loggers."""

    # Handler that writes log records to stderr; created once when the class
    # is defined.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Configure logging: the root logger and the "backuplog" logger both
        pass everything from DEBUG on, while the console handler, attached to
        the root logger, starts out at WARNING."""
        root = logging.getLogger()
        backuplog = logging.getLogger("backuplog")
        for logger in (root, backuplog):
            logger.setLevel(logging.DEBUG)

        cls.con.setLevel(logging.WARNING)
        root.addHandler(cls.con)
655
656
def main(argv=None):
    """Run sitarba with the command line `argv` (default: sys.argv) and
    return the process exit status.

    Returns 0 on success and 1 for an unknown option, mode, epoch or
    verbosity level, for an option that lacks its argument, and for errors
    when reading the configuration.
    """
    if argv is None:
        argv = sys.argv

    LogConf.setup()

    conffn = "/etc/sitarba.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options that consume the following command-line word as their value.
    valued = ("-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch")

    args = iter(argv[1:])
    for opt in args:
        value = None
        if opt in valued:
            value = next(args, None)
            if value is None:
                logging.error("Option '" + opt + "' requires an argument.")
                return 1

        if opt in ("-h", "--help"):
            printUsage()
            return 0

        elif opt in ("-c", "--conf"):
            conffn = value

        elif opt in ("-V", "--version"):
            print("sitarba " + __version__)
            return 0

        elif opt in ("-v", "--verbose"):
            LogConf.con.setLevel(logging.INFO)

        elif opt == "--verbosity":
            # Only the names of numeric logging levels are accepted.
            numlevel = getattr(logging, value.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: " + value)
                return 1
            LogConf.con.setLevel(numlevel)

        elif opt in ("-m", "--mode"):
            mode = value
            if mode not in Modes:
                logging.error("Unknown mode '" + mode + "'.")
                return 1

        elif opt in ("-n", "--dry-run"):
            Options.dryrun = True

        elif opt in ("-e", "--epoch"):
            epoch = value

        elif opt in ("backup", "list", "prune"):
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            return 1

        if cmd == "backup":
            man.backup(epoch, mode)
        elif cmd == "list":
            for b in sorted(man.listExistingBackups(), key=lambda b: b.date):
                print(b.colAlignedString())
        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        # A configuration that cannot be read is a failure of the run.
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
736
737
738
739