]> git.sthu.org Git - sitarba.git/blob - sitarba
Close logger before dir rename
[sitarba.git] / sitarba
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "2.0"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Names of the supported backup modes; epoch configuration and the
# --mode command line option are validated against this list.
Modes = [
    "full",
    "incr",
    "diff",
]
17
18
class Options:
    """Global runtime switches shared by the whole program."""

    # Set by -n/--dry-run: backup() and prune() return before touching
    # the backup directory when this is True.
    dryrun = False
21
22
class Epoch:
    """A backup period.

    An epoch combines a time unit with a multiplier, the backup mode used
    for it, the number of backups to keep and epoch-specific exclude
    patterns. An epoch whose unit is None has no time span.
    """

    # Length of one unit; months and years are approximated.
    units = dict(
        hour=datetime.timedelta(0, 3600),
        day=datetime.timedelta(1),
        week=datetime.timedelta(7),
        month=datetime.timedelta(31),
        year=datetime.timedelta(365),
    )

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        template = ("[unit: {0!r}, mult:{1!r}, mode: {2!r}, "
                    "numkeeps: {3!r}, excludes: {4!r}]")
        return template.format(self.unit, self.mult, self.mode,
                               self.numkeeps, self.excludes)

    def getTimeDelta(self):
        """Return the time span of this epoch, or None if it has no unit."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    def isRipe(self, oldest, now):
        """Tell whether a backup made at `oldest` is due again at `now`.

        An epoch without unit is always ripe. Otherwise the epoch is ripe
        when the full time span has elapsed, or when the calendar field
        belonging to the unit (hour, day, ISO week, month, year) differs
        by at least `mult` between both dates.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        if self.unit == "week":
            # The ISO week number is the second field of isocalendar().
            current, previous = now.isocalendar()[1], oldest.isocalendar()[1]
        elif self.unit in ("hour", "day", "month", "year"):
            # These unit names are also datetime attribute names.
            current = getattr(now, self.unit)
            previous = getattr(oldest, self.unit)
        else:
            return None

        return abs(current - previous) >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse "<unit>" or "<mult> * <unit>" into a (mult, unit) tuple.

        Raises ValueError for a malformed string, an unknown unit or a
        non-positive multiplier.
        """
        parts = [piece.strip() for piece in deltastr.split("*")]
        if len(parts) > 2:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        mult = int(parts[0]) if len(parts) == 2 else 1
        unit = parts[-1]

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
96
97
98
class FileSet:
    """A named group of directories together with its exclude patterns."""

    def __init__(self, name, dirs, excludes):
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        pieces = (
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        )
        return "".join(pieces)
110
111
class Backup:
    """One backup on disk, described by its date, epoch name and mode."""

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a name of the form YYYYMMDD-HHMM-epoch-mode.

        Raises ValueError if the name does not consist of exactly four
        dash-separated fields, if the mode is unknown or if the date
        fields are not valid.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        when = datetime.datetime(year, month, day, hour, minute)

        return Backup(when, epoch, mode)

    def __repr__(self):
        return "".join((
            "[date: ", self.date.ctime(),
            ", epoch: ", self.epoch,
            ", mode: ", self.mode,
            "]",
        ))

    def colAlignedString(self):
        """Return a one-line, column-aligned description including the age.

        The age is given in hours up to 48 hours and in days beyond that.
        """
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds() / 3600
        if hours > 48:
            agestr = "(%s d)" % age.days
        else:
            agestr = "(%s h)" % int(hours)
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %9s %10s %4s" % (stamp, agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return "-".join((date.strftime("%Y%m%d-%H%M"), epoch, mode))

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns the regex match object (truthy) or None.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
160
161
162
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations.

        The text is available as `message` and `value`, and via str().
        """

        def __init__(self, value):
            # Hand the text to RuntimeError so that str(exc) is not empty.
            super().__init__(value)
            self.value = value
            self.message = value

    # Supported archive formats; each is used as the archive file extension.
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        # The unit-less "sporadic" epoch always exists.
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        # str.format (instead of "+") keeps this usable while backupdir
        # is still None, i.e. before read() has been called.
        template = ("[backupdir: {0}, format: {1}, tarbin: {2}, "
                    "excludes: {3!r}, epochs: {4!r}, sets: {5!r}]")
        return template.format(self.backupdir, self.format, self.tarbin,
                               self.excludes, self.epochs, self.sets)

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [name for name in epochs if epochs[name].unit is not None]
        realepochs.sort(key=lambda name: epochs[name].getTimeDelta(),
                        reverse=True)
        return realepochs

    @staticmethod
    def _check_name(kind, name):
        """Raise ReadError unless name consists of word characters only.

        kind ("Epoch" or "Set") is used as the prefix of the error message.
        """
        if not re.match(r'^\w+$', name):
            # Format the complete message; formatting only the second
            # string literal would leave the "{0}" placeholder unreplaced.
            raise Config.ReadError(
                (kind + " name '{0}' does not only "
                 "comprise alphanumeric characters.").format(name))

    def _read_global(self, config, sec):
        """Read the options of the global section `sec` from `config`.

        Raises Config.ReadError for unknown options, an invalid format, a
        missing backup directory or a missing tar binary.
        """
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes.append(config.get(sec, opt))
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read the epoch section `sec` ("epoch <name>") from `config`.

        Raises Config.ReadError if the epoch is already defined, has an
        invalid name, an invalid or unknown option, or no numkeeps.
        """
        name = sec[6:].strip()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        Config._check_name("Epoch", name)

        epoch = Epoch()
        # Epochs named like a time unit implicitly span one such unit.
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError(
                        "Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " +
                                           "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    mult, unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    # A distinct name keeps the exception from shadowing
                    # the epoch object that is being built.
                    raise Config.ReadError(
                        "Invalid timespan '{0}': {1}".format(td, str(err)))
                epoch.unit = unit
                epoch.mult = mult

            elif opt.startswith("exclude"):
                epoch.excludes.append(config.get(sec, opt))

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError(
                "No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read the file set section `sec` ("set <name>") from `config`.

        Raises Config.ReadError for an invalid name or an unknown option.
        """
        name = sec[4:].strip()
        Config._check_name("Set", name)

        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs.append(config.get(sec, opt))
            elif opt.startswith("exclude"):
                excludes.append(config.get(sec, opt))
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets.append(FileSet(name, dirs, excludes))

    def read(self, filename):
        """Read configuration from file.

        Also computes the SHA-1 checksum of the file and loads the
        checksum previously saved in the backup directory, if there is
        one. Raises Config.ReadError if the file is missing or invalid.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError(
                    "Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # A missing or unreadable checksum file means that no checksum
        # has been saved so far.
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
344
345
class BackupManager:
    """List, create and prune backups."""

    def __init__(self, conffn):
        """Read the configuration from the file `conffn`."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in backupdir"""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listExistingBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on self.configuration and list of old backups

        Returns the name of the longest epoch that is ripe at `now`, or
        None if no backup is to be made.
        """
        # Find the longest epoch for which we would like the make a backup.
        # `latest` is deliberately carried over between iterations, so a
        # backup of a longer epoch also counts for the shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        `excludes` are tar exclude patterns applied in addition to those
        of the file set. If `since` is given, it is passed to tar via -N.
        A failing tar is reported to the 'backuplog' logger; no exception
        is raised for a non-zero exit status.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Tar is verbose if sitarba is verbose
        if LogConf.con.level <= logging.DEBUG:
            taropts.append("--verbose")

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, then those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin, "-cpaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))

        errmsg = b""
        # The context manager closes both pipes when we are done.
        with subprocess.Popen(tarargs, bufsize=-1, stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE) as tarp:

            # Change tarp's stdout and stderr to non-blocking
            for stream in (tarp.stdout, tarp.stderr):
                fd = stream.fileno()
                flags = fcntl.fcntl(fd, fcntl.F_GETFL)
                fcntl.fcntl(fd, fcntl.F_SETFL, flags | os.O_NONBLOCK)

            # Read stdout and stderr of tarp while it is running
            while tarp.poll() is None:
                rd, _, _ = select.select([tarp.stdout, tarp.stderr],
                                         [], [], 0.05)
                if tarp.stdout in rd:
                    line = tarp.stdout.readline()
                    if line:
                        logging.debug(line.decode(errors="replace").rstrip())
                if tarp.stderr in rd:
                    # A non-blocking read may yield None instead of bytes.
                    errmsg += tarp.stderr.read() or b""

            # Get the remaining output of tarp
            for line in tarp.stdout.readlines():
                logging.debug(line.decode(errors="replace").rstrip())
            errmsg += tarp.stderr.read() or b""

            # Get return code of tarp
            rett = tarp.wait()

        if rett != 0:
            for line in errmsg.decode(errors="replace").split("\n"):
                logfile.error(line)
            logfile.error(self.conf.tarbin + " returned with exit status " +
                          str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If no full backup exists yet, a full backup is made regardless of
        the requested mode.
        """
        now = datetime.datetime.now()
        oldbackups = self.listExistingBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so actually switch to full mode.
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        if Options.dryrun:
            return

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name. "%x" only
        # accepts integers, hence the explicit conversion.
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the handler before the directory is
            # renamed, so no later message is written to the old path.
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files

        Directories which are not backup directories and backups that
        exceed the numkeeps of their epoch are listed and, after
        confirmation, removed.
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories that are removed
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Get all backups
        backups = self.listExistingBackups()

        # If we have too many backups of a specific epoch --> add the
        # oldest ones to the remove list
        for e in self.conf.getRealEpochsSorted():
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            old = newestfirst[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in old]

        marked = set(removeDirs)

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in marked else "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        if Options.dryrun:
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user `question` and return the answer.

        The user is only asked if the console log level is INFO or more
        verbose; otherwise "y" is returned without asking.
        """
        if LogConf.con.level <= logging.INFO:
            return input(question)
        return "y"
609
610
def printUsage():
    """Print --help text"""

    prog = sys.argv[0]
    usage = [
        "sitarba - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf FILE use given configuration file",
        " default: /etc/sitarba.conf",
        " -e, --epoch EPOCH force to create backup for given epoch, which",
        " can be 'sporadic' or one of the configured epochs",
        " -m, --mode MODE override mode: full, diff, or incr",
        " -n, --dry-run don't do anything, just tell what would be done",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    ]
    # A single print of the joined lines emits the same text as one
    # print call per line.
    print("\n".join(usage))
637
638
639
class LogConf:
    """Encapsulates logging configuration"""

    # Console handler writing to stderr; its level decides how verbose
    # the program is on the console.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system"""
        # The root logger passes everything on; filtering is left to
        # the handlers.
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        console = cls.con
        console.setLevel(logging.WARNING)
        root.addHandler(console)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
656
657
def main(argv=None):
    """Run the command line interface and return the process exit status.

    `argv` is the full argument vector including the program name; it
    defaults to sys.argv. Returns 0 on success and 1 on a usage or
    configuration error.
    """
    if argv is None:
        argv = sys.argv

    LogConf.setup()

    conffn = "/etc/sitarba.conf"
    cmd = "list"
    mode = None
    epoch = None

    args = iter(argv[1:])

    def value_of(opt):
        """Return the argument following opt; exit if there is none."""
        value = next(args, None)
        if value is None:
            logging.error("Option '" + opt + "' requires an argument.")
            sys.exit(1)
        return value

    for opt in args:

        if opt in ["-h", "--help"]:
            printUsage()
            return 0

        elif opt in ["-c", "--conf"]:
            conffn = value_of(opt)

        elif opt in ["-V", "--version"]:
            print("sitarba " + __version__)
            return 0

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            level = value_of(opt)
            # Map e.g. "debug" to logging.DEBUG.
            numlevel = getattr(logging, level.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: %s" % level)
                return 1
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = value_of(opt)
            if mode not in Modes:
                logging.error("Unknown mode '" + mode + "'.")
                return 1

        elif opt in ["-n", "--dry-run"]:
            Options.dryrun = True

        elif opt in ["-e", "--epoch"]:
            epoch = value_of(opt)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            return 1

        if cmd == "backup":
            man.backup(epoch, mode)

        if cmd == "list":
            for b in sorted(man.listExistingBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        if cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        # A broken configuration must not look like success to callers.
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
737
738
739
740