comments, renames
[sitarba.git] / sitarba
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "2.0"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Names of the supported backup modes. Mode strings coming from a backup
# directory name, an epoch's "mode" option or the -m/--mode switch are
# validated against this list.
Modes = ["full", "incr", "diff"]
17
class Epoch:
    """An epoch: a time unit with a multiplier, a backup mode, the number of
    backups to keep and a list of exclude patterns.

    An epoch whose unit is None has no time delta and is always ripe."""

    # Length of a single unit for every known unit name.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        """Store the given properties; the exclude list starts out empty."""
        self.unit, self.mult = unit, mult
        self.mode, self.numkeeps = mode, numkeeps
        self.excludes = []

    def __repr__(self):
        pieces = (
            "[unit: ", repr(self.unit),
            ", mult:", repr(self.mult),
            ", mode: ", repr(self.mode),
            ", numkeeps: ", repr(self.numkeeps),
            ", excludes: ", repr(self.excludes),
            "]",
        )
        return "".join(pieces)

    def getTimeDelta(self):
        """Return mult times the length of the unit, or None without a unit."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    def isRipe(self, oldest, now):
        """Tell whether a backup of this epoch is due.

        True if the epoch has no unit, or if now-oldest is at least the time
        delta of the epoch. Otherwise the calendar field that belongs to the
        unit (hour, day of month, ISO week number, month, year) is compared
        between both dates, and the result is whether they differ by at least
        mult."""
        if self.unit is None:
            return True

        elapsed = now - oldest
        if elapsed >= self.getTimeDelta():
            return True

        # Extractor of the calendar field that corresponds to each unit
        fields = {
            "hour": lambda d: d.hour,
            "day": lambda d: d.day,
            "week": lambda d: d.isocalendar()[1],
            "month": lambda d: d.month,
            "year": lambda d: d.year,
        }
        field = fields.get(self.unit)
        if field is None:
            return None
        return abs(field(now) - field(oldest)) >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse a string of the form 'unit' or 'mult*unit' and return the
        tuple (mult, unit). Raises ValueError if the string is malformed, the
        unit is unknown or the factor is not positive."""
        parts = [p.strip() for p in deltastr.split("*")]
        if len(parts) > 2:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        # A lone token is the unit; with two tokens the first is the factor
        mult = int(parts[0]) if len(parts) == 2 else 1
        unit = parts[-1]

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
91
92
93
class FileSet:
    """A named collection of directories together with exclude patterns."""

    def __init__(self, name, dirs, excludes):
        """Remember name, directory list and exclude patterns as given."""
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        pieces = (
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        )
        return "".join(pieces)
105
106
class Backup:
    """One backup, described by its date, the name of its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        """Store date, epoch name and mode; excludes start out empty."""
        self.date, self.epoch, self.mode = date, epoch, mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name of the form
        YYYYMMDD-HHMM-epoch-mode. Raises ValueError if the name does not
        consist of four dash-separated parts, the mode is unknown or the
        date fields are not valid."""
        strdate, strtime, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        return Backup(datetime.datetime(year, month, day, hour, minute), epoch, mode)

    def __repr__(self):
        pieces = (
            "[date: ", self.date.ctime(),
            ", epoch: ", self.epoch,
            ", mode: ", self.mode,
            "]",
        )
        return "".join(pieces)

    def colAlignedString(self):
        """Return a one-line, column-aligned description: date, age, epoch
        and mode. The age is given in hours up to 48 hours, in days beyond."""
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds() / 3600
        agestr = ("(%s h)" % int(hours)) if hours <= 48 else ("(%s d)" % age.days)
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %7s %10s %4s" % (stamp, agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return "-".join((date.strftime("%Y%m%d-%H%M"), epoch, mode))

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory? Returns the regex match object
        (truthy) if the name looks like a backup directory, None otherwise."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
155
156
157
class Config:
    """Encapsulates the configuration for the backup program.

    The configuration is read from an ini-style file with a mandatory
    [global] section and optional [epoch NAME] and [set NAME] sections."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            self.value = value
            self.message = value


    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Set up defaults: tar.gz archives, /bin/tar, no excludes, no sets
        and the single unit-less epoch 'sporadic'."""
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        self.epochs = {"sporadic": Epoch()}


    def __repr__(self):
        return "[backupdir: " + self.backupdir + \
            ", format: " + self.format + \
            ", tarbin: " + self.tarbin + \
            ", excludes: " + repr(self.excludes) + \
            ", epochs: " + repr(self.epochs) + \
            ", sets: " + repr(self.sets) + "]"

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs


    def _read_global(self, config, sec):
        """Read the options of the global section 'sec' from 'config'.
        Raises Config.ReadError on unknown options or invalid values."""
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError(
                        "Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError(
                        "Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))


    def _read_epoch(self, config, sec):
        """Read the epoch section 'sec' (named 'epoch NAME') from 'config'
        and register the resulting epoch under NAME.
        Raises Config.ReadError on duplicate or invalid names, unknown
        options, invalid values or a missing numkeeps."""
        name = sec[6:].strip()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        if not re.match(r'^\w+$', name):
            # Parenthesized so that format() applies to the whole message
            raise Config.ReadError(
                ("Epoch name '{0}' does not only " +
                 "comprise alphanumeric characters.").format(name))

        epoch = Epoch()
        # An epoch named like a standard unit implicitly has that unit
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " + \
                        "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    mult, unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError(
                        "Invalid timespan '{0}': {1}".format(td, str(err)))
                epoch.unit = unit
                epoch.mult = mult

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch


    def _read_set(self, config, sec):
        """Read the file set section 'sec' (named 'set NAME') from 'config'
        and append the resulting FileSet to self.sets.
        Raises Config.ReadError on invalid names or unknown options."""
        name = sec[4:].strip()
        if not re.match(r'^\w+$', name):
            # Parenthesized so that format() applies to the whole message
            raise Config.ReadError(
                ("Set name '{0}' does not only " +
                 "comprise alphanumeric characters.").format(name))

        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]


    def read(self, filename):
        """Read configuration from file.

        Also computes the SHA-1 checksum of the file and loads the checksum
        stored in the backup directory, if there is one.
        Raises Config.ReadError if the file is missing or invalid."""

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():

            if sec == "global":
                self._read_global(config, sec)

            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)

            elif sec.startswith("set "):
                self._read_set(config, sec)

            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")


        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum saved in the backup directory; a missing or
        # unreadable checksum file simply means there is none
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
339
340
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Read the configuration from the file 'conffn'."""
        self.conf = Config()
        self.conf.read(conffn)


    def listAllDirs(self):
        """List all dirs in backupdir"""

        basedir = self.conf.backupdir
        # Get all entries and keep the directories only
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]


    def listExistingBackups(self):
        """Returns a list of old backups, one Backup object per directory in
        backupdir whose name looks like a backup directory."""

        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]


    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on self.configuration and list of old
        backups. Returns the name of the epoch, or None if no backup is to
        be made."""

        # Find the longest epoch for which we would like the make a backup.
        # 'latest' is carried over from longer to shorter epochs, hence a
        # backup of a longer epoch also counts for all shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # Get the dates of the backups of that epoch
            dates = [b.date for b in backups if b.epoch == e]

            # If there are any, determine the latest
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None


    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        The archive is named after the file set, with the configured format
        as file extension. 'excludes' and the excludes of the file set are
        passed to tar as --exclude patterns. If 'since' is given, it is
        passed to tar via -N. Output and errors of tar are logged; a non-zero
        exit status of tar is logged as error, no exception is raised."""

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Tar is verbose if sitarba is verbose
        if LogConf.con.level <= logging.DEBUG:
            taropts += ["--verbose"]

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, followed by those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin] + ["-cpaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Change tarp's stdout and stderr to non-blocking
        for s in [tarp.stdout, tarp.stderr]:
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Read stdout and stderr of tarp while it is running
        errmsg = b""
        while tarp.poll() is None:
            rd, wr, ex = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
            if tarp.stdout in rd:
                logging.debug(tarp.stdout.readline()[:-1].decode())
            if tarp.stderr in rd:
                # A non-blocking read may yield None if no data is available
                errmsg += tarp.stderr.read() or b""

        # Get the remaining output of tarp
        for l in tarp.stdout.readlines():
            logging.debug(l.decode().rstrip())
        errmsg += tarp.stderr.read() or b""

        # Get return code of tarp
        rett = tarp.wait()
        if rett != 0:
            for l in errmsg.decode().split("\n"):
                logfile.error(l)
            logfile.error(self.conf.tarbin + " returned with exit status " + \
                str(rett) + ".")


    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If a non-full backup is requested but no full backup exists, a full
        backup is made instead."""

        now = datetime.datetime.now()
        oldbackups = self.listExistingBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so fall back to a full backup
        if mode != "full" and len(oldfullbackups) == 0:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")


        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name with a random
        # suffix; %x requires an integer, hence the int()
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random()*2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)


        # Add file logger
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)

        logfile.info("Started: " + now.ctime())

        # Backup all file sets
        excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, excludes, since)

        logfile.info("Stopped: " + datetime.datetime.now().ctime())

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)


    def prune(self):
        """Prune old backup files.

        Directories in backupdir that are not backup directories, and the
        backups of an epoch beyond its numkeeps most recent ones, are listed
        and removed after confirmation."""

        allDirs = sorted(self.listAllDirs())
        # Collect all directories that are removed
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Get all backups
        backups = self.listExistingBackups()
        # Group backups by epoch and sort them by age, newest first
        byepoch = {e: sorted([b for b in backups if b.epoch == e],
                             key=lambda b: b.date, reverse=True)
                   for e in self.conf.getRealEpochsSorted()}
        # If we have too many backups of a specific epoch --> add them to remove list
        for e in byepoch:
            epoch = self.conf.epochs[e]
            old = byepoch[e][epoch.numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode) for b in old]


        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert(d in allDirs)

        if len(removeDirs) == 0:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))


    def ask_user_yesno(self, question):
        """Ask the user the given question and return the answer typed in.
        The user is only asked if the console log level is INFO or more
        verbose; otherwise "y" is returned without asking."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
597
598
def printUsage():
    """Print --help text"""

    prog = sys.argv[0]
    lines = [
        "sitarba - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf FILE use given configuration file",
        " default: /etc/sitarba.conf",
        " -e, --epoch EPOCH force to create backup for given epoch, which",
        " can be 'sporadic' or one of the configured epochs",
        " -m, --mode MODE override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    ]
    # One print call per line of the usage text
    for line in lines:
        print(line)
624
625
626
class LogConf:
    """Encapsulates logging configuration"""

    # Handler that writes log records to stderr, i.e., the console
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system: the root logger passes everything on to the
        console handler, which starts at level WARNING; the logger
        "backuplog" is set to level DEBUG."""
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        console = cls.con
        console.setLevel(logging.WARNING)
        root.addHandler(console)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
643
644
if __name__ == "__main__":

    # Command line entry point: parse the options, read the configuration
    # and run the requested command (backup, list or prune).

    LogConf.setup()

    conffn = "/etc/sitarba.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options that take the following command line argument as their value
    valueopts = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]

    i = 0
    while i < len(sys.argv)-1:
        i += 1
        opt = sys.argv[i]

        value = None
        if opt in valueopts:
            # Fail with a proper message instead of an IndexError
            if i+1 >= len(sys.argv):
                logging.error("Option '" + opt + "' requires an argument.")
                sys.exit(1)
            i += 1
            value = sys.argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = value

        elif opt in ["-V", "--version"]:
            print("sitarba " + __version__)
            sys.exit(0)

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            level = value
            # Map the level name to the numeric constant of the logging module
            numlevel = getattr(logging, level.upper(), None)
            if not isinstance(numlevel, int):
                raise ValueError('Invalid verbosity level: %s' % level)
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = value
            if mode not in Modes:
                logging.error("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = value

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            sys.exit(1)

        if cmd == "backup":
            man.backup(epoch, mode)

        if cmd == "list":
            for b in sorted(man.listExistingBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        if cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        # Report the failure to the caller via the exit status
        sys.exit(1)
721
722
723
724