Add missing sitarba file
[sitarba.git] / sitarba
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "2.0"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Supported backup modes. "full" archives everything; "diff" archives files
# changed since the latest full backup; "incr" archives files changed since
# the latest backup of any mode.
Modes = ["full", "incr", "diff"]
17
class Epoch:
    """A backup epoch: how often a backup is due and how many are kept.

    Attributes:
        unit: key of ``Epoch.units`` or None. An epoch without a unit has no
            time span (``getTimeDelta()`` returns None) and is always ripe.
        mult: positive multiplier applied to the unit.
        mode: backup mode used for this epoch.
        numkeeps: number of backups of this epoch to keep.
        excludes: exclude patterns specific to this epoch.
    """

    # Nominal length of each supported unit.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return "[unit: " + repr(self.unit) + \
            ", mult:" + repr(self.mult) + \
            ", mode: " + repr(self.mode) + \
            ", numkeeps: " + repr(self.numkeeps) + \
            ", excludes: " + repr(self.excludes) + "]"

    def getTimeDelta(self):
        """Return the nominal time span of this epoch, or None if it has no unit."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    @staticmethod
    def _calendarIndex(unit, when):
        """Return a monotonic counter of calendar periods of `unit` for `when`.

        Two datetimes lie in the same calendar hour/day/week/month/year iff
        their indices are equal, and the index grows by one per period.
        Unlike raw fields such as ``when.day`` it never wraps around.
        """
        if unit == "hour":
            return when.toordinal() * 24 + when.hour
        if unit == "day":
            return when.toordinal()
        if unit == "week":
            # Ordinal of the Monday starting the week; consecutive Mondays
            # are 7 apart, so the quotient advances by one per week.
            return (when.toordinal() - when.weekday()) // 7
        if unit == "month":
            return when.year * 12 + when.month
        if unit == "year":
            return when.year
        raise ValueError("Unknown unit '{0}'".format(unit))

    def isRipe(self, oldest, now):
        """Tell whether a new backup of this epoch is due.

        `oldest` is the date of the reference backup and `now` the current
        time. An epoch without unit is always ripe. Otherwise the epoch is
        ripe if the full time span has elapsed, or if at least `mult`
        calendar boundaries of the unit were crossed (e.g. a daily backup
        made at 23:00 is followed by one at 01:00 the next day).
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        # Compare monotonic period counters instead of raw calendar fields:
        # fields wrap around (day 31 -> 1, month 12 -> 1, week 52 -> 1) and
        # would make epochs with mult > 1 ripe after a single boundary.
        crossed = abs(Epoch._calendarIndex(self.unit, now) -
                      Epoch._calendarIndex(self.unit, oldest))
        return crossed >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse "UNIT" or "MULT * UNIT" and return the tuple (mult, unit).

        Raises ValueError if the format is invalid, the multiplier is not a
        positive integer, or the unit is not a key of ``Epoch.units``.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        unit = None
        mult = 1
        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
91
92
93
class FileSet:
    """Named group of directories to archive, plus its own exclude patterns."""

    def __init__(self, name, dirs, excludes):
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        pieces = [
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        ]
        return "".join(pieces)
105
106
class Backup:
    """One existing backup, identified by its date, epoch name and mode."""

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name "YYYYMMDD-HHMM-EPOCH-MODE".

        Raises ValueError if the name does not split into four parts, the
        mode is not listed in Modes, or the date fields are invalid.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        when = datetime.datetime(year, month, day, hour, minute)
        return Backup(when, epoch, mode)

    def __repr__(self):
        return "".join(["[date: ", self.date.ctime(),
                        ", epoch: ", self.epoch,
                        ", mode: ", self.mode, "]"])

    def colAlignedString(self):
        """Return a one-line, column-aligned description including the age."""
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds() / 3600
        # Young backups are shown with their age in hours, older ones in days.
        agestr = "(%s h)" % int(hours) if hours <= 48 else "(%s d)" % age.days
        stamp = self.date.strftime("%Y-%m-%d %H:%M")
        return "%16s %7s %10s %4s" % (stamp, agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Return the directory name of a backup with the given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Return a match object if dirname looks like a backup directory, else None."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
155
156
157
class Config:
    """Encapsulates the configuration for the backup program.

    The configuration file consists of a mandatory [global] section and any
    number of [epoch NAME] and [set NAME] sections.
    """

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.backupdir = None
        self.format = self.formats[1]
        self.tarbin = "/bin/tar"
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        # The unit-less "sporadic" epoch always exists.
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        # str() keeps repr usable while backupdir is still None.
        return "[backupdir: " + str(self.backupdir) + \
            ", format: " + self.format + \
            ", tarbin: " + self.tarbin + \
            ", excludes: " + repr(self.excludes) + \
            ", epochs: " + repr(self.epochs) + \
            ", sets: " + repr(self.sets) + "]"

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs

    def _read_global(self, config, sec):
        """Read the [global] section; raise ReadError on invalid options."""
        for opt in config.options(sec):
            if opt == "backupdir":
                self.backupdir = config.get(sec, opt)
                if not os.path.isdir(self.backupdir):
                    raise Config.ReadError("Backupdir '{0}' does not exist.".format(self.backupdir))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            elif opt == "tarbin":
                self.tarbin = config.get(sec, opt)
                if not os.path.isfile(self.tarbin):
                    raise Config.ReadError("Tar binary '{0}' does not exist.".format(self.tarbin))
            elif opt.startswith("exclude"):
                self.excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read an [epoch NAME] section and register the epoch.

        Raises ReadError for duplicate or invalid names, invalid option
        values, unknown options, or a missing numkeeps.
        """
        name = sec[6:].strip()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))
        # The epoch name becomes one "-"-separated part of the backup
        # directory name and must match \w+ there; any other name would
        # yield directories that are not recognised as backups.
        if not re.match(r'^\w+$', name):
            raise Config.ReadError("Invalid epoch name '{0}': only letters, "
                                   "digits and '_' are allowed.".format(name))

        epoch = Epoch()
        # Epochs named like a time unit implicitly span one such unit.
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError("Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError("Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timespan":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " +
                                           "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    mult, unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    raise Config.ReadError("Invalid timespan '{0}': {1}".format(td, str(err)))
                epoch.unit = unit
                epoch.mult = mult

            elif opt.startswith("exclude"):
                epoch.excludes += [config.get(sec, opt)]

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read a [set NAME] section and append the file set to self.sets."""
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs += [config.get(sec, opt)]
            elif opt.startswith("exclude"):
                excludes += [config.get(sec, opt)]
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets += [FileSet(name, dirs, excludes)]

    def read(self, filename):
        """Read configuration from file.

        Besides parsing all sections this computes the SHA-1 checksum of the
        file (self.checksum) and loads the checksum stored in the backup
        directory, if any (self.lastchecksum).

        Raises Config.ReadError on semantic errors; configparser errors
        propagate unchanged.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["global"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.backupdir is None:
            raise Config.ReadError("No backup directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # A missing or unreadable checksum file means no checksum is known.
        try:
            with open(os.path.join(self.backupdir, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
330
331
class BackupManager:
    """List, create and prune backups as described by a configuration file."""

    def __init__(self, conffn):
        """Load the configuration from file `conffn`."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List the names of all directories in the backup directory."""
        basedir = self.conf.backupdir
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Return a list of Backup objects for all existing backups.

        Directories that match the backup name pattern but cannot be parsed
        (unknown mode, impossible date) are skipped with a warning.
        """
        backups = []
        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue
            try:
                backups.append(Backup.fromDirName(entry))
            except ValueError as err:
                logging.warning("Ignoring directory '%s': %s", entry, err)
        return backups

    def getDesiredEpochs(self, backups, now):
        """Get the desired epoch based on the configuration and the list of
        old backups. Returns the epoch name, or None if no backup is due."""

        # Find the longest epoch for which we would like to make a backup.
        # `latest` is carried over from longer to shorter epochs, so a backup
        # of a longer epoch also counts for every shorter one.
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        `excludes` are additional exclude patterns; `since`, if given, limits
        the archive to files newer than that date (tar -N). A non-zero exit
        status of tar is logged as an error; no exception is raised for it.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format
        taropts = []

        # Tar is verbose if sitarba is verbose
        if LogConf.con.level <= logging.INFO:
            taropts += ["--verbose"]

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, then those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [self.conf.tarbin] + ["-cpaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Change tarp's stdout and stderr to non-blocking
        for s in [tarp.stdout, tarp.stderr]:
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Read stdout and stderr of tarp while it is running. Reads on a
        # non-blocking stream may yield None or an empty result, hence the
        # guards. Undecodable bytes are replaced instead of raising.
        errmsg = b""
        while tarp.poll() is None:
            rd, _, _ = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
            if tarp.stdout in rd:
                line = tarp.stdout.readline()
                if line:
                    logging.debug(line.rstrip(b"\n").decode(errors="replace"))
            if tarp.stderr in rd:
                errmsg += tarp.stderr.read() or b""

        # Get the remaining output of tarp
        for line in tarp.stdout.readlines():
            logging.debug(line.decode(errors="replace").rstrip())
        errmsg += tarp.stderr.read() or b""

        # Get return code of tarp
        rett = tarp.wait()
        if rett != 0:
            for line in errmsg.decode(errors="replace").split("\n"):
                logfile.error(line)
            logfile.error(self.conf.tarbin + " returned with exit status " +
                          str(rett) + ".")

    @staticmethod
    def _tmpSuffix():
        """Return a random "-<hex>" suffix for temporary backup directories."""
        # %x requires an integer; a float raises TypeError on Python >= 3.5.
        return "-%x" % int(random.random() * 2e16)

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If a non-full mode is requested but no full backup exists yet, a full
        backup is made instead.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: diff/incr have nothing to refer to.
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new backup directory under a temporary name; it only gets
        # its final name once all file sets have been processed.
        basedir = self.conf.backupdir
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, dirname + BackupManager._tmpSuffix())
        os.mkdir(targetdir)

        # Add file logger for the duration of this backup
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)
        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the handler so the log file is not kept open
            # and later messages do not end up in this backup's log.
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Marked for removal are all directories that do not look like backup
        directories and, per epoch with a time unit, all backups beyond the
        newest `numkeeps` ones.
        """
        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Per epoch, everything after the newest numkeeps backups is outdated
        backups = self.listOldBackups()
        for e in self.conf.getRealEpochsSorted():
            newestFirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            old = newestFirst[self.conf.epochs[e].numkeeps:]
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode) for b in old]

        marked = set(removeDirs)
        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in marked else "[ ] "
            desc = d
            if Backup.isBackupDir(d):
                try:
                    desc = Backup.fromDirName(d).colAlignedString()
                except ValueError:
                    # Unparsable name; listOldBackups() warned already.
                    # Show the raw directory name instead.
                    desc = d
            logging.info(msg + desc)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.backupdir
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask the user `question` if the console is at least INFO-verbose;
        otherwise answer "y" without asking."""
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
586
587
def printUsage():
    """Write the --help text to standard output."""
    prog = sys.argv[0]
    lines = (
        "sitarba - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf FILE use given configuration file",
        " default: /etc/sitarba.conf",
        " -e, --epoch EPOCH force to create backup for given epoch, which",
        " can be 'sporadic' or one of the configured epochs",
        " -m, --mode MODE override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    )
    for line in lines:
        print(line)
613
614
615
class LogConf:
    """Holds the console log handler and configures the loggers."""

    # Console handler writing to stderr; its level is the console verbosity.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Attach the console handler to the root logger and set all levels."""
        console = cls.con
        console.setLevel(logging.WARNING)

        # Loggers pass everything on; filtering is left to the handlers.
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        root.addHandler(console)

        logging.getLogger("backuplog").setLevel(logging.DEBUG)
632
633
def main(argv=None):
    """Parse the command line, run the requested command, return exit status.

    `argv` defaults to sys.argv; its first element is the program name.
    Returns 0 on success and 1 on usage or configuration errors.
    """
    if argv is None:
        argv = sys.argv

    LogConf.setup()

    conffn = "/etc/sitarba.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options which consume the following command line argument.
    withArgument = ("-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch")

    i = 0
    while i < len(argv) - 1:
        i += 1
        opt = argv[i]

        arg = None
        if opt in withArgument:
            if i + 1 >= len(argv):
                logging.error("Option '" + opt + "' requires an argument.")
                return 1
            i += 1
            arg = argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            return 0

        elif opt in ["-c", "--conf"]:
            conffn = arg

        elif opt in ["-V", "--version"]:
            print("sitarba " + __version__)
            return 0

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            numlevel = getattr(logging, arg.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: %s" % arg)
                return 1
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = arg
            if mode not in Modes:
                logging.error("Unknown mode '" + mode + "'.")
                return 1

        elif opt in ["-e", "--epoch"]:
            epoch = arg

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            return 1

    try:
        man = BackupManager(conffn)

        logging.debug("Config: " + str(man.conf))

        if epoch is not None and epoch not in man.conf.epochs:
            logging.error("Unknown epoch '" + epoch + "'.")
            return 1

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error: " + e.message)
        # A broken configuration is a failure, so signal it to the caller.
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main())
710
711
712
713