Make epochs configurable with exclude patterns
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """A simple backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# Supported backup modes. "full" archives everything; "diff" archives files
# newer than the latest full backup; "incr" archives files newer than the
# latest backup of any mode.
Modes = ["full", "incr", "diff"]
17
class Epoch:
    """A backup epoch: time interval, backup mode and retention settings.

    An epoch whose ``unit`` is None has no time interval: ``getTimeDelta()``
    returns None and ``isRipe()`` always returns True.
    """

    # Nominal length of each supported time unit.
    units = {
        "hour": datetime.timedelta(0, 3600),
        "day": datetime.timedelta(1),
        "week": datetime.timedelta(7),
        "month": datetime.timedelta(31),
        "year": datetime.timedelta(365),
    }

    def __init__(self, unit=None, mult=1, mode="full", numkeeps=None):
        """Store the unit name, its multiplier, the mode and the keep count."""
        self.unit = unit
        self.mult = mult
        self.mode = mode
        self.numkeeps = numkeeps
        self.excludes = []

    def __repr__(self):
        return "[unit: " + repr(self.unit) + \
            ", mult:" + repr(self.mult) + \
            ", mode: " + repr(self.mode) + \
            ", numkeeps: " + repr(self.numkeeps) + \
            ", excludes: " + repr(self.excludes) + "]"

    def getTimeDelta(self):
        """Return ``mult`` times the nominal unit length, or None without unit."""
        if self.unit is None:
            return None
        return self.mult * Epoch.units[self.unit]

    @staticmethod
    def _calendarIndex(unit, moment):
        """Return a counter that grows by one per calendar `unit` boundary.

        Unlike the raw fields (hour of day, day of month, ISO week number,
        month of year) these counters never wrap around, so their difference
        is the number of boundaries crossed between two moments.
        """
        if unit == "hour":
            return moment.toordinal() * 24 + moment.hour
        if unit == "day":
            return moment.toordinal()
        if unit == "week":
            # Ordinal 1 is a Monday, so this counts Monday-aligned weeks.
            return (moment.toordinal() - 1) // 7
        if unit == "month":
            return moment.year * 12 + moment.month
        return moment.year

    def isRipe(self, oldest, now):
        """Tell whether a backup of this epoch is due at `now`.

        `oldest` is the reference timestamp to compare against. The epoch is
        ripe if it has no unit, if at least ``getTimeDelta()`` has elapsed, or
        if at least ``mult`` calendar boundaries of the unit lie between the
        two timestamps (e.g. the date changed for a one-day epoch).

        Raises KeyError if ``unit`` is not a key of ``Epoch.units``.
        """
        if self.unit is None:
            return True

        if now - oldest >= self.getTimeDelta():
            return True

        crossed = abs(Epoch._calendarIndex(self.unit, now) -
                      Epoch._calendarIndex(self.unit, oldest))
        return crossed >= self.mult

    @staticmethod
    def parseTimedelta(deltastr):
        """Parse ``"<unit>"`` or ``"<mult> * <unit>"`` into ``(mult, unit)``.

        Raises ValueError if the string has more than one ``*``, the factor is
        not a positive integer, or the unit is not a key of ``Epoch.units``.
        """
        tokens = [s.strip() for s in deltastr.split("*")]
        mult = 1
        if len(tokens) == 1:
            unit = tokens[0]
        elif len(tokens) == 2:
            mult = int(tokens[0])
            unit = tokens[1]
        else:
            raise ValueError("Invalid format: '{0}'".format(deltastr))

        if unit not in Epoch.units:
            raise ValueError("Unknown unit '{0}'".format(unit))

        if mult <= 0:
            raise ValueError("Non-positive factor '{0}' given.".format(mult))

        return mult, unit
91
92
93
class FileSet:
    """Named group of directories together with its exclude patterns."""

    def __init__(self, name, dirs, excludes):
        self.name, self.dirs, self.excludes = name, dirs, excludes

    def __repr__(self):
        pieces = [
            "[name: ", self.name,
            ", dirs: ", str(self.dirs),
            ", excludes: ", str(self.excludes),
            "]",
        ]
        return "".join(pieces)
105
106
class Backup:
    """Describes one backup by its timestamp, epoch name and mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode
        self.excludes = []

    @staticmethod
    def fromDirName(dirname):
        """Build a Backup from a directory name as produced by getDirName().

        Raises ValueError if the name does not split into four dash-separated
        parts, if the mode is not listed in Modes, or if the date is invalid.
        """
        strdate, strtime, epoch, mode = dirname.split("-")

        if mode not in Modes:
            raise ValueError("Invalid mode: " + mode)

        year, month, day = int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8])
        hour, minute = int(strtime[0:2]), int(strtime[2:4])
        stamp = datetime.datetime(year, month, day, hour, minute)

        return Backup(stamp, epoch, mode)

    def __repr__(self):
        return "".join(["[date: ", self.date.ctime(),
                        ", epoch: ", self.epoch,
                        ", mode: ", self.mode, "]"])

    def colAlignedString(self):
        """Return one column-aligned line: date, age, epoch and mode.

        The age is printed in hours up to 48 hours and in days beyond that.
        """
        age = datetime.datetime.now() - self.date
        hours = age.total_seconds() / 3600
        agestr = ("(%s h)" % int(hours)) if hours <= 48 else ("(%s d)" % age.days)
        columns = (self.date.strftime("%Y-%m-%d %H:%M"), agestr,
                   self.epoch, self.mode)
        return "%16s %7s %10s %4s" % columns

    @staticmethod
    def getDirName(date, epoch, mode):
        """Compose the directory name for a backup with the given properties."""
        return "-".join([date.strftime("%Y%m%d-%H%M"), epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Return a match object if dirname looks like a backup directory, else None."""
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
155
156
157
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            # Initialise the base class so that str(exc) yields the message.
            super().__init__(value)
            self.value = value
            self.message = value

    # Archive formats accepted for the 'format' option.
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Create a configuration holding only the built-in 'sporadic' epoch."""
        self.directory = None
        self.format = self.formats[0]
        self.excludes = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None
        self.epochs = {"sporadic": Epoch()}

    def __repr__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", excludes: " + repr(self.excludes) + \
            ", epochs: " + repr(self.epochs) + \
            ", sets: " + repr(self.sets) + "]"

    def getRealEpochsSorted(self):
        """Return the names of all epochs which have a non-None unit, sorted
        by Epoch.getTimeDelta(), starting with the longest delta."""
        epochs = self.epochs
        realepochs = [e for e in epochs if epochs[e].unit is not None]
        realepochs.sort(key=lambda e: epochs[e].getTimeDelta(), reverse=True)
        return realepochs

    def _read_destination(self, config, sec):
        """Read 'directory' and 'format' from the destination section.

        Raises Config.ReadError for a missing directory, an unsupported
        format or an unknown option.
        """
        for opt in config.options(sec):
            if opt == "directory":
                self.directory = config.get(sec, opt)
                if not os.path.isdir(self.directory):
                    raise Config.ReadError(
                        "Directory '{0}' does not exist.".format(self.directory))
            elif opt == "format":
                self.format = config.get(sec, opt)
                if self.format not in Config.formats:
                    raise Config.ReadError("Invalid 'format' given.")
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_global(self, config, sec):
        """Collect the global exclude patterns (options starting with 'exclude')."""
        for opt in config.options(sec):
            if opt.startswith("exclude"):
                self.excludes.append(config.get(sec, opt))
            else:
                raise Config.ReadError("Unknown option '{0}'.".format(opt))

    def _read_epoch(self, config, sec):
        """Read an 'epoch <name>' section and register the epoch under <name>.

        A name that is a key of Epoch.units implies that unit; such epochs
        must not carry a 'timedelta' option. Every epoch needs 'numkeeps'.
        Raises Config.ReadError on any invalid or unknown option.
        """
        name = sec[6:].strip()
        if name in self.epochs:
            raise Config.ReadError("Epoch '{0}' already defined.".format(name))

        epoch = Epoch()
        if name in Epoch.units:
            epoch.unit = name

        for opt in config.options(sec):
            if opt == "numkeeps":
                try:
                    epoch.numkeeps = config.getint(sec, opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '{0}'.".format(opt))
                if epoch.numkeeps <= 0:
                    raise Config.ReadError(
                        "Non-positive numkeeps '{0}' given.".format(epoch.numkeeps))

            elif opt == "mode":
                epoch.mode = config.get(sec, opt)
                if epoch.mode not in Modes:
                    raise Config.ReadError("Invalid mode '{0}'.".format(epoch.mode))

            elif opt == "timedelta":
                if name in Epoch.units:
                    raise Config.ReadError("The time delta of a standard epoch " +
                                           "is not supposed to be redefined. ")
                td = config.get(sec, opt)
                try:
                    mult, unit = Epoch.parseTimedelta(td)
                except ValueError as err:
                    # Use a separate name so the epoch object is not shadowed.
                    raise Config.ReadError(
                        "Invalid timedelta '{0}': {1}".format(td, str(err)))
                epoch.unit = unit
                epoch.mult = mult

            elif opt.startswith("exclude"):
                epoch.excludes.append(config.get(sec, opt))

            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        if epoch.numkeeps is None:
            raise Config.ReadError("No numkeeps set for epoch '{0}'.".format(name))

        self.epochs[name] = epoch

    def _read_set(self, config, sec):
        """Read a 'set <name>' section into a FileSet and append it to self.sets."""
        name = sec[4:].strip()
        dirs = []
        excludes = []

        for opt in config.options(sec):
            if opt.startswith("dir"):
                dirs.append(config.get(sec, opt))
            elif opt.startswith("exclude"):
                excludes.append(config.get(sec, opt))
            else:
                raise Config.ReadError("Unknown option '" + opt + "'.")

        self.sets.append(FileSet(name, dirs, excludes))

    def read(self, filename):
        """Read configuration from file.

        Afterwards self.checksum holds the SHA-1 hex digest of the config file
        and self.lastchecksum the content of the checksum file in the
        destination directory, or None if that file cannot be opened.

        Raises Config.ReadError if the file is missing, the mandatory
        'destination' section is absent, or a section or option is invalid.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError(
                    "Mandatory section '" + reqsec + "' is missing.")

        for sec in config.sections():
            if sec == "destination":
                self._read_destination(config, sec)
            elif sec == "global":
                self._read_global(config, sec)
            elif sec.startswith("epoch "):
                self._read_epoch(config, sec)
            elif sec.startswith("set "):
                self._read_set(config, sec)
            else:
                raise Config.ReadError("Unknown section '" + sec + "'.")

        if self.directory is None:
            raise Config.ReadError("No destination directory set.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Recall the checksum stored in the destination directory, if any
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
333
334
class BackupManager:
    """List, create and prune backups in the configured destination directory."""

    def __init__(self, conffn):
        """Load the configuration from file `conffn`."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups."""
        return [Backup.fromDirName(d) for d in self.listAllDirs()
                if Backup.isBackupDir(d)]

    def getDesiredEpochs(self, backups, now):
        """Get desired epoch based on self.configuration and list of old backups.

        Epochs are visited from the longest to the shortest time delta. The
        reference date only moves forward while iterating, so a backup of a
        longer epoch also counts for every shorter one. Returns the name of
        the first ripe epoch, or None if no backup is to be made.
        """
        latest = datetime.datetime(1900, 1, 1)
        for e in self.conf.getRealEpochsSorted():
            epoch = self.conf.epochs[e]
            if epoch.numkeeps <= 0:
                continue

            # If there are backups of that epoch, take the latest into account
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            if epoch.isRipe(latest, now):
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, excludes, since=None):
        """Create an archive for given fileset at given target directory.

        `excludes` are patterns applied in addition to the file set's own
        ones. If `since` is given, it is passed to tar via -N. A non-zero exit
        status of tar is logged as an error together with tar's stderr
        output; no exception is raised for it.
        """
        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the given exclude patterns, then those of the fileset
        for pat in list(excludes) + list(fileset.excludes):
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + ["./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [tarpath, "-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen(tarargs, bufsize=-1,
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)

        # Change tarp's stdout and stderr to non-blocking
        for s in (tarp.stdout, tarp.stderr):
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Read stdout and stderr of tarp while it is running. Output is
        # decoded leniently, as file names need not be valid UTF-8, and a
        # non-blocking read may yield None instead of bytes.
        errmsg = b""
        while tarp.poll() is None:
            rd, _, _ = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
            if tarp.stdout in rd:
                line = tarp.stdout.readline() or b""
                logging.debug(line.decode(errors="replace").rstrip("\n"))
            if tarp.stderr in rd:
                errmsg += tarp.stderr.read() or b""

        # Get the remaining output of tarp
        for l in tarp.stdout.readlines():
            logging.debug(l.decode(errors="replace").rstrip())
        errmsg += tarp.stderr.read() or b""

        # Get return code of tarp and release the pipes
        rett = tarp.wait()
        tarp.stdout.close()
        tarp.stderr.close()
        if rett != 0:
            for l in errmsg.decode(errors="replace").split("\n"):
                logfile.error(l)
            logfile.error(tarpath + " returned with exit status " + str(rett) + ".")

    @staticmethod
    def _tmpDirName(dirname):
        """Return `dirname` extended by a dash and a random hexadecimal suffix."""
        # '%x' only accepts integers, hence the explicit conversion.
        return dirname + ("-%x" % int(random.random() * 2e16))

    @staticmethod
    def _sinceDate(mode, oldbackups, oldfullbackups):
        """Return the date a backup of `mode` is relative to, or None.

        "diff" refers to the latest full backup, "incr" to the latest backup
        of any mode; any other mode archives everything.
        """
        if mode == "diff":
            return max(b.date for b in oldfullbackups)
        if mode == "incr":
            return max(b.date for b in oldbackups)
        return None

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        If no full backup exists yet, a full backup is made regardless of the
        requested mode. The archives are written to a temporary directory
        which is renamed to its final name once all file sets are done.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpochs(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochs[epoch].mode
        logging.info("Making a backup. Epochs: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could be relative to, so actually switch to a full backup.
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")

        # If we have a full backup, we backup everything
        since = self._sinceDate(mode, oldbackups, oldfullbackups)
        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, self._tmpDirName(dirname))
        os.mkdir(targetdir)

        # Add file logger for the duration of this backup only
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler(os.path.join(targetdir, "log"))
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)
        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            excludes = self.conf.excludes + self.conf.epochs[epoch].excludes
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, excludes, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Detach and close the handler so the log file is not kept open
            # and later backups do not write into it.
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Marked for removal are all directories in the destination which do
        not look like backup directories, and for every epoch with a unit all
        backups beyond the `numkeeps` most recent ones. The entries are
        listed and removed after confirmation by ask_user_yesno().
        """
        allDirs = sorted(self.listAllDirs())

        # Collect all directories not matching backup name
        removeDirs = [d for d in allDirs if not Backup.isBackupDir(d)]

        # Collect the outdated backups of each epoch
        backups = self.listOldBackups()
        for e in self.conf.getRealEpochsSorted():
            numkeeps = self.conf.epochs[e].numkeeps
            ofepoch = sorted((b for b in backups if b.epoch == e),
                             key=lambda b: b.date, reverse=True)
            removeDirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                           for b in ofepoch[numkeeps:]]

        stale = set(removeDirs)

        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            msg = "[*] " if d in stale else "[ ] "
            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d
            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs. This guards a
        # recursive delete, so it must not vanish under 'python -O'.
        for d in removeDirs:
            if d not in allDirs:
                raise AssertionError(
                    "'%s' is not a directory of the destination." % d)

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d, e.strerror))

    def ask_user_yesno(self, question):
        """Ask `question` on the console and return the raw answer.

        The user is only asked if the console log level is INFO or more
        verbose; otherwise "y" is returned without asking.
        """
        if LogConf.con.level <= logging.INFO:
            return input(question)
        else:
            return "y"
585
586
def printUsage():
    """Write the --help text to standard output."""
    prog = sys.argv[0]
    usage = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    ]
    print("\n".join(usage))
612
613
614
class LogConf:
    """Holds the console log handler and wires up the loggers."""

    # Console handler writing to stderr; its level controls the verbosity.
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Configure the root logger and the "backuplog" logger.

        Both loggers pass everything from DEBUG upwards; the console handler
        attached to the root logger is limited to WARNING and above.
        """
        logging.getLogger("backuplog").setLevel(logging.DEBUG)

        root = logging.getLogger()
        root.setLevel(logging.DEBUG)

        console = cls.con
        console.setLevel(logging.WARNING)
        root.addHandler(console)
631
632
633 if __name__ == "__main__":
634
635 LogConf.setup()
636
637 conffn = "/etc/shbackup.conf"
638 cmd = "list"
639 mode = None
640 epoch = None
641
642 i = 0
643 while i < len(sys.argv)-1:
644 i += 1
645 opt = sys.argv[i]
646
647 if opt in ["-h", "--help"]:
648 printUsage()
649 exit(0)
650
651 elif opt in ["-c", "--conf"]:
652 i += 1
653 conffn = sys.argv[i]
654
655 elif opt in ["-V", "--version"]:
656 print("shbackup " + __version__)
657 exit(0)
658
659 elif opt in ["-v", "--verbose"]:
660 LogConf.con.setLevel(logging.INFO)
661
662 elif opt in ["--verbosity"]:
663 i += 1
664 level = sys.argv[i]
665 numlevel = getattr(logging, level.upper(), None)
666 if not isinstance(numlevel, int):
667 raise ValueError('Invalid verbosity level: %s' % level)
668 LogConf.con.setLevel(numlevel)
669
670 elif opt in ["-m", "--mode"]:
671 i += 1
672 mode = sys.argv[i]
673 if not mode in Modes:
674 logging.error("Unknown mode '" + mode + "'.")
675 exit(1)
676
677 elif opt in ["-e", "--epoch"]:
678 i += 1
679 epoch = sys.argv[i]
680
681 elif opt in ["backup", "list", "prune"]:
682 cmd = opt
683
684 else:
685 logging.error("Unknown option: " + opt)
686 exit(1)
687
688 try:
689 man = BackupManager(conffn)
690
691 logging.debug("Config: " + str(man.conf))
692
693 if epoch!=None and not epoch in man.conf.epochs.keys():
694 logging.error("Unknown epoch '" + epoch + "'.")
695 exit(1)
696
697 if cmd == "backup":
698 man.backup(epoch, mode)
699
700 if cmd == "list":
701 for b in sorted(man.listOldBackups(), key=lambda b: b.date):
702 print(b.colAlignedString())
703
704 if cmd == "prune":
705 man.prune()
706
707 except (Config.ReadError, configparser.Error) as e:
708 logging.error("Error: " + e.message)
709
710
711
712