print age of backups
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 __version__ = "0.1"
5 __author__ = "Stefan Huber"
6
7 import datetime
8 import os, shutil, sys
9 import configparser
10 import hashlib
11 import subprocess, fcntl, select
12 import random, re
13 import logging
14
15
# The backup modes: a full archive, an incremental one, or a differential one.
Mode = ["full", "incr", "diff"]

# The recurring epochs, each mapped to the time span it covers.
RealEpoch = {
    "hour": datetime.timedelta(seconds=3600),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# Every epoch: the recurring ones plus "sporadic", which has a zero time span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
28
29
class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Backup directories are named "YYYYMMDD-HHMM-<epoch>-<mode>". The pattern
    # is compiled once here rather than on every isBackupDir() call.
    _dirNamePattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    @staticmethod
    def fromDirName(dirname):
        """Create a Backup from a directory name as built by getDirName().

        Raises ValueError if the name does not consist of four '-'-separated
        fields, if the epoch or mode is unknown, or if the date fields do not
        form a valid date.
        """
        fields = dirname.split("-")
        if len(fields) != 4:
            # Say what is wrong instead of failing with an unpacking error.
            raise ValueError("Invalid backup directory name: " + dirname)
        strdate, strtime, epoch, mode = fields

        if epoch not in Epoch:
            raise ValueError("Invalid epoch: " + epoch)

        if mode not in Mode:
            raise ValueError("Invalid mode: " + mode)

        date = datetime.datetime(int(strdate[0:4]),
                int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))

        return Backup(date, epoch, mode)

    def __str__(self):
        return "[date: " + self.date.ctime() + \
                ", epoch: " + self.epoch + \
                ", mode: " + self.mode + "]"

    def colAlignedString(self, now=None):
        """Return date, age, epoch and mode as one column-aligned line.

        The age is given in hours up to 48 hours and in days beyond that. It
        is measured against 'now', which defaults to the current local time;
        passing it explicitly lets a whole listing share one reference time.
        """
        if now is None:
            now = datetime.datetime.now()
        age = now - self.date
        total_hours = age.total_seconds()/3600
        if total_hours <= 48:
            agestr = "(%s h)" % int(total_hours)
        else:
            agestr = "(%s d)" % age.days
        return "%16s %7s %8s %4s" % (
                self.date.strftime("%Y-%m-%d %H:%M"), agestr, self.epoch, self.mode)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Only the shape of the name is checked; whether epoch and mode are
        known ones is left to fromDirName(). Returns a bool.
        """
        return Backup._dirNamePattern.match(dirname) is not None
79
80
81
class Config:
    """Encapsules the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""
        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Supported archive formats; each is used as the archive's file extension
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz" ]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = { k : 0 for k in RealEpoch.keys() }
        self.epochmodes = { k : "full" for k in RealEpoch.keys() }
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
                ", format: " + self.format + \
                ", keeps: " + str(self.epochkeeps) + \
                ", modes: " + str(self.epochmodes) + \
                ", exclpatterns: " + str(self.exclpatterns) + \
                ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file

        Fills in destination, history, exclude patterns and file sets, then
        stores the SHA-1 of the config file in self.checksum and the checksum
        saved in the destination directory (if any) in self.lastchecksum.

        Raises Config.ReadError for a missing file and for missing or invalid
        sections, options and values. Errors raised by configparser while
        parsing the file itself are not translated.
        """

        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        self._readDestination(config)
        self._readHistory(config)
        self._readInput(config)
        self._readSets(config)
        self._readChecksums(filename)

    def _readDestination(self, config):
        """Read the mandatory section 'destination'."""
        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        # Report a missing option as ReadError like every other config
        # problem, rather than letting configparser.NoOptionError escape.
        for reqopt in ["directory", "format"]:
            if not config.has_option("destination", reqopt):
                raise Config.ReadError("Option '" + reqopt + \
                        "' is missing in section 'destination'.")

        self.directory = config.get("destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError("Directory '{0}' does not exist.".format(self.directory))

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

    def _readHistory(self, config):
        """Read the options keep<epoch> and mode<epoch> of section 'history'."""
        if not config.has_section("history"):
            return

        for opt in config.options("history"):
            if opt.startswith("keep"):
                epoch = opt[4:]
                if epoch not in RealEpoch:
                    raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                try:
                    keep = config.getint("history", opt)
                except ValueError:
                    raise Config.ReadError("Invalid integer given for '" + opt + "'.")
                # The value is used as a slice start when pruning; a negative
                # one would count from the wrong end of the list.
                if keep < 0:
                    raise Config.ReadError("Negative value given for '" + opt + "'.")
                self.epochkeeps[epoch] = keep
            elif opt.startswith("mode"):
                epoch = opt[4:]
                if epoch not in RealEpoch:
                    raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                mode = config.get("history", opt)
                if mode not in Mode:
                    raise Config.ReadError("Invalid mode given.")
                self.epochmodes[epoch] = mode
            else:
                raise Config.ReadError("Invalid option '" + opt + "'.")

    def _readInput(self, config):
        """Read the exclude patterns of section 'input'."""
        if not config.has_section("input"):
            return

        for opt in config.options("input"):
            if not opt.startswith("exclude"):
                raise Config.ReadError("Invalid option '" + opt + "'.")
            self.exclpatterns.append(config.get("input", opt))

    def _readSets(self, config):
        """Read one file set per section named 'set <name>'."""
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadError("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadError("Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))

    def _readChecksums(self, filename):
        """Compute checksum of config file and load the last saved one."""
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # The saved checksum may not exist yet; that is not an error.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
203
204
class BackupManager:
    """List and create backups"""

    def __init__(self, conffn):
        """Read the configuration from the file named conffn."""
        self.conf = Config()
        self.conf.read(conffn)


    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        return [ d for d in os.listdir(basedir) \
                if os.path.isdir(os.path.join(basedir, d)) ]


    def listOldBackups(self):
        """Returns a list of old backups."""

        return [ Backup.fromDirName(d) for d in self.listAllDirs() \
                if Backup.isBackupDir(d) ]


    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on self.configuration and list of old backups

        Returns the name of the longest epoch whose newest backup is older
        than the epoch's time span, or None if no backup is to be made."""

        # Walk the epochs from the longest to the shortest. 'latest' is kept
        # across iterations, so a backup of a longer epoch also counts as a
        # recent backup for all shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted( [ (Epoch[e], e) for e in RealEpoch ], reverse=True ):
            # We make no backups of that epoch
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, take the newest into account
            dates = [ b.date for b in backups if b.epoch == e ]
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now-latest > timespan:
                return e

        # No backup is to be made
        return None


    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        If since is given, it is passed to tar as -N date. A non-zero exit
        status of tar is written to the backup log together with tar's
        stderr output; no exception is raised for it."""

        logfile = logging.getLogger('backuplog')
        logfile.info("Running file set: " + fileset.name)

        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = []

        # Add the since date, if given
        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        # Add the exclude patterns
        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        # Adding directories to backup, relative to the root directory
        taropts += ["-C", "/"] + [ "./" + d.lstrip("/") for d in fileset.dirs]

        # Launch the tar process
        tarargs = [tarpath, "-cpvaf", fsfn] + taropts
        logfile.debug("tar call: " + " ".join(tarargs))
        tarp = subprocess.Popen( tarargs, bufsize=-1, \
                stdout=subprocess.PIPE, stderr=subprocess.PIPE )

        # Change tarp's stdout and stderr to non-blocking
        for s in [tarp.stdout, tarp.stderr]:
            fd = s.fileno()
            fl = fcntl.fcntl(fd, fcntl.F_GETFL)
            fcntl.fcntl(fd, fcntl.F_SETFL, fl | os.O_NONBLOCK)

        # Read stdout and stderr of tarp while it is running. tar's stdout
        # goes to the root logger, i.e. not into the per-backup log file.
        # Output is decoded leniently as file names can be in any encoding.
        errmsg = b""
        while tarp.poll() is None:
            rd,wr,ex = select.select([tarp.stdout, tarp.stderr], [], [], 0.05)
            if tarp.stdout in rd:
                line = tarp.stdout.readline()
                if line:
                    logging.debug( line.rstrip(b"\n").decode(errors="replace") )
            if tarp.stderr in rd:
                # A non-blocking read yields None if there is no data
                errmsg += tarp.stderr.read() or b""

        # Get the remaining output of tarp
        for l in tarp.stdout.readlines():
            logging.debug(l.decode(errors="replace").rstrip())
        errmsg += tarp.stderr.read() or b""

        # Get return code of tarp and release the pipes
        rett = tarp.wait()
        tarp.stdout.close()
        tarp.stderr.close()
        if rett != 0:
            for l in errmsg.decode(errors="replace").split("\n"):
                logfile.error(l)
            logfile.error(tarpath + " returned with exit status " + str(rett) + ".")


    @staticmethod
    def _tmpDirName(dirname):
        """Get a temporary name for a backup directory that is being written.

        The random hex suffix adds a fifth field to the name, so that
        Backup.isBackupDir() does not accept an unfinished backup."""
        # "%x" needs an integer; a float raises TypeError on current Python 3
        return dirname + ("-%x" % int(random.random()*2e16) )


    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            logging.info("No backup planned.")
            return

        # Get mode of backup. Only the recurring epochs have a configured
        # mode; a forced "sporadic" epoch falls back to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        logging.info("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [ b for b in oldbackups if b.mode == "full" ]

        # No old full backups existing: there is nothing to be relative to
        if mode != "full" and not oldfullbackups:
            logging.info("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> self.config file changed
        if self.conf.checksum != self.conf.lastchecksum and mode != "full":
            logging.warning("Full backup recommended as config file has changed.")


        # If we have a full backup, we backup everything
        since = None
        if mode == "diff":
            since = max( b.date for b in oldfullbackups )
        elif mode == "incr":
            since = max( b.date for b in oldbackups )

        if since is not None:
            logging.debug("Making backup relative to " + since.ctime())

        yesno = self.ask_user_yesno("Proceed? [Y, n] ")
        if yesno == "n":
            return

        # Create new target directory under a temporary name
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, self._tmpDirName(dirname))
        os.mkdir( targetdir )


        # Add file logger for the time of this backup only
        logfile = logging.getLogger("backuplog")
        fil = logging.FileHandler( os.path.join(targetdir, "log") )
        fil.setLevel(logging.DEBUG)
        logfile.addHandler(fil)
        try:
            logfile.info("Started: " + now.ctime())

            # Backup all file sets
            for s in self.conf.sets:
                self.backupFileSet(s, targetdir, since)

            logfile.info("Stopped: " + datetime.datetime.now().ctime())
        finally:
            # Close the log before the directory is renamed
            logfile.removeHandler(fil)
            fil.close()

        # Rename backup directory to final name
        os.rename( targetdir, os.path.join(basedir, dirname) )

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open( os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write( self.conf.checksum )


    def prune(self):
        """Prune old backup files

        Marked for removal are all directories in the destination whose name
        is not a backup name and, for each recurring epoch, all but the
        newest 'keep' backups. The directories are removed after the user
        confirmed. Backups of epoch "sporadic" are never marked."""

        allDirs = sorted(self.listAllDirs())
        # Collect all directories not matching backup name
        removeDirs = [ d for d in allDirs if not Backup.isBackupDir(d) ]

        # Collect the outdated backups of each epoch
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestFirst = sorted( [ b for b in backups if b.epoch == e ], \
                    key=lambda b : b.date, reverse=True)
            old = newestFirst[self.conf.epochkeeps[e]:]
            removeDirs += [ Backup.getDirName(b.date, b.epoch, b.mode) for b in old]


        logging.info("List of stale/outdated entries:")
        for d in allDirs:
            if d in removeDirs:
                msg = "[*] "
            else:
                msg = "[ ] "

            if Backup.isBackupDir(d):
                msg += Backup.fromDirName(d).colAlignedString()
            else:
                msg += d

            logging.info(msg)

        # Check that dirs to be removed is in list of all dirs
        for d in removeDirs:
            assert d in allDirs

        if not removeDirs:
            logging.info("No stale/outdated entries to remove.")
            return

        basedir = self.conf.directory
        yesno = self.ask_user_yesno("Remove entries marked by '*'? [y, N] ")
        if yesno == "y":
            for d in removeDirs:
                try:
                    shutil.rmtree(os.path.join(basedir, d))
                except OSError as e:
                    logging.error("Error when removing '%s': %s" % (d,e.strerror) )


    def ask_user_yesno(self, question):
        """Ask the user a question if the console log level is INFO or lower.

        Returns the answer stripped and in lower case. Without a prompt "y"
        is returned. If stdin is at end of file the empty string is returned,
        which callers treat as the default answer of their prompt."""
        if LogConf.con.level > logging.INFO:
            return "y"

        try:
            return input(question).strip().lower()
        except EOFError:
            return ""
451
452
def printUsage():
    """Print --help text"""

    prog = sys.argv[0]

    # One entry per line of output
    usage = (
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -v, --verbose be more verbose and interact with user",
        " --verbosity LEVEL set verbosity to LEVEL, which can be",
        " error, warning, info, debug",
        " -V, --version print version info",
    )
    for text in usage:
        print(text)
478
479
480
class LogConf:
    """Encapsulates logging configuration"""

    # Console handler writing to stderr; its level is what the verbosity
    # options change
    con = logging.StreamHandler(sys.stderr)

    @classmethod
    def setup(cls):
        """Setup logging system"""
        # The console shows warnings and errors unless told otherwise
        cls.con.setLevel(logging.WARNING)

        # The root logger lets everything through; the handler does the filtering
        root = logging.getLogger()
        root.setLevel(logging.DEBUG)
        root.addHandler(cls.con)

        # The logger of the backup log accepts all levels as well
        logging.getLogger("backuplog").setLevel(logging.DEBUG)
497
498
def parseArgs(argv):
    """Parse the command line given as argv, with the program name first.

    Returns the tuple (conffn, cmd, mode, epoch). The verbosity options set
    the level of the console handler directly. Prints to the console and
    exits with status 0 for --help and --version. Logs an error and exits
    with status 1 for an unknown option, an invalid option value, or an
    option whose value is missing."""

    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Options that take the following argument as their value
    valueOpts = ["-c", "--conf", "--verbosity", "-m", "--mode", "-e", "--epoch"]

    i = 0
    while i < len(argv)-1:
        i += 1
        opt = argv[i]

        value = None
        if opt in valueOpts:
            # Do not index past the end of argv if the value is missing
            if i + 1 >= len(argv):
                logging.error("Option '" + opt + "' requires an argument.")
                sys.exit(1)
            i += 1
            value = argv[i]

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf"]:
            conffn = value

        elif opt in ["-V", "--version"]:
            print("shbackup " + __version__)
            sys.exit(0)

        elif opt in ["-v", "--verbose"]:
            LogConf.con.setLevel(logging.INFO)

        elif opt in ["--verbosity"]:
            numlevel = getattr(logging, value.upper(), None)
            if not isinstance(numlevel, int):
                logging.error("Invalid verbosity level: " + value)
                sys.exit(1)
            LogConf.con.setLevel(numlevel)

        elif opt in ["-m", "--mode"]:
            mode = value
            if mode not in Mode:
                logging.error("Unknown mode '" + mode + "'.")
                sys.exit(1)

        elif opt in ["-e", "--epoch"]:
            epoch = value
            if epoch not in Epoch:
                logging.error("Unknown epoch '" + epoch + "'.")
                sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            logging.error("Unknown option: " + opt)
            sys.exit(1)

    return (conffn, cmd, mode, epoch)


def main(argv):
    """Run the command given on the command line argv.

    Returns the exit status: 0 on success and 1 if the configuration could
    not be read."""

    LogConf.setup()
    conffn, cmd, mode, epoch = parseArgs(argv)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        if cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.colAlignedString())

        if cmd == "prune":
            man.prune()

    # configparser.Error is the base class of all errors of the parser,
    # including DuplicateOptionError, and carries a 'message' as well
    except (Config.ReadError, configparser.Error) as e:
        logging.error("Error reading config file: " + e.message)
        return 1

    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
573
574
575
576