adding man page, sample conf, makefile
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Names of the backup modes accepted by the program.
Mode = ["full", "incr", "diff"]

# Recurring epochs, each mapped to the time span it covers.
RealEpoch = {
    "hour": datetime.timedelta(seconds=3600),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epoch names: the recurring ones plus "sporadic", which has a zero span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
24
25
class Backup:
    """Describes one backup by its date, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    def __str__(self):
        pieces = [
            "[date: ", self.date.ctime(),
            ", epoch: ", self.epoch,
            ", mode: ", self.mode,
            "]",
        ]
        return "".join(pieces)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Build the directory name that belongs to the given properties."""
        stamp = date.strftime("%Y%m%d-%H%M")
        return "-".join([stamp, epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Tell whether dirname has the layout of a backup directory name.

        Returns the regex match object (truthy) on success, None otherwise.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
49
50
51
class Config:
    """Encapsulates the configuration for the backup program.

    The constructor sets defaults; read() overwrites them from a config file.

    Attributes:
        directory: destination directory that holds the backups.
        format: archive format, one of Config.formats.
        epochkeeps: maps each epoch in RealEpoch to the number of backups kept.
        epochmodes: maps each epoch in RealEpoch to its backup mode.
        exclpatterns: list of exclude patterns from section 'input'.
        sets: list of Config.FileSet objects, one per 'set <name>' section.
        checksum: SHA-1 hex digest of the config file, set by read().
        lastchecksum: stripped content of the checksum file found in the
            destination directory, or None if that file could not be read.
    """

    class ReadException(Exception):
        """An exception raised when reading configurations."""
        pass

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Afterwards the checksum of the config file is computed and the
        checksum stored in the destination directory is loaded.

        Raises:
            Config.ReadException: the file does not exist, cannot be parsed,
                lacks a required section or option, or contains an invalid
                section, option or value.
        """
        if not os.path.isfile(filename):
            raise Config.ReadException("No file '" + filename + "'.")

        config = configparser.RawConfigParser()
        try:
            config.read(filename)
            self._readDestination(config)
            self._readHistory(config)
            self._readInput(config)
            self._readSets(config)
        except configparser.Error as e:
            # Syntax errors and missing options would otherwise escape as
            # configparser exceptions that callers of read() do not expect.
            raise Config.ReadException(str(e)) from e

        self._readChecksums(filename)

    def _readDestination(self, config):
        """Read the required section 'destination'."""
        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException(
                    "Section '" + reqsec + "' is missing.")

        self.directory = config.get("destination", "directory")

        fmt = config.get("destination", "format")
        if fmt not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")
        self.format = fmt

    def _readHistory(self, config):
        """Read the 'keep<epoch>' and 'mode<epoch>' options of 'history'."""
        if not config.has_section("history"):
            return

        for opt in config.options("history"):
            # Both prefixes are four characters long; the rest names the epoch
            epoch = opt[4:]

            if opt.startswith("keep"):
                if epoch not in RealEpoch:
                    raise Config.ReadException(
                        "Invalid option 'keep" + epoch + "'.")
                try:
                    keep = config.getint("history", opt)
                except ValueError:
                    raise Config.ReadException(
                        "Invalid value for option '" + opt + "'.") from None
                # The count is later used as a slice start when pruning, so
                # a negative number would select the wrong backups.
                if keep < 0:
                    raise Config.ReadException(
                        "Invalid value for option '" + opt + "'.")
                self.epochkeeps[epoch] = keep

            elif opt.startswith("mode"):
                if epoch not in RealEpoch:
                    raise Config.ReadException(
                        "Invalid option 'mode" + epoch + "'.")
                mode = config.get("history", opt)
                if mode not in Mode:
                    raise Config.ReadException("Invalid mode given.")
                self.epochmodes[epoch] = mode

            else:
                raise Config.ReadException("Invalid option '" + opt + "'.")

    def _readInput(self, config):
        """Collect the 'exclude*' patterns of section 'input'."""
        if not config.has_section("input"):
            return

        for opt in config.options("input"):
            if not opt.startswith("exclude"):
                raise Config.ReadException("Invalid option '" + opt + "'.")
            self.exclpatterns.append(config.get("input", opt))

    def _readSets(self, config):
        """Create a FileSet for every 'set <name>' section."""
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadException("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadException(
                        "Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))

    def _readChecksums(self, filename):
        """Compute the config file checksum and load the last saved one."""
        sha = hashlib.sha1()
        with open(filename, 'rb') as f:
            sha.update(f.read())
        self.checksum = sha.hexdigest()

        # An unreadable checksum file is not an error; there is simply no
        # previous checksum to compare with.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
166
167
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Load the configuration from the file named conffn.

        Exceptions raised by Config.read(), such as Config.ReadException,
        are passed on to the caller.
        """
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Return a list of Backup objects, one per existing backup directory.

        Raises:
            ValueError: a directory has the layout of a backup name but
                carries an unknown epoch or mode (datetime.datetime() raises
                the same exception type for an impossible date or time).
        """
        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(
                int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Epochs are visited from the longest to the shortest time span. The
        first epoch whose relevant latest backup is older than the epoch's
        span is returned; None is returned if no backup is due.
        """
        # 'latest' carries over between iterations, so a recent backup of a
        # longer epoch also counts for all shorter epochs.
        latest = datetime.datetime(1900, 1, 1)
        spans = sorted(((Epoch[e], e) for e in RealEpoch), reverse=True)

        for timespan, e in spans:
            # We do not make backups of epochs of which none are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        The archive is written by /bin/tar to
        <targetdir>/<fileset.name>.<format>. If since is given, it is passed
        to tar via -N. The output of tar is echoed; a non-zero exit status
        is reported but does not raise.
        """
        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tarargs: ", tarargs)

        # stderr is merged into stdout: draining two pipes one after the
        # other can deadlock once the pipe that is not being read fills up.
        tarp = subprocess.Popen(tarargs,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)

        # Echo the output while tar is running; iteration ends at EOF
        for line in tarp.stdout:
            print(line.decode(errors="replace"), end="")
        tarp.stdout.close()

        rett = tarp.wait()
        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ".")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        A non-full mode is replaced by "full" if no full backup exists yet.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup. Only the recurring epochs have a configured
        # mode; other epochs ("sporadic") fall back to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory under a temporary name. The extra
        # suffix keeps it from matching the backup directory name layout.
        # %x needs an integer in Python 3, hence the int().
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Lists every directory in the destination that does not have the
        layout of a backup name, plus, per recurring epoch, all backups
        beyond the configured number of newest ones to keep. The entries
        are removed only after the user answers "y".
        """
        # Collect all directories not matching backup name
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        # Get all directories which are outdated
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                     for b in newestfirst[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print(" " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y, N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
366
367
def printUsage():
    """Write the usage text shown for --help to standard output."""

    prog = sys.argv[0]
    text = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
    ]
    print("\n".join(text))
389
390
if __name__ == "__main__":

    # Defaults, overridden by the command line below
    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Iterate manually so that an option can consume the argument after it
    args = iter(sys.argv[1:])
    for opt in args:

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf", "-m", "--mode", "-e", "--epoch"]:
            # These options take a value; report a missing one instead of
            # running past the end of the argument list.
            value = next(args, None)
            if value is None:
                print("Option '" + opt + "' requires an argument.")
                sys.exit(1)

            if opt in ["-c", "--conf"]:
                conffn = value

            elif opt in ["-m", "--mode"]:
                mode = value
                if mode not in Mode:
                    print("Unknown mode '" + mode + "'.")
                    sys.exit(1)

            else:
                epoch = value
                if epoch not in Epoch:
                    print("Unknown epoch '" + epoch + "'.")
                    sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.date.strftime("%Y-%m-%d %H:%M") +
                      "\t" + b.epoch + "\t" + b.mode)

        elif cmd == "prune":
            man.prune()

    except Config.ReadException as e:
        print("Error reading config file: ", end="")
        for a in e.args:
            print(a, end=" ")
        print()
        # Signal the failure to the calling shell or cron job
        sys.exit(1)
452
453
454
455