cbd5396cc897636c93a272c4df36caae195c4818
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Backup modes accepted in the configuration and on the command line.
Mode = ["full", "incr", "diff"]

# The regular epochs, each mapped to the time span associated with it.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# Every known epoch: the regular ones plus "sporadic", which has a zero span.
Epoch = dict(RealEpoch)
Epoch["sporadic"] = datetime.timedelta()
24
25
class Backup:
    """Description of one backup: its date, its epoch and its mode."""

    def __init__(self, date, epoch, mode):
        self.date, self.epoch, self.mode = date, epoch, mode

    def __str__(self):
        pieces = [
            "[date: ", self.date.ctime(),
            ", epoch: ", self.epoch,
            ", mode: ", self.mode,
            "]",
        ]
        return "".join(pieces)

    @staticmethod
    def getDirName(date, epoch, mode):
        """Build the directory name that belongs to the given properties."""
        stamp = date.strftime("%Y%m%d-%H%M")
        return "-".join([stamp, epoch, mode])

    @staticmethod
    def isBackupDir(dirname):
        """Tell whether `dirname` follows the backup naming scheme.

        Returns the regex match object (truthy) on success, None otherwise.
        """
        return re.match(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$', dirname)
49
50
51
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadError(RuntimeError):
        """An exception raised when reading configurations."""

        def __init__(self, value):
            super().__init__(value)
            self.value = value
            self.message = value

    class FileSet:
        """A fileset has a name and a list of directories."""

        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in RealEpoch}
        self.epochmodes = {k: "full" for k in RealEpoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
            ", format: " + self.format + \
            ", keeps: " + str(self.epochkeeps) + \
            ", modes: " + str(self.epochmodes) + \
            ", exclpatterns: " + str(self.exclpatterns) + \
            ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Fills in the destination, history, input and file-set settings,
        computes the SHA-1 checksum of the config file and loads the checksum
        stored in the destination directory (None if there is none).

        Raises Config.ReadError if the file is missing or its content is
        invalid. Errors raised by configparser while parsing the file itself
        (e.g. DuplicateOptionError) are passed through unchanged.
        """
        if not os.path.isfile(filename):
            raise Config.ReadError("Cannot read config file '" + filename + "'.")

        config = configparser.RawConfigParser()
        config.read(filename)

        self._readDestination(config)
        self._readHistory(config)
        self._readInput(config)
        self._readSets(config)
        self._readChecksums(filename)

    @staticmethod
    def _getRequired(config, section, option):
        """Return a mandatory option, raising ReadError if it is missing."""
        try:
            return config.get(section, option)
        except configparser.NoOptionError as err:
            # Report the problem like any other config error instead of
            # letting a configparser exception escape to the caller.
            raise Config.ReadError("Option '" + option + "' is missing in " +
                                   "section '" + section + "'.") from err

    def _readDestination(self, config):
        """Read and validate the mandatory 'destination' section."""
        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadError("Section '" + reqsec + "' is missing.")

        self.directory = self._getRequired(config, "destination", "directory")
        if not os.path.isdir(self.directory):
            raise Config.ReadError(
                "Directory '{0}' does not exist.".format(self.directory))

        self.format = self._getRequired(config, "destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadError("Invalid 'format' given.")

    def _readHistory(self, config):
        """Read the optional 'history' section (keep<epoch>, mode<epoch>)."""
        if not config.has_section("history"):
            return

        for opt in config.options("history"):
            if opt.startswith("keep"):
                epoch = opt[4:]
                if epoch not in RealEpoch:
                    raise Config.ReadError("Invalid option 'keep" + epoch + "'.")
                try:
                    self.epochkeeps[epoch] = config.getint("history", opt)
                except ValueError:
                    raise Config.ReadError(
                        "Invalid integer given for '" + opt + "'.")
            elif opt.startswith("mode"):
                epoch = opt[4:]
                if epoch not in RealEpoch:
                    raise Config.ReadError("Invalid option 'mode" + epoch + "'.")
                self.epochmodes[epoch] = config.get("history", opt)
                if self.epochmodes[epoch] not in Mode:
                    raise Config.ReadError("Invalid mode given.")
            else:
                raise Config.ReadError("Invalid option '" + opt + "'.")

    def _readInput(self, config):
        """Read the optional 'input' section (exclude* patterns)."""
        if not config.has_section("input"):
            return

        for opt in config.options("input"):
            if not opt.startswith("exclude"):
                raise Config.ReadError("Invalid option '" + opt + "'.")
            self.exclpatterns.append(config.get("input", opt))

    def _readSets(self, config):
        """Read all 'set <name>' sections; reject any other unknown section."""
        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            if not sec.startswith("set "):
                raise Config.ReadError("Unknown section '" + sec + "'.")

            name = sec[4:].strip()
            dirs = []
            for opt in config.options(sec):
                if not opt.startswith("dir"):
                    raise Config.ReadError("Unknown option '" + opt + "'.")
                dirs.append(config.get(sec, opt))
            self.sets.append(Config.FileSet(name, dirs))

    def _readChecksums(self, filename):
        """Compute the config file's checksum and load the stored one."""
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # A missing or unreadable checksum file simply means "none stored".
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
173
174
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Load the configuration from the file `conffn`.

        Raises whatever Config.read() raises for an unreadable or invalid
        configuration file.
        """
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""
        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups.

        Every directory whose name follows the backup naming scheme is turned
        into a Backup object. Raises ValueError if such a name carries an
        unknown epoch or mode, or an impossible date.
        """
        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(
                int(strdate[0:4]), int(strdate[4:6]), int(strdate[6:8]),
                int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the longest epoch that is due for a backup at time `now`, or
        None if no backup is to be made.
        """
        # Walk the epochs from the longest to the shortest time span.
        # `latest` is carried over between iterations on purpose: a backup of
        # a longer epoch also counts as the latest backup for shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        spans = sorted(((Epoch[name], name) for name in RealEpoch),
                       reverse=True)
        for timespan, e in spans:
            # Epochs for which nothing is kept are not backed up at all
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        If `since` is given, only files newer than that date are archived
        (tar option -N). A non-zero tar exit status is reported on stdout
        but does not abort the backup.
        """
        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        tarp = subprocess.Popen(tarargs)

        rett = tarp.wait()
        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ":")

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        A non-full mode is replaced by "full" when no full backup exists yet.
        """
        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup. Only the regular epochs have a configured mode;
        # a forced "sporadic" backup falls back to a full backup.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff/incr backup
        # could refer to, so actually switch to a full backup.
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory under a temporary name. The random
        # value must be an int: '%x' rejects floats on current Python 3.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % int(random.random() * 2e16))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Candidates are all directories in the destination that do not follow
        the backup naming scheme, plus, per regular epoch, every backup beyond
        the configured number to keep (newest are kept). The candidates are
        listed and removed only after the user confirms with "y".
        """
        # Collect all directories not matching backup name
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        # Get all directories which are outdated
        backups = self.listOldBackups()
        for e in RealEpoch:
            newestfirst = sorted((b for b in backups if b.epoch == e),
                                 key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                     for b in newestfirst[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print(" " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y, N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
357
358
def printUsage():
    """Write the --help text to standard output."""
    prog = sys.argv[0]
    lines = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
    ]
    print("\n".join(lines))
380
381
def parseArgs(argv):
    """Parse the command line given as `argv` (program name first).

    Returns the tuple (conffn, cmd, mode, epoch). `conffn` defaults to
    "/etc/shbackup.conf" and `cmd` to "list"; `mode` and `epoch` are None
    unless overridden by -m/--mode and -e/--epoch.

    Prints the usage text and exits with status 0 for -h/--help. Prints an
    error and exits with status 1 for an unknown option, an unknown mode or
    epoch, or an option that lacks its argument.
    """
    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    args = iter(argv[1:])
    for opt in args:
        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf", "-m", "--mode", "-e", "--epoch"]:
            # These options consume the next argument; it may be absent when
            # the option is the last word on the command line.
            value = next(args, None)
            if value is None:
                print("Missing argument for option: " + opt)
                sys.exit(1)

            if opt in ["-c", "--conf"]:
                conffn = value
            elif opt in ["-m", "--mode"]:
                mode = value
                if mode not in Mode:
                    print("Unknown mode '" + mode + "'.")
                    sys.exit(1)
            else:
                epoch = value
                if epoch not in Epoch:
                    print("Unknown epoch '" + epoch + "'.")
                    sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

    return conffn, cmd, mode, epoch


if __name__ == "__main__":

    conffn, cmd, mode, epoch = parseArgs(sys.argv)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.date.strftime("%Y-%m-%d %H:%M") +
                      "\t" + b.epoch + "\t" + b.mode)

        elif cmd == "prune":
            man.prune()

    except (Config.ReadError, configparser.DuplicateOptionError) as e:
        print("Error reading config file: " + e.message)
        # Signal the failure to the caller instead of exiting with status 0.
        sys.exit(1)
440
441
442
443