add many cmdline options
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Names of the supported backup modes.
Mode = ["full", "incr", "diff"]

# Maps each epoch name to the time span it stands for.
Epoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}
20
class Backup:
    """A single backup has a date, an epoch and a mode."""

    # Directory names look like "YYYYMMDD-HHMM-<epoch>-<mode>". Compiled once
    # here instead of on every isBackupDir() call.
    _dirpattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+$')

    def __init__(self, date, epoch, mode):
        """Store the backup's date (a datetime), epoch name and mode name."""
        self.date = date
        self.epoch = epoch
        self.mode = mode

    def __str__(self):
        return "[date: " + self.date.ctime() + \
                ", epoch: " + self.epoch + \
                ", mode: " + self.mode + "]"

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns True if dirname has the form produced by getDirName() and
        False otherwise. Only the shape of the name is checked; the epoch and
        mode parts are not validated here."""
        return Backup._dirpattern.match(dirname) is not None
44
45
46
class Config:
    """Encapsulates the configuration for the backup program."""

    class ReadException(Exception):
        """An exception raised when reading configurations."""
        pass

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        """Initialize the configuration with default values."""
        self.directory = "/media/backup"
        self.format = self.formats[0]
        self.epochkeeps = {k: 0 for k in Epoch}
        self.epochmodes = {k: "full" for k in Epoch}
        self.exclpatterns = []
        self.sets = []
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
                ", format: " + self.format + \
                ", keeps: " + str(self.epochkeeps) + \
                ", modes: " + str(self.epochmodes) + \
                ", exclpatterns: " + str(self.exclpatterns) + \
                ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Fills in directory, format, epochkeeps, epochmodes, exclpatterns and
        sets from the given file, stores the SHA-1 checksum of the file in
        self.checksum, and loads the checksum saved in the destination
        directory (if any) into self.lastchecksum.

        Raises Config.ReadException if the file does not exist, cannot be
        parsed, lacks a required section or option, or contains an unknown
        section, option or value."""

        if not os.path.isfile(filename):
            raise Config.ReadException("No file '" + filename + "'.")

        config = configparser.RawConfigParser()
        try:
            config.read(filename)
        except configparser.Error as e:
            # Report syntax errors like every other configuration problem
            raise Config.ReadException(str(e)) from e

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException("Section '" + reqsec + "' is missing.")

        # Check required options up front so that a missing one does not
        # surface as a configparser.NoOptionError.
        for reqopt in ["directory", "format"]:
            if not config.has_option("destination", reqopt):
                raise Config.ReadException("Option '" + reqopt + \
                        "' is missing in section 'destination'.")

        self.directory = config.get("destination", "directory")

        self.format = config.get("destination", "format")
        if self.format not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in Epoch:
                        raise Config.ReadException("Invalid option 'keep" + epoch + "'.")
                    try:
                        keep = config.getint("history", opt)
                    except ValueError as e:
                        raise Config.ReadException("Invalid value for option '" + \
                                opt + "'.") from e
                    # A negative count would invert the slice used for pruning
                    if keep < 0:
                        raise Config.ReadException("Invalid value for option '" + \
                                opt + "'.")
                    self.epochkeeps[epoch] = keep
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in Epoch:
                        raise Config.ReadException("Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadException("Invalid mode given.")
                else:
                    raise Config.ReadException("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadException("Invalid option '" + opt + "'.")

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadException("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadException("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Load the checksum saved in the destination directory, if there is one
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
161
162
class BackupManager:
    """List, create and prune backups"""

    def __init__(self, conffn):
        """Read the configuration from the file conffn.

        Raises Config.ReadException if the configuration cannot be read."""
        self.conf = Config()
        self.conf.read(conffn)

    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        return [d for d in os.listdir(basedir)
                if os.path.isdir(os.path.join(basedir, d))]

    def listOldBackups(self):
        """Returns a list of old backups.

        Every directory in the destination whose name looks like a backup
        directory is turned into a Backup object. Raises ValueError if such a
        name carries an unknown epoch or mode."""

        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(int(strdate[0:4]),
                    int(strdate[4:6]), int(strdate[6:8]),
                    int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups

    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on self.conf and list of old backups.

        Returns the name of the longest epoch that is due, or None if no
        backup is to be made."""

        # Walk from the longest to the shortest epoch. 'latest' is carried
        # over between iterations, so a backup of a longer epoch also counts
        # as a recent backup for every shorter epoch.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted(((Epoch[name], name) for name in Epoch),
                reverse=True):
            # Skip epochs of which no backups are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # If there are backups of that epoch, determine the latest
            dates = [b.date for b in backups if b.epoch == e]
            if dates:
                latest = max(latest, max(dates))

            # The latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None

    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        Runs /bin/tar on the directories of the fileset and echoes its output.
        If since is given, only files newer than that date are archived. A
        non-zero exit status of tar is reported but does not raise."""

        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        print("tarargs: ", tarargs)

        # stderr is merged into stdout and the single pipe is read until EOF.
        # Reading two pipes one after the other can deadlock as soon as the
        # pipe that is not being read fills up.
        tarp = subprocess.Popen(tarargs,
                stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
        try:
            for l in tarp.stdout:
                # File names need not be valid UTF-8
                print(l.decode(errors="replace"), end="")
        finally:
            tarp.stdout.close()
            rett = tarp.wait()

        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ".")

    @staticmethod
    def _randomSuffix():
        """Return a random suffix like "-1a2b3c" for temporary directory names."""
        # '%x' only accepts integers, so the random float is truncated first
        return "-%x" % int(random.random() * 2e16)

    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup
        if mode is None:
            mode = self.conf.epochmodes[epoch]
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [b for b in oldbackups if b.mode == "full"]

        # No old full backups existing: there is nothing a diff or incr
        # backup could refer to, so fall back to a full backup.
        if mode != "full" and not oldfullbackups:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory under a temporary name
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        targetdir = os.path.join(basedir, dirname + self._randomSuffix())
        os.mkdir(targetdir)

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename(targetdir, os.path.join(basedir, dirname))

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open(os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write(self.conf.checksum)

    def prune(self):
        """Prune old backup files.

        Lists every directory in the destination that is not named like a
        backup, plus the backups exceeding the number to keep for their
        epoch, and removes them after interactive confirmation."""

        # Collect all directories not matching backup name
        dirs = [d for d in self.listAllDirs() if not Backup.isBackupDir(d)]

        # Get all directories which are outdated: per epoch, everything
        # beyond the 'keep' most recent backups
        backups = self.listOldBackups()
        for e in Epoch:
            byepoch = sorted([b for b in backups if b.epoch == e],
                    key=lambda b: b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            dirs += [Backup.getDirName(b.date, b.epoch, b.mode)
                    for b in byepoch[keep:]]

        if not dirs:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print(" " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y, N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
360
361
def printUsage():
    """Write the usage text shown for --help to standard output."""

    prog = " " + sys.argv[0]
    usage = (
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        prog + " [-C <configfile>] [cmd]",
        prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -C <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -m, --mode <mode> override mode: full, diff, or incr",
        " -e, --epoch <epoch> create backup for given epoch:",
        " year, month, week, day, hour",
        " -h, --help print this usage text",
    )
    for text in usage:
        print(text)
383
384
if __name__ == "__main__":

    # Defaults, possibly overridden by the command line
    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    args = sys.argv[1:]
    i = 0
    while i < len(args):
        opt = args[i]
        i += 1

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-C", "--config", "-m", "--mode", "-e", "--epoch"]:
            # These options take a value; complain instead of running past
            # the end of the argument list if it is missing.
            if i >= len(args):
                print("Option '" + opt + "' requires an argument.")
                sys.exit(1)
            val = args[i]
            i += 1

            if opt in ["-C", "--config"]:
                conffn = val

            elif opt in ["-m", "--mode"]:
                mode = val
                if mode not in Mode:
                    print("Unknown mode '" + mode + "'.")
                    sys.exit(1)

            else:
                epoch = val
                if epoch not in Epoch:
                    print("Unknown epoch '" + epoch + "'.")
                    sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.date.strftime("%Y-%m-%d %H:%M") + \
                        "\t" + b.epoch + "\t" + b.mode)

        elif cmd == "prune":
            man.prune()

    except Config.ReadException as e:
        print("Error reading config file: ", end="")
        for a in e.args:
            print(a, end=" ")
        print()
        # Signal the failure to the calling shell or cron job
        sys.exit(1)
446
447
448
449