enable output of tar
[sitarba.git] / shbackup
1 #!/usr/bin/python3
2 """Stefan Huber's simplistic backup solution."""
3
4 import datetime
5 import os, shutil, sys
6 import configparser
7 import hashlib
8 import subprocess
9 import random, re
10
11
# Supported backup modes: full archive, incremental (since the latest
# backup) and differential (since the latest full backup).
Mode = ["full", "incr", "diff"]

# Epochs that recur on a schedule, mapped to their nominal duration.
RealEpoch = {
    "hour": datetime.timedelta(hours=1),
    "day": datetime.timedelta(days=1),
    "week": datetime.timedelta(days=7),
    "month": datetime.timedelta(days=30),
    "year": datetime.timedelta(days=365),
}

# All epochs: the recurring ones plus "sporadic", which has no duration.
Epoch = dict(RealEpoch, sporadic=datetime.timedelta(0))
24
25
class Backup:
    """A single backup has a date, an epoch and a mode.

    date is a datetime.datetime, epoch and mode are strings. The three
    together determine the name of the backup's directory, see getDirName().
    """

    # Directory names look like YYYYMMDD-HHMM-<epoch>-<mode>. Compiled once
    # for all calls. \Z (rather than $) anchors at the very end of the string,
    # so a name with a trailing newline is not mistaken for a backup.
    _dirpattern = re.compile(r'^\d\d\d\d\d\d\d\d-\d\d\d\d-\w+-\w+\Z')

    def __init__(self, date, epoch, mode):
        self.date = date
        self.epoch = epoch
        self.mode = mode

    def __str__(self):
        return "[date: " + self.date.ctime() + \
                ", epoch: " + self.epoch + \
                ", mode: " + self.mode + "]"

    @staticmethod
    def getDirName(date, epoch, mode):
        """Get directory name of backup by given properties.

        The date is formatted with minute resolution, followed by the epoch
        and the mode, all separated by dashes."""
        return date.strftime("%Y%m%d-%H%M") + "-" + epoch + "-" + mode

    @staticmethod
    def isBackupDir(dirname):
        """Is directory a backup directory?

        Returns True if dirname has the form produced by getDirName() and
        False otherwise. Only the shape of the name is checked; whether the
        epoch and mode parts are known values is not."""
        return Backup._dirpattern.match(dirname) is not None
49
50
51
class Config:
    """Encapsulates the configuration for the backup program.

    The configuration is read from an INI-style file by read(). Every
    problem with that file is reported as a Config.ReadException."""

    class ReadException(Exception):
        """An exception raised when reading configurations."""
        pass

    class FileSet:
        """A fileset has a name and a list of directories."""
        def __init__(self, name, dirs):
            self.name = name
            self.dirs = dirs

        def __str__(self):
            return "[name: " + self.name + ", dirs: " + str(self.dirs) + "]"

    # Archive formats accepted for the 'format' option
    formats = ["tar", "tar.gz", "tar.bz2", "tar.xz"]

    # Filename where checksum of config is saved
    checksumfn = "checksum"

    def __init__(self):
        self.directory = "/media/backup"
        self.format = self.formats[0]
        # Per epoch: how many backups to keep and which mode to use
        self.epochkeeps = { k : 0 for k in RealEpoch.keys() }
        self.epochmodes = { k : "full" for k in RealEpoch.keys() }
        self.exclpatterns = []
        self.sets = []
        # SHA-1 of the config file just read, and the one stored in the
        # destination directory (None if there is none)
        self.checksum = None
        self.lastchecksum = None

    def __str__(self):
        return "[directory: " + self.directory + \
                ", format: " + self.format + \
                ", keeps: " + str(self.epochkeeps) + \
                ", modes: " + str(self.epochmodes) + \
                ", exclpatterns: " + str(self.exclpatterns) + \
                ", sets: " + str([str(s) for s in self.sets]) + "]"

    def read(self, filename):
        """Read configuration from file.

        Recognized sections:
          [destination]  options 'directory' and 'format' (both required)
          [history]      options 'keep<epoch>' (non-negative integer) and
                         'mode<epoch>'
          [input]        options starting with 'exclude'
          [set <name>]   options starting with 'dir'

        Afterwards the SHA-1 checksum of the file is stored in
        self.checksum and the checksum saved in the destination directory,
        if any, in self.lastchecksum.

        Raises Config.ReadException if the file does not exist, cannot be
        parsed, or contains missing, unknown or invalid entries."""

        if not os.path.isfile(filename):
            raise Config.ReadException("No file '" + filename + "'.")

        config = configparser.RawConfigParser()
        try:
            config.read(filename)
        except configparser.Error as e:
            # Syntax errors must reach the caller as ReadException, too
            raise Config.ReadException("Cannot parse '" + filename + \
                    "': " + str(e))

        for reqsec in ["destination"]:
            if not config.has_section(reqsec):
                raise Config.ReadException("Section '" + reqsec + "' is missing.")

        try:
            self.directory = config.get("destination", "directory")
            self.format = config.get("destination", "format")
        except configparser.NoOptionError as e:
            raise Config.ReadException("Option '" + e.option + \
                    "' is missing in section '" + e.section + "'.")

        if self.format not in Config.formats:
            raise Config.ReadException("Invalid 'format' given.")

        if config.has_section("history"):
            for opt in config.options("history"):
                if opt.startswith("keep"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadException("Invalid option 'keep" + epoch + "'.")
                    try:
                        keep = config.getint("history", opt)
                    except ValueError:
                        raise Config.ReadException("Invalid number given for '" + opt + "'.")
                    # A negative count would make pruning slice the list of
                    # backups from the wrong end
                    if keep < 0:
                        raise Config.ReadException("Negative number given for '" + opt + "'.")
                    self.epochkeeps[epoch] = keep
                elif opt.startswith("mode"):
                    epoch = opt[4:]
                    if epoch not in RealEpoch:
                        raise Config.ReadException("Invalid option 'mode" + epoch + "'.")
                    self.epochmodes[epoch] = config.get("history", opt)
                    if self.epochmodes[epoch] not in Mode:
                        raise Config.ReadException("Invalid mode given.")
                else:
                    raise Config.ReadException("Invalid option '" + opt + "'.")

        if config.has_section("input"):
            for opt in config.options("input"):
                if opt.startswith("exclude"):
                    self.exclpatterns.append(config.get("input", opt))
                else:
                    raise Config.ReadException("Invalid option '" + opt + "'.")

        for sec in config.sections():
            if sec in ["destination", "history", "input"]:
                continue
            elif sec.startswith("set "):
                name = sec[4:].strip()
                dirs = []

                for opt in config.options(sec):
                    if not opt.startswith("dir"):
                        raise Config.ReadException("Unknown option '" + opt + "'.")
                    dirs.append(config.get(sec, opt))
                self.sets.append(Config.FileSet(name, dirs))
            else:
                raise Config.ReadException("Unknown section '" + sec + "'.")

        # Compute checksum of config file
        with open(filename, 'rb') as f:
            self.checksum = hashlib.sha1(f.read()).hexdigest()

        # Fetch the checksum saved in the destination directory. It is fine
        # if there is none yet.
        try:
            with open(os.path.join(self.directory, self.checksumfn), 'r') as f:
                self.lastchecksum = f.read().strip()
        except IOError:
            self.lastchecksum = None
166
167
class BackupManager:
    """List, create and prune backups in the configured destination."""

    def __init__(self, conffn):
        """Read the configuration from the file conffn.

        Raises Config.ReadException if the configuration cannot be read."""
        self.conf = Config()
        self.conf.read(conffn)


    def listAllDirs(self):
        """List all dirs in destination directory"""

        basedir = self.conf.directory
        return [ d for d in os.listdir(basedir) \
                if os.path.isdir(os.path.join(basedir, d)) ]


    def listOldBackups(self):
        """Returns a list of old backups.

        Every directory in the destination whose name looks like a backup
        directory is turned into a Backup object. Raises ValueError if such
        a name carries an unknown epoch or mode."""

        backups = []

        for entry in self.listAllDirs():
            if not Backup.isBackupDir(entry):
                continue

            strdate, strtime, epoch, mode = entry.split("-")

            if epoch not in Epoch:
                raise ValueError("Invalid epoch: " + epoch)

            if mode not in Mode:
                raise ValueError("Invalid mode: " + mode)

            date = datetime.datetime(int(strdate[0:4]),
                    int(strdate[4:6]), int(strdate[6:8]),
                    int(strtime[0:2]), int(strtime[2:4]))
            backups.append(Backup(date, epoch, mode))

        return backups


    def getDesiredEpoch(self, backups, now):
        """Get desired epoch based on configuration and list of old backups.

        Returns the name of the longest epoch that is due for a backup, or
        None if no backup is to be made."""

        # Go from the longest to the shortest epoch. 'latest' is carried
        # over, so a backup of a longer epoch also counts for shorter ones.
        latest = datetime.datetime(1900, 1, 1)
        for timespan, e in sorted( [ (Epoch[e], e) for e in RealEpoch ], \
                reverse=True):
            # Skip epochs for which no backups are kept
            if self.conf.epochkeeps[e] == 0:
                continue

            # Dates of the backups of that epoch
            dates = [ b.date for b in backups if b.epoch == e ]

            # If there are any, determine the latest
            if dates:
                latest = max(latest, max(dates))

            # the latest backup is too old
            if now - latest > timespan:
                return e

        # No backup is to be made
        return None


    def backupFileSet(self, fileset, targetdir, since=None):
        """Create an archive for given fileset at given target directory.

        If since is given, only files newer than that date are archived.
        tar writes its output directly to the terminal. Returns the exit
        status of tar; a non-zero status is reported but not treated as
        fatal."""

        print("Running file set: " + fileset.name)
        tarpath = "/bin/tar"
        fsfn = os.path.join(targetdir, fileset.name) + "." + self.conf.format

        # -a lets tar pick the compression from the archive's suffix
        taropts = ["-cpva"]

        if since is not None:
            taropts += ["-N", since.strftime("%Y-%m-%d %H:%M:%S")]

        for pat in self.conf.exclpatterns:
            taropts += ["--exclude", pat]

        tarargs = [tarpath] + taropts + ["-f", fsfn] + fileset.dirs
        rett = subprocess.call( tarargs )

        if rett != 0:
            print(tarpath + " returned with exit status " + str(rett) + ":")
        return rett


    def backup(self, epoch=None, mode=None):
        """Make a new backup, if necessary. If epoch is None then determine
        desired epoch automatically. Use given epoch otherwise. If mode is None
        then use mode for given epoch. Use given mode otherwise.

        A full backup is made instead of the requested one if there is no
        full backup to base an incremental or differential one on."""

        now = datetime.datetime.now()
        oldbackups = self.listOldBackups()

        # Get epoch of backup
        if epoch is None:
            epoch = self.getDesiredEpoch(oldbackups, now)
        if epoch is None:
            print("No backup planned.")
            return

        # Get mode of backup. Only the recurring epochs have a configured
        # mode; for any other epoch (i.e. sporadic) fall back to full.
        if mode is None:
            mode = self.conf.epochmodes.get(epoch, "full")
        print("Making a backup. Epoch: " + epoch + ", mode: " + mode)

        oldfullbackups = [ b for b in oldbackups if b.mode == "full" ]

        # No old full backups existing
        if mode != "full" and len(oldfullbackups) == 0:
            print("No full backups existing. Making a full backup.")
            mode = "full"

        # Checksum changed -> config file changed
        if self.conf.checksum != self.conf.lastchecksum:
            print("Config file changed since last time.")
            if mode != "full":
                print("** Warning: full backup recommended!")

        # Create new target directory. It carries a random suffix until
        # the backup is complete, so it is not taken for a valid backup.
        basedir = self.conf.directory
        dirname = Backup.getDirName(now, epoch, mode)
        tmpdirname = dirname + ("-%x" % random.getrandbits(64))
        targetdir = os.path.join(basedir, tmpdirname)
        os.mkdir( targetdir )

        # If we have a full backup, we backup everything
        since = None

        # Get latest full backup time
        if mode == "diff":
            since = max(b.date for b in oldfullbackups)
        # Get latest backup time
        elif mode == "incr":
            since = max(b.date for b in oldbackups)

        # Backup all file sets
        for s in self.conf.sets:
            self.backupFileSet(s, targetdir, since)

        # Rename backup directory to final name
        os.rename( targetdir, os.path.join(basedir, dirname) )

        # We made a full backup -- recall checksum of config
        if mode == "full":
            with open( os.path.join(basedir, self.conf.checksumfn), "w") as f:
                f.write( self.conf.checksum )


    def prune(self):
        """Prune old backup files.

        Lists all directories in the destination that are not backups, and
        all backups of recurring epochs beyond the configured number to
        keep. They are removed after confirmation by the user."""

        # Collect all directories not matching backup name
        dirs = [ d for d in self.listAllDirs() if not Backup.isBackupDir(d) ]

        # Get all directories which are outdated
        backups = self.listOldBackups()
        for e in RealEpoch:
            # Newest first, so everything after the first 'keep' is old
            newestfirst = sorted( [ b for b in backups if b.epoch == e ], \
                    key=lambda b : b.date, reverse=True)
            keep = self.conf.epochkeeps[e]
            dirs += [ Backup.getDirName(b.date, b.epoch, b.mode) \
                    for b in newestfirst[keep:] ]

        if len(dirs) == 0:
            print("No stale/outdated entries to remove.")
            return

        print("List of stale/outdated entries:")
        for d in dirs:
            print(" " + d)

        basedir = self.conf.directory
        yesno = input("Remove listed entries? [y, N] ")
        if yesno == "y":
            for d in dirs:
                shutil.rmtree(os.path.join(basedir, d))
350
351
def printUsage():
    """Write the usage text shown for --help to standard output."""

    prog = sys.argv[0]
    usage = [
        "shbackup - a simple backup solution.",
        "",
        "Usage:",
        " " + prog + " {options} [cmd]",
        " " + prog + " --help",
        "",
        "Commands:",
        " backup make a new backup, if necessary",
        " list list all backups (default)",
        " prune prune outdated/old backups",
        "",
        "Options:",
        " -h, --help print this usage text",
        " -c, --conf <configfile> use given configuration file",
        " default: /etc/shbackup.conf",
        " -e, --epoch <epoch> force to create backup for given epoch:",
        " year, month, week, day, hour, sporadic",
        " -m, --mode <mode> override mode: full, diff, or incr",
    ]
    for text in usage:
        print(text)
373
374
if __name__ == "__main__":

    # Defaults, possibly overridden by the command line
    conffn = "/etc/shbackup.conf"
    cmd = "list"
    mode = None
    epoch = None

    # Parse the command line. Options and commands may come in any order;
    # if several commands are given, the last one wins.
    args = sys.argv[1:]
    i = 0
    while i < len(args):
        opt = args[i]
        i += 1

        if opt in ["-h", "--help"]:
            printUsage()
            sys.exit(0)

        elif opt in ["-c", "--conf", "-m", "--mode", "-e", "--epoch"]:
            # These options take a value, which must not be missing
            if i >= len(args):
                print("Option '" + opt + "' requires an argument.")
                sys.exit(1)
            value = args[i]
            i += 1

            if opt in ["-c", "--conf"]:
                conffn = value

            elif opt in ["-m", "--mode"]:
                mode = value
                if mode not in Mode:
                    print("Unknown mode '" + mode + "'.")
                    sys.exit(1)

            else:
                epoch = value
                if epoch not in Epoch:
                    print("Unknown epoch '" + epoch + "'.")
                    sys.exit(1)

        elif opt in ["backup", "list", "prune"]:
            cmd = opt

        else:
            print("Unknown option: " + opt)
            sys.exit(1)

    try:
        man = BackupManager(conffn)

        if cmd == "backup":
            man.backup(epoch, mode)

        elif cmd == "list":
            for b in sorted(man.listOldBackups(), key=lambda b: b.date):
                print(b.date.strftime("%Y-%m-%d %H:%M") + \
                        "\t" + b.epoch + "\t" + b.mode)

        elif cmd == "prune":
            man.prune()

    except Config.ReadException as e:
        print("Error reading config file: ", end="")
        for a in e.args:
            print(a, end=" ")
        print()
        # Let the caller (e.g. cron) see that nothing was done
        sys.exit(1)
436
437
438
439