Adding eFindStaleFiles.py
author Stefan Huber <shuber@sthu.org>
Thu, 10 Jan 2013 21:41:57 +0000 (22:41 +0100)
committer Stefan Huber <shuber@sthu.org>
Thu, 10 Jan 2013 21:41:57 +0000 (22:41 +0100)
eFindStaleFiles.py [new file with mode: 0755]

diff --git a/eFindStaleFiles.py b/eFindStaleFiles.py
new file mode 100755 (executable)
index 0000000..773a1dd
--- /dev/null
@@ -0,0 +1,128 @@
+#!/usr/bin/python
+#shuber, 2011-04-11
+
+__author__ = "Stefan Huber"
+__email__ = "shuber@cosy.sbg.ac.at"
+
+
+import os
+import stat
+import re
+import sys
+
+# portage (output module) and gentoolkit need special path modifications
+sys.path.insert(0, "/usr/lib/portage/pym")
+sys.path.insert(0, "/usr/lib/gentoolkit/pym")
+
+import gentoolkit
+import portage
+
+
+
+
+
+def print_dbg(str):
+       """Write the debug message `str` to stderr, wrapped in ANSI color codes.
+
+       No newline is appended; callers supply their own line terminator.
+       """
+       # "\033[0;34m" switches the color on, "\033[m" resets it afterwards
+       colored = "".join(("\033[0;34m", str, "\033[m"))
+       sys.stderr.write(colored)
+
+
+def stripSlash(f):
+       """Return the path `f` without trailing slashes.
+
+       All trailing "/" characters are removed, so "/usr/" as well as
+       "/usr//" become "/usr". A path that consists of slashes only
+       collapses to the root "/", and the empty string is returned unchanged.
+       """
+       stripped = f.rstrip("/")
+       if stripped or not f:
+               return stripped
+       # f was made up of slashes only: keep the root directory itself
+       return "/"
+
+
+def getNontrackedFiles( directory, trackedFiles):
+       """Yield the paths in and below `directory` missing from `trackedFiles`.
+
+       `directory` is a path (a trailing slash is stripped) and `trackedFiles`
+       a container of paths supporting the `in` operator. A directory that
+       does not exist or is a symlink is skipped with a debug message. An
+       untracked directory is yielded itself and not descended into; a
+       tracked directory has its entries checked one by one, recursing into
+       the tracked sub-directories. The root "/" is never yielded itself.
+       """
+       directory = stripSlash(directory)
+       print_dbg("Scan '" + directory + "'...\n")
+
+       if not os.access(directory, os.F_OK):
+               print_dbg("    not existing.\n")
+               return
+       if os.path.islink(directory):
+               print_dbg("    symlink, skipping.\n")
+               return
+
+       # An untracked directory is reported as a whole
+       if directory != "/" and directory not in trackedFiles:
+               yield directory
+               return
+
+       # So 'directory' is tracked (or "/"). Only its immediate content is of
+       # interest here, which is the first triple produced by os.walk; deeper
+       # levels are covered by the recursive calls below.
+       top = next(os.walk(directory), None)
+       if top is None:
+               return
+       parent, subdirs, files = top
+
+       # check the files
+       for name in files:
+               path = os.path.join(parent, name)
+               if path not in trackedFiles:
+                       yield path
+
+       # check the directories and, if necessary, start recursive scan
+       for name in subdirs:
+               path = os.path.join(parent, name)
+               if path not in trackedFiles:
+                       yield path
+                       continue
+               for nested in getNontrackedFiles(path, trackedFiles):
+                       yield nested
+
+
+
+def getTrackedFiles( directory ):
+       """Yield the files of all installed packages that lie in `directory`.
+
+       Every package listed by portage's vartree for the root "/" is asked
+       for its contents, and each content path starting with `directory`
+       (after stripping a trailing slash) is yielded. If `directory` is "/",
+       every content path is yielded. A path contained in several packages is
+       yielded once per package. Progress is reported via print_dbg after
+       every 100 packages.
+
+       The directory is compared as a literal prefix. It is deliberately not
+       turned into a regular expression: characters such as "+", "." or "["
+       in a path would otherwise be interpreted as regex syntax and either
+       raise an error (e.g. "/usr/include/c++") or match unrelated paths.
+       """
+       directory = stripSlash(directory)
+       print_dbg("Get tracked files from '" + directory + "'...\n")
+
+       # "/" selects everything, whatever the content paths look like
+       matchAll = directory == "/"
+
+       #Get all packages installed
+       root = "/"
+       vartree = portage.db[root]["vartree"]
+       allcpv = vartree.getallcpv()
+       total = len(allcpv)
+
+       #Now, really get the files
+       for no, cpv in enumerate(allcpv, 1):
+
+               if no%100 == 0:
+                       print_dbg("%d of %d packages done...\n" % (no, total))
+
+               #Get all files of this cpv; cpv has the form "category/package"
+               cat, pkg = cpv.split("/")[0:2]
+               db = portage.dblink(cat, pkg, root, vartree.settings)
+
+               #Check for all files of cpv, whether they lie in 'directory'
+               for f in db.getcontents():
+                       if matchAll or f.startswith(directory):
+                               yield f
+
+
+
+if __name__ == "__main__":
+
+       # get directories to scan; without arguments the current working
+       # directory is scanned
+       dirs = sys.argv[1:] or [os.getcwd()]
+
+       # get the files tracked and which are in the directories
+       trackedfiles = set()
+       for d in dirs:
+               trackedfiles.update(getTrackedFiles(d))
+
+       # get the non-tracked files and print them, one per line
+       for d in dirs:
+               for f in getNontrackedFiles(d, trackedfiles):
+                       # Call form with a single argument: behaves the same
+                       # under Python 2 and Python 3, whereas the statement
+                       # form "print f" is a syntax error on Python 3.
+                       print(f)
+