#!/usr/bin/python
# shuber, 2011-04-11
#
# Added to the repository by Stefan Huber on 2013-01-10, commit
# 6adfa2831ebd93ed00b8ee1563ecaeec49886972 ("Adding eFindStaleFiles.py").
"""Report files that are not recorded in any installed portage package.

Usage: eFindStaleFiles.py [DIRECTORY ...]

Every given directory (default: the current working directory) is scanned.
Paths that do not appear in the contents of any installed package are
printed to stdout, one per line.  A directory that is itself not tracked is
printed once and not descended into.  Progress messages go to stderr.
"""

__author__ = "Stefan Huber"
__email__ = "shuber@cosy.sbg.ac.at"


import os
import stat
import re
import sys

# portage (output module) and gentoolkit need special path modifications
sys.path.insert(0, "/usr/lib/portage/pym")
sys.path.insert(0, "/usr/lib/gentoolkit/pym")

# The imports are guarded so that the helpers which never touch portage
# (stripSlash, getNontrackedFiles) stay usable when the modules are missing;
# getTrackedFiles() reports a missing portage explicitly.
try:
    import gentoolkit
except ImportError:
    gentoolkit = None

try:
    import portage
except ImportError:
    portage = None


def print_dbg(str):
    """Write the message *str* to stderr, wrapped in ANSI colour escapes.

    No newline is appended; callers put it into the message themselves.
    The parameter name shadows the builtin but is kept so that existing
    callers are unaffected.
    """
    sys.stderr.write("\033[0;34m" + str + "\033[m")


def stripSlash(f):
    """Return *f* without its final "/", unless *f* is a single character.

    Only one trailing slash is removed, so "/" stays "/" and "" stays "".
    """
    if len(f) > 1 and f.endswith("/"):
        return f[:-1]
    return f


def getNontrackedFiles(directory, trackedFiles):
    """Yield the paths below *directory* that are not in *trackedFiles*.

    directory    -- path to scan; one trailing slash is ignored.
    trackedFiles -- container of tracked paths, tested with ``in``.

    A non-existing path or a symlink yields nothing.  If *directory* itself
    is untracked (and is not "/"), only *directory* is yielded.  Otherwise
    its direct entries are checked: untracked files and untracked
    sub-directories are yielded, tracked sub-directories are scanned
    recursively.
    """
    directory = stripSlash(directory)
    print_dbg("Scan '" + directory + "'...\n")

    if not os.access(directory, os.F_OK):
        print_dbg(" not existing.\n")
        return
    if os.path.islink(directory):
        print_dbg(" symlink, skipping.\n")
        return

    # The directory is not tracked -- yield it and do not look inside.
    if directory != "/" and directory not in trackedFiles:
        yield directory
        return

    # The directory is tracked -- inspect its direct content.  os.walk() is
    # only used for its first result, which splits the entries into
    # sub-directories and files; deeper levels are handled by recursion.
    for dirpath, dirnames, filenames in os.walk(directory):

        for name in filenames:
            path = os.path.join(dirpath, name)
            if path not in trackedFiles:
                yield path

        for name in dirnames:
            path = os.path.join(dirpath, name)
            if path not in trackedFiles:
                yield path
            else:
                for found in getNontrackedFiles(path, trackedFiles):
                    yield found

        break


def getTrackedFiles(directory):
    """Yield the paths recorded by installed packages that lie in *directory*.

    directory -- path whose tracked content is wanted; one trailing slash
                 is ignored.  For "/" every recorded path is yielded.

    A recorded path is yielded when it equals *directory* or lies below it.
    The comparison is a plain string comparison on a path-component
    boundary, so characters such as "+" or "." in *directory* have no
    special meaning.  Recorded paths are presumably absolute -- verify
    against portage's contents format.

    Raises ImportError (on first iteration, since this is a generator) if
    the portage module could not be imported.
    """
    if portage is None:
        raise ImportError("the 'portage' module is required to query "
                          "the installed packages")

    directory = stripSlash(directory)
    print_dbg("Get tracked files from '" + directory + "'...\n")

    wantAll = directory == "/"
    prefix = directory + "/"

    # Get all packages installed
    root = "/"
    vartree = portage.db[root]["vartree"]
    allcpv = vartree.getallcpv()
    total = len(allcpv)

    # Now, really get the files
    for no, cpv in enumerate(allcpv, 1):

        if no % 100 == 0:
            print_dbg("%d of %d packages done...\n" % (no, total))

        # Get all files of this cpv ("category/package-version")
        cat, pkg = cpv.split("/")[0:2]
        db = portage.dblink(cat, pkg, root, vartree.settings)

        for f in db.getcontents():
            if wantAll or f == directory or f.startswith(prefix):
                yield f


def main(argv=None):
    """Print the untracked paths of the directories named in *argv*.

    argv -- list of directory arguments; defaults to sys.argv[1:].  With no
            directories given, the current working directory is scanned.
    """
    if argv is None:
        argv = sys.argv[1:]

    # Relative arguments are made absolute so that they can be compared
    # with the recorded package paths.
    dirs = [os.path.abspath(d) for d in argv]
    if not dirs:
        dirs = [os.getcwd()]

    # get the files tracked and which are in the directories
    trackedfiles = set()
    for d in dirs:
        trackedfiles.update(getTrackedFiles(d))

    # get the non-tracked files
    for d in dirs:
        for f in getNontrackedFiles(d, trackedfiles):
            print(f)


if __name__ == "__main__":
    main()