我正在寻找一种方法：在调用 os.walk() 时，按模式包含/排除文件，并排除指定的目录。
os.walk()
这是我目前的做法：
import fnmatch
import os

includes = ['*.doc', '*.odt']  # glob patterns a file must match to be kept
excludes = ['/home/paulo-freitas/Documents']  # exact paths or glob patterns to drop


def _matches_any(path, patterns):
    """Return True if *path* matches at least one glob pattern in *patterns*."""
    return any(fnmatch.fnmatch(path, pattern) for pattern in patterns)


def _filter(paths):
    """Yield the entries of *paths* that pass the include/exclude rules.

    A path is dropped when it appears literally in ``excludes`` or matches
    one of its glob patterns. Of the remaining paths, directories are always
    kept, while anything else is kept only if it matches a pattern in
    ``includes``. Each surviving path is yielded exactly once, in input order.
    """
    for path in paths:
        # Exclusion wins over everything else, for files and directories alike.
        if path in excludes or _matches_any(path, excludes):
            continue
        if os.path.isdir(path) or _matches_any(path, includes):
            yield path


for root, dirs, files in os.walk('/home/paulo-freitas'):
    # Prune in place so os.walk skips excluded directories. The list must hold
    # bare names: os.walk joins each entry onto root itself, so storing joined
    # paths only works by accident when the walk root is absolute.
    dirs[:] = [
        os.path.basename(path)
        for path in _filter(os.path.join(root, d) for d in dirs)
    ]
    for filename in _filter(os.path.join(root, f) for f in files):
        print(filename)
有更好的方法吗？如果有，该怎么做？
此解决方案使用 fnmatch.translate 将 glob 模式转换为正则表达式（假定 includes 仅用于文件）：
fnmatch.translate
import fnmatch
import os
import os.path
import re

includes = ['*.doc', '*.odt']  # for files only
excludes = ['/home/paulo-freitas/Documents']  # for dirs and files

# transform glob patterns to regular expressions
includes = r'|'.join([fnmatch.translate(x) for x in includes])
# r'$.' can never match, so an empty exclude list excludes nothing
# (an empty regex would match, and therefore exclude, every path).
excludes = r'|'.join([fnmatch.translate(x) for x in excludes]) or r'$.'


def iter_matching_files(top, include_re=includes, exclude_re=excludes):
    """Yield the paths of files under *top* that pass the filters.

    Args:
        top: Directory to walk; may be absolute or relative.
        include_re: Regex (string or compiled) a file path must match
            to be yielded.
        exclude_re: Regex (string or compiled) that drops any file or
            directory path it matches; matching directories are not
            descended into.

    Yields:
        ``os.path.join(root, name)`` for every accepted file, in
        ``os.walk`` order.
    """
    # Compile once instead of going through re's pattern cache per path.
    is_included = re.compile(include_re).match
    is_excluded = re.compile(exclude_re).match

    for root, dirs, files in os.walk(top):
        # exclude dirs -- prune in place, keeping bare names: os.walk joins
        # each entry onto root itself, so storing joined paths here would
        # break the walk whenever top is a relative path.
        dirs[:] = [d for d in dirs if not is_excluded(os.path.join(root, d))]

        # exclude/include files
        for name in files:
            path = os.path.join(root, name)
            if not is_excluded(path) and is_included(path):
                yield path


for fname in iter_matching_files('/home/paulo-freitas'):
    # print() with one argument behaves the same on Python 2 and Python 3
    print(fname)