"""
Filename globbing utility. Mostly a copy of `glob` from Python 3.5.

Changes include:
 * The PEP 3102 keyword-only `*` marker is removed, so `recursive` may
   also be passed positionally.
 * Hidden files are not ignored.
"""

import fnmatch
import os
import re

__all__ = ["glob", "iglob", "escape"]


def glob(pathname, recursive: bool = False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Filenames starting with a dot get no special treatment:
    they are matched by '*' and '?' patterns like any other name
    (hidden files are not ignored).

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    return list(iglob(pathname, recursive=recursive))


def iglob(pathname, recursive: bool = False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. Filenames starting with a dot get no special treatment:
    they are matched by '*' and '?' patterns like any other name
    (hidden files are not ignored).

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    it = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # A bare '**' pattern makes the recursive helper emit an empty
        # path first (the directory being searched itself); callers of the
        # public API should not see it.
        s = next(it)  # skip empty string
        assert not s
    return it


def _iglob(pathname, recursive):
    """Yield every path matching *pathname* (str or bytes).

    The pattern is split into a directory part and a basename. The
    directory part is expanded first (recursively, if it contains magic
    characters) and the basename is then matched inside each resulting
    directory.
    """
    dirname, basename = os.path.split(pathname)
    glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1

    if not has_magic(pathname):
        # Literal path: nothing to expand, just check that it exists.
        if basename:
            if os.path.lexists(pathname):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if os.path.isdir(dirname):
                yield pathname
        return

    if not dirname:
        # Pattern without a directory part: match in the current directory.
        yield from glob_in_dir(dirname, basename)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = _iglob(dirname, recursive)
    else:
        dirs = [dirname]
    if not has_magic(basename):
        # Literal basename: an existence check is enough.
        glob_in_dir = glob0
    for parent in dirs:
        for name in glob_in_dir(parent, basename):
            yield os.path.join(parent, name)


# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
# takes a literal basename (so it only has to check for its existence).
def glob1(dirname, pattern):
    """Return the entries of *dirname* whose names match *pattern*.

    An empty *dirname* means the current directory; its type (str or
    bytes) is chosen to agree with *pattern*. Names are filtered with
    ``fnmatch.filter`` only, so entries starting with a dot are matched
    like any other. A directory that cannot be listed yields ``[]``.
    """
    if not dirname:
        if isinstance(pattern, bytes):
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return []
    return fnmatch.filter(names, pattern)


def glob0(dirname, basename):
    """Return ``[basename]`` if that literal entry exists, else ``[]``.

    An empty *basename* stands for *dirname* itself and is reported only
    when *dirname* is a directory.
    """
    if not basename:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        if os.path.isdir(dirname):
            return [basename]
    else:
        if os.path.lexists(os.path.join(dirname, basename)):
            return [basename]
    return []


# This helper function recursively yields relative pathnames inside a literal
# directory.


def glob2(dirname, pattern):
    """Yield an empty path, then every relative path found below *dirname*.

    *pattern* must be the recursive wildcard ('**' or b'**'); it is only
    used to produce an empty value of the matching type (str or bytes).
    """
    assert _isrecursive(pattern)
    yield pattern[:0]
    yield from _rlistdir(dirname)


# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname):
    """Yield the relative path of every entry below *dirname*, depth first.

    An empty *dirname* (str or bytes) means the current directory.
    Entries that cannot be listed (plain files, unreadable directories)
    contribute no children.
    """
    if not dirname:
        if isinstance(dirname, bytes):
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return
    for x in names:
        yield x
        # `dirname` is never empty here (replaced by os.curdir above).
        path = os.path.join(dirname, x)
        for y in _rlistdir(path):
            yield os.path.join(x, y)


# Matches a single glob metacharacter: '*', '?' or '['.
magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if *s* (str or bytes) contains '*', '?' or '['."""
    if isinstance(s, bytes):
        match = magic_check_bytes.search(s)
    else:
        match = magic_check.search(s)
    return match is not None


def _isrecursive(pattern):
    """Return True if *pattern* is exactly the recursive wildcard '**'."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    else:
        return pattern == '**'


def escape(pathname):
    """Escape all special characters."""
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(rb'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname