"""
Filename globbing utility. Mostly a copy of `glob` from Python 3.5.

Changes include:
 * PEP 3102 keyword-only `*` marker removed, so `recursive` may also be
   passed positionally.
 * Hidden files are not ignored.
"""

import fnmatch
import os
import re

__all__ = ["glob", "iglob", "escape"]


def glob(pathname, recursive: bool = False):
    """Return a list of paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. As with fnmatch (and unlike the standard library's
    `glob`), filenames starting with a dot are not special cases:
    they are matched by '*' and '?' patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    return list(iglob(pathname, recursive=recursive))


def iglob(pathname, recursive: bool = False):
    """Return an iterator which yields the paths matching a pathname pattern.

    The pattern may contain simple shell-style wildcards a la
    fnmatch. As with fnmatch (and unlike the standard library's
    `glob`), filenames starting with a dot are not special cases:
    they are matched by '*' and '?' patterns.

    If recursive is true, the pattern '**' will match any files and
    zero or more directories and subdirectories.
    """
    it = _iglob(pathname, recursive)
    if recursive and _isrecursive(pathname):
        # A bare '**' pattern makes the recursive helper emit an empty
        # string first (the "zero directories" match); drop it so callers
        # never receive an empty path.
        s = next(it)  # skip empty string
        assert not s
    return it


def _iglob(pathname, recursive):
    """Yield every path matching `pathname`, expanding one component at a time.

    The pattern is split into a directory part and a final component. The
    directory part is expanded recursively when it contains wildcards, and
    the final component is then matched inside each resulting directory.
    """
    dirname, basename = os.path.split(pathname)
    glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1

    if not has_magic(pathname):
        # Literal path: nothing to expand, only an existence check.
        if basename:
            if os.path.lexists(pathname):
                yield pathname
        else:
            # Patterns ending with a slash should match only directories
            if os.path.isdir(dirname):
                yield pathname
        return

    if not dirname:
        # No directory part: match against the current directory and yield
        # the bare names without joining a prefix.
        yield from glob_in_dir(dirname, basename)
        return
    # `os.path.split()` returns the argument itself as a dirname if it is a
    # drive or UNC path.  Prevent an infinite recursion if a drive or UNC path
    # contains magic characters (i.e. r'\\?\C:').
    if dirname != pathname and has_magic(dirname):
        dirs = _iglob(dirname, recursive)
    else:
        dirs = [dirname]
    if not has_magic(basename):
        # The last component is literal, so a plain existence check suffices.
        glob_in_dir = glob0
    for parent in dirs:
        for name in glob_in_dir(parent, basename):
            yield os.path.join(parent, name)


# These 2 helper functions non-recursively glob inside a literal directory.
# They return a list of basenames. `glob1` accepts a pattern while `glob0`
# takes a literal basename (so it only has to check for its existence).


def glob1(dirname, pattern):
    """Return the entries of `dirname` whose names match `pattern`.

    An empty `dirname` means the current directory; it is given the same
    type (bytes or str) as `pattern` so that the names returned by
    `os.listdir` can be matched against it. Matching is done by
    `fnmatch.filter` with no extra filtering, so names starting with a dot
    are matched like any other. A directory that cannot be listed yields
    an empty list.
    """
    if not dirname:
        if isinstance(pattern, bytes):
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return []
    return fnmatch.filter(names, pattern)


def glob0(dirname, basename):
    """Return `[basename]` if that literal entry exists in `dirname`, else `[]`.

    An empty `basename` stands for a pattern that ended with a directory
    separator; it is reported only when `dirname` itself is a directory.
    """
    if not basename:
        # `os.path.split()` returns an empty basename for paths ending with a
        # directory separator.  'q*x/' should match only directories.
        if os.path.isdir(dirname):
            return [basename]
    else:
        # lexists: a broken symlink still counts as an existing entry.
        if os.path.lexists(os.path.join(dirname, basename)):
            return [basename]
    return []


# This helper function recursively yields relative pathnames inside a literal
# directory.


def glob2(dirname, pattern):
    """Yield an empty name, then every relative path found under `dirname`.

    `pattern` must be the recursive wildcard ('**' or b'**'). The leading
    empty value has the same type as `pattern` and represents `dirname`
    itself, i.e. '**' matching zero directories.
    """
    assert _isrecursive(pattern)
    yield pattern[:0]
    yield from _rlistdir(dirname)


# Recursively yields relative pathnames inside a literal directory.
def _rlistdir(dirname):
    """Yield, depth-first, the relative path of every entry below `dirname`.

    An empty `dirname` means the current directory (bytes or str, matching
    the type of `dirname`). Each entry is yielded before its own contents.
    Entries that cannot be listed (plain files, unreadable directories) raise
    OSError from `os.listdir`, which simply ends that branch of the walk.
    """
    if not dirname:
        if isinstance(dirname, bytes):
            dirname = os.curdir.encode('ASCII')
        else:
            dirname = os.curdir
    try:
        names = os.listdir(dirname)
    except OSError:
        return
    for x in names:
        yield x
        # `dirname` is always non-empty here (normalised above), so the
        # entry can be joined onto it unconditionally.
        path = os.path.join(dirname, x)
        for y in _rlistdir(path):
            yield os.path.join(x, y)


magic_check = re.compile('([*?[])')
magic_check_bytes = re.compile(b'([*?[])')


def has_magic(s):
    """Return True if `s` (str or bytes) contains any of the characters '*?['."""
    checker = magic_check_bytes if isinstance(s, bytes) else magic_check
    return checker.search(s) is not None


def _isrecursive(pattern):
    """Return True if `pattern` is exactly the recursive wildcard '**' (str or bytes)."""
    if isinstance(pattern, bytes):
        return pattern == b'**'
    return pattern == '**'


def escape(pathname):
    """Escape all special characters."""
    # Escaping is done by wrapping any of "*?[" between square brackets.
    # Metacharacters do not work in the drive part and shouldn't be escaped.
    drive, pathname = os.path.splitdrive(pathname)
    if isinstance(pathname, bytes):
        pathname = magic_check_bytes.sub(rb'[\1]', pathname)
    else:
        pathname = magic_check.sub(r'[\1]', pathname)
    return drive + pathname