Mercurial > repos > rliterman > csp2
diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pooch/utils.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
line wrap: on
line diff
# Copyright (c) 2018 The Pooch Developers.
# Distributed under the terms of the BSD 3-Clause License.
# SPDX-License-Identifier: BSD-3-Clause
#
# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
#
"""
Misc utilities
"""
import logging
import os
import tempfile
import hashlib
from pathlib import Path
from urllib.parse import urlsplit
from contextlib import contextmanager
import warnings

import platformdirs
from packaging.version import Version


# NOTE(review): the logger is instantiated directly instead of through
# ``logging.getLogger``, so it is not registered in the global logger
# hierarchy. Presumably intentional (keeps Pooch's output independent of the
# root logger configuration) -- confirm before changing.
LOGGER = logging.Logger("pooch")
LOGGER.addHandler(logging.StreamHandler())


def file_hash(*args, **kwargs):
    """
    WARNING: Importing this function from pooch.utils is DEPRECATED.
    Please import from the top-level namespace (`from pooch import file_hash`)
    instead, which is fully backwards compatible with pooch >= 0.1.

    All arguments are forwarded unchanged to :func:`pooch.hashes.file_hash`
    after a :class:`DeprecationWarning` is issued.

    Examples
    --------

    >>> fname = "test-file-for-hash.txt"
    >>> with open(fname, "w") as f:
    ...     __ = f.write("content of the file")
    >>> print(file_hash(fname))
    0fc74468e6a9a829f103d069aeb2bb4f8646bad58bf146bb0e3379b759ec4a00
    >>> import os
    >>> os.remove(fname)

    """
    # Imported here (not at module level) to avoid a circular import.
    # pylint: disable=import-outside-toplevel
    from .hashes import file_hash as new_file_hash

    message = """
    Importing file_hash from pooch.utils is DEPRECATED. Please import from the
    top-level namespace (`from pooch import file_hash`) instead, which is fully
    backwards compatible with pooch >= 0.1.
    """
    # stacklevel=2 makes the warning point at the caller, not this wrapper.
    warnings.warn(message, DeprecationWarning, stacklevel=2)
    return new_file_hash(*args, **kwargs)


def get_logger():
    r"""
    Get the default event logger.

    The logger records events like downloading files, unzipping archives, etc.
    Use the method :meth:`logging.Logger.setLevel` of this object to adjust the
    verbosity level from Pooch.

    Returns
    -------
    logger : :class:`logging.Logger`
        The logger object for Pooch

    """
    return LOGGER


def os_cache(project):
    r"""
    Default cache location based on the operating system.

    The folder locations are defined by the ``platformdirs`` package
    using the ``user_cache_dir`` function.
    Usually, the locations will be following (see the
    `platformdirs documentation <https://platformdirs.readthedocs.io>`__):

    * Mac: ``~/Library/Caches/<AppName>``
    * Unix: ``~/.cache/<AppName>`` or the value of the ``XDG_CACHE_HOME``
      environment variable, if defined.
    * Windows: ``C:\Users\<user>\AppData\Local\<AppAuthor>\<AppName>\Cache``

    Parameters
    ----------
    project : str
        The project name.

    Returns
    -------
    cache_path : :class:`pathlib.Path`
        The default location for the data cache. User directories (``'~'``) are
        not expanded.

    """
    return Path(platformdirs.user_cache_dir(project))


def check_version(version, fallback="master"):
    """
    Check if a version is PEP440 compliant and there are no unreleased changes.

    For example, ``version = "0.1"`` will be returned as is but ``version =
    "0.1+10.8dl8dh9"`` will return the fallback. This is the convention used by
    `versioneer <https://github.com/warner/python-versioneer>`__ to mark that
    this version is 10 commits ahead of the last release.

    Parameters
    ----------
    version : str
        A version string.
    fallback : str
        What to return if the version string has unreleased changes.

    Returns
    -------
    version : str
        If *version* is PEP440 compliant and there are **no** unreleased
        changes, then return *version*. Otherwise, return *fallback*.

    Raises
    ------
    InvalidVersion
        If *version* is not PEP440 compliant.

    Examples
    --------

    >>> check_version("0.1")
    '0.1'
    >>> check_version("0.1a10")
    '0.1a10'
    >>> check_version("0.1+111.9hdg36")
    'master'
    >>> check_version("0.1+111.9hdg36", fallback="dev")
    'dev'

    """
    parse = Version(version)
    # A "local" segment (the part after "+") marks commits past a release.
    if parse.local is not None:
        return fallback
    return version


def parse_url(url):
    """
    Parse a URL into 3 components:

    <protocol>://<netloc>/<path>

    Example URLs:

    * http://127.0.0.1:8080/test.nc
    * ftp://127.0.0.1:8080/test.nc
    * doi:10.6084/m9.figshare.923450.v1/test.nc

    The DOI is a special case. The protocol will be "doi", the netloc will be
    the DOI, and the path is what comes after the last "/".
    The only exception are Zenodo dois: the protocol will be "doi", the netloc
    will be composed by the "prefix/suffix" and the path is what comes after
    the second "/". This allows to support special cases of Zenodo dois where
    the path contains forward slashes "/", created by the GitHub-Zenodo
    integration service.

    Parameters
    ----------
    url : str
        The URL.

    Returns
    -------
    parsed_url : dict
        Three components of a URL (e.g.,
        ``{'protocol':'http', 'netloc':'127.0.0.1:8080','path': '/test.nc'}``).

    Raises
    ------
    ValueError
        If the DOI link uses ``doi://`` or does not contain the
        ``<prefix>/<suffix>`` pair that every DOI requires.

    """
    if url.startswith("doi://"):
        raise ValueError(
            f"Invalid DOI link '{url}'. You must not use '//' after 'doi:'."
        )
    if url.startswith("doi:"):
        protocol = "doi"
        parts = url[4:].split("/")
        # A DOI always has the form "<prefix>/<suffix>". Without the "/" there
        # is no suffix to inspect, so fail with a clear message instead of an
        # IndexError on parts[1].
        if len(parts) < 2:
            raise ValueError(
                f"Invalid DOI link '{url}'. "
                "A DOI must have the form 'doi:<prefix>/<suffix>'."
            )
        if "zenodo" in parts[1].lower():
            # Zenodo: everything after "<prefix>/<suffix>" is the file path,
            # which may itself contain "/".
            netloc = "/".join(parts[:2])
            path = "/" + "/".join(parts[2:])
        else:
            netloc = "/".join(parts[:-1])
            path = "/" + parts[-1]
    else:
        parsed_url = urlsplit(url)
        # URLs without a scheme are treated as local files.
        protocol = parsed_url.scheme or "file"
        netloc = parsed_url.netloc
        path = parsed_url.path
    return {"protocol": protocol, "netloc": netloc, "path": path}


def cache_location(path, env=None, version=None):
    """
    Location of the cache given a base path and optional configuration.

    Checks for the environment variable to overwrite the path of the local
    cache. Optionally add *version* to the path if given.

    Parameters
    ----------
    path : str, PathLike, list or tuple
        The path to the local data storage folder. If this is a list or tuple,
        we'll join the parts with the appropriate separator. Use
        :func:`pooch.os_cache` for a sensible default.
    version : str or None
        The version string for your project. Will be appended to given path if
        not None.
    env : str or None
        An environment variable that can be used to overwrite *path*. This
        allows users to control where they want the data to be stored. We'll
        append *version* to the end of this value as well.

    Returns
    -------
    local_path : PathLike
        The path to the local directory.

    """
    # Only a set *and non-empty* environment variable overrides the path.
    if env is not None and os.environ.get(env):
        path = os.environ[env]
    if isinstance(path, (list, tuple)):
        path = os.path.join(*path)
    if version is not None:
        path = os.path.join(str(path), version)
    path = os.path.expanduser(str(path))
    return Path(path)


def make_local_storage(path, env=None):
    """
    Create the local cache directory and make sure it's writable.

    Parameters
    ----------
    path : str or PathLike
        The path to the local data storage folder.
    env : str or None
        An environment variable that can be used to overwrite *path*. Only used
        in the error message in case the folder is not writable.

    Raises
    ------
    PermissionError
        If the folder can't be created or written to. The message includes
        the failed action, the path, and (if given) the *env* variable name.

    """
    path = str(path)
    # Decide which operation we are about to attempt; the word is reused in
    # the error message below.
    action = "write to" if os.path.exists(path) else "create"

    try:
        if action == "create":
            # When running in parallel, it's possible that multiple jobs will
            # try to create the path at the same time. Use exist_ok to avoid
            # raising an error.
            os.makedirs(path, exist_ok=True)
        else:
            # Check that the data directory is writable by creating (and
            # immediately discarding) a temporary file inside it.
            with tempfile.NamedTemporaryFile(dir=path):
                pass
    except PermissionError as error:
        message = [
            str(error),
            f"| Pooch could not {action} data cache folder '{path}'.",
            "Will not be able to download data files.",
        ]
        if env is not None:
            message.append(
                f"Use environment variable '{env}' to specify a different location."
            )
        raise PermissionError(" ".join(message)) from error


@contextmanager
def temporary_file(path=None):
    """
    Create a closed and named temporary file and make sure it's cleaned up.

    Using :class:`tempfile.NamedTemporaryFile` will fail on Windows if trying
    to open the file a second time (when passing its name to Pooch function,
    for example). This context manager creates the file, closes it, yields the
    file path, and makes sure it's deleted in the end.

    Parameters
    ----------
    path : str or PathLike
        The directory in which the temporary file will be created.

    Yields
    ------
    fname : str
        The path to the temporary file.

    """
    tmp = tempfile.NamedTemporaryFile(delete=False, dir=path)
    # Close the temp file so that it can be opened elsewhere
    tmp.close()
    try:
        yield tmp.name
    finally:
        # The caller may have moved or removed the file already.
        if os.path.exists(tmp.name):
            os.remove(tmp.name)


def unique_file_name(url):
    """
    Create a unique file name based on the given URL.

    The file name will be unique to the URL by prepending the name with the MD5
    hash (hex digest) of the URL. The name will also include the last portion
    of the URL.

    The format will be: ``{md5}-{filename}.{ext}``

    The file name will be cropped so that the entire name (including the hash)
    is at most 255 characters long (the limit on most file systems).

    Parameters
    ----------
    url : str
        The URL with a file name at the end.

    Returns
    -------
    fname : str
        The file name, unique to this URL.

    Examples
    --------

    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt"))
    02ddee027ce5ebb3d7059fb23d210604-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2019/data.txt"))
    9780092867b497fca6fc87d8308f1025-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt.gz"))
    181a9d52e908219c2076f55145d6a344-data.txt.gz

    """
    md5 = hashlib.md5(url.encode()).hexdigest()
    fname = parse_url(url)["path"].split("/")[-1]
    # Crop the start of the file name (keeping the end, and therefore the
    # extension) to fit 255 characters including the hash and the "-"
    # separator.
    fname = fname[-(255 - len(md5) - 1) :]
    unique_name = f"{md5}-{fname}"
    return unique_name