annotate CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pooch/utils.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
rev   line source
jpayne@69 1 # Copyright (c) 2018 The Pooch Developers.
jpayne@69 2 # Distributed under the terms of the BSD 3-Clause License.
jpayne@69 3 # SPDX-License-Identifier: BSD-3-Clause
jpayne@69 4 #
jpayne@69 5 # This code is part of the Fatiando a Terra project (https://www.fatiando.org)
jpayne@69 6 #
jpayne@69 7 """
jpayne@69 8 Misc utilities
jpayne@69 9 """
jpayne@69 10 import logging
jpayne@69 11 import os
jpayne@69 12 import tempfile
jpayne@69 13 import hashlib
jpayne@69 14 from pathlib import Path
jpayne@69 15 from urllib.parse import urlsplit
jpayne@69 16 from contextlib import contextmanager
jpayne@69 17 import warnings
jpayne@69 18
jpayne@69 19 import platformdirs
jpayne@69 20 from packaging.version import Version
jpayne@69 21
jpayne@69 22
# Module-level logger object; this is the instance returned by get_logger().
# NOTE: it is constructed directly with logging.Logger instead of
# logging.getLogger, so it is a standalone logger named "pooch".
LOGGER = logging.Logger("pooch")
# Attach a stream handler so that log records are actually emitted
# (logging.StreamHandler writes to sys.stderr when no stream is given).
LOGGER.addHandler(logging.StreamHandler())
jpayne@69 25
jpayne@69 26
def file_hash(*args, **kwargs):
    """
    Deprecated entry point that forwards to ``file_hash`` in ``.hashes``.

    WARNING: Importing this function from pooch.utils is DEPRECATED.
    Please import from the top-level namespace (`from pooch import file_hash`)
    instead, which is fully backwards compatible with pooch >= 0.1.

    All positional and keyword arguments are passed through unchanged and a
    :class:`DeprecationWarning` is issued on every call.

    Examples
    --------

    >>> fname = "test-file-for-hash.txt"
    >>> with open(fname, "w") as f:
    ...     __ = f.write("content of the file")
    >>> print(file_hash(fname))
    0fc74468e6a9a829f103d069aeb2bb4f8646bad58bf146bb0e3379b759ec4a00
    >>> import os
    >>> os.remove(fname)

    """
    # pylint: disable=import-outside-toplevel
    from .hashes import file_hash as _current_file_hash

    notice = """
    Importing file_hash from pooch.utils is DEPRECATED. Please import from the
    top-level namespace (`from pooch import file_hash`) instead, which is fully
    backwards compatible with pooch >= 0.1.
    """
    # stacklevel=2 makes the warning point at the caller's line rather than
    # at this wrapper.
    warnings.warn(notice, DeprecationWarning, stacklevel=2)
    return _current_file_hash(*args, **kwargs)
jpayne@69 55
jpayne@69 56
def get_logger():
    """
    Return the module-level logger used for Pooch events.

    Events such as downloading files or unzipping archives are recorded
    through this logger. Call :meth:`logging.Logger.setLevel` on the returned
    object to change how verbose Pooch is.

    Returns
    -------
    logger : :class:`logging.Logger`
        The logger object for Pooch (the module-level ``LOGGER``).
    """
    return LOGGER
jpayne@69 71
jpayne@69 72
def os_cache(project):
    r"""
    Return the operating system's default cache folder for *project*.

    The location is whatever ``platformdirs.user_cache_dir`` reports for the
    given project name. Typical results are (see the
    `platformdirs documentation <https://platformdirs.readthedocs.io>`__):

    * Mac: ``~/Library/Caches/<AppName>``
    * Unix: ``~/.cache/<AppName>`` or the value of the ``XDG_CACHE_HOME``
      environment variable, if defined.
    * Windows: ``C:\Users\<user>\AppData\Local\<AppAuthor>\<AppName>\Cache``

    Parameters
    ----------
    project : str
        The project name.

    Returns
    -------
    cache_path : :class:`pathlib.Path`
        The default location for the data cache. User directories (``'~'``)
        are not expanded.

    """
    cache_dir = platformdirs.user_cache_dir(project)
    return Path(cache_dir)
jpayne@69 100
jpayne@69 101
def check_version(version, fallback="master"):
    """
    Check if a version is PEP440 compliant and there are no unreleased changes.

    For example, ``version = "0.1"`` will be returned as is but ``version =
    "0.1+10.8dl8dh9"`` will return the fallback. This is the convention used by
    `versioneer <https://github.com/warner/python-versioneer>`__ to mark that
    this version is 10 commits ahead of the last release.

    Parameters
    ----------
    version : str
        A version string.
    fallback : str
        What to return if the version string has unreleased changes.

    Returns
    -------
    version : str
        If *version* is PEP440 compliant and there are **no** unreleased
        changes (it has no local version segment, i.e. nothing after a
        ``+``), then return *version* unchanged. Otherwise, return *fallback*.

    Raises
    ------
    InvalidVersion
        If *version* is not PEP440 compliant.

    Examples
    --------

    >>> check_version("0.1")
    '0.1'
    >>> check_version("0.1a10")
    '0.1a10'
    >>> check_version("0.1+111.9hdg36")
    'master'
    >>> check_version("0.1+111.9hdg36", fallback="dev")
    'dev'

    """
    # Parsing validates the string (raising InvalidVersion on failure). A
    # non-None ``local`` segment is how unreleased changes are detected.
    has_unreleased_changes = Version(version).local is not None
    if has_unreleased_changes:
        return fallback
    return version
jpayne@69 146
jpayne@69 147
def parse_url(url):
    """
    Parse a URL into 3 components:

    <protocol>://<netloc>/<path>

    Example URLs:

    * http://127.0.0.1:8080/test.nc
    * ftp://127.0.0.1:8080/test.nc
    * doi:10.6084/m9.figshare.923450.v1/test.nc

    The DOI is a special case. The protocol will be "doi", the netloc will be
    the DOI, and the path is what comes after the last "/".
    The only exception are Zenodo dois: the protocol will be "doi", the netloc
    will be composed by the "prefix/suffix" and the path is what comes after
    the second "/". This allows to support special cases of Zenodo dois where
    the path contains forward slashes "/", created by the GitHub-Zenodo
    integration service.

    URLs without a scheme are given the protocol "file".

    Parameters
    ----------
    url : str
        The URL.

    Returns
    -------
    parsed_url : dict
        Three components of a URL (e.g.,
        ``{'protocol':'http', 'netloc':'127.0.0.1:8080','path': '/test.nc'}``).

    Raises
    ------
    ValueError
        If a DOI link starts with ``doi://`` or contains no "/" at all (so
        there is no "prefix/suffix" to work with).

    """
    if url.startswith("doi://"):
        raise ValueError(
            f"Invalid DOI link '{url}'. You must not use '//' after 'doi:'."
        )
    if url.startswith("doi:"):
        protocol = "doi"
        parts = url[4:].split("/")
        # A DOI always has the form "<prefix>/<suffix>". Without at least one
        # "/" the Zenodo check below would fail with an opaque IndexError, so
        # reject the link explicitly instead.
        if len(parts) < 2:
            raise ValueError(
                f"Invalid DOI link '{url}'. "
                "Expected the form 'doi:<prefix>/<suffix>/<file name>'."
            )
        if "zenodo" in parts[1].lower():
            # Zenodo: everything after "<prefix>/<suffix>" is the path, which
            # may itself contain "/".
            netloc = "/".join(parts[:2])
            path = "/" + "/".join(parts[2:])
        else:
            # Other repositories: only the last component is the path.
            netloc = "/".join(parts[:-1])
            path = "/" + parts[-1]
    else:
        parsed_url = urlsplit(url)
        protocol = parsed_url.scheme or "file"
        netloc = parsed_url.netloc
        path = parsed_url.path
    return {"protocol": protocol, "netloc": netloc, "path": path}
jpayne@69 199
jpayne@69 200
def cache_location(path, env=None, version=None):
    """
    Work out where the local cache lives from a base path and optional extras.

    If *env* names an environment variable that is set to a non-empty value,
    that value replaces *path*. The *version*, when given, is appended as a
    final path component, and a leading ``~`` is expanded.

    Parameters
    ----------
    path : str, PathLike, list or tuple
        The path to the local data storage folder. If this is a list or tuple,
        we'll join the parts with the appropriate separator. Use
        :func:`pooch.os_cache` for a sensible default.
    env : str or None
        An environment variable that can be used to overwrite *path*. This
        allows users to control where they want the data to be stored. We'll
        append *version* to the end of this value as well.
    version : str or None
        The version string for your project. Will be appended to given path if
        not None.

    Returns
    -------
    local_path : PathLike
        The path to the local directory.

    """
    # An unset or empty environment variable does not override *path*.
    override = os.environ.get(env) if env is not None else None
    if override:
        path = override
    elif isinstance(path, (list, tuple)):
        path = os.path.join(*path)
    location = str(path)
    if version is not None:
        location = os.path.join(location, version)
    return Path(os.path.expanduser(location))
jpayne@69 236
jpayne@69 237
def make_local_storage(path, env=None):
    """
    Ensure the local cache folder exists and can be written to.

    A missing folder is created (including parents). An existing folder is
    probed by creating a throw-away temporary file inside it.

    Parameters
    ----------
    path : str or PathLike
        The path to the local data storage folder.
    env : str or None
        An environment variable that can be used to overwrite *path*. Only used
        in the error message in case the folder is not writable.

    Raises
    ------
    PermissionError
        If the folder cannot be created or written to. The message names the
        failed action and, when *env* is given, the variable that can be used
        to pick another location.
    """
    path = str(path)
    # Decide up front which operation is attempted so the error message can
    # name it.
    action = "write to" if os.path.exists(path) else "create"

    try:
        if action == "write to":
            # The temporary file is removed as soon as the block exits.
            with tempfile.NamedTemporaryFile(dir=path):
                pass
        else:
            # Parallel jobs may race to create the folder; exist_ok keeps the
            # loser of the race from failing.
            os.makedirs(path, exist_ok=True)
    except PermissionError as error:
        pieces = [
            str(error),
            f"| Pooch could not {action} data cache folder '{path}'.",
            "Will not be able to download data files.",
        ]
        if env is not None:
            pieces.append(
                f"Use environment variable '{env}' to specify a different location."
            )
        raise PermissionError(" ".join(pieces)) from error
jpayne@69 277
jpayne@69 278
@contextmanager
def temporary_file(path=None):
    """
    Yield the path of a closed, named temporary file and delete it afterwards.

    Using :class:`tempfile.NamedTemporaryFile` will fail on Windows if trying
    to open the file a second time (when passing its name to Pooch function,
    for example). This context manager therefore creates the file, closes it
    right away, yields only its path, and removes the file on exit.

    Parameters
    ----------
    path : str or PathLike
        The directory in which the temporary file will be created.

    Yields
    ------
    fname : str
        The path to the temporary file.

    """
    # delete=False keeps the file on disk once the handle is closed at the end
    # of the ``with`` block, so that it can be reopened elsewhere by name.
    with tempfile.NamedTemporaryFile(delete=False, dir=path) as handle:
        fname = handle.name
    try:
        yield fname
    finally:
        # The caller may already have moved or deleted the file.
        if os.path.exists(fname):
            os.remove(fname)
jpayne@69 308
jpayne@69 309
def unique_file_name(url):
    """
    Build a file name that is unique to the given URL.

    The name is the MD5 hash (hex digest) of the full URL, followed by a
    hyphen and the last portion of the URL path.

    The format will be: ``{md5}-{filename}.{ext}``

    If needed, the start of the file name is cropped so that the entire name
    (hash and hyphen included) is at most 255 characters long (the limit on
    most file systems).

    Parameters
    ----------
    url : str
        The URL with a file name at the end.

    Returns
    -------
    fname : str
        The file name, unique to this URL.

    Examples
    --------

    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt"))
    02ddee027ce5ebb3d7059fb23d210604-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2019/data.txt"))
    9780092867b497fca6fc87d8308f1025-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt.gz"))
    181a9d52e908219c2076f55145d6a344-data.txt.gz

    """
    digest = hashlib.md5(url.encode()).hexdigest()
    basename = parse_url(url)["path"].rsplit("/", 1)[-1]
    # Characters left for the file name once the hash and the "-" separator
    # are accounted for. Keep the tail so that the extension survives.
    budget = 255 - len(digest) - 1
    return f"{digest}-{basename[-budget:]}"
jpayne@69 343 """
jpayne@69 344 md5 = hashlib.md5(url.encode()).hexdigest()
jpayne@69 345 fname = parse_url(url)["path"].split("/")[-1]
jpayne@69 346 # Crop the start of the file name to fit 255 characters including the hash
jpayne@69 347 # and the :
jpayne@69 348 fname = fname[-(255 - len(md5) - 1) :]
jpayne@69 349 unique_name = f"{md5}-{fname}"
jpayne@69 350 return unique_name