comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pooch/utils.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 # Copyright (c) 2018 The Pooch Developers.
2 # Distributed under the terms of the BSD 3-Clause License.
3 # SPDX-License-Identifier: BSD-3-Clause
4 #
5 # This code is part of the Fatiando a Terra project (https://www.fatiando.org)
6 #
7 """
8 Misc utilities
9 """
10 import logging
11 import os
12 import tempfile
13 import hashlib
14 from pathlib import Path
15 from urllib.parse import urlsplit
16 from contextlib import contextmanager
17 import warnings
18
19 import platformdirs
20 from packaging.version import Version
21
22
# Module-level logger shared by the functions in this package and handed out
# by ``get_logger``. It is instantiated directly through ``logging.Logger``
# instead of being looked up with ``logging.getLogger``.
LOGGER = logging.Logger("pooch")
# Attach a stream handler so that records emitted on this logger are printed.
LOGGER.addHandler(logging.StreamHandler())
25
26
def file_hash(*args, **kwargs):
    """
    Deprecated alias kept for backwards compatibility.

    WARNING: Importing this function from pooch.utils is DEPRECATED.
    Please import from the top-level namespace (`from pooch import file_hash`)
    instead, which is fully backwards compatible with pooch >= 0.1.

    Every call emits a :class:`DeprecationWarning` and then forwards all
    positional and keyword arguments, unchanged, to ``file_hash`` from the
    sibling ``hashes`` module, returning whatever that function returns.

    Examples
    --------

    >>> fname = "test-file-for-hash.txt"
    >>> with open(fname, "w") as f:
    ...     __ = f.write("content of the file")
    >>> print(file_hash(fname))
    0fc74468e6a9a829f103d069aeb2bb4f8646bad58bf146bb0e3379b759ec4a00
    >>> import os
    >>> os.remove(fname)

    """
    # The real implementation is imported at call time rather than at module
    # import time.
    # pylint: disable=import-outside-toplevel
    from .hashes import file_hash as _current_file_hash

    notice = """
    Importing file_hash from pooch.utils is DEPRECATED. Please import from the
    top-level namespace (`from pooch import file_hash`) instead, which is fully
    backwards compatible with pooch >= 0.1.
    """
    # stacklevel=2 attributes the warning to the caller of this wrapper.
    warnings.warn(notice, DeprecationWarning, stacklevel=2)
    return _current_file_hash(*args, **kwargs)
55
56
def get_logger():
    r"""
    Get the default event logger.

    The logger records events like downloading files, unzipping archives, etc.
    Use the method :meth:`logging.Logger.setLevel` of this object to adjust the
    verbosity level from Pooch.

    The object returned is the module-level ``LOGGER``, so every call yields
    the same instance and any configuration applied to it is shared.

    Returns
    -------
    logger : :class:`logging.Logger`
        The logger object for Pooch
    """
    return LOGGER
71
72
def os_cache(project):
    r"""
    Return the operating system's default cache folder for *project*.

    The location is computed by the ``user_cache_dir`` function of the
    ``platformdirs`` package. Typical results are (see the
    `platformdirs documentation <https://platformdirs.readthedocs.io>`__):

    * Mac: ``~/Library/Caches/<AppName>``
    * Unix: ``~/.cache/<AppName>`` or the value of the ``XDG_CACHE_HOME``
      environment variable, if defined.
    * Windows: ``C:\Users\<user>\AppData\Local\<AppAuthor>\<AppName>\Cache``

    Parameters
    ----------
    project : str
        The project name.

    Returns
    -------
    cache_path : :class:`pathlib.Path`
        The default location for the data cache. User directories (``'~'``) are
        not expanded.

    """
    cache_dir = platformdirs.user_cache_dir(project)
    return Path(cache_dir)
100
101
def check_version(version, fallback="master"):
    """
    Return *version* if it is a clean release, otherwise return *fallback*.

    The string is parsed as a PEP440 version. If it carries a local version
    segment (the part after ``+``), it is considered to contain unreleased
    changes and *fallback* is returned instead. For example,
    ``version = "0.1"`` is returned as is but ``version = "0.1+10.8dl8dh9"``
    yields the fallback. This is the convention used by
    `versioneer <https://github.com/warner/python-versioneer>`__ to mark that
    this version is 10 commits ahead of the last release.

    Parameters
    ----------
    version : str
        A version string.
    fallback : str
        What to return if the version string has unreleased changes.

    Returns
    -------
    version : str
        *version* itself when it is PEP440 compliant and has no local segment
        (no unreleased changes); *fallback* when a local segment is present.

    Raises
    ------
    InvalidVersion
        If *version* is not PEP440 compliant.

    Examples
    --------

    >>> check_version("0.1")
    '0.1'
    >>> check_version("0.1a10")
    '0.1a10'
    >>> check_version("0.1+111.9hdg36")
    'master'
    >>> check_version("0.1+111.9hdg36", fallback="dev")
    'dev'

    """
    # Parsing doubles as validation: Version() raises for malformed strings.
    has_local_segment = Version(version).local is not None
    return fallback if has_local_segment else version
146
147
def parse_url(url):
    """
    Parse a URL into 3 components:

    <protocol>://<netloc>/<path>

    Example URLs:

    * http://127.0.0.1:8080/test.nc
    * ftp://127.0.0.1:8080/test.nc
    * doi:10.6084/m9.figshare.923450.v1/test.nc

    The DOI is a special case. The protocol will be "doi", the netloc will be
    the DOI, and the path is what comes after the last "/".
    The only exception are Zenodo dois: the protocol will be "doi", the netloc
    will be composed by the "prefix/suffix" and the path is what comes after
    the second "/". This allows to support special cases of Zenodo dois where
    the path contains forward slashes "/", created by the GitHub-Zenodo
    integration service.

    URLs without a scheme are treated as local files: the protocol is reported
    as "file".

    Parameters
    ----------
    url : str
        The URL.

    Returns
    -------
    parsed_url : dict
        Three components of a URL (e.g.,
        ``{'protocol':'http', 'netloc':'127.0.0.1:8080','path': '/test.nc'}``).

    Raises
    ------
    ValueError
        If a DOI link starts with ``doi://`` or does not contain at least one
        "/" separating the DOI prefix from its suffix.

    """
    if url.startswith("doi://"):
        raise ValueError(
            f"Invalid DOI link '{url}'. You must not use '//' after 'doi:'."
        )
    if url.startswith("doi:"):
        protocol = "doi"
        parts = url[4:].split("/")
        # A DOI is always "<prefix>/<suffix>". Without this guard, the lookup
        # of parts[1] below would fail with an unhelpful IndexError.
        if len(parts) < 2:
            raise ValueError(
                f"Invalid DOI link '{url}'. "
                "A DOI must have the form 'doi:<prefix>/<suffix>'."
            )
        if "zenodo" in parts[1].lower():
            # Zenodo: the DOI is exactly the first two parts and everything
            # after it is the (possibly nested) file path.
            netloc = "/".join(parts[:2])
            path = "/" + "/".join(parts[2:])
        else:
            # Other repositories: only the last part is the file name.
            netloc = "/".join(parts[:-1])
            path = "/" + parts[-1]
    else:
        parsed_url = urlsplit(url)
        # No scheme means the URL points to something on the local disk.
        protocol = parsed_url.scheme or "file"
        netloc = parsed_url.netloc
        path = parsed_url.path
    return {"protocol": protocol, "netloc": netloc, "path": path}
199
200
def cache_location(path, env=None, version=None):
    """
    Work out where the local cache lives for a base path and optional settings.

    If *env* names an environment variable that is set to a non-empty value,
    that value replaces *path*. When *version* is given it is appended as a
    final path component. A leading ``~`` in the result is expanded.

    Parameters
    ----------
    path : str, PathLike, list or tuple
        The path to the local data storage folder. If this is a list or tuple,
        we'll join the parts with the appropriate separator. Use
        :func:`pooch.os_cache` for a sensible default.
    env : str or None
        An environment variable that can be used to overwrite *path*. This
        allows users to control where they want the data to be stored. We'll
        append *version* to the end of this value as well.
    version : str or None
        The version string for your project. Will be appended to given path if
        not None.

    Returns
    -------
    local_path : PathLike
        The path to the local directory.

    """
    # An unset or empty environment variable leaves the given path untouched.
    override = os.environ.get(env) if env is not None else None
    if override:
        path = override
    if isinstance(path, (list, tuple)):
        path = os.path.join(*path)
    if version is not None:
        path = os.path.join(str(path), version)
    return Path(os.path.expanduser(str(path)))
236
237
def make_local_storage(path, env=None):
    """
    Ensure the local cache directory exists and can be written to.

    A missing folder is created (including parents). An existing folder is
    probed by creating a short-lived temporary file inside it.

    Parameters
    ----------
    path : str or PathLike
        The path to the local data storage folder.
    env : str or None
        An environment variable that can be used to overwrite *path*. Only used
        in the error message in case the folder is not writable.

    Raises
    ------
    PermissionError
        If the folder cannot be created or written to. The message names the
        folder and, when *env* is given, the environment variable that can be
        used to pick a different location.
    """
    path = str(path)
    needs_creation = not os.path.exists(path)
    # Only used to word the error message below.
    action = "create" if needs_creation else "write to"

    try:
        if needs_creation:
            # Parallel jobs may race to create the same folder, so an already
            # existing directory must not be treated as an error.
            os.makedirs(path, exist_ok=True)
        else:
            # Writability probe: the file is removed as soon as it is closed.
            with tempfile.NamedTemporaryFile(dir=path):
                pass
    except PermissionError as error:
        parts = [
            str(error),
            f"| Pooch could not {action} data cache folder '{path}'.",
            "Will not be able to download data files.",
        ]
        if env is not None:
            parts.append(
                f"Use environment variable '{env}' to specify a different location."
            )
        raise PermissionError(" ".join(parts)) from error
277
278
@contextmanager
def temporary_file(path=None):
    """
    Provide the name of a closed temporary file and delete it afterwards.

    Using :class:`tempfile.NamedTemporaryFile` will fail on Windows if trying
    to open the file a second time (when passing its name to Pooch function,
    for example). This context manager therefore creates the file, closes it
    right away, yields its path, and removes it when the context exits.

    Parameters
    ----------
    path : str or PathLike
        The directory in which the temporary file will be created.

    Yields
    ------
    fname : str
        The path to the temporary file.

    """
    # Leaving the "with" block closes the handle; delete=False keeps the file
    # on disk so that it can be reopened by name elsewhere.
    with tempfile.NamedTemporaryFile(delete=False, dir=path) as handle:
        fname = handle.name
    try:
        yield fname
    finally:
        # The caller may already have removed or moved the file.
        if os.path.exists(fname):
            os.remove(fname)
308
309
def unique_file_name(url):
    """
    Build a file name that is unique to the given URL.

    Uniqueness comes from prefixing the name with the MD5 hash (hex digest) of
    the full URL. The last portion of the URL path is kept after the hash so
    that the name stays recognizable.

    The format will be: ``{md5}-{filename}.{ext}``

    The file-name part is cropped from the start so that the entire name
    (hash, separator and file name) is at most 255 characters long (the limit
    on most file systems).

    Parameters
    ----------
    url : str
        The URL with a file name at the end.

    Returns
    -------
    fname : str
        The file name, unique to this URL.

    Examples
    --------

    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt"))
    02ddee027ce5ebb3d7059fb23d210604-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2019/data.txt"))
    9780092867b497fca6fc87d8308f1025-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt.gz"))
    181a9d52e908219c2076f55145d6a344-data.txt.gz

    """
    digest = hashlib.md5(url.encode()).hexdigest()
    basename = parse_url(url)["path"].rsplit("/", 1)[-1]
    # Characters left for the file name once the hash and the "-" separator
    # are accounted for.
    budget = 255 - len(digest) - 1
    # Keep the tail of the name so that the extension survives cropping.
    return f"{digest}-{basename[-budget:]}"
341 181a9d52e908219c2076f55145d6a344-data.txt.gz
342
343 """
344 md5 = hashlib.md5(url.encode()).hexdigest()
345 fname = parse_url(url)["path"].split("/")[-1]
346 # Crop the start of the file name to fit 255 characters including the hash
347 # and the :
348 fname = fname[-(255 - len(md5) - 1) :]
349 unique_name = f"{md5}-{fname}"
350 return unique_name