Mercurial > repos > rliterman > csp2
comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pooch/utils.py @ 68:5028fdace37b
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author | jpayne |
---|---|
date | Tue, 18 Mar 2025 16:23:26 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:0e9998148a16 | 68:5028fdace37b |
---|---|
1 # Copyright (c) 2018 The Pooch Developers. | |
2 # Distributed under the terms of the BSD 3-Clause License. | |
3 # SPDX-License-Identifier: BSD-3-Clause | |
4 # | |
5 # This code is part of the Fatiando a Terra project (https://www.fatiando.org) | |
6 # | |
7 """ | |
8 Misc utilities | |
9 """ | |
10 import logging | |
11 import os | |
12 import tempfile | |
13 import hashlib | |
14 from pathlib import Path | |
15 from urllib.parse import urlsplit | |
16 from contextlib import contextmanager | |
17 import warnings | |
18 | |
19 import platformdirs | |
20 from packaging.version import Version | |
21 | |
22 | |
# Module-wide logger for Pooch. It is instantiated directly with
# ``logging.Logger`` (not obtained through ``logging.getLogger``) and gets a
# ``StreamHandler`` attached at import time, so records are written to a
# stream (``sys.stderr`` is the ``StreamHandler`` default).
LOGGER = logging.Logger("pooch")
LOGGER.addHandler(logging.StreamHandler())
25 | |
26 | |
def file_hash(*args, **kwargs):
    """
    Deprecated alias of :func:`pooch.file_hash`.

    WARNING: Importing this function from pooch.utils is DEPRECATED.
    Please import from the top-level namespace (`from pooch import file_hash`)
    instead, which is fully backwards compatible with pooch >= 0.1.

    All positional and keyword arguments are forwarded unchanged to the
    implementation in ``pooch.hashes`` and its return value is passed back.
    A :class:`DeprecationWarning` is emitted on every call.

    Examples
    --------

    >>> fname = "test-file-for-hash.txt"
    >>> with open(fname, "w") as f:
    ...     __ = f.write("content of the file")
    >>> print(file_hash(fname))
    0fc74468e6a9a829f103d069aeb2bb4f8646bad58bf146bb0e3379b759ec4a00
    >>> import os
    >>> os.remove(fname)

    """
    # Imported lazily, inside the function, rather than at module level.
    # pylint: disable=import-outside-toplevel
    from .hashes import file_hash as _current_file_hash

    deprecation_notice = """
    Importing file_hash from pooch.utils is DEPRECATED. Please import from the
    top-level namespace (`from pooch import file_hash`) instead, which is fully
    backwards compatible with pooch >= 0.1.
    """
    # stacklevel=2 attributes the warning to the caller of this wrapper.
    warnings.warn(deprecation_notice, DeprecationWarning, stacklevel=2)
    return _current_file_hash(*args, **kwargs)
55 | |
56 | |
def get_logger():
    """
    Return the logger shared by all of Pooch.

    Pooch reports events such as file downloads and archive extraction
    through this logger. Call :meth:`logging.Logger.setLevel` on the returned
    object to make Pooch more or less verbose.

    Returns
    -------
    logger : :class:`logging.Logger`
        The module-level logger object used by Pooch.
    """
    return LOGGER
71 | |
72 | |
def os_cache(project):
    r"""
    Return the operating system's default cache folder for a project.

    The location is whatever ``platformdirs.user_cache_dir`` reports for the
    given project name. Typical values are (see the
    `platformdirs documentation <https://platformdirs.readthedocs.io>`__):

    * Mac: ``~/Library/Caches/<AppName>``
    * Unix: ``~/.cache/<AppName>`` or the value of the ``XDG_CACHE_HOME``
      environment variable, if defined.
    * Windows: ``C:\Users\<user>\AppData\Local\<AppAuthor>\<AppName>\Cache``

    Parameters
    ----------
    project : str
        The project name.

    Returns
    -------
    cache_path : :class:`pathlib.Path`
        The default location for the data cache. User directories (``'~'``)
        are not expanded.

    """
    cache_dir = platformdirs.user_cache_dir(project)
    return Path(cache_dir)
100 | |
101 | |
def check_version(version, fallback="master"):
    """
    Check if a version is PEP440 compliant and there are no unreleased changes.

    For example, ``version = "0.1"`` will be returned as is but ``version =
    "0.1+10.8dl8dh9"`` will return the fallback. This is the convention used by
    `versioneer <https://github.com/warner/python-versioneer>`__ to mark that
    this version is 10 commits ahead of the last release.

    Parameters
    ----------
    version : str
        A version string.
    fallback : str
        What to return if the version string has unreleased changes.

    Returns
    -------
    version : str
        If *version* is PEP440 compliant and has **no** unreleased changes
        (that is, no local version segment after a ``+``), then return
        *version* unchanged. If it does carry a local segment, return
        *fallback* instead.

    Raises
    ------
    InvalidVersion
        If *version* is not PEP440 compliant.

    Examples
    --------

    >>> check_version("0.1")
    '0.1'
    >>> check_version("0.1a10")
    '0.1a10'
    >>> check_version("0.1+111.9hdg36")
    'master'
    >>> check_version("0.1+111.9hdg36", fallback="dev")
    'dev'

    """
    # Constructing Version validates the string (raising InvalidVersion when
    # it is malformed). The ``local`` attribute is only set when the string
    # has a "+<local>" segment, which marks unreleased changes.
    has_unreleased_changes = Version(version).local is not None
    return fallback if has_unreleased_changes else version
146 | |
147 | |
def parse_url(url):
    """
    Parse a URL into 3 components:

    <protocol>://<netloc>/<path>

    Example URLs:

    * http://127.0.0.1:8080/test.nc
    * ftp://127.0.0.1:8080/test.nc
    * doi:10.6084/m9.figshare.923450.v1/test.nc

    The DOI is a special case. The protocol will be "doi", the netloc will be
    the DOI, and the path is what comes after the last "/".
    The only exception are Zenodo dois: the protocol will be "doi", the netloc
    will be composed by the "prefix/suffix" and the path is what comes after
    the second "/". This allows to support special cases of Zenodo dois where
    the path contains forward slashes "/", created by the GitHub-Zenodo
    integration service.

    A DOI link without any "/" (e.g., ``doi:10.1234``) is handled like any
    other non-Zenodo DOI: the netloc is empty and the whole remainder becomes
    the path.

    Parameters
    ----------
    url : str
        The URL.

    Returns
    -------
    parsed_url : dict
        Three components of a URL (e.g.,
        ``{'protocol':'http', 'netloc':'127.0.0.1:8080','path': '/test.nc'}``).

    Raises
    ------
    ValueError
        If *url* starts with ``doi://``, which is not a valid DOI link.

    """
    if url.startswith("doi://"):
        raise ValueError(
            f"Invalid DOI link '{url}'. You must not use '//' after 'doi:'."
        )
    if url.startswith("doi:"):
        protocol = "doi"
        parts = url[4:].split("/")
        # Only look at the DOI suffix when there is one. Without this guard a
        # DOI lacking a "/" would fail with an opaque IndexError.
        if len(parts) > 1 and "zenodo" in parts[1].lower():
            netloc = "/".join(parts[:2])
            path = "/" + "/".join(parts[2:])
        else:
            netloc = "/".join(parts[:-1])
            path = "/" + parts[-1]
    else:
        parsed_url = urlsplit(url)
        # URLs without a scheme are treated as local files.
        protocol = parsed_url.scheme or "file"
        netloc = parsed_url.netloc
        path = parsed_url.path
    return {"protocol": protocol, "netloc": netloc, "path": path}
199 | |
200 | |
def cache_location(path, env=None, version=None):
    """
    Work out where the local cache lives.

    Starts from *path*, lets a non-empty environment variable named *env*
    replace it, optionally appends *version* as a sub-folder, and finally
    expands a leading ``~`` to the user's home directory.

    Parameters
    ----------
    path : str, PathLike, list or tuple
        The path to the local data storage folder. If this is a list or tuple,
        we'll join the parts with the appropriate separator. Use
        :func:`pooch.os_cache` for a sensible default.
    env : str or None
        An environment variable that can be used to overwrite *path*. This
        allows users to control where they want the data to be stored. It is
        ignored when unset or set to an empty string. We'll append *version*
        to the end of this value as well.
    version : str or None
        The version string for your project. Will be appended to given path if
        not None.

    Returns
    -------
    local_path : PathLike
        The path to the local directory.

    """
    # An unset or empty variable must not override the caller's path.
    override = os.environ.get(env) if env is not None else None
    if override:
        path = override
    if isinstance(path, (list, tuple)):
        path = os.path.join(*path)
    location = str(path)
    if version is not None:
        location = os.path.join(location, version)
    return Path(os.path.expanduser(location))
236 | |
237 | |
def make_local_storage(path, env=None):
    """
    Ensure the local cache folder exists and can be written to.

    A missing folder is created (including parents). An existing folder is
    probed by creating a throw-away temporary file inside it.

    Parameters
    ----------
    path : str or PathLike
        The path to the local data storage folder.
    env : str or None
        An environment variable that can be used to overwrite *path*. Only used
        in the error message in case the folder is not writable.

    Raises
    ------
    PermissionError
        If the folder cannot be created or written to. The message extends
        the original error with a hint about what Pooch was trying to do.
    """
    path = str(path)
    # Decide up front what we are about to attempt; the same word is reused
    # in the error message below.
    action = "write to" if os.path.exists(path) else "create"

    try:
        if action == "create":
            # When running in parallel, several jobs may try to create the
            # folder at once. exist_ok avoids an error if another job wins.
            os.makedirs(path, exist_ok=True)
        else:
            # The temporary file is removed as soon as the block exits.
            with tempfile.NamedTemporaryFile(dir=path):
                pass
    except PermissionError as error:
        details = [
            str(error),
            f"| Pooch could not {action} data cache folder '{path}'.",
            "Will not be able to download data files.",
        ]
        if env is not None:
            details.append(
                f"Use environment variable '{env}' to specify a different location."
            )
        raise PermissionError(" ".join(details)) from error
277 | |
278 | |
@contextmanager
def temporary_file(path=None):
    """
    Provide the name of a closed temporary file and delete it afterwards.

    Using :class:`tempfile.NamedTemporaryFile` will fail on Windows if trying
    to open the file a second time (when passing its name to Pooch function,
    for example). This context manager therefore creates the file, closes it
    right away, yields only the file path, and removes the file on exit if it
    still exists.

    Parameters
    ----------
    path : str or PathLike
        The directory in which the temporary file will be created.

    Yields
    ------
    fname : str
        The path to the temporary file.

    """
    # delete=False keeps the file on disk once the handle is closed at the
    # end of this ``with`` block, so it can be reopened by name elsewhere.
    with tempfile.NamedTemporaryFile(delete=False, dir=path) as handle:
        fname = handle.name
    try:
        yield fname
    finally:
        # The caller may already have moved or deleted the file.
        if os.path.exists(fname):
            os.remove(fname)
308 | |
309 | |
def unique_file_name(url):
    """
    Build a file name that is unique to the given URL.

    The name is made unique by prefixing it with the MD5 hash (hex digest) of
    the full URL. The last portion of the URL path is kept after the hash.

    The format will be: ``{md5}-{filename}.{ext}``

    The file-name portion is cropped from the start so that the entire name
    (including the hash and the ``-`` separator) is at most 255 characters
    long (the limit on most file systems).

    Parameters
    ----------
    url : str
        The URL with a file name at the end.

    Returns
    -------
    fname : str
        The file name, unique to this URL.

    Examples
    --------

    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt"))
    02ddee027ce5ebb3d7059fb23d210604-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2019/data.txt"))
    9780092867b497fca6fc87d8308f1025-data.txt
    >>> print(unique_file_name("https://www.some-server.org/2020/data.txt.gz"))
    181a9d52e908219c2076f55145d6a344-data.txt.gz

    """
    digest = hashlib.md5(url.encode()).hexdigest()
    basename = parse_url(url)["path"].split("/")[-1]
    # Room left for the file name once the hash and the "-" are accounted
    # for; keep the tail of the name so the extension survives cropping.
    max_basename_length = 255 - len(digest) - 1
    return f"{digest}-{basename[-max_basename_length:]}"