Mercurial > repos > rliterman > csp2

diff CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pooch/hashes.py @ 69:33d812a61356
planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author: jpayne
date: Tue, 18 Mar 2025 17:55:14 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/pooch/hashes.py	Tue Mar 18 17:55:14 2025 -0400
@@ -0,0 +1,222 @@
+# Copyright (c) 2018 The Pooch Developers.
+# Distributed under the terms of the BSD 3-Clause License.
+# SPDX-License-Identifier: BSD-3-Clause
+#
+# This code is part of the Fatiando a Terra project (https://www.fatiando.org)
+#
+"""
+Calculating and checking file hashes.
+"""
+import hashlib
+import functools
+from pathlib import Path
+
+# From the docs: https://docs.python.org/3/library/hashlib.html#hashlib.new
+#   The named constructors are much faster than new() and should be
+#   preferred.
+# Need to fallback on new() for some algorithms.
+ALGORITHMS_AVAILABLE = {
+    alg: getattr(hashlib, alg, functools.partial(hashlib.new, alg))
+    for alg in hashlib.algorithms_available
+}
+
+try:
+    import xxhash
+
+    # xxhash doesn't have a list of available algorithms yet.
+    # https://github.com/ifduyue/python-xxhash/issues/48
+    ALGORITHMS_AVAILABLE.update(
+        {
+            alg: getattr(xxhash, alg, None)
+            for alg in ["xxh128", "xxh64", "xxh32", "xxh3_128", "xxh3_64"]
+        }
+    )
+    # The xxh3 algorithms are only available for version>=2.0. Set to None and
+    # remove to ensure backwards compatibility.
+    ALGORITHMS_AVAILABLE = {
+        alg: func for alg, func in ALGORITHMS_AVAILABLE.items() if func is not None
+    }
+except ImportError:
+    pass
+
+
+def file_hash(fname, alg="sha256"):
+    """
+    Calculate the hash of a given file.
+
+    Useful for checking if a file has changed or been corrupted.
+
+    Parameters
+    ----------
+    fname : str
+        The name of the file.
+    alg : str
+        The type of the hashing algorithm
+
+    Returns
+    -------
+    hash : str
+        The hash of the file.
+
+    Examples
+    --------
+
+    >>> fname = "test-file-for-hash.txt"
+    >>> with open(fname, "w") as f:
+    ...     __ = f.write("content of the file")
+    >>> print(file_hash(fname))
+    0fc74468e6a9a829f103d069aeb2bb4f8646bad58bf146bb0e3379b759ec4a00
+    >>> import os
+    >>> os.remove(fname)
+
+    """
+    if alg not in ALGORITHMS_AVAILABLE:
+        raise ValueError(
+            f"Algorithm '{alg}' not available to the pooch library. "
+            "Only the following algorithms are available "
+            f"{list(ALGORITHMS_AVAILABLE.keys())}."
+        )
+    # Calculate the hash in chunks to avoid overloading the memory
+    chunksize = 65536
+    hasher = ALGORITHMS_AVAILABLE[alg]()
+    with open(fname, "rb") as fin:
+        buff = fin.read(chunksize)
+        while buff:
+            hasher.update(buff)
+            buff = fin.read(chunksize)
+    return hasher.hexdigest()
+
+
+def hash_algorithm(hash_string):
+    """
+    Parse the name of the hash method from the hash string.
+
+    The hash string should have the following form ``algorithm:hash``, where
+    algorithm can be the name of any algorithm known to :mod:`hashlib`.
+
+    If the algorithm is omitted or the hash string is None, will default to
+    ``"sha256"``.
+
+    Parameters
+    ----------
+    hash_string : str
+        The hash string with optional algorithm prepended.
+
+    Returns
+    -------
+    hash_algorithm : str
+        The name of the algorithm.
+
+    Examples
+    --------
+
+    >>> print(hash_algorithm("qouuwhwd2j192y1lb1iwgowdj2898wd2d9"))
+    sha256
+    >>> print(hash_algorithm("md5:qouuwhwd2j192y1lb1iwgowdj2898wd2d9"))
+    md5
+    >>> print(hash_algorithm("sha256:qouuwhwd2j192y1lb1iwgowdj2898wd2d9"))
+    sha256
+    >>> print(hash_algorithm("SHA256:qouuwhwd2j192y1lb1iwgowdj2898wd2d9"))
+    sha256
+    >>> print(hash_algorithm("xxh3_64:qouuwhwd2j192y1lb1iwgowdj2898wd2d9"))
+    xxh3_64
+    >>> print(hash_algorithm(None))
+    sha256
+
+    """
+    default = "sha256"
+    if hash_string is None:
+        algorithm = default
+    elif ":" not in hash_string:
+        algorithm = default
+    else:
+        algorithm = hash_string.split(":")[0]
+    return algorithm.lower()
+
+
+def hash_matches(fname, known_hash, strict=False, source=None):
+    """
+    Check if the hash of a file matches a known hash.
+
+    If the *known_hash* is None, will always return True.
+
+    Coverts hashes to lowercase before comparison to avoid system specific
+    mismatches between hashes in the registry and computed hashes.
+
+    Parameters
+    ----------
+    fname : str or PathLike
+        The path to the file.
+    known_hash : str
+        The known hash. Optionally, prepend ``alg:`` to the hash to specify the
+        hashing algorithm. Default is SHA256.
+    strict : bool
+        If True, will raise a :class:`ValueError` if the hash does not match
+        informing the user that the file may be corrupted.
+    source : str
+        The source of the downloaded file (name or URL, for example). Will be
+        used in the error message if *strict* is True. Has no other use other
+        than reporting to the user where the file came from in case of hash
+        mismatch. If None, will default to *fname*.
+
+    Returns
+    -------
+    is_same : bool
+        True if the hash matches, False otherwise.
+
+    """
+    if known_hash is None:
+        return True
+    algorithm = hash_algorithm(known_hash)
+    new_hash = file_hash(fname, alg=algorithm)
+    matches = new_hash.lower() == known_hash.split(":")[-1].lower()
+    if strict and not matches:
+        if source is None:
+            source = str(fname)
+        raise ValueError(
+            f"{algorithm.upper()} hash of downloaded file ({source}) does not match"
+            f" the known hash: expected {known_hash} but got {new_hash}. Deleted"
+            " download for safety. The downloaded file may have been corrupted or"
+            " the known hash may be outdated."
+        )
+    return matches
+
+
+def make_registry(directory, output, recursive=True):
+    """
+    Make a registry of files and hashes for the given directory.
+
+    This is helpful if you have many files in your test dataset as it keeps you
+    from needing to manually update the registry.
+
+    Parameters
+    ----------
+    directory : str
+        Directory of the test data to put in the registry. All file names in
+        the registry will be relative to this directory.
+    output : str
+        Name of the output registry file.
+    recursive : bool
+        If True, will recursively look for files in subdirectories of
+        *directory*.
+
+    """
+    directory = Path(directory)
+    if recursive:
+        pattern = "**/*"
+    else:
+        pattern = "*"
+
+    files = sorted(
+        str(path.relative_to(directory))
+        for path in directory.glob(pattern)
+        if path.is_file()
+    )
+
+    hashes = [file_hash(str(directory / fname)) for fname in files]
+
+    with open(output, "w", encoding="utf-8") as outfile:
+        for fname, fhash in zip(files, hashes):
+            # Only use Unix separators for the registry so that we don't go
+            # insane dealing with file paths.
+            outfile.write("{} {}\n".format(fname.replace("\\", "/"), fhash))
author	jpayne
date	Tue, 18 Mar 2025 17:55:14 -0400
parents
children