Mercurial > repos > jpayne > bioproject_to_srr_2
diff urllib3/connectionpool.py @ 7:5eb2d5e3bf22
planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author | jpayne |
---|---|
date | Sun, 05 May 2024 23:32:17 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/urllib3/connectionpool.py Sun May 05 23:32:17 2024 -0400 @@ -0,0 +1,1186 @@ +from __future__ import annotations + +import errno +import logging +import queue +import sys +import typing +import warnings +import weakref +from socket import timeout as SocketTimeout +from types import TracebackType + +from ._base_connection import _TYPE_BODY +from ._collections import HTTPHeaderDict +from ._request_methods import RequestMethods +from .connection import ( + BaseSSLError, + BrokenPipeError, + DummyConnection, + HTTPConnection, + HTTPException, + HTTPSConnection, + ProxyConfig, + _wrap_proxy_error, +) +from .connection import port_by_scheme as port_by_scheme +from .exceptions import ( + ClosedPoolError, + EmptyPoolError, + FullPoolError, + HostChangedError, + InsecureRequestWarning, + LocationValueError, + MaxRetryError, + NewConnectionError, + ProtocolError, + ProxyError, + ReadTimeoutError, + SSLError, + TimeoutError, +) +from .response import BaseHTTPResponse +from .util.connection import is_connection_dropped +from .util.proxy import connection_requires_http_tunnel +from .util.request import _TYPE_BODY_POSITION, set_file_position +from .util.retry import Retry +from .util.ssl_match_hostname import CertificateError +from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_DEFAULT, Timeout +from .util.url import Url, _encode_target +from .util.url import _normalize_host as normalize_host +from .util.url import parse_url +from .util.util import to_str + +if typing.TYPE_CHECKING: + import ssl + from typing import Literal + + from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection + +log = logging.getLogger(__name__) + +_TYPE_TIMEOUT = typing.Union[Timeout, float, _TYPE_DEFAULT, None] + +_SelfT = typing.TypeVar("_SelfT") + + +# Pool objects +class ConnectionPool: + """ + Base class for all connection pools, such as + :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`. + + .. note:: + ConnectionPool.urlopen() does not normalize or percent-encode target URIs + which is useful if your target server doesn't support percent-encoded + target URIs. + """ + + scheme: str | None = None + QueueCls = queue.LifoQueue + + def __init__(self, host: str, port: int | None = None) -> None: + if not host: + raise LocationValueError("No host specified.") + + self.host = _normalize_host(host, scheme=self.scheme) + self.port = port + + # This property uses 'normalize_host()' (not '_normalize_host()') + # to avoid removing square braces around IPv6 addresses. + # This value is sent to `HTTPConnection.set_tunnel()` if called + # because square braces are required for HTTP CONNECT tunneling. + self._tunnel_host = normalize_host(host, scheme=self.scheme).lower() + + def __str__(self) -> str: + return f"{type(self).__name__}(host={self.host!r}, port={self.port!r})" + + def __enter__(self: _SelfT) -> _SelfT: + return self + + def __exit__( + self, + exc_type: type[BaseException] | None, + exc_val: BaseException | None, + exc_tb: TracebackType | None, + ) -> Literal[False]: + self.close() + # Return False to re-raise any potential exceptions + return False + + def close(self) -> None: + """ + Close all pooled connections and disable the pool. + """ + + +# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252 +_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK} + + +class HTTPConnectionPool(ConnectionPool, RequestMethods): + """ + Thread-safe connection pool for one host. + + :param host: + Host used for this HTTP Connection (e.g. "localhost"), passed into + :class:`http.client.HTTPConnection`. + + :param port: + Port used for this HTTP Connection (None is equivalent to 80), passed + into :class:`http.client.HTTPConnection`. + + :param timeout: + Socket timeout in seconds for each individual connection. This can + be a float or integer, which sets the timeout for the HTTP request, + or an instance of :class:`urllib3.util.Timeout` which gives you more + fine-grained control over request timeouts. After the constructor has + been parsed, this is always a `urllib3.util.Timeout` object. + + :param maxsize: + Number of connections to save that can be reused. More than 1 is useful + in multithreaded situations. If ``block`` is set to False, more + connections will be created but they will not be saved once they've + been used. + + :param block: + If set to True, no more than ``maxsize`` connections will be used at + a time. When no free connections are available, the call will block + until a connection has been released. This is a useful side effect for + particular multithreaded situations where one does not want to use more + than maxsize connections per host to prevent flooding. + + :param headers: + Headers to include with all requests, unless other headers are given + explicitly. + + :param retries: + Retry configuration to use by default with requests in this pool. + + :param _proxy: + Parsed proxy URL, should not be used directly, instead, see + :class:`urllib3.ProxyManager` + + :param _proxy_headers: + A dictionary with proxy headers, should not be used directly, + instead, see :class:`urllib3.ProxyManager` + + :param \\**conn_kw: + Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, + :class:`urllib3.connection.HTTPSConnection` instances. + """ + + scheme = "http" + ConnectionCls: ( + type[BaseHTTPConnection] | type[BaseHTTPSConnection] + ) = HTTPConnection + + def __init__( + self, + host: str, + port: int | None = None, + timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT, + maxsize: int = 1, + block: bool = False, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | bool | int | None = None, + _proxy: Url | None = None, + _proxy_headers: typing.Mapping[str, str] | None = None, + _proxy_config: ProxyConfig | None = None, + **conn_kw: typing.Any, + ): + ConnectionPool.__init__(self, host, port) + RequestMethods.__init__(self, headers) + + if not isinstance(timeout, Timeout): + timeout = Timeout.from_float(timeout) + + if retries is None: + retries = Retry.DEFAULT + + self.timeout = timeout + self.retries = retries + + self.pool: queue.LifoQueue[typing.Any] | None = self.QueueCls(maxsize) + self.block = block + + self.proxy = _proxy + self.proxy_headers = _proxy_headers or {} + self.proxy_config = _proxy_config + + # Fill the queue up so that doing get() on it will block properly + for _ in range(maxsize): + self.pool.put(None) + + # These are mostly for testing and debugging purposes. + self.num_connections = 0 + self.num_requests = 0 + self.conn_kw = conn_kw + + if self.proxy: + # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. + # We cannot know if the user has added default socket options, so we cannot replace the + # list. + self.conn_kw.setdefault("socket_options", []) + + self.conn_kw["proxy"] = self.proxy + self.conn_kw["proxy_config"] = self.proxy_config + + # Do not pass 'self' as callback to 'finalize'. + # Then the 'finalize' would keep an endless living (leak) to self. + # By just passing a reference to the pool allows the garbage collector + # to free self if nobody else has a reference to it. + pool = self.pool + + # Close all the HTTPConnections in the pool before the + # HTTPConnectionPool object is garbage collected. + weakref.finalize(self, _close_pool_connections, pool) + + def _new_conn(self) -> BaseHTTPConnection: + """ + Return a fresh :class:`HTTPConnection`. + """ + self.num_connections += 1 + log.debug( + "Starting new HTTP connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "80", + ) + + conn = self.ConnectionCls( + host=self.host, + port=self.port, + timeout=self.timeout.connect_timeout, + **self.conn_kw, + ) + return conn + + def _get_conn(self, timeout: float | None = None) -> BaseHTTPConnection: + """ + Get a connection. Will return a pooled connection if one is available. + + If no connections are available and :prop:`.block` is ``False``, then a + fresh connection is returned. + + :param timeout: + Seconds to wait before giving up and raising + :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and + :prop:`.block` is ``True``. + """ + conn = None + + if self.pool is None: + raise ClosedPoolError(self, "Pool is closed.") + + try: + conn = self.pool.get(block=self.block, timeout=timeout) + + except AttributeError: # self.pool is None + raise ClosedPoolError(self, "Pool is closed.") from None # Defensive: + + except queue.Empty: + if self.block: + raise EmptyPoolError( + self, + "Pool is empty and a new connection can't be opened due to blocking mode.", + ) from None + pass # Oh well, we'll create a new connection then + + # If this is a persistent connection, check if it got disconnected + if conn and is_connection_dropped(conn): + log.debug("Resetting dropped connection: %s", self.host) + conn.close() + + return conn or self._new_conn() + + def _put_conn(self, conn: BaseHTTPConnection | None) -> None: + """ + Put a connection back into the pool. + + :param conn: + Connection object for the current host and port as returned by + :meth:`._new_conn` or :meth:`._get_conn`. + + If the pool is already full, the connection is closed and discarded + because we exceeded maxsize. If connections are discarded frequently, + then maxsize should be increased. + + If the pool is closed, then the connection will be closed and discarded. + """ + if self.pool is not None: + try: + self.pool.put(conn, block=False) + return # Everything is dandy, done. + except AttributeError: + # self.pool is None. + pass + except queue.Full: + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + if self.block: + # This should never happen if you got the conn from self._get_conn + raise FullPoolError( + self, + "Pool reached maximum size and no more connections are allowed.", + ) from None + + log.warning( + "Connection pool is full, discarding connection: %s. Connection pool size: %s", + self.host, + self.pool.qsize(), + ) + + # Connection never got put back into the pool, close it. + if conn: + conn.close() + + def _validate_conn(self, conn: BaseHTTPConnection) -> None: + """ + Called right before a request is made, after the socket is created. + """ + + def _prepare_proxy(self, conn: BaseHTTPConnection) -> None: + # Nothing to do for HTTP connections. + pass + + def _get_timeout(self, timeout: _TYPE_TIMEOUT) -> Timeout: + """Helper that always returns a :class:`urllib3.util.Timeout`""" + if timeout is _DEFAULT_TIMEOUT: + return self.timeout.clone() + + if isinstance(timeout, Timeout): + return timeout.clone() + else: + # User passed us an int/float. This is for backwards compatibility, + # can be removed later + return Timeout.from_float(timeout) + + def _raise_timeout( + self, + err: BaseSSLError | OSError | SocketTimeout, + url: str, + timeout_value: _TYPE_TIMEOUT | None, + ) -> None: + """Is the error actually a timeout? Will raise a ReadTimeout or pass""" + + if isinstance(err, SocketTimeout): + raise ReadTimeoutError( + self, url, f"Read timed out. (read timeout={timeout_value})" + ) from err + + # See the above comment about EAGAIN in Python 3. + if hasattr(err, "errno") and err.errno in _blocking_errnos: + raise ReadTimeoutError( + self, url, f"Read timed out. (read timeout={timeout_value})" + ) from err + + def _make_request( + self, + conn: BaseHTTPConnection, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | None = None, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + chunked: bool = False, + response_conn: BaseHTTPConnection | None = None, + preload_content: bool = True, + decode_content: bool = True, + enforce_content_length: bool = True, + ) -> BaseHTTPResponse: + """ + Perform a request on a given urllib connection object taken from our + pool. + + :param conn: + a connection from one of our connection pools + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + Pass ``None`` to retry until you receive a response. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param chunked: + If True, urllib3 will send the body using chunked transfer + encoding. Otherwise, urllib3 will send the body using the standard + content-length form. Defaults to False. + + :param response_conn: + Set this to ``None`` if you will handle releasing the connection or + set the connection to have the response release it. + + :param preload_content: + If True, the response's body will be preloaded during construction. + + :param decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param enforce_content_length: + Enforce content length checking. Body returned by server must match + value of Content-Length header, if present. Otherwise, raise error. + """ + self.num_requests += 1 + + timeout_obj = self._get_timeout(timeout) + timeout_obj.start_connect() + conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout) + + try: + # Trigger any extra validation we need to do. + try: + self._validate_conn(conn) + except (SocketTimeout, BaseSSLError) as e: + self._raise_timeout(err=e, url=url, timeout_value=conn.timeout) + raise + + # _validate_conn() starts the connection to an HTTPS proxy + # so we need to wrap errors with 'ProxyError' here too. + except ( + OSError, + NewConnectionError, + TimeoutError, + BaseSSLError, + CertificateError, + SSLError, + ) as e: + new_e: Exception = e + if isinstance(e, (BaseSSLError, CertificateError)): + new_e = SSLError(e) + # If the connection didn't successfully connect to it's proxy + # then there + if isinstance( + new_e, (OSError, NewConnectionError, TimeoutError, SSLError) + ) and (conn and conn.proxy and not conn.has_connected_to_proxy): + new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) + raise new_e + + # conn.request() calls http.client.*.request, not the method in + # urllib3.request. It also calls makefile (recv) on the socket. + try: + conn.request( + method, + url, + body=body, + headers=headers, + chunked=chunked, + preload_content=preload_content, + decode_content=decode_content, + enforce_content_length=enforce_content_length, + ) + + # We are swallowing BrokenPipeError (errno.EPIPE) since the server is + # legitimately able to close the connection after sending a valid response. + # With this behaviour, the received response is still readable. + except BrokenPipeError: + pass + except OSError as e: + # MacOS/Linux + # EPROTOTYPE and ECONNRESET are needed on macOS + # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/ + # Condition changed later to emit ECONNRESET instead of only EPROTOTYPE. + if e.errno != errno.EPROTOTYPE and e.errno != errno.ECONNRESET: + raise + + # Reset the timeout for the recv() on the socket + read_timeout = timeout_obj.read_timeout + + if not conn.is_closed: + # In Python 3 socket.py will catch EAGAIN and return None when you + # try and read into the file pointer created by http.client, which + # instead raises a BadStatusLine exception. Instead of catching + # the exception and assuming all BadStatusLine exceptions are read + # timeouts, check for a zero timeout before making the request. + if read_timeout == 0: + raise ReadTimeoutError( + self, url, f"Read timed out. (read timeout={read_timeout})" + ) + conn.timeout = read_timeout + + # Receive the response from the server + try: + response = conn.getresponse() + except (BaseSSLError, OSError) as e: + self._raise_timeout(err=e, url=url, timeout_value=read_timeout) + raise + + # Set properties that are used by the pooling layer. + response.retries = retries + response._connection = response_conn # type: ignore[attr-defined] + response._pool = self # type: ignore[attr-defined] + + # emscripten connection doesn't have _http_vsn_str + http_version = getattr(conn, "_http_vsn_str", "HTTP/?") + log.debug( + '%s://%s:%s "%s %s %s" %s %s', + self.scheme, + self.host, + self.port, + method, + url, + # HTTP version + http_version, + response.status, + response.length_remaining, + ) + + return response + + def close(self) -> None: + """ + Close all pooled connections and disable the pool. + """ + if self.pool is None: + return + # Disable access to the pool + old_pool, self.pool = self.pool, None + + # Close all the HTTPConnections in the pool. + _close_pool_connections(old_pool) + + def is_same_host(self, url: str) -> bool: + """ + Check if the given ``url`` is a member of the same host as this + connection pool. + """ + if url.startswith("/"): + return True + + # TODO: Add optional support for socket.gethostbyname checking. + scheme, _, host, port, *_ = parse_url(url) + scheme = scheme or "http" + if host is not None: + host = _normalize_host(host, scheme=scheme) + + # Use explicit default port for comparison when none is given + if self.port and not port: + port = port_by_scheme.get(scheme) + elif not self.port and port == port_by_scheme.get(scheme): + port = None + + return (scheme, host, port) == (self.scheme, self.host, self.port) + + def urlopen( # type: ignore[override] + self, + method: str, + url: str, + body: _TYPE_BODY | None = None, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | bool | int | None = None, + redirect: bool = True, + assert_same_host: bool = True, + timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, + pool_timeout: int | None = None, + release_conn: bool | None = None, + chunked: bool = False, + body_pos: _TYPE_BODY_POSITION | None = None, + preload_content: bool = True, + decode_content: bool = True, + **response_kw: typing.Any, + ) -> BaseHTTPResponse: + """ + Get a connection from the pool and perform an HTTP request. This is the + lowest level call for making a request, so you'll need to specify all + the raw details. + + .. note:: + + More commonly, it's appropriate to use a convenience method + such as :meth:`request`. + + .. note:: + + `release_conn` will only behave as expected if + `preload_content=False` because we want to make + `preload_content=False` the default behaviour someday soon without + breaking backwards compatibility. + + :param method: + HTTP request method (such as GET, POST, PUT, etc.) + + :param url: + The URL to perform the request on. + + :param body: + Data to send in the request body, either :class:`str`, :class:`bytes`, + an iterable of :class:`str`/:class:`bytes`, or a file-like object. + + :param headers: + Dictionary of custom headers to send, such as User-Agent, + If-None-Match, etc. If None, pool headers are used. If provided, + these headers completely replace any pool-specific headers. + + :param retries: + Configure the number of retries to allow before raising a + :class:`~urllib3.exceptions.MaxRetryError` exception. + + If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a + :class:`~urllib3.util.retry.Retry` object for fine-grained control + over different types of retries. + Pass an integer number to retry connection errors that many times, + but no other types of errors. Pass zero to never retry. + + If ``False``, then retries are disabled and any exception is raised + immediately. Also, instead of raising a MaxRetryError on redirects, + the redirect response will be returned. + + :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. + + :param redirect: + If True, automatically handle redirects (status codes 301, 302, + 303, 307, 308). Each redirect counts as a retry. Disabling retries + will disable redirect, too. + + :param assert_same_host: + If ``True``, will make sure that the host of the pool requests is + consistent else will raise HostChangedError. When ``False``, you can + use the pool on an HTTP proxy and request foreign hosts. + + :param timeout: + If specified, overrides the default timeout for this one + request. It may be a float (in seconds) or an instance of + :class:`urllib3.util.Timeout`. + + :param pool_timeout: + If set and the pool is set to block=True, then this method will + block for ``pool_timeout`` seconds and raise EmptyPoolError if no + connection is available within the time period. + + :param bool preload_content: + If True, the response's body will be preloaded into memory. + + :param bool decode_content: + If True, will attempt to decode the body based on the + 'content-encoding' header. + + :param release_conn: + If False, then the urlopen call will not release the connection + back into the pool once a response is received (but will release if + you read the entire contents of the response such as when + `preload_content=True`). This is useful if you're not preloading + the response's content immediately. You will need to call + ``r.release_conn()`` on the response ``r`` to return the connection + back into the pool. If None, it takes the value of ``preload_content`` + which defaults to ``True``. + + :param bool chunked: + If True, urllib3 will send the body using chunked transfer + encoding. Otherwise, urllib3 will send the body using the standard + content-length form. Defaults to False. + + :param int body_pos: + Position to seek to in file-like body in the event of a retry or + redirect. Typically this won't need to be set because urllib3 will + auto-populate the value when needed. + """ + parsed_url = parse_url(url) + destination_scheme = parsed_url.scheme + + if headers is None: + headers = self.headers + + if not isinstance(retries, Retry): + retries = Retry.from_int(retries, redirect=redirect, default=self.retries) + + if release_conn is None: + release_conn = preload_content + + # Check host + if assert_same_host and not self.is_same_host(url): + raise HostChangedError(self, url, retries) + + # Ensure that the URL we're connecting to is properly encoded + if url.startswith("/"): + url = to_str(_encode_target(url)) + else: + url = to_str(parsed_url.url) + + conn = None + + # Track whether `conn` needs to be released before + # returning/raising/recursing. Update this variable if necessary, and + # leave `release_conn` constant throughout the function. That way, if + # the function recurses, the original value of `release_conn` will be + # passed down into the recursive call, and its value will be respected. + # + # See issue #651 [1] for details. + # + # [1] <https://github.com/urllib3/urllib3/issues/651> + release_this_conn = release_conn + + http_tunnel_required = connection_requires_http_tunnel( + self.proxy, self.proxy_config, destination_scheme + ) + + # Merge the proxy headers. Only done when not using HTTP CONNECT. We + # have to copy the headers dict so we can safely change it without those + # changes being reflected in anyone else's copy. + if not http_tunnel_required: + headers = headers.copy() # type: ignore[attr-defined] + headers.update(self.proxy_headers) # type: ignore[union-attr] + + # Must keep the exception bound to a separate variable or else Python 3 + # complains about UnboundLocalError. + err = None + + # Keep track of whether we cleanly exited the except block. This + # ensures we do proper cleanup in finally. + clean_exit = False + + # Rewind body position, if needed. Record current position + # for future rewinds in the event of a redirect/retry. + body_pos = set_file_position(body, body_pos) + + try: + # Request a connection from the queue. + timeout_obj = self._get_timeout(timeout) + conn = self._get_conn(timeout=pool_timeout) + + conn.timeout = timeout_obj.connect_timeout # type: ignore[assignment] + + # Is this a closed/new connection that requires CONNECT tunnelling? + if self.proxy is not None and http_tunnel_required and conn.is_closed: + try: + self._prepare_proxy(conn) + except (BaseSSLError, OSError, SocketTimeout) as e: + self._raise_timeout( + err=e, url=self.proxy.url, timeout_value=conn.timeout + ) + raise + + # If we're going to release the connection in ``finally:``, then + # the response doesn't need to know about the connection. Otherwise + # it will also try to release it and we'll have a double-release + # mess. + response_conn = conn if not release_conn else None + + # Make the request on the HTTPConnection object + response = self._make_request( + conn, + method, + url, + timeout=timeout_obj, + body=body, + headers=headers, + chunked=chunked, + retries=retries, + response_conn=response_conn, + preload_content=preload_content, + decode_content=decode_content, + **response_kw, + ) + + # Everything went great! + clean_exit = True + + except EmptyPoolError: + # Didn't get a connection from the pool, no need to clean up + clean_exit = True + release_this_conn = False + raise + + except ( + TimeoutError, + HTTPException, + OSError, + ProtocolError, + BaseSSLError, + SSLError, + CertificateError, + ProxyError, + ) as e: + # Discard the connection for these exceptions. It will be + # replaced during the next _get_conn() call. + clean_exit = False + new_e: Exception = e + if isinstance(e, (BaseSSLError, CertificateError)): + new_e = SSLError(e) + if isinstance( + new_e, + ( + OSError, + NewConnectionError, + TimeoutError, + SSLError, + HTTPException, + ), + ) and (conn and conn.proxy and not conn.has_connected_to_proxy): + new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) + elif isinstance(new_e, (OSError, HTTPException)): + new_e = ProtocolError("Connection aborted.", new_e) + + retries = retries.increment( + method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2] + ) + retries.sleep() + + # Keep track of the error for the retry warning. + err = e + + finally: + if not clean_exit: + # We hit some kind of exception, handled or otherwise. We need + # to throw the connection away unless explicitly told not to. + # Close the connection, set the variable to None, and make sure + # we put the None back in the pool to avoid leaking it. + if conn: + conn.close() + conn = None + release_this_conn = True + + if release_this_conn: + # Put the connection back to be reused. If the connection is + # expired then it will be None, which will get replaced with a + # fresh connection during _get_conn. + self._put_conn(conn) + + if not conn: + # Try again + log.warning( + "Retrying (%r) after connection broken by '%r': %s", retries, err, url + ) + return self.urlopen( + method, + url, + body, + headers, + retries, + redirect, + assert_same_host, + timeout=timeout, + pool_timeout=pool_timeout, + release_conn=release_conn, + chunked=chunked, + body_pos=body_pos, + preload_content=preload_content, + decode_content=decode_content, + **response_kw, + ) + + # Handle redirect? + redirect_location = redirect and response.get_redirect_location() + if redirect_location: + if response.status == 303: + # Change the method according to RFC 9110, Section 15.4.4. + method = "GET" + # And lose the body not to transfer anything sensitive. + body = None + headers = HTTPHeaderDict(headers)._prepare_for_method_change() + + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_redirect: + response.drain_conn() + raise + return response + + response.drain_conn() + retries.sleep_for_retry(response) + log.debug("Redirecting %s -> %s", url, redirect_location) + return self.urlopen( + method, + redirect_location, + body, + headers, + retries=retries, + redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, + pool_timeout=pool_timeout, + release_conn=release_conn, + chunked=chunked, + body_pos=body_pos, + preload_content=preload_content, + decode_content=decode_content, + **response_kw, + ) + + # Check if we should retry the HTTP response. + has_retry_after = bool(response.headers.get("Retry-After")) + if retries.is_retry(method, response.status, has_retry_after): + try: + retries = retries.increment(method, url, response=response, _pool=self) + except MaxRetryError: + if retries.raise_on_status: + response.drain_conn() + raise + return response + + response.drain_conn() + retries.sleep(response) + log.debug("Retry: %s", url) + return self.urlopen( + method, + url, + body, + headers, + retries=retries, + redirect=redirect, + assert_same_host=assert_same_host, + timeout=timeout, + pool_timeout=pool_timeout, + release_conn=release_conn, + chunked=chunked, + body_pos=body_pos, + preload_content=preload_content, + decode_content=decode_content, + **response_kw, + ) + + return response + + +class HTTPSConnectionPool(HTTPConnectionPool): + """ + Same as :class:`.HTTPConnectionPool`, but HTTPS. + + :class:`.HTTPSConnection` uses one of ``assert_fingerprint``, + ``assert_hostname`` and ``host`` in this order to verify connections. + If ``assert_hostname`` is False, no verification is done. + + The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``, + ``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl` + is available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade + the connection socket into an SSL socket. + """ + + scheme = "https" + ConnectionCls: type[BaseHTTPSConnection] = HTTPSConnection + + def __init__( + self, + host: str, + port: int | None = None, + timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT, + maxsize: int = 1, + block: bool = False, + headers: typing.Mapping[str, str] | None = None, + retries: Retry | bool | int | None = None, + _proxy: Url | None = None, + _proxy_headers: typing.Mapping[str, str] | None = None, + key_file: str | None = None, + cert_file: str | None = None, + cert_reqs: int | str | None = None, + key_password: str | None = None, + ca_certs: str | None = None, + ssl_version: int | str | None = None, + ssl_minimum_version: ssl.TLSVersion | None = None, + ssl_maximum_version: ssl.TLSVersion | None = None, + assert_hostname: str | Literal[False] | None = None, + assert_fingerprint: str | None = None, + ca_cert_dir: str | None = None, + **conn_kw: typing.Any, + ) -> None: + super().__init__( + host, + port, + timeout, + maxsize, + block, + headers, + retries, + _proxy, + _proxy_headers, + **conn_kw, + ) + + self.key_file = key_file + self.cert_file = cert_file + self.cert_reqs = cert_reqs + self.key_password = key_password + self.ca_certs = ca_certs + self.ca_cert_dir = ca_cert_dir + self.ssl_version = ssl_version + self.ssl_minimum_version = ssl_minimum_version + self.ssl_maximum_version = ssl_maximum_version + self.assert_hostname = assert_hostname + self.assert_fingerprint = assert_fingerprint + + def _prepare_proxy(self, conn: HTTPSConnection) -> None: # type: ignore[override] + """Establishes a tunnel connection through HTTP CONNECT.""" + if self.proxy and self.proxy.scheme == "https": + tunnel_scheme = "https" + else: + tunnel_scheme = "http" + + conn.set_tunnel( + scheme=tunnel_scheme, + host=self._tunnel_host, + port=self.port, + headers=self.proxy_headers, + ) + conn.connect() + + def _new_conn(self) -> BaseHTTPSConnection: + """ + Return a fresh :class:`urllib3.connection.HTTPConnection`. + """ + self.num_connections += 1 + log.debug( + "Starting new HTTPS connection (%d): %s:%s", + self.num_connections, + self.host, + self.port or "443", + ) + + if not self.ConnectionCls or self.ConnectionCls is DummyConnection: # type: ignore[comparison-overlap] + raise ImportError( + "Can't connect to HTTPS URL because the SSL module is not available." + ) + + actual_host: str = self.host + actual_port = self.port + if self.proxy is not None and self.proxy.host is not None: + actual_host = self.proxy.host + actual_port = self.proxy.port + + return self.ConnectionCls( + host=actual_host, + port=actual_port, + timeout=self.timeout.connect_timeout, + cert_file=self.cert_file, + key_file=self.key_file, + key_password=self.key_password, + cert_reqs=self.cert_reqs, + ca_certs=self.ca_certs, + ca_cert_dir=self.ca_cert_dir, + assert_hostname=self.assert_hostname, + assert_fingerprint=self.assert_fingerprint, + ssl_version=self.ssl_version, + ssl_minimum_version=self.ssl_minimum_version, + ssl_maximum_version=self.ssl_maximum_version, + **self.conn_kw, + ) + + def _validate_conn(self, conn: BaseHTTPConnection) -> None: + """ + Called right before a request is made, after the socket is created. + """ + super()._validate_conn(conn) + + # Force connect early to allow us to validate the connection. + if conn.is_closed: + conn.connect() + + # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791 + if not conn.is_verified and not conn.proxy_is_verified: + warnings.warn( + ( + f"Unverified HTTPS request is being made to host '{conn.host}'. " + "Adding certificate verification is strongly advised. See: " + "https://urllib3.readthedocs.io/en/latest/advanced-usage.html" + "#tls-warnings" + ), + InsecureRequestWarning, + ) + + +def connection_from_url(url: str, **kw: typing.Any) -> HTTPConnectionPool: + """ + Given a url, return an :class:`.ConnectionPool` instance of its host. + + This is a shortcut for not having to parse out the scheme, host, and port + of the url before creating an :class:`.ConnectionPool` instance. + + :param url: + Absolute URL string that must include the scheme. Port is optional. + + :param \\**kw: + Passes additional parameters to the constructor of the appropriate + :class:`.ConnectionPool`. Useful for specifying things like + timeout, maxsize, headers, etc. + + Example:: + + >>> conn = connection_from_url('http://google.com/') + >>> r = conn.request('GET', '/') + """ + scheme, _, host, port, *_ = parse_url(url) + scheme = scheme or "http" + port = port or port_by_scheme.get(scheme, 80) + if scheme == "https": + return HTTPSConnectionPool(host, port=port, **kw) # type: ignore[arg-type] + else: + return HTTPConnectionPool(host, port=port, **kw) # type: ignore[arg-type] + + +@typing.overload +def _normalize_host(host: None, scheme: str | None) -> None: + ... + + +@typing.overload +def _normalize_host(host: str, scheme: str | None) -> str: + ... + + +def _normalize_host(host: str | None, scheme: str | None) -> str | None: + """ + Normalize hosts for comparisons and use with sockets. + """ + + host = normalize_host(host, scheme) + + # httplib doesn't like it when we include brackets in IPv6 addresses + # Specifically, if we include brackets but also pass the port then + # httplib crazily doubles up the square brackets on the Host header. + # Instead, we need to make sure we never pass ``None`` as the port. + # However, for backward compatibility reasons we can't actually + # *assert* that. See http://bugs.python.org/issue28539 + if host and host.startswith("[") and host.endswith("]"): + host = host[1:-1] + return host + + +def _url_from_pool( + pool: HTTPConnectionPool | HTTPSConnectionPool, path: str | None = None +) -> str: + """Returns the URL from a given connection pool. This is mainly used for testing and logging.""" + return Url(scheme=pool.scheme, host=pool.host, port=pool.port, path=path).url + + +def _close_pool_connections(pool: queue.LifoQueue[typing.Any]) -> None: + """Drains a queue of connections and closes each one.""" + try: + while True: + conn = pool.get(block=False) + if conn: + conn.close() + except queue.Empty: + pass # Done.