comparison urllib3/connectionpool.py @ 7:5eb2d5e3bf22

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Sun, 05 May 2024 23:32:17 -0400
parents
children
comparison
equal deleted inserted replaced
6:b2745907b1eb 7:5eb2d5e3bf22
1 from __future__ import annotations
2
3 import errno
4 import logging
5 import queue
6 import sys
7 import typing
8 import warnings
9 import weakref
10 from socket import timeout as SocketTimeout
11 from types import TracebackType
12
13 from ._base_connection import _TYPE_BODY
14 from ._collections import HTTPHeaderDict
15 from ._request_methods import RequestMethods
16 from .connection import (
17 BaseSSLError,
18 BrokenPipeError,
19 DummyConnection,
20 HTTPConnection,
21 HTTPException,
22 HTTPSConnection,
23 ProxyConfig,
24 _wrap_proxy_error,
25 )
26 from .connection import port_by_scheme as port_by_scheme
27 from .exceptions import (
28 ClosedPoolError,
29 EmptyPoolError,
30 FullPoolError,
31 HostChangedError,
32 InsecureRequestWarning,
33 LocationValueError,
34 MaxRetryError,
35 NewConnectionError,
36 ProtocolError,
37 ProxyError,
38 ReadTimeoutError,
39 SSLError,
40 TimeoutError,
41 )
42 from .response import BaseHTTPResponse
43 from .util.connection import is_connection_dropped
44 from .util.proxy import connection_requires_http_tunnel
45 from .util.request import _TYPE_BODY_POSITION, set_file_position
46 from .util.retry import Retry
47 from .util.ssl_match_hostname import CertificateError
48 from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_DEFAULT, Timeout
49 from .util.url import Url, _encode_target
50 from .util.url import _normalize_host as normalize_host
51 from .util.url import parse_url
52 from .util.util import to_str
53
if typing.TYPE_CHECKING:
    # Imported only for annotations; avoids import cycles at runtime.
    import ssl
    from typing import Literal

    from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# All accepted spellings of a timeout argument: a Timeout object, a raw
# number of seconds, the _DEFAULT_TIMEOUT sentinel, or None (no timeout).
_TYPE_TIMEOUT = typing.Union[Timeout, float, _TYPE_DEFAULT, None]

# TypeVar so __enter__ can be annotated to return the concrete subclass.
_SelfT = typing.TypeVar("_SelfT")
65
66
# Pool objects
class ConnectionPool:
    """
    Base class for all connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    .. note::
       ConnectionPool.urlopen() does not normalize or percent-encode target URIs
       which is useful if your target server doesn't support percent-encoded
       target URIs.
    """

    # Subclasses set this to "http"/"https"; used when normalizing hosts.
    scheme: str | None = None
    # Queue type holding idle connections; LIFO keeps recently-used
    # (still warm) connections at the front.
    QueueCls = queue.LifoQueue

    def __init__(self, host: str, port: int | None = None) -> None:
        if not host:
            raise LocationValueError("No host specified.")

        self.host = _normalize_host(host, scheme=self.scheme)
        self.port = port

        # Unlike '_normalize_host()', 'normalize_host()' keeps the square
        # braces around IPv6 addresses.  The braces are mandatory when this
        # value is later handed to `HTTPConnection.set_tunnel()`, because
        # HTTP CONNECT tunneling requires them.
        self._tunnel_host = normalize_host(host, scheme=self.scheme).lower()

    def __str__(self) -> str:
        cls_name = type(self).__name__
        return f"{cls_name}(host={self.host!r}, port={self.port!r})"

    def __enter__(self: _SelfT) -> _SelfT:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        self.close()
        # False -> any exception raised inside the 'with' block propagates.
        return False

    def close(self) -> None:
        """
        Close all pooled connections and disable the pool.
        """
116
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
# Errno values raised by a non-blocking socket when no data is ready yet;
# _raise_timeout() treats these as read timeouts.
_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK}
119
120
class HTTPConnectionPool(ConnectionPool, RequestMethods):
    """
    Thread-safe connection pool for one host.

    :param host:
        Host used for this HTTP Connection (e.g. "localhost"), passed into
        :class:`http.client.HTTPConnection`.

    :param port:
        Port used for this HTTP Connection (None is equivalent to 80), passed
        into :class:`http.client.HTTPConnection`.

    :param timeout:
        Socket timeout in seconds for each individual connection. This can
        be a float or integer, which sets the timeout for the HTTP request,
        or an instance of :class:`urllib3.util.Timeout` which gives you more
        fine-grained control over request timeouts. After the constructor has
        been parsed, this is always a `urllib3.util.Timeout` object.

    :param maxsize:
        Number of connections to save that can be reused. More than 1 is useful
        in multithreaded situations. If ``block`` is set to False, more
        connections will be created but they will not be saved once they've
        been used.

    :param block:
        If set to True, no more than ``maxsize`` connections will be used at
        a time. When no free connections are available, the call will block
        until a connection has been released. This is a useful side effect for
        particular multithreaded situations where one does not want to use more
        than maxsize connections per host to prevent flooding.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param retries:
        Retry configuration to use by default with requests in this pool.

    :param _proxy:
        Parsed proxy URL, should not be used directly, instead, see
        :class:`urllib3.ProxyManager`

    :param _proxy_headers:
        A dictionary with proxy headers, should not be used directly,
        instead, see :class:`urllib3.ProxyManager`

    :param \\**conn_kw:
        Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`,
        :class:`urllib3.connection.HTTPSConnection` instances.
    """

    # URL scheme served by this pool; compared in is_same_host().
    scheme = "http"
    # Connection class instantiated by _new_conn(); HTTPSConnectionPool
    # overrides this with HTTPSConnection.
    ConnectionCls: (
        type[BaseHTTPConnection] | type[BaseHTTPSConnection]
    ) = HTTPConnection
177
    def __init__(
        self,
        host: str,
        port: int | None = None,
        timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT,
        maxsize: int = 1,
        block: bool = False,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | bool | int | None = None,
        _proxy: Url | None = None,
        _proxy_headers: typing.Mapping[str, str] | None = None,
        _proxy_config: ProxyConfig | None = None,
        **conn_kw: typing.Any,
    ):
        # Parameter semantics are documented in the class docstring.
        ConnectionPool.__init__(self, host, port)
        RequestMethods.__init__(self, headers)

        # Normalize a raw int/float (or None) into a Timeout object.
        if not isinstance(timeout, Timeout):
            timeout = Timeout.from_float(timeout)

        if retries is None:
            retries = Retry.DEFAULT

        self.timeout = timeout
        self.retries = retries

        self.pool: queue.LifoQueue[typing.Any] | None = self.QueueCls(maxsize)
        self.block = block

        self.proxy = _proxy
        self.proxy_headers = _proxy_headers or {}
        self.proxy_config = _proxy_config

        # Fill the queue up so that doing get() on it will block properly
        # (each None placeholder represents an available connection slot).
        for _ in range(maxsize):
            self.pool.put(None)

        # These are mostly for testing and debugging purposes.
        self.num_connections = 0
        self.num_requests = 0
        self.conn_kw = conn_kw

        if self.proxy:
            # Enable Nagle's algorithm for proxies, to avoid packet fragmentation.
            # We cannot know if the user has added default socket options, so we cannot replace the
            # list.
            self.conn_kw.setdefault("socket_options", [])

            self.conn_kw["proxy"] = self.proxy
            self.conn_kw["proxy_config"] = self.proxy_config

        # Do not pass 'self' as callback to 'finalize'.
        # Then the 'finalize' would keep an endless living (leak) to self.
        # By just passing a reference to the pool allows the garbage collector
        # to free self if nobody else has a reference to it.
        pool = self.pool

        # Close all the HTTPConnections in the pool before the
        # HTTPConnectionPool object is garbage collected.
        weakref.finalize(self, _close_pool_connections, pool)
238
239 def _new_conn(self) -> BaseHTTPConnection:
240 """
241 Return a fresh :class:`HTTPConnection`.
242 """
243 self.num_connections += 1
244 log.debug(
245 "Starting new HTTP connection (%d): %s:%s",
246 self.num_connections,
247 self.host,
248 self.port or "80",
249 )
250
251 conn = self.ConnectionCls(
252 host=self.host,
253 port=self.port,
254 timeout=self.timeout.connect_timeout,
255 **self.conn_kw,
256 )
257 return conn
258
259 def _get_conn(self, timeout: float | None = None) -> BaseHTTPConnection:
260 """
261 Get a connection. Will return a pooled connection if one is available.
262
263 If no connections are available and :prop:`.block` is ``False``, then a
264 fresh connection is returned.
265
266 :param timeout:
267 Seconds to wait before giving up and raising
268 :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and
269 :prop:`.block` is ``True``.
270 """
271 conn = None
272
273 if self.pool is None:
274 raise ClosedPoolError(self, "Pool is closed.")
275
276 try:
277 conn = self.pool.get(block=self.block, timeout=timeout)
278
279 except AttributeError: # self.pool is None
280 raise ClosedPoolError(self, "Pool is closed.") from None # Defensive:
281
282 except queue.Empty:
283 if self.block:
284 raise EmptyPoolError(
285 self,
286 "Pool is empty and a new connection can't be opened due to blocking mode.",
287 ) from None
288 pass # Oh well, we'll create a new connection then
289
290 # If this is a persistent connection, check if it got disconnected
291 if conn and is_connection_dropped(conn):
292 log.debug("Resetting dropped connection: %s", self.host)
293 conn.close()
294
295 return conn or self._new_conn()
296
297 def _put_conn(self, conn: BaseHTTPConnection | None) -> None:
298 """
299 Put a connection back into the pool.
300
301 :param conn:
302 Connection object for the current host and port as returned by
303 :meth:`._new_conn` or :meth:`._get_conn`.
304
305 If the pool is already full, the connection is closed and discarded
306 because we exceeded maxsize. If connections are discarded frequently,
307 then maxsize should be increased.
308
309 If the pool is closed, then the connection will be closed and discarded.
310 """
311 if self.pool is not None:
312 try:
313 self.pool.put(conn, block=False)
314 return # Everything is dandy, done.
315 except AttributeError:
316 # self.pool is None.
317 pass
318 except queue.Full:
319 # Connection never got put back into the pool, close it.
320 if conn:
321 conn.close()
322
323 if self.block:
324 # This should never happen if you got the conn from self._get_conn
325 raise FullPoolError(
326 self,
327 "Pool reached maximum size and no more connections are allowed.",
328 ) from None
329
330 log.warning(
331 "Connection pool is full, discarding connection: %s. Connection pool size: %s",
332 self.host,
333 self.pool.qsize(),
334 )
335
336 # Connection never got put back into the pool, close it.
337 if conn:
338 conn.close()
339
340 def _validate_conn(self, conn: BaseHTTPConnection) -> None:
341 """
342 Called right before a request is made, after the socket is created.
343 """
344
345 def _prepare_proxy(self, conn: BaseHTTPConnection) -> None:
346 # Nothing to do for HTTP connections.
347 pass
348
349 def _get_timeout(self, timeout: _TYPE_TIMEOUT) -> Timeout:
350 """Helper that always returns a :class:`urllib3.util.Timeout`"""
351 if timeout is _DEFAULT_TIMEOUT:
352 return self.timeout.clone()
353
354 if isinstance(timeout, Timeout):
355 return timeout.clone()
356 else:
357 # User passed us an int/float. This is for backwards compatibility,
358 # can be removed later
359 return Timeout.from_float(timeout)
360
361 def _raise_timeout(
362 self,
363 err: BaseSSLError | OSError | SocketTimeout,
364 url: str,
365 timeout_value: _TYPE_TIMEOUT | None,
366 ) -> None:
367 """Is the error actually a timeout? Will raise a ReadTimeout or pass"""
368
369 if isinstance(err, SocketTimeout):
370 raise ReadTimeoutError(
371 self, url, f"Read timed out. (read timeout={timeout_value})"
372 ) from err
373
374 # See the above comment about EAGAIN in Python 3.
375 if hasattr(err, "errno") and err.errno in _blocking_errnos:
376 raise ReadTimeoutError(
377 self, url, f"Read timed out. (read timeout={timeout_value})"
378 ) from err
379
    def _make_request(
        self,
        conn: BaseHTTPConnection,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | None = None,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        chunked: bool = False,
        response_conn: BaseHTTPConnection | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        enforce_content_length: bool = True,
    ) -> BaseHTTPResponse:
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param url:
            The URL to perform the request on.

        :param body:
            Data to send in the request body, either :class:`str`, :class:`bytes`,
            an iterable of :class:`str`/:class:`bytes`, or a file-like object.

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Configure the number of retries to allow before raising a
            :class:`~urllib3.exceptions.MaxRetryError` exception.

            Pass ``None`` to retry until you receive a response. Pass a
            :class:`~urllib3.util.retry.Retry` object for fine-grained control
            over different types of retries.
            Pass an integer number to retry connection errors that many times,
            but no other types of errors. Pass zero to never retry.

            If ``False``, then retries are disabled and any exception is raised
            immediately. Also, instead of raising a MaxRetryError on redirects,
            the redirect response will be returned.

        :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

        :param timeout:
            If specified, overrides the default timeout for this one
            request. It may be a float (in seconds) or an instance of
            :class:`urllib3.util.Timeout`.

        :param chunked:
            If True, urllib3 will send the body using chunked transfer
            encoding. Otherwise, urllib3 will send the body using the standard
            content-length form. Defaults to False.

        :param response_conn:
            Set this to ``None`` if you will handle releasing the connection or
            set the connection to have the response release it.

        :param preload_content:
            If True, the response's body will be preloaded during construction.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param enforce_content_length:
            Enforce content length checking. Body returned by server must match
            value of Content-Length header, if present. Otherwise, raise error.
        """
        self.num_requests += 1

        # Resolve the timeout and start the connect-phase clock so the
        # remaining read budget can be computed later.
        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

        try:
            # Trigger any extra validation we need to do.
            try:
                self._validate_conn(conn)
            except (SocketTimeout, BaseSSLError) as e:
                self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
                raise

        # _validate_conn() starts the connection to an HTTPS proxy
        # so we need to wrap errors with 'ProxyError' here too.
        except (
            OSError,
            NewConnectionError,
            TimeoutError,
            BaseSSLError,
            CertificateError,
            SSLError,
        ) as e:
            new_e: Exception = e
            # SSL-layer failures are normalized to urllib3's SSLError.
            if isinstance(e, (BaseSSLError, CertificateError)):
                new_e = SSLError(e)
            # If the connection didn't successfully connect to its proxy,
            # attribute the failure to the proxy by wrapping it in ProxyError.
            if isinstance(
                new_e, (OSError, NewConnectionError, TimeoutError, SSLError)
            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):
                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
            raise new_e

        # conn.request() calls http.client.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        try:
            conn.request(
                method,
                url,
                body=body,
                headers=headers,
                chunked=chunked,
                preload_content=preload_content,
                decode_content=decode_content,
                enforce_content_length=enforce_content_length,
            )

        # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
        # legitimately able to close the connection after sending a valid response.
        # With this behaviour, the received response is still readable.
        except BrokenPipeError:
            pass
        except OSError as e:
            # MacOS/Linux
            # EPROTOTYPE and ECONNRESET are needed on macOS
            # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
            # Condition changed later to emit ECONNRESET instead of only EPROTOTYPE.
            if e.errno != errno.EPROTOTYPE and e.errno != errno.ECONNRESET:
                raise

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        if not conn.is_closed:
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url, f"Read timed out. (read timeout={read_timeout})"
                )
            conn.timeout = read_timeout

        # Receive the response from the server
        try:
            response = conn.getresponse()
        except (BaseSSLError, OSError) as e:
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # Set properties that are used by the pooling layer.
        response.retries = retries
        response._connection = response_conn  # type: ignore[attr-defined]
        response._pool = self  # type: ignore[attr-defined]

        # emscripten connection doesn't have _http_vsn_str
        http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
        log.debug(
            '%s://%s:%s "%s %s %s" %s %s',
            self.scheme,
            self.host,
            self.port,
            method,
            url,
            # HTTP version
            http_version,
            response.status,
            response.length_remaining,
        )

        return response
563
564 def close(self) -> None:
565 """
566 Close all pooled connections and disable the pool.
567 """
568 if self.pool is None:
569 return
570 # Disable access to the pool
571 old_pool, self.pool = self.pool, None
572
573 # Close all the HTTPConnections in the pool.
574 _close_pool_connections(old_pool)
575
576 def is_same_host(self, url: str) -> bool:
577 """
578 Check if the given ``url`` is a member of the same host as this
579 connection pool.
580 """
581 if url.startswith("/"):
582 return True
583
584 # TODO: Add optional support for socket.gethostbyname checking.
585 scheme, _, host, port, *_ = parse_url(url)
586 scheme = scheme or "http"
587 if host is not None:
588 host = _normalize_host(host, scheme=scheme)
589
590 # Use explicit default port for comparison when none is given
591 if self.port and not port:
592 port = port_by_scheme.get(scheme)
593 elif not self.port and port == port_by_scheme.get(scheme):
594 port = None
595
596 return (scheme, host, port) == (self.scheme, self.host, self.port)
597
    def urlopen(  # type: ignore[override]
        self,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | bool | int | None = None,
        redirect: bool = True,
        assert_same_host: bool = True,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        pool_timeout: int | None = None,
        release_conn: bool | None = None,
        chunked: bool = False,
        body_pos: _TYPE_BODY_POSITION | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        **response_kw: typing.Any,
    ) -> BaseHTTPResponse:
        """
        Get a connection from the pool and perform an HTTP request. This is the
        lowest level call for making a request, so you'll need to specify all
        the raw details.

        .. note::

           More commonly, it's appropriate to use a convenience method
           such as :meth:`request`.

        .. note::

           `release_conn` will only behave as expected if
           `preload_content=False` because we want to make
           `preload_content=False` the default behaviour someday soon without
           breaking backwards compatibility.

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param url:
            The URL to perform the request on.

        :param body:
            Data to send in the request body, either :class:`str`, :class:`bytes`,
            an iterable of :class:`str`/:class:`bytes`, or a file-like object.

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Configure the number of retries to allow before raising a
            :class:`~urllib3.exceptions.MaxRetryError` exception.

            If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a
            :class:`~urllib3.util.retry.Retry` object for fine-grained control
            over different types of retries.
            Pass an integer number to retry connection errors that many times,
            but no other types of errors. Pass zero to never retry.

            If ``False``, then retries are disabled and any exception is raised
            immediately. Also, instead of raising a MaxRetryError on redirects,
            the redirect response will be returned.

        :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

        :param redirect:
            If True, automatically handle redirects (status codes 301, 302,
            303, 307, 308). Each redirect counts as a retry. Disabling retries
            will disable redirect, too.

        :param assert_same_host:
            If ``True``, will make sure that the host of the pool requests is
            consistent else will raise HostChangedError. When ``False``, you can
            use the pool on an HTTP proxy and request foreign hosts.

        :param timeout:
            If specified, overrides the default timeout for this one
            request. It may be a float (in seconds) or an instance of
            :class:`urllib3.util.Timeout`.

        :param pool_timeout:
            If set and the pool is set to block=True, then this method will
            block for ``pool_timeout`` seconds and raise EmptyPoolError if no
            connection is available within the time period.

        :param bool preload_content:
            If True, the response's body will be preloaded into memory.

        :param bool decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param release_conn:
            If False, then the urlopen call will not release the connection
            back into the pool once a response is received (but will release if
            you read the entire contents of the response such as when
            `preload_content=True`). This is useful if you're not preloading
            the response's content immediately. You will need to call
            ``r.release_conn()`` on the response ``r`` to return the connection
            back into the pool. If None, it takes the value of ``preload_content``
            which defaults to ``True``.

        :param bool chunked:
            If True, urllib3 will send the body using chunked transfer
            encoding. Otherwise, urllib3 will send the body using the standard
            content-length form. Defaults to False.

        :param int body_pos:
            Position to seek to in file-like body in the event of a retry or
            redirect. Typically this won't need to be set because urllib3 will
            auto-populate the value when needed.
        """
        parsed_url = parse_url(url)
        destination_scheme = parsed_url.scheme

        if headers is None:
            headers = self.headers

        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect, default=self.retries)

        if release_conn is None:
            release_conn = preload_content

        # Check host
        if assert_same_host and not self.is_same_host(url):
            raise HostChangedError(self, url, retries)

        # Ensure that the URL we're connecting to is properly encoded
        if url.startswith("/"):
            url = to_str(_encode_target(url))
        else:
            url = to_str(parsed_url.url)

        conn = None

        # Track whether `conn` needs to be released before
        # returning/raising/recursing. Update this variable if necessary, and
        # leave `release_conn` constant throughout the function. That way, if
        # the function recurses, the original value of `release_conn` will be
        # passed down into the recursive call, and its value will be respected.
        #
        # See issue #651 [1] for details.
        #
        # [1] <https://github.com/urllib3/urllib3/issues/651>
        release_this_conn = release_conn

        http_tunnel_required = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, destination_scheme
        )

        # Merge the proxy headers. Only done when not using HTTP CONNECT. We
        # have to copy the headers dict so we can safely change it without those
        # changes being reflected in anyone else's copy.
        if not http_tunnel_required:
            headers = headers.copy()  # type: ignore[attr-defined]
            headers.update(self.proxy_headers)  # type: ignore[union-attr]

        # Must keep the exception bound to a separate variable or else Python 3
        # complains about UnboundLocalError.
        err = None

        # Keep track of whether we cleanly exited the except block. This
        # ensures we do proper cleanup in finally.
        clean_exit = False

        # Rewind body position, if needed. Record current position
        # for future rewinds in the event of a redirect/retry.
        body_pos = set_file_position(body, body_pos)

        try:
            # Request a connection from the queue.
            timeout_obj = self._get_timeout(timeout)
            conn = self._get_conn(timeout=pool_timeout)

            conn.timeout = timeout_obj.connect_timeout  # type: ignore[assignment]

            # Is this a closed/new connection that requires CONNECT tunnelling?
            if self.proxy is not None and http_tunnel_required and conn.is_closed:
                try:
                    self._prepare_proxy(conn)
                except (BaseSSLError, OSError, SocketTimeout) as e:
                    self._raise_timeout(
                        err=e, url=self.proxy.url, timeout_value=conn.timeout
                    )
                    raise

            # If we're going to release the connection in ``finally:``, then
            # the response doesn't need to know about the connection. Otherwise
            # it will also try to release it and we'll have a double-release
            # mess.
            response_conn = conn if not release_conn else None

            # Make the request on the HTTPConnection object
            response = self._make_request(
                conn,
                method,
                url,
                timeout=timeout_obj,
                body=body,
                headers=headers,
                chunked=chunked,
                retries=retries,
                response_conn=response_conn,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

            # Everything went great!
            clean_exit = True

        except EmptyPoolError:
            # Didn't get a connection from the pool, no need to clean up
            clean_exit = True
            release_this_conn = False
            raise

        except (
            TimeoutError,
            HTTPException,
            OSError,
            ProtocolError,
            BaseSSLError,
            SSLError,
            CertificateError,
            ProxyError,
        ) as e:
            # Discard the connection for these exceptions. It will be
            # replaced during the next _get_conn() call.
            clean_exit = False
            new_e: Exception = e
            if isinstance(e, (BaseSSLError, CertificateError)):
                new_e = SSLError(e)
            # Failures before the proxy handshake completed are attributed
            # to the proxy; anything socket/HTTP-level else becomes a
            # ProtocolError.
            if isinstance(
                new_e,
                (
                    OSError,
                    NewConnectionError,
                    TimeoutError,
                    SSLError,
                    HTTPException,
                ),
            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):
                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
            elif isinstance(new_e, (OSError, HTTPException)):
                new_e = ProtocolError("Connection aborted.", new_e)

            # increment() re-raises MaxRetryError when the budget is spent.
            retries = retries.increment(
                method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
            )
            retries.sleep()

            # Keep track of the error for the retry warning.
            err = e

        finally:
            if not clean_exit:
                # We hit some kind of exception, handled or otherwise. We need
                # to throw the connection away unless explicitly told not to.
                # Close the connection, set the variable to None, and make sure
                # we put the None back in the pool to avoid leaking it.
                if conn:
                    conn.close()
                    conn = None
                release_this_conn = True

            if release_this_conn:
                # Put the connection back to be reused. If the connection is
                # expired then it will be None, which will get replaced with a
                # fresh connection during _get_conn.
                self._put_conn(conn)

        if not conn:
            # Try again
            log.warning(
                "Retrying (%r) after connection broken by '%r': %s", retries, err, url
            )
            return self.urlopen(
                method,
                url,
                body,
                headers,
                retries,
                redirect,
                assert_same_host,
                timeout=timeout,
                pool_timeout=pool_timeout,
                release_conn=release_conn,
                chunked=chunked,
                body_pos=body_pos,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

        # Handle redirect?
        redirect_location = redirect and response.get_redirect_location()
        if redirect_location:
            if response.status == 303:
                # Change the method according to RFC 9110, Section 15.4.4.
                method = "GET"
                # And lose the body not to transfer anything sensitive.
                body = None
                headers = HTTPHeaderDict(headers)._prepare_for_method_change()

            try:
                retries = retries.increment(method, url, response=response, _pool=self)
            except MaxRetryError:
                if retries.raise_on_redirect:
                    response.drain_conn()
                    raise
                return response

            response.drain_conn()
            retries.sleep_for_retry(response)
            log.debug("Redirecting %s -> %s", url, redirect_location)
            return self.urlopen(
                method,
                redirect_location,
                body,
                headers,
                retries=retries,
                redirect=redirect,
                assert_same_host=assert_same_host,
                timeout=timeout,
                pool_timeout=pool_timeout,
                release_conn=release_conn,
                chunked=chunked,
                body_pos=body_pos,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

        # Check if we should retry the HTTP response.
        has_retry_after = bool(response.headers.get("Retry-After"))
        if retries.is_retry(method, response.status, has_retry_after):
            try:
                retries = retries.increment(method, url, response=response, _pool=self)
            except MaxRetryError:
                if retries.raise_on_status:
                    response.drain_conn()
                    raise
                return response

            response.drain_conn()
            retries.sleep(response)
            log.debug("Retry: %s", url)
            return self.urlopen(
                method,
                url,
                body,
                headers,
                retries=retries,
                redirect=redirect,
                assert_same_host=assert_same_host,
                timeout=timeout,
                pool_timeout=pool_timeout,
                release_conn=release_conn,
                chunked=chunked,
                body_pos=body_pos,
                preload_content=preload_content,
                decode_content=decode_content,
                **response_kw,
            )

        return response
967
968
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    Same as :class:`.HTTPConnectionPool`, but HTTPS.

    :class:`.HTTPSConnection` uses one of ``assert_fingerprint``,
    ``assert_hostname`` and ``host`` in this order to verify connections.
    If ``assert_hostname`` is False, no verification is done.

    The ``key_file``, ``cert_file``, ``cert_reqs``, ``ca_certs``,
    ``ca_cert_dir``, ``ssl_version``, ``key_password`` are only used if :mod:`ssl`
    is available and are fed into :meth:`urllib3.util.ssl_wrap_socket` to upgrade
    the connection socket into an SSL socket.
    """

    # Overrides the base class so is_same_host()/normalization use "https".
    scheme = "https"
    # New connections built by _new_conn() are TLS-capable.
    ConnectionCls: type[BaseHTTPSConnection] = HTTPSConnection
985
986 def __init__(
987 self,
988 host: str,
989 port: int | None = None,
990 timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT,
991 maxsize: int = 1,
992 block: bool = False,
993 headers: typing.Mapping[str, str] | None = None,
994 retries: Retry | bool | int | None = None,
995 _proxy: Url | None = None,
996 _proxy_headers: typing.Mapping[str, str] | None = None,
997 key_file: str | None = None,
998 cert_file: str | None = None,
999 cert_reqs: int | str | None = None,
1000 key_password: str | None = None,
1001 ca_certs: str | None = None,
1002 ssl_version: int | str | None = None,
1003 ssl_minimum_version: ssl.TLSVersion | None = None,
1004 ssl_maximum_version: ssl.TLSVersion | None = None,
1005 assert_hostname: str | Literal[False] | None = None,
1006 assert_fingerprint: str | None = None,
1007 ca_cert_dir: str | None = None,
1008 **conn_kw: typing.Any,
1009 ) -> None:
1010 super().__init__(
1011 host,
1012 port,
1013 timeout,
1014 maxsize,
1015 block,
1016 headers,
1017 retries,
1018 _proxy,
1019 _proxy_headers,
1020 **conn_kw,
1021 )
1022
1023 self.key_file = key_file
1024 self.cert_file = cert_file
1025 self.cert_reqs = cert_reqs
1026 self.key_password = key_password
1027 self.ca_certs = ca_certs
1028 self.ca_cert_dir = ca_cert_dir
1029 self.ssl_version = ssl_version
1030 self.ssl_minimum_version = ssl_minimum_version
1031 self.ssl_maximum_version = ssl_maximum_version
1032 self.assert_hostname = assert_hostname
1033 self.assert_fingerprint = assert_fingerprint
1034
1035 def _prepare_proxy(self, conn: HTTPSConnection) -> None: # type: ignore[override]
1036 """Establishes a tunnel connection through HTTP CONNECT."""
1037 if self.proxy and self.proxy.scheme == "https":
1038 tunnel_scheme = "https"
1039 else:
1040 tunnel_scheme = "http"
1041
1042 conn.set_tunnel(
1043 scheme=tunnel_scheme,
1044 host=self._tunnel_host,
1045 port=self.port,
1046 headers=self.proxy_headers,
1047 )
1048 conn.connect()
1049
1050 def _new_conn(self) -> BaseHTTPSConnection:
1051 """
1052 Return a fresh :class:`urllib3.connection.HTTPConnection`.
1053 """
1054 self.num_connections += 1
1055 log.debug(
1056 "Starting new HTTPS connection (%d): %s:%s",
1057 self.num_connections,
1058 self.host,
1059 self.port or "443",
1060 )
1061
1062 if not self.ConnectionCls or self.ConnectionCls is DummyConnection: # type: ignore[comparison-overlap]
1063 raise ImportError(
1064 "Can't connect to HTTPS URL because the SSL module is not available."
1065 )
1066
1067 actual_host: str = self.host
1068 actual_port = self.port
1069 if self.proxy is not None and self.proxy.host is not None:
1070 actual_host = self.proxy.host
1071 actual_port = self.proxy.port
1072
1073 return self.ConnectionCls(
1074 host=actual_host,
1075 port=actual_port,
1076 timeout=self.timeout.connect_timeout,
1077 cert_file=self.cert_file,
1078 key_file=self.key_file,
1079 key_password=self.key_password,
1080 cert_reqs=self.cert_reqs,
1081 ca_certs=self.ca_certs,
1082 ca_cert_dir=self.ca_cert_dir,
1083 assert_hostname=self.assert_hostname,
1084 assert_fingerprint=self.assert_fingerprint,
1085 ssl_version=self.ssl_version,
1086 ssl_minimum_version=self.ssl_minimum_version,
1087 ssl_maximum_version=self.ssl_maximum_version,
1088 **self.conn_kw,
1089 )
1090
1091 def _validate_conn(self, conn: BaseHTTPConnection) -> None:
1092 """
1093 Called right before a request is made, after the socket is created.
1094 """
1095 super()._validate_conn(conn)
1096
1097 # Force connect early to allow us to validate the connection.
1098 if conn.is_closed:
1099 conn.connect()
1100
1101 # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791
1102 if not conn.is_verified and not conn.proxy_is_verified:
1103 warnings.warn(
1104 (
1105 f"Unverified HTTPS request is being made to host '{conn.host}'. "
1106 "Adding certificate verification is strongly advised. See: "
1107 "https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
1108 "#tls-warnings"
1109 ),
1110 InsecureRequestWarning,
1111 )
1112
1113
def connection_from_url(url: str, **kw: typing.Any) -> HTTPConnectionPool:
    """
    Given a url, return an :class:`.ConnectionPool` instance of its host.

    This is a shortcut for not having to parse out the scheme, host, and port
    of the url before creating an :class:`.ConnectionPool` instance.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \\**kw:
        Passes additional parameters to the constructor of the appropriate
        :class:`.ConnectionPool`. Useful for specifying things like
        timeout, maxsize, headers, etc.

    Example::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    parsed = parse_url(url)
    # Default to plain HTTP when the URL carries no scheme, and fill in the
    # scheme's well-known port when none was given explicitly.
    scheme = parsed.scheme or "http"
    port = parsed.port or port_by_scheme.get(scheme, 80)
    pool_cls = HTTPSConnectionPool if scheme == "https" else HTTPConnectionPool
    return pool_cls(parsed.host, port=port, **kw)  # type: ignore[arg-type]
1141
1142
@typing.overload
def _normalize_host(host: None, scheme: str | None) -> None:
    ...


@typing.overload
def _normalize_host(host: str, scheme: str | None) -> str:
    ...


def _normalize_host(host: str | None, scheme: str | None) -> str | None:
    """
    Normalize hosts for comparisons and use with sockets.

    Delegates to :func:`urllib3.util.url._normalize_host` and additionally
    strips surrounding IPv6 brackets.
    """
    normalized = normalize_host(host, scheme)

    # httplib doesn't like it when we include brackets in IPv6 addresses
    # Specifically, if we include brackets but also pass the port then
    # httplib crazily doubles up the square brackets on the Host header.
    # Instead, we need to make sure we never pass ``None`` as the port.
    # However, for backward compatibility reasons we can't actually
    # *assert* that. See http://bugs.python.org/issue28539
    if normalized and normalized[:1] == "[" and normalized[-1:] == "]":
        return normalized[1:-1]
    return normalized
1169
1170
def _url_from_pool(
    pool: HTTPConnectionPool | HTTPSConnectionPool, path: str | None = None
) -> str:
    """Returns the URL from a given connection pool. This is mainly used for testing and logging."""
    url = Url(scheme=pool.scheme, host=pool.host, port=pool.port, path=path)
    return url.url
1176
1177
1178 def _close_pool_connections(pool: queue.LifoQueue[typing.Any]) -> None:
1179 """Drains a queue of connections and closes each one."""
1180 try:
1181 while True:
1182 conn = pool.get(block=False)
1183 if conn:
1184 conn.close()
1185 except queue.Empty:
1186 pass # Done.