Mercurial > repos > jpayne > bioproject_to_srr_2
comparison urllib3/connectionpool.py @ 7:5eb2d5e3bf22
planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author | jpayne |
---|---|
date | Sun, 05 May 2024 23:32:17 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
6:b2745907b1eb | 7:5eb2d5e3bf22 |
---|---|
1 from __future__ import annotations | |
2 | |
3 import errno | |
4 import logging | |
5 import queue | |
6 import sys | |
7 import typing | |
8 import warnings | |
9 import weakref | |
10 from socket import timeout as SocketTimeout | |
11 from types import TracebackType | |
12 | |
13 from ._base_connection import _TYPE_BODY | |
14 from ._collections import HTTPHeaderDict | |
15 from ._request_methods import RequestMethods | |
16 from .connection import ( | |
17 BaseSSLError, | |
18 BrokenPipeError, | |
19 DummyConnection, | |
20 HTTPConnection, | |
21 HTTPException, | |
22 HTTPSConnection, | |
23 ProxyConfig, | |
24 _wrap_proxy_error, | |
25 ) | |
26 from .connection import port_by_scheme as port_by_scheme | |
27 from .exceptions import ( | |
28 ClosedPoolError, | |
29 EmptyPoolError, | |
30 FullPoolError, | |
31 HostChangedError, | |
32 InsecureRequestWarning, | |
33 LocationValueError, | |
34 MaxRetryError, | |
35 NewConnectionError, | |
36 ProtocolError, | |
37 ProxyError, | |
38 ReadTimeoutError, | |
39 SSLError, | |
40 TimeoutError, | |
41 ) | |
42 from .response import BaseHTTPResponse | |
43 from .util.connection import is_connection_dropped | |
44 from .util.proxy import connection_requires_http_tunnel | |
45 from .util.request import _TYPE_BODY_POSITION, set_file_position | |
46 from .util.retry import Retry | |
47 from .util.ssl_match_hostname import CertificateError | |
48 from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_DEFAULT, Timeout | |
49 from .util.url import Url, _encode_target | |
50 from .util.url import _normalize_host as normalize_host | |
51 from .util.url import parse_url | |
52 from .util.util import to_str | |
53 | |
54 if typing.TYPE_CHECKING: | |
55 import ssl | |
56 from typing import Literal | |
57 | |
58 from ._base_connection import BaseHTTPConnection, BaseHTTPSConnection | |
59 | |
# Module-level logger shared by all pool classes defined in this module.
log = logging.getLogger(__name__)

# Every form a timeout argument may take in the pool APIs below.
_TYPE_TIMEOUT = typing.Union[Timeout, float, _TYPE_DEFAULT, None]

# Return-type variable for __enter__ so subclasses keep their own type
# when used as a context manager.
_SelfT = typing.TypeVar("_SelfT")
65 | |
66 | |
67 # Pool objects | |
class ConnectionPool:
    """
    Base class for all connection pools, such as
    :class:`.HTTPConnectionPool` and :class:`.HTTPSConnectionPool`.

    .. note::
        ConnectionPool.urlopen() does not normalize or percent-encode target URIs
        which is useful if your target server doesn't support percent-encoded
        target URIs.
    """

    # Subclasses override with "http"/"https"; used when normalizing hosts.
    scheme: str | None = None
    # LIFO so the most recently used (hence most likely alive) connection
    # is handed out first.
    QueueCls = queue.LifoQueue

    def __init__(self, host: str, port: int | None = None) -> None:
        if not host:
            raise LocationValueError("No host specified.")

        self.host = _normalize_host(host, scheme=self.scheme)
        self.port = port

        # Unlike '_normalize_host()', 'normalize_host()' keeps the square
        # brackets around IPv6 addresses. HTTP CONNECT tunneling requires
        # them, and this value is what gets handed to
        # `HTTPConnection.set_tunnel()`.
        self._tunnel_host = normalize_host(host, scheme=self.scheme).lower()

    def __str__(self) -> str:
        return "%s(host=%r, port=%r)" % (type(self).__name__, self.host, self.port)

    def __enter__(self: _SelfT) -> _SelfT:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        self.close()
        # False: never swallow an exception raised inside the 'with' body.
        return False

    def close(self) -> None:
        """
        Close all pooled connections and disable the pool.
        """
115 | |
116 | |
# This is taken from http://hg.python.org/cpython/file/7aaba721ebc0/Lib/socket.py#l252
# errno values meaning "would block" on a non-blocking socket; _raise_timeout()
# treats these as read timeouts.
_blocking_errnos = {errno.EAGAIN, errno.EWOULDBLOCK}
119 | |
120 | |
121 class HTTPConnectionPool(ConnectionPool, RequestMethods): | |
122 """ | |
123 Thread-safe connection pool for one host. | |
124 | |
125 :param host: | |
126 Host used for this HTTP Connection (e.g. "localhost"), passed into | |
127 :class:`http.client.HTTPConnection`. | |
128 | |
129 :param port: | |
130 Port used for this HTTP Connection (None is equivalent to 80), passed | |
131 into :class:`http.client.HTTPConnection`. | |
132 | |
133 :param timeout: | |
134 Socket timeout in seconds for each individual connection. This can | |
135 be a float or integer, which sets the timeout for the HTTP request, | |
136 or an instance of :class:`urllib3.util.Timeout` which gives you more | |
137 fine-grained control over request timeouts. After the constructor has | |
138 been parsed, this is always a `urllib3.util.Timeout` object. | |
139 | |
140 :param maxsize: | |
141 Number of connections to save that can be reused. More than 1 is useful | |
142 in multithreaded situations. If ``block`` is set to False, more | |
143 connections will be created but they will not be saved once they've | |
144 been used. | |
145 | |
146 :param block: | |
147 If set to True, no more than ``maxsize`` connections will be used at | |
148 a time. When no free connections are available, the call will block | |
149 until a connection has been released. This is a useful side effect for | |
150 particular multithreaded situations where one does not want to use more | |
151 than maxsize connections per host to prevent flooding. | |
152 | |
153 :param headers: | |
154 Headers to include with all requests, unless other headers are given | |
155 explicitly. | |
156 | |
157 :param retries: | |
158 Retry configuration to use by default with requests in this pool. | |
159 | |
160 :param _proxy: | |
161 Parsed proxy URL, should not be used directly, instead, see | |
162 :class:`urllib3.ProxyManager` | |
163 | |
164 :param _proxy_headers: | |
165 A dictionary with proxy headers, should not be used directly, | |
166 instead, see :class:`urllib3.ProxyManager` | |
167 | |
168 :param \\**conn_kw: | |
169 Additional parameters are used to create fresh :class:`urllib3.connection.HTTPConnection`, | |
170 :class:`urllib3.connection.HTTPSConnection` instances. | |
171 """ | |
172 | |
173 scheme = "http" | |
174 ConnectionCls: ( | |
175 type[BaseHTTPConnection] | type[BaseHTTPSConnection] | |
176 ) = HTTPConnection | |
177 | |
178 def __init__( | |
179 self, | |
180 host: str, | |
181 port: int | None = None, | |
182 timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT, | |
183 maxsize: int = 1, | |
184 block: bool = False, | |
185 headers: typing.Mapping[str, str] | None = None, | |
186 retries: Retry | bool | int | None = None, | |
187 _proxy: Url | None = None, | |
188 _proxy_headers: typing.Mapping[str, str] | None = None, | |
189 _proxy_config: ProxyConfig | None = None, | |
190 **conn_kw: typing.Any, | |
191 ): | |
192 ConnectionPool.__init__(self, host, port) | |
193 RequestMethods.__init__(self, headers) | |
194 | |
195 if not isinstance(timeout, Timeout): | |
196 timeout = Timeout.from_float(timeout) | |
197 | |
198 if retries is None: | |
199 retries = Retry.DEFAULT | |
200 | |
201 self.timeout = timeout | |
202 self.retries = retries | |
203 | |
204 self.pool: queue.LifoQueue[typing.Any] | None = self.QueueCls(maxsize) | |
205 self.block = block | |
206 | |
207 self.proxy = _proxy | |
208 self.proxy_headers = _proxy_headers or {} | |
209 self.proxy_config = _proxy_config | |
210 | |
211 # Fill the queue up so that doing get() on it will block properly | |
212 for _ in range(maxsize): | |
213 self.pool.put(None) | |
214 | |
215 # These are mostly for testing and debugging purposes. | |
216 self.num_connections = 0 | |
217 self.num_requests = 0 | |
218 self.conn_kw = conn_kw | |
219 | |
220 if self.proxy: | |
221 # Enable Nagle's algorithm for proxies, to avoid packet fragmentation. | |
222 # We cannot know if the user has added default socket options, so we cannot replace the | |
223 # list. | |
224 self.conn_kw.setdefault("socket_options", []) | |
225 | |
226 self.conn_kw["proxy"] = self.proxy | |
227 self.conn_kw["proxy_config"] = self.proxy_config | |
228 | |
229 # Do not pass 'self' as callback to 'finalize'. | |
230 # Then the 'finalize' would keep an endless living (leak) to self. | |
231 # By just passing a reference to the pool allows the garbage collector | |
232 # to free self if nobody else has a reference to it. | |
233 pool = self.pool | |
234 | |
235 # Close all the HTTPConnections in the pool before the | |
236 # HTTPConnectionPool object is garbage collected. | |
237 weakref.finalize(self, _close_pool_connections, pool) | |
238 | |
239 def _new_conn(self) -> BaseHTTPConnection: | |
240 """ | |
241 Return a fresh :class:`HTTPConnection`. | |
242 """ | |
243 self.num_connections += 1 | |
244 log.debug( | |
245 "Starting new HTTP connection (%d): %s:%s", | |
246 self.num_connections, | |
247 self.host, | |
248 self.port or "80", | |
249 ) | |
250 | |
251 conn = self.ConnectionCls( | |
252 host=self.host, | |
253 port=self.port, | |
254 timeout=self.timeout.connect_timeout, | |
255 **self.conn_kw, | |
256 ) | |
257 return conn | |
258 | |
259 def _get_conn(self, timeout: float | None = None) -> BaseHTTPConnection: | |
260 """ | |
261 Get a connection. Will return a pooled connection if one is available. | |
262 | |
263 If no connections are available and :prop:`.block` is ``False``, then a | |
264 fresh connection is returned. | |
265 | |
266 :param timeout: | |
267 Seconds to wait before giving up and raising | |
268 :class:`urllib3.exceptions.EmptyPoolError` if the pool is empty and | |
269 :prop:`.block` is ``True``. | |
270 """ | |
271 conn = None | |
272 | |
273 if self.pool is None: | |
274 raise ClosedPoolError(self, "Pool is closed.") | |
275 | |
276 try: | |
277 conn = self.pool.get(block=self.block, timeout=timeout) | |
278 | |
279 except AttributeError: # self.pool is None | |
280 raise ClosedPoolError(self, "Pool is closed.") from None # Defensive: | |
281 | |
282 except queue.Empty: | |
283 if self.block: | |
284 raise EmptyPoolError( | |
285 self, | |
286 "Pool is empty and a new connection can't be opened due to blocking mode.", | |
287 ) from None | |
288 pass # Oh well, we'll create a new connection then | |
289 | |
290 # If this is a persistent connection, check if it got disconnected | |
291 if conn and is_connection_dropped(conn): | |
292 log.debug("Resetting dropped connection: %s", self.host) | |
293 conn.close() | |
294 | |
295 return conn or self._new_conn() | |
296 | |
297 def _put_conn(self, conn: BaseHTTPConnection | None) -> None: | |
298 """ | |
299 Put a connection back into the pool. | |
300 | |
301 :param conn: | |
302 Connection object for the current host and port as returned by | |
303 :meth:`._new_conn` or :meth:`._get_conn`. | |
304 | |
305 If the pool is already full, the connection is closed and discarded | |
306 because we exceeded maxsize. If connections are discarded frequently, | |
307 then maxsize should be increased. | |
308 | |
309 If the pool is closed, then the connection will be closed and discarded. | |
310 """ | |
311 if self.pool is not None: | |
312 try: | |
313 self.pool.put(conn, block=False) | |
314 return # Everything is dandy, done. | |
315 except AttributeError: | |
316 # self.pool is None. | |
317 pass | |
318 except queue.Full: | |
319 # Connection never got put back into the pool, close it. | |
320 if conn: | |
321 conn.close() | |
322 | |
323 if self.block: | |
324 # This should never happen if you got the conn from self._get_conn | |
325 raise FullPoolError( | |
326 self, | |
327 "Pool reached maximum size and no more connections are allowed.", | |
328 ) from None | |
329 | |
330 log.warning( | |
331 "Connection pool is full, discarding connection: %s. Connection pool size: %s", | |
332 self.host, | |
333 self.pool.qsize(), | |
334 ) | |
335 | |
336 # Connection never got put back into the pool, close it. | |
337 if conn: | |
338 conn.close() | |
339 | |
    def _validate_conn(self, conn: BaseHTTPConnection) -> None:
        """
        Called right before a request is made, after the socket is created.

        No-op extension hook in this class; subclasses presumably override it
        to perform per-connection checks -- confirm against the HTTPS pool.
        """
344 | |
    def _prepare_proxy(self, conn: BaseHTTPConnection) -> None:
        """Hook invoked from urlopen() before tunneling through a proxy."""
        # Nothing to do for HTTP connections.
        pass
348 | |
349 def _get_timeout(self, timeout: _TYPE_TIMEOUT) -> Timeout: | |
350 """Helper that always returns a :class:`urllib3.util.Timeout`""" | |
351 if timeout is _DEFAULT_TIMEOUT: | |
352 return self.timeout.clone() | |
353 | |
354 if isinstance(timeout, Timeout): | |
355 return timeout.clone() | |
356 else: | |
357 # User passed us an int/float. This is for backwards compatibility, | |
358 # can be removed later | |
359 return Timeout.from_float(timeout) | |
360 | |
361 def _raise_timeout( | |
362 self, | |
363 err: BaseSSLError | OSError | SocketTimeout, | |
364 url: str, | |
365 timeout_value: _TYPE_TIMEOUT | None, | |
366 ) -> None: | |
367 """Is the error actually a timeout? Will raise a ReadTimeout or pass""" | |
368 | |
369 if isinstance(err, SocketTimeout): | |
370 raise ReadTimeoutError( | |
371 self, url, f"Read timed out. (read timeout={timeout_value})" | |
372 ) from err | |
373 | |
374 # See the above comment about EAGAIN in Python 3. | |
375 if hasattr(err, "errno") and err.errno in _blocking_errnos: | |
376 raise ReadTimeoutError( | |
377 self, url, f"Read timed out. (read timeout={timeout_value})" | |
378 ) from err | |
379 | |
    def _make_request(
        self,
        conn: BaseHTTPConnection,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | None = None,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        chunked: bool = False,
        response_conn: BaseHTTPConnection | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        enforce_content_length: bool = True,
    ) -> BaseHTTPResponse:
        """
        Perform a request on a given urllib connection object taken from our
        pool.

        :param conn:
            a connection from one of our connection pools

        :param method:
            HTTP request method (such as GET, POST, PUT, etc.)

        :param url:
            The URL to perform the request on.

        :param body:
            Data to send in the request body, either :class:`str`, :class:`bytes`,
            an iterable of :class:`str`/:class:`bytes`, or a file-like object.

        :param headers:
            Dictionary of custom headers to send, such as User-Agent,
            If-None-Match, etc. If None, pool headers are used. If provided,
            these headers completely replace any pool-specific headers.

        :param retries:
            Configure the number of retries to allow before raising a
            :class:`~urllib3.exceptions.MaxRetryError` exception.

            Pass ``None`` to retry until you receive a response. Pass a
            :class:`~urllib3.util.retry.Retry` object for fine-grained control
            over different types of retries.
            Pass an integer number to retry connection errors that many times,
            but no other types of errors. Pass zero to never retry.

            If ``False``, then retries are disabled and any exception is raised
            immediately. Also, instead of raising a MaxRetryError on redirects,
            the redirect response will be returned.

        :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int.

        :param timeout:
            If specified, overrides the default timeout for this one
            request. It may be a float (in seconds) or an instance of
            :class:`urllib3.util.Timeout`.

        :param chunked:
            If True, urllib3 will send the body using chunked transfer
            encoding. Otherwise, urllib3 will send the body using the standard
            content-length form. Defaults to False.

        :param response_conn:
            Set this to ``None`` if you will handle releasing the connection or
            set the connection to have the response release it.

        :param preload_content:
            If True, the response's body will be preloaded during construction.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param enforce_content_length:
            Enforce content length checking. Body returned by server must match
            value of Content-Length header, if present. Otherwise, raise error.
        """
        self.num_requests += 1

        # Start the connect-timeout clock and apply the connect timeout to the
        # connection before any network activity happens.
        timeout_obj = self._get_timeout(timeout)
        timeout_obj.start_connect()
        conn.timeout = Timeout.resolve_default_timeout(timeout_obj.connect_timeout)

        try:
            # Trigger any extra validation we need to do.
            try:
                self._validate_conn(conn)
            except (SocketTimeout, BaseSSLError) as e:
                self._raise_timeout(err=e, url=url, timeout_value=conn.timeout)
                raise

        # _validate_conn() starts the connection to an HTTPS proxy
        # so we need to wrap errors with 'ProxyError' here too.
        except (
            OSError,
            NewConnectionError,
            TimeoutError,
            BaseSSLError,
            CertificateError,
            SSLError,
        ) as e:
            new_e: Exception = e
            if isinstance(e, (BaseSSLError, CertificateError)):
                new_e = SSLError(e)
            # If the connection never successfully connected to its proxy,
            # wrap the error as a ProxyError so callers can tell that the
            # proxy (not the target) was at fault.
            if isinstance(
                new_e, (OSError, NewConnectionError, TimeoutError, SSLError)
            ) and (conn and conn.proxy and not conn.has_connected_to_proxy):
                new_e = _wrap_proxy_error(new_e, conn.proxy.scheme)
            raise new_e

        # conn.request() calls http.client.*.request, not the method in
        # urllib3.request. It also calls makefile (recv) on the socket.
        try:
            conn.request(
                method,
                url,
                body=body,
                headers=headers,
                chunked=chunked,
                preload_content=preload_content,
                decode_content=decode_content,
                enforce_content_length=enforce_content_length,
            )

        # We are swallowing BrokenPipeError (errno.EPIPE) since the server is
        # legitimately able to close the connection after sending a valid response.
        # With this behaviour, the received response is still readable.
        except BrokenPipeError:
            pass
        except OSError as e:
            # MacOS/Linux
            # EPROTOTYPE and ECONNRESET are needed on macOS
            # https://erickt.github.io/blog/2014/11/19/adventures-in-debugging-a-potential-osx-kernel-bug/
            # Condition changed later to emit ECONNRESET instead of only EPROTOTYPE.
            if e.errno != errno.EPROTOTYPE and e.errno != errno.ECONNRESET:
                raise

        # Reset the timeout for the recv() on the socket
        read_timeout = timeout_obj.read_timeout

        if not conn.is_closed:
            # In Python 3 socket.py will catch EAGAIN and return None when you
            # try and read into the file pointer created by http.client, which
            # instead raises a BadStatusLine exception. Instead of catching
            # the exception and assuming all BadStatusLine exceptions are read
            # timeouts, check for a zero timeout before making the request.
            if read_timeout == 0:
                raise ReadTimeoutError(
                    self, url, f"Read timed out. (read timeout={read_timeout})"
                )
            conn.timeout = read_timeout

        # Receive the response from the server
        try:
            response = conn.getresponse()
        except (BaseSSLError, OSError) as e:
            # Timeouts surfacing during the read are re-raised as
            # ReadTimeoutError by the helper; anything else propagates.
            self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
            raise

        # Set properties that are used by the pooling layer.
        response.retries = retries
        response._connection = response_conn  # type: ignore[attr-defined]
        response._pool = self  # type: ignore[attr-defined]

        # emscripten connection doesn't have _http_vsn_str
        http_version = getattr(conn, "_http_vsn_str", "HTTP/?")
        log.debug(
            '%s://%s:%s "%s %s %s" %s %s',
            self.scheme,
            self.host,
            self.port,
            method,
            url,
            # HTTP version
            http_version,
            response.status,
            response.length_remaining,
        )

        return response
563 | |
564 def close(self) -> None: | |
565 """ | |
566 Close all pooled connections and disable the pool. | |
567 """ | |
568 if self.pool is None: | |
569 return | |
570 # Disable access to the pool | |
571 old_pool, self.pool = self.pool, None | |
572 | |
573 # Close all the HTTPConnections in the pool. | |
574 _close_pool_connections(old_pool) | |
575 | |
576 def is_same_host(self, url: str) -> bool: | |
577 """ | |
578 Check if the given ``url`` is a member of the same host as this | |
579 connection pool. | |
580 """ | |
581 if url.startswith("/"): | |
582 return True | |
583 | |
584 # TODO: Add optional support for socket.gethostbyname checking. | |
585 scheme, _, host, port, *_ = parse_url(url) | |
586 scheme = scheme or "http" | |
587 if host is not None: | |
588 host = _normalize_host(host, scheme=scheme) | |
589 | |
590 # Use explicit default port for comparison when none is given | |
591 if self.port and not port: | |
592 port = port_by_scheme.get(scheme) | |
593 elif not self.port and port == port_by_scheme.get(scheme): | |
594 port = None | |
595 | |
596 return (scheme, host, port) == (self.scheme, self.host, self.port) | |
597 | |
598 def urlopen( # type: ignore[override] | |
599 self, | |
600 method: str, | |
601 url: str, | |
602 body: _TYPE_BODY | None = None, | |
603 headers: typing.Mapping[str, str] | None = None, | |
604 retries: Retry | bool | int | None = None, | |
605 redirect: bool = True, | |
606 assert_same_host: bool = True, | |
607 timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT, | |
608 pool_timeout: int | None = None, | |
609 release_conn: bool | None = None, | |
610 chunked: bool = False, | |
611 body_pos: _TYPE_BODY_POSITION | None = None, | |
612 preload_content: bool = True, | |
613 decode_content: bool = True, | |
614 **response_kw: typing.Any, | |
615 ) -> BaseHTTPResponse: | |
616 """ | |
617 Get a connection from the pool and perform an HTTP request. This is the | |
618 lowest level call for making a request, so you'll need to specify all | |
619 the raw details. | |
620 | |
621 .. note:: | |
622 | |
623 More commonly, it's appropriate to use a convenience method | |
624 such as :meth:`request`. | |
625 | |
626 .. note:: | |
627 | |
628 `release_conn` will only behave as expected if | |
629 `preload_content=False` because we want to make | |
630 `preload_content=False` the default behaviour someday soon without | |
631 breaking backwards compatibility. | |
632 | |
633 :param method: | |
634 HTTP request method (such as GET, POST, PUT, etc.) | |
635 | |
636 :param url: | |
637 The URL to perform the request on. | |
638 | |
639 :param body: | |
640 Data to send in the request body, either :class:`str`, :class:`bytes`, | |
641 an iterable of :class:`str`/:class:`bytes`, or a file-like object. | |
642 | |
643 :param headers: | |
644 Dictionary of custom headers to send, such as User-Agent, | |
645 If-None-Match, etc. If None, pool headers are used. If provided, | |
646 these headers completely replace any pool-specific headers. | |
647 | |
648 :param retries: | |
649 Configure the number of retries to allow before raising a | |
650 :class:`~urllib3.exceptions.MaxRetryError` exception. | |
651 | |
652 If ``None`` (default) will retry 3 times, see ``Retry.DEFAULT``. Pass a | |
653 :class:`~urllib3.util.retry.Retry` object for fine-grained control | |
654 over different types of retries. | |
655 Pass an integer number to retry connection errors that many times, | |
656 but no other types of errors. Pass zero to never retry. | |
657 | |
658 If ``False``, then retries are disabled and any exception is raised | |
659 immediately. Also, instead of raising a MaxRetryError on redirects, | |
660 the redirect response will be returned. | |
661 | |
662 :type retries: :class:`~urllib3.util.retry.Retry`, False, or an int. | |
663 | |
664 :param redirect: | |
665 If True, automatically handle redirects (status codes 301, 302, | |
666 303, 307, 308). Each redirect counts as a retry. Disabling retries | |
667 will disable redirect, too. | |
668 | |
669 :param assert_same_host: | |
670 If ``True``, will make sure that the host of the pool requests is | |
671 consistent else will raise HostChangedError. When ``False``, you can | |
672 use the pool on an HTTP proxy and request foreign hosts. | |
673 | |
674 :param timeout: | |
675 If specified, overrides the default timeout for this one | |
676 request. It may be a float (in seconds) or an instance of | |
677 :class:`urllib3.util.Timeout`. | |
678 | |
679 :param pool_timeout: | |
680 If set and the pool is set to block=True, then this method will | |
681 block for ``pool_timeout`` seconds and raise EmptyPoolError if no | |
682 connection is available within the time period. | |
683 | |
684 :param bool preload_content: | |
685 If True, the response's body will be preloaded into memory. | |
686 | |
687 :param bool decode_content: | |
688 If True, will attempt to decode the body based on the | |
689 'content-encoding' header. | |
690 | |
691 :param release_conn: | |
692 If False, then the urlopen call will not release the connection | |
693 back into the pool once a response is received (but will release if | |
694 you read the entire contents of the response such as when | |
695 `preload_content=True`). This is useful if you're not preloading | |
696 the response's content immediately. You will need to call | |
697 ``r.release_conn()`` on the response ``r`` to return the connection | |
698 back into the pool. If None, it takes the value of ``preload_content`` | |
699 which defaults to ``True``. | |
700 | |
701 :param bool chunked: | |
702 If True, urllib3 will send the body using chunked transfer | |
703 encoding. Otherwise, urllib3 will send the body using the standard | |
704 content-length form. Defaults to False. | |
705 | |
706 :param int body_pos: | |
707 Position to seek to in file-like body in the event of a retry or | |
708 redirect. Typically this won't need to be set because urllib3 will | |
709 auto-populate the value when needed. | |
710 """ | |
711 parsed_url = parse_url(url) | |
712 destination_scheme = parsed_url.scheme | |
713 | |
714 if headers is None: | |
715 headers = self.headers | |
716 | |
717 if not isinstance(retries, Retry): | |
718 retries = Retry.from_int(retries, redirect=redirect, default=self.retries) | |
719 | |
720 if release_conn is None: | |
721 release_conn = preload_content | |
722 | |
723 # Check host | |
724 if assert_same_host and not self.is_same_host(url): | |
725 raise HostChangedError(self, url, retries) | |
726 | |
727 # Ensure that the URL we're connecting to is properly encoded | |
728 if url.startswith("/"): | |
729 url = to_str(_encode_target(url)) | |
730 else: | |
731 url = to_str(parsed_url.url) | |
732 | |
733 conn = None | |
734 | |
735 # Track whether `conn` needs to be released before | |
736 # returning/raising/recursing. Update this variable if necessary, and | |
737 # leave `release_conn` constant throughout the function. That way, if | |
738 # the function recurses, the original value of `release_conn` will be | |
739 # passed down into the recursive call, and its value will be respected. | |
740 # | |
741 # See issue #651 [1] for details. | |
742 # | |
743 # [1] <https://github.com/urllib3/urllib3/issues/651> | |
744 release_this_conn = release_conn | |
745 | |
746 http_tunnel_required = connection_requires_http_tunnel( | |
747 self.proxy, self.proxy_config, destination_scheme | |
748 ) | |
749 | |
750 # Merge the proxy headers. Only done when not using HTTP CONNECT. We | |
751 # have to copy the headers dict so we can safely change it without those | |
752 # changes being reflected in anyone else's copy. | |
753 if not http_tunnel_required: | |
754 headers = headers.copy() # type: ignore[attr-defined] | |
755 headers.update(self.proxy_headers) # type: ignore[union-attr] | |
756 | |
757 # Must keep the exception bound to a separate variable or else Python 3 | |
758 # complains about UnboundLocalError. | |
759 err = None | |
760 | |
761 # Keep track of whether we cleanly exited the except block. This | |
762 # ensures we do proper cleanup in finally. | |
763 clean_exit = False | |
764 | |
765 # Rewind body position, if needed. Record current position | |
766 # for future rewinds in the event of a redirect/retry. | |
767 body_pos = set_file_position(body, body_pos) | |
768 | |
769 try: | |
770 # Request a connection from the queue. | |
771 timeout_obj = self._get_timeout(timeout) | |
772 conn = self._get_conn(timeout=pool_timeout) | |
773 | |
774 conn.timeout = timeout_obj.connect_timeout # type: ignore[assignment] | |
775 | |
776 # Is this a closed/new connection that requires CONNECT tunnelling? | |
777 if self.proxy is not None and http_tunnel_required and conn.is_closed: | |
778 try: | |
779 self._prepare_proxy(conn) | |
780 except (BaseSSLError, OSError, SocketTimeout) as e: | |
781 self._raise_timeout( | |
782 err=e, url=self.proxy.url, timeout_value=conn.timeout | |
783 ) | |
784 raise | |
785 | |
786 # If we're going to release the connection in ``finally:``, then | |
787 # the response doesn't need to know about the connection. Otherwise | |
788 # it will also try to release it and we'll have a double-release | |
789 # mess. | |
790 response_conn = conn if not release_conn else None | |
791 | |
792 # Make the request on the HTTPConnection object | |
793 response = self._make_request( | |
794 conn, | |
795 method, | |
796 url, | |
797 timeout=timeout_obj, | |
798 body=body, | |
799 headers=headers, | |
800 chunked=chunked, | |
801 retries=retries, | |
802 response_conn=response_conn, | |
803 preload_content=preload_content, | |
804 decode_content=decode_content, | |
805 **response_kw, | |
806 ) | |
807 | |
808 # Everything went great! | |
809 clean_exit = True | |
810 | |
811 except EmptyPoolError: | |
812 # Didn't get a connection from the pool, no need to clean up | |
813 clean_exit = True | |
814 release_this_conn = False | |
815 raise | |
816 | |
817 except ( | |
818 TimeoutError, | |
819 HTTPException, | |
820 OSError, | |
821 ProtocolError, | |
822 BaseSSLError, | |
823 SSLError, | |
824 CertificateError, | |
825 ProxyError, | |
826 ) as e: | |
827 # Discard the connection for these exceptions. It will be | |
828 # replaced during the next _get_conn() call. | |
829 clean_exit = False | |
830 new_e: Exception = e | |
831 if isinstance(e, (BaseSSLError, CertificateError)): | |
832 new_e = SSLError(e) | |
833 if isinstance( | |
834 new_e, | |
835 ( | |
836 OSError, | |
837 NewConnectionError, | |
838 TimeoutError, | |
839 SSLError, | |
840 HTTPException, | |
841 ), | |
842 ) and (conn and conn.proxy and not conn.has_connected_to_proxy): | |
843 new_e = _wrap_proxy_error(new_e, conn.proxy.scheme) | |
844 elif isinstance(new_e, (OSError, HTTPException)): | |
845 new_e = ProtocolError("Connection aborted.", new_e) | |
846 | |
847 retries = retries.increment( | |
848 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2] | |
849 ) | |
850 retries.sleep() | |
851 | |
852 # Keep track of the error for the retry warning. | |
853 err = e | |
854 | |
855 finally: | |
856 if not clean_exit: | |
857 # We hit some kind of exception, handled or otherwise. We need | |
858 # to throw the connection away unless explicitly told not to. | |
859 # Close the connection, set the variable to None, and make sure | |
860 # we put the None back in the pool to avoid leaking it. | |
861 if conn: | |
862 conn.close() | |
863 conn = None | |
864 release_this_conn = True | |
865 | |
866 if release_this_conn: | |
867 # Put the connection back to be reused. If the connection is | |
868 # expired then it will be None, which will get replaced with a | |
869 # fresh connection during _get_conn. | |
870 self._put_conn(conn) | |
871 | |
872 if not conn: | |
873 # Try again | |
874 log.warning( | |
875 "Retrying (%r) after connection broken by '%r': %s", retries, err, url | |
876 ) | |
877 return self.urlopen( | |
878 method, | |
879 url, | |
880 body, | |
881 headers, | |
882 retries, | |
883 redirect, | |
884 assert_same_host, | |
885 timeout=timeout, | |
886 pool_timeout=pool_timeout, | |
887 release_conn=release_conn, | |
888 chunked=chunked, | |
889 body_pos=body_pos, | |
890 preload_content=preload_content, | |
891 decode_content=decode_content, | |
892 **response_kw, | |
893 ) | |
894 | |
895 # Handle redirect? | |
896 redirect_location = redirect and response.get_redirect_location() | |
897 if redirect_location: | |
898 if response.status == 303: | |
899 # Change the method according to RFC 9110, Section 15.4.4. | |
900 method = "GET" | |
901 # And lose the body not to transfer anything sensitive. | |
902 body = None | |
903 headers = HTTPHeaderDict(headers)._prepare_for_method_change() | |
904 | |
905 try: | |
906 retries = retries.increment(method, url, response=response, _pool=self) | |
907 except MaxRetryError: | |
908 if retries.raise_on_redirect: | |
909 response.drain_conn() | |
910 raise | |
911 return response | |
912 | |
913 response.drain_conn() | |
914 retries.sleep_for_retry(response) | |
915 log.debug("Redirecting %s -> %s", url, redirect_location) | |
916 return self.urlopen( | |
917 method, | |
918 redirect_location, | |
919 body, | |
920 headers, | |
921 retries=retries, | |
922 redirect=redirect, | |
923 assert_same_host=assert_same_host, | |
924 timeout=timeout, | |
925 pool_timeout=pool_timeout, | |
926 release_conn=release_conn, | |
927 chunked=chunked, | |
928 body_pos=body_pos, | |
929 preload_content=preload_content, | |
930 decode_content=decode_content, | |
931 **response_kw, | |
932 ) | |
933 | |
934 # Check if we should retry the HTTP response. | |
935 has_retry_after = bool(response.headers.get("Retry-After")) | |
936 if retries.is_retry(method, response.status, has_retry_after): | |
937 try: | |
938 retries = retries.increment(method, url, response=response, _pool=self) | |
939 except MaxRetryError: | |
940 if retries.raise_on_status: | |
941 response.drain_conn() | |
942 raise | |
943 return response | |
944 | |
945 response.drain_conn() | |
946 retries.sleep(response) | |
947 log.debug("Retry: %s", url) | |
948 return self.urlopen( | |
949 method, | |
950 url, | |
951 body, | |
952 headers, | |
953 retries=retries, | |
954 redirect=redirect, | |
955 assert_same_host=assert_same_host, | |
956 timeout=timeout, | |
957 pool_timeout=pool_timeout, | |
958 release_conn=release_conn, | |
959 chunked=chunked, | |
960 body_pos=body_pos, | |
961 preload_content=preload_content, | |
962 decode_content=decode_content, | |
963 **response_kw, | |
964 ) | |
965 | |
966 return response | |
967 | |
968 | |
class HTTPSConnectionPool(HTTPConnectionPool):
    """
    HTTPS variant of :class:`.HTTPConnectionPool`.

    Connection verification in :class:`.HTTPSConnection` consults
    ``assert_fingerprint``, then ``assert_hostname``, then ``host``, in that
    order. Setting ``assert_hostname`` to False disables verification
    entirely.

    The TLS-related arguments (``key_file``, ``cert_file``, ``cert_reqs``,
    ``ca_certs``, ``ca_cert_dir``, ``ssl_version``, ``key_password``) take
    effect only when :mod:`ssl` is importable; they are forwarded to
    :meth:`urllib3.util.ssl_wrap_socket`, which wraps the plain socket in an
    SSL socket.
    """

    scheme = "https"
    ConnectionCls: type[BaseHTTPSConnection] = HTTPSConnection

    def __init__(
        self,
        host: str,
        port: int | None = None,
        timeout: _TYPE_TIMEOUT | None = _DEFAULT_TIMEOUT,
        maxsize: int = 1,
        block: bool = False,
        headers: typing.Mapping[str, str] | None = None,
        retries: Retry | bool | int | None = None,
        _proxy: Url | None = None,
        _proxy_headers: typing.Mapping[str, str] | None = None,
        key_file: str | None = None,
        cert_file: str | None = None,
        cert_reqs: int | str | None = None,
        key_password: str | None = None,
        ca_certs: str | None = None,
        ssl_version: int | str | None = None,
        ssl_minimum_version: ssl.TLSVersion | None = None,
        ssl_maximum_version: ssl.TLSVersion | None = None,
        assert_hostname: str | Literal[False] | None = None,
        assert_fingerprint: str | None = None,
        ca_cert_dir: str | None = None,
        **conn_kw: typing.Any,
    ) -> None:
        # Generic pool setup is handled by the base class; only the
        # TLS-specific configuration is stored here.
        super().__init__(
            host,
            port,
            timeout,
            maxsize,
            block,
            headers,
            retries,
            _proxy,
            _proxy_headers,
            **conn_kw,
        )

        # Client certificate / key material.
        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        # Server certificate validation settings.
        self.cert_reqs = cert_reqs
        self.ca_certs = ca_certs
        self.ca_cert_dir = ca_cert_dir
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        # Protocol version constraints.
        self.ssl_version = ssl_version
        self.ssl_minimum_version = ssl_minimum_version
        self.ssl_maximum_version = ssl_maximum_version

    def _prepare_proxy(self, conn: HTTPSConnection) -> None:  # type: ignore[override]
        """Set up and open a tunnel through the proxy via HTTP CONNECT."""
        # An HTTPS proxy means the CONNECT itself travels over TLS.
        tunnel_scheme = (
            "https" if self.proxy and self.proxy.scheme == "https" else "http"
        )

        conn.set_tunnel(
            scheme=tunnel_scheme,
            host=self._tunnel_host,
            port=self.port,
            headers=self.proxy_headers,
        )
        conn.connect()

    def _new_conn(self) -> BaseHTTPSConnection:
        """
        Return a fresh :class:`urllib3.connection.HTTPConnection`.
        """
        self.num_connections += 1
        log.debug(
            "Starting new HTTPS connection (%d): %s:%s",
            self.num_connections,
            self.host,
            self.port or "443",
        )

        # DummyConnection is substituted when ssl could not be imported.
        if not self.ConnectionCls or self.ConnectionCls is DummyConnection:  # type: ignore[comparison-overlap]
            raise ImportError(
                "Can't connect to HTTPS URL because the SSL module is not available."
            )

        # When proxying, the TCP connection targets the proxy endpoint
        # rather than the origin host.
        if self.proxy is not None and self.proxy.host is not None:
            connect_host: str = self.proxy.host
            connect_port = self.proxy.port
        else:
            connect_host = self.host
            connect_port = self.port

        return self.ConnectionCls(
            host=connect_host,
            port=connect_port,
            timeout=self.timeout.connect_timeout,
            cert_file=self.cert_file,
            key_file=self.key_file,
            key_password=self.key_password,
            cert_reqs=self.cert_reqs,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            assert_hostname=self.assert_hostname,
            assert_fingerprint=self.assert_fingerprint,
            ssl_version=self.ssl_version,
            ssl_minimum_version=self.ssl_minimum_version,
            ssl_maximum_version=self.ssl_maximum_version,
            **self.conn_kw,
        )

    def _validate_conn(self, conn: BaseHTTPConnection) -> None:
        """
        Called right before a request is made, after the socket is created.
        """
        super()._validate_conn(conn)

        # Connect eagerly so the handshake (and thus verification) happens
        # before the request is sent.
        if conn.is_closed:
            conn.connect()

        # TODO revise this, see https://github.com/urllib3/urllib3/issues/2791
        if not conn.is_verified and not conn.proxy_is_verified:
            warnings.warn(
                (
                    f"Unverified HTTPS request is being made to host '{conn.host}'. "
                    "Adding certificate verification is strongly advised. See: "
                    "https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
                    "#tls-warnings"
                ),
                InsecureRequestWarning,
            )
1112 | |
1113 | |
def connection_from_url(url: str, **kw: typing.Any) -> HTTPConnectionPool:
    """
    Build a :class:`.ConnectionPool` for the host named in ``url``.

    Saves callers from parsing the scheme, host, and port themselves before
    constructing an :class:`.ConnectionPool`.

    :param url:
        Absolute URL string that must include the scheme. Port is optional.

    :param \\**kw:
        Extra keyword arguments forwarded to the chosen
        :class:`.ConnectionPool` constructor — e.g. timeout, maxsize,
        headers.

    Example::

        >>> conn = connection_from_url('http://google.com/')
        >>> r = conn.request('GET', '/')
    """
    parsed = parse_url(url)
    scheme = parsed.scheme or "http"
    # Default the port from the scheme when the URL omits it.
    port = parsed.port or port_by_scheme.get(scheme, 80)
    pool_cls: type[HTTPConnectionPool] = (
        HTTPSConnectionPool if scheme == "https" else HTTPConnectionPool
    )
    return pool_cls(parsed.host, port=port, **kw)  # type: ignore[arg-type]
1141 | |
1142 | |
@typing.overload
def _normalize_host(host: None, scheme: str | None) -> None:
    ...


@typing.overload
def _normalize_host(host: str, scheme: str | None) -> str:
    ...


def _normalize_host(host: str | None, scheme: str | None) -> str | None:
    """
    Normalize hosts for comparisons and use with sockets.
    """

    normalized = normalize_host(host, scheme)

    # httplib mishandles bracketed IPv6 addresses: when brackets are present
    # and a port is also supplied, it doubles the brackets on the Host
    # header. The workaround is to strip the brackets here and always pass a
    # real port instead of ``None`` — although for backward compatibility we
    # cannot actually *assert* the latter.
    # See http://bugs.python.org/issue28539
    if normalized and normalized[:1] == "[" and normalized[-1:] == "]":
        normalized = normalized[1:-1]
    return normalized
1169 | |
1170 | |
def _url_from_pool(
    pool: HTTPConnectionPool | HTTPSConnectionPool, path: str | None = None
) -> str:
    """Render a pool's scheme/host/port (plus optional path) as a URL string — mainly for testing and logging."""
    url = Url(scheme=pool.scheme, host=pool.host, port=pool.port, path=path)
    return url.url
1176 | |
1177 | |
1178 def _close_pool_connections(pool: queue.LifoQueue[typing.Any]) -> None: | |
1179 """Drains a queue of connections and closes each one.""" | |
1180 try: | |
1181 while True: | |
1182 conn = pool.get(block=False) | |
1183 if conn: | |
1184 conn.close() | |
1185 except queue.Empty: | |
1186 pass # Done. |