comparison urllib3/poolmanager.py @ 7:5eb2d5e3bf22

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Sun, 05 May 2024 23:32:17 -0400
parents
children
comparison
equal deleted inserted replaced
6:b2745907b1eb 7:5eb2d5e3bf22
1 from __future__ import annotations
2
3 import functools
4 import logging
5 import typing
6 import warnings
7 from types import TracebackType
8 from urllib.parse import urljoin
9
10 from ._collections import HTTPHeaderDict, RecentlyUsedContainer
11 from ._request_methods import RequestMethods
12 from .connection import ProxyConfig
13 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
14 from .exceptions import (
15 LocationValueError,
16 MaxRetryError,
17 ProxySchemeUnknown,
18 URLSchemeUnknown,
19 )
20 from .response import BaseHTTPResponse
21 from .util.connection import _TYPE_SOCKET_OPTIONS
22 from .util.proxy import connection_requires_http_tunnel
23 from .util.retry import Retry
24 from .util.timeout import Timeout
25 from .util.url import Url, parse_url
26
27 if typing.TYPE_CHECKING:
28 import ssl
29 from typing import Literal
30
# Names exported by ``from urllib3.poolmanager import *``.
__all__ = [
    "PoolManager",
    "ProxyManager",
    "proxy_from_url",
]


# Module-level logger, named after this module.
log = logging.getLogger(__name__)
35
# TLS-only keyword arguments. ``PoolManager._new_pool`` removes every one of
# these from the request context before building a pool for plain ``http``.
SSL_KEYWORDS = (
    # Client certificate / private key material
    "key_file",
    "cert_file",
    # Server certificate verification
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    # Protocol version selection
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)

# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
_DEFAULT_BLOCKSIZE = 16 * 1024

# Type variable used so that ``__enter__`` is annotated as returning the
# same type it was called on.
_SelfT = typing.TypeVar("_SelfT")
55
56
class PoolKey(typing.NamedTuple):
    """
    Identity of a connection pool inside a pool manager.

    The tuple lists all known keyword arguments that could be provided to
    the pool manager, its pools, or the underlying connections. Each field
    is the request-context name with a ``key_`` prefix, because namedtuple
    fields can't start with ``_`` while some context names (``_proxy``,
    ``_proxy_headers``, ...) do.

    All custom key schemes should include the fields in this key at a minimum.
    """

    # --- Where the pool connects to -------------------------------------
    key_scheme: str
    key_host: str
    key_port: int | None

    # --- Generic pool / connection behaviour ----------------------------
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None

    # --- TLS configuration ----------------------------------------------
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None

    # --- Pool sizing and default headers --------------------------------
    key_maxsize: int | None
    key_headers: frozenset[tuple[str, str]] | None

    # --- Proxy settings (context names ``_proxy``, ``_proxy_headers``,
    # ``_proxy_config``) ---------------------------------------------------
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None

    # --- Socket-level options -------------------------------------------
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None

    # --- Certificate / hostname assertions ------------------------------
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None

    # --- Read block size -------------------------------------------------
    key_blocksize: int | None
94
95
96 def _default_key_normalizer(
97 key_class: type[PoolKey], request_context: dict[str, typing.Any]
98 ) -> PoolKey:
99 """
100 Create a pool key out of a request context dictionary.
101
102 According to RFC 3986, both the scheme and host are case-insensitive.
103 Therefore, this function normalizes both before constructing the pool
104 key for an HTTPS request. If you wish to change this behaviour, provide
105 alternate callables to ``key_fn_by_scheme``.
106
107 :param key_class:
108 The class to use when constructing the key. This should be a namedtuple
109 with the ``scheme`` and ``host`` keys at a minimum.
110 :type key_class: namedtuple
111 :param request_context:
112 A dictionary-like object that contain the context for a request.
113 :type request_context: dict
114
115 :return: A namedtuple that can be used as a connection pool key.
116 :rtype: PoolKey
117 """
118 # Since we mutate the dictionary, make a copy first
119 context = request_context.copy()
120 context["scheme"] = context["scheme"].lower()
121 context["host"] = context["host"].lower()
122
123 # These are both dictionaries and need to be transformed into frozensets
124 for key in ("headers", "_proxy_headers", "_socks_options"):
125 if key in context and context[key] is not None:
126 context[key] = frozenset(context[key].items())
127
128 # The socket_options key may be a list and needs to be transformed into a
129 # tuple.
130 socket_opts = context.get("socket_options")
131 if socket_opts is not None:
132 context["socket_options"] = tuple(socket_opts)
133
134 # Map the kwargs to the names in the namedtuple - this is necessary since
135 # namedtuples can't have fields starting with '_'.
136 for key in list(context.keys()):
137 context["key_" + key] = context.pop(key)
138
139 # Default to ``None`` for keys missing from the context
140 for field in key_class._fields:
141 if field not in context:
142 context[field] = None
143
144 # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
145 if context.get("key_blocksize") is None:
146 context["key_blocksize"] = _DEFAULT_BLOCKSIZE
147
148 return key_class(**context)
149
150
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a scheme to the connection pool class instantiated for it.
pool_classes_by_scheme = dict(http=HTTPConnectionPool, https=HTTPSConnectionPool)
161
162
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.
        A ``retries`` value that is not a :class:`~urllib3.util.retry.Retry`
        instance is converted to one up front, so that the limit it expresses
        also governs the redirects followed by :meth:`urlopen`.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # Overridden per instance by ProxyManager; ``None`` means "no proxy".
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        super().__init__(headers)

        # urlopen() below follows redirects itself and always calls the
        # underlying pool with redirect=False. If a plain int/bool ``retries``
        # were left for the pool to normalize, it would be turned into a Retry
        # built with redirect=False (redirects disabled, raise_on_redirect
        # off), which is not what the user asked for at the manager level.
        # Normalizing once here keeps the user's redirect budget intact.
        if "retries" in connection_pool_kw:
            retries = connection_pool_kw["retries"]
            if not isinstance(retries, Retry):
                connection_pool_kw["retries"] = Retry.from_int(retries)
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self: _SelfT) -> _SelfT:
        """Enter the context manager; the manager itself is the context value."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        """Clear all pools on exit and let any in-flight exception propagate."""
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        :param scheme: Key into ``self.pool_classes_by_scheme`` selecting the pool class.
        :param host: Host passed positionally to the pool class.
        :param port: Port passed positionally to the pool class.
        :param request_context:
            Keyword arguments for the pool class; defaults to a copy of
            ``self.connection_pool_kw``. A dictionary passed in by the caller
            is modified in place (see the pops below).
        :return: The newly constructed pool.
        :raises KeyError: If ``scheme`` has no entry in ``self.pool_classes_by_scheme``.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if request_context.get("blocksize") is None:
            request_context["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools do not accept the TLS-specific keyword arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A missing scheme is treated as plain HTTP.
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown:
            If no key function is registered for the (lower-cased) scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            # Drop it so it reaches neither the pool key nor the pool class.
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` must contain ``scheme``, ``host`` and ``port``;
        they are only read when a new pool has to be created.
        """
        # Lookup and insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset this option"; a key that was
                    # never set is simply ignored.
                    try:
                        del base_pool_kwargs[key]
                    except KeyError:
                        pass
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP method of the request.
        :param url: Absolute URL to request.
        :param redirect:
            Whether redirect responses are followed by this method.
        :param \\**kw:
            Passed on to the pool's ``urlopen``. ``assert_same_host`` and
            ``redirect`` are always overridden, and ``headers`` defaults to
            ``self.headers``. When ``retries`` is absent, the retry state
            attached to the response (which originates from the pool
            configuration) decides how many redirects may be followed.
        :return:
            The final response, or the redirect response itself when the
            redirect budget is exhausted and ``raise_on_redirect`` is false.
        :raises MaxRetryError:
            When the redirect budget is exhausted and the effective
            ``Retry`` has ``raise_on_redirect`` set.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects may cross hosts, so the pool must neither enforce the
        # same-host check nor follow redirects itself; both happen below.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        # Without an explicit per-call ``retries``, continue from the retry
        # state the pool attached to the response instead of silently falling
        # back to the library default; otherwise a limit configured on the
        # manager would never apply to redirects.
        retries = kw.get("retries", getattr(response, "retries", None))
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Budget exhausted but raising is disabled: hand back the
            # redirect response as-is.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
491
492
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        # Plain HTTP requests all share the single pool that talks to the proxy.
        resp1 = proxy.request("GET", "http://google.com/")
        resp2 = proxy.request("GET", "http://httpbin.org/")

        print(len(proxy.pools))
        # 1

        # Each HTTPS destination gets a pool of its own.
        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        # A connection pool object is also accepted in place of a URL string;
        # its scheme, host and port are turned back into a URL.
        if isinstance(proxy_url, HTTPConnectionPool):
            str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
        else:
            str_proxy_url = proxy_url
        proxy = parse_url(str_proxy_url)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        # Fill in the scheme's default port when the URL does not carry one.
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # The proxy settings travel to every pool as private keyword
        # arguments (and therefore also become part of the pool key).
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers
        connection_pool_kw["_proxy_config"] = self.proxy_config

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get the pool to use for a request to ``host``.

        For the ``https`` scheme a pool keyed on the destination host, port
        and scheme is returned. For every other scheme the destination is
        ignored and the pool for the proxy itself is returned.
        """
        if scheme == "https":
            return super().connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super().connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs  # type: ignore[union-attr]
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        :param url: Destination URL; its netloc (when present) becomes ``Host``.
        :param headers: User-supplied headers, which take precedence.
        :return: A new dictionary; ``headers`` itself is not modified.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        # Applied last so that user-provided values win over the defaults.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)
        if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super().urlopen(method, url, redirect=redirect, **kw)
635
636
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Build a :class:`ProxyManager` for the proxy located at ``url``.

    :param url: Passed to :class:`ProxyManager` as ``proxy_url``.
    :param \\**kw: Forwarded unchanged to the :class:`ProxyManager` constructor.
    :return: The new proxy manager.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager