comparison urllib3/util/retry.py @ 7:5eb2d5e3bf22

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Sun, 05 May 2024 23:32:17 -0400
parents
children
comparison
equal deleted inserted replaced
6:b2745907b1eb 7:5eb2d5e3bf22
1 from __future__ import annotations
2
3 import email
4 import logging
5 import random
6 import re
7 import time
8 import typing
9 from itertools import takewhile
10 from types import TracebackType
11
12 from ..exceptions import (
13 ConnectTimeoutError,
14 InvalidHeader,
15 MaxRetryError,
16 ProtocolError,
17 ProxyError,
18 ReadTimeoutError,
19 ResponseError,
20 )
21 from .util import reraise
22
23 if typing.TYPE_CHECKING:
24 from ..connectionpool import ConnectionPool
25 from ..response import BaseHTTPResponse
26
# Module-level logger named after this module; used for the debug messages
# emitted when retry settings are converted or incremented.
log = logging.getLogger(__name__)
28
29
class RequestHistory(typing.NamedTuple):
    """Metadata describing one request that resulted in a retry.

    One entry is appended to ``Retry.history`` for every call to
    ``Retry.increment``.
    """

    # HTTP method of the request, if it was supplied to ``increment``.
    method: str | None
    # URL of the request, if it was supplied to ``increment``.
    url: str | None
    # Exception that triggered the retry, or ``None`` for response-driven retries.
    error: Exception | None
    # Status code of the response that triggered the retry, if any.
    status: int | None
    # Redirect target reported by the response; ``None`` when not a redirect.
    redirect_location: str | None
37
38
class Retry:
    """Retry configuration.

    Each retry attempt will create a new Retry object with updated values, so
    they can be safely reused.

    Retries can be defined as a default for a pool:

    .. code-block:: python

        retries = Retry(connect=5, read=2, redirect=5)
        http = PoolManager(retries=retries)
        response = http.request("GET", "https://example.com/")

    Or per-request (which overrides the default for the pool):

    .. code-block:: python

        response = http.request("GET", "https://example.com/", retries=Retry(10))

    Retries can be disabled by passing ``False``:

    .. code-block:: python

        response = http.request("GET", "https://example.com/", retries=False)

    Errors will be wrapped in :class:`~urllib3.exceptions.MaxRetryError` unless
    retries are disabled, in which case the causing exception will be raised.

    :param int total:
        Total number of retries to allow. Takes precedence over other counts.

        Set to ``None`` to remove this constraint and fall back on other
        counts.

        Set to ``0`` to fail on the first retry.

        Set to ``False`` to disable and imply ``raise_on_redirect=False``.

    :param int connect:
        How many connection-related errors to retry on.

        These are errors raised before the request is sent to the remote server,
        which we assume has not triggered the server to process the request.

        Set to ``0`` to fail on the first retry of this type.

    :param int read:
        How many times to retry on read errors.

        These errors are raised after the request was sent to the server, so the
        request may have side-effects.

        Set to ``0`` to fail on the first retry of this type.

    :param int redirect:
        How many redirects to perform. Limit this to avoid infinite redirect
        loops.

        A redirect is a HTTP response with a status code 301, 302, 303, 307 or
        308.

        Set to ``0`` to fail on the first retry of this type.

        Set to ``False`` to disable and imply ``raise_on_redirect=False``.

    :param int status:
        How many times to retry on bad status codes.

        These are retries made on responses, where status code matches
        ``status_forcelist``.

        Set to ``0`` to fail on the first retry of this type.

    :param int other:
        How many times to retry on other errors.

        Other errors are errors that are not connect, read, redirect or status errors.
        These errors might be raised after the request was sent to the server, so the
        request might have side-effects.

        Set to ``0`` to fail on the first retry of this type.

        If ``total`` is not set, it's a good idea to set this to 0 to account
        for unexpected edge cases and avoid infinite retry loops.

    :param Collection allowed_methods:
        Set of uppercased HTTP method verbs that we should retry on.

        By default, we only retry on methods which are considered to be
        idempotent (multiple requests with the same parameters end with the
        same state). See :attr:`Retry.DEFAULT_ALLOWED_METHODS`.

        Set to a ``None`` value to retry on any verb.

    :param Collection status_forcelist:
        A set of integer HTTP status codes that we should force a retry on.
        A retry is initiated if the request method is in ``allowed_methods``
        and the response status code is in ``status_forcelist``.

        By default, this is disabled with ``None``.

    :param float backoff_factor:
        A backoff factor to apply between attempts after the second try
        (most errors are resolved immediately by a second try without a
        delay). urllib3 will sleep for::

            {backoff factor} * (2 ** ({number of previous retries}))

        seconds. If `backoff_jitter` is non-zero, this sleep is extended by::

            random.uniform(0, {backoff jitter})

        seconds. For example, if the backoff_factor is 0.1, then :func:`Retry.sleep` will
        sleep for [0.0s, 0.2s, 0.4s, 0.8s, ...] between retries. No backoff will ever
        be longer than `backoff_max`.

        By default, backoff is disabled (factor set to 0).

    :param float backoff_max:
        Upper bound, in seconds, on the delay returned by
        :meth:`Retry.get_backoff_time`. Defaults to
        :attr:`Retry.DEFAULT_BACKOFF_MAX`.

    :param float backoff_jitter:
        When non-zero, a random amount in ``[0, backoff_jitter)`` seconds is
        added to the computed backoff before the ``backoff_max`` cap is
        applied. Defaults to ``0.0`` (no jitter).

    :param bool raise_on_redirect: Whether, if the number of redirects is
        exhausted, to raise a MaxRetryError, or to return a response with a
        response code in the 3xx range.

    :param bool raise_on_status: Similar meaning to ``raise_on_redirect``:
        whether we should raise an exception, or return a response,
        if status falls in ``status_forcelist`` range and retries have
        been exhausted.

    :param tuple history: The history of the request encountered during
        each call to :meth:`~Retry.increment`. The list is in the order
        the requests occurred. Each list item is of class :class:`RequestHistory`.

    :param bool respect_retry_after_header:
        Whether to respect Retry-After header on status codes defined as
        :attr:`Retry.RETRY_AFTER_STATUS_CODES` or not.

    :param Collection remove_headers_on_redirect:
        Sequence of headers to remove from the request when a response
        indicating a redirect is returned before firing off the redirected
        request.
    """

    #: Default methods to be used for ``allowed_methods``
    DEFAULT_ALLOWED_METHODS = frozenset(
        ["HEAD", "GET", "PUT", "DELETE", "OPTIONS", "TRACE"]
    )

    #: Status codes for which a ``Retry-After`` response header is honored
    #: (see :meth:`Retry.is_retry`).
    RETRY_AFTER_STATUS_CODES = frozenset([413, 429, 503])

    #: Default headers to be used for ``remove_headers_on_redirect``
    DEFAULT_REMOVE_HEADERS_ON_REDIRECT = frozenset(["Cookie", "Authorization"])

    #: Default maximum backoff time.
    DEFAULT_BACKOFF_MAX = 120

    # Backward compatibility; assigned outside of the class.
    DEFAULT: typing.ClassVar[Retry]

    def __init__(
        self,
        total: bool | int | None = 10,
        connect: int | None = None,
        read: int | None = None,
        redirect: bool | int | None = None,
        status: int | None = None,
        other: int | None = None,
        allowed_methods: typing.Collection[str] | None = DEFAULT_ALLOWED_METHODS,
        status_forcelist: typing.Collection[int] | None = None,
        backoff_factor: float = 0,
        backoff_max: float = DEFAULT_BACKOFF_MAX,
        raise_on_redirect: bool = True,
        raise_on_status: bool = True,
        history: tuple[RequestHistory, ...] | None = None,
        respect_retry_after_header: bool = True,
        remove_headers_on_redirect: typing.Collection[
            str
        ] = DEFAULT_REMOVE_HEADERS_ON_REDIRECT,
        backoff_jitter: float = 0.0,
    ) -> None:
        """Store the retry budget and policy; see the class docstring for
        the meaning of each parameter.
        """
        self.total = total
        self.connect = connect
        self.read = read
        self.status = status
        self.other = other

        # Disabling redirects (or retries altogether) also means an exhausted
        # redirect budget must hand back the 3xx response instead of raising.
        if redirect is False or total is False:
            redirect = 0
            raise_on_redirect = False

        self.redirect = redirect
        self.status_forcelist = status_forcelist or set()
        self.allowed_methods = allowed_methods
        self.backoff_factor = backoff_factor
        self.backoff_max = backoff_max
        self.raise_on_redirect = raise_on_redirect
        self.raise_on_status = raise_on_status
        self.history = history or ()
        self.respect_retry_after_header = respect_retry_after_header
        # Header names are stored lower-cased.
        self.remove_headers_on_redirect = frozenset(
            h.lower() for h in remove_headers_on_redirect
        )
        self.backoff_jitter = backoff_jitter

    def new(self, **kw: typing.Any) -> Retry:
        """Return a copy of this object (same concrete type) with the given
        constructor keyword arguments overridden.
        """
        params = dict(
            total=self.total,
            connect=self.connect,
            read=self.read,
            redirect=self.redirect,
            status=self.status,
            other=self.other,
            allowed_methods=self.allowed_methods,
            status_forcelist=self.status_forcelist,
            backoff_factor=self.backoff_factor,
            backoff_max=self.backoff_max,
            raise_on_redirect=self.raise_on_redirect,
            raise_on_status=self.raise_on_status,
            history=self.history,
            remove_headers_on_redirect=self.remove_headers_on_redirect,
            respect_retry_after_header=self.respect_retry_after_header,
            backoff_jitter=self.backoff_jitter,
        )

        params.update(kw)
        return type(self)(**params)  # type: ignore[arg-type]

    @classmethod
    def from_int(
        cls,
        retries: Retry | bool | int | None,
        redirect: bool | int | None = True,
        default: Retry | bool | int | None = None,
    ) -> Retry:
        """Backwards-compatibility for the old retries format.

        :param retries: An existing :class:`Retry` (returned unchanged), a
            value usable as ``total``, or ``None`` to use ``default`` (falling
            back to ``cls.DEFAULT`` when ``default`` is also ``None``).
        :param redirect: A truthy value leaves the redirect count
            unconstrained (``None``); a falsy value disables redirects.
        :param default: Replacement for ``retries`` when it is ``None``.
        """
        if retries is None:
            retries = default if default is not None else cls.DEFAULT

        if isinstance(retries, Retry):
            return retries

        # Truthy -> no dedicated redirect limit; falsy -> redirects disabled.
        redirect = None if redirect else False
        new_retries = cls(retries, redirect=redirect)
        log.debug("Converted retries value: %r -> %r", retries, new_retries)
        return new_retries

    def get_backoff_time(self) -> float:
        """Formula for computing the current backoff

        :rtype: float
        """
        # We want to consider only the last consecutive errors sequence (Ignore redirects).
        consecutive_errors_len = sum(
            1
            for _ in takewhile(
                lambda x: x.redirect_location is None, reversed(self.history)
            )
        )
        if consecutive_errors_len <= 1:
            return 0

        backoff_value = self.backoff_factor * (2 ** (consecutive_errors_len - 1))
        if self.backoff_jitter != 0.0:
            backoff_value += random.random() * self.backoff_jitter
        # Clamp the result into [0, backoff_max].
        return float(max(0, min(self.backoff_max, backoff_value)))

    def parse_retry_after(self, retry_after: str) -> float:
        """Convert a ``Retry-After`` header value into a delay in seconds.

        :param retry_after: Either a non-negative integer number of seconds
            or a date string understood by ``email.utils.parsedate_tz``.
        :return: The delay in seconds, never negative.
        :raises InvalidHeader: If the value is neither an integer nor a
            parseable date.
        """
        # ``import email`` only loads the package itself; import the helpers
        # explicitly so this method does not depend on some other module
        # having already loaded the ``email.utils`` submodule.
        from email.utils import mktime_tz, parsedate_tz

        seconds: float
        # Whitespace: https://tools.ietf.org/html/rfc7230#section-3.2.4
        if re.match(r"^\s*[0-9]+\s*$", retry_after):
            seconds = int(retry_after)
        else:
            retry_date_tuple = parsedate_tz(retry_after)
            if retry_date_tuple is None:
                raise InvalidHeader(f"Invalid Retry-After header: {retry_after}")

            retry_date = mktime_tz(retry_date_tuple)
            seconds = retry_date - time.time()

        # A date in the past yields a negative delta; never return less than 0.
        seconds = max(seconds, 0)

        return seconds

    def get_retry_after(self, response: BaseHTTPResponse) -> float | None:
        """Get the value of Retry-After in seconds."""

        retry_after = response.headers.get("Retry-After")

        if retry_after is None:
            return None

        return self.parse_retry_after(retry_after)

    def sleep_for_retry(self, response: BaseHTTPResponse) -> bool:
        """Sleep for the duration requested by the response's ``Retry-After``
        header.

        :return: ``True`` if a sleep was performed, ``False`` when the header
            is absent or resolves to a delay of zero.
        """
        retry_after = self.get_retry_after(response)
        if retry_after:
            time.sleep(retry_after)
            return True

        return False

    def _sleep_backoff(self) -> None:
        """Sleep for :meth:`get_backoff_time` seconds, if that is positive."""
        backoff = self.get_backoff_time()
        if backoff <= 0:
            return
        time.sleep(backoff)

    def sleep(self, response: BaseHTTPResponse | None = None) -> None:
        """Sleep between retry attempts.

        This method will respect a server's ``Retry-After`` response header
        and sleep the duration of the time requested. If that is not present, it
        will use an exponential backoff. By default, the backoff factor is 0 and
        this method will return immediately.
        """

        if self.respect_retry_after_header and response:
            slept = self.sleep_for_retry(response)
            if slept:
                return

        self._sleep_backoff()

    def _is_connection_error(self, err: Exception) -> bool:
        """Errors when we're fairly sure that the server did not receive the
        request, so it should be safe to retry.
        """
        # Classify a proxy failure by the error it wraps.
        if isinstance(err, ProxyError):
            err = err.original_error
        return isinstance(err, ConnectTimeoutError)

    def _is_read_error(self, err: Exception) -> bool:
        """Errors that occur after the request has been started, so we should
        assume that the server began processing it.
        """
        return isinstance(err, (ReadTimeoutError, ProtocolError))

    def _is_method_retryable(self, method: str) -> bool:
        """Checks if a given HTTP method should be retried upon, depending if
        it is included in the allowed_methods
        """
        # An empty/``None`` ``allowed_methods`` places no restriction on the verb.
        if self.allowed_methods and method.upper() not in self.allowed_methods:
            return False
        return True

    def is_retry(
        self, method: str, status_code: int, has_retry_after: bool = False
    ) -> bool:
        """Is this method/status code retryable? (Based on allowlists and control
        variables such as the number of total retries to allow, whether to
        respect the Retry-After header, whether this header is present, and
        whether the returned status code is on the list of status codes to
        be retried upon on the presence of the aforementioned header)
        """
        if not self._is_method_retryable(method):
            return False

        if self.status_forcelist and status_code in self.status_forcelist:
            return True

        return bool(
            self.total
            and self.respect_retry_after_header
            and has_retry_after
            and (status_code in self.RETRY_AFTER_STATUS_CODES)
        )

    def is_exhausted(self) -> bool:
        """Are we out of retries?"""
        # Falsy counters (``None``, ``False``, ``0``) cannot be negative, so
        # they are dropped before looking for an overdrawn budget.
        retry_counts = [
            x
            for x in (
                self.total,
                self.connect,
                self.read,
                self.redirect,
                self.status,
                self.other,
            )
            if x
        ]
        if not retry_counts:
            return False

        return min(retry_counts) < 0

    def increment(
        self,
        method: str | None = None,
        url: str | None = None,
        response: BaseHTTPResponse | None = None,
        error: Exception | None = None,
        _pool: ConnectionPool | None = None,
        _stacktrace: TracebackType | None = None,
    ) -> Retry:
        """Return a new Retry object with incremented retry counters.

        :param response: A response object, or None, if the server did not
            return a response.
        :type response: :class:`~urllib3.response.BaseHTTPResponse`
        :param Exception error: An error encountered during the request, or
            None if the response was received successfully.

        :return: A new ``Retry`` object.
        :raises MaxRetryError: If the new object's retry budget is exhausted.
        """
        if self.total is False and error:
            # Disabled, indicate to re-raise the error.
            raise reraise(type(error), error, _stacktrace)

        total = self.total
        if total is not None:
            total -= 1

        connect = self.connect
        read = self.read
        redirect = self.redirect
        status_count = self.status
        other = self.other
        cause = "unknown"
        status = None
        redirect_location = None

        if error and self._is_connection_error(error):
            # Connect retry?
            if connect is False:
                raise reraise(type(error), error, _stacktrace)
            elif connect is not None:
                connect -= 1

        elif error and self._is_read_error(error):
            # Read retry?
            if read is False or method is None or not self._is_method_retryable(method):
                raise reraise(type(error), error, _stacktrace)
            elif read is not None:
                read -= 1

        elif error:
            # Other retry?
            if other is not None:
                other -= 1

        elif response and response.get_redirect_location():
            # Redirect retry?
            if redirect is not None:
                redirect -= 1
            cause = "too many redirects"
            response_redirect_location = response.get_redirect_location()
            if response_redirect_location:
                redirect_location = response_redirect_location
            status = response.status

        else:
            # Incrementing because of a server error like a 500 in
            # status_forcelist and the given method is in the allowed_methods
            cause = ResponseError.GENERIC_ERROR
            if response and response.status:
                if status_count is not None:
                    status_count -= 1
                cause = ResponseError.SPECIFIC_ERROR.format(status_code=response.status)
                status = response.status

        # Record this attempt so backoff can count consecutive failures.
        history = self.history + (
            RequestHistory(method, url, error, status, redirect_location),
        )

        new_retry = self.new(
            total=total,
            connect=connect,
            read=read,
            redirect=redirect,
            status=status_count,
            other=other,
            history=history,
        )

        if new_retry.is_exhausted():
            reason = error or ResponseError(cause)
            raise MaxRetryError(_pool, url, reason) from reason  # type: ignore[arg-type]

        log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)

        return new_retry

    def __repr__(self) -> str:
        """Return a summary of the main retry counters."""
        return (
            f"{type(self).__name__}(total={self.total}, connect={self.connect}, "
            f"read={self.read}, redirect={self.redirect}, status={self.status})"
        )


# For backwards compatibility (equivalent to pre-v1.9):
Retry.DEFAULT = Retry(3)