comparison urllib3/response.py @ 7:5eb2d5e3bf22

planemo upload for repository https://toolrepo.galaxytrakr.org/view/jpayne/bioproject_to_srr_2/556cac4fb538
author jpayne
date Sun, 05 May 2024 23:32:17 -0400
parents
children
comparison
equal deleted inserted replaced
6:b2745907b1eb 7:5eb2d5e3bf22
1 from __future__ import annotations
2
3 import collections
4 import io
5 import json as _json
6 import logging
7 import re
8 import sys
9 import typing
10 import warnings
11 import zlib
12 from contextlib import contextmanager
13 from http.client import HTTPMessage as _HttplibHTTPMessage
14 from http.client import HTTPResponse as _HttplibHTTPResponse
15 from socket import timeout as SocketTimeout
16
17 if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
19
20 try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25 except ImportError:
26 brotli = None
27
try:
    import zstandard as zstd  # type: ignore[import-not-found]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # (Fixed: the original had a duplicated `_zstd_version = _zstd_version =`
    # assignment, which was harmless but clearly a typo.)
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None
43
44 from . import util
45 from ._base_connection import _TYPE_BODY
46 from ._collections import HTTPHeaderDict
47 from .connection import BaseSSLError, HTTPConnection, HTTPException
48 from .exceptions import (
49 BodyNotHttplibCompatible,
50 DecodeError,
51 HTTPError,
52 IncompleteRead,
53 InvalidChunkLength,
54 InvalidHeader,
55 ProtocolError,
56 ReadTimeoutError,
57 ResponseNotChunked,
58 SSLError,
59 )
60 from .util.response import is_fp_closed, is_response_to_head
61 from .util.retry import Retry
62
63 if typing.TYPE_CHECKING:
64 from typing import Literal
65
66 from .connectionpool import HTTPConnectionPool
67
68 log = logging.getLogger(__name__)
69
70
class ContentDecoder:
    """Abstract interface for streaming content decoders.

    Concrete subclasses consume compressed chunks via :meth:`decompress`
    and surface any final buffered output via :meth:`flush`.
    """

    def decompress(self, data: bytes) -> bytes:
        """Feed ``data`` to the decoder, returning whatever decoded bytes are ready."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Finalize decoding and return any remaining buffered bytes."""
        raise NotImplementedError()
77
78
class DeflateDecoder(ContentDecoder):
    """Decoder for 'deflate' content-encoding.

    Some servers send a raw DEFLATE stream rather than the zlib-wrapped
    form. The first chunk that decompresses successfully settles which
    variant we have: if zlib-wrapped parsing fails, the input accumulated
    so far is replayed through a raw-deflate decompressor.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        # Format already settled: decode directly.
        if not self._first_try:
            return self._obj.decompress(data)

        # Still probing: keep a copy of everything seen so far so we can
        # replay it through a raw-deflate decompressor on failure.
        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # zlib-wrapped parsing failed; assume a raw DEFLATE stream.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if decompressed:
            # Success: the wrapped format is confirmed, drop the probe copy.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return decompressed

    def flush(self) -> bytes:
        return self._obj.flush()
109
110
class GzipDecoderState:
    """State constants for :class:`GzipDecoder` (multi-member gzip streams)."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2
115
116
class GzipDecoder(ContentDecoder):
    """Decoder for 'gzip' content-encoding, supporting multi-member streams."""

    def __init__(self) -> None:
        # wbits = 16 + MAX_WBITS tells zlib to expect a gzip header/trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                was_past_first_member = (
                    self._state == GzipDecoderState.OTHER_MEMBERS
                )
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if was_past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover bytes mean another gzip member follows: restart with
            # a fresh decompressor object.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
145
146
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for 'br' content-encoding.

        Supports both the 'brotlipy'/'brotlicffi' and 'Brotli' packages,
        which share an import name: the former exposes
        ``Decompressor.decompress``, the latter ``Decompressor.process``.
        """

        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the per-instance decompress entry point to whichever
            # method the installed package provides.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only 'brotlipy' exposes flush(); 'Brotli' has nothing to flush.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
164
165
if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        """Decoder for 'zstd' content-encoding, handling concatenated frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            parts = [self._obj.decompress(data)]
            # A zstd body may contain several frames back to back; each time
            # a frame ends with leftover input, start a fresh decompressor
            # for the remainder.
            while self._obj.eof and self._obj.unused_data:
                leftover = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                parts.append(self._obj.decompress(leftover))
            return b"".join(parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            if not self._obj.eof:
                # 'eof' (zstandard >= 0.18.0) tells us whether a complete
                # frame was consumed; if not, the body was truncated.
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
187
188
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per comma-separated coding token.
        self._decoders = [_get_decoder(token.strip()) for token in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Encodings were applied left-to-right, so decode right-to-left.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
208
209
def _get_decoder(mode: str) -> ContentDecoder:
    """Build the decoder (or decoder chain) for a Content-Encoding value."""
    # A comma means several codings were applied in sequence.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()
    if brotli is not None and mode == "br":
        return BrotliDecoder()
    if zstd is not None and mode == "zstd":
        return ZstdDecoder()
    # Anything else is treated as deflate (the historical default).
    return DeflateDecoder()
226
227
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the buffer."""
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        copied = 0
        while copied < n and self.buffer:
            chunk = self.buffer.popleft()
            wanted = n - copied
            if len(chunk) > wanted:
                # Split the chunk: return the head, push the tail back for
                # the next call.
                out.write(chunk[:wanted])
                self.buffer.appendleft(chunk[wanted:])
                self._size -= wanted
                break
            out.write(chunk)
            self._size -= len(chunk)
            copied += len(chunk)

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire buffer contents as a single bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            # Single chunk: hand it back without copying.
            result = chunks.pop()
        else:
            out = io.BytesIO()
            out.writelines(chunks.popleft() for _ in range(len(chunks)))
            result = out.getvalue()
        self._size = 0
        return result
299
300
class BaseHTTPResponse(io.IOBase):
    """Shared base class for urllib3 responses.

    Handles header/status bookkeeping, content decoding, and the
    :mod:`io` / :mod:`http.client` compatibility surface. Subclasses
    implement the abstract ``read``/``stream``/``close`` family over a
    concrete transport.
    """

    # Content-Encoding values we can decode; extended at import time when
    # the optional brotli/zstd packages are available.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types a failing decoder may raise; _decode() wraps these
    # into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store response metadata and detect chunked transfer encoding."""
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Set once any read decodes content; guards against mixing
        # decode_content=True and decode_content=False reads.
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily created by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        # Declared here; subclasses assign the actual value.
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Abstract: subclasses return the (possibly cached) full body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Abstract: subclasses expose the effective request URL.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        # Abstract: subclasses expose their underlying connection, if any.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        # NOTE(review): assigning self.url relies on the subclass providing
        # a working url setter; the base setter raises NotImplementedError.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        # Abstract: subclasses yield body chunks of up to `amt` bytes.
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        # Abstract: subclasses implement reading from the transport.
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        # Abstract: single-raw-read variant of read().
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        # Abstract: subclasses iterate over transfer-encoding chunks.
        raise NotImplementedError()

    def release_conn(self) -> None:
        # Abstract: return the connection to its pool.
        raise NotImplementedError()

    def drain_conn(self) -> None:
        # Abstract: read and discard any unread body data.
        raise NotImplementedError()

    def close(self) -> None:
        # Abstract: close the response and its resources.
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a chained decoder when every
                # listed (non-empty) coding is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would corrupt the stream position.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read up to ``len(b)`` bytes into ``b``; return the count read."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        """Deprecated accessor for :attr:`headers`."""
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        """Deprecated accessor for a single header value."""
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
529
530
531 class HTTPResponse(BaseHTTPResponse):
532 """
533 HTTP Response container.
534
535 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
536 loaded and decoded on-demand when the ``data`` property is accessed. This
537 class is also compatible with the Python standard library's :mod:`io`
538 module, and can hence be treated as a readable object in the context of that
539 framework.
540
541 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
542
543 :param preload_content:
544 If True, the response's body will be preloaded during construction.
545
546 :param decode_content:
547 If True, will attempt to decode the body based on the
548 'content-encoding' header.
549
550 :param original_response:
551 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
552 object, it's convenient to include the original for debug purposes. It's
553 otherwise unused.
554
555 :param retries:
556 The retries contains the last :class:`~urllib3.util.retry.Retry` that
557 was used during the request.
558
559 :param enforce_content_length:
560 Enforce content length checking. Body returned by server must match
561 value of Content-Length header, if present. Otherwise, raise error.
562 """
563
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        """Wrap a body (str/bytes or file-like object) with decoding,
        length-enforcement, and connection-pooling support.

        See the class docstring for parameter semantics.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; anything file-like becomes
        # the read source (_fp) below instead.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
623
624 def release_conn(self) -> None:
625 if not self._pool or not self._connection:
626 return None
627
628 self._pool._put_conn(self._connection)
629 self._connection = None
630
631 def drain_conn(self) -> None:
632 """
633 Read and discard any remaining HTTP response data in the response connection.
634
635 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
636 """
637 try:
638 self.read()
639 except (HTTPError, OSError, BaseSSLError, HTTPException):
640 pass
641
642 @property
643 def data(self) -> bytes:
644 # For backwards-compat with earlier urllib3 0.4 and earlier.
645 if self._body:
646 return self._body # type: ignore[return-value]
647
648 if self._fp:
649 return self.read(cache_content=True)
650
651 return None # type: ignore[return-value]
652
653 @property
654 def connection(self) -> HTTPConnection | None:
655 return self._connection
656
657 def isclosed(self) -> bool:
658 return is_fp_closed(self._fp)
659
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Counts raw bytes read in _raw_read(), before any decoding.
        return self._fp_bytes_read
667
668 def _init_length(self, request_method: str | None) -> int | None:
669 """
670 Set initial length value for Response content if available.
671 """
672 length: int | None
673 content_length: str | None = self.headers.get("content-length")
674
675 if content_length is not None:
676 if self.chunked:
677 # This Response will fail with an IncompleteRead if it can't be
678 # received as chunked. This method falls back to attempt reading
679 # the response before raising an exception.
680 log.warning(
681 "Received response with both Content-Length and "
682 "Transfer-Encoding set. This is expressly forbidden "
683 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
684 "attempting to process response as Transfer-Encoding: "
685 "chunked."
686 )
687 return None
688
689 try:
690 # RFC 7230 section 3.3.2 specifies multiple content lengths can
691 # be sent in a single Content-Length header
692 # (e.g. Content-Length: 42, 42). This line ensures the values
693 # are all valid ints and that as long as the `set` length is 1,
694 # all values are the same. Otherwise, the header is invalid.
695 lengths = {int(val) for val in content_length.split(",")}
696 if len(lengths) > 1:
697 raise InvalidHeader(
698 "Content-Length contained multiple "
699 "unmatching values (%s)" % content_length
700 )
701 length = lengths.pop()
702 except ValueError:
703 length = None
704 else:
705 if length < 0:
706 length = None
707
708 else: # if content_length is None
709 length = None
710
711 # Convert status to int for comparison
712 # In some cases, httplib returns a status of "_UNKNOWN"
713 try:
714 status = int(self.status)
715 except ValueError:
716 status = 0
717
718 # Check for responses that shouldn't include a body
719 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
720 length = 0
721
722 return length
723
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # The SSL read timed out, surface it like a socket timeout.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    # NOTE(review): expected == -partial presumably marks a
                    # response that should not have had a body — confirm
                    # against IncompleteRead's raisers.
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
789
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 returns at most one raw chunk, so capping at
                # c_int_max is sufficient on its own.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
846
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also maintains ``_fp_bytes_read``/``length_remaining`` bookkeeping
        and enforces Content-Length when ``enforce_content_length`` is set.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
898
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve the request straight from the already-decoded buffer
            # when possible, avoiding a network read.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end of stream: either we asked for everything
        # (amt is None) or a non-zero request returned no data.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until the decoder has produced enough bytes or
            # the raw stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
969
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            # The raw chunk decoded to nothing (e.g. we are still inside a
            # compression header): read more raw data and try again.
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1020
1021 def stream(
1022 self, amt: int | None = 2**16, decode_content: bool | None = None
1023 ) -> typing.Generator[bytes, None, None]:
1024 """
1025 A generator wrapper for the read() method. A call will block until
1026 ``amt`` bytes have been read from the connection or until the
1027 connection is closed.
1028
1029 :param amt:
1030 How much of the content to read. The generator will return up to
1031 much data per iteration, but may return less. This is particularly
1032 likely when using compressed data. However, the empty string will
1033 never be returned.
1034
1035 :param decode_content:
1036 If True, will attempt to decode the body based on the
1037 'content-encoding' header.
1038 """
1039 if self.chunked and self.supports_chunked_reads():
1040 yield from self.read_chunked(amt, decode_content=decode_content)
1041 else:
1042 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1043 data = self.read(amt=amt, decode_content=decode_content)
1044
1045 if data:
1046 yield data
1047
1048 # Overrides from io.IOBase
1049 def readable(self) -> bool:
1050 return True
1051
1052 def close(self) -> None:
1053 if not self.closed and self._fp:
1054 self._fp.close()
1055
1056 if self._connection:
1057 self._connection.close()
1058
1059 if not self.auto_close:
1060 io.IOBase.close(self)
1061
1062 @property
1063 def closed(self) -> bool:
1064 if not self.auto_close:
1065 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1066 elif self._fp is None:
1067 return True
1068 elif hasattr(self._fp, "isclosed"):
1069 return self._fp.isclosed()
1070 elif hasattr(self._fp, "closed"):
1071 return self._fp.closed
1072 else:
1073 return True
1074
1075 def fileno(self) -> int:
1076 if self._fp is None:
1077 raise OSError("HTTPResponse has no file to get a fileno from")
1078 elif hasattr(self._fp, "fileno"):
1079 return self._fp.fileno()
1080 else:
1081 raise OSError(
1082 "The file-like object this HTTPResponse is wrapped "
1083 "around has no file descriptor"
1084 )
1085
1086 def flush(self) -> None:
1087 if (
1088 self._fp is not None
1089 and hasattr(self._fp, "flush")
1090 and not getattr(self._fp, "closed", False)
1091 ):
1092 return self._fp.flush()
1093
1094 def supports_chunked_reads(self) -> bool:
1095 """
1096 Checks if the underlying file-like object looks like a
1097 :class:`http.client.HTTPResponse` object. We do this by testing for
1098 the fp attribute. If it is present we assume it returns raw chunks as
1099 processed by read_chunked().
1100 """
1101 return hasattr(self._fp, "fp")
1102
1103 def _update_chunk_length(self) -> None:
1104 # First, we'll figure out length of a chunk and then
1105 # we'll try to read it from socket.
1106 if self.chunk_left is not None:
1107 return None
1108 line = self._fp.fp.readline() # type: ignore[union-attr]
1109 line = line.split(b";", 1)[0]
1110 try:
1111 self.chunk_left = int(line, 16)
1112 except ValueError:
1113 self.close()
1114 if line:
1115 # Invalid chunked protocol response, abort.
1116 raise InvalidChunkLength(self, line) from None
1117 else:
1118 # Truncated at start of next chunk
1119 raise ProtocolError("Response ended prematurely") from None
1120
1121 def _handle_chunk(self, amt: int | None) -> bytes:
1122 returned_chunk = None
1123 if amt is None:
1124 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1125 returned_chunk = chunk
1126 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1127 self.chunk_left = None
1128 elif self.chunk_left is not None and amt < self.chunk_left:
1129 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1130 self.chunk_left = self.chunk_left - amt
1131 returned_chunk = value
1132 elif amt == self.chunk_left:
1133 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1134 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1135 self.chunk_left = None
1136 returned_chunk = value
1137 else: # amt > self.chunk_left
1138 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1139 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1140 self.chunk_left = None
1141 return returned_chunk # type: ignore[no-any-return]
1142
1143 def read_chunked(
1144 self, amt: int | None = None, decode_content: bool | None = None
1145 ) -> typing.Generator[bytes, None, None]:
1146 """
1147 Similar to :meth:`HTTPResponse.read`, but with an additional
1148 parameter: ``decode_content``.
1149
1150 :param amt:
1151 How much of the content to read. If specified, caching is skipped
1152 because it doesn't make sense to cache partial content as the full
1153 response.
1154
1155 :param decode_content:
1156 If True, will attempt to decode the body based on the
1157 'content-encoding' header.
1158 """
1159 self._init_decoder()
1160 # FIXME: Rewrite this method and make it a class with a better structured logic.
1161 if not self.chunked:
1162 raise ResponseNotChunked(
1163 "Response is not chunked. "
1164 "Header 'transfer-encoding: chunked' is missing."
1165 )
1166 if not self.supports_chunked_reads():
1167 raise BodyNotHttplibCompatible(
1168 "Body should be http.client.HTTPResponse like. "
1169 "It should have have an fp attribute which returns raw chunks."
1170 )
1171
1172 with self._error_catcher():
1173 # Don't bother reading the body of a HEAD request.
1174 if self._original_response and is_response_to_head(self._original_response):
1175 self._original_response.close()
1176 return None
1177
1178 # If a response is already read and closed
1179 # then return immediately.
1180 if self._fp.fp is None: # type: ignore[union-attr]
1181 return None
1182
1183 while True:
1184 self._update_chunk_length()
1185 if self.chunk_left == 0:
1186 break
1187 chunk = self._handle_chunk(amt)
1188 decoded = self._decode(
1189 chunk, decode_content=decode_content, flush_decoder=False
1190 )
1191 if decoded:
1192 yield decoded
1193
1194 if decode_content:
1195 # On CPython and PyPy, we should never need to flush the
1196 # decoder. However, on Jython we *might* need to, so
1197 # lets defensively do it anyway.
1198 decoded = self._flush_decoder()
1199 if decoded: # Platform-specific: Jython.
1200 yield decoded
1201
1202 # Chunk content ends with \r\n: discard it.
1203 while self._fp is not None:
1204 line = self._fp.fp.readline()
1205 if not line:
1206 # Some sites may not end with '\r\n'.
1207 break
1208 if line == b"\r\n":
1209 break
1210
1211 # We read everything; close the "file".
1212 if self._original_response:
1213 self._original_response.close()
1214
1215 @property
1216 def url(self) -> str | None:
1217 """
1218 Returns the URL that was the source of this response.
1219 If the request that generated this response redirected, this method
1220 will return the final redirect location.
1221 """
1222 return self._request_url
1223
1224 @url.setter
1225 def url(self, url: str) -> None:
1226 self._request_url = url
1227
1228 def __iter__(self) -> typing.Iterator[bytes]:
1229 buffer: list[bytes] = []
1230 for chunk in self.stream(decode_content=True):
1231 if b"\n" in chunk:
1232 chunks = chunk.split(b"\n")
1233 yield b"".join(buffer) + chunks[0] + b"\n"
1234 for x in chunks[1:-1]:
1235 yield x + b"\n"
1236 if chunks[-1]:
1237 buffer = [chunks[-1]]
1238 else:
1239 buffer = []
1240 else:
1241 buffer.append(chunk)
1242 if buffer:
1243 yield b"".join(buffer)