comparison CSP2/CSP2_env/env-d9b9114564458d9d-741b3de822f2aaca6c6caa4325c4afce/lib/python3.8/site-packages/urllib3/response.py @ 69:33d812a61356

planemo upload commit 2e9511a184a1ca667c7be0c6321a36dc4e3d116d
author jpayne
date Tue, 18 Mar 2025 17:55:14 -0400
parents
children
comparison
equal deleted inserted replaced
67:0e9998148a16 69:33d812a61356
1 from __future__ import annotations
2
3 import collections
4 import io
5 import json as _json
6 import logging
7 import re
8 import sys
9 import typing
10 import warnings
11 import zlib
12 from contextlib import contextmanager
13 from http.client import HTTPMessage as _HttplibHTTPMessage
14 from http.client import HTTPResponse as _HttplibHTTPResponse
15 from socket import timeout as SocketTimeout
16
17 if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
19
20 try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25 except ImportError:
26 brotli = None
27
try:
    import zstandard as zstd
except (AttributeError, ImportError, ValueError):  # Defensive:
    # zstandard is optional; any failure importing it simply disables
    # zstd content decoding.
    HAS_ZSTD = False
else:
    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True
44
45 from . import util
46 from ._base_connection import _TYPE_BODY
47 from ._collections import HTTPHeaderDict
48 from .connection import BaseSSLError, HTTPConnection, HTTPException
49 from .exceptions import (
50 BodyNotHttplibCompatible,
51 DecodeError,
52 HTTPError,
53 IncompleteRead,
54 InvalidChunkLength,
55 InvalidHeader,
56 ProtocolError,
57 ReadTimeoutError,
58 ResponseNotChunked,
59 SSLError,
60 )
61 from .util.response import is_fp_closed, is_response_to_head
62 from .util.retry import Retry
63
64 if typing.TYPE_CHECKING:
65 from .connectionpool import HTTPConnectionPool
66
67 log = logging.getLogger(__name__)
68
69
class ContentDecoder:
    """Abstract interface for streaming content-coding decoders.

    Concrete implementations (gzip, deflate, and optionally brotli and
    zstd) accept compressed chunks through :meth:`decompress` and emit
    any buffered trailing output from :meth:`flush`.
    """

    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()
76
77
class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    Some servers send a raw DEFLATE stream instead of the RFC 1950
    zlib-wrapped format the header actually names.  We begin by assuming
    the zlib wrapper; if the first chunk fails to decode we fall back to
    a raw-DEFLATE decompressor and replay every byte seen so far.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data
        if self._first_try:
            return self._probe(data)
        # Format already settled; decode directly.
        return self._obj.decompress(data)

    def _probe(self, data: bytes) -> bytes:
        # Remember every input byte in case the zlib-wrapped guess is
        # wrong and we must replay it through a raw-DEFLATE object.
        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                # Output produced: the zlib-wrapped guess was right.
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            # Wrong guess: switch to raw DEFLATE (negative wbits) and
            # feed it everything accumulated so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
108
109
class GzipDecoderState:
    """State labels for :class:`GzipDecoder`'s multi-member handling."""

    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # decoding a subsequent concatenated member
    SWALLOW_DATA = 2  # error seen; discard the rest of the body
114
115
class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip`` (and ``x-gzip``).

    Supports multiple concatenated gzip members in one body and, once at
    least one member decoded cleanly, tolerates trailing garbage the way
    other gzip clients do.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        out = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(out)
        while True:
            try:
                out += self._obj.decompress(data)
            except zlib.error:
                past_first_member = (
                    self._state == GzipDecoderState.OTHER_MEMBERS
                )
                # Ignore everything after the first error.
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Trailing garbage after a complete member is accepted.
                    return bytes(out)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(out)
            # Another gzip member follows: restart with a fresh object.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
144
145
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # 'brotlipy' exposes .decompress(); 'Brotli' exposes .process().
            # Bind whichever exists straight onto the instance so later
            # decompress() calls need no per-chunk dispatch.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only 'brotlipy' has a flush(); 'Brotli' buffers nothing.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
163
164
if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A body may contain several concatenated zstd frames; when a
            # frame ends (eof) with input left over, start a fresh
            # decompressor on the remainder.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            if not self._obj.eof:
                # Stream ended mid-frame: the response body was truncated.
                raise DecodeError("Zstandard data is incomplete")
            return ret
186
187
class MultiDecoder(ContentDecoder):
    """Decoder for a comma-separated list of content codings.

    RFC 7231 requires the sender to list codings in the order they were
    applied, so decoding must undo them in reverse order.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in header (application) order.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # Only the first-applied (innermost) decoder can hold trailing state.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Peel the codings off in reverse order of application.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
207
208
def _get_decoder(mode: str) -> ContentDecoder:
    """Return the decoder matching *mode*, a content-coding token or a
    comma-separated list of them (which yields a :class:`MultiDecoder`).
    Unknown single tokens fall through to :class:`DeflateDecoder`."""
    if "," in mode:
        return MultiDecoder(mode)

    # RFC 9110 section 8.4.1.3: recipients should treat x-gzip as an
    # alias for gzip.
    if mode in {"gzip", "x-gzip"}:
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if HAS_ZSTD and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
225
226
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    ``read()`` must hand back exactly the number of bytes requested even
    though decoded data arrives in arbitrarily sized chunks, so chunks
    are queued with :meth:`put` and sliced back out with :meth:`get`.

    Peak memory usage is bounded by the queued data plus the largest
    single chunk copied inside :meth:`get`; the worst case is one big
    chunk, which gets copied wholesale.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append *data* to the tail of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to *n* bytes from the head of the queue.

        ``n == 0`` yields ``b""``; an empty buffer raises RuntimeError;
        a negative *n* (with data buffered) raises ValueError.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            needed = n - fetched
            if len(chunk) > needed:
                # Split the chunk and keep the unread tail queued.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)
            if not self.buffer:
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire contents as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            # Single chunk: hand it back without any copy.
            result = chunks.pop()
        else:
            out = io.BytesIO()
            out.writelines(chunks.popleft() for _ in range(len(chunks)))
            result = out.getvalue()
        self._size = 0
        return result
298
299
class BaseHTTPResponse(io.IOBase):
    """Shared interface and decoding machinery for urllib3 responses.

    Subclasses supply the actual transport (``read``/``stream``/...);
    this base handles headers, redirect status, content decoding state,
    and the http.client/io compatibility shims.
    """

    # Content-codings we can decode; optional codecs are advertised only
    # when their backing library imported successfully at module load.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types that _decode() translates into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Normalize headers into the case-insensitive multidict urllib3 uses.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Setting via the property may also update the URL from history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily initialized by _init_decoder() on first read.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the full (possibly cached) body here.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a MultiDecoder if at least
                # one listed coding is something we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would return inconsistent bytes.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
538
539
540 class HTTPResponse(BaseHTTPResponse):
541 """
542 HTTP Response container.
543
544 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
545 loaded and decoded on-demand when the ``data`` property is accessed. This
546 class is also compatible with the Python standard library's :mod:`io`
547 module, and can hence be treated as a readable object in the context of that
548 framework.
549
550 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
551
552 :param preload_content:
553 If True, the response's body will be preloaded during construction.
554
555 :param decode_content:
556 If True, will attempt to decode the body based on the
557 'content-encoding' header.
558
559 :param original_response:
560 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
561 object, it's convenient to include the original for debug purposes. It's
562 otherwise unused.
563
564 :param retries:
565 The retries contains the last :class:`~urllib3.util.retry.Retry` that
566 was used during the request.
567
568 :param enforce_content_length:
569 Enforce content length checking. Body returned by server must match
570 value of Content-Length header, if present. Otherwise, raise error.
571 """
572
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; file-like bodies become
        # the read pointer (_fp) below.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
634
635 def release_conn(self) -> None:
636 if not self._pool or not self._connection:
637 return None
638
639 self._pool._put_conn(self._connection)
640 self._connection = None
641
642 def drain_conn(self) -> None:
643 """
644 Read and discard any remaining HTTP response data in the response connection.
645
646 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
647 """
648 try:
649 self.read()
650 except (HTTPError, OSError, BaseSSLError, HTTPException):
651 pass
652
653 @property
654 def data(self) -> bytes:
655 # For backwards-compat with earlier urllib3 0.4 and earlier.
656 if self._body:
657 return self._body # type: ignore[return-value]
658
659 if self._fp:
660 return self.read(cache_content=True)
661
662 return None # type: ignore[return-value]
663
664 @property
665 def connection(self) -> HTTPConnection | None:
666 return self._connection
667
668 def isclosed(self) -> bool:
669 return is_fp_closed(self._fp)
670
671 def tell(self) -> int:
672 """
673 Obtain the number of bytes pulled over the wire so far. May differ from
674 the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
675 if bytes are encoded on the wire (e.g, compressed).
676 """
677 return self._fp_bytes_read
678
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: Verb of the originating request; a HEAD
            response never carries a body regardless of headers.
        :returns: Expected body length in bytes, or ``None`` when unknown
            (chunked framing, or a missing/invalid Content-Length).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                # Non-integer Content-Length: treat the length as unknown.
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
734
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                # expected == -partial signals "no content was expected at
                # all" rather than a connection broken mid-body.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
800
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked workaround path when the read could exceed
        # a C int AND we're on an affected SSL/Python combination.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 may return fewer bytes anyway; one capped call suffices.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
857
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates ``_fp_bytes_read``/``length_remaining`` accounting and
        closes the file pointer when the body is exhausted or truncated.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
909
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve a partial read entirely from already-decoded leftovers
            # when possible, avoiding a network round-trip.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush when the stream is exhausted: full read, or a sized read
        # that came back empty.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until the buffer can satisfy `amt` or the raw
            # stream runs dry.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
983
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        # A compressed chunk may decode to nothing (e.g. header bytes only);
        # keep pulling until we have output or the stream ends.
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1037
1038 def stream(
1039 self, amt: int | None = 2**16, decode_content: bool | None = None
1040 ) -> typing.Generator[bytes, None, None]:
1041 """
1042 A generator wrapper for the read() method. A call will block until
1043 ``amt`` bytes have been read from the connection or until the
1044 connection is closed.
1045
1046 :param amt:
1047 How much of the content to read. The generator will return up to
1048 much data per iteration, but may return less. This is particularly
1049 likely when using compressed data. However, the empty string will
1050 never be returned.
1051
1052 :param decode_content:
1053 If True, will attempt to decode the body based on the
1054 'content-encoding' header.
1055 """
1056 if self.chunked and self.supports_chunked_reads():
1057 yield from self.read_chunked(amt, decode_content=decode_content)
1058 else:
1059 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1060 data = self.read(amt=amt, decode_content=decode_content)
1061
1062 if data:
1063 yield data
1064
1065 # Overrides from io.IOBase
1066 def readable(self) -> bool:
1067 return True
1068
1069 def close(self) -> None:
1070 if not self.closed and self._fp:
1071 self._fp.close()
1072
1073 if self._connection:
1074 self._connection.close()
1075
1076 if not self.auto_close:
1077 io.IOBase.close(self)
1078
1079 @property
1080 def closed(self) -> bool:
1081 if not self.auto_close:
1082 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1083 elif self._fp is None:
1084 return True
1085 elif hasattr(self._fp, "isclosed"):
1086 return self._fp.isclosed()
1087 elif hasattr(self._fp, "closed"):
1088 return self._fp.closed
1089 else:
1090 return True
1091
1092 def fileno(self) -> int:
1093 if self._fp is None:
1094 raise OSError("HTTPResponse has no file to get a fileno from")
1095 elif hasattr(self._fp, "fileno"):
1096 return self._fp.fileno()
1097 else:
1098 raise OSError(
1099 "The file-like object this HTTPResponse is wrapped "
1100 "around has no file descriptor"
1101 )
1102
1103 def flush(self) -> None:
1104 if (
1105 self._fp is not None
1106 and hasattr(self._fp, "flush")
1107 and not getattr(self._fp, "closed", False)
1108 ):
1109 return self._fp.flush()
1110
1111 def supports_chunked_reads(self) -> bool:
1112 """
1113 Checks if the underlying file-like object looks like a
1114 :class:`http.client.HTTPResponse` object. We do this by testing for
1115 the fp attribute. If it is present we assume it returns raw chunks as
1116 processed by read_chunked().
1117 """
1118 return hasattr(self._fp, "fp")
1119
1120 def _update_chunk_length(self) -> None:
1121 # First, we'll figure out length of a chunk and then
1122 # we'll try to read it from socket.
1123 if self.chunk_left is not None:
1124 return None
1125 line = self._fp.fp.readline() # type: ignore[union-attr]
1126 line = line.split(b";", 1)[0]
1127 try:
1128 self.chunk_left = int(line, 16)
1129 except ValueError:
1130 self.close()
1131 if line:
1132 # Invalid chunked protocol response, abort.
1133 raise InvalidChunkLength(self, line) from None
1134 else:
1135 # Truncated at start of next chunk
1136 raise ProtocolError("Response ended prematurely") from None
1137
1138 def _handle_chunk(self, amt: int | None) -> bytes:
1139 returned_chunk = None
1140 if amt is None:
1141 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1142 returned_chunk = chunk
1143 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1144 self.chunk_left = None
1145 elif self.chunk_left is not None and amt < self.chunk_left:
1146 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1147 self.chunk_left = self.chunk_left - amt
1148 returned_chunk = value
1149 elif amt == self.chunk_left:
1150 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1151 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1152 self.chunk_left = None
1153 returned_chunk = value
1154 else: # amt > self.chunk_left
1155 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1156 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1157 self.chunk_left = None
1158 return returned_chunk # type: ignore[no-any-return]
1159
    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            # Main loop: parse each chunk-size line and yield decoded
            # payload; a chunk of size 0 marks the end of the body.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it. Reading line by
            # line until the blank \r\n also skips over any trailer lines.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()
1236
    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        # Stored verbatim; no validation or normalization happens here.
        self._request_url = url
1249
1250 def __iter__(self) -> typing.Iterator[bytes]:
1251 buffer: list[bytes] = []
1252 for chunk in self.stream(decode_content=True):
1253 if b"\n" in chunk:
1254 chunks = chunk.split(b"\n")
1255 yield b"".join(buffer) + chunks[0] + b"\n"
1256 for x in chunks[1:-1]:
1257 yield x + b"\n"
1258 if chunks[-1]:
1259 buffer = [chunks[-1]]
1260 else:
1261 buffer = []
1262 else:
1263 buffer.append(chunk)
1264 if buffer:
1265 yield b"".join(buffer)