From 46d0708b2cb5bc8c132f43ef3d3f6b2b78106569 Mon Sep 17 00:00:00 2001 From: Zo Bot Date: Mon, 29 Jun 2026 19:31:57 +0000 Subject: [PATCH] skip Content-Length size check when Content-Encoding indicates compression MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a server returns Content-Encoding: gzip despite an Accept-Encoding: identity request, requests still auto-decompresses the body in iter_content. Per RFC 9110 §8.6 the Content-Length header reflects the *encoded* payload size, but Downloader.chunk_downloaded counts the *decoded* bytes yielded by requests. The Downloader.interrupted check then compared a compressed size to a decompressed byte count and erroneously flagged every compressed download as incomplete. Resolve this by treating any non-identity Content-Encoding the same as a missing Content-Length: drop total_size so the interrupted check short-circuits and the progress display falls back to a spinner. Tests cover gzip, deflate, and br encodings plus a tolerance for casing and surrounding whitespace in the header value. --- httpie/downloads.py | 20 +++++++++++++----- tests/test_downloads.py | 47 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/httpie/downloads.py b/httpie/downloads.py index 9c4b895e6f..90a733cafc 100644 --- a/httpie/downloads.py +++ b/httpie/downloads.py @@ -216,11 +216,21 @@ def start( """ assert not self.status.time_started - # FIXME: some servers still might sent Content-Encoding: gzip - # - try: - total_size = int(final_response.headers['Content-Length']) - except (KeyError, ValueError, TypeError): + # If the server applied a content coding (e.g. ``gzip``), then + # ``requests`` auto-decompresses the body in ``iter_content`` while + # ``Content-Length`` still reflects the *encoded* size per + # RFC 9110 §8.6. Comparing those two numbers would always mark the + # download as incomplete, so skip the size tracking in that case. 
+ # See RFC 9110 §8.6 (Content-Length). + content_encoding = ( + final_response.headers.get('Content-Encoding') or 'identity' + ).strip().lower() + if content_encoding == 'identity': + try: + total_size = int(final_response.headers['Content-Length']) + except (KeyError, ValueError, TypeError): + total_size = None + else: total_size = None if not self._output_file: diff --git a/tests/test_downloads.py b/tests/test_downloads.py index b646a0e6a5..0337761329 100644 --- a/tests/test_downloads.py +++ b/tests/test_downloads.py @@ -147,6 +147,53 @@ def test_download_with_Content_Length(self, mock_env, httpbin_both): downloader.finish() assert not downloader.interrupted + def test_download_with_Content_Encoding_skips_size_check(self, mock_env, httpbin_both): + # When the server applies a content coding, ``requests`` auto-decompresses + # the body but Content-Length still reflects the encoded size per + # RFC 9110 §8.6. The downloader must skip the size comparison in that + # case so a fully-received, encoded payload isn't reported as an + # "Incomplete download". + # + with open(os.devnull, 'w') as devnull: + for content_encoding in ('gzip', 'br', 'deflate'): + downloader = Downloader(mock_env, output_file=devnull) + downloader.start( + initial_url='/', + final_response=Response( + url=httpbin_both.url + '/', + headers={ + 'Content-Length': 10, + 'Content-Encoding': content_encoding, + }, + ), + ) + # Decompressed stream ends up much larger than the encoded size. + downloader.chunk_downloaded(b'1234567890' * 1000) + downloader.finish() + assert not downloader.interrupted, ( + f'Content-Encoding={content_encoding!r} should bypass ' + f'the size comparison; got interrupted=True.' + ) + + def test_download_with_Content_Encoding_uppercase_or_padded(self, mock_env, httpbin_both): + # Header values come back with arbitrary casing and surrounding whitespace; + # the downloader must recognise those as content codings too. 
+ with open(os.devnull, 'w') as devnull: + downloader = Downloader(mock_env, output_file=devnull) + downloader.start( + initial_url='/', + final_response=Response( + url=httpbin_both.url + '/', + headers={ + 'Content-Length': 10, + 'Content-Encoding': ' GZIP ', + }, + ), + ) + downloader.chunk_downloaded(b'1234567890' * 1000) + downloader.finish() + assert not downloader.interrupted + def test_download_no_Content_Length(self, mock_env, httpbin_both): with open(os.devnull, 'w') as devnull: downloader = Downloader(mock_env, output_file=devnull)