From 6cd88202d8150128a4c366db85e3cec993ab4cd5 Mon Sep 17 00:00:00 2001 From: Ryan Costa Date: Mon, 20 Apr 2026 11:39:48 -0300 Subject: [PATCH] fix(storage): remove partial file on any download_to_filename error Blob.download_to_filename() opens the destination file for writing before the network request begins. If the download fails, the method raises an exception but previously left an empty or partially-written file on disk for most error types (only DataCorruption and NotFound were cleaned up). This extends the cleanup to catch all exceptions, ensuring the partially-written file is removed before re-raising. This prevents callers from seeing a corrupt/empty file after network errors, timeouts, or any other unexpected failures. The inner try/except OSError is a safety guard for the edge case where the file was never successfully created. --- .../google/cloud/storage/blob.py | 26 ++++-- .../tests/unit/test_blob.py | 85 +++++++++++++++++++ 2 files changed, 104 insertions(+), 7 deletions(-) diff --git a/packages/google-cloud-storage/google/cloud/storage/blob.py b/packages/google-cloud-storage/google/cloud/storage/blob.py index c6fbcf4c12b7..b534dfd0ffdb 100644 --- a/packages/google-cloud-storage/google/cloud/storage/blob.py +++ b/packages/google-cloud-storage/google/cloud/storage/blob.py @@ -1271,18 +1271,23 @@ def _handle_filename_and_download(self, filename, *args, **kwargs): For *args and **kwargs, refer to the documentation for download_to_filename() for more information. """ - try: - with open(filename, "wb") as file_obj: + with open(filename, "wb") as file_obj: + try: self._prep_and_do_download( file_obj, *args, **kwargs, ) - - except (DataCorruption, NotFound): - # Delete the corrupt or empty downloaded file. - os.remove(filename) - raise + except Exception: + # Remove the (empty or partial) file before re-raising so + # callers never see a corrupt destination file. This extends + # the existing DataCorruption/NotFound cleanup to cover network + # errors, timeouts, and any other unexpected failures. + try: + os.remove(filename) + except OSError: + pass # file may not exist if open() itself failed + raise updated = self.updated if updated is not None: @@ -1312,6 +1317,13 @@ def download_to_filename( If :attr:`user_project` is set on the bucket, bills the API request to that project. + .. note:: + If the download fails for any reason (network error, timeout, + :exc:`~google.cloud.exceptions.NotFound`, etc.), any partially + written destination file is removed before re-raising the + exception, so the filesystem is never left with an empty or + incomplete file. + See a [code sample](https://cloud.google.com/storage/docs/samples/storage-download-encrypted-file#storage_download_encrypted_file-python) to download a file with a [`customer-supplied encryption key`](https://cloud.google.com/storage/docs/encryption#customer-supplied). diff --git a/packages/google-cloud-storage/tests/unit/test_blob.py b/packages/google-cloud-storage/tests/unit/test_blob.py index a218f011dd17..4ce6b8b4689d 100644 --- a/packages/google-cloud-storage/tests/unit/test_blob.py +++ b/packages/google-cloud-storage/tests/unit/test_blob.py @@ -1962,6 +1962,91 @@ def test_download_to_filename_notfound(self): stream = blob._prep_and_do_download.mock_calls[0].args[0] self.assertEqual(stream.name, filename) + def test_download_to_filename_cleans_up_on_connection_error(self): + import requests.exceptions + + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) + + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = requests.exceptions.ConnectionError( + "Network is unreachable" + ) + + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) + + with self.assertRaises(requests.exceptions.ConnectionError): + blob.download_to_filename(filename) + + self.assertFalse(os.path.exists(filename)) + + def test_download_to_filename_cleans_up_on_timeout(self): + import requests.exceptions + + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) + + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = requests.exceptions.Timeout( + "Timeout of 120.0s exceeded" + ) + + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) + + with self.assertRaises(requests.exceptions.Timeout): + blob.download_to_filename(filename) + + self.assertFalse(os.path.exists(filename)) + + def test_download_to_filename_cleans_up_on_generic_exception(self): + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) + + with mock.patch.object(blob, "_prep_and_do_download"): + blob._prep_and_do_download.side_effect = RuntimeError("unexpected failure") + + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + self.assertTrue(os.path.exists(filename)) + + with self.assertRaises(RuntimeError): + blob.download_to_filename(filename) + + self.assertFalse(os.path.exists(filename)) + + def test_download_to_filename_keeps_file_on_success(self): + blob_name = "blob-name" + client = self._make_client() + bucket = _Bucket(client) + blob = self._make_one(blob_name, bucket=bucket) + + def _write_data(file_obj, **kwargs): + file_obj.write(b"hello world") + + with mock.patch.object( + blob, "_prep_and_do_download", side_effect=_write_data + ): + filehandle, filename = tempfile.mkstemp() + os.close(filehandle) + try: + blob.download_to_filename(filename) + self.assertTrue(os.path.exists(filename)) + with open(filename, "rb") as f: + self.assertEqual(f.read(), b"hello world") + finally: + if os.path.exists(filename): + os.remove(filename) + def _download_as_bytes_helper( self, raw_download, timeout=None, single_shot_download=False, **extra_kwargs ):