Skip to content

Commit 5e0a0f5

Browse files
committed
Kept the changes and the test class for the download_file_from_link function, and moved the test class to the appropriate file. Note that the same changes were made in PR geometric-intelligence#241; they are duplicated here because the script wouldn't run otherwise and would require additional adaptation to the old download_file_from_link function.
1 parent f70fa7a commit 5e0a0f5

2 files changed

Lines changed: 335 additions & 342 deletions

File tree

test/data/utils/test_io_utils.py

Lines changed: 335 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
"""Tests for the io_utils module."""
22

3+
import os
4+
import tempfile
5+
from pathlib import Path
6+
from unittest.mock import MagicMock, patch
37
import pytest
48

59
from topobench.data.utils.io_utils import *
@@ -20,3 +24,334 @@ def test_get_file_id_from_url():
2024

2125
with pytest.raises(ValueError):
2226
get_file_id_from_url(url_wrong)
27+
28+
29+
class TestDownloadFileFromLink:
    """Tests covering the behaviour of ``download_file_from_link``.

    Every test replaces ``requests.get`` with a mock, so no network
    traffic is generated; files are written to a throwaway directory.
    """

    @pytest.fixture
    def temp_dir(self):
        """Provide a scratch directory that is removed after the test.

        Returns
        -------
        str
            Path to the temporary directory.
        """
        with tempfile.TemporaryDirectory() as scratch:
            yield scratch

    @pytest.fixture
    def mock_response(self):
        """Build a successful fake HTTP response.

        Returns
        -------
        MagicMock
            Response stub with status 200, a 5 MB ``content-length``
            header and an elapsed time of one second.
        """
        fake = MagicMock()
        fake.elapsed.total_seconds.return_value = 1.0
        fake.headers = {"content-length": "5242880"}  # 5 MB
        fake.status_code = 200
        return fake

    def test_download_success_with_progress(self, temp_dir, mock_response):
        """Check that a successful download writes the whole payload.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        # Five chunks of 1 MiB each, matching the advertised content-length.
        one_mib = 1024 * 1024
        mock_response.iter_content.return_value = [b"x" * one_mib] * 5

        call_kwargs = {
            "file_link": "http://example.com/dataset.tar.gz",
            "path_to_save": temp_dir,
            "dataset_name": "test_dataset",
            "file_format": "tar.gz",
            "timeout": 60,
            "retries": 1,
        }
        with patch("requests.get", return_value=mock_response):
            download_file_from_link(**call_kwargs)

        # The file must exist and contain every byte that was streamed.
        written = os.path.join(temp_dir, "test_dataset.tar.gz")
        assert os.path.exists(written)
        assert os.path.getsize(written) == 5 * one_mib

    def test_download_creates_directory_if_not_exists(self, temp_dir):
        """Check that missing target directories are created.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        fake = MagicMock()
        fake.status_code = 200
        fake.headers = {"content-length": "1024"}
        fake.elapsed.total_seconds.return_value = 0.5
        fake.iter_content.return_value = [b"x" * 1024]

        # Two levels deep, neither of which exists before the call.
        target_dir = os.path.join(temp_dir, "nested", "path")

        with patch("requests.get", return_value=fake):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=target_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        written = os.path.join(target_dir, "test_dataset.tar.gz")
        assert os.path.exists(written)
        assert os.path.isdir(target_dir)

    def test_download_http_error(self, temp_dir):
        """Check that a non-success HTTP status leaves no file behind.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        not_found = MagicMock()
        not_found.status_code = 404

        call_kwargs = {
            "file_link": "http://example.com/nonexistent.tar.gz",
            "path_to_save": temp_dir,
            "dataset_name": "test_dataset",
            "file_format": "tar.gz",
            "timeout": 60,
            "retries": 1,
        }
        with patch("requests.get", return_value=not_found):
            download_file_from_link(**call_kwargs)

        # Nothing should have been written for a 404 response.
        unexpected = os.path.join(temp_dir, "test_dataset.tar.gz")
        assert not os.path.exists(unexpected)

    def test_download_timeout_retry(self, temp_dir):
        """Check that a timed-out request is retried and then succeeds.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        import requests

        recovered = MagicMock()
        recovered.status_code = 200
        recovered.headers = {"content-length": "1024"}
        recovered.elapsed.total_seconds.return_value = 0.5
        recovered.iter_content.return_value = [b"x" * 1024]

        # time.sleep is patched so any delay between attempts costs nothing.
        with patch("requests.get") as mock_get, patch("time.sleep"):
            # First attempt times out, second attempt returns the response.
            mock_get.side_effect = [
                requests.exceptions.Timeout("Connection timed out"),
                recovered,
            ]
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=3,
            )

        # The retry must have produced the file after exactly two requests.
        written = os.path.join(temp_dir, "test_dataset.tar.gz")
        assert os.path.exists(written)
        assert mock_get.call_count == 2

    def test_download_exhausts_retries(self, temp_dir):
        """Check that the timeout propagates once all retries have failed.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        import requests

        call_kwargs = {
            "file_link": "http://example.com/dataset.tar.gz",
            "path_to_save": temp_dir,
            "dataset_name": "test_dataset",
            "file_format": "tar.gz",
            "timeout": 60,
            "retries": 2,
        }
        with patch("requests.get") as mock_get, patch("time.sleep"):
            # Every attempt fails in the same way.
            mock_get.side_effect = requests.exceptions.Timeout(
                "Connection timed out"
            )

            with pytest.raises(requests.exceptions.Timeout):
                download_file_from_link(**call_kwargs)

            # One request per permitted attempt.
            assert mock_get.call_count == 2

    def test_download_with_different_formats(self, temp_dir, mock_response):
        """Check that the requested format is used as the file extension.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        mock_response.iter_content.return_value = [b"test content"]

        # Map each format to a dataset name that has no dots in it.
        names_by_format = {
            fmt: f"test_dataset_{fmt.replace('.', '_')}"
            for fmt in ("zip", "tar", "tar.gz")
        }

        with patch("requests.get", return_value=mock_response):
            for fmt, name in names_by_format.items():
                download_file_from_link(
                    file_link="http://example.com/dataset",
                    path_to_save=temp_dir,
                    dataset_name=name,
                    file_format=fmt,
                    timeout=60,
                    retries=1,
                )

        # One output file per format, each carrying its own extension.
        for fmt, name in names_by_format.items():
            written = os.path.join(temp_dir, f"{name}.{fmt}")
            assert os.path.exists(written)

    def test_download_empty_chunks(self, temp_dir):
        """Check that empty chunks in the stream add nothing to the file.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        fake = MagicMock()
        fake.status_code = 200
        fake.headers = {"content-length": "1024"}
        fake.elapsed.total_seconds.return_value = 1.0
        # Two real 512-byte chunks interleaved with two empty ones.
        fake.iter_content.return_value = [
            b"x" * 512,
            b"",
            b"y" * 512,
            b"",
        ]

        with patch("requests.get", return_value=fake):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        # Only the 2 x 512 bytes of real data should be on disk.
        written = os.path.join(temp_dir, "test_dataset.tar.gz")
        assert os.path.exists(written)
        assert os.path.getsize(written) == 1024

    def test_download_unknown_size(self, temp_dir):
        """Check that a response without ``content-length`` still downloads.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        fake = MagicMock()
        fake.status_code = 200
        fake.headers = {}  # the server did not announce a size
        fake.elapsed.total_seconds.return_value = 0.5
        fake.iter_content.return_value = [b"x" * 1024]

        call_kwargs = {
            "file_link": "http://example.com/dataset.tar.gz",
            "path_to_save": temp_dir,
            "dataset_name": "test_dataset",
            "file_format": "tar.gz",
            "timeout": 60,
            "retries": 1,
        }
        with patch("requests.get", return_value=fake):
            download_file_from_link(**call_kwargs)

        written = os.path.join(temp_dir, "test_dataset.tar.gz")
        assert os.path.exists(written)
        assert os.path.getsize(written) == 1024

    def test_download_ssl_verification_disabled(self, temp_dir, mock_response):
        """Check that ``verify=False`` is forwarded to ``requests.get``.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        mock_response.iter_content.return_value = [b"test content"]

        with patch("requests.get", return_value=mock_response) as mock_get:
            download_file_from_link(
                file_link="https://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                verify=False,
                timeout=60,
                retries=1,
            )

            # Exactly one request, made with certificate checks turned off.
            mock_get.assert_called_once()
            _, passed_kwargs = mock_get.call_args
            assert passed_kwargs["verify"] is False

    def test_download_custom_timeout(self, temp_dir, mock_response):
        """Check that a caller-supplied timeout reaches ``requests.get``.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        mock_response.iter_content.return_value = [b"test content"]
        custom_timeout = 120  # seconds

        with patch("requests.get", return_value=mock_response) as mock_get:
            download_file_from_link(
                file_link="https://github.com/aidos-lab/mantra/releases/download/{version}/2_manifolds.json.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=custom_timeout,
                retries=1,
            )

            # The custom value is expected as the second element of the
            # timeout tuple; the first element is expected to be 30.
            mock_get.assert_called_once()
            _, passed_kwargs = mock_get.call_args
            assert passed_kwargs["timeout"] == (30, custom_timeout)

0 commit comments

Comments
 (0)