11"""Tests for the io_utils module."""
22
3+ import os
4+ import tempfile
5+ from pathlib import Path
6+ from unittest .mock import MagicMock , patch
37import pytest
48
59from topobench .data .utils .io_utils import *
@@ -20,3 +24,334 @@ def test_get_file_id_from_url():
2024
2125 with pytest .raises (ValueError ):
2226 get_file_id_from_url (url_wrong )
27+
28+
class TestDownloadFileFromLink:
    """Test suite for download_file_from_link function.

    Every test replaces ``requests.get`` with a mock, so no network traffic
    is generated; downloads are written into a throwaway directory.
    """

    @pytest.fixture
    def temp_dir(self):
        """Provide a scratch directory that is removed after the test.

        Yields
        ------
        str
            Path to temporary directory.
        """
        with tempfile.TemporaryDirectory() as scratch:
            yield scratch

    @pytest.fixture
    def mock_response(self):
        """Provide a mocked HTTP 200 response advertising a 5 MB body.

        Returns
        -------
        MagicMock
            Mock response object with status code and headers.
        """
        return MagicMock(
            status_code=200,
            headers={"content-length": "5242880"},  # 5 MB
            **{"elapsed.total_seconds.return_value": 1.0},
        )

    @staticmethod
    def _ok_response(headers, elapsed_seconds, chunks):
        """Build a mocked HTTP 200 response that streams the given chunks.

        Parameters
        ----------
        headers : dict
            Value exposed as the response ``headers`` attribute.
        elapsed_seconds : float
            Value returned by ``response.elapsed.total_seconds()``.
        chunks : list of bytes
            Value returned by ``response.iter_content(...)``.

        Returns
        -------
        MagicMock
            Configured mock response.
        """
        return MagicMock(
            status_code=200,
            headers=headers,
            **{
                "elapsed.total_seconds.return_value": elapsed_seconds,
                "iter_content.return_value": chunks,
            },
        )

    def test_download_success_with_progress(self, temp_dir, mock_response):
        """Test that a 5 MB body streamed in 1 MB chunks is saved in full.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        one_mib = 1024 * 1024
        # Five chunks of 1 MB each, matching the advertised content-length.
        mock_response.iter_content.return_value = [b"x" * one_mib] * 5

        with patch("requests.get", return_value=mock_response):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        # The archive must exist and hold every streamed byte.
        saved = Path(temp_dir, "test_dataset.tar.gz")
        assert saved.exists()
        assert saved.stat().st_size == 5 * one_mib

    def test_download_creates_directory_if_not_exists(self, temp_dir):
        """Test that a missing nested target directory gets created.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        nested_dir = os.path.join(temp_dir, "nested", "path")
        reply = self._ok_response(
            headers={"content-length": "1024"},
            elapsed_seconds=0.5,
            chunks=[b"x" * 1024],
        )

        with patch("requests.get", return_value=reply):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=nested_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        assert Path(nested_dir, "test_dataset.tar.gz").exists()
        assert Path(nested_dir).is_dir()

    def test_download_http_error(self, temp_dir):
        """Test that nothing is written when the server answers HTTP 404.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        not_found = MagicMock(status_code=404)

        with patch("requests.get", return_value=not_found):
            download_file_from_link(
                file_link="http://example.com/nonexistent.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        # An error status must not leave an output file behind.
        assert not Path(temp_dir, "test_dataset.tar.gz").exists()

    def test_download_timeout_retry(self, temp_dir):
        """Test that a timed-out first attempt is followed by a retry.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        import requests

        recovered = self._ok_response(
            headers={"content-length": "1024"},
            elapsed_seconds=0.5,
            chunks=[b"x" * 1024],
        )
        # First call raises a timeout, second call hands back the response.
        outcomes = [
            requests.exceptions.Timeout("Connection timed out"),
            recovered,
        ]

        # time.sleep is patched so any back-off does not slow the test down.
        with patch("requests.get", side_effect=outcomes) as mock_get, patch(
            "time.sleep"
        ):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=3,
            )

        # The second attempt succeeded, so the file exists after two calls.
        assert Path(temp_dir, "test_dataset.tar.gz").exists()
        assert mock_get.call_count == 2

    def test_download_exhausts_retries(self, temp_dir):
        """Test that the timeout propagates once every retry has failed.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        import requests

        always_timeout = requests.exceptions.Timeout("Connection timed out")

        with patch("requests.get", side_effect=always_timeout) as mock_get, patch(
            "time.sleep"
        ):
            with pytest.raises(requests.exceptions.Timeout):
                download_file_from_link(
                    file_link="http://example.com/dataset.tar.gz",
                    path_to_save=temp_dir,
                    dataset_name="test_dataset",
                    file_format="tar.gz",
                    timeout=60,
                    retries=2,
                )

        # One request per configured retry.
        assert mock_get.call_count == 2

    def test_download_with_different_formats(self, temp_dir, mock_response):
        """Test that the requested file format becomes the file extension.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        mock_response.iter_content.return_value = [b"test content"]

        # Map each format to a dataset name with dots replaced by underscores.
        stems = {
            fmt: f"test_dataset_{fmt.replace('.', '_')}"
            for fmt in ["zip", "tar", "tar.gz"]
        }

        with patch("requests.get", return_value=mock_response):
            for fmt, stem in stems.items():
                download_file_from_link(
                    file_link="http://example.com/dataset",
                    path_to_save=temp_dir,
                    dataset_name=stem,
                    file_format=fmt,
                    timeout=60,
                    retries=1,
                )

        # Each download must have produced "<dataset_name>.<format>".
        for fmt, stem in stems.items():
            assert Path(temp_dir, f"{stem}.{fmt}").exists()

    def test_download_empty_chunks(self, temp_dir):
        """Test that empty chunks do not change the size of the saved file.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        # Two 512-byte chunks interleaved with empty ones.
        reply = self._ok_response(
            headers={"content-length": "1024"},
            elapsed_seconds=1.0,
            chunks=[b"x" * 512, b"", b"y" * 512, b""],
        )

        with patch("requests.get", return_value=reply):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        # Only the 2 x 512 payload bytes count towards the file size.
        saved = Path(temp_dir, "test_dataset.tar.gz")
        assert saved.exists()
        assert saved.stat().st_size == 1024

    def test_download_unknown_size(self, temp_dir):
        """Test download when the content-length header is missing.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        """
        # Empty headers: the response does not advertise its size.
        reply = self._ok_response(
            headers={},
            elapsed_seconds=0.5,
            chunks=[b"x" * 1024],
        )

        with patch("requests.get", return_value=reply):
            download_file_from_link(
                file_link="http://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=60,
                retries=1,
            )

        saved = Path(temp_dir, "test_dataset.tar.gz")
        assert saved.exists()
        assert saved.stat().st_size == 1024

    def test_download_ssl_verification_disabled(self, temp_dir, mock_response):
        """Test that ``verify=False`` is forwarded to ``requests.get``.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        mock_response.iter_content.return_value = [b"test content"]

        with patch("requests.get", return_value=mock_response) as mock_get:
            download_file_from_link(
                file_link="https://example.com/dataset.tar.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                verify=False,
                timeout=60,
                retries=1,
            )

        # Exactly one request, issued with certificate checks turned off.
        mock_get.assert_called_once()
        _, request_kwargs = mock_get.call_args
        assert request_kwargs["verify"] is False

    def test_download_custom_timeout(self, temp_dir, mock_response):
        """Test that a custom timeout is forwarded to ``requests.get``.

        Parameters
        ----------
        temp_dir : str
            Temporary directory path.
        mock_response : MagicMock
            Mock response object.
        """
        mock_response.iter_content.return_value = [b"test content"]
        custom_timeout = 120  # 2 minutes per chunk

        with patch("requests.get", return_value=mock_response) as mock_get:
            download_file_from_link(
                file_link="https://github.com/aidos-lab/mantra/releases/download/{version}/2_manifolds.json.gz",
                path_to_save=temp_dir,
                dataset_name="test_dataset",
                file_format="tar.gz",
                timeout=custom_timeout,
                retries=1,
            )

        # The request must carry the tuple (30, custom_timeout) as timeout.
        mock_get.assert_called_once()
        _, request_kwargs = mock_get.call_args
        assert request_kwargs["timeout"] == (30, custom_timeout)
0 commit comments