44import tempfile
55from io import BytesIO
66from pathlib import Path
7- from typing import Dict , List , Optional , Tuple
7+ from typing import IO , Dict , List , Optional , Tuple
88
99import httpx
1010import rich
6565ZIP_LIMIT_MESSAGE = "The number of files in the zip archive is over the limit"
6666
6767
class _ProgressFileWrapper:
    """
    Wrap a binary file-like object and update a rich progress bar on reads.

    httpx's multipart encoder expects a synchronous file-like object exposing
    ``.read()``. Every successful read advances the given progress task by
    the number of bytes returned. Any attribute other than ``read`` (for
    example ``seek``, ``tell`` or ``fileno``) is forwarded to the wrapped
    file.

    Args:
        file: The binary file-like object to read from.
        progress: The progress display whose ``update`` method is called.
        pbar: Identifier of the progress task to advance.
        chunk_size: Number of bytes requested from the wrapped file per
            underlying read when the caller does not specify a size.
    """

    def __init__(
        self,
        file: IO[bytes],
        # Quoted so that defining the class does not require these names to
        # be resolvable at definition time.
        progress: "Progress",
        pbar: "TaskID",
        chunk_size: int = 1024 * 1024,
    ):
        self._file = file
        self._progress = progress
        self._pbar = pbar
        self._chunk_size = chunk_size

    def read(self, size: int = -1) -> bytes:
        """
        Read from the wrapped file and advance the progress bar.

        Args:
            size: Maximum number of bytes to read. ``None`` or a negative
                value reads until EOF, as the ``io`` read contract requires.

        Returns:
            The bytes read; empty once the wrapped file is exhausted.
        """
        if size is None or size < 0:
            # Read everything that is left, but in ``chunk_size`` pieces so
            # the progress bar keeps moving. Returning a single chunk here
            # would silently truncate the data for callers that issue one
            # unbounded ``read()``.
            pieces = []
            while True:
                piece = self._file.read(self._chunk_size)
                if not piece:
                    break
                self._progress.update(self._pbar, advance=len(piece))
                pieces.append(piece)
            return b"".join(pieces)

        data = self._file.read(size)
        if data:
            self._progress.update(self._pbar, advance=len(data))
        return data

    def __getattr__(self, name):
        """
        Forward unknown attribute lookups to the wrapped file.

        Raises:
            AttributeError: If the wrapper has no wrapped file yet, or the
                wrapped file lacks the attribute.
        """
        # __getattr__ only runs after normal lookup failed. Going through
        # ``self._file`` here would recurse forever on instances whose
        # ``_file`` is not set yet (created via __new__, copy or unpickling),
        # so the instance dict is consulted directly instead.
        try:
            wrapped = self.__dict__["_file"]
        except KeyError:
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            ) from None
        return getattr(wrapped, name)
96+
97+
6898init_logging ()
6999
70100
@@ -161,6 +191,7 @@ async def native_upload(
161191 persistent_id = persistent_id ,
162192 dataverse_url = dataverse_url ,
163193 api_token = api_token ,
194+ proxy = proxy ,
164195 )
165196
166197
@@ -255,7 +286,9 @@ def _reset_progress(
255286@tenacity .retry (
256287 wait = RETRY_STRAT ,
257288 stop = tenacity .stop_after_attempt (MAX_RETRIES ),
258- retry = tenacity .retry_if_exception_type ((httpx .HTTPStatusError ,)),
289+ retry = tenacity .retry_if_exception_type (
290+ (httpx .HTTPStatusError , httpx .ReadError , httpx .RequestError )
291+ ),
259292)
260293async def _single_native_upload (
261294 session : httpx .AsyncClient ,
@@ -301,10 +334,12 @@ async def _single_native_upload(
301334 json_data = _get_json_data (file )
302335 handler = file .get_handler ()
303336
337+ assert handler is not None , "File handler is required for native upload"
338+
304339 files = {
305340 "file" : (
306341 file .file_name ,
307- handler ,
342+ _ProgressFileWrapper ( handler , progress , pbar ), # type: ignore[arg-type]
308343 file .mimeType ,
309344 ),
310345 "jsonData" : (
@@ -316,7 +351,7 @@ async def _single_native_upload(
316351
317352 response = await session .post (
318353 endpoint ,
319- files = files , # type: ignore
354+ files = files ,
320355 )
321356
322357 if response .status_code == 400 and response .json ()["message" ].startswith (
@@ -371,6 +406,7 @@ async def _update_metadata(
371406 dataverse_url : str ,
372407 api_token : str ,
373408 persistent_id : str ,
409+ proxy : Optional [str ],
374410):
375411 """
376412 Updates the metadata of the given files in a Dataverse repository.
@@ -390,6 +426,7 @@ async def _update_metadata(
390426 persistent_id = persistent_id ,
391427 dataverse_url = dataverse_url ,
392428 api_token = api_token ,
429+ proxy = proxy ,
393430 )
394431
395432 tasks = []
@@ -505,6 +542,7 @@ def _retrieve_file_ids(
505542 persistent_id : str ,
506543 dataverse_url : str ,
507544 api_token : str ,
545+ proxy : Optional [str ] = None ,
508546) -> Dict [str , str ]:
509547 """
510548 Retrieves the file IDs of files in a dataset.
@@ -513,7 +551,7 @@ def _retrieve_file_ids(
513551 persistent_id (str): The persistent identifier of the dataset.
514552 dataverse_url (str): The URL of the Dataverse repository.
515553 api_token (str): The API token of the Dataverse repository.
516-
554+ proxy (str): The proxy to use for the request.
517555 Returns:
518556 Dict[str, str]: Dictionary mapping file paths to their IDs.
519557 """
@@ -523,6 +561,7 @@ def _retrieve_file_ids(
523561 persistent_id = persistent_id ,
524562 dataverse_url = dataverse_url ,
525563 api_token = api_token ,
564+ proxy = proxy ,
526565 )
527566
528567 return _create_file_id_path_mapping (ds_files )
0 commit comments