diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index e9fc5add2..b4aae01f5 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -102,6 +102,8 @@ def __str__(self) -> str: class HttpClient: _DEFAULT_MAX_RETRY: int = 5 _DEFAULT_MAX_TIME: int = 60 * 10 + _DEFAULT_CONNECT_TIMEOUT: int = 30 + _DEFAULT_READ_TIMEOUT: int = 300 _ACTIONS_TO_RETRY_ON = {ResponseAction.RETRY, ResponseAction.RATE_LIMITED} def __init__( @@ -573,11 +575,17 @@ def send_request( verify=request_kwargs.get("verify"), cert=request_kwargs.get("cert"), ) - request_kwargs = {**request_kwargs, **env_settings} + mutable_request_kwargs: Dict[str, Any] = {**request_kwargs, **env_settings} + + if "timeout" not in mutable_request_kwargs: + mutable_request_kwargs["timeout"] = ( + self._DEFAULT_CONNECT_TIMEOUT, + self._DEFAULT_READ_TIMEOUT, + ) response: requests.Response = self._send_with_retry( request=request, - request_kwargs=request_kwargs, + request_kwargs=mutable_request_kwargs, log_formatter=log_formatter, exit_on_rate_limit=exit_on_rate_limit, ) diff --git a/unit_tests/sources/streams/http/test_http_client.py b/unit_tests/sources/streams/http/test_http_client.py index 3840c70e3..92080d3ff 100644 --- a/unit_tests/sources/streams/http/test_http_client.py +++ b/unit_tests/sources/streams/http/test_http_client.py @@ -837,3 +837,52 @@ def backoff_time(self, response_or_exception, attempt_count): with pytest.raises(AirbyteTracedException) as e: http_client.send_request(http_method="get", url="https://airbyte.io/", request_kwargs={}) assert e.value.failure_type == expected_failure_type + + +def test_send_request_applies_default_timeout_when_not_provided(mocker): + http_client = test_http_client() + mocked_response = MagicMock(spec=requests.Response) + mocked_response.status_code = 200 + mocked_response.headers = {} + mock_send = mocker.patch.object(requests.Session, "send", return_value=mocked_response) + + http_client.send_request( + http_method="get", + url="https://test_base_url.com/v1/endpoint", + request_kwargs={}, + ) + + assert mock_send.call_count == 1 + call_kwargs = mock_send.call_args + # The timeout should be passed as part of the keyword arguments to session.send() + # session.send(request, **request_kwargs) unpacks request_kwargs, so timeout appears as a kwarg + assert call_kwargs.kwargs.get("timeout") == ( + HttpClient._DEFAULT_CONNECT_TIMEOUT, + HttpClient._DEFAULT_READ_TIMEOUT, + ) or call_kwargs[1].get("timeout") == ( + HttpClient._DEFAULT_CONNECT_TIMEOUT, + HttpClient._DEFAULT_READ_TIMEOUT, + ) + + +def test_send_request_respects_explicit_timeout(mocker): + http_client = test_http_client() + mocked_response = MagicMock(spec=requests.Response) + mocked_response.status_code = 200 + mocked_response.headers = {} + mock_send = mocker.patch.object(requests.Session, "send", return_value=mocked_response) + + custom_timeout = (10, 60) + http_client.send_request( + http_method="get", + url="https://test_base_url.com/v1/endpoint", + request_kwargs={"timeout": custom_timeout}, + ) + + assert mock_send.call_count == 1 + call_kwargs = mock_send.call_args + # The explicit timeout should be preserved, not overridden by the default + assert ( + call_kwargs.kwargs.get("timeout") == custom_timeout + or call_kwargs[1].get("timeout") == custom_timeout + )