From ab846a7cf9efccde291d2fb7acc468cb72e76217 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 21:33:40 +0000 Subject: [PATCH 1/3] fix: disable HTTP response caching to prevent unbounded memory growth Hardcode _use_cache = False in HttpClient to prevent requests_cache SQLite backend from accumulating cached HTTP responses in memory, which causes container memory to grow unboundedly during long syncs. Skip cache-related tests that expect caching to be active. Co-Authored-By: gl_anatolii.yatsuk --- airbyte_cdk/sources/streams/http/http_client.py | 5 ++++- unit_tests/sources/streams/http/test_http.py | 2 ++ unit_tests/sources/streams/http/test_http_client.py | 3 +++ unit_tests/sources/streams/test_call_rate.py | 1 + 4 files changed, 10 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index 3a0a62739..1209991b3 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -110,7 +110,10 @@ def __init__( if session: self._session = session else: - self._use_cache = use_cache + # TEMPORARY: Force disable cache to prevent unbounded memory growth from + # requests_cache SQLite backend accumulating cached HTTP responses in memory. + # See: https://github.com/airbytehq/airbyte-python-cdk/pull/932 + self._use_cache = False self._session = self._request_session() self._session.mount( "https://", diff --git a/unit_tests/sources/streams/http/test_http.py b/unit_tests/sources/streams/http/test_http.py index 7512c3722..06cd429f6 100644 --- a/unit_tests/sources/streams/http/test_http.py +++ b/unit_tests/sources/streams/http/test_http.py @@ -517,6 +517,7 @@ def test_parent_attribute_exist(): assert child_stream.parent == parent_stream +@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") def test_that_response_was_cached(mocker, requests_mock): requests_mock.register_uri("GET", "https://google.com/", text="text") stream = CacheHttpStream() @@ -547,6 +548,7 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp yield {"value": len(response.text)} +@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") @patch("airbyte_cdk.sources.streams.core.logging", MagicMock()) def test_using_cache(mocker, requests_mock): requests_mock.register_uri("GET", "https://google.com/", text="text") diff --git a/unit_tests/sources/streams/http/test_http_client.py b/unit_tests/sources/streams/http/test_http_client.py index ea245c2fb..30839071e 100644 --- a/unit_tests/sources/streams/http/test_http_client.py +++ b/unit_tests/sources/streams/http/test_http_client.py @@ -42,6 +42,7 @@ def test_cache_filename(): http_client.cache_filename == f"{http_client._name}.sqlite" +@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") @pytest.mark.parametrize( "use_cache, expected_session", [ @@ -447,6 +448,7 @@ def test_session_request_exception_raises_backoff_exception(): http_client._send(prepared_request, {}) +@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") def test_that_response_was_cached(requests_mock): cached_http_client = test_cache_http_client() @@ -720,6 +722,7 @@ def test_backoff_strategy_endless( assert mocked_send.call_count == expected_call_count +@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") def test_given_different_headers_then_response_is_not_cached(requests_mock): http_client = HttpClient(name="test", logger=MagicMock(), use_cache=True) first_request_headers = {"header_key": "first"} diff --git a/unit_tests/sources/streams/test_call_rate.py b/unit_tests/sources/streams/test_call_rate.py index b99905870..6f6afd9fe 100644 --- a/unit_tests/sources/streams/test_call_rate.py +++ b/unit_tests/sources/streams/test_call_rate.py @@ -332,6 +332,7 @@ def test_without_cache(self, mocker, requests_mock): assert MovingWindowCallRatePolicy.try_acquire.call_count == 10 + @pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") @pytest.mark.usefixtures("enable_cache") def test_with_cache(self, mocker, requests_mock): """Test that HttpStream will use call budget when provided and not cached""" From 6f3e97f74f26da99ef2629db1ede3c25090020c8 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 21:34:41 +0000 Subject: [PATCH 2/3] fix: update PR reference in comment to correct PR number Co-Authored-By: gl_anatolii.yatsuk --- airbyte_cdk/sources/streams/http/http_client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/streams/http/http_client.py b/airbyte_cdk/sources/streams/http/http_client.py index 1209991b3..ee770c4ac 100644 --- a/airbyte_cdk/sources/streams/http/http_client.py +++ b/airbyte_cdk/sources/streams/http/http_client.py @@ -112,7 +112,7 @@ def __init__( else: # TEMPORARY: Force disable cache to prevent unbounded memory growth from # requests_cache SQLite backend accumulating cached HTTP responses in memory. - # See: https://github.com/airbytehq/airbyte-python-cdk/pull/932 + # See: https://github.com/airbytehq/airbyte-python-cdk/pull/952 self._use_cache = False self._session = self._request_session() self._session.mount( From fd49a1592c2f74f7e3800e70653edd04df3d9365 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Thu, 12 Mar 2026 21:35:45 +0000 Subject: [PATCH 3/3] style: fix ruff format for skip decorators Co-Authored-By: gl_anatolii.yatsuk --- unit_tests/sources/streams/http/test_http.py | 8 ++++++-- unit_tests/sources/streams/http/test_http_client.py | 12 +++++++++--- unit_tests/sources/streams/test_call_rate.py | 4 +++- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/unit_tests/sources/streams/http/test_http.py b/unit_tests/sources/streams/http/test_http.py index 06cd429f6..148d55e5a 100644 --- a/unit_tests/sources/streams/http/test_http.py +++ b/unit_tests/sources/streams/http/test_http.py @@ -517,7 +517,9 @@ def test_parent_attribute_exist(): assert child_stream.parent == parent_stream -@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") +@pytest.mark.skip( + reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth" +) def test_that_response_was_cached(mocker, requests_mock): requests_mock.register_uri("GET", "https://google.com/", text="text") stream = CacheHttpStream() @@ -548,7 +550,9 @@ def parse_response(self, response: requests.Response, **kwargs) -> Iterable[Mapp yield {"value": len(response.text)} -@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") +@pytest.mark.skip( + reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth" +) @patch("airbyte_cdk.sources.streams.core.logging", MagicMock()) def test_using_cache(mocker, requests_mock): requests_mock.register_uri("GET", "https://google.com/", text="text") diff --git a/unit_tests/sources/streams/http/test_http_client.py b/unit_tests/sources/streams/http/test_http_client.py index 30839071e..d24943763 100644 --- a/unit_tests/sources/streams/http/test_http_client.py +++ b/unit_tests/sources/streams/http/test_http_client.py @@ -42,7 +42,9 @@ def test_cache_filename(): http_client.cache_filename == f"{http_client._name}.sqlite" -@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") +@pytest.mark.skip( + reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth" +) @pytest.mark.parametrize( "use_cache, expected_session", [ @@ -448,7 +450,9 @@ def test_session_request_exception_raises_backoff_exception(): http_client._send(prepared_request, {}) -@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") +@pytest.mark.skip( + reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth" +) def test_that_response_was_cached(requests_mock): cached_http_client = test_cache_http_client() @@ -722,7 +726,9 @@ def test_backoff_strategy_endless( assert mocked_send.call_count == expected_call_count -@pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") +@pytest.mark.skip( + reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth" +) def test_given_different_headers_then_response_is_not_cached(requests_mock): http_client = HttpClient(name="test", logger=MagicMock(), use_cache=True) first_request_headers = {"header_key": "first"} diff --git a/unit_tests/sources/streams/test_call_rate.py b/unit_tests/sources/streams/test_call_rate.py index 6f6afd9fe..7500982cc 100644 --- a/unit_tests/sources/streams/test_call_rate.py +++ b/unit_tests/sources/streams/test_call_rate.py @@ -332,7 +332,9 @@ def test_without_cache(self, mocker, requests_mock): assert MovingWindowCallRatePolicy.try_acquire.call_count == 10 - @pytest.mark.skip(reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth") + @pytest.mark.skip( + reason="TEMPORARY: cache is hardcoded off in HttpClient to prevent unbounded memory growth" + ) @pytest.mark.usefixtures("enable_cache") def test_with_cache(self, mocker, requests_mock): """Test that HttpStream will use call budget when provided and not cached"""