Skip to content

Commit 4bbbb8a

Browse files
committed
fix: replace requests with httpx across all HTTP call sites
urllib3 2.6.3 seems to have a regression causing requests-based calls to rest.ensembl.org to time out at 30-70s while curl and httpx complete in ~0.8s. The root cause is in urllib3's connection handling layer; switching to httpx bypasses it entirely and restores expected latency. Rather than patching only the Ensembl calls, all HTTP call sites are migrated to ensure consistent performance throughout the codebase (MaveDB API, UCSC genome download, cdot transcript lookups, UniProt). resource_utils.py already used httpx for request_with_backoff; the remaining requests.get calls and the streaming download in http_download are updated to match. Test infrastructure is updated from requests-mock to respx and exception types are updated to their httpx equivalents (HTTPStatusError, ConnectError, TimeoutException, HTTPError).
1 parent ecdc1cf commit 4bbbb8a

10 files changed

Lines changed: 61 additions & 63 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,14 +33,14 @@ requires-python = ">=3.11"
3333

3434
dependencies = [
3535
"agct~=0.1.0",
36-
"requests",
3736
"biopython",
3837
"tqdm",
3938
"cdot",
4039
"click",
4140
"cool-seq-tool==0.4.0.dev3",
4241
"ga4gh.vrs==2.0.0-a6",
4342
"gene_normalizer[etl,pg]==0.3.0-dev2",
43+
"httpx~=0.28",
4444
"pydantic>=2",
4545
"python-dotenv",
4646
"setuptools>=68.0", # tmp -- ensure 3.12 compatibility
@@ -61,7 +61,7 @@ tests = [
6161
"pytest-mock",
6262
"pytest-cov",
6363
"pytest-asyncio",
64-
"requests-mock"
64+
"respx"
6565
]
6666
dev = [
6767
"ruff==0.2.0",

src/api/routers/map.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from cool_seq_tool.schemas import AnnotationLayer
66
from fastapi import APIRouter, HTTPException
77
from fastapi.responses import JSONResponse
8-
from requests import HTTPError
8+
from httpx import HTTPStatusError
99

1010
from dcd_mapping.align import build_alignment_result
1111
from dcd_mapping.annotate import (
@@ -117,7 +117,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
117117
# on the target level and on the variant level for variants relative to that target
118118
# HTTPErrors and DataLookupErrors cause the mapping process to exit because these indicate
119119
# underlying issues with data providers.
120-
except HTTPError as e:
120+
except HTTPStatusError as e:
121121
msg = f"HTTP error occurred during transcript selection: {e}"
122122
_logger.error(msg)
123123
raise HTTPException(status_code=500, detail=msg) from e

src/dcd_mapping/align.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
from urllib.parse import urlparse
99

10-
import requests
10+
import httpx
1111
from Bio.SearchIO import HSP
1212
from Bio.SearchIO import parse as parse_blat
1313
from Bio.SearchIO._model import Hit, QueryResult
@@ -84,7 +84,7 @@ def get_ref_genome_file(
8484
if not genome_file.exists():
8585
try:
8686
http_download(url, genome_file, silent)
87-
except requests.HTTPError as e:
87+
except httpx.HTTPStatusError as e:
8888
msg = f"HTTPError when fetching reference genome file from {url}"
8989
_logger.error(msg)
9090
raise ResourceAcquisitionError(msg) from e
@@ -378,11 +378,11 @@ def fetch_alignment(
378378
alignment_results[accession_id] = None
379379
else:
380380
url = f"{CDOT_URL}/transcript/{accession_id}"
381-
r = requests.get(url, timeout=30)
381+
r = httpx.get(url, timeout=30)
382382

383383
try:
384384
r.raise_for_status()
385-
except requests.HTTPError as e:
385+
except httpx.HTTPStatusError as e:
386386
msg = f"Received HTTPError from {url} for scoreset {metadata.urn}"
387387
_logger.error(msg)
388388
raise ResourceAcquisitionError(msg) from e

src/dcd_mapping/lookup.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@
1414
from typing import Any
1515

1616
import hgvs
17+
import httpx
1718
import polars as pl
18-
import requests
1919
from biocommons.seqrepo import SeqRepo
2020
from biocommons.seqrepo.seqaliasdb.seqaliasdb import sqlite3
2121
from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider
@@ -682,7 +682,7 @@ def get_overlapping_features_for_region(
682682
url, headers={"Content-Type": "application/json"}
683683
)
684684
response.raise_for_status()
685-
except requests.RequestException as e:
685+
except httpx.HTTPError as e:
686686
_logger.error(
687687
"Failed to fetch overlapping features for region %s-%s on chromosome %s: %s",
688688
start,
@@ -715,7 +715,7 @@ def get_uniprot_sequence(uniprot_id: str) -> str | None:
715715
:raise HTTPError: if response comes with an HTTP error code
716716
"""
717717
url = f"https://www.ebi.ac.uk/proteins/api/proteins?accession={uniprot_id.split(':')[1]}&format=json"
718-
response = requests.get(url, timeout=30)
718+
response = httpx.get(url, timeout=30)
719719
response.raise_for_status()
720720
json = response.json()
721721
return json[0]["sequence"]["sequence"]

src/dcd_mapping/main.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from pathlib import Path
77

88
import click
9-
from requests import HTTPError
9+
from httpx import HTTPStatusError
1010

1111
from dcd_mapping.align import build_alignment_result
1212
from dcd_mapping.annotate import (
@@ -205,7 +205,7 @@ async def map_scoreset(
205205
# on the target level and on the variant level for variants relative to that target
206206
# HTTPErrors and DataLookupErrors cause the mapping process to exit because these indicate
207207
# underlying issues with data providers.
208-
except HTTPError as e:
208+
except HTTPStatusError as e:
209209
_emit_info(
210210
f"HTTP error occurred during transcript selection: {e}",
211211
silent,

src/dcd_mapping/mavedb_data.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from pathlib import Path
1414
from typing import Any
1515

16-
import requests
16+
import httpx
1717
from fastapi import HTTPException
1818
from pydantic import ValidationError
1919

@@ -57,7 +57,7 @@ def get_scoreset_urns() -> set[str]:
5757
5858
:return: set of URN strings
5959
"""
60-
r = requests.get(
60+
r = httpx.get(
6161
f"{MAVEDB_BASE_URL}/api/v1/experiments/",
6262
timeout=30,
6363
headers=authentication_header(),
@@ -101,14 +101,14 @@ def get_human_urns() -> list[str]:
101101
scoreset_urns = get_scoreset_urns()
102102
human_scoresets: list[str] = []
103103
for urn in scoreset_urns:
104-
r = requests.get(
104+
r = httpx.get(
105105
f"{MAVEDB_BASE_URL}/api/v1/score-sets/{urn}",
106106
timeout=30,
107107
headers=authentication_header(),
108108
)
109109
try:
110110
r.raise_for_status()
111-
except requests.exceptions.HTTPError:
111+
except httpx.HTTPStatusError:
112112
_logger.info("Unable to retrieve scoreset data for URN %s", urn)
113113
continue
114114
data = r.json()
@@ -156,10 +156,10 @@ def get_raw_scoreset_metadata(
156156
metadata_file = dcd_mapping_dir / f"{scoreset_urn}_metadata.json"
157157
if not metadata_file.exists():
158158
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{scoreset_urn}"
159-
r = requests.get(url, timeout=30, headers=authentication_header())
159+
r = httpx.get(url, timeout=30, headers=authentication_header())
160160
try:
161161
r.raise_for_status()
162-
except requests.HTTPError as e:
162+
except httpx.HTTPStatusError as e:
163163
msg = f"Received HTTPError from {url} for scoreset {scoreset_urn}"
164164
_logger.error(msg)
165165
raise ResourceAcquisitionError(msg) from e
@@ -318,7 +318,7 @@ def get_scoreset_records(
318318
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{metadata.urn}/scores"
319319
try:
320320
http_download(url, scores_csv, silent)
321-
except requests.HTTPError as e:
321+
except httpx.HTTPStatusError as e:
322322
msg = f"HTTPError when fetching scores CSV from {url}"
323323
_logger.error(msg)
324324
raise ResourceAcquisitionError(msg) from e

src/dcd_mapping/resource_utils.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from pathlib import Path
66

77
import click
8-
import requests
8+
import httpx
99
from tqdm import tqdm
1010

1111
_logger = logging.getLogger(__name__)
@@ -71,13 +71,11 @@ def http_download(url: str, out_path: Path, silent: bool = True) -> Path:
7171
:param out_path: location to save file to
7272
:param silent: show TQDM progress bar if true
7373
:return: Path if download successful
74-
:raise requests.HTTPError: if request is unsuccessful
74+
:raise httpx.HTTPStatusError: if request is unsuccessful
7575
"""
7676
if not silent:
7777
click.echo(f"Downloading {out_path.name} to {out_path.parents[0].absolute()}")
78-
with requests.get(
79-
url, stream=True, timeout=60, headers=authentication_header()
80-
) as r:
78+
with httpx.stream("GET", url, timeout=60, headers=authentication_header()) as r:
8179
r.raise_for_status()
8280
total_size = int(r.headers.get("content-length", 0))
8381
with out_path.open("wb") as h:
@@ -89,20 +87,20 @@ def http_download(url: str, out_path: Path, silent: bool = True) -> Path:
8987
desc=out_path.name,
9088
ncols=80,
9189
) as progress_bar:
92-
for chunk in r.iter_content(chunk_size=8192):
90+
for chunk in r.iter_bytes(chunk_size=8192):
9391
if chunk:
9492
h.write(chunk)
9593
progress_bar.update(len(chunk))
9694
else:
97-
for chunk in r.iter_content(chunk_size=8192):
95+
for chunk in r.iter_bytes(chunk_size=8192):
9896
if chunk:
9997
h.write(chunk)
10098
return out_path
10199

102100

103101
def request_with_backoff(
104102
url: str, max_retries: int = 5, backoff_factor: float = 0.3, **kwargs
105-
) -> requests.Response:
103+
) -> httpx.Response:
106104
"""HTTP GET with exponential backoff only for retryable errors.
107105
108106
Retries on:
@@ -115,9 +113,9 @@ def request_with_backoff(
115113
attempt = 0
116114
while attempt < max_retries:
117115
try:
118-
kwargs.setdefault("timeout", 60) # Default timeout of 10 seconds
119-
response = requests.get(url, **kwargs) # noqa: S113
120-
except (requests.Timeout, requests.ConnectionError):
116+
kwargs.setdefault("timeout", 60)
117+
response = httpx.get(url, **kwargs)
118+
except (httpx.TimeoutException, httpx.ConnectError):
121119
# Retry on transient network failures
122120
if attempt == max_retries - 1:
123121
raise

tests/test_lookup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from unittest.mock import patch
44

5-
import requests
5+
import httpx
66

77
from dcd_mapping.lookup import get_overlapping_features_for_region
88

@@ -95,7 +95,7 @@ def __init__(self):
9595

9696
def raise_for_status(self):
9797
msg = f"HTTP {self.status_code} Error"
98-
raise requests.RequestException(msg)
98+
raise httpx.HTTPError(msg)
9999

100100
with (
101101
patch("dcd_mapping.lookup.request_with_backoff", return_value=ErrorResponse()),

tests/test_mavedb_data.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
import shutil
44
from pathlib import Path
55

6+
import httpx
67
import pytest
7-
import requests_mock
8+
import respx
89

910
from dcd_mapping.mavedb_data import get_scoreset_metadata, get_scoreset_records
1011

@@ -32,10 +33,9 @@ def test_get_scoreset_metadata(
3233
resources_data_dir: Path, scoreset_metadata_response: dict
3334
):
3435
urn = "urn:mavedb:00000093-a-1"
35-
with requests_mock.Mocker() as m:
36-
m.get(
37-
f"https://api.mavedb.org/api/v1/score-sets/{urn}",
38-
json=scoreset_metadata_response[urn],
36+
with respx.mock:
37+
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}").mock(
38+
return_value=httpx.Response(200, json=scoreset_metadata_response[urn])
3939
)
4040
scoreset_metadata = get_scoreset_metadata(
4141
urn, dcd_mapping_dir=resources_data_dir
@@ -62,17 +62,15 @@ def test_get_scoreset_records(
6262
urn = "urn:mavedb:00000093-a-1"
6363
with (fixture_data_dir / f"{urn}_scores.csv").open() as f:
6464
scores_csv_text = f.read()
65-
with requests_mock.Mocker() as m:
66-
m.get(
67-
f"https://api.mavedb.org/api/v1/score-sets/{urn}",
68-
json=scoreset_metadata_response[urn],
65+
with respx.mock:
66+
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}").mock(
67+
return_value=httpx.Response(200, json=scoreset_metadata_response[urn])
6968
)
7069
scoreset_metadata = get_scoreset_metadata(
7170
urn, dcd_mapping_dir=resources_data_dir
7271
)
73-
m.get(
74-
f"https://api.mavedb.org/api/v1/score-sets/{urn}/scores",
75-
text=scores_csv_text,
72+
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}/scores").mock(
73+
return_value=httpx.Response(200, text=scores_csv_text)
7674
)
7775
scoreset_records = get_scoreset_records(
7876
scoreset_metadata, dcd_mapping_dir=resources_data_dir

0 commit comments

Comments (0)