Skip to content

Commit 4bbbb8a

Browse files
committed
fix: replace requests with httpx across all HTTP call sites
urllib3 2.6.3 seems to have a regression causing requests-based calls to rest.ensembl.org to time out at 30-70s while curl and httpx complete in ~0.8s. The root cause is in urllib3's connection handling layer; switching to httpx bypasses it entirely and restores expected latency. Rather than patching only the Ensembl calls, all HTTP call sites are migrated to ensure consistent performance throughout the codebase (MaveDB API, UCSC genome download, cdot transcript lookups, UniProt). resource_utils.py already used httpx for request_with_backoff; the remaining requests.get calls and the streaming download in http_download are updated to match. Test infrastructure is updated from requests-mock to respx and exception types are updated to their httpx equivalents (HTTPStatusError, ConnectError, TimeoutException, HTTPError).
1 parent ecdc1cf commit 4bbbb8a

10 files changed

Lines changed: 61 additions & 63 deletions

File tree

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -33,14 +33,14 @@ requires-python = ">=3.11"
3333

3434
dependencies = [
3535
"agct~=0.1.0",
36-
"requests",
3736
"biopython",
3837
"tqdm",
3938
"cdot",
4039
"click",
4140
"cool-seq-tool==0.4.0.dev3",
4241
"ga4gh.vrs==2.0.0-a6",
4342
"gene_normalizer[etl,pg]==0.3.0-dev2",
43+
"httpx~=0.28",
4444
"pydantic>=2",
4545
"python-dotenv",
4646
"setuptools>=68.0", # tmp -- ensure 3.12 compatibility
@@ -61,7 +61,7 @@ tests = [
6161
"pytest-mock",
6262
"pytest-cov",
6363
"pytest-asyncio",
64-
"requests-mock"
64+
"respx"
6565
]
6666
dev = [
6767
"ruff==0.2.0",

src/api/routers/map.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from cool_seq_tool.schemas import AnnotationLayer
66
from fastapi import APIRouter, HTTPException
77
from fastapi.responses import JSONResponse
8-
from requests import HTTPError
8+
from httpx import HTTPStatusError
99

1010
from dcd_mapping.align import build_alignment_result
1111
from dcd_mapping.annotate import (
@@ -117,7 +117,7 @@ async def map_scoreset(urn: str, store_path: Path | None = None) -> JSONResponse
117117
# on the target level and on the variant level for variants relative to that target
118118
# HTTPErrors and DataLookupErrors cause the mapping process to exit because these indicate
119119
# underlying issues with data providers.
120-
except HTTPError as e:
120+
except HTTPStatusError as e:
121121
msg = f"HTTP error occurred during transcript selection: {e}"
122122
_logger.error(msg)
123123
raise HTTPException(status_code=500, detail=msg) from e

src/dcd_mapping/align.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77
from pathlib import Path
88
from urllib.parse import urlparse
99

10-
import requests
10+
import httpx
1111
from Bio.SearchIO import HSP
1212
from Bio.SearchIO import parse as parse_blat
1313
from Bio.SearchIO._model import Hit, QueryResult
@@ -84,7 +84,7 @@ def get_ref_genome_file(
8484
if not genome_file.exists():
8585
try:
8686
http_download(url, genome_file, silent)
87-
except requests.HTTPError as e:
87+
except httpx.HTTPStatusError as e:
8888
msg = f"HTTPError when fetching reference genome file from {url}"
8989
_logger.error(msg)
9090
raise ResourceAcquisitionError(msg) from e
@@ -378,11 +378,11 @@ def fetch_alignment(
378378
alignment_results[accession_id] = None
379379
else:
380380
url = f"{CDOT_URL}/transcript/{accession_id}"
381-
r = requests.get(url, timeout=30)
381+
r = httpx.get(url, timeout=30)
382382

383383
try:
384384
r.raise_for_status()
385-
except requests.HTTPError as e:
385+
except httpx.HTTPStatusError as e:
386386
msg = f"Received HTTPError from {url} for scoreset {metadata.urn}"
387387
_logger.error(msg)
388388
raise ResourceAcquisitionError(msg) from e

src/dcd_mapping/lookup.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -14,8 +14,8 @@
1414
from typing import Any
1515

1616
import hgvs
17+
import httpx
1718
import polars as pl
18-
import requests
1919
from biocommons.seqrepo import SeqRepo
2020
from biocommons.seqrepo.seqaliasdb.seqaliasdb import sqlite3
2121
from cdot.hgvs.dataproviders import ChainedSeqFetcher, FastaSeqFetcher, RESTDataProvider
@@ -682,7 +682,7 @@ def get_overlapping_features_for_region(
682682
url, headers={"Content-Type": "application/json"}
683683
)
684684
response.raise_for_status()
685-
except requests.RequestException as e:
685+
except httpx.HTTPError as e:
686686
_logger.error(
687687
"Failed to fetch overlapping features for region %s-%s on chromosome %s: %s",
688688
start,
@@ -715,7 +715,7 @@ def get_uniprot_sequence(uniprot_id: str) -> str | None:
715715
:raise HTTPError: if response comes with an HTTP error code
716716
"""
717717
url = f"https://www.ebi.ac.uk/proteins/api/proteins?accession={uniprot_id.split(':')[1]}&format=json"
718-
response = requests.get(url, timeout=30)
718+
response = httpx.get(url, timeout=30)
719719
response.raise_for_status()
720720
json = response.json()
721721
return json[0]["sequence"]["sequence"]

src/dcd_mapping/main.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
from pathlib import Path
77

88
import click
9-
from requests import HTTPError
9+
from httpx import HTTPStatusError
1010

1111
from dcd_mapping.align import build_alignment_result
1212
from dcd_mapping.annotate import (
@@ -205,7 +205,7 @@ async def map_scoreset(
205205
# on the target level and on the variant level for variants relative to that target
206206
# HTTPErrors and DataLookupErrors cause the mapping process to exit because these indicate
207207
# underlying issues with data providers.
208-
except HTTPError as e:
208+
except HTTPStatusError as e:
209209
_emit_info(
210210
f"HTTP error occurred during transcript selection: {e}",
211211
silent,

src/dcd_mapping/mavedb_data.py

Lines changed: 7 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -13,7 +13,7 @@
1313
from pathlib import Path
1414
from typing import Any
1515

16-
import requests
16+
import httpx
1717
from fastapi import HTTPException
1818
from pydantic import ValidationError
1919

@@ -57,7 +57,7 @@ def get_scoreset_urns() -> set[str]:
5757
5858
:return: set of URN strings
5959
"""
60-
r = requests.get(
60+
r = httpx.get(
6161
f"{MAVEDB_BASE_URL}/api/v1/experiments/",
6262
timeout=30,
6363
headers=authentication_header(),
@@ -101,14 +101,14 @@ def get_human_urns() -> list[str]:
101101
scoreset_urns = get_scoreset_urns()
102102
human_scoresets: list[str] = []
103103
for urn in scoreset_urns:
104-
r = requests.get(
104+
r = httpx.get(
105105
f"{MAVEDB_BASE_URL}/api/v1/score-sets/{urn}",
106106
timeout=30,
107107
headers=authentication_header(),
108108
)
109109
try:
110110
r.raise_for_status()
111-
except requests.exceptions.HTTPError:
111+
except httpx.HTTPStatusError:
112112
_logger.info("Unable to retrieve scoreset data for URN %s", urn)
113113
continue
114114
data = r.json()
@@ -156,10 +156,10 @@ def get_raw_scoreset_metadata(
156156
metadata_file = dcd_mapping_dir / f"{scoreset_urn}_metadata.json"
157157
if not metadata_file.exists():
158158
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{scoreset_urn}"
159-
r = requests.get(url, timeout=30, headers=authentication_header())
159+
r = httpx.get(url, timeout=30, headers=authentication_header())
160160
try:
161161
r.raise_for_status()
162-
except requests.HTTPError as e:
162+
except httpx.HTTPStatusError as e:
163163
msg = f"Received HTTPError from {url} for scoreset {scoreset_urn}"
164164
_logger.error(msg)
165165
raise ResourceAcquisitionError(msg) from e
@@ -318,7 +318,7 @@ def get_scoreset_records(
318318
url = f"{MAVEDB_BASE_URL}/api/v1/score-sets/{metadata.urn}/scores"
319319
try:
320320
http_download(url, scores_csv, silent)
321-
except requests.HTTPError as e:
321+
except httpx.HTTPStatusError as e:
322322
msg = f"HTTPError when fetching scores CSV from {url}"
323323
_logger.error(msg)
324324
raise ResourceAcquisitionError(msg) from e

src/dcd_mapping/resource_utils.py

Lines changed: 9 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
from pathlib import Path
66

77
import click
8-
import requests
8+
import httpx
99
from tqdm import tqdm
1010

1111
_logger = logging.getLogger(__name__)
@@ -71,13 +71,11 @@ def http_download(url: str, out_path: Path, silent: bool = True) -> Path:
7171
:param out_path: location to save file to
7272
:param silent: show TQDM progress bar if true
7373
:return: Path if download successful
74-
:raise requests.HTTPError: if request is unsuccessful
74+
:raise httpx.HTTPStatusError: if request is unsuccessful
7575
"""
7676
if not silent:
7777
click.echo(f"Downloading {out_path.name} to {out_path.parents[0].absolute()}")
78-
with requests.get(
79-
url, stream=True, timeout=60, headers=authentication_header()
80-
) as r:
78+
with httpx.stream("GET", url, timeout=60, headers=authentication_header()) as r:
8179
r.raise_for_status()
8280
total_size = int(r.headers.get("content-length", 0))
8381
with out_path.open("wb") as h:
@@ -89,20 +87,20 @@ def http_download(url: str, out_path: Path, silent: bool = True) -> Path:
8987
desc=out_path.name,
9088
ncols=80,
9189
) as progress_bar:
92-
for chunk in r.iter_content(chunk_size=8192):
90+
for chunk in r.iter_bytes(chunk_size=8192):
9391
if chunk:
9492
h.write(chunk)
9593
progress_bar.update(len(chunk))
9694
else:
97-
for chunk in r.iter_content(chunk_size=8192):
95+
for chunk in r.iter_bytes(chunk_size=8192):
9896
if chunk:
9997
h.write(chunk)
10098
return out_path
10199

102100

103101
def request_with_backoff(
104102
url: str, max_retries: int = 5, backoff_factor: float = 0.3, **kwargs
105-
) -> requests.Response:
103+
) -> httpx.Response:
106104
"""HTTP GET with exponential backoff only for retryable errors.
107105
108106
Retries on:
@@ -115,9 +113,9 @@ def request_with_backoff(
115113
attempt = 0
116114
while attempt < max_retries:
117115
try:
118-
kwargs.setdefault("timeout", 60) # Default timeout of 10 seconds
119-
response = requests.get(url, **kwargs) # noqa: S113
120-
except (requests.Timeout, requests.ConnectionError):
116+
kwargs.setdefault("timeout", 60)
117+
response = httpx.get(url, **kwargs)
118+
except (httpx.TimeoutException, httpx.ConnectError):
121119
# Retry on transient network failures
122120
if attempt == max_retries - 1:
123121
raise

tests/test_lookup.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
from unittest.mock import patch
44

5-
import requests
5+
import httpx
66

77
from dcd_mapping.lookup import get_overlapping_features_for_region
88

@@ -95,7 +95,7 @@ def __init__(self):
9595

9696
def raise_for_status(self):
9797
msg = f"HTTP {self.status_code} Error"
98-
raise requests.RequestException(msg)
98+
raise httpx.HTTPError(msg)
9999

100100
with (
101101
patch("dcd_mapping.lookup.request_with_backoff", return_value=ErrorResponse()),

tests/test_mavedb_data.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
import shutil
44
from pathlib import Path
55

6+
import httpx
67
import pytest
7-
import requests_mock
8+
import respx
89

910
from dcd_mapping.mavedb_data import get_scoreset_metadata, get_scoreset_records
1011

@@ -32,10 +33,9 @@ def test_get_scoreset_metadata(
3233
resources_data_dir: Path, scoreset_metadata_response: dict
3334
):
3435
urn = "urn:mavedb:00000093-a-1"
35-
with requests_mock.Mocker() as m:
36-
m.get(
37-
f"https://api.mavedb.org/api/v1/score-sets/{urn}",
38-
json=scoreset_metadata_response[urn],
36+
with respx.mock:
37+
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}").mock(
38+
return_value=httpx.Response(200, json=scoreset_metadata_response[urn])
3939
)
4040
scoreset_metadata = get_scoreset_metadata(
4141
urn, dcd_mapping_dir=resources_data_dir
@@ -62,17 +62,15 @@ def test_get_scoreset_records(
6262
urn = "urn:mavedb:00000093-a-1"
6363
with (fixture_data_dir / f"{urn}_scores.csv").open() as f:
6464
scores_csv_text = f.read()
65-
with requests_mock.Mocker() as m:
66-
m.get(
67-
f"https://api.mavedb.org/api/v1/score-sets/{urn}",
68-
json=scoreset_metadata_response[urn],
65+
with respx.mock:
66+
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}").mock(
67+
return_value=httpx.Response(200, json=scoreset_metadata_response[urn])
6968
)
7069
scoreset_metadata = get_scoreset_metadata(
7170
urn, dcd_mapping_dir=resources_data_dir
7271
)
73-
m.get(
74-
f"https://api.mavedb.org/api/v1/score-sets/{urn}/scores",
75-
text=scores_csv_text,
72+
respx.get(f"https://api.mavedb.org/api/v1/score-sets/{urn}/scores").mock(
73+
return_value=httpx.Response(200, text=scores_csv_text)
7674
)
7775
scoreset_records = get_scoreset_records(
7876
scoreset_metadata, dcd_mapping_dir=resources_data_dir

0 commit comments

Comments (0)