From fb3419738e19327054b51cf4462277765747fd52 Mon Sep 17 00:00:00 2001 From: igennova Date: Wed, 18 Mar 2026 01:18:57 +0530 Subject: [PATCH 1/8] Implement GET /setup/{id} endpoint with Pydantic schema --- src/database/setups.py | 28 +++++++++++++- src/routers/openml/setups.py | 25 ++++++++++++- src/schemas/setups.py | 37 +++++++++++++++++++ .../openml/migration/setups_migration_test.py | 31 +++++++++++++++- tests/routers/openml/setups_test.py | 14 +++++++ 5 files changed, 132 insertions(+), 3 deletions(-) create mode 100644 src/schemas/setups.py diff --git a/src/database/setups.py b/src/database/setups.py index e399f194..048877f2 100644 --- a/src/database/setups.py +++ b/src/database/setups.py @@ -1,7 +1,7 @@ """All database operations that directly operate on setups.""" from sqlalchemy import text -from sqlalchemy.engine import Row +from sqlalchemy.engine import Row, RowMapping from sqlalchemy.ext.asyncio import AsyncConnection @@ -20,6 +20,32 @@ async def get(setup_id: int, connection: AsyncConnection) -> Row | None: return row.first() +async def get_parameters(setup_id: int, connection: AsyncConnection) -> list[RowMapping]: + """Get all parameters for setup with `setup_id` from the database.""" + rows = await connection.execute( + text( + """ + SELECT + CAST(t_input.id AS CHAR) as id, + CAST(t_input.implementation_id AS CHAR) as flow_id, + t_impl.name AS flow_name, + CONCAT(t_impl.fullName, '_', t_input.name) AS full_name, + t_input.name AS parameter_name, + t_input.name AS name, + t_input.dataType AS data_type, + t_input.defaultValue AS default_value, + t_setting.value AS value + FROM input_setting t_setting + JOIN input t_input ON t_setting.input_id = t_input.id + JOIN implementation t_impl ON t_input.implementation_id = t_impl.id + WHERE t_setting.setup = :setup_id + """, + ), + parameters={"setup_id": setup_id}, + ) + return list(rows.mappings().all()) + + async def get_tags(setup_id: int, connection: AsyncConnection) -> list[Row]: """Get all tags for setup with `setup_id` from the database.""" rows = await connection.execute( diff --git a/src/routers/openml/setups.py b/src/routers/openml/setups.py index 65d2d533..f7bd0462 100644 --- a/src/routers/openml/setups.py +++ b/src/routers/openml/setups.py @@ -2,7 +2,7 @@ from typing import Annotated -from fastapi import APIRouter, Body, Depends +from fastapi import APIRouter, Body, Depends, Path from sqlalchemy.ext.asyncio import AsyncConnection import database.setups @@ -15,10 +15,33 @@ from database.users import User, UserGroup from routers.dependencies import expdb_connection, fetch_user_or_raise from routers.types import SystemString64 +from schemas.setups import SetupParameters, SetupResponse router = APIRouter(prefix="/setup", tags=["setup"]) +@router.get(path="/{setup_id}", response_model_exclude_none=True) +async def get_setup( + setup_id: Annotated[int, Path()], + expdb_db: Annotated[AsyncConnection, Depends(expdb_connection)], +) -> SetupResponse: + """Get setup by id.""" + setup = await database.setups.get(setup_id, expdb_db) + if not setup: + msg = f"Setup {setup_id} not found." + raise SetupNotFoundError(msg, code=281) + + setup_parameters = await database.setups.get_parameters(setup_id, expdb_db) + + params_model = SetupParameters( + setup_id=str(setup_id), + flow_id=str(setup.implementation_id), + parameter=[dict(param) for param in setup_parameters] if setup_parameters else None, + ) + + return SetupResponse(setup_parameters=params_model) + + @router.post(path="/tag") async def tag_setup( setup_id: Annotated[int, Body()], diff --git a/src/schemas/setups.py b/src/schemas/setups.py new file mode 100644 index 00000000..8bcaf1bb --- /dev/null +++ b/src/schemas/setups.py @@ -0,0 +1,37 @@ +"""Pydantic schemas for the setup API endpoints.""" + +from pydantic import BaseModel, ConfigDict + + +class SetupParameter(BaseModel): + """Schema representing an individual parameter within a setup.""" + + id: str + flow_id: str + flow_name: str + full_name: str + parameter_name: str + name: str + data_type: str | None = None + default_value: str | None = None + value: str + + model_config = ConfigDict(from_attributes=True) + + +class SetupParameters(BaseModel): + """Schema representing the grouped properties of a setup and its parameters.""" + + setup_id: str + flow_id: str + parameter: list[SetupParameter] | None = None + + model_config = ConfigDict(from_attributes=True) + + +class SetupResponse(BaseModel): + """Schema for the complete response of the GET /setup/{id} endpoint.""" + + setup_parameters: SetupParameters + + model_config = ConfigDict(from_attributes=True) diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py index b33742e0..3aefcab4 100644 --- a/tests/routers/openml/migration/setups_migration_test.py +++ b/tests/routers/openml/migration/setups_migration_test.py @@ -266,6 +266,35 @@ async def test_setup_tag_response_is_identical_tag_already_exists( assert original.status_code == HTTPStatus.INTERNAL_SERVER_ERROR assert new.status_code == HTTPStatus.CONFLICT - assert original.json()["error"]["code"] == new.json()["code"] assert original.json()["error"]["message"] == "Entity already tagged by this tag." assert new.json()["detail"] == f"Setup {setup_id} already has tag {tag!r}." + + +async def test_get_setup_response_is_identical_setup_doesnt_exist( + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + setup_id = 999999 + + original = await php_api.get(f"/setup/{setup_id}") + new = await py_api.get(f"/setup/{setup_id}") + + assert original.status_code == HTTPStatus.PRECONDITION_FAILED + assert new.status_code == HTTPStatus.NOT_FOUND + assert original.json()["error"]["message"] == "Unknown setup" + assert original.json()["error"]["code"] == new.json()["code"] + + +@pytest.mark.parametrize("setup_id", [1, 48]) +async def test_get_setup_response_is_identical( + setup_id: int, + py_api: httpx.AsyncClient, + php_api: httpx.AsyncClient, +) -> None: + original = await php_api.get(f"/setup/{setup_id}") + new = await py_api.get(f"/setup/{setup_id}") + + assert original.status_code == HTTPStatus.OK + assert new.status_code == HTTPStatus.OK + + assert original.json() == new.json() diff --git a/tests/routers/openml/setups_test.py b/tests/routers/openml/setups_test.py index 305bf423..db35b41b 100644 --- a/tests/routers/openml/setups_test.py +++ b/tests/routers/openml/setups_test.py @@ -130,3 +130,17 @@ async def test_setup_tag_success(py_api: httpx.AsyncClient, expdb_test: AsyncCon text("SELECT * FROM setup_tag WHERE id = 1 AND tag = 'my_new_success_tag'") ) assert len(rows.all()) == 1 + + +async def test_get_setup_unknown(py_api: httpx.AsyncClient) -> None: + response = await py_api.get("/setup/999999") + assert response.status_code == HTTPStatus.NOT_FOUND + assert re.match(r"Setup \d+ not found.", response.json()["detail"]) + + +async def test_get_setup_success(py_api: httpx.AsyncClient) -> None: + response = await py_api.get("/setup/1") + assert response.status_code == HTTPStatus.OK + data = response.json()["setup_parameters"] + assert data["setup_id"] == "1" + assert "parameter" in data From 264043d84524acd4c9ff534d054a503915fe0c44 Mon Sep 17 00:00:00 2001 From: igennova Date: Wed, 18 Mar 2026 01:33:26 +0530 Subject: [PATCH 2/8] updating docs and minor improvements --- docs/migration.md | 3 +++ src/database/setups.py | 1 + src/routers/openml/setups.py | 2 +- src/schemas/setups.py | 2 +- 4 files changed, 6 insertions(+), 2 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index 2c349c82..f2e57f12 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -109,6 +109,9 @@ For example, after tagging dataset 21 with the tag `"foo"`: ## Setups +### `GET /{id}` +The endpoint behaves identically to the PHP implementation. Note that `setup_id` and `flow_id` are consistently returned as strings instead of integers to maintain strict backward compatibility. Also, if a setup has no parameters, the `parameter` field is omitted entirely from the response. + ### `POST /setup/tag` and `POST /setup/untag` When successful, the "tag" property in the returned response is now always a list, even if only one tag exists for the entity. When removing the last tag, the "tag" property will be an empty list `[]` instead of being omitted from the response. diff --git a/src/database/setups.py b/src/database/setups.py index 048877f2..a91af5eb 100644 --- a/src/database/setups.py +++ b/src/database/setups.py @@ -39,6 +39,7 @@ async def get_parameters(setup_id: int, connection: AsyncConnection) -> list[Row JOIN input t_input ON t_setting.input_id = t_input.id JOIN implementation t_impl ON t_input.implementation_id = t_impl.id WHERE t_setting.setup = :setup_id + ORDER BY t_impl.id, t_input.id """, ), parameters={"setup_id": setup_id}, diff --git a/src/routers/openml/setups.py b/src/routers/openml/setups.py index f7bd0462..267866d5 100644 --- a/src/routers/openml/setups.py +++ b/src/routers/openml/setups.py @@ -36,7 +36,7 @@ async def get_setup( params_model = SetupParameters( setup_id=str(setup_id), flow_id=str(setup.implementation_id), - parameter=[dict(param) for param in setup_parameters] if setup_parameters else None, + parameter=setup_parameters or None, ) return SetupResponse(setup_parameters=params_model) diff --git a/src/schemas/setups.py b/src/schemas/setups.py index 8bcaf1bb..7dbb7011 100644 --- a/src/schemas/setups.py +++ b/src/schemas/setups.py @@ -14,7 +14,7 @@ class SetupParameter(BaseModel): name: str data_type: str | None = None default_value: str | None = None - value: str + value: str | None = None model_config = ConfigDict(from_attributes=True) From 0967c80daf2f6eeefb8f2a9b2c01f0843dd2b75d Mon Sep 17 00:00:00 2001 From: igennova Date: Thu, 26 Mar 2026 18:57:08 +0530 Subject: [PATCH 3/8] updating python api behaviour to return int instead of string --- docs/migration.md | 2 +- src/database/setups.py | 4 +- src/routers/openml/setups.py | 4 +- src/schemas/setups.py | 8 ++-- .../openml/migration/setups_migration_test.py | 40 ++++++++++++++++--- tests/routers/openml/setups_test.py | 2 +- 6 files changed, 44 insertions(+), 16 deletions(-) diff --git a/docs/migration.md b/docs/migration.md index f2e57f12..9945fad1 100644 --- a/docs/migration.md +++ b/docs/migration.md @@ -110,7 +110,7 @@ For example, after tagging dataset 21 with the tag `"foo"`: ## Setups ### `GET /{id}` -The endpoint behaves identically to the PHP implementation. Note that `setup_id` and `flow_id` are consistently returned as strings instead of integers to maintain strict backward compatibility. Also, if a setup has no parameters, the `parameter` field is omitted entirely from the response. +The endpoint behaves almost identically to the PHP implementation. Note that fields representing integers like `setup_id` and `flow_id` are returned as integers instead of strings to align with typed JSON. Also, if a setup has no parameters, the `parameter` field is omitted entirely from the response. ### `POST /setup/tag` and `POST /setup/untag` When successful, the "tag" property in the returned response is now always a list, even if only one tag exists for the entity. When removing the last tag, the "tag" property will be an empty list `[]` instead of being omitted from the response. diff --git a/src/database/setups.py b/src/database/setups.py index a91af5eb..74498478 100644 --- a/src/database/setups.py +++ b/src/database/setups.py @@ -26,8 +26,8 @@ async def get_parameters(setup_id: int, connection: AsyncConnection) -> list[Row text( """ SELECT - CAST(t_input.id AS CHAR) as id, - CAST(t_input.implementation_id AS CHAR) as flow_id, + t_input.id as id, + t_input.implementation_id as flow_id, t_impl.name AS flow_name, CONCAT(t_impl.fullName, '_', t_input.name) AS full_name, t_input.name AS parameter_name, diff --git a/src/routers/openml/setups.py b/src/routers/openml/setups.py index 267866d5..085eb169 100644 --- a/src/routers/openml/setups.py +++ b/src/routers/openml/setups.py @@ -34,8 +34,8 @@ async def get_setup( setup_parameters = await database.setups.get_parameters(setup_id, expdb_db) params_model = SetupParameters( - setup_id=str(setup_id), - flow_id=str(setup.implementation_id), + setup_id=setup_id, + flow_id=setup.implementation_id, parameter=setup_parameters or None, ) diff --git a/src/schemas/setups.py b/src/schemas/setups.py index 7dbb7011..b4869dbe 100644 --- a/src/schemas/setups.py +++ b/src/schemas/setups.py @@ -6,8 +6,8 @@ class SetupParameter(BaseModel): """Schema representing an individual parameter within a setup.""" - id: str - flow_id: str + id: int + flow_id: int flow_name: str full_name: str parameter_name: str @@ -22,8 +22,8 @@ class SetupParameter(BaseModel): class SetupParameters(BaseModel): """Schema representing the grouped properties of a setup and its parameters.""" - setup_id: str - flow_id: str + setup_id: int + flow_id: int parameter: list[SetupParameter] | None = None model_config = ConfigDict(from_attributes=True) diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py index 3aefcab4..69c6c802 100644 --- a/tests/routers/openml/migration/setups_migration_test.py +++ b/tests/routers/openml/migration/setups_migration_test.py @@ -1,3 +1,4 @@ +import asyncio import contextlib import re from collections.abc import AsyncGenerator, Callable, Iterable @@ -276,25 +277,52 @@ async def test_get_setup_response_is_identical_setup_doesnt_exist( ) -> None: setup_id = 999999 - original = await php_api.get(f"/setup/{setup_id}") - new = await py_api.get(f"/setup/{setup_id}") + original, new = await asyncio.gather( + php_api.get(f"/setup/{setup_id}"), + py_api.get(f"/setup/{setup_id}"), + ) assert original.status_code == HTTPStatus.PRECONDITION_FAILED assert new.status_code == HTTPStatus.NOT_FOUND assert original.json()["error"]["message"] == "Unknown setup" assert original.json()["error"]["code"] == new.json()["code"] + assert new.json()["detail"] == f"Setup {setup_id} not found." -@pytest.mark.parametrize("setup_id", [1, 48]) +@pytest.mark.parametrize("setup_id", range(1, 125)) async def test_get_setup_response_is_identical( setup_id: int, py_api: httpx.AsyncClient, php_api: httpx.AsyncClient, ) -> None: - original = await php_api.get(f"/setup/{setup_id}") - new = await py_api.get(f"/setup/{setup_id}") + original, new = await asyncio.gather( + php_api.get(f"/setup/{setup_id}"), + py_api.get(f"/setup/{setup_id}"), + ) + + if original.status_code == HTTPStatus.PRECONDITION_FAILED: + assert new.status_code == HTTPStatus.NOT_FOUND + return assert original.status_code == HTTPStatus.OK assert new.status_code == HTTPStatus.OK - assert original.json() == new.json() + original_json = original.json() + + # PHP returns integer fields as strings. To compare, we cast them to ints in the PHP response. + # PHP also returns `[]` instead of null for empty string optional fields, which Python omits. + setup_params = original_json["setup_parameters"] + setup_params["setup_id"] = int(setup_params["setup_id"]) + setup_params["flow_id"] = int(setup_params["flow_id"]) + if "parameter" in setup_params: + for p in setup_params["parameter"]: + p["id"] = int(p["id"]) + p["flow_id"] = int(p["flow_id"]) + if p.get("data_type") == []: + del p["data_type"] + if p.get("default_value") == []: + del p["default_value"] + if p.get("value") == []: + del p["value"] + + assert original_json == new.json() diff --git a/tests/routers/openml/setups_test.py b/tests/routers/openml/setups_test.py index db35b41b..ca4e6cd2 100644 --- a/tests/routers/openml/setups_test.py +++ b/tests/routers/openml/setups_test.py @@ -142,5 +142,5 @@ async def test_get_setup_success(py_api: httpx.AsyncClient) -> None: response = await py_api.get("/setup/1") assert response.status_code == HTTPStatus.OK data = response.json()["setup_parameters"] - assert data["setup_id"] == "1" + assert data["setup_id"] == 1 assert "parameter" in data From 61761f7888d08d8b6a27ea0be2e728cc2501941d Mon Sep 17 00:00:00 2001 From: igennova Date: Fri, 27 Mar 2026 00:32:18 +0530 Subject: [PATCH 4/8] Extend nested_remove_nones for empty lists --- src/core/conversions.py | 14 ++++++++++---- .../openml/migration/setups_migration_test.py | 9 +++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/core/conversions.py b/src/core/conversions.py index 07ec71ef..c545e60a 100644 --- a/src/core/conversions.py +++ b/src/core/conversions.py @@ -42,17 +42,23 @@ def nested_num_to_str(obj: Any) -> Any: return obj -def nested_remove_nones(obj: Any) -> Any: +def nested_remove_nones(obj: Any, *, remove_empty_list: bool = False) -> Any: if isinstance(obj, str): return obj if isinstance(obj, Mapping): return { - key: nested_remove_nones(val) + key: nested_remove_nones(val, remove_empty_list=remove_empty_list) for key, val in obj.items() - if val is not None and nested_remove_nones(val) is not None + if val is not None + and (not remove_empty_list or val != []) + and nested_remove_nones(val, remove_empty_list=remove_empty_list) is not None } if isinstance(obj, Iterable): - return [nested_remove_nones(val) for val in obj if nested_remove_nones(val) is not None] + return [ + nested_remove_nones(val, remove_empty_list=remove_empty_list) + for val in obj + if nested_remove_nones(val, remove_empty_list=remove_empty_list) is not None + ] return obj diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py index 69c6c802..7f752dd3 100644 --- a/tests/routers/openml/migration/setups_migration_test.py +++ b/tests/routers/openml/migration/setups_migration_test.py @@ -10,6 +10,7 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection +from core.conversions import nested_remove_nones from tests.conftest import temporary_records from tests.users import OWNER_USER, ApiKey @@ -318,11 +319,7 @@ async def test_get_setup_response_is_identical( for p in setup_params["parameter"]: p["id"] = int(p["id"]) p["flow_id"] = int(p["flow_id"]) - if p.get("data_type") == []: - del p["data_type"] - if p.get("default_value") == []: - del p["default_value"] - if p.get("value") == []: - del p["value"] + + original_json = nested_remove_nones(original_json, remove_empty_list=True) assert original_json == new.json() From 2e110c8efdde7ca86901b9c0d6294c380ad8994d Mon Sep 17 00:00:00 2001 From: igennova Date: Fri, 27 Mar 2026 00:44:09 +0530 Subject: [PATCH 5/8] better performance --- src/core/conversions.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/src/core/conversions.py b/src/core/conversions.py index c545e60a..1d611ee2 100644 --- a/src/core/conversions.py +++ b/src/core/conversions.py @@ -46,19 +46,25 @@ def nested_remove_nones(obj: Any, *, remove_empty_list: bool = False) -> Any: if isinstance(obj, str): return obj if isinstance(obj, Mapping): - return { - key: nested_remove_nones(val, remove_empty_list=remove_empty_list) - for key, val in obj.items() - if val is not None - and (not remove_empty_list or val != []) - and nested_remove_nones(val, remove_empty_list=remove_empty_list) is not None - } + cleaned: dict[Any, Any] = {} + for key, val in obj.items(): + cleaned_val = nested_remove_nones(val, remove_empty_list=remove_empty_list) + if cleaned_val is None: + continue + if remove_empty_list and cleaned_val == []: + continue + cleaned[key] = cleaned_val + return cleaned if isinstance(obj, Iterable): - return [ - nested_remove_nones(val, remove_empty_list=remove_empty_list) - for val in obj - if nested_remove_nones(val, remove_empty_list=remove_empty_list) is not None - ] + cleaned_list: list[Any] = [] + for val in obj: + cleaned_val = nested_remove_nones(val, remove_empty_list=remove_empty_list) + if cleaned_val is None: + continue + if remove_empty_list and cleaned_val == []: + continue + cleaned_list.append(cleaned_val) + return cleaned_list return obj From 86f818f59073c971226c1ef4a327f96db55318b1 Mon Sep 17 00:00:00 2001 From: igennova Date: Fri, 27 Mar 2026 00:58:46 +0530 Subject: [PATCH 6/8] Fix NaN parsing in _str_to_num and use in migration tests --- src/core/conversions.py | 7 ++++++- .../openml/migration/setups_migration_test.py | 19 ++++++++----------- 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/core/conversions.py b/src/core/conversions.py index 1d611ee2..84668d5b 100644 --- a/src/core/conversions.py +++ b/src/core/conversions.py @@ -1,3 +1,4 @@ +import math from collections.abc import Iterable, Mapping, Sequence from typing import Any @@ -7,9 +8,13 @@ def _str_to_num(string: str) -> int | float | str: if string.isdigit(): return int(string) try: - return float(string) + f = float(string) + if math.isnan(f) or math.isinf(f): + return string except ValueError: return string + else: + return f def nested_str_to_num(obj: Any) -> Any: diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py index 7f752dd3..adb89ba4 100644 --- a/tests/routers/openml/migration/setups_migration_test.py +++ b/tests/routers/openml/migration/setups_migration_test.py @@ -10,7 +10,7 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection -from core.conversions import nested_remove_nones +from core.conversions import nested_remove_nones, nested_str_to_num from tests.conftest import temporary_records from tests.users import OWNER_USER, ApiKey @@ -310,16 +310,13 @@ async def test_get_setup_response_is_identical( original_json = original.json() - # PHP returns integer fields as strings. To compare, we cast them to ints in the PHP response. + # PHP returns integer fields as strings. To compare, we recursively convert string digits + # to integers. # PHP also returns `[]` instead of null for empty string optional fields, which Python omits. - setup_params = original_json["setup_parameters"] - setup_params["setup_id"] = int(setup_params["setup_id"]) - setup_params["flow_id"] = int(setup_params["flow_id"]) - if "parameter" in setup_params: - for p in setup_params["parameter"]: - p["id"] = int(p["id"]) - p["flow_id"] = int(p["flow_id"]) - + original_json = nested_str_to_num(original_json) original_json = nested_remove_nones(original_json, remove_empty_list=True) - assert original_json == new.json() + new_json = nested_str_to_num(new.json()) + new_json = nested_remove_nones(new_json, remove_empty_list=True) + + assert original_json == new_json From d9f8bfd67f337cc21f822dfa3a1137926c34ac23 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 27 Mar 2026 09:33:35 +0100 Subject: [PATCH 7/8] Generalize removal of values to parameter --- src/core/conversions.py | 33 ++++++++----------- .../openml/migration/setups_migration_test.py | 6 ++-- .../migration/studies_migration_test.py | 4 +-- .../openml/migration/tasks_migration_test.py | 4 +-- 4 files changed, 21 insertions(+), 26 deletions(-) diff --git a/src/core/conversions.py b/src/core/conversions.py index 84668d5b..a0cfff3a 100644 --- a/src/core/conversions.py +++ b/src/core/conversions.py @@ -47,29 +47,24 @@ def nested_num_to_str(obj: Any) -> Any: return obj -def nested_remove_nones(obj: Any, *, remove_empty_list: bool = False) -> Any: +def nested_remove_values(obj: Any, *, values: list[Any] | None = None) -> Any: + if values is None: + values = [None] + if isinstance(obj, str): return obj if isinstance(obj, Mapping): - cleaned: dict[Any, Any] = {} - for key, val in obj.items(): - cleaned_val = nested_remove_nones(val, remove_empty_list=remove_empty_list) - if cleaned_val is None: - continue - if remove_empty_list and cleaned_val == []: - continue - cleaned[key] = cleaned_val - return cleaned + return { + key: nested_remove_values(val, values=values) + for key, val in obj.items() + if nested_remove_values(val, values=values) not in values + } if isinstance(obj, Iterable): - cleaned_list: list[Any] = [] - for val in obj: - cleaned_val = nested_remove_nones(val, remove_empty_list=remove_empty_list) - if cleaned_val is None: - continue - if remove_empty_list and cleaned_val == []: - continue - cleaned_list.append(cleaned_val) - return cleaned_list + return [ + nested_remove_values(val, values=values) + for val in obj + if nested_remove_values(val, values=values) not in values + ] return obj diff --git a/tests/routers/openml/migration/setups_migration_test.py b/tests/routers/openml/migration/setups_migration_test.py index adb89ba4..93228645 100644 --- a/tests/routers/openml/migration/setups_migration_test.py +++ b/tests/routers/openml/migration/setups_migration_test.py @@ -10,7 +10,7 @@ from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection -from core.conversions import nested_remove_nones, nested_str_to_num +from core.conversions import nested_remove_values, nested_str_to_num from tests.conftest import temporary_records from tests.users import OWNER_USER, ApiKey @@ -314,9 +314,9 @@ async def test_get_setup_response_is_identical( # to integers. # PHP also returns `[]` instead of null for empty string optional fields, which Python omits. original_json = nested_str_to_num(original_json) - original_json = nested_remove_nones(original_json, remove_empty_list=True) + original_json = nested_remove_values(original_json, values=[[], None]) new_json = nested_str_to_num(new.json()) - new_json = nested_remove_nones(new_json, remove_empty_list=True) + new_json = nested_remove_values(new_json, values=[[], None]) assert original_json == new_json diff --git a/tests/routers/openml/migration/studies_migration_test.py b/tests/routers/openml/migration/studies_migration_test.py index 550ca686..07cdd0cb 100644 --- a/tests/routers/openml/migration/studies_migration_test.py +++ b/tests/routers/openml/migration/studies_migration_test.py @@ -3,7 +3,7 @@ import deepdiff import httpx -from core.conversions import nested_num_to_str, nested_remove_nones +from core.conversions import nested_num_to_str, nested_remove_values async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncClient) -> None: @@ -17,7 +17,7 @@ async def test_get_study_equal(py_api: httpx.AsyncClient, php_api: httpx.AsyncCl # New implementation is typed new_json = nested_num_to_str(new_json) # New implementation has same fields even if empty - new_json = nested_remove_nones(new_json) + new_json = nested_remove_values(new_json, values=[None]) new_json["tasks"] = {"task_id": new_json.pop("task_ids")} new_json["data"] = {"data_id": new_json.pop("data_ids")} if runs := new_json.pop("run_ids", None): diff --git a/tests/routers/openml/migration/tasks_migration_test.py b/tests/routers/openml/migration/tasks_migration_test.py index f71a1e2c..eb2297d4 100644 --- a/tests/routers/openml/migration/tasks_migration_test.py +++ b/tests/routers/openml/migration/tasks_migration_test.py @@ -7,8 +7,8 @@ from core.conversions import ( nested_num_to_str, - nested_remove_nones, nested_remove_single_element_list, + nested_remove_values, ) @@ -32,7 +32,7 @@ async def test_get_task_equal( new_json["task_id"] = new_json.pop("id") new_json["task_name"] = new_json.pop("name") # PHP is not typed *and* automatically removes None values - new_json = nested_remove_nones(new_json) + new_json = nested_remove_values(new_json, values=[None]) new_json = nested_num_to_str(new_json) # It also removes "value" entries for parameters if the list is empty, # it does not remove *all* empty lists, e.g., for cost_matrix input they are kept From 111165292aa0e7a33f9b360cdf604be2d21201d1 Mon Sep 17 00:00:00 2001 From: PGijsbers Date: Fri, 27 Mar 2026 09:40:50 +0100 Subject: [PATCH 8/8] make `values` a required parameter --- src/core/conversions.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/core/conversions.py b/src/core/conversions.py index a0cfff3a..5ac13c4a 100644 --- a/src/core/conversions.py +++ b/src/core/conversions.py @@ -47,10 +47,7 @@ def nested_num_to_str(obj: Any) -> Any: return obj -def nested_remove_values(obj: Any, *, values: list[Any] | None = None) -> Any: - if values is None: - values = [None] - +def nested_remove_values(obj: Any, *, values: list[Any]) -> Any: if isinstance(obj, str): return obj if isinstance(obj, Mapping):