From 3eebed29c4c4a0506212ebd94dae0cb4a3ee6284 Mon Sep 17 00:00:00 2001 From: Jayant Date: Wed, 11 Mar 2026 11:05:12 +0530 Subject: [PATCH 1/4] feat(datasets): migrate POST /datasets/untag endpoint --- src/database/datasets.py | 32 +++++++ src/routers/openml/datasets.py | 39 +++++++++ tests/routers/openml/dataset_tag_test.py | 83 +++++++++++++++++++ .../migration/datasets_migration_test.py | 43 ++++++++++ 4 files changed, 197 insertions(+) diff --git a/src/database/datasets.py b/src/database/datasets.py index f69a035a..0307950c 100644 --- a/src/database/datasets.py +++ b/src/database/datasets.py @@ -66,6 +66,38 @@ def tag(id_: int, tag_: str, *, user_id: int, connection: Connection) -> None: ) +def get_tag_for(id_: int, tag_: str, connection: Connection) -> Row | None: + row = connection.execute( + text( + """ + SELECT * + FROM dataset_tag + WHERE `id` = :dataset_id AND `tag` = :tag + """, + ), + parameters={ + "dataset_id": id_, + "tag": tag_, + }, + ) + return row.one_or_none() + + +def untag(id_: int, tag_: str, *, connection: Connection) -> None: + connection.execute( + text( + """ + DELETE FROM dataset_tag + WHERE `id` = :dataset_id AND `tag` = :tag + """, + ), + parameters={ + "dataset_id": id_, + "tag": tag_, + }, + ) + + def get_description( id_: int, connection: Connection, diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index dda25117..ccfa57e9 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -48,6 +48,31 @@ def tag_dataset( } +@router.post( + path="/untag", +) +def untag_dataset( + data_id: Annotated[int, Body()], + tag: Annotated[str, SystemString64], + user: Annotated[User | None, Depends(fetch_user)] = None, + expdb_db: Annotated[Connection, Depends(expdb_connection)] = None, +) -> dict[str, dict[str, Any]]: + if user is None: + raise create_authentication_failed_error() + + tag_record = database.datasets.get_tag_for(data_id, tag, expdb_db) + if tag_record is None: + raise create_tag_not_found_error() + + if tag_record.uploader != user.user_id and UserGroup.ADMIN not in user.groups: + raise create_tag_not_owned_error() + + database.datasets.untag(data_id, tag, connection=expdb_db) + return { + "data_untag": {"id": str(data_id)}, + } + + def create_authentication_failed_error() -> HTTPException: return HTTPException( status_code=HTTPStatus.PRECONDITION_FAILED, @@ -55,6 +80,20 @@ def create_authentication_failed_error() -> HTTPException: ) +def create_tag_not_found_error() -> HTTPException: + return HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail={"code": "475", "message": "Tag not found."}, + ) + + +def create_tag_not_owned_error() -> HTTPException: + return HTTPException( + status_code=HTTPStatus.INTERNAL_SERVER_ERROR, + detail={"code": "476", "message": "Tag is not owned by you"}, + ) + + def create_tag_exists_error(data_id: int, tag: str) -> HTTPException: return HTTPException( status_code=HTTPStatus.INTERNAL_SERVER_ERROR, diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index 5449862a..f435f948 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -85,3 +85,86 @@ def test_dataset_tag_invalid_tag_is_rejected( assert new.status_code == HTTPStatus.UNPROCESSABLE_ENTITY assert new.json()["detail"][0]["loc"] == ["body", "tag"] + + +@pytest.mark.parametrize( + "key", + [None, ApiKey.INVALID], + ids=["no authentication", "invalid key"], +) +def test_dataset_untag_rejects_unauthorized(key: ApiKey, py_api: TestClient) -> None: + apikey = "" if key is None else f"?api_key={key}" + response = py_api.post( + f"/datasets/untag{apikey}", + json={"data_id": 1, "tag": "study_14"}, + ) + assert response.status_code == HTTPStatus.PRECONDITION_FAILED + assert response.json()["detail"] == {"code": "103", "message": "Authentication failed"} + + +def test_dataset_untag(py_api: TestClient, expdb_test: Connection) -> None: + dataset_id = 1 + tag = "temp_dataset_untag" + py_api.post( + f"/datasets/tag?api_key={ApiKey.SOME_USER}", + json={"data_id": dataset_id, "tag": tag}, + ) + + response = py_api.post( + f"/datasets/untag?api_key={ApiKey.SOME_USER}", + json={"data_id": dataset_id, "tag": tag}, + ) + assert response.status_code == HTTPStatus.OK + assert response.json() == {"data_untag": {"id": str(dataset_id)}} + assert tag not in get_tags_for(id_=dataset_id, connection=expdb_test) + + +def test_dataset_untag_rejects_other_user(py_api: TestClient) -> None: + dataset_id = 1 + tag = "temp_dataset_untag_not_owned" + py_api.post( + f"/datasets/tag?api_key={ApiKey.SOME_USER}", + json={"data_id": dataset_id, "tag": tag}, + ) + + response = py_api.post( + f"/datasets/untag?api_key={ApiKey.OWNER_USER}", + json={"data_id": dataset_id, "tag": tag}, + ) + assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + assert response.json()["detail"] == {"code": "476", "message": "Tag is not owned by you"} + + cleanup = py_api.post( + f"/datasets/untag?api_key={ApiKey.SOME_USER}", + json={"data_id": dataset_id, "tag": tag}, + ) + assert cleanup.status_code == HTTPStatus.OK + + +def test_dataset_untag_fails_if_tag_does_not_exist(py_api: TestClient) -> None: + dataset_id = 1 + tag = "definitely_not_a_dataset_tag" + response = py_api.post( + f"/datasets/untag?api_key={ApiKey.ADMIN}", + json={"data_id": dataset_id, "tag": tag}, + ) + assert response.status_code == HTTPStatus.INTERNAL_SERVER_ERROR + assert response.json()["detail"] == {"code": "475", "message": "Tag not found."} + + +@pytest.mark.parametrize( + "tag", + ["", "h@", " a", "a" * 65], + ids=["too short", "@", "space", "too long"], +) +def test_dataset_untag_invalid_tag_is_rejected( + tag: str, + py_api: TestClient, +) -> None: + response = py_api.post( + f"/datasets/untag?api_key={ApiKey.ADMIN}", + json={"data_id": 1, "tag": tag}, + ) + + assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + assert response.json()["detail"][0]["loc"] == ["body", "tag"] diff --git a/tests/routers/openml/migration/datasets_migration_test.py b/tests/routers/openml/migration/datasets_migration_test.py index 011d8dba..e486e6a4 100644 --- a/tests/routers/openml/migration/datasets_migration_test.py +++ b/tests/routers/openml/migration/datasets_migration_test.py @@ -195,6 +195,49 @@ def test_dataset_tag_response_is_identical( assert original == new +@pytest.mark.parametrize( + "dataset_id", + [1, 2, 3, 101, 131], +) +@pytest.mark.parametrize( + "api_key", + [ApiKey.ADMIN, ApiKey.SOME_USER, ApiKey.OWNER_USER], + ids=["Administrator", "regular user", "possible owner"], +) +@pytest.mark.parametrize( + "tag", + ["study_14", "study_15"], +) +def test_dataset_untag_response_is_identical( + dataset_id: int, + tag: str, + api_key: str, + py_api: TestClient, + php_api: httpx.Client, +) -> None: + original = php_api.post( + "/data/untag", + data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, + ) + if original.status_code == HTTPStatus.OK: + php_api.post( + "/data/tag", + data={"api_key": api_key, "tag": tag, "data_id": dataset_id}, + ) + + new = py_api.post( + f"/datasets/untag?api_key={api_key}", + json={"data_id": dataset_id, "tag": tag}, + ) + + assert original.status_code == new.status_code, original.json() + if new.status_code != HTTPStatus.OK: + assert original.json()["error"] == new.json()["detail"] + return + + assert original.json() == new.json() + + @pytest.mark.parametrize( "data_id", list(range(1, 130)), From a81a21a342f14301807ff9dedd3119a2321f3577 Mon Sep 17 00:00:00 2001 From: Jayant Date: Sun, 15 Mar 2026 11:49:03 +0530 Subject: [PATCH 2/4] style: format datasets untag changes --- src/routers/openml/datasets.py | 1 + tests/routers/openml/dataset_tag_test.py | 4 +--- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/routers/openml/datasets.py b/src/routers/openml/datasets.py index ce8ece5c..d2ff7f1a 100644 --- a/src/routers/openml/datasets.py +++ b/src/routers/openml/datasets.py @@ -100,6 +100,7 @@ async def untag_dataset( "data_untag": {"id": str(data_id)}, } + class DatasetStatusFilter(StrEnum): ACTIVE = DatasetStatus.ACTIVE DEACTIVATED = DatasetStatus.DEACTIVATED diff --git a/tests/routers/openml/dataset_tag_test.py b/tests/routers/openml/dataset_tag_test.py index d524b681..586428f2 100644 --- a/tests/routers/openml/dataset_tag_test.py +++ b/tests/routers/openml/dataset_tag_test.py @@ -96,9 +96,7 @@ async def test_dataset_tag_invalid_tag_is_rejected( [None, ApiKey.INVALID], ids=["no authentication", "invalid key"], ) -async def test_dataset_untag_rejects_unauthorized( - key: ApiKey, py_api: httpx.AsyncClient -) -> None: +async def test_dataset_untag_rejects_unauthorized(key: ApiKey, py_api: httpx.AsyncClient) -> None: apikey = "" if key is None else f"?api_key={key}" response = await py_api.post( f"/datasets/untag{apikey}", From 72675aa99260c92b849c3d5c31cd7b2135a34335 Mon Sep 17 00:00:00 2001 From: Jayant Date: Sun, 15 Mar 2026 12:47:43 +0530 Subject: [PATCH 3/4] fix(flows): add POST /flows/exists and keep deprecated GET --- src/routers/openml/flows.py | 25 ++++++--- src/schemas/flows.py | 5 ++ tests/routers/openml/flows_test.py | 81 +++++++++++++++++++++++++++--- 3 files changed, 98 insertions(+), 13 deletions(-) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index 41254863..d6ea4dc9 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -7,29 +7,40 @@ from core.conversions import _str_to_num from core.errors import FlowNotFoundError from routers.dependencies import expdb_connection -from schemas.flows import Flow, Parameter, Subflow +from schemas.flows import Flow, FlowExistsBody, Parameter, Subflow router = APIRouter(prefix="/flows", tags=["flows"]) -@router.get("/exists/{name}/{external_version}") +@router.post("/exists") async def flow_exists( - name: str, - external_version: str, + body: FlowExistsBody, expdb: Annotated[AsyncConnection, Depends(expdb_connection)], ) -> dict[Literal["flow_id"], int]: """Check if a Flow with the name and version exists, if so, return the flow id.""" flow = await database.flows.get_by_name( - name=name, - external_version=external_version, + name=body.name, + external_version=body.external_version, expdb=expdb, ) if flow is None: - msg = f"Flow with name {name} and external version {external_version} not found." + msg = ( + f"Flow with name {body.name} and external version {body.external_version} not found." + ) raise FlowNotFoundError(msg) return {"flow_id": flow.id} +@router.get("/exists/{name}/{external_version}", deprecated=True) +async def flow_exists_get( + name: str, + external_version: str, + expdb: Annotated[AsyncConnection, Depends(expdb_connection)], +) -> dict[Literal["flow_id"], int]: + """Use POST /flows/exists instead.""" + return await flow_exists(FlowExistsBody(name=name, external_version=external_version), expdb) + + @router.get("/{flow_id}") async def get_flow( flow_id: int, diff --git a/src/schemas/flows.py b/src/schemas/flows.py index a6cd479c..04ef1455 100644 --- a/src/schemas/flows.py +++ b/src/schemas/flows.py @@ -6,6 +6,11 @@ from pydantic import BaseModel, ConfigDict, Field +class FlowExistsBody(BaseModel): + name: str = Field(min_length=1, max_length=1024) + external_version: str = Field(min_length=1, max_length=128) + + class Parameter(BaseModel): name: str default_value: Any diff --git a/tests/routers/openml/flows_test.py b/tests/routers/openml/flows_test.py index 400ec4c0..b1841d36 100644 --- a/tests/routers/openml/flows_test.py +++ b/tests/routers/openml/flows_test.py @@ -4,10 +4,12 @@ import httpx import pytest from pytest_mock import MockerFixture +from sqlalchemy import text from sqlalchemy.ext.asyncio import AsyncConnection from core.errors import FlowNotFoundError from routers.openml.flows import flow_exists +from schemas.flows import FlowExistsBody from tests.conftest import Flow @@ -28,7 +30,7 @@ async def test_flow_exists_calls_db_correctly( "database.flows.get_by_name", new_callable=mocker.AsyncMock, ) - await flow_exists(name, external_version, expdb_test) + await flow_exists(FlowExistsBody(name=name, external_version=external_version), expdb_test) mocked_db.assert_called_once_with( name=name, external_version=external_version, @@ -51,29 +53,42 @@ async def test_flow_exists_processes_found( new_callable=mocker.AsyncMock, return_value=fake_flow, ) - response = await flow_exists("name", "external_version", expdb_test) + response = await flow_exists( + FlowExistsBody(name="name", external_version="external_version"), + expdb_test, + ) assert response == {"flow_id": fake_flow.id} async def test_flow_exists_handles_flow_not_found( mocker: MockerFixture, expdb_test: AsyncConnection ) -> None: - mocker.patch("database.flows.get_by_name", return_value=None) + mocker.patch( + "database.flows.get_by_name", + new_callable=mocker.AsyncMock, + return_value=None, + ) with pytest.raises(FlowNotFoundError) as error: - await flow_exists("foo", "bar", expdb_test) + await flow_exists(FlowExistsBody(name="foo", external_version="bar"), expdb_test) assert error.value.status_code == HTTPStatus.NOT_FOUND assert error.value.uri == FlowNotFoundError.uri async def test_flow_exists(flow: Flow, py_api: httpx.AsyncClient) -> None: - response = await py_api.get(f"/flows/exists/{flow.name}/{flow.external_version}") + response = await py_api.post( + "/flows/exists", + json={"name": flow.name, "external_version": flow.external_version}, + ) assert response.status_code == HTTPStatus.OK assert response.json() == {"flow_id": flow.id} async def test_flow_exists_not_exists(py_api: httpx.AsyncClient) -> None: name, version = "foo", "bar" - response = await py_api.get(f"/flows/exists/{name}/{version}") + response = await py_api.post( + "/flows/exists", + json={"name": name, "external_version": version}, + ) assert response.status_code == HTTPStatus.NOT_FOUND assert response.headers["content-type"] == "application/problem+json" error = response.json() @@ -82,6 +97,60 @@ async def test_flow_exists_not_exists(py_api: httpx.AsyncClient) -> None: assert version in error["detail"] +@pytest.mark.parametrize( + ("name", "external_version"), + [ + ("", "v1"), + ("some-flow", ""), + ], +) +async def test_flow_exists_rejects_empty_fields( + py_api: httpx.AsyncClient, + name: str, + external_version: str, +) -> None: + response = await py_api.post( + "/flows/exists", + json={"name": name, "external_version": external_version}, + ) + assert response.status_code == HTTPStatus.UNPROCESSABLE_ENTITY + + +async def test_flow_exists_with_uri_unsafe_chars( + py_api: httpx.AsyncClient, + expdb_test: AsyncConnection, +) -> None: + name = "sklearn.pipeline.Pipeline(steps=[('a','b')])" + external_version = "v1" + await expdb_test.execute( + text( + """ + INSERT INTO implementation(fullname,name,version,external_version,uploadDate) + VALUES (:fullname,:name,2,:external_version,'2024-02-02 02:23:23'); + """, + ), + parameters={ + "fullname": name, + "name": name, + "external_version": external_version, + }, + ) + result = await expdb_test.execute(text("""SELECT LAST_INSERT_ID();""")) + (flow_id,) = result.one() + response = await py_api.post( + "/flows/exists", + json={"name": name, "external_version": external_version}, + ) + assert response.status_code == HTTPStatus.OK + assert response.json() == {"flow_id": flow_id} + + +async def test_flow_exists_get_deprecated(flow: Flow, py_api: httpx.AsyncClient) -> None: + response = await py_api.get(f"/flows/exists/{flow.name}/{flow.external_version}") + assert response.status_code == HTTPStatus.OK + assert response.json() == {"flow_id": flow.id} + + async def test_get_flow_no_subflow(py_api: httpx.AsyncClient) -> None: response = await py_api.get("/flows/1") assert response.status_code == HTTPStatus.OK From 841c16e9b1fc8755b72a1e56fd28ba4faf2cb356 Mon Sep 17 00:00:00 2001 From: Jayant Date: Sun, 15 Mar 2026 12:50:30 +0530 Subject: [PATCH 4/4] style: apply ruff format for flows exists changes --- src/routers/openml/flows.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/routers/openml/flows.py b/src/routers/openml/flows.py index d6ea4dc9..dbc467a9 100644 --- a/src/routers/openml/flows.py +++ b/src/routers/openml/flows.py @@ -24,9 +24,7 @@ async def flow_exists( expdb=expdb, ) if flow is None: - msg = ( - f"Flow with name {body.name} and external version {body.external_version} not found." - ) + msg = f"Flow with name {body.name} and external version {body.external_version} not found." raise FlowNotFoundError(msg) return {"flow_id": flow.id}