Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,8 +70,8 @@ def create_api() -> FastAPI:
app.include_router(task_router)
app.include_router(flows_router)
app.include_router(study_router)
app.include_router(setup_router)
app.include_router(run_router)
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

issue (bug_risk): The run_router is being included twice, which will cause route duplication conflicts.

It’s now included once in the new line you added and again in the existing line below, which will cause FastAPI to raise duplicate route errors for identical paths/methods. Please keep only one app.include_router(run_router) here.

app.include_router(setup_router)
return app


Expand Down
145 changes: 140 additions & 5 deletions src/routers/openml/runs.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,153 @@
"""Endpoints for run-related data."""
"""Endpoints for OpenML Run resources."""

from typing import Annotated
from typing import Annotated, Any

from fastapi import APIRouter, Depends
from fastapi import APIRouter, Body, Depends
from sqlalchemy import text
from sqlalchemy.ext.asyncio import AsyncConnection

import database.runs
from core.errors import RunNotFoundError, RunTraceNotFoundError
from routers.dependencies import expdb_connection
from core.errors import NoResultsError, RunNotFoundError, RunTraceNotFoundError
from routers.dependencies import Pagination, expdb_connection
from routers.types import SystemString64
from schemas.runs import RunTrace, TraceIteration

router = APIRouter(prefix="/run", tags=["run"])


def _add_in_filter(
filters: list[str],
params: dict[str, Any],
column: str,
param_prefix: str,
values: list[int],
) -> None:
"""Append an IN filter clause and its bind parameters to the query builder.

Builds named placeholders (:prefix_0, :prefix_1, ...) for safe binding
of multiple integer values without SQL injection risk.

Args:
filters: List of WHERE clause fragments to append to.
params: Bind parameter dict to update in-place.
column: SQL column expression (e.g. "r.rid", "a.implementation_id").
param_prefix: Prefix for named bind params (e.g. "run_id", "flow_id").
values: List of integer values to filter by.

"""
placeholders = ", ".join(f":{param_prefix}_{i}" for i in range(len(values)))
filters.append(f"{column} IN ({placeholders})")
params |= {f"{param_prefix}_{i}": v for i, v in enumerate(values)}


@router.post(path="/list", description="Provided for convenience, same as `GET` endpoint.")
@router.get(path="/list")
async def list_runs( # noqa: PLR0913
expdb: Annotated[AsyncConnection, Depends(expdb_connection)],
pagination: Annotated[Pagination, Body(default_factory=Pagination)],
run_id: Annotated[
list[int] | None,
Body(
description="The run(s) to include in the search. "
"If none are specified, all runs are included.",
),
] = None,
task_id: Annotated[
list[int] | None,
Body(description="Only include runs for these task id(s)."),
] = None,
flow_id: Annotated[
list[int] | None,
Body(description="Only include runs using these flow id(s)."),
] = None,
setup_id: Annotated[
list[int] | None,
Body(description="Only include runs with these setup id(s)."),
] = None,
uploader: Annotated[
list[int] | None,
Body(description="Only include runs uploaded by these user id(s)."),
] = None,
Comment on lines +48 to +70
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Explicit empty filters currently broaden the search.

The truthiness checks on Lines 95-104 treat [] the same as None, so a request like { "run_id": [] } falls through to the no-filter path and can return unrelated runs. That contradicts the documented “all provided filters” behavior; handle is not None separately and reject or short-circuit empty lists.

🔧 One possible guard
-    if run_id:
+    if run_id is not None:
+        if not run_id:
+            raise NoResultsError("No runs match the search criteria.")
         _add_in_filter(filters, params, "r.rid", "run_id", run_id)

Apply the same pattern to task_id, flow_id, setup_id, and uploader.

Also applies to: 95-104

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@src/routers/openml/runs.py` around lines 47 - 69, The current filter handling
treats empty lists like None so inputs such as run_id=[] broaden the search;
update the checks in the run filtering logic (where run_id, task_id, flow_id,
setup_id, uploader are evaluated) to explicitly distinguish None from an empty
list: if a filter is None skip it, if it is an empty list either return an empty
result (short-circuit) or raise a 400 validation error (choose the project's
convention), and apply this same pattern to task_id, flow_id, setup_id and
uploader so empty arrays do not fall through to the no-filter path.

tag: Annotated[
str | None,
SystemString64,
Body(description="Only include runs with this tag."),
] = None,
Comment on lines +71 to +75
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion: The tag parameter description suggests multiple tags but the type only allows a single string.

The description uses tag(s) but the parameter is str | None, not a list. If multiple tags should be supported (like the other list filters), consider list[str] | None with an IN-style filter; otherwise, make the description singular to match the current type.

Suggested change
tag: Annotated[
str | None,
SystemString64,
Body(description="Only include runs with this tag(s)."),
] = None,
tag: Annotated[
str | None,
SystemString64,
Body(description="Only include runs with this tag."),
] = None,

) -> list[dict[str, Any]]:
"""List runs, optionally filtered by one or more criteria.

Filters are combinable — all provided filters are applied with AND logic.
List filters (run_id, task_id, flow_id, setup_id, uploader) accept multiple
values and are applied with IN logic within each filter.

Returns a flat list of run objects. Raises 404 if no runs match the filters.

PHP equivalent: GET /run/list/[run/{ids}][/task/{ids}][/flow/{ids}]...
Note: Unlike PHP (which requires at least one filter), this endpoint allows
an empty filter set and returns all runs paginated.
"""
# Clamp Pagination (Safety against massive scans or negative values)
limit = max(1, min(pagination.limit, 1000)) # Enforce sensible limits to prevent abuse
offset = max(0, pagination.offset)

filters: list[str] = []
params: dict[str, Any] = {"limit": limit, "offset": offset}

# Each list filter maps a user-facing param to a SQL column.
# flow_id maps to algorithm_setup.implementation_id (aliased as `a`).
# setup_id maps to run.setup — the FK column stored on the run row.
if run_id:
_add_in_filter(filters, params, "r.rid", "run_id", run_id)
if task_id:
_add_in_filter(filters, params, "r.task_id", "task_id", task_id)
if flow_id:
_add_in_filter(filters, params, "a.implementation_id", "flow_id", flow_id)
if setup_id:
_add_in_filter(filters, params, "r.setup", "setup_id", setup_id)
if uploader:
_add_in_filter(filters, params, "r.uploader", "uploader", uploader)

if tag is not None:
# run_tag.id is the run FK (not a surrogate PK), so we join on run.rid
filters.append("r.rid IN (SELECT id FROM run_tag WHERE tag = :tag)")
params["tag"] = tag

where_clause = f"WHERE {' AND '.join(filters)}" if filters else ""

query = text(
f"""
SELECT
r.rid AS run_id,
r.task_id AS task_id,
r.setup AS setup_id,
a.implementation_id AS flow_id,
r.uploader AS uploader,
r.start_time AS upload_time,
IFNULL(r.error_message, '') AS error_message,
IFNULL(r.run_details, '') AS run_details
FROM run r
JOIN algorithm_setup a ON r.setup = a.sid
{where_clause}
ORDER BY r.rid
LIMIT :limit OFFSET :offset
""", # noqa: S608
)

result = await expdb.execute(query, params)
rows = result.mappings().all()

if not rows:
msg = "No runs match the search criteria."
raise NoResultsError(msg)

# SQLAlchemy returns start_time as a datetime object. Format to match PHP
# response shape: "YYYY-MM-DD HH:MM:SS" (no T separator, no timezone).
# dict unpacking with a later key overrides the earlier one from **row.
return [
{**row, "upload_time": row["upload_time"].strftime("%Y-%m-%d %H:%M:%S")} for row in rows
]


@router.get("/trace/{run_id}")
async def get_run_trace(
run_id: int,
Expand Down
Loading
Loading