Skip to content

Commit 294ca47

Browse files
authored
[MNT] Use new test database image (#236)
* Update to newer database image
* Update validation to work with new user keys (temporarily)
* Update constant with new unprocessed datasets
* Update test users with their new IDs
* Update for new ownership of studies
* Ignore ontology of features for now as it is not in Python API
* Add back in key for private dataset owner
* Use private dataset owner
* Update constants to match new test database state
* Database setup script not needed with new image
* Allow more lenient api key through configuration
* Apply suggestion from @PGijsbers
* Wrap line to adhere to line limit
1 parent 7c0916b commit 294ca47

10 files changed

Lines changed: 60 additions & 47 deletions

File tree

docker-compose.yaml

Lines changed: 1 addition & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
services:
22
database:
33
profiles: ["python", "php", "all"]
4-
image: "openml/test-database:20240105"
4+
image: "openml/test-database:v0.1.20260204"
55
container_name: "openml-test-database"
66
environment:
77
MYSQL_ROOT_PASSWORD: ok
@@ -15,17 +15,6 @@ services:
1515
interval: 5s
1616
retries: 10
1717

18-
database-setup:
19-
profiles: ["python", "php", "all"]
20-
image: mysql
21-
container_name: "openml-test-database-setup"
22-
volumes:
23-
- ./docker/database/update.sh:/database-update.sh
24-
command: /bin/sh -c "/database-update.sh"
25-
depends_on:
26-
database:
27-
condition: service_healthy
28-
2918
docs:
3019
profiles: ["all"]
3120
build:

src/config.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
arff_base_url="https://test.openml.org"
22
minio_base_url="https://openml1.win.tue.nl"
33

4+
[development]
5+
allow_test_api_keys=true
6+
47
[fastapi]
58
root_path=""
69

src/database/users.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,19 @@
55
from pydantic import StringConstraints
66
from sqlalchemy import Connection, text
77

8+
from config import load_configuration
9+
810
# Enforces str is 32 hexadecimal characters, does not check validity.
9-
APIKey = Annotated[str, StringConstraints(pattern=r"^[0-9a-fA-F]{32}$")]
11+
# If `allow_test_api_keys` is set, the key may also be one of `normaluser`,
12+
# `normaluser2`, or `abc` (admin).
13+
api_key_pattern = r"^[0-9a-fA-F]{32}$"
14+
if load_configuration()["development"].get("allow_test_api_keys"):
15+
api_key_pattern = r"^([0-9a-fA-F]{32}|normaluser|normaluser2|abc)$"
16+
17+
APIKey = Annotated[
18+
str,
19+
StringConstraints(pattern=api_key_pattern),
20+
]
1021

1122

1223
class UserGroup(IntEnum):

tests/constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
PRIVATE_DATASET_ID = {130}
2-
IN_PREPARATION_ID = {33}
2+
IN_PREPARATION_ID = {33, 161, 162, 163}
33
DEACTIVATED_DATASETS = {131}
4-
DATASETS = set(range(1, 132))
4+
DATASETS = set(range(1, 132)) | {161, 162, 163}
55

66
NUMBER_OF_DATASETS = len(DATASETS)
77
NUMBER_OF_DEACTIVATED_DATASETS = len(DEACTIVATED_DATASETS)

tests/routers/openml/datasets_list_datasets_test.py

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ def test_list_filter_active(status: str, amount: int, py_api: TestClient) -> Non
4747
("api_key", "amount"),
4848
[
4949
(ApiKey.ADMIN, constants.NUMBER_OF_DATASETS),
50-
(ApiKey.OWNER_USER, constants.NUMBER_OF_DATASETS),
50+
(ApiKey.DATASET_130_OWNER, constants.NUMBER_OF_DATASETS),
5151
(ApiKey.SOME_USER, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
5252
(None, constants.NUMBER_OF_DATASETS - constants.NUMBER_OF_PRIVATE_DATASETS),
5353
],
@@ -91,13 +91,15 @@ def test_list_data_name_absent(name: str, py_api: TestClient) -> None:
9191

9292

9393
@pytest.mark.parametrize("limit", [None, 5, 10, 200])
94-
@pytest.mark.parametrize("offset", [None, 0, 5, 129, 130, 200])
94+
@pytest.mark.parametrize("offset", [None, 0, 5, 129, 140, 200])
9595
def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClient) -> None:
96+
# dataset ids are contiguous until 131, then there are 161, 162, and 163.
97+
extra_datasets = [161, 162, 163]
9698
all_ids = [
9799
did
98-
for did in range(1, 1 + constants.NUMBER_OF_DATASETS)
100+
for did in range(1, 1 + constants.NUMBER_OF_DATASETS - len(extra_datasets))
99101
if did not in constants.PRIVATE_DATASET_ID
100-
]
102+
] + extra_datasets
101103

102104
start = 0 if offset is None else offset
103105
end = start + (100 if limit is None else limit)
@@ -108,7 +110,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie
108110
filters = {"status": "all", "pagination": offset_body | limit_body}
109111
response = py_api.post("/datasets/list", json=filters)
110112

111-
if offset in [130, 200]:
113+
if offset in [140, 200]:
112114
_assert_empty_result(response)
113115
return
114116

@@ -119,7 +121,7 @@ def test_list_pagination(limit: int | None, offset: int | None, py_api: TestClie
119121

120122
@pytest.mark.parametrize(
121123
("version", "count"),
122-
[(1, 100), (2, 6), (5, 1)],
124+
[(1, 100), (2, 7), (5, 1)],
123125
)
124126
def test_list_data_version(version: int, count: int, py_api: TestClient) -> None:
125127
response = py_api.post(
@@ -133,16 +135,17 @@ def test_list_data_version(version: int, count: int, py_api: TestClient) -> None
133135

134136

135137
def test_list_data_version_no_result(py_api: TestClient) -> None:
138+
version_with_no_datasets = 42
136139
response = py_api.post(
137140
f"/datasets/list?api_key={ApiKey.ADMIN}",
138-
json={"status": "all", "data_version": 4},
141+
json={"status": "all", "data_version": version_with_no_datasets},
139142
)
140143
_assert_empty_result(response)
141144

142145

143146
@pytest.mark.parametrize(
144147
"key",
145-
[ApiKey.SOME_USER, ApiKey.OWNER_USER, ApiKey.ADMIN],
148+
[ApiKey.SOME_USER, ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
146149
)
147150
@pytest.mark.parametrize(
148151
("user_id", "count"),
@@ -211,7 +214,7 @@ def test_list_data_tag_empty(py_api: TestClient) -> None:
211214
("number_classes", "2", 51),
212215
("number_classes", "2..3", 56),
213216
("number_missing_values", "2", 1),
214-
("number_missing_values", "2..100000", 22),
217+
("number_missing_values", "2..100000", 23),
215218
],
216219
)
217220
def test_list_data_quality(quality: str, range_: str, count: int, py_api: TestClient) -> None:

tests/routers/openml/datasets_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from routers.openml.datasets import get_dataset
1010
from schemas.datasets.openml import DatasetMetadata, DatasetStatus
1111
from tests import constants
12-
from tests.users import ADMIN_USER, NO_USER, OWNER_USER, SOME_USER, ApiKey
12+
from tests.users import ADMIN_USER, DATASET_130_OWNER, NO_USER, SOME_USER, ApiKey
1313

1414

1515
@pytest.mark.parametrize(
@@ -92,7 +92,7 @@ def test_private_dataset_no_access(
9292

9393

9494
@pytest.mark.parametrize(
95-
"user", [OWNER_USER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
95+
"user", [DATASET_130_OWNER, ADMIN_USER, pytest.param(SOME_USER, marks=pytest.mark.xfail)]
9696
)
9797
def test_private_dataset_access(user: User, expdb_test: Connection, user_test: Connection) -> None:
9898
dataset = get_dataset(
@@ -165,7 +165,7 @@ def test_dataset_features_no_access(py_api: TestClient) -> None:
165165

166166
@pytest.mark.parametrize(
167167
"api_key",
168-
[ApiKey.ADMIN, ApiKey.OWNER_USER],
168+
[ApiKey.ADMIN, ApiKey.DATASET_130_OWNER],
169169
)
170170
def test_dataset_features_access_to_private(api_key: ApiKey, py_api: TestClient) -> None:
171171
response = py_api.get(f"/datasets/features/130?api_key={api_key}")

tests/routers/openml/migration/datasets_migration_test.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ def test_private_dataset_no_user_no_access(
123123

124124
@pytest.mark.parametrize(
125125
"api_key",
126-
[ApiKey.OWNER_USER, ApiKey.ADMIN],
126+
[ApiKey.DATASET_130_OWNER, ApiKey.ADMIN],
127127
)
128128
def test_private_dataset_owner_access(
129129
py_api: TestClient,
@@ -225,4 +225,7 @@ def test_datasets_feature_is_identical(
225225
else:
226226
# The old API formats bool as string in lower-case
227227
feature[key] = str(value) if not isinstance(value, bool) else str(value).lower()
228-
assert python_body == original.json()["data_features"]["feature"]
228+
original_features = original.json()["data_features"]["feature"]
229+
for feature in original_features:
230+
feature.pop("ontology", None)
231+
assert python_body == original_features

tests/routers/openml/study_test.py

Lines changed: 14 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from starlette.testclient import TestClient
77

88
from schemas.study import StudyType
9+
from tests.users import ApiKey
910

1011

1112
def test_get_task_study_by_id(py_api: TestClient) -> None:
@@ -458,7 +459,7 @@ def test_get_task_study_by_alias(py_api: TestClient) -> None:
458459

459460
def test_create_task_study(py_api: TestClient) -> None:
460461
response = py_api.post(
461-
"/studies?api_key=00000000000000000000000000000000",
462+
f"/studies?api_key={ApiKey.SOME_USER}",
462463
json={
463464
"name": "Test Study",
464465
"alias": "test-study",
@@ -518,27 +519,28 @@ def _attach_tasks_to_study(
518519

519520

520521
def test_attach_task_to_study(py_api: TestClient, expdb_test: Connection) -> None:
522+
expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7"))
521523
response = _attach_tasks_to_study(
522-
study_id=1,
523-
task_ids=[2, 3, 4],
524-
api_key="AD000000000000000000000000000000",
524+
study_id=7,
525+
task_ids=[50],
526+
api_key=ApiKey.OWNER_USER,
525527
py_api=py_api,
526528
expdb_test=expdb_test,
527529
)
528-
assert response.status_code == HTTPStatus.OK
529-
assert response.json() == {"study_id": 1, "main_entity_type": StudyType.TASK}
530+
assert response.status_code == HTTPStatus.OK, response.content
531+
assert response.json() == {"study_id": 7, "main_entity_type": StudyType.TASK}
530532

531533

532534
def test_attach_task_to_study_needs_owner(py_api: TestClient, expdb_test: Connection) -> None:
533-
expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 1"))
535+
expdb_test.execute(text("UPDATE study SET status = 'in_preparation' WHERE id = 7"))
534536
response = _attach_tasks_to_study(
535537
study_id=1,
536538
task_ids=[2, 3, 4],
537-
api_key="00000000000000000000000000000000",
539+
api_key=ApiKey.OWNER_USER,
538540
py_api=py_api,
539541
expdb_test=expdb_test,
540542
)
541-
assert response.status_code == HTTPStatus.FORBIDDEN
543+
assert response.status_code == HTTPStatus.FORBIDDEN, response.content
542544

543545

544546
def test_attach_task_to_study_already_linked_raises(
@@ -549,11 +551,11 @@ def test_attach_task_to_study_already_linked_raises(
549551
response = _attach_tasks_to_study(
550552
study_id=1,
551553
task_ids=[1, 3, 4],
552-
api_key="AD000000000000000000000000000000",
554+
api_key=ApiKey.ADMIN,
553555
py_api=py_api,
554556
expdb_test=expdb_test,
555557
)
556-
assert response.status_code == HTTPStatus.CONFLICT
558+
assert response.status_code == HTTPStatus.CONFLICT, response.content
557559
assert response.json() == {"detail": "Task 1 is already attached to study 1."}
558560

559561

@@ -565,7 +567,7 @@ def test_attach_task_to_study_but_task_not_exist_raises(
565567
response = _attach_tasks_to_study(
566568
study_id=1,
567569
task_ids=[80123, 78914],
568-
api_key="AD000000000000000000000000000000",
570+
api_key=ApiKey.ADMIN,
569571
py_api=py_api,
570572
expdb_test=expdb_test,
571573
)

tests/routers/openml/users_test.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ def test_fetch_user(api_key: str, user: User, user_test: Connection) -> None:
1818
db_user = fetch_user(api_key, user_data=user_test)
1919
assert db_user is not None
2020
assert user.user_id == db_user.user_id
21-
assert user.groups == db_user.groups
21+
assert set(user.groups) == set(db_user.groups)
2222

2323

2424
def test_fetch_user_invalid_key_returns_none(user_test: Connection) -> None:

tests/users.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,14 @@
44

55
NO_USER = None
66
SOME_USER = User(user_id=2, _database=None, _groups=[UserGroup.READ_WRITE])
7-
OWNER_USER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE])
8-
ADMIN_USER = User(user_id=1, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE])
7+
OWNER_USER = User(user_id=3229, _database=None, _groups=[UserGroup.READ_WRITE])
8+
DATASET_130_OWNER = User(user_id=16, _database=None, _groups=[UserGroup.READ_WRITE])
9+
ADMIN_USER = User(user_id=1159, _database=None, _groups=[UserGroup.ADMIN, UserGroup.READ_WRITE])
910

1011

1112
class ApiKey(StrEnum):
12-
ADMIN = "AD000000000000000000000000000000"
13-
SOME_USER = "00000000000000000000000000000000"
14-
OWNER_USER = "DA1A0000000000000000000000000000"
13+
ADMIN = "abc"
14+
SOME_USER = "normaluser2"
15+
OWNER_USER = "normaluser"
16+
DATASET_130_OWNER = "DA1A0000000000000000000000000000"
1517
INVALID = "11111111111111111111111111111111"

0 commit comments

Comments
 (0)