Skip to content

Commit 9e32d72

Browse files
committed
test(integration): run arrow scenarios in CI instead of skipping
1 parent 3fc2cdd commit 9e32d72

3 files changed

Lines changed: 45 additions & 21 deletions

File tree

test-requirements.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,7 @@ tox >= 3.9.0
44
flake8 >= 4.0.0
55
types-python-dateutil >= 2.8.19.14
66
mypy >= 1.5
7+
# pyarrow backs the `arrow` extra. Required here (not just an optional extra) so
8+
# the arrow integration scenarios actually run in CI instead of silently
9+
# skipping via importorskip. Keep the floor in sync with pyproject's extra.
10+
pyarrow >= 14

tests/integration/test_managed_tables_lifecycle.py

Lines changed: 37 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,41 @@
66
1. declare a schema and a table on the database's default catalog connection,
77
2. upload a small parquet file,
88
3. load it into the table (load_managed_table),
9-
4. read get_table_profile,
10-
5. refresh the catalog metadata,
11-
6. purge_table_cache,
12-
7. delete the managed table.
9+
4. poll get_table_profile until the load syncs,
10+
5. purge_table_cache,
11+
6. delete the managed table.
12+
13+
Note on managed-catalog semantics: there is no `refresh` step. `refresh` is
14+
rejected with a 400 on a managed catalog ("use the loads endpoint to update its
15+
data") — `load_managed_table` is itself the load. The profile is populated
16+
asynchronously after the load, so step 4 polls get_table_profile (a 404 means
17+
"not synced yet") rather than reading it once.
1318
1419
The scratch_database fixture tears the database (and its catalog) down, so the
15-
test touches no seeded data. Skipped if pyarrow is unavailable (needed to author
16-
the parquet payload).
20+
test touches no seeded data. pyarrow is a hard test dependency (see
21+
test-requirements.txt) and is imported directly — a missing pyarrow must fail
22+
loudly, never silently skip this scenario in CI.
1723
"""
1824

1925
from __future__ import annotations
2026

2127
import io
28+
import time
2229

23-
import pytest
24-
25-
pa = pytest.importorskip("pyarrow")
26-
pq = pytest.importorskip("pyarrow.parquet")
30+
import pyarrow as pa
31+
import pyarrow.parquet as pq
2732

2833
from hotdata.api.connections_api import ConnectionsApi
2934
from hotdata.api.databases_api import DatabasesApi
30-
from hotdata.api.refresh_api import RefreshApi
3135
from hotdata.api.uploads_api import UploadsApi
36+
from hotdata.exceptions import ApiException
3237
from hotdata.models.add_managed_schema_request import AddManagedSchemaRequest
3338
from hotdata.models.add_managed_table_request import AddManagedTableRequest
3439
from hotdata.models.load_managed_table_request import LoadManagedTableRequest
35-
from hotdata.models.refresh_request import RefreshRequest
40+
41+
42+
PROFILE_SYNC_TIMEOUT_S = 60.0
43+
PROFILE_POLL_INTERVAL_S = 2.0
3644

3745

3846
def _parquet_bytes() -> bytes:
@@ -46,7 +54,6 @@ def test_managed_tables_lifecycle(
4654
databases_api: DatabasesApi,
4755
connections_api: ConnectionsApi,
4856
uploads_api: UploadsApi,
49-
refresh_api: RefreshApi,
5057
scratch_database: str,
5158
) -> None:
5259
# The database's auto-provisioned default catalog is a managed catalog,
@@ -77,15 +84,27 @@ def test_managed_tables_lifecycle(
7784
assert loaded.table_name == table_name
7885
assert loaded.row_count == 3
7986

80-
profile = connections_api.get_table_profile(connection_id, schema_name, table_name)
87+
# The profile syncs asynchronously after the load — get_table_profile 404s
88+
# ("Table may not be synced yet") until it lands. Poll instead of reading
89+
# once. There is no manual trigger to force this: refresh is rejected on a
90+
# managed catalog, and load_managed_table is the load.
91+
deadline = time.monotonic() + PROFILE_SYNC_TIMEOUT_S
92+
profile = None
93+
while time.monotonic() < deadline:
94+
try:
95+
profile = connections_api.get_table_profile(
96+
connection_id, schema_name, table_name
97+
)
98+
break
99+
except ApiException as exc:
100+
if exc.status != 404:
101+
raise
102+
time.sleep(PROFILE_POLL_INTERVAL_S)
103+
assert profile is not None, "table profile never synced after load"
81104
assert profile.var_schema == schema_name
82105
assert profile.table == table_name
83106
assert profile.row_count == 3
84107

85-
# Refresh the catalog metadata for the managed connection.
86-
refreshed = refresh_api.refresh(RefreshRequest(connection_id=connection_id))
87-
assert refreshed.actual_instance is not None
88-
89108
# purge_table_cache and delete_managed_table both return None on success.
90109
connections_api.purge_table_cache(connection_id, schema_name, table_name)
91110
connections_api.delete_managed_table(connection_id, schema_name, table_name)

tests/integration/test_results_arrow.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,18 @@
55
that Arrow IPC content negotiation works end-to-end and that the streaming
66
variant yields the same data.
77
8-
Skipped if pyarrow is not installed (the helper requires the ``arrow`` extra).
8+
pyarrow is a hard test dependency (see test-requirements.txt), so this imports
9+
it directly rather than via importorskip — a missing pyarrow must fail loudly,
10+
never silently skip this scenario in CI.
911
"""
1012

1113
from __future__ import annotations
1214

1315
import time
1416

17+
import pyarrow as pa
1518
import pytest
1619

17-
pa = pytest.importorskip("pyarrow")
18-
1920
from hotdata.api.query_api import QueryApi
2021
from hotdata.api.query_runs_api import QueryRunsApi
2122
from hotdata.arrow import ResultsApi

0 commit comments

Comments
 (0)