From e98633a0a6440e281e53e63d127048c46bc6d391 Mon Sep 17 00:00:00 2001
From: Yuya Ebihara
Date: Sat, 9 May 2026 10:03:22 +0900
Subject: [PATCH] REST: Add pagination support for list_tables

Follow `next-page-token` until the server stops returning one and
concatenate the identifiers of every page. The token is sent through
the `params` argument of the session so that it is percent-encoded
instead of being spliced into the URL verbatim.
---
 pyiceberg/catalog/rest/__init__.py | 30 +++++++++++++++----
 tests/catalog/test_rest.py         | 48 ++++++++++++++++++++++++++++++
 2 files changed, 72 insertions(+), 6 deletions(-)

diff --git a/pyiceberg/catalog/rest/__init__.py b/pyiceberg/catalog/rest/__init__.py
index ca0ff75e8c..81fe20783c 100644
--- a/pyiceberg/catalog/rest/__init__.py
+++ b/pyiceberg/catalog/rest/__init__.py
@@ -366,6 +366,7 @@ class ListViewResponseEntry(IcebergBaseModel):
 
 class ListTablesResponse(IcebergBaseModel):
     identifiers: list[ListTableResponseEntry] = Field()
+    next_page_token: str | None = Field(default=None, alias="next-page-token")
 
 
 class ListViewsResponse(IcebergBaseModel):
@@ -1016,12 +1017,29 @@ def list_tables(self, namespace: str | Identifier) -> list[Identifier]:
         self._check_endpoint(Capability.V1_LIST_TABLES)
         namespace_tuple = self._check_valid_namespace_identifier(namespace)
         namespace_concat = self._encode_namespace_path(namespace_tuple)
-        response = self._session.get(self.url(Endpoints.list_tables, namespace=namespace_concat))
-        try:
-            response.raise_for_status()
-        except HTTPError as exc:
-            _handle_non_200_response(exc, {404: NoSuchNamespaceError})
-        return [(*table.namespace, table.name) for table in ListTablesResponse.model_validate_json(response.text).identifiers]
+
+        url = self.url(Endpoints.list_tables, namespace=namespace_concat)
+        all_identifiers: list[Identifier] = []
+        page_token: str | None = None
+
+        while True:
+            # Page tokens are opaque; send them via `params` so they get percent-encoded
+            params = {"pageToken": page_token} if page_token else None
+            response = self._session.get(url, params=params)
+            try:
+                response.raise_for_status()
+            except HTTPError as exc:
+                _handle_non_200_response(exc, {404: NoSuchNamespaceError})
+
+            parsed = ListTablesResponse.model_validate_json(response.text)
+            all_identifiers.extend((*table.namespace, table.name) for table in parsed.identifiers)
+
+            # A missing or empty next-page-token marks the last page
+            if not parsed.next_page_token:
+                break
+            page_token = parsed.next_page_token
+
+        return all_identifiers
 
     @retry(**_RETRY_ARGS)
     def load_table(self, identifier: str | Identifier) -> Table:
diff --git a/tests/catalog/test_rest.py b/tests/catalog/test_rest.py
index 7977892635..d554221e97 100644
--- a/tests/catalog/test_rest.py
+++ b/tests/catalog/test_rest.py
@@ -477,6 +477,54 @@ def test_list_tables_200(rest_mock: Mocker) -> None:
     assert RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace) == [("examples", "fooshare")]
 
 
+def test_list_tables_paginated_200(rest_mock: Mocker) -> None:
+    namespace = "examples"
+    # First page with next-page-token
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/{namespace}/tables",
+        json={
+            "identifiers": [
+                {"namespace": ["examples"], "name": "table1"},
+                {"namespace": ["examples"], "name": "table2"},
+            ],
+            "next-page-token": "page2token",
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+    # Second page with next-page-token
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/{namespace}/tables?pageToken=page2token",
+        json={
+            "identifiers": [
+                {"namespace": ["examples"], "name": "table3"},
+            ],
+            "next-page-token": "page3token",
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+    # Third page without next-page-token (last page)
+    rest_mock.get(
+        f"{TEST_URI}v1/namespaces/{namespace}/tables?pageToken=page3token",
+        json={
+            "identifiers": [
+                {"namespace": ["examples"], "name": "table4"},
+            ],
+        },
+        status_code=200,
+        request_headers=TEST_HEADERS,
+    )
+
+    result = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN).list_tables(namespace)
+    assert result == [
+        ("examples", "table1"),
+        ("examples", "table2"),
+        ("examples", "table3"),
+        ("examples", "table4"),
+    ]
+
+
 def test_list_tables_200_sigv4(rest_mock: Mocker) -> None:
     namespace = "examples"
     rest_mock.get(