Skip to content

Commit 4a4f18c

Browse files
fix(rest): skip Hadoop-only vended storage credentials during resolution
1 parent 0bdff48 commit 4a4f18c

2 files changed

Lines changed: 80 additions & 4 deletions

File tree

pyiceberg/catalog/rest/__init__.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -387,6 +387,11 @@ class ListViewsResponse(IcebergBaseModel):
387387
_PLANNING_RESPONSE_ADAPTER = TypeAdapter(PlanningResponse)
388388

389389

390+
def _is_hadoop_only_config(config: Properties) -> bool:
391+
"""Return True if every key is a Hadoop ``fs.*`` key — pyiceberg has no HadoopFileIO to consume them."""
392+
return bool(config) and all(k.startswith("fs.") for k in config)
393+
394+
390395
class RestCatalog(Catalog):
391396
uri: str
392397
_session: Session
@@ -453,22 +458,32 @@ def _create_session(self) -> Session:
453458

454459
@staticmethod
455460
def _resolve_storage_credentials(storage_credentials: list[StorageCredential], location: str | None) -> Properties:
456-
"""Resolve the best-matching storage credential by longest prefix match.
461+
"""Pick the longest-prefix storage credential for ``location``.
457462
458-
Mirrors the Java implementation in S3FileIO.clientForStoragePath() which iterates
459-
over storage credential prefixes and selects the one with the longest match.
463+
Mirrors Java ``S3FileIO.clientForStoragePath``. Hadoop-only (``fs.*``)
464+
credentials are filtered out since pyiceberg has no HadoopFileIO to
465+
consume them — otherwise a catalog vending both ``fs.*`` and ``s3.*``
466+
bundles per location could strand the FileIO with unusable keys.
460467
461468
See: https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/S3FileIO.java
462469
"""
463470
if not storage_credentials or not location:
464471
return {}
465472

473+
consumable = [c for c in storage_credentials if not _is_hadoop_only_config(c.config)]
474+
466475
best_match: StorageCredential | None = None
467-
for cred in storage_credentials:
476+
for cred in consumable:
468477
if location.startswith(cred.prefix):
469478
if best_match is None or len(cred.prefix) > len(best_match.prefix):
470479
best_match = cred
471480

481+
# Java S3FileIO falls back to the "s3" ROOT_PREFIX credential; scope it to
482+
# schemes pyarrow's S3FileSystem handles so non-S3 schemes (gs://, abfs://,
483+
# etc.) don't get handed s3.* keys.
484+
if best_match is None and location.startswith(("s3://", "s3a://", "s3n://", "oss://")):
485+
best_match = next((c for c in consumable if c.prefix == "s3"), None)
486+
472487
return best_match.config if best_match else {}
473488

474489
def _load_file_io(self, properties: Properties = EMPTY_DICT, location: str | None = None) -> FileIO:

tests/catalog/test_rest.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2844,6 +2844,67 @@ def test_resolve_storage_credentials_empty() -> None:
28442844
assert RestCatalog._resolve_storage_credentials([], None) == {}
28452845

28462846

2847+
def test_resolve_storage_credentials_skips_hadoop_only() -> None:
2848+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2849+
2850+
# The longer fs.* prefix would win a blind longest-match; the filter drops it.
2851+
credentials = [
2852+
StorageCredential(prefix="s3://warehouse/jindo", config={"fs.s3.access-key": "hadoop-k"}),
2853+
StorageCredential(prefix="s3://warehouse", config={"s3.access-key-id": "native-k"}),
2854+
]
2855+
result = RestCatalog._resolve_storage_credentials(credentials, "s3://warehouse/jindo/table/data")
2856+
assert result == {"s3.access-key-id": "native-k"}
2857+
2858+
2859+
def test_resolve_storage_credentials_mixed_prefix_namespaces_preserved() -> None:
2860+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2861+
2862+
credentials = [
2863+
StorageCredential(prefix="gs", config={"gs.oauth2.token": "tok"}),
2864+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
2865+
]
2866+
result = RestCatalog._resolve_storage_credentials(credentials, "gs://bucket/path")
2867+
assert result == {"gs.oauth2.token": "tok"}
2868+
2869+
2870+
def test_resolve_storage_credentials_all_hadoop_only_returns_empty() -> None:
2871+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2872+
2873+
credentials = [
2874+
StorageCredential(prefix="custom", config={"fs.custom.access-key": "hadoop-k"}),
2875+
]
2876+
assert RestCatalog._resolve_storage_credentials(credentials, "custom://bucket/path") == {}
2877+
2878+
2879+
def test_resolve_storage_credentials_root_prefix_fallback_for_s3_compatible_scheme() -> None:
2880+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2881+
2882+
# oss:// is routed through pyarrow's S3FileSystem, so ROOT_PREFIX "s3" applies.
2883+
credentials = [
2884+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
2885+
]
2886+
result = RestCatalog._resolve_storage_credentials(credentials, "oss://bucket/path")
2887+
assert result == {"s3.access-key-id": "native-k"}
2888+
2889+
2890+
def test_resolve_storage_credentials_root_prefix_fallback_respects_consumable() -> None:
2891+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2892+
2893+
credentials = [
2894+
StorageCredential(prefix="s3", config={"fs.s3.access-key": "hadoop-k"}),
2895+
]
2896+
assert RestCatalog._resolve_storage_credentials(credentials, "s3://bucket/path") == {}
2897+
2898+
2899+
def test_resolve_storage_credentials_fallback_skipped_for_non_s3_scheme() -> None:
2900+
from pyiceberg.catalog.rest.scan_planning import StorageCredential
2901+
2902+
credentials = [
2903+
StorageCredential(prefix="s3", config={"s3.access-key-id": "native-k"}),
2904+
]
2905+
assert RestCatalog._resolve_storage_credentials(credentials, "gs://bucket/path") == {}
2906+
2907+
28472908
def test_load_table_with_storage_credentials(rest_mock: Mocker, example_table_metadata_with_snapshot_v1: dict[str, Any]) -> None:
28482909
metadata_location = "s3://warehouse/database/table/metadata/00001.metadata.json"
28492910
rest_mock.get(

0 commit comments

Comments
 (0)