Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ CUSTOM_S3_SECRET_KEY=your_custom_s3_secret_key
## BUCKETS
# Used when running live tests; you will need read/write access to these buckets to run those tests
LIVE_S3_BUCKET=a-bucket-you-can-access
LIVE_S3_MRAP_ARN=arn:aws:s3::ACCOUNT:accesspoint/MRAP


LIVE_AZURE_CONTAINER=a-container-you-can-access

Expand Down
3 changes: 2 additions & 1 deletion HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# cloudpathlib Changelog

## UNRELEASED
## v0.24.0 (2026-04-29)
- Added support for S3 Multi-Region Access Point (MRAP) URLs in `S3Path` (Issue [#556](https://github.com/drivendataorg/cloudpathlib/issues/556), PR [#557](https://github.com/drivendataorg/cloudpathlib/pull/557))
- Added support for Pydantic serialization (Issue [#537](https://github.com/drivendataorg/cloudpathlib/issues/537), PR [#538](https://github.com/drivendataorg/cloudpathlib/pull/538))

## v0.23.0 (2025-10-07)
Expand Down
31 changes: 30 additions & 1 deletion cloudpathlib/s3/s3path.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import os
import re
import sys
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any, Optional, TYPE_CHECKING
Expand All @@ -8,6 +10,10 @@
if TYPE_CHECKING:
from .s3client import S3Client

# Matches S3 Multi-Region Access Point (MRAP) URLs of the form
#   s3://arn:aws:s3::<12-digit account id>:accesspoint/<alias>.mrap[/<key>]
# Named group "arn" captures the access point ARN (everything between the
# "s3://" prefix and the first "/" after the ".mrap" alias); the optional
# named group "key" captures whatever follows that "/" (it is None when the
# URL has no "/" after the ARN).
_MRAP_PATTERN = re.compile(
r"^s3://(?P<arn>arn:aws:s3::\d{12}:accesspoint/[^/]+\.mrap)(?:/(?P<key>.*))?$"
)


@register_path_class("s3")
class S3Path(CloudPath):
Expand All @@ -26,6 +32,8 @@ class S3Path(CloudPath):

cloud_prefix: str = "s3://"
client: "S3Client"
_bucket: str
_local_path: Path

@property
def drive(self) -> str:
Expand Down Expand Up @@ -74,7 +82,17 @@ def stat(self, follow_symlinks=True):

@property
def bucket(self) -> str:
    """The bucket name, or the full MRAP ARN for MRAP paths.

    The value is computed on first access and memoized on the instance as
    ``_bucket``. When the string form of the path matches ``_MRAP_PATTERN``
    the whole access point ARN is returned; otherwise the bucket is the
    text of ``_no_prefix`` up to the first ``/``.

    :type: :class:`str`
    """
    # Serve the memoized value if a previous access already computed it.
    if hasattr(self, "_bucket"):
        return self._bucket
    # MRAP ARNs contain a "/" themselves, so a plain split on the first "/"
    # would truncate them; the pattern must be tried first.
    if match := _MRAP_PATTERN.match(str(self)):
        self._bucket = match.group("arn")
    else:
        self._bucket = self._no_prefix.split("/", 1)[0]
    return self._bucket

@property
def key(self) -> str:
Expand All @@ -90,3 +108,14 @@ def key(self) -> str:
@property
def etag(self):
    """Return the ``"etag"`` entry of the metadata the client reports for this path.

    The metadata object is whatever ``self.client._get_metadata(self)``
    returns; its ``.get("etag")`` result is passed through unchanged.
    """
    metadata = self.client._get_metadata(self)
    return metadata.get("etag")

@property
def _local(self) -> Path:
    """Location of this path inside the client's local cache directory.

    The result is ``self.client._local_cache_dir / self._no_prefix`` and is
    memoized on the instance as ``_local_path`` after the first access.
    When ``sys.platform`` is ``"win32"`` every ``:`` in ``_no_prefix`` is
    replaced by ``%3A`` before joining.
    """
    try:
        return self._local_path
    except AttributeError:
        pass  # not computed yet; fall through and build it

    relative = self._no_prefix
    if sys.platform == "win32":
        # `:` is invalid in Windows paths; percent-encode it for MRAP ARNs
        relative = relative.replace(":", "%3A")

    self._local_path = self.client._local_cache_dir / relative
    return self._local_path
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "flit_core.buildapi"

[project]
name = "cloudpathlib"
version = "0.23.0"
version = "0.24.0"
description = "pathlib-style classes for cloud storage services."
readme = "README.md"
authors = [{ name = "DrivenData", email = "info@drivendata.org" }]
Expand Down
1 change: 1 addition & 0 deletions requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

azure-identity
black[jupyter]>=24.1.0;python_version>='3.8'
botocore[crt] # required for S3 Multi-Region Access Point (MRAP) tests
build
flake8
ipytest
Expand Down
11 changes: 4 additions & 7 deletions tests/mock_clients/mock_s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,14 +220,11 @@ def list_buckets(self):
return {"Buckets": [{"Name": DEFAULT_S3_BUCKET_NAME}]}

def head_object(self, Bucket, Key, **kwargs):
    """Mocked ``head_object`` backed by the files under ``self.root``.

    Args:
        Bucket: Bucket identifier. Accepted when it equals
            ``DEFAULT_S3_BUCKET_NAME`` or contains ``".mrap"`` (so MRAP
            ARNs are treated as valid buckets by the mock).
        Key: Object key, resolved relative to ``self.root``.
        **kwargs: Accepted and ignored.

    Returns:
        A dict of the form ``{"key": Key}``.

    Raises:
        ClientError: If ``self.root / Key`` does not exist or is a
            directory, or if ``Bucket`` is not an accepted bucket.
    """
    target = self.root / Key
    # Only existing regular (non-directory) entries count as objects.
    if not target.exists() or target.is_dir():
        raise ClientError({}, {})
    # Reject unknown buckets, but let MRAP ARNs through.
    if Bucket != DEFAULT_S3_BUCKET_NAME and ".mrap" not in Bucket:
        raise ClientError({}, {})
    return {"key": Key}

def generate_presigned_url(self, op: str, Params: dict, ExpiresIn: int):
mock_presigned_url = f"https://{Params['Bucket']}.s3.amazonaws.com/{Params['Key']}?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=TEST%2FTEST%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240131T194721Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=TEST"
Expand Down
128 changes: 128 additions & 0 deletions tests/test_s3_specific.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from concurrent.futures import ProcessPoolExecutor
from itertools import islice
import os
from time import sleep
import time

Expand Down Expand Up @@ -290,3 +291,130 @@ def test_as_url_presign(s3_rig):
assert "Signature" in query_params
else:
assert False, "Unknown presigned URL format"


# Placeholder Multi-Region Access Point ARN shared by the MRAP tests in this
# module; test_mrap_file_operations uses it whenever it is not running against
# a live server.
_MRAP_ARN = "arn:aws:s3::123456789012:accesspoint/my-mrap.mrap"


def test_mrap_bucket_and_key():
    """MRAP paths return the full ARN as bucket and the path suffix as key."""
    # Each row: (URL, expected bucket, expected key).
    bucket_and_key_cases = [
        # MRAP path without key
        (f"s3://{_MRAP_ARN}", _MRAP_ARN, ""),
        # MRAP path with trailing slash
        (f"s3://{_MRAP_ARN}/", _MRAP_ARN, ""),
        # MRAP path with a single key segment
        (f"s3://{_MRAP_ARN}/file.txt", _MRAP_ARN, "file.txt"),
        # MRAP path with a nested key
        (f"s3://{_MRAP_ARN}/folder/sub/file.txt", _MRAP_ARN, "folder/sub/file.txt"),
        # Regular S3 path is unaffected
        ("s3://my-bucket/folder/file.txt", "my-bucket", "folder/file.txt"),
    ]
    for url, expected_bucket, expected_key in bucket_and_key_cases:
        path = S3Path(url)
        assert path.bucket == expected_bucket
        assert path.key == expected_key

    # ARN-like strings that are NOT valid MRAPs fall back to normal bucket
    # parsing, i.e. everything before the first "/" is the bucket.
    # Each row: (URL, expected bucket).
    not_mrap_cases = [
        # wrong account ID length
        ("s3://arn:aws:s3::12345:accesspoint/x.mrap/key", "arn:aws:s3::12345:accesspoint"),
        # missing .mrap suffix
        (
            "s3://arn:aws:s3::123456789012:accesspoint/notmrap/key",
            "arn:aws:s3::123456789012:accesspoint",
        ),
    ]
    for url, expected_bucket in not_mrap_cases:
        assert S3Path(url).bucket == expected_bucket


def test_mrap_path_manipulation():
    """MRAP paths support standard path manipulation operations."""
    root_url = f"s3://{_MRAP_ARN}"
    file_url = f"{root_url}/folder/file.txt"

    # Joining via the `/` operator keeps the ARN intact as the bucket.
    leaf = S3Path(root_url) / "folder" / "file.txt"
    assert str(leaf) == file_url
    assert leaf.bucket == _MRAP_ARN
    assert leaf.key == "folder/file.txt"

    # Final-component accessors.
    assert leaf.name == "file.txt"
    assert leaf.stem == "file"
    assert leaf.suffix == ".txt"

    # Walking up one level preserves the MRAP ARN as bucket.
    folder = leaf.parent
    assert str(folder) == f"{root_url}/folder"
    assert folder.bucket == _MRAP_ARN
    assert folder.key == "folder"

    # Replacing the name or the suffix only touches the last component.
    renamed = leaf.with_name("other.csv")
    assert str(renamed) == f"{root_url}/folder/other.csv"
    resuffixed = leaf.with_suffix(".csv")
    assert str(resuffixed) == f"{root_url}/folder/file.csv"

    # str / repr round-trip from a freshly parsed URL.
    assert str(S3Path(file_url)) == file_url
    assert repr(S3Path(file_url)) == f"S3Path('{file_url}')"


def test_mrap_file_operations(s3_rig):
    """MRAP paths work end-to-end with the mock S3 backend (or a real MRAP in live mode)."""
    # Mock runs use the placeholder ARN; live runs need a real one from the environment.
    mrap_arn = _MRAP_ARN
    if s3_rig.live_server:
        mrap_arn = os.getenv("LIVE_S3_MRAP_ARN")
        if not mrap_arn:
            pytest.skip(
                "LIVE_S3_MRAP_ARN is not set; set it to the ARN of an MRAP "
                "fronting LIVE_S3_BUCKET to run this test live."
            )

    client = s3_rig.client_class()
    base = f"s3://{mrap_arn}/{s3_rig.test_dir}"
    dir0_url = f"{base}/dir_0"

    # A file seeded from the test assets is visible through the MRAP.
    seeded = client.CloudPath(f"{dir0_url}/file0_0.txt")
    assert seeded.exists()
    assert seeded.is_file()
    assert not seeded.is_dir()
    assert client.CloudPath(dir0_url).is_dir()

    # Listing the test_dir level yields exactly the two directories dir_0 and dir_1.
    top_entries = list(client.CloudPath(base).iterdir())
    assert len(top_entries) == 2
    assert all(entry.is_dir() for entry in top_entries)
    assert {entry.name for entry in top_entries} == {"dir_0", "dir_1"}

    # Listing dir_0 yields three files.
    dir0_entries = list(client.CloudPath(dir0_url).iterdir())
    assert len(dir0_entries) == 3
    assert all(entry.is_file() for entry in dir0_entries)

    # Round trip: write, read back, then delete.
    scratch = client.CloudPath(f"{base}/mrap_write_test.txt")
    assert not scratch.exists()
    scratch.write_text("hello from mrap")
    assert scratch.exists()
    assert scratch.read_text() == "hello from mrap"
    assert scratch.bucket == mrap_arn
    scratch.unlink()
    assert not scratch.exists()


def test_mrap_local_path_windows_encoding(monkeypatch, s3_rig):
    """On Windows, colons in MRAP ARNs must be percent-encoded in the local cache path."""
    import cloudpathlib.s3.s3path as s3path_module

    # Pretend to be on Windows for the duration of this test.
    monkeypatch.setattr(s3path_module.sys, "platform", "win32")

    client = s3_rig.client_class()
    p = client.CloudPath(f"s3://{_MRAP_ARN}/some/key.txt")

    # strip drive (e.g. "C:") since it legitimately contains a colon on Windows
    drive_length = len(p._local.drive)
    local_no_drive = str(p._local)[drive_length:]

    assert ":" not in local_no_drive, f"Colon found in local path on simulated Windows: {p._local}"
    assert "%3A" in local_no_drive
Loading