drivendataorg · pjbull · Jun 29, 2026
diff --git a/HISTORY.md b/HISTORY.md
@@ -9,6 +9,7 @@
 - Changed `S3Client._get_metadata` to read object metadata with `HeadObject` instead of `GetObject`, so `stat`, `etag`, and `size` no longer open the object body. Also fixes a `KeyError` on `ContentLength` against S3-compatible gateways that drop `Content-Length` from `GetObject` responses. (Issue [#564](https://github.com/drivendataorg/cloudpathlib/issues/564), PR [#565](https://github.com/drivendataorg/cloudpathlib/pull/565))
 - Added a `lazy` keyword argument to `CloudPath.walk`. By default (`lazy=False`) the existing fast behavior is preserved: the whole subtree is fetched up front with a single recursive listing. Passing `lazy=True` lists each directory on demand so that, when `top_down=True`, callers can prune subdirectories by modifying `dirnames` in-place (à la `os.walk` / `Path.walk`) to skip fetching the contents of those subtrees entirely — dramatically reducing API calls for large, sparsely-traversed trees. (Issue [#518](https://github.com/drivendataorg/cloudpathlib/issues/518))
 - Fixed S3 copy and move operations to forward copy-specific extra args such as `CopySourceSSECustomerKey`, and added `addressing_style="virtual"` support for `S3Client` (Issues [#500](https://github.com/drivendataorg/cloudpathlib/issues/500), [#527](https://github.com/drivendataorg/cloudpathlib/issues/527))
+- Fixed `CloudPath.rglob` raising `TypeError: 'NoneType' object is not subscriptable` when a bucket contains both a blob and a "directory" prefix sharing the same name (a layout cloud object stores allow but local filesystems do not). The internal `_build_subtree` now merges file and directory entries at the same path instead of overwriting one with the other. (Issue [#431](https://github.com/drivendataorg/cloudpathlib/issues/431))
 
 ## v0.24.0 (2026-04-29)
 - Added support for S3 Multi-Region Access Point (MRAP) URLs in `S3Path` (Issue [#556](https://github.com/drivendataorg/cloudpathlib/issues/556), PR [#557](https://github.com/drivendataorg/cloudpathlib/pull/557))

diff --git a/cloudpathlib/cloudpath.py b/cloudpathlib/cloudpath.py
@@ -488,16 +488,30 @@ def _glob_checks(self, pattern: Union[str, os.PathLike]) -> str:
     def _build_subtree(self, recursive):
         # build a tree structure for all files out of default dicts
         Tree: Callable = lambda: defaultdict(Tree)
+        _MISSING = object()
 
         def _build_tree(trunk, branch, nodes, is_dir):
             """Utility to build a tree from nested defaultdicts with a generator
             of nodes (parts) of a path."""
             next_branch = next(nodes, None)
 
+            existing = trunk.get(branch) if branch in trunk else _MISSING
+
             if next_branch is None:
-                trunk[branch] = Tree() if is_dir else None  # leaf node
+                # Leaf node. Cloud object stores can have a blob and a
+                # "directory" with the same name; if we've already recorded a
+                # subtree here, keep it (it represents descendants we still
+                # need to walk). Otherwise set the marker for file/dir.
+                if existing is _MISSING or not isinstance(existing, defaultdict):
+                    trunk[branch] = Tree() if is_dir else None
 
             else:
+                # Recursing into a deeper level: if we previously recorded a
+                # file leaf (None) at this branch, promote it to a subtree so
+                # descendants are reachable. If a subtree already exists,
+                # reuse it rather than overwriting.
+                if existing is None:
+                    trunk[branch] = Tree()
                 _build_tree(trunk[branch], next_branch, nodes, is_dir)
 
         file_tree = Tree()

diff --git a/tests/test_cloudpath_file_io.py b/tests/test_cloudpath_file_io.py
@@ -472,6 +472,31 @@ def test_glob_many_open_files(rig):
             assert next(it) == p
 
 
+def test_rglob_file_and_dir_same_name(rig, monkeypatch):
+    """Regression test for #431: a blob and a "directory" can share the same
+    name in cloud storage. Recursive globbing must not error in that case."""
+    base = rig.create_cloud_path("")
+
+    # Simulate _list_dir returning both a blob and a "directory" with the
+    # same name "output". Cloud object stores allow this; local filesystems
+    # do not, which is why we patch _list_dir instead of writing files.
+    fake_entries = [
+        (base / "output", False),
+        (base / "output" / "13655" / "0" / "file1.json", False),
+        (base / "output" / "13655" / "0", True),
+        (base / "output" / "13655", True),
+    ]
+
+    def fake_list_dir(self, cloud_path, recursive=False):
+        return iter(fake_entries)
+
+    monkeypatch.setattr(rig.client_class, "_list_dir", fake_list_dir)
+
+    results = list(base.rglob("*"))
+    paths = {str(p) for p in results}
+    assert str(base / "output" / "13655" / "0" / "file1.json") in paths
+
+
 def test_glob_exceptions(rig):
     cp = rig.create_cloud_path("dir_0/")