Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 32 additions & 1 deletion cf_xarray/accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,37 @@ def _parse_grid_mapping_attribute(
return Frozen(result)


def _hashable_attrs(attrs: Mapping[Any, Any]) -> tuple:
"""Return a hashable, order-independent representation of an attrs mapping.

List- and array-valued attributes (e.g. ``standard_parallel``) are coerced
to tuples so the result can be used as an ``lru_cache`` key.
"""
frozen = []
for key, value in attrs.items():
if hasattr(value, "tolist"): # numpy scalars/arrays
value = value.tolist()
if isinstance(value, list | tuple):
value = tuple(value)
frozen.append((key, value))
frozen.sort(key=lambda kv: repr(kv[0]))
return tuple(frozen)


@functools.lru_cache(maxsize=256)
def _crs_from_cf_attrs(attrs_items: tuple) -> Any:
"""Build a ``pyproj.CRS`` from frozen CF grid-mapping attrs (memoized).

``pyproj.CRS.from_cf`` re-parses the datum/ellipsoid on every call, which is
expensive for grid mappings carrying explicit ellipsoid parameters (e.g.
geostationary). A dataset routinely references the same grid mapping from
many variables, so cache on the attribute items.
"""
import pyproj

return pyproj.CRS.from_cf(dict(attrs_items))


def _create_grid_mapping(
var_name: str,
ds: Dataset,
Expand Down Expand Up @@ -669,7 +700,7 @@ def _create_grid_mapping(
}
)
else:
crs = pyproj.CRS.from_cf(var.attrs)
crs = _crs_from_cf_attrs(_hashable_attrs(var.attrs))

# Get associated coordinate variables, fallback to dimension names
coordinates: list[Hashable] = grid_mapping_dict.get(var_name, [])
Expand Down
38 changes: 38 additions & 0 deletions cf_xarray/tests/test_accessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -1254,6 +1254,44 @@ def test_grid_mappings_property():
assert gm.coordinates == ("x", "y")


@requires_pyproj
def test_grid_mappings_crs_construction_is_cached(monkeypatch):
    """``pyproj.CRS.from_cf`` is memoized per grid-mapping attrs.

    Building the CRS re-parses the datum/ellipsoid on every call. A dataset
    references the same grid mapping from many variables and the property may
    be accessed repeatedly, so each distinct grid mapping should be built once.

    The memoization cache is cleared right before the measured accesses and
    again in a ``finally`` block, so a failing assertion cannot leave entries
    behind for other tests.
    """
    import pyproj

    from ..accessor import _crs_from_cf_attrs
    from ..datasets import hrrrds

    ds = hrrrds

    calls = {"n": 0}
    orig = pyproj.CRS.from_cf

    def counting_from_cf(*args, **kwargs):
        # Count every real CRS construction, then delegate to pyproj.
        calls["n"] += 1
        return orig(*args, **kwargs)

    monkeypatch.setattr(pyproj.CRS, "from_cf", staticmethod(counting_from_cf))

    # Start from an empty cache so only the accesses below are counted.
    _crs_from_cf_attrs.cache_clear()
    try:
        # Repeated property accesses, including via a DataArray, must not
        # rebuild the same CRS: hrrrds has 3 distinct grid mappings, each
        # built once.
        ds.cf.grid_mappings
        ds.cf.grid_mappings
        ds.foo.cf.grid_mappings

        assert calls["n"] == 3
    finally:
        # Always drop the entries created here, even when the assertion fails.
        _crs_from_cf_attrs.cache_clear()


@requires_pyproj
def test_grid_mappings_coordinates_attribute():
"""Test that coordinates attribute is always populated correctly for DataArray grid mappings."""
Expand Down
Loading