Skip to content

Commit c602b68

Browse files
authored
Merge pull request #131 from fish-pace/copilot/add-nearest-time-implementation
Report time dimension in geolocation output and add multi-time integration tests
2 parents dd6fd8d + 5e3ad4a commit c602b68

3 files changed

Lines changed: 300 additions & 8 deletions

File tree

src/point_collocation/core/_open_method.py

Lines changed: 29 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -505,14 +505,31 @@ def _apply_coords(ds: xr.Dataset, spec: dict) -> tuple[xr.Dataset, str, str]:
505505
)
506506

507507

508-
def _geoloc_description(ds: "xr.Dataset", lon_name: str, lat_name: str, spec: dict) -> str:
508+
def _geoloc_description(
509+
ds: "xr.Dataset",
510+
lon_name: str,
511+
lat_name: str,
512+
spec: dict,
513+
time_dim: "str | None" = None,
514+
) -> str:
509515
"""Return a human-readable geolocation line for printing in open_dataset.
510516
511517
The description notes *how* the pair was found:
512518
513519
* ``"auto detected with cf_xarray"`` — CF-convention attributes used.
514520
* ``"auto detected by name"`` — name-based fallback used.
515521
* ``"specified"`` — caller provided an explicit ``coords`` dict.
522+
523+
When *time_dim* is not ``None``, the time dimension name and the number
524+
of time steps are appended to the description. When *time_dim* is
525+
``None`` (dataset has no time dimension) nothing is appended.
526+
527+
Parameters
528+
----------
529+
time_dim:
530+
Name of the time dimension detected by
531+
:func:`~point_collocation.core.engine._find_time_dim`, or ``None``
532+
if the dataset has no time dimension.
516533
"""
517534
coords = spec.get("coords", "auto")
518535

@@ -522,12 +539,18 @@ def _geoloc_description(ds: "xr.Dataset", lon_name: str, lat_name: str, spec: di
522539
pair_str = f"({lon_name!r}, {lat_name!r})"
523540

524541
if isinstance(coords, dict):
525-
return f"Geolocation specified: {pair_str}{dims_str}"
542+
base = f"Geolocation specified: {pair_str}{dims_str}"
543+
else:
544+
# "auto" or list — check whether cf_xarray drove the detection.
545+
cf_lons = _cf_geoloc_names(ds, "longitude")
546+
method = "auto detected with cf_xarray" if lon_name in cf_lons else "auto detected by name"
547+
base = f"Geolocation {method}: {pair_str}{dims_str}"
548+
549+
if time_dim is not None:
550+
n_times = ds.sizes[time_dim]
551+
base += f"; time dim={time_dim!r} ({n_times} step(s))"
526552

527-
# "auto" or list — check whether cf_xarray drove the detection.
528-
cf_lons = _cf_geoloc_names(ds, "longitude")
529-
method = "auto detected with cf_xarray" if lon_name in cf_lons else "auto detected by name"
530-
return f"Geolocation {method}: {pair_str}{dims_str}"
553+
return base
531554

532555

533556
# ---------------------------------------------------------------------------

src/point_collocation/core/plan.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,7 @@ def open_dataset(
257257
_resolve_auto_spec,
258258
_suppress_dask_progress,
259259
)
260+
from point_collocation.core.engine import _find_time_dim
260261

261262
try:
262263
import earthaccess # type: ignore[import-untyped]
@@ -315,7 +316,8 @@ def open_dataset(
315316
try:
316317
ds, lon_n, lat_n = _apply_coords(ds, spec)
317318
if not silent:
318-
print(_geoloc_description(ds, lon_n, lat_n, spec))
319+
time_dim = _find_time_dim(ds)
320+
print(_geoloc_description(ds, lon_n, lat_n, spec, time_dim=time_dim))
319321
except ValueError:
320322
pass # coords not found; return merged dataset as-is
321323
return ds
@@ -331,7 +333,8 @@ def open_dataset(
331333
try:
332334
ds, lon_n, lat_n = _apply_coords(ds, spec)
333335
if not silent:
334-
print(_geoloc_description(ds, lon_n, lat_n, spec))
336+
time_dim = _find_time_dim(ds)
337+
print(_geoloc_description(ds, lon_n, lat_n, spec, time_dim=time_dim))
335338
except ValueError:
336339
pass # coords not found; return dataset as-is
337340
return ds

tests/test_plan.py

Lines changed: 266 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3802,6 +3802,272 @@ def test_open_dataset_silent_suppresses_geolocation(
38023802
assert "Geolocation" not in captured.out
38033803
assert "open_method" not in captured.out
38043804

3805+
def test_open_dataset_geolocation_includes_time_dim(
3806+
self,
3807+
tmp_path: pathlib.Path,
3808+
monkeypatch: pytest.MonkeyPatch,
3809+
capsys: pytest.CaptureFixture,
3810+
) -> None:
3811+
"""open_dataset() reports the time dim in the Geolocation line when present."""
3812+
nc_path = str(tmp_path / "test_time.nc")
3813+
_make_l3_time_dataset(
3814+
[-90.0, 0.0, 90.0],
3815+
[-180.0, 0.0, 180.0],
3816+
times=["2023-06-01", "2023-06-02", "2023-06-03"],
3817+
seed=1,
3818+
).to_netcdf(nc_path, engine="netcdf4")
3819+
mock_ea = MagicMock()
3820+
mock_ea.open.return_value = [nc_path]
3821+
monkeypatch.setitem(__import__("sys").modules, "earthaccess", mock_ea)
3822+
3823+
pts = pd.DataFrame(
3824+
{"lat": [0.0], "lon": [0.0], "time": pd.to_datetime(["2023-06-01"])}
3825+
)
3826+
p = Plan(
3827+
points=pts,
3828+
results=[object()],
3829+
granules=[],
3830+
point_granule_map={0: []},
3831+
source_kwargs={"short_name": "TEST"},
3832+
time_buffer=pd.Timedelta(0),
3833+
)
3834+
3835+
ds = p.open_dataset(0, open_method={"open_kwargs": {"engine": "netcdf4"}})
3836+
ds.close()
3837+
captured = capsys.readouterr()
3838+
assert "time dim=" in captured.out
3839+
assert "3 step(s)" in captured.out
3840+
3841+
def test_open_dataset_geolocation_omits_time_dim_when_absent(
3842+
self,
3843+
tmp_path: pathlib.Path,
3844+
monkeypatch: pytest.MonkeyPatch,
3845+
capsys: pytest.CaptureFixture,
3846+
) -> None:
3847+
"""open_dataset() does NOT report a time dim when the dataset has none."""
3848+
nc_path = str(tmp_path / "test_no_time.nc")
3849+
_make_l3_dataset([-90.0, 0.0, 90.0], [-180.0, 0.0, 180.0], seed=1).to_netcdf(
3850+
nc_path, engine="netcdf4"
3851+
)
3852+
mock_ea = MagicMock()
3853+
mock_ea.open.return_value = [nc_path]
3854+
monkeypatch.setitem(__import__("sys").modules, "earthaccess", mock_ea)
3855+
3856+
pts = pd.DataFrame(
3857+
{"lat": [0.0], "lon": [0.0], "time": pd.to_datetime(["2023-06-01"])}
3858+
)
3859+
p = Plan(
3860+
points=pts,
3861+
results=[object()],
3862+
granules=[],
3863+
point_granule_map={0: []},
3864+
source_kwargs={"short_name": "TEST"},
3865+
time_buffer=pd.Timedelta(0),
3866+
)
3867+
3868+
ds = p.open_dataset(0, open_method={"open_kwargs": {"engine": "netcdf4"}})
3869+
ds.close()
3870+
captured = capsys.readouterr()
3871+
assert "Geolocation" in captured.out
3872+
assert "time dim=" not in captured.out
3873+
3874+
3875+
# ---------------------------------------------------------------------------
3876+
# Tests for _geoloc_description time_dim parameter
3877+
# ---------------------------------------------------------------------------
3878+
3879+
3880+
class TestGeolocationDescription:
3881+
"""Tests for _geoloc_description() with time_dim reporting."""
3882+
3883+
def test_no_time_dim_omits_time_info(self) -> None:
3884+
from point_collocation.core._open_method import _geoloc_description
3885+
3886+
ds = xr.Dataset(
3887+
{"sst": (["lat", "lon"], [[1.0]])},
3888+
coords={"lat": [0.0], "lon": [0.0]},
3889+
)
3890+
desc = _geoloc_description(ds, "lon", "lat", {"coords": "auto"}, time_dim=None)
3891+
assert "time" not in desc
3892+
3893+
def test_time_dim_included_in_description(self) -> None:
3894+
from point_collocation.core._open_method import _geoloc_description
3895+
3896+
times = pd.to_datetime(["2023-06-01", "2023-06-02", "2023-06-03"])
3897+
ds = xr.Dataset(
3898+
{"sst": (["time", "lat", "lon"], [[[1.0]], [[2.0]], [[3.0]]])},
3899+
coords={"time": times, "lat": [0.0], "lon": [0.0]},
3900+
)
3901+
desc = _geoloc_description(ds, "lon", "lat", {"coords": "auto"}, time_dim="time")
3902+
assert "time dim='time'" in desc
3903+
assert "3 step(s)" in desc
3904+
3905+
def test_single_time_step_reported_correctly(self) -> None:
3906+
from point_collocation.core._open_method import _geoloc_description
3907+
3908+
times = pd.to_datetime(["2023-06-01"])
3909+
ds = xr.Dataset(
3910+
{"sst": (["time", "lat", "lon"], [[[1.0]]])},
3911+
coords={"time": times, "lat": [0.0], "lon": [0.0]},
3912+
)
3913+
desc = _geoloc_description(ds, "lon", "lat", {"coords": "auto"}, time_dim="time")
3914+
assert "time dim='time'" in desc
3915+
assert "1 step(s)" in desc
3916+
3917+
def test_time_dim_appended_to_specified_coords(self) -> None:
3918+
from point_collocation.core._open_method import _geoloc_description
3919+
3920+
times = pd.to_datetime(["2023-06-01", "2023-06-02"])
3921+
ds = xr.Dataset(
3922+
{"sst": (["time", "lat", "lon"], [[[1.0]], [[2.0]]])},
3923+
coords={"time": times, "lat": [0.0], "lon": [0.0]},
3924+
)
3925+
spec = {"coords": {"lat": "lat", "lon": "lon"}}
3926+
desc = _geoloc_description(ds, "lon", "lat", spec, time_dim="time")
3927+
assert "Geolocation specified" in desc
3928+
assert "time dim='time'" in desc
3929+
assert "2 step(s)" in desc
3930+
3931+
3932+
# ---------------------------------------------------------------------------
3933+
# Integration test with multi_time.nc fixture
3934+
# ---------------------------------------------------------------------------
3935+
3936+
3937+
class TestMultiTimeFixtureIntegration:
3938+
"""End-to-end tests using the examples/fixtures/multi_time.nc fixture."""
3939+
3940+
FIXTURE_PATH = str(
3941+
pathlib.Path(__file__).parent.parent / "examples" / "fixtures" / "multi_time.nc"
3942+
)
3943+
3944+
def _make_plan(
3945+
self,
3946+
monkeypatch: pytest.MonkeyPatch,
3947+
lat: float,
3948+
lon: float,
3949+
point_time: str,
3950+
) -> "Plan":
3951+
"""Build a plan that points to the multi_time.nc fixture."""
3952+
mock_ea = MagicMock()
3953+
mock_ea.open.return_value = [self.FIXTURE_PATH]
3954+
monkeypatch.setitem(__import__("sys").modules, "earthaccess", mock_ea)
3955+
3956+
pts = pd.DataFrame(
3957+
{"lat": [lat], "lon": [lon], "time": pd.to_datetime([point_time])}
3958+
)
3959+
gm = GranuleMeta(
3960+
granule_id="multi_time.nc",
3961+
begin=pd.Timestamp("2024-01-01"),
3962+
end=pd.Timestamp("2024-12-31"),
3963+
bbox=(-180.0, -90.0, 180.0, 90.0),
3964+
result_index=0,
3965+
)
3966+
return Plan(
3967+
points=pts,
3968+
results=[object()],
3969+
granules=[gm],
3970+
point_granule_map={0: [0]},
3971+
variables=["uo"],
3972+
source_kwargs={"short_name": "TEST"},
3973+
time_buffer=pd.Timedelta(0),
3974+
)
3975+
3976+
def test_nearest_time_selects_expected_day(
3977+
self, monkeypatch: pytest.MonkeyPatch
3978+
) -> None:
3979+
"""Nearest time selection retrieves a real value (not NaN) from multi_time.nc."""
3980+
p = self._make_plan(
3981+
monkeypatch,
3982+
lat=33.69,
3983+
lon=17.46,
3984+
point_time="2024-06-15",
3985+
)
3986+
result = pc.matchup(
3987+
p,
3988+
open_method="dataset",
3989+
open_dataset_kwargs={"engine": "netcdf4"},
3990+
)
3991+
assert len(result) == 1
3992+
assert "uo" in result.columns
3993+
assert not math.isnan(result.loc[0, "uo"]), (
3994+
"uo must be a real value — nearest time should be selected from multi_time.nc"
3995+
)
3996+
3997+
def test_different_point_times_select_different_values(
3998+
self, monkeypatch: pytest.MonkeyPatch
3999+
) -> None:
4000+
"""Two points at the same location but different times should generally yield
4001+
different extracted values from the 366-day multi_time.nc fixture."""
4002+
# Build two separate plans with different point times.
4003+
mock_ea = MagicMock()
4004+
mock_ea.open.side_effect = [
4005+
[self.FIXTURE_PATH],
4006+
[self.FIXTURE_PATH],
4007+
]
4008+
monkeypatch.setitem(__import__("sys").modules, "earthaccess", mock_ea)
4009+
4010+
gm = GranuleMeta(
4011+
granule_id="multi_time.nc",
4012+
begin=pd.Timestamp("2024-01-01"),
4013+
end=pd.Timestamp("2024-12-31"),
4014+
bbox=(-180.0, -90.0, 180.0, 90.0),
4015+
result_index=0,
4016+
)
4017+
4018+
def _build(point_time: str) -> "Plan":
4019+
pts = pd.DataFrame(
4020+
{"lat": [33.69], "lon": [17.46], "time": pd.to_datetime([point_time])}
4021+
)
4022+
return Plan(
4023+
points=pts,
4024+
results=[object()],
4025+
granules=[gm],
4026+
point_granule_map={0: [0]},
4027+
variables=["uo"],
4028+
source_kwargs={"short_name": "TEST"},
4029+
time_buffer=pd.Timedelta(0),
4030+
)
4031+
4032+
kwargs = {"open_method": "dataset", "open_dataset_kwargs": {"engine": "netcdf4"}}
4033+
r_jan = pc.matchup(_build("2024-01-15"), **kwargs)
4034+
r_jul = pc.matchup(_build("2024-07-15"), **kwargs)
4035+
4036+
assert not math.isnan(r_jan.loc[0, "uo"])
4037+
assert not math.isnan(r_jul.loc[0, "uo"])
4038+
# January and July sea-current values should differ (different time steps).
4039+
assert r_jan.loc[0, "uo"] != pytest.approx(r_jul.loc[0, "uo"]), (
4040+
"Expected different uo values for Jan-15 vs Jul-15 in multi_time.nc"
4041+
)
4042+
4043+
def test_open_dataset_reports_time_dim_for_multi_time_fixture(
4044+
self,
4045+
monkeypatch: pytest.MonkeyPatch,
4046+
capsys: pytest.CaptureFixture,
4047+
) -> None:
4048+
"""open_dataset() prints the time dim info when opening multi_time.nc."""
4049+
mock_ea = MagicMock()
4050+
mock_ea.open.return_value = [self.FIXTURE_PATH]
4051+
monkeypatch.setitem(__import__("sys").modules, "earthaccess", mock_ea)
4052+
4053+
pts = pd.DataFrame(
4054+
{"lat": [33.69], "lon": [17.46], "time": pd.to_datetime(["2024-06-15"])}
4055+
)
4056+
p = Plan(
4057+
points=pts,
4058+
results=[object()],
4059+
granules=[],
4060+
point_granule_map={0: []},
4061+
source_kwargs={"short_name": "TEST"},
4062+
time_buffer=pd.Timedelta(0),
4063+
)
4064+
4065+
ds = p.open_dataset(0, open_method={"open_kwargs": {"engine": "netcdf4"}})
4066+
ds.close()
4067+
captured = capsys.readouterr()
4068+
assert "time dim='time'" in captured.out
4069+
assert "366 step(s)" in captured.out
4070+
38054071

38064072
# ---------------------------------------------------------------------------
38074073
# Helper: create a grouped NetCDF4 file (HDF5 with subgroups)

0 commit comments

Comments
 (0)