Skip to content

Commit e52c816

Browse files
authored
Merge pull request #117 from fish-pace/copilot/quiet-dask-progress-bars
Suppress dask progress bar output when opening HE5 files with chunks={}
2 parents bdf4ec4 + db1bf88 commit e52c816

3 files changed

Lines changed: 136 additions & 25 deletions

File tree

src/point_collocation/core/_open_method.py

Lines changed: 73 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636

3737
from __future__ import annotations
3838

39+
import contextlib
40+
import os
3941
import re
4042
from contextlib import contextmanager
4143
from typing import TYPE_CHECKING, Generator
@@ -74,6 +76,38 @@
7476
}
7577

7678

79+
# ---------------------------------------------------------------------------
80+
# Progress-suppression helper
81+
# ---------------------------------------------------------------------------
82+
83+
84+
@contextmanager
85+
def _suppress_dask_progress() -> Generator[None, None, None]:
86+
"""Suppress dask progress bar output during file-open operations.
87+
88+
When opening HE5/HDF5 files with ``chunks={}``, dask (or pqdm used
89+
internally by earthaccess) may emit verbose progress bar output
90+
(e.g. ``QUEUEING TASKS``, ``PROCESSING TASKS``, ``COLLECTING RESULTS``).
91+
This context manager suppresses that output without affecting the data.
92+
93+
In a Jupyter environment it uses :func:`IPython.utils.io.capture_output`;
94+
otherwise it redirects both ``stdout`` and ``stderr`` to ``/dev/null`` for
95+
the duration of the open call.
96+
"""
97+
try:
98+
from IPython.utils import io as _ipy_io # type: ignore[import]
99+
100+
with _ipy_io.capture_output():
101+
yield
102+
return
103+
except ImportError:
104+
pass
105+
106+
with open(os.devnull, "w") as _devnull:
107+
with contextlib.redirect_stdout(_devnull), contextlib.redirect_stderr(_devnull):
108+
yield
109+
110+
77111
# ---------------------------------------------------------------------------
78112
# Open kwargs helpers
79113
# ---------------------------------------------------------------------------
@@ -575,13 +609,15 @@ def _open_and_merge_dataset_groups(
575609
group_paths = list(merge)
576610
else:
577611
# No merge requested — open the root dataset directly.
578-
return xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
612+
with _suppress_dask_progress():
613+
return xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
579614

580615
opened: list[xr.Dataset] = []
581616
for path in group_paths:
582617
kwargs = {**effective_kwargs, "group": path}
583618
try:
584-
ds = xr.open_dataset(file_obj, **kwargs) # type: ignore[arg-type]
619+
with _suppress_dask_progress():
620+
ds = xr.open_dataset(file_obj, **kwargs) # type: ignore[arg-type]
585621
if ds.data_vars:
586622
opened.append(ds)
587623
else:
@@ -686,22 +722,23 @@ def _visit(name: str, obj: object) -> None:
686722

687723
def _open_datatree_fn(file_obj: object, kwargs: dict) -> object:
    """Open *file_obj* as a DataTree using whichever API is available.

    Prefers the built-in :func:`xarray.open_datatree` (xarray >= 2024.x) and
    falls back to the standalone ``datatree`` package.  Dask progress bar
    output emitted during the open is suppressed.

    Parameters
    ----------
    file_obj : object
        File-like object or path accepted by the datatree opener.
    kwargs : dict
        Keyword arguments forwarded verbatim to the opener.

    Returns
    -------
    object
        The opened DataTree.

    Raises
    ------
    ImportError
        If neither xarray's built-in DataTree support nor the ``datatree``
        package is available.
    """
    with _suppress_dask_progress():
        # Probe for the attribute instead of wrapping the *call* in
        # ``try/except AttributeError`` — an AttributeError raised inside
        # open_datatree itself must not silently divert us to the fallback.
        open_dt = getattr(xr, "open_datatree", None)
        if open_dt is not None:
            return open_dt(file_obj, **kwargs)  # type: ignore[arg-type]

        try:
            import datatree  # type: ignore[import-untyped]
        except ImportError as exc:
            raise ImportError(
                "open_method='datatree-merge' requires either xarray >= 2024.x (with "
                "built-in DataTree support) or the 'datatree' package. "
                "Install it with: pip install datatree"
            ) from exc

        return datatree.open_datatree(file_obj, **kwargs)  # type: ignore[arg-type]
705742

706743

707744
def _merge_datatree_with_spec(dt: object, spec: dict) -> xr.Dataset:
@@ -876,13 +913,18 @@ def _seek_back() -> None:
876913

877914
# --- Try the fast dataset path ---
878915
dataset_error: BaseException | None = None
916+
ds_probe: xr.Dataset | None = None
879917
try:
880-
with xr.open_dataset(file_obj, **effective_kwargs) as ds: # type: ignore[arg-type]
881-
_apply_coords(ds, spec)
918+
with _suppress_dask_progress():
919+
ds_probe = xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
920+
_apply_coords(ds_probe, spec)
882921
_seek_back()
883922
return {**spec, "xarray_open": "dataset"}
884923
except Exception as exc:
885924
dataset_error = exc
925+
finally:
926+
if ds_probe is not None:
927+
ds_probe.close()
886928

887929
_seek_back()
888930

@@ -968,7 +1010,8 @@ def _open_as_flat_dataset(
9681010
for path in group_paths:
9691011
kwargs = {**effective_kwargs, "group": path}
9701012
try:
971-
ds_grp = xr.open_dataset(file_obj, **kwargs) # type: ignore[arg-type]
1013+
with _suppress_dask_progress():
1014+
ds_grp = xr.open_dataset(file_obj, **kwargs) # type: ignore[arg-type]
9721015
if ds_grp.data_vars:
9731016
opened.append(ds_grp)
9741017
else:
@@ -985,9 +1028,15 @@ def _open_as_flat_dataset(
9851028
except Exception:
9861029
pass
9871030
else:
988-
with xr.open_dataset(file_obj, **effective_kwargs) as ds: # type: ignore[arg-type]
989-
ds, lon_name, lat_name = _apply_coords(ds, spec)
990-
yield (ds, lon_name, lat_name)
1031+
ds_simple: xr.Dataset | None = None
1032+
try:
1033+
with _suppress_dask_progress():
1034+
ds_simple = xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
1035+
ds_simple, lon_name, lat_name = _apply_coords(ds_simple, spec)
1036+
yield (ds_simple, lon_name, lat_name)
1037+
finally:
1038+
if ds_simple is not None:
1039+
ds_simple.close()
9911040

9921041
elif xarray_open == "datatree":
9931042
dt = _open_datatree_fn(file_obj, effective_kwargs)
@@ -1032,7 +1081,8 @@ def _open_as_flat_dataset_auto(
10321081

10331082
# --- Fast path: try xr.open_dataset ---
10341083
try:
1035-
ds_fast = xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
1084+
with _suppress_dask_progress():
1085+
ds_fast = xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
10361086
ds_fast, lon_name_fast, lat_name_fast = _apply_coords(ds_fast, spec)
10371087
except Exception as exc:
10381088
dataset_exc = exc

src/point_collocation/core/plan.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,7 @@ def open_dataset(
254254
_open_and_merge_dataset_groups,
255255
_open_datatree_fn,
256256
_resolve_auto_spec,
257+
_suppress_dask_progress,
257258
)
258259

259260
try:
@@ -320,7 +321,8 @@ def open_dataset(
320321
# Dataset-based group merge: open each group and merge.
321322
ds = _open_and_merge_dataset_groups(file_obj, spec, effective_kwargs)
322323
else:
323-
ds = xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
324+
with _suppress_dask_progress():
325+
ds = xr.open_dataset(file_obj, **effective_kwargs) # type: ignore[arg-type]
324326
try:
325327
ds, _, _ = _apply_coords(ds, spec)
326328
except ValueError:
@@ -371,6 +373,7 @@ def open_mfdataset(
371373
_open_and_merge_dataset_groups,
372374
_open_as_flat_dataset,
373375
_open_datatree_fn,
376+
_suppress_dask_progress,
374377
)
375378

376379
try:
@@ -427,7 +430,8 @@ def open_mfdataset(
427430
if not merged_datasets:
428431
return xr.Dataset()
429432
return xr.concat(merged_datasets, dim="granule")
430-
return xr.open_mfdataset(file_objs, **effective_kwargs) # type: ignore[arg-type]
433+
with _suppress_dask_progress():
434+
return xr.open_mfdataset(file_objs, **effective_kwargs) # type: ignore[arg-type]
431435

432436
raise ValueError(
433437
f"open_method['xarray_open']={xarray_open!r} is not valid for open_mfdataset."

tests/test_plan.py

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6549,3 +6549,60 @@ def test_no_change_when_no_phony_dims(self) -> None:
65496549
result = _safe_align_phony_dims([ds1, ds2])
65506550
assert "y" in result[0].dims
65516551
assert "x" in result[0].dims
6552+
6553+
6554+
class TestSuppressDaskProgress:
    """Unit tests covering the _suppress_dask_progress context manager."""

    def test_suppresses_stdout_output(self, capsys: pytest.CaptureFixture) -> None:
        """Text printed to stdout while the context is active never surfaces."""
        from point_collocation.core._open_method import _suppress_dask_progress

        with _suppress_dask_progress():
            print("this should be suppressed")

        out = capsys.readouterr().out
        assert "this should be suppressed" not in out

    def test_suppresses_stderr_output(self, capsys: pytest.CaptureFixture) -> None:
        """Text printed to stderr while the context is active never surfaces."""
        import sys

        from point_collocation.core._open_method import _suppress_dask_progress

        with _suppress_dask_progress():
            print("stderr output", file=sys.stderr)

        err = capsys.readouterr().err
        assert "stderr output" not in err

    def test_does_not_suppress_after_context(self, capsys: pytest.CaptureFixture) -> None:
        """Once the context exits, normal stdout behaviour resumes."""
        from point_collocation.core._open_method import _suppress_dask_progress

        with _suppress_dask_progress():
            print("inside (suppressed)")

        print("outside (not suppressed)")
        out = capsys.readouterr().out
        assert "outside (not suppressed)" in out
        assert "inside (suppressed)" not in out

    def test_propagates_exceptions(self) -> None:
        """Errors raised inside the managed block escape unchanged."""
        from point_collocation.core._open_method import _suppress_dask_progress

        with pytest.raises(RuntimeError, match="test error"):
            with _suppress_dask_progress():
                raise RuntimeError("test error")

    def test_context_is_reentrant(self, capsys: pytest.CaptureFixture) -> None:
        """Two nested activations of the context manager coexist happily."""
        from point_collocation.core._open_method import _suppress_dask_progress

        with _suppress_dask_progress():
            with _suppress_dask_progress():
                print("nested suppression")

        out = capsys.readouterr().out
        assert "nested suppression" not in out

0 commit comments

Comments
 (0)