From aabb69b804712802b20fcf02b70a6c2c1152f4e9 Mon Sep 17 00:00:00 2001 From: griesche <48522799+griesche@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:38:44 +0200 Subject: [PATCH 1/7] Add initial implementation for EDR product --- cloudnetpy/output.py | 3 + cloudnetpy/products/__init__.py | 1 + cloudnetpy/products/edr.py | 291 +++++++++++++++++++++++++++ cloudnetpy/products/product_tools.py | 54 +++++ 4 files changed, 349 insertions(+) create mode 100644 cloudnetpy/products/edr.py diff --git a/cloudnetpy/output.py b/cloudnetpy/output.py index e203d877..5a92236f 100644 --- a/cloudnetpy/output.py +++ b/cloudnetpy/output.py @@ -436,6 +436,7 @@ def _get_identifier(short_id: str) -> str: "der", "ier", "classification-voodoo", + "edr", ) if short_id not in valid_ids: msg = f"Invalid file identifier: {short_id}" @@ -448,6 +449,8 @@ def _get_identifier(short_id: str) -> str: return "ice effective radius" if short_id == "der": return "droplet effective radius" + if short_id == "edr": + return "eddy dissipation rate" return short_id diff --git a/cloudnetpy/products/__init__.py b/cloudnetpy/products/__init__.py index 3d742d04..77dd4ff4 100644 --- a/cloudnetpy/products/__init__.py +++ b/cloudnetpy/products/__init__.py @@ -1,6 +1,7 @@ from .classification import generate_classification from .der import generate_der from .drizzle import generate_drizzle +from .edr import generate_edr from .ier import generate_ier from .iwc import generate_iwc from .lwc import generate_lwc diff --git a/cloudnetpy/products/edr.py b/cloudnetpy/products/edr.py new file mode 100644 index 00000000..5d0c5f86 --- /dev/null +++ b/cloudnetpy/products/edr.py @@ -0,0 +1,291 @@ +"""Module for creating Cloudnet droplet effective radius +using the Frisch et al. 2002 method. +""" + +import logging + +import numpy as np +from numpy import ma +from scipy.interpolate import CubicSpline + +from cloudnetpy import output # , utils +from cloudnetpy.datasource import DataSource +from cloudnetpy.metadata import MetaData +from cloudnetpy.products import product_tools + + +def generate_edr( + radar_file: str, + categorize_file: str, + output_file: str, + uuid: str | None = None, +) -> str: + """Generates Cloudnet eddy dissipation rates + product according to Griesche 2020 / Borque 2016. + + This function calculates the eddy dissipation rate. + The results are written in a netCDF file. + + Args: + radar_file: Radar file name. + categorize_file: Categorize file name. + output_file: Output file name. + uuid: Set specific UUID for the file. + + Returns: + UUID of the generated file. + + Examples: + >>> from cloudnetpy.products import generate_edr + >>> generate_edr('radar_file.nc','categorize.nc', 'edr.nc') + + References: + Griesche, H. J., Seifert, P., Ansmann, A., Baars, H., Barrientos Velasco, + C., Bühl, J., Engelmann, R., Radenz, M., Zhenping, Y., and Macke, A. (2020): + Application of the shipborne remote sensing supersite OCEANET for + profiling of Arctic aerosols and clouds during Polarstern cruise PS106, + Atmos. Meas. Tech., 13, 5335–5358, + from https://doi.org/10.5194/amt-13-5335-2020. + + Borque, P., Luke, E., and Kollias, P. (2016): + On the unified estimation of turbulence eddy dissipation rate using Doppler + cloud radars and lidars, J. Geophys. Res.Atmos., 120, 5972–5989, + from https://doi.org/10.1002/2015JD024543. + + """ + wind_source = WindSource(categorize_file) + edr_source = EdrSource(radar_file) + edr_source.append_edr(wind_source) + edr_source.update_time_dependent_variables(wind_source) + date = edr_source.get_date() + attributes = output.add_time_attribute(EDR_ATTRIBUTES, date) + output.update_attributes(edr_source.data, attributes) + uuid = output.save_product_file("edr", edr_source, output_file, uuid) + edr_source.close() + return uuid + + +class WindSource(DataSource): + """Data container for wind for eddy dissipation rate calculations.""" + + # + def __init__(self, categorize_file: str): + super().__init__(categorize_file) + + # + def get_wind(self) -> np.ndarray: + uwind = self.getvar("uwind") + vwind = self.getvar("vwind") + model_time = self.getvar("model_time") + model_height = self.getvar("model_height") + return product_tools.get_interpolated_horizontal_wind( + uwind, vwind, model_time, model_height + ) + + # + def get_cat_time(self) -> np.ndarray: + return self.getvar("time") + + +class EdrSource(DataSource): + """Data container for eddy dissipation rate calculations.""" + + # + def __init__(self, radar_file: str): + super().__init__(radar_file) + + def update_time_dependent_variables(self, wind_source, copy_from_cat: tuple = ()): + """Update the temporal high resolved variables that will be stored in + the product file to the standard Cloudnet product time resolution of + 30s. + """ + self.dataset.variables["time"] = wind_source.dataset.variables["time"] + self.time = wind_source.getvar("time") + vars_to_be_copied_from_source = ( + "altitude", + "latitude", + "longitude", + "time", + "height", + *copy_from_cat, + ) + for key in vars_to_be_copied_from_source: + if self.dataset.variables[key].size > 0: + self.dataset.variables[key] = wind_source.dataset.variables[key] + + def append_edr(self, wind_source): + """Estimate Eddy Dissipation Rate (EDR) according to Griesche et al. 2020.""" + + def interpolate_missing_values( + data: np.ndarray, time_window: np.ndarray + ) -> np.ndarray | None: + """Interpolate missing Doppler velocity values using a cubic spline. + + Returns: + - interpolated_data: np.ndarray + Data with missing values interpolated, or None if insufficient data. + """ + valid_indices = ma.where(data)[0] + if ( + valid_indices.shape[0] < 0.9 * data.shape[0] + ): # Minimum threshold for valid data + return None + if ma.is_masked(data[0]) or ma.is_masked( + data[-1] + ): # Avoid edge interpolation issues + return None + + spline = CubicSpline(time_window[valid_indices], data[valid_indices]) + return spline(time_window) + + def compute_epsilon_from_spectrum( + freq_sp: np.ndarray, + power_sp: np.ndarray, + freq_list: list[list[float]], + constant: float, + ) -> list[float]: + """Compute epsilon values from power spectrum.""" + epsilon_values = [] + + for freq_range in freq_list: + mask = (freq_sp > freq_range[0]) & (freq_sp < freq_range[1]) + if freq_sp[mask].shape[0] > 1 and power_sp[mask].shape[0] > 1: + fit = product_tools.spec_fit(freq_sp, power_sp, freq_range) + slope = fit[0] + intercept = fit[1] + if np.isnan(slope) or not ( + THRESHOLD_SLOPE_MIN < slope < THRESHOLD_SLOPE_MAX + ): + continue + epsilon_values.append((10**intercept / constant) ** (3.0 / 2.0)) + return epsilon_values + + # Predefinded Constants + KOLMOGOROV_CONSTANT = 0.5 + THRESHOLD_SLOPE_MIN = -2 # based on Borque, 2016, JGR (-5/3 +-20%) + THRESHOLD_SLOPE_MAX = -1.33 # based on Borque, 2016, JGR (-5/3 +-20%) + AVERAGING_TIME_MIN = 5 # Averaging time in minutes + H_to_S = 3600 # hours to seconds + H_to_M = 60 # hours to minutes + + # Input data + freq_list = standard_freq_list # predefined list of frequency ranges to analyze + radar_time = self.getvar("time") + radar_time_resolution_sec = np.round( + np.nanmin(np.diff(radar_time)) * H_to_S + ) # in seconds + height = self.getvar("height") + vertical_velocity = self.getvar("v") + + horizontal_wind_function = wind_source.get_wind() + horizontal_wind = horizontal_wind_function(radar_time, height) + categorize_time = wind_source.get_cat_time() + product_resolution = np.round( + np.nanmin(np.diff(categorize_time)) * H_to_S + ) # in seconds + + num_height_levels = height.shape[0] + epsilon = ( + np.ones((int(np.ceil(categorize_time.shape[0])), num_height_levels)) + * -999.0 + ) + logging.debug(product_resolution, radar_time_resolution_sec) + + # Process each time interval + for time_idx, t in enumerate(categorize_time): + logging.debug(time_idx, t) + if t + AVERAGING_TIME_MIN / H_to_M > radar_time[-1]: + break + # + start_idx = np.where(radar_time > t)[0][0] + end_idx = np.where(radar_time > t + AVERAGING_TIME_MIN / H_to_M)[0][0] + + # Process each height level + for height_idx, _h in enumerate(height[:num_height_levels]): + data_velocity = vertical_velocity[start_idx:end_idx, height_idx] + time_window = radar_time[start_idx:end_idx] + + if ma.is_masked(data_velocity): + data_velocity = interpolate_missing_values( + data_velocity, time_window + ) # type: ignore[assignment] + if data_velocity is None: + continue + + horizontal_wind_speed = horizontal_wind[ + int((start_idx + end_idx) / 2), height_idx + ] + if np.isnan(horizontal_wind_speed): + continue + + # Compute frequency spectrum + freq_sp, power_sp = product_tools.periodogram( + data_velocity, radar_time_resolution_sec, horizontal_wind_speed + ) + if np.isnan(freq_sp).all(): + continue + # Compute epsilon for frequency ranges + epsilon_values = compute_epsilon_from_spectrum( + freq_sp, power_sp, freq_list, KOLMOGOROV_CONSTANT + ) + + # Store average epsilon + if epsilon_values: + epsilon[time_idx, height_idx] = np.mean(epsilon_values) + + self.append_data(ma.masked_where(epsilon == -999.0, epsilon), "edr") + + +COMMENTS = { + "general": ( + "This dataset contains the eddy dissipation rate calculated\n" + "according to the approach presented by Griesche et al. (2020). It is based\n" + "on a relationship between the slope of the inertial subrange of the\n" + "turbulent energy spectrum. If spectrum follows a -5/3 slope in a log-log\n" + "the eddy dissipation rate can be calculated. The turbulent energy spectrum\n" + "was derived as the power spectrum of cloud radar Doppler velocity averaged\n" + "over 5 minutes.\n" + ), + "edr": ( + "This variable was calculated for the profiles where 5 minutes of\n" + "continuous cloud radar Doppler velocity was available. In case of\n" + "than lass than 10% missing data, the missing data points were interpolated\n" + "using cubic spine interpolation.\n" + ), +} + + +EDR_ATTRIBUTES = { + "comment": COMMENTS["general"], + "edr": MetaData( + long_name="Eddy dissipation rate", + units="m2 s-3", + ancillary_variables="edr_error", + comment=COMMENTS["edr"], + ), + "edr_error": MetaData( + long_name="Absolute error in eddy dissipation rate", + units="m2 s-3", + ), +} + + +standard_freq_list = [ + [5e-3, 5e-2], + [5e-3, 1e-1], + [5e-3, 35e-2], + [5e-3, 8e-1], + [1e-2, 1e-1], + [1e-2, 35e-2], + [1e-2, 8e-1], + [2e-2, 2e-1], + [2e-2, 8e-1], + [35e-3, 2e-1], + [35e-3, 8e-1], + [5e-2, 2e-1], + [5e-2, 8e-1], + [8e-2, 35e-2], + [8e-2, 8e-1], + [1e-1, 5e-1], + [1e-1, 8e-1], +] # adapted from Griesche et al. 2020 diff --git a/cloudnetpy/products/product_tools.py b/cloudnetpy/products/product_tools.py index c6a6acbf..0c2dc83d 100644 --- a/cloudnetpy/products/product_tools.py +++ b/cloudnetpy/products/product_tools.py @@ -10,6 +10,8 @@ import numpy.typing as npt from numpy import ma from numpy.typing import NDArray +from scipy.interpolate import RectBivariateSpline +from scipy.stats import linregress from cloudnetpy import constants, utils from cloudnetpy.datasource import DataSource @@ -319,3 +321,55 @@ def _get_temperature(categorize_file: str | PathLike) -> npt.NDArray: """Returns interpolated temperatures in Celsius.""" atmosphere = interpolate_model(categorize_file, "temperature") return atmoslib.k2c(atmosphere["temperature"]) + + +def get_interpolated_horizontal_wind(uwind, vwind, wind_time, wind_height): + horizontal_wind_speed = np.sqrt((uwind) ** 2 + (vwind) ** 2) + return RectBivariateSpline( + wind_time, wind_height, horizontal_wind_speed, kx=1, ky=1 + ) + + +def periodogram(vel_data, delta_t, adv_vel=10): + """Compute frequency and power spectra. + + Based on a time series of mean Doppler velocities. + """ + # Convert input to numpy array + vel_data = np.asarray(vel_data, dtype=float) + if vel_data.size == 0: + return np.nan, np.nan + # Use wavenumber + delta_x = delta_t * adv_vel + # Compute the Fourier transform and scale power spectrum + fft_result = np.fft.rfft(vel_data) + power_sp = (delta_x**2 / (vel_data.size * delta_x)) * np.abs(fft_result) ** 2 + power_sp *= 2 / (2 * np.pi) # Scale by 2 for positive frequencies, normalize by 2π + + # Compute angular frequencies + freq_sp = np.fft.rfftfreq(vel_data.size, d=delta_x) * 2 * np.pi + + if any(power_sp == 0): + power_sp = power_sp[np.where(np.abs(fft_result) != 0)] + freq_sp = freq_sp[np.where(np.abs(fft_result) != 0)] + + return freq_sp, power_sp + + +def spec_fit(freq, power, freq_range): + """Fit a linear function (power~frequency). + + Within a given frequency range (in log space). + """ + mask = (freq > freq_range[0]) & (freq < freq_range[1]) + freq_lin = np.log10(freq[mask]) + power_lin = np.log10(power[mask]) + valid_mask = ~np.isnan(freq_lin) & ~np.isnan(power_lin) + freq_valid = freq_lin[valid_mask] + power_valid = power_lin[valid_mask] + # slope, intercept, r_value, p_value, std_err + if len(freq_valid) > 1: + # Perform linear regression + return linregress(freq_valid, power_valid) + return (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan) + From e0abac22e7b446dd994c98e3e674c3effd19c2f6 Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Tue, 7 Jan 2025 08:51:49 +0200 Subject: [PATCH 2/7] Fix edr plotting --- cloudnetpy/plotting/plot_meta.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cloudnetpy/plotting/plot_meta.py b/cloudnetpy/plotting/plot_meta.py index 56234d35..ae7bea80 100644 --- a/cloudnetpy/plotting/plot_meta.py +++ b/cloudnetpy/plotting/plot_meta.py @@ -667,5 +667,6 @@ class PlotMeta(NamedTuple): plot_range=(1e-7, 1e-1), log_scale=True, ), + "edr": PlotMeta(log_scale=True), }, } From ef8cd3b1ede5a571660e371102b212b0a82e516a Mon Sep 17 00:00:00 2001 From: Tuomas Siipola Date: Wed, 3 Sep 2025 15:48:09 +0300 Subject: [PATCH 3/7] Fix type hints --- cloudnetpy/products/edr.py | 93 +++++++++++++++------------- cloudnetpy/products/product_tools.py | 14 ++++- 2 files changed, 60 insertions(+), 47 deletions(-) diff --git a/cloudnetpy/products/edr.py b/cloudnetpy/products/edr.py index 5d0c5f86..728137ee 100644 --- a/cloudnetpy/products/edr.py +++ b/cloudnetpy/products/edr.py @@ -3,23 +3,28 @@ """ import logging +from collections.abc import Callable +from os import PathLike +from uuid import UUID import numpy as np +import numpy.typing as npt from numpy import ma from scipy.interpolate import CubicSpline -from cloudnetpy import output # , utils +from cloudnetpy import output from cloudnetpy.datasource import DataSource from cloudnetpy.metadata import MetaData from cloudnetpy.products import product_tools +from cloudnetpy.utils import get_uuid def generate_edr( - radar_file: str, - categorize_file: str, - output_file: str, - uuid: str | None = None, -) -> str: + radar_file: str | PathLike, + categorize_file: str | PathLike, + output_file: str | PathLike, + uuid: str | UUID | None = None, +) -> UUID: """Generates Cloudnet eddy dissipation rates product according to Griesche 2020 / Borque 2016. @@ -53,27 +58,25 @@ def generate_edr( from https://doi.org/10.1002/2015JD024543. """ + uuid = get_uuid(uuid) wind_source = WindSource(categorize_file) - edr_source = EdrSource(radar_file) - edr_source.append_edr(wind_source) - edr_source.update_time_dependent_variables(wind_source) - date = edr_source.get_date() - attributes = output.add_time_attribute(EDR_ATTRIBUTES, date) - output.update_attributes(edr_source.data, attributes) - uuid = output.save_product_file("edr", edr_source, output_file, uuid) - edr_source.close() + with EdrSource(radar_file) as edr_source: + edr_source.append_edr(wind_source) + edr_source.update_time_dependent_variables(wind_source) + date = edr_source.get_date() + attributes = output.add_time_attribute(EDR_ATTRIBUTES, date) + output.update_attributes(edr_source.data, attributes) + output.save_product_file("edr", edr_source, output_file, uuid) return uuid class WindSource(DataSource): """Data container for wind for eddy dissipation rate calculations.""" - # - def __init__(self, categorize_file: str): + def __init__(self, categorize_file: str | PathLike) -> None: super().__init__(categorize_file) - # - def get_wind(self) -> np.ndarray: + def get_wind(self) -> Callable[[npt.NDArray, npt.NDArray], npt.NDArray]: uwind = self.getvar("uwind") vwind = self.getvar("vwind") model_time = self.getvar("model_time") @@ -82,19 +85,19 @@ def get_wind(self) -> np.ndarray: uwind, vwind, model_time, model_height ) - # - def get_cat_time(self) -> np.ndarray: + def get_cat_time(self) -> npt.NDArray: return self.getvar("time") class EdrSource(DataSource): """Data container for eddy dissipation rate calculations.""" - # - def __init__(self, radar_file: str): + def __init__(self, radar_file: str | PathLike) -> None: super().__init__(radar_file) - def update_time_dependent_variables(self, wind_source, copy_from_cat: tuple = ()): + def update_time_dependent_variables( + self, wind_source: WindSource, copy_from_cat: tuple[str, ...] = () + ) -> None: """Update the temporal high resolved variables that will be stored in the product file to the standard Cloudnet product time resolution of 30s. @@ -113,7 +116,7 @@ def update_time_dependent_variables(self, wind_source, copy_from_cat: tuple = () if self.dataset.variables[key].size > 0: self.dataset.variables[key] = wind_source.dataset.variables[key] - def append_edr(self, wind_source): + def append_edr(self, wind_source: WindSource) -> None: """Estimate Eddy Dissipation Rate (EDR) according to Griesche et al. 2020.""" def interpolate_missing_values( @@ -139,9 +142,9 @@ def interpolate_missing_values( return spline(time_window) def compute_epsilon_from_spectrum( - freq_sp: np.ndarray, - power_sp: np.ndarray, - freq_list: list[list[float]], + freq_sp: npt.NDArray, + power_sp: npt.NDArray, + freq_list: list[tuple[float, float]], constant: float, ) -> list[float]: """Compute epsilon values from power spectrum.""" @@ -262,30 +265,32 @@ def compute_epsilon_from_spectrum( units="m2 s-3", ancillary_variables="edr_error", comment=COMMENTS["edr"], + dimensions=("time", "height"), ), "edr_error": MetaData( long_name="Absolute error in eddy dissipation rate", units="m2 s-3", + dimensions=("time", "height"), ), } standard_freq_list = [ - [5e-3, 5e-2], - [5e-3, 1e-1], - [5e-3, 35e-2], - [5e-3, 8e-1], - [1e-2, 1e-1], - [1e-2, 35e-2], - [1e-2, 8e-1], - [2e-2, 2e-1], - [2e-2, 8e-1], - [35e-3, 2e-1], - [35e-3, 8e-1], - [5e-2, 2e-1], - [5e-2, 8e-1], - [8e-2, 35e-2], - [8e-2, 8e-1], - [1e-1, 5e-1], - [1e-1, 8e-1], + (5e-3, 5e-2), + (5e-3, 1e-1), + (5e-3, 35e-2), + (5e-3, 8e-1), + (1e-2, 1e-1), + (1e-2, 35e-2), + (1e-2, 8e-1), + (2e-2, 2e-1), + (2e-2, 8e-1), + (35e-3, 2e-1), + (35e-3, 8e-1), + (5e-2, 2e-1), + (5e-2, 8e-1), + (8e-2, 35e-2), + (8e-2, 8e-1), + (1e-1, 5e-1), + (1e-1, 8e-1), ] # adapted from Griesche et al. 2020 diff --git a/cloudnetpy/products/product_tools.py b/cloudnetpy/products/product_tools.py index 0c2dc83d..69e19b29 100644 --- a/cloudnetpy/products/product_tools.py +++ b/cloudnetpy/products/product_tools.py @@ -1,5 +1,6 @@ """General helper classes and functions for all products.""" +from collections.abc import Callable from dataclasses import dataclass from os import PathLike from typing import NamedTuple @@ -323,14 +324,19 @@ def _get_temperature(categorize_file: str | PathLike) -> npt.NDArray: return atmoslib.k2c(atmosphere["temperature"]) -def get_interpolated_horizontal_wind(uwind, vwind, wind_time, wind_height): +def get_interpolated_horizontal_wind( + uwind: npt.NDArray, + vwind: npt.NDArray, + wind_time: npt.NDArray, + wind_height: npt.NDArray, +) -> Callable[[npt.NDArray, npt.NDArray], npt.NDArray]: horizontal_wind_speed = np.sqrt((uwind) ** 2 + (vwind) ** 2) return RectBivariateSpline( wind_time, wind_height, horizontal_wind_speed, kx=1, ky=1 ) -def periodogram(vel_data, delta_t, adv_vel=10): +def periodogram(vel_data: npt.NDArray, delta_t: float, adv_vel: float = 10.0) -> tuple: """Compute frequency and power spectra. Based on a time series of mean Doppler velocities. @@ -356,7 +362,9 @@ def periodogram(vel_data, delta_t, adv_vel=10): return freq_sp, power_sp -def spec_fit(freq, power, freq_range): +def spec_fit( + freq: npt.NDArray, power: npt.NDArray, freq_range: tuple[float, float] +) -> tuple[float, float, float, float, float, float]: """Fit a linear function (power~frequency). Within a given frequency range (in log space). From fe26b8c4eb2ceb2a03d07c52d9325417166c0a83 Mon Sep 17 00:00:00 2001 From: Simo Tukiainen Date: Tue, 5 May 2026 14:39:43 +0300 Subject: [PATCH 4/7] Fix format --- cloudnetpy/products/product_tools.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cloudnetpy/products/product_tools.py b/cloudnetpy/products/product_tools.py index 69e19b29..311eba92 100644 --- a/cloudnetpy/products/product_tools.py +++ b/cloudnetpy/products/product_tools.py @@ -380,4 +380,3 @@ def spec_fit( # Perform linear regression return linregress(freq_valid, power_valid) return (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan) - From e4df318154f1a78bf632b26ab1060c0e225e24a2 Mon Sep 17 00:00:00 2001 From: Simo Tukiainen Date: Tue, 5 May 2026 15:11:12 +0300 Subject: [PATCH 5/7] Optimize EDR product and switch input from categorize to model file --- cloudnetpy/categorize/model.py | 21 +- cloudnetpy/cli.py | 45 +++- cloudnetpy/output.py | 59 ++++- cloudnetpy/plotting/plot_meta.py | 1 - cloudnetpy/products/__init__.py | 2 +- cloudnetpy/products/edr.py | 296 --------------------- cloudnetpy/products/epsilon_radar.py | 373 +++++++++++++++++++++++++++ cloudnetpy/products/product_tools.py | 61 ----- cloudnetpy/utils.py | 23 +- 9 files changed, 488 insertions(+), 393 deletions(-) delete mode 100644 cloudnetpy/products/edr.py create mode 100644 cloudnetpy/products/epsilon_radar.py diff --git a/cloudnetpy/categorize/model.py b/cloudnetpy/categorize/model.py index 805421b8..0387f9f7 100644 --- a/cloudnetpy/categorize/model.py +++ b/cloudnetpy/categorize/model.py @@ -11,7 +11,6 @@ from cloudnetpy import utils from cloudnetpy.cloudnetarray import CloudnetArray -from cloudnetpy.constants import G from cloudnetpy.datasource import DataSource from cloudnetpy.exceptions import ModelDataError @@ -146,27 +145,9 @@ def _get_model_heights(self, alt_site: float) -> npt.NDArray: except KeyError as err: msg = "No 'height' variable in the model file." raise ModelDataError(msg) from err - surface_altitude = self._get_model_surface_altitude(alt_site) + surface_altitude = utils.get_model_surface_altitude(self.dataset, alt_site) return self.to_m(model_heights) + surface_altitude - def _get_model_surface_altitude(self, alt_site: float) -> float | npt.NDArray: - """Returns model surface altitude from geopotential if available. - - For sites in complex terrain (e.g. mountains), the model grid cell - surface height can differ significantly from the actual site altitude. - Using the model's own surface height ensures that thermodynamic fields - are placed at their physically correct absolute heights. - - Note: Model surface altitude might be higher than the site altitude. - """ - if "sfc_height" in self.dataset.variables: - sfc_height = self.dataset.variables["sfc_height"][:] - return sfc_height[:, np.newaxis] - if "sfc_geopotential" in self.dataset.variables: - geopotential = self.dataset.variables["sfc_geopotential"][:] - return geopotential[:, np.newaxis] / G - return alt_site - def calc_attenuations(self, frequency: float) -> None: temperature = self.getvar("temperature") pressure = self.getvar("pressure") diff --git a/cloudnetpy/cli.py b/cloudnetpy/cli.py index 8e485b9e..79c22854 100644 --- a/cloudnetpy/cli.py +++ b/cloudnetpy/cli.py @@ -29,6 +29,16 @@ cloudnet_api_url: Final = "https://cloudnet.fmi.fi/api/" +# Products implemented in cloudnetpy that are not (yet) registered in the +# Cloudnet API. The CLI handles them with explicit branches below since the +# API-driven dispatch (source_product_ids, source_instrument_ids) doesn't +# know about them. +LOCAL_PRODUCTS: Final = frozenset({"epsilon-radar"}) + +# Default plotted variables for products that aren't yet known to the API's +# /products/variables endpoint. +LOCAL_PRODUCT_VARS: Final = {"epsilon-radar": ["epsilon"]} + def run(args: argparse.Namespace, tmpdir: str, client: APIClient) -> None: cat_files = {} @@ -90,6 +100,33 @@ def run(args: argparse.Namespace, tmpdir: str, client: APIClient) -> None: l2_filename = _process_mwrpy_product(product, mwrpy_filepath, args) _plot(l2_filename, product, args) + # Radar + model based products (e.g. epsilon-radar) + if "epsilon-radar" in args.products: + epsilon_filepath = _process_epsilon_radar(cat_files, args, client) + if epsilon_filepath is not None: + _plot(epsilon_filepath, "epsilon-radar", args) + + +def _process_epsilon_radar( + cat_files: dict, args: argparse.Namespace, client: APIClient +) -> str | None: + radar_filepath = cat_files.get("radar") or _fetch_product(args, "radar", client) + if radar_filepath is None: + logging.info("No radar data available for epsilon-radar") + return None + model_filepath = _fetch_model(args, client) + if model_filepath is None: + logging.info("No model data available for epsilon-radar") + return None + filename = f"{args.date.replace('-', '')}_{args.site}_epsilon-radar.nc" + output_file = _create_output_folder("geophysical", args) / filename + products = importlib.import_module("cloudnetpy.products") + products.generate_epsilon_from_radar( + str(radar_filepath), model_filepath, str(output_file) + ) + logging.info("Processed epsilon-radar: %s", output_file) + return str(output_file) + def _process_categorize( input_files: dict, @@ -361,6 +398,8 @@ def _get_source_instruments( source_instruments = {} for product in products: prod, model = _parse_instrument(product) + if prod in LOCAL_PRODUCTS: + continue if model is None: pref = instrument_prefs.get(prod) if pref is not None and not _is_pid(pref): @@ -389,6 +428,8 @@ def _get_product_sources( source_products = {} for product in products: prod, _ = _parse_instrument(product) + if prod in LOCAL_PRODUCTS: + continue product_obj = client.product(prod) if product_obj.source_product_ids: source_products[prod] = list(product_obj.source_product_ids) @@ -546,6 +587,8 @@ def _plot( return if args.variables is not None: variables = args.variables.split(",") + elif product in LOCAL_PRODUCT_VARS: + variables = LOCAL_PRODUCT_VARS[product] else: res = requests.get(f"{cloudnet_api_url}products/variables", timeout=60) res.raise_for_status() @@ -590,7 +633,7 @@ def _process_mwrpy_product( def _parse_products(product_argument: str, client: APIClient) -> list[str]: products = product_argument.split(",") - valid_options = [p.id for p in client.products()] + valid_options = {p.id for p in client.products()} | LOCAL_PRODUCTS valid_products = [] for product in products: prod, _ = _parse_instrument(product) diff --git a/cloudnetpy/output.py b/cloudnetpy/output.py index 5a92236f..b032da38 100644 --- a/cloudnetpy/output.py +++ b/cloudnetpy/output.py @@ -74,6 +74,7 @@ def save_product_file( file_name: str | PathLike, uuid: UUID, copy_from_cat: tuple = (), + extra_sources: tuple[DataSource, ...] = (), ) -> None: """Saves a standard Cloudnet product file. @@ -83,13 +84,18 @@ def save_product_file( file_name: Name of the output file to be generated. uuid: Set specific UUID for the file. copy_from_cat: Variables to be copied from the categorize file. + extra_sources: Additional input DataSources whose ``file_uuid`` should + also be listed in ``source_file_uuids`` (e.g. a model file used + alongside the primary L1b input). """ human_readable_file_type = _get_identifier(short_id) - dimensions = { - "time": len(obj.time), - "height": len(obj.dataset.variables["height"]), - } + height_size = ( + len(obj.data["height"][:]) + if "height" in obj.data + else len(obj.dataset.variables["height"]) + ) + dimensions = {"time": len(obj.time), "height": height_size} with init_file(file_name, dimensions, obj.data, uuid) as nc: nc.cloudnet_file_type = short_id vars_from_source = ( @@ -105,7 +111,7 @@ def save_product_file( f"{human_readable_file_type.capitalize()} products from" f" {obj.dataset.location}" ) - nc.source_file_uuids = get_source_uuids([nc, obj]) + nc.source_file_uuids = get_source_uuids([nc, obj, *extra_sources]) copy_global( obj.dataset, nc, @@ -116,13 +122,28 @@ def save_product_file( "year", "source", "source_instrument_pids", + "instrument_pid", "voodoonet_version", ), ) - merge_history(nc, human_readable_file_type, obj) + _append_extra_sources(nc, extra_sources) + merge_history(nc, human_readable_file_type, obj, extra_sources=extra_sources) nc.references = get_references(short_id) +def _append_extra_sources( + nc: netCDF4.Dataset, extra_sources: tuple[DataSource, ...] +) -> None: + """Merges ``source`` strings from auxiliary input files into ``nc.source``.""" + if not extra_sources: + return + existing = nc.source.split("\n") if "source" in nc.ncattrs() else [] + extras = [src.dataset.source for src in extra_sources if src.dataset.source] + merged = list(dict.fromkeys([*existing, *extras])) + if merged: + nc.source = "\n".join(merged) + + def get_l1b_source(instrument: Instrument) -> str: """Returns level 1b file source.""" parts = [ @@ -173,6 +194,11 @@ def get_references(identifier: str | None = None, extra: list | None = None) -> references += ", https://doi.org/10.1175/JAM2340.1" case "drizzle": references += ", https://doi.org/10.1175/JAM-2181.1" + case "epsilon-radar": + references += ( + ", https://doi.org/10.5194/amt-13-5335-2020" + ", https://doi.org/10.1002/2015JD024543" + ) if extra is not None: for reference in extra: references += f", {reference}" @@ -203,14 +229,21 @@ def get_source_uuids(data: Observations | list[netCDF4.Dataset | DataSource]) -> def merge_history( - nc: netCDF4.Dataset, file_type: str, data: Observations | DataSource + nc: netCDF4.Dataset, + file_type: str, + data: Observations | DataSource, + extra_sources: tuple[DataSource, ...] = (), ) -> None: """Merges history fields from one or several files and creates a new record.""" def extract_history(obj: DataSource | Observations) -> list[str]: if hasattr(obj, "dataset") and hasattr(obj.dataset, "history"): history = obj.dataset.history - if isinstance(obj, Model): + is_model_file = ( + isinstance(obj, Model) + or getattr(obj.dataset, "cloudnet_file_type", "") == "model" + ) + if is_model_file: return [history.split("\n")[-1]] return history.split("\n") return [] @@ -221,6 +254,8 @@ def extract_history(obj: DataSource | Observations) -> list[str]: elif isinstance(data, Observations): for field in fields(data): histories.extend(extract_history(getattr(data, field.name))) + for src in extra_sources: + histories.extend(extract_history(src)) # Remove duplicates histories = list(dict.fromkeys(histories)) @@ -298,7 +333,7 @@ def copy_variables( """ for key in keys: - if key in source.variables: + if key in source.variables and key not in target.variables: fill_value = getattr(source.variables[key], "_FillValue", False) variable = source.variables[key] var_out = target.createVariable( @@ -436,7 +471,7 @@ def _get_identifier(short_id: str) -> str: "der", "ier", "classification-voodoo", - "edr", + "epsilon-radar", ) if short_id not in valid_ids: msg = f"Invalid file identifier: {short_id}" @@ -449,8 +484,8 @@ def _get_identifier(short_id: str) -> str: return "ice effective radius" if short_id == "der": return "droplet effective radius" - if short_id == "edr": - return "eddy dissipation rate" + if short_id == "epsilon-radar": + return "dissipation rate of turbulent kinetic energy" return short_id diff --git a/cloudnetpy/plotting/plot_meta.py b/cloudnetpy/plotting/plot_meta.py index ae7bea80..56234d35 100644 --- a/cloudnetpy/plotting/plot_meta.py +++ b/cloudnetpy/plotting/plot_meta.py @@ -667,6 +667,5 @@ class PlotMeta(NamedTuple): plot_range=(1e-7, 1e-1), log_scale=True, ), - "edr": PlotMeta(log_scale=True), }, } diff --git a/cloudnetpy/products/__init__.py b/cloudnetpy/products/__init__.py index 77dd4ff4..65cf087f 100644 --- a/cloudnetpy/products/__init__.py +++ b/cloudnetpy/products/__init__.py @@ -1,7 +1,7 @@ from .classification import generate_classification from .der import generate_der from .drizzle import generate_drizzle -from .edr import generate_edr +from .epsilon_radar import generate_epsilon_from_radar from .ier import generate_ier from .iwc import generate_iwc from .lwc import generate_lwc diff --git a/cloudnetpy/products/edr.py b/cloudnetpy/products/edr.py deleted file mode 100644 index 728137ee..00000000 --- a/cloudnetpy/products/edr.py +++ /dev/null @@ -1,296 +0,0 @@ -"""Module for creating Cloudnet droplet effective radius -using the Frisch et al. 2002 method. -""" - -import logging -from collections.abc import Callable -from os import PathLike -from uuid import UUID - -import numpy as np -import numpy.typing as npt -from numpy import ma -from scipy.interpolate import CubicSpline - -from cloudnetpy import output -from cloudnetpy.datasource import DataSource -from cloudnetpy.metadata import MetaData -from cloudnetpy.products import product_tools -from cloudnetpy.utils import get_uuid - - -def generate_edr( - radar_file: str | PathLike, - categorize_file: str | PathLike, - output_file: str | PathLike, - uuid: str | UUID | None = None, -) -> UUID: - """Generates Cloudnet eddy dissipation rates - product according to Griesche 2020 / Borque 2016. - - This function calculates the eddy dissipation rate. - The results are written in a netCDF file. - - Args: - radar_file: Radar file name. - categorize_file: Categorize file name. - output_file: Output file name. - uuid: Set specific UUID for the file. - - Returns: - UUID of the generated file. - - Examples: - >>> from cloudnetpy.products import generate_edr - >>> generate_edr('radar_file.nc','categorize.nc', 'edr.nc') - - References: - Griesche, H. J., Seifert, P., Ansmann, A., Baars, H., Barrientos Velasco, - C., Bühl, J., Engelmann, R., Radenz, M., Zhenping, Y., and Macke, A. (2020): - Application of the shipborne remote sensing supersite OCEANET for - profiling of Arctic aerosols and clouds during Polarstern cruise PS106, - Atmos. Meas. Tech., 13, 5335–5358, - from https://doi.org/10.5194/amt-13-5335-2020. - - Borque, P., Luke, E., and Kollias, P. (2016): - On the unified estimation of turbulence eddy dissipation rate using Doppler - cloud radars and lidars, J. Geophys. Res.Atmos., 120, 5972–5989, - from https://doi.org/10.1002/2015JD024543. - - """ - uuid = get_uuid(uuid) - wind_source = WindSource(categorize_file) - with EdrSource(radar_file) as edr_source: - edr_source.append_edr(wind_source) - edr_source.update_time_dependent_variables(wind_source) - date = edr_source.get_date() - attributes = output.add_time_attribute(EDR_ATTRIBUTES, date) - output.update_attributes(edr_source.data, attributes) - output.save_product_file("edr", edr_source, output_file, uuid) - return uuid - - -class WindSource(DataSource): - """Data container for wind for eddy dissipation rate calculations.""" - - def __init__(self, categorize_file: str | PathLike) -> None: - super().__init__(categorize_file) - - def get_wind(self) -> Callable[[npt.NDArray, npt.NDArray], npt.NDArray]: - uwind = self.getvar("uwind") - vwind = self.getvar("vwind") - model_time = self.getvar("model_time") - model_height = self.getvar("model_height") - return product_tools.get_interpolated_horizontal_wind( - uwind, vwind, model_time, model_height - ) - - def get_cat_time(self) -> npt.NDArray: - return self.getvar("time") - - -class EdrSource(DataSource): - """Data container for eddy dissipation rate calculations.""" - - def __init__(self, radar_file: str | PathLike) -> None: - super().__init__(radar_file) - - def update_time_dependent_variables( - self, wind_source: WindSource, copy_from_cat: tuple[str, ...] = () - ) -> None: - """Update the temporal high resolved variables that will be stored in - the product file to the standard Cloudnet product time resolution of - 30s. - """ - self.dataset.variables["time"] = wind_source.dataset.variables["time"] - self.time = wind_source.getvar("time") - vars_to_be_copied_from_source = ( - "altitude", - "latitude", - "longitude", - "time", - "height", - *copy_from_cat, - ) - for key in vars_to_be_copied_from_source: - if self.dataset.variables[key].size > 0: - self.dataset.variables[key] = wind_source.dataset.variables[key] - - def append_edr(self, wind_source: WindSource) -> None: - """Estimate Eddy Dissipation Rate (EDR) according to Griesche et al. 2020.""" - - def interpolate_missing_values( - data: np.ndarray, time_window: np.ndarray - ) -> np.ndarray | None: - """Interpolate missing Doppler velocity values using a cubic spline. - - Returns: - - interpolated_data: np.ndarray - Data with missing values interpolated, or None if insufficient data. - """ - valid_indices = ma.where(data)[0] - if ( - valid_indices.shape[0] < 0.9 * data.shape[0] - ): # Minimum threshold for valid data - return None - if ma.is_masked(data[0]) or ma.is_masked( - data[-1] - ): # Avoid edge interpolation issues - return None - - spline = CubicSpline(time_window[valid_indices], data[valid_indices]) - return spline(time_window) - - def compute_epsilon_from_spectrum( - freq_sp: npt.NDArray, - power_sp: npt.NDArray, - freq_list: list[tuple[float, float]], - constant: float, - ) -> list[float]: - """Compute epsilon values from power spectrum.""" - epsilon_values = [] - - for freq_range in freq_list: - mask = (freq_sp > freq_range[0]) & (freq_sp < freq_range[1]) - if freq_sp[mask].shape[0] > 1 and power_sp[mask].shape[0] > 1: - fit = product_tools.spec_fit(freq_sp, power_sp, freq_range) - slope = fit[0] - intercept = fit[1] - if np.isnan(slope) or not ( - THRESHOLD_SLOPE_MIN < slope < THRESHOLD_SLOPE_MAX - ): - continue - epsilon_values.append((10**intercept / constant) ** (3.0 / 2.0)) - return epsilon_values - - # Predefinded Constants - KOLMOGOROV_CONSTANT = 0.5 - THRESHOLD_SLOPE_MIN = -2 # based on Borque, 2016, JGR (-5/3 +-20%) - THRESHOLD_SLOPE_MAX = -1.33 # based on Borque, 2016, JGR (-5/3 +-20%) - AVERAGING_TIME_MIN = 5 # Averaging time in minutes - H_to_S = 3600 # hours to seconds - H_to_M = 60 # hours to minutes - - # Input data - freq_list = standard_freq_list # predefined list of frequency ranges to analyze - radar_time = self.getvar("time") - radar_time_resolution_sec = np.round( - np.nanmin(np.diff(radar_time)) * H_to_S - ) # in seconds - height = self.getvar("height") - vertical_velocity = self.getvar("v") - - horizontal_wind_function = wind_source.get_wind() - horizontal_wind = horizontal_wind_function(radar_time, height) - categorize_time = wind_source.get_cat_time() - product_resolution = np.round( - np.nanmin(np.diff(categorize_time)) * H_to_S - ) # in seconds - - num_height_levels = height.shape[0] - epsilon = ( - np.ones((int(np.ceil(categorize_time.shape[0])), num_height_levels)) - * -999.0 - ) - logging.debug(product_resolution, radar_time_resolution_sec) - - # Process each time interval - for time_idx, t in enumerate(categorize_time): - logging.debug(time_idx, t) - if t + AVERAGING_TIME_MIN / H_to_M > radar_time[-1]: - break - # - start_idx = np.where(radar_time > t)[0][0] - end_idx = np.where(radar_time > t + AVERAGING_TIME_MIN / H_to_M)[0][0] - - # Process each height level - for height_idx, _h in enumerate(height[:num_height_levels]): - data_velocity = vertical_velocity[start_idx:end_idx, height_idx] - time_window = radar_time[start_idx:end_idx] - - if ma.is_masked(data_velocity): - data_velocity = interpolate_missing_values( - data_velocity, time_window - ) # type: ignore[assignment] - if data_velocity is None: - continue - - horizontal_wind_speed = horizontal_wind[ - int((start_idx + end_idx) / 2), height_idx - ] - if np.isnan(horizontal_wind_speed): - continue - - # Compute frequency spectrum - freq_sp, power_sp = product_tools.periodogram( - data_velocity, radar_time_resolution_sec, horizontal_wind_speed - ) - if np.isnan(freq_sp).all(): - continue - # Compute epsilon for frequency ranges - epsilon_values = compute_epsilon_from_spectrum( - freq_sp, power_sp, freq_list, KOLMOGOROV_CONSTANT - ) - - # Store average epsilon - if epsilon_values: - epsilon[time_idx, height_idx] = np.mean(epsilon_values) - - self.append_data(ma.masked_where(epsilon == -999.0, epsilon), "edr") - - -COMMENTS = { - "general": ( - "This dataset contains the eddy dissipation rate calculated\n" - "according to the approach presented by Griesche et al. (2020). It is based\n" - "on a relationship between the slope of the inertial subrange of the\n" - "turbulent energy spectrum. If spectrum follows a -5/3 slope in a log-log\n" - "the eddy dissipation rate can be calculated. The turbulent energy spectrum\n" - "was derived as the power spectrum of cloud radar Doppler velocity averaged\n" - "over 5 minutes.\n" - ), - "edr": ( - "This variable was calculated for the profiles where 5 minutes of\n" - "continuous cloud radar Doppler velocity was available. In case of\n" - "than lass than 10% missing data, the missing data points were interpolated\n" - "using cubic spine interpolation.\n" - ), -} - - -EDR_ATTRIBUTES = { - "comment": COMMENTS["general"], - "edr": MetaData( - long_name="Eddy dissipation rate", - units="m2 s-3", - ancillary_variables="edr_error", - comment=COMMENTS["edr"], - dimensions=("time", "height"), - ), - "edr_error": MetaData( - long_name="Absolute error in eddy dissipation rate", - units="m2 s-3", - dimensions=("time", "height"), - ), -} - - -standard_freq_list = [ - (5e-3, 5e-2), - (5e-3, 1e-1), - (5e-3, 35e-2), - (5e-3, 8e-1), - (1e-2, 1e-1), - (1e-2, 35e-2), - (1e-2, 8e-1), - (2e-2, 2e-1), - (2e-2, 8e-1), - (35e-3, 2e-1), - (35e-3, 8e-1), - (5e-2, 2e-1), - (5e-2, 8e-1), - (8e-2, 35e-2), - (8e-2, 8e-1), - (1e-1, 5e-1), - (1e-1, 8e-1), -] # adapted from Griesche et al. 2020 diff --git a/cloudnetpy/products/epsilon_radar.py b/cloudnetpy/products/epsilon_radar.py new file mode 100644 index 00000000..04fd8be8 --- /dev/null +++ b/cloudnetpy/products/epsilon_radar.py @@ -0,0 +1,373 @@ +"""Module for creating Cloudnet eddy dissipation rate product, based on the +pipeline of Griesche et al. (2020) with the inertial-subrange slope-acceptance +criterion of Borque et al. (2016). +""" + +from collections.abc import Callable +from os import PathLike +from uuid import UUID + +import numpy as np +import numpy.typing as npt +from numpy import ma +from scipy.interpolate import CubicSpline, RectBivariateSpline + +from cloudnetpy import output, utils +from cloudnetpy.datasource import DataSource +from cloudnetpy.metadata import COMMON_ATTRIBUTES, MetaData +from cloudnetpy.utils import get_uuid + + +def generate_epsilon_from_radar( + radar_file: str | PathLike, + model_file: str | PathLike, + output_file: str | PathLike, + uuid: str | UUID | None = None, +) -> UUID: + """Generates Cloudnet radar-based dissipation rate of turbulent kinetic + energy product. + + Based on the pipeline of Griesche et al. (2020) with the inertial-subrange + slope-acceptance criterion of Borque et al. (2016). + + Args: + radar_file: Cloud radar L1b file name (provides Doppler velocity). + model_file: Cloudnet model file name (provides horizontal wind). + output_file: Output file name. + uuid: Set specific UUID for the file. + + Returns: + UUID of the generated file. + + Examples: + >>> from cloudnetpy.products import generate_epsilon_from_radar + >>> generate_epsilon_from_radar('radar.nc', 'ecmwf.nc', 'epsilon.nc') + + References: + Griesche, H. J., Seifert, P., Ansmann, A., Baars, H., Barrientos + Velasco, C., Bühl, J., Engelmann, R., Radenz, M., Zhenping, Y., and + Macke, A. (2020): Application of the shipborne remote sensing + supersite OCEANET for profiling of Arctic aerosols and clouds during + Polarstern cruise PS106, Atmos. Meas. Tech., 13, 5335-5358, + https://doi.org/10.5194/amt-13-5335-2020. + + Borque, P., Luke, E., and Kollias, P. (2016): On the unified + estimation of turbulence eddy dissipation rate using Doppler cloud + radars and lidars, J. Geophys. Res. Atmos., 120, 5972-5989, + https://doi.org/10.1002/2015JD024543. + """ + uuid = get_uuid(uuid) + with ( + EpsilonRadarSource(radar_file) as epsilon_source, + DataSource(model_file) as model_source, + ): + if epsilon_source.altitude is None: + msg = "Radar file is missing 'altitude' attribute." + raise ValueError(msg) + wind_interp = _get_wind_interpolator( + model_source, alt_site=float(epsilon_source.altitude) + ) + epsilon_source.append_epsilon(wind_interp) + epsilon_source.append_grid_variables() + date = epsilon_source.get_date() + attributes = output.add_time_attribute(EPSILON_RADAR_ATTRIBUTES, date) + output.update_attributes(epsilon_source.data, attributes) + output.save_product_file( + "epsilon-radar", + epsilon_source, + output_file, + uuid, + extra_sources=(model_source,), + ) + return uuid + + +def _get_wind_interpolator( + model: DataSource, alt_site: float +) -> Callable[[npt.NDArray, npt.NDArray], npt.NDArray]: + """Returns a bilinear interpolator for horizontal wind speed on + (time, height_amsl) where height is meters above MSL. + + Model heights are above the model's own surface; shift to absolute MSL so + they share the radar's height frame. In complex terrain the model grid + cell surface can differ from the site altitude, so prefer the model's own + surface field when available. + """ + uwind = model.getvar("uwind") + vwind = model.getvar("vwind") + surface_altitude = utils.get_model_surface_altitude(model.dataset, alt_site) + heights = model.to_m(model.dataset.variables["height"]) + surface_altitude + wind_speed = np.hypot(np.asarray(uwind), np.asarray(vwind)) + mean_height = np.asarray(ma.mean(heights, axis=0)) + common = np.empty((wind_speed.shape[0], mean_height.size)) + for i in range(wind_speed.shape[0]): + common[i] = np.interp(mean_height, np.asarray(heights[i]), wind_speed[i]) + return RectBivariateSpline(model.time, mean_height, common, kx=1, ky=1) + + +class EpsilonRadarSource(DataSource): + """Reads radar Doppler velocity and computes turbulent kinetic energy + dissipation rate (epsilon) on the product grid. + """ + + height: npt.NDArray + + def append_epsilon( + self, wind_interp: Callable[[npt.NDArray, npt.NDArray], npt.NDArray] + ) -> None: + """Estimate the dissipation rate (epsilon) on the 30 s product grid.""" + radar_time = np.asarray(self.getvar("time")) + self._radar_time = radar_time + height = self.height + radar_dt_sec = float(np.round(np.median(np.diff(radar_time)) * 3600.0)) + + v = self.getvar("v") + if isinstance(v, ma.MaskedArray): + v = v.filled(np.nan) + v = np.asarray(v, dtype=np.float64) + + product_time = utils.time_grid(time_step=PRODUCT_TIME_STEP_SEC) + self.time = product_time + wind_at_product = wind_interp(product_time, height) + + n_time = product_time.size + n_height = height.size + epsilon = np.full((n_time, n_height), EPSILON_INVALID, dtype=np.float64) + epsilon_error = np.full((n_time, n_height), EPSILON_INVALID, dtype=np.float64) + + starts = np.searchsorted(radar_time, product_time, side="right") + stops = np.searchsorted( + radar_time, product_time + AVERAGING_TIME_HR, side="right" + ) + + for time_idx in range(n_time): + after, stop = int(starts[time_idx]), int(stops[time_idx]) + if stop <= after: + continue + time_window = radar_time[after:stop] + min_valid = int(MIN_VALID_FRACTION * time_window.size) + + for height_idx in range(n_height): + vel = v[after:stop, height_idx] + nan_mask = np.isnan(vel) + if nan_mask[:2].all() or nan_mask[-2:].all(): + continue + if (vel.size - int(nan_mask.sum())) < min_valid: + continue + if nan_mask.any(): + valid = ~nan_mask + vel = CubicSpline(time_window[valid], vel[valid])(time_window) + + wind_speed = float(wind_at_product[time_idx, height_idx]) + if not np.isfinite(wind_speed) or wind_speed <= 0: + continue + + freq_sp, power_sp = _periodogram(vel, radar_dt_sec, wind_speed) + if freq_sp.size < 2: + continue + + result = _epsilon_from_spectrum(freq_sp, power_sp) + if result is not None: + eps, eps_err = result + epsilon[time_idx, height_idx] = eps + epsilon_error[time_idx, height_idx] = eps_err + + self.append_data( + ma.masked_where(epsilon == EPSILON_INVALID, epsilon), "epsilon" + ) + self.append_data( + ma.masked_where( + (epsilon_error == EPSILON_INVALID) | ~np.isfinite(epsilon_error), + epsilon_error, + ), + "epsilon_error", + ) + + def append_grid_variables(self) -> None: + """Adds time/height/altitude/latitude/longitude on the product grid. + + altitude/latitude/longitude are always written as 1-D arrays on the + product time grid: scalars from stationary radars are broadcast, + time-varying fields from moving platforms are rebinned. + """ + self.append_data(self.time, "time", dtype="f8") + self.append_data(np.asarray(self.height, dtype=np.float32), "height") + for key in ("altitude", "latitude", "longitude"): + if key not in self.dataset.variables: + continue + src = np.asarray(self.dataset.variables[key][:]) + if src.ndim == 0: + values = np.full(self.time.size, float(src), dtype=np.float32) + else: + values = utils.rebin_1d(self._radar_time, src, self.time) + self.append_data(values, key) + + +def _periodogram( + vel: npt.NDArray, delta_t_sec: float, adv_vel: float +) -> tuple[npt.NDArray, npt.NDArray]: + """Compute angular wavenumber spectrum from a Doppler velocity series.""" + n = vel.size + delta_x = delta_t_sec * adv_vel + fft_result = np.fft.rfft(vel)[1:] + power_sp = (delta_x / n) * np.abs(fft_result) ** 2 / np.pi + freq_sp = np.fft.rfftfreq(n, d=delta_x)[1:] * 2.0 * np.pi + keep = power_sp > 0 + return freq_sp[keep], power_sp[keep] + + +def _epsilon_from_spectrum( + freq_sp: npt.NDArray, + power_sp: npt.NDArray, +) -> tuple[float, float] | None: + """Vectorised slope fit on log-log spectrum across all frequency bands. + + Uses cumulative sums over a sorted spectrum so each band's least-squares + fit reduces to a constant-time index lookup. Returns ``(mean, std)`` of + epsilon across accepted bands -- the band-spread used by Griesche et al. + (2020) as the random retrieval uncertainty -- or ``None`` if no band + passes the slope filter. ``std`` is NaN when only a single band passes. + """ + log_f = np.log10(freq_sp) + log_p = np.log10(power_sp) + + csx = np.concatenate(([0.0], np.cumsum(log_f))) + csy = np.concatenate(([0.0], np.cumsum(log_p))) + csxx = np.concatenate(([0.0], np.cumsum(log_f * log_f))) + csxy = np.concatenate(([0.0], np.cumsum(log_f * log_p))) + + lo = np.searchsorted(freq_sp, _FMIN_ARR, side="right") + hi = np.searchsorted(freq_sp, _FMAX_ARR, side="left") + n = hi - lo + + sx = csx[hi] - csx[lo] + sy = csy[hi] - csy[lo] + sxx = csxx[hi] - csxx[lo] + sxy = csxy[hi] - csxy[lo] + + with np.errstate(invalid="ignore", divide="ignore"): + slope = (n * sxy - sx * sy) / (n * sxx - sx * sx) + intercept = (sy - slope * sx) / n + + valid = ( + (n >= 2) + & np.isfinite(slope) + & (slope > THRESHOLD_SLOPE_MIN) + & (slope < THRESHOLD_SLOPE_MAX) + ) + if not valid.any(): + return None + epsilon = (10.0 ** intercept[valid] / KOLMOGOROV_CONSTANT) ** 1.5 + std = float(np.std(epsilon, ddof=1)) if epsilon.size >= 2 else float("nan") + return float(np.mean(epsilon)), std + + +KOLMOGOROV_CONSTANT = 0.5 +THRESHOLD_SLOPE_MIN = -2.0 # -5/3 - 20% (Borque, 2016) +THRESHOLD_SLOPE_MAX = -1.33 # -5/3 + 20% (Borque, 2016) +AVERAGING_TIME_HR = 5.0 / 60.0 +PRODUCT_TIME_STEP_SEC = 30 +MIN_VALID_FRACTION = 0.9 +EPSILON_INVALID = -999.0 + + +COMMENTS = { + "general": ( + "This dataset contains the dissipation rate of turbulent kinetic\n" + "energy calculated using the pipeline of Griesche et al. (2020)\n" + "with the inertial-subrange slope-acceptance criterion of Borque\n" + "et al. (2016). The turbulent energy spectrum is derived as the\n" + "power spectrum of cloud radar Doppler velocity over 5-minute\n" + "windows and converted to a wavenumber spectrum using model\n" + "horizontal wind via Taylor's hypothesis. The dissipation rate is\n" + "recovered from the intercept of a log-log fit across multiple\n" + "frequency bands; bands whose slope falls outside -5/3 +/- 20% are\n" + "rejected and the accepted bands are averaged.\n" + ), + "epsilon": ( + "This variable was calculated for profiles where 5 minutes of\n" + "continuous cloud radar Doppler velocity was available. With less\n" + "than 10% missing data, the missing values were filled by cubic\n" + "spline interpolation.\n" + ), + "epsilon_error": ( + "Random uncertainty estimated as the standard deviation of epsilon\n" + "across the frequency bands accepted by the inertial-subrange\n" + "slope criterion (Griesche et al. 2020). It captures the scatter\n" + "introduced by the multi-band fit but does not include systematic\n" + "contributions from horizontal wind uncertainty or the Kolmogorov\n" + "constant. The variable is masked where only a single band passed\n" + "the slope filter and no spread can be estimated.\n" + ), +} + + +EPSILON_RADAR_ATTRIBUTES = { + "comment": COMMENTS["general"], + "epsilon": MetaData( + long_name="Dissipation rate of turbulent kinetic energy", + units="m2 s-3", + ancillary_variables="epsilon_error", + comment=COMMENTS["epsilon"], + dimensions=("time", "height"), + ), + "epsilon_error": MetaData( + long_name="Absolute error in dissipation rate of turbulent kinetic energy", + units="m2 s-3", + comment=COMMENTS["epsilon_error"], + dimensions=("time", "height"), + ), + "height": COMMON_ATTRIBUTES["height"]._replace(dimensions=("height",)), +} + + +# Frequency-range pairs (rad/m) used to fit the inertial subrange slope. +# Adapted from Griesche et al. 2020 with a multi-decade extension. +freq_array = np.array( + [ + [5e-3, 1.5e-2], + [5e-3, 3e-2], + [5e-3, 5e-2], + [8e-3, 2e-2], + [8e-3, 4e-2], + [8e-3, 6e-2], + [2e-2, 7e-2], + [2e-2, 1e-1], + [2e-2, 1.5e-1], + [3e-2, 1e-1], + [3e-2, 2e-1], + [3e-2, 3e-1], + [5e-2, 1.5e-1], + [5e-2, 3e-1], + [5e-2, 5e-1], + [8e-2, 2e-1], + [8e-2, 4e-1], + [8e-2, 6e-1], + [2e-1, 7e-1], + [2e-1, 1e0], + [2e-1, 1.5e0], + [3e-1, 1e0], + [3e-1, 2e0], + [3e-1, 3e0], + [5e-1, 1.5e0], + [5e-1, 3e0], + [5e-1, 5e0], + [8e-1, 2e0], + [8e-1, 4e0], + [8e-1, 6e0], + [2e0, 7e0], + [2e0, 1e1], + [2e0, 1.5e1], + [3e0, 1e1], + [3e0, 2e1], + [3e0, 3e1], + [5e0, 1.5e1], + [5e0, 3e1], + [5e0, 5e1], + [8e0, 2e1], + [8e0, 4e1], + [8e0, 6e1], + ] +) +_FMIN_ARR = freq_array[:, 0] +_FMAX_ARR = freq_array[:, 1] diff --git a/cloudnetpy/products/product_tools.py b/cloudnetpy/products/product_tools.py index 311eba92..c6a6acbf 100644 --- a/cloudnetpy/products/product_tools.py +++ b/cloudnetpy/products/product_tools.py @@ -1,6 +1,5 @@ """General helper classes and functions for all products.""" -from collections.abc import Callable from dataclasses import dataclass from os import PathLike from typing import NamedTuple @@ -11,8 +10,6 @@ import numpy.typing as npt from numpy import ma from numpy.typing import NDArray -from scipy.interpolate import RectBivariateSpline -from scipy.stats import linregress from cloudnetpy import constants, utils from cloudnetpy.datasource import DataSource @@ -322,61 +319,3 @@ def _get_temperature(categorize_file: str | PathLike) -> npt.NDArray: """Returns interpolated temperatures in Celsius.""" atmosphere = interpolate_model(categorize_file, "temperature") return atmoslib.k2c(atmosphere["temperature"]) - - -def get_interpolated_horizontal_wind( - uwind: npt.NDArray, - vwind: npt.NDArray, - wind_time: npt.NDArray, - wind_height: npt.NDArray, -) -> Callable[[npt.NDArray, npt.NDArray], npt.NDArray]: - horizontal_wind_speed = np.sqrt((uwind) ** 2 + (vwind) ** 2) - return RectBivariateSpline( - wind_time, wind_height, horizontal_wind_speed, kx=1, ky=1 - ) - - -def periodogram(vel_data: npt.NDArray, delta_t: float, adv_vel: float = 10.0) -> tuple: - """Compute frequency and power spectra. - - Based on a time series of mean Doppler velocities. - """ - # Convert input to numpy array - vel_data = np.asarray(vel_data, dtype=float) - if vel_data.size == 0: - return np.nan, np.nan - # Use wavenumber - delta_x = delta_t * adv_vel - # Compute the Fourier transform and scale power spectrum - fft_result = np.fft.rfft(vel_data) - power_sp = (delta_x**2 / (vel_data.size * delta_x)) * np.abs(fft_result) ** 2 - power_sp *= 2 / (2 * np.pi) # Scale by 2 for positive frequencies, normalize by 2π - - # Compute angular frequencies - freq_sp = np.fft.rfftfreq(vel_data.size, d=delta_x) * 2 * np.pi - - if any(power_sp == 0): - power_sp = power_sp[np.where(np.abs(fft_result) != 0)] - freq_sp = freq_sp[np.where(np.abs(fft_result) != 0)] - - return freq_sp, power_sp - - -def spec_fit( - freq: npt.NDArray, power: npt.NDArray, freq_range: tuple[float, float] -) -> tuple[float, float, float, float, float, float]: - """Fit a linear function (power~frequency). - - Within a given frequency range (in log space). - """ - mask = (freq > freq_range[0]) & (freq < freq_range[1]) - freq_lin = np.log10(freq[mask]) - power_lin = np.log10(power[mask]) - valid_mask = ~np.isnan(freq_lin) & ~np.isnan(power_lin) - freq_valid = freq_lin[valid_mask] - power_valid = power_lin[valid_mask] - # slope, intercept, r_value, p_value, std_err - if len(freq_valid) > 1: - # Perform linear regression - return linregress(freq_valid, power_valid) - return (np.nan, np.nan, np.nan, np.nan, np.nan, np.nan) diff --git a/cloudnetpy/utils.py b/cloudnetpy/utils.py index 1bd9ecde..8bf54f18 100644 --- a/cloudnetpy/utils.py +++ b/cloudnetpy/utils.py @@ -25,7 +25,7 @@ ) from cloudnetpy.cloudnetarray import CloudnetArray -from cloudnetpy.constants import SEC_IN_DAY, SEC_IN_HOUR, SEC_IN_MINUTE +from cloudnetpy.constants import SEC_IN_DAY, SEC_IN_HOUR, SEC_IN_MINUTE, G from cloudnetpy.exceptions import ValidTimeStampError @@ -875,6 +875,27 @@ def is_empty_line(line: str) -> bool: return line in ("\n", "\r\n") +def get_model_surface_altitude( + dataset: netCDF4.Dataset, alt_site: float +) -> float | npt.NDArray: + """Returns model surface altitude (m), preferring the model's own field. + + For sites in complex terrain (e.g. mountains), the model grid cell + surface height can differ significantly from the actual site altitude. + Using the model's own surface height ensures that thermodynamic and + wind fields are placed at their physically correct absolute heights. + + Returns a (n_time, 1) array when the model provides a per-timestep + surface field, otherwise the scalar ``alt_site`` fallback. Both shapes + broadcast against (n_time, n_height) model arrays. + """ + if "sfc_height" in dataset.variables: + return dataset.variables["sfc_height"][:][:, np.newaxis] + if "sfc_geopotential" in dataset.variables: + return dataset.variables["sfc_geopotential"][:][:, np.newaxis] / G + return alt_site + + def is_timestamp(timestamp: str) -> bool: """Tests if the input string is formatted as -yyyy-mm-dd hh:mm:ss.""" reg_exp = re.compile(r"-\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}") From a30bc979da2e401f1ee5498bc509bb1f69164270 Mon Sep 17 00:00:00 2001 From: Simo Tukiainen Date: Wed, 6 May 2026 15:16:15 +0300 Subject: [PATCH 6/7] Remove local prods --- cloudnetpy/cli.py | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/cloudnetpy/cli.py b/cloudnetpy/cli.py index 79c22854..eecf282a 100644 --- a/cloudnetpy/cli.py +++ b/cloudnetpy/cli.py @@ -29,16 +29,6 @@ cloudnet_api_url: Final = "https://cloudnet.fmi.fi/api/" -# Products implemented in cloudnetpy that are not (yet) registered in the -# Cloudnet API. The CLI handles them with explicit branches below since the -# API-driven dispatch (source_product_ids, source_instrument_ids) doesn't -# know about them. -LOCAL_PRODUCTS: Final = frozenset({"epsilon-radar"}) - -# Default plotted variables for products that aren't yet known to the API's -# /products/variables endpoint. -LOCAL_PRODUCT_VARS: Final = {"epsilon-radar": ["epsilon"]} - def run(args: argparse.Namespace, tmpdir: str, client: APIClient) -> None: cat_files = {} @@ -398,8 +388,6 @@ def _get_source_instruments( source_instruments = {} for product in products: prod, model = _parse_instrument(product) - if prod in LOCAL_PRODUCTS: - continue if model is None: pref = instrument_prefs.get(prod) if pref is not None and not _is_pid(pref): @@ -428,8 +416,6 @@ def _get_product_sources( source_products = {} for product in products: prod, _ = _parse_instrument(product) - if prod in LOCAL_PRODUCTS: - continue product_obj = client.product(prod) if product_obj.source_product_ids: source_products[prod] = list(product_obj.source_product_ids) @@ -587,8 +573,6 @@ def _plot( return if args.variables is not None: variables = args.variables.split(",") - elif product in LOCAL_PRODUCT_VARS: - variables = LOCAL_PRODUCT_VARS[product] else: res = requests.get(f"{cloudnet_api_url}products/variables", timeout=60) res.raise_for_status() @@ -633,7 +617,7 @@ def _process_mwrpy_product( def _parse_products(product_argument: str, client: APIClient) -> list[str]: products = product_argument.split(",") - valid_options = {p.id for p in client.products()} | LOCAL_PRODUCTS + valid_options = [p.id for p in client.products()] valid_products = [] for product in products: prod, _ = _parse_instrument(product) From 02773957387a5dcb5d33049d8e63b60cc63635a4 Mon Sep 17 00:00:00 2001 From: Simo Tukiainen Date: Wed, 6 May 2026 15:19:53 +0300 Subject: [PATCH 7/7] Fix error colormap --- cloudnetpy/plotting/plot_meta.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cloudnetpy/plotting/plot_meta.py b/cloudnetpy/plotting/plot_meta.py index 56234d35..375687a4 100644 --- a/cloudnetpy/plotting/plot_meta.py +++ b/cloudnetpy/plotting/plot_meta.py @@ -667,5 +667,10 @@ class PlotMeta(NamedTuple): plot_range=(1e-7, 1e-1), log_scale=True, ), + "epsilon_error": PlotMeta( + cmap="inferno", + plot_range=(1e-7, 1e-1), + log_scale=True, + ), }, }