Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ dependencies = [
"refnx",
"refl1d>=1.0.0rc0",
"orsopy",
"svglib<1.6 ; platform_system=='Linux'",
"xhtml2pdf",
"bumps",
]
Expand Down
109 changes: 103 additions & 6 deletions src/easyreflectometry/data/measurement.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
__author__ = 'github.com/arm61'

import os
import warnings
from typing import TextIO
from typing import Union

Expand Down Expand Up @@ -31,11 +32,38 @@ def load_as_dataset(fname: Union[TextIO, str]) -> DataSet1D:
coords_name = 'Qz_' + basename
coords_name = list(data_group['coords'].keys())[0] if coords_name not in data_group['coords'] else coords_name
data_name = list(data_group['data'].keys())[0] if data_name not in data_group['data'] else data_name

# Get the data arrays
x_vals = data_group['coords'][coords_name].values
y_vals = data_group['data'][data_name].values
ye_vals = data_group['data'][data_name].variances
xe_vals = data_group['coords'][coords_name].variances

# Check for zero variances and remove those points
# Only check ye_vals for zero if xe_vals was all zeros (3-column file)
if np.all(xe_vals == 0.0):
Comment thread
rozyczko marked this conversation as resolved.
Outdated
zero_variance_mask = (ye_vals == 0.0)
else:
zero_variance_mask = (ye_vals == 0.0) | (xe_vals == 0.0)
num_zero_variance = np.sum(zero_variance_mask)

if num_zero_variance > 0:
warnings.warn(
f"Removed {num_zero_variance} data point(s) from '{fname}' due to zero variance.",
UserWarning
)
# Keep only points with non-zero variances
valid_mask = ~zero_variance_mask
x_vals = x_vals[valid_mask]
y_vals = y_vals[valid_mask]
ye_vals = ye_vals[valid_mask]
xe_vals = xe_vals[valid_mask]

return DataSet1D(
x=data_group['coords'][coords_name].values,
y=data_group['data'][data_name].values,
ye=data_group['data'][data_name].variances,
xe=data_group['coords'][coords_name].variances,
x=x_vals,
y=y_vals,
ye=ye_vals,
xe=xe_vals,
)


Expand Down Expand Up @@ -72,6 +100,50 @@ def _load_orso(fname: Union[TextIO, str]) -> sc.DataGroup:
variances=np.square(o.data[:, 2]),
)
attrs[f'R_{name}'] = {'orso_header': sc.scalar(Header.asdict(o.info))}

# Check for zero variances and remove those points
total_removed = 0
for key in list(data.keys()):
data_variances = data[key].variances
# Find the corresponding coordinate key for this data
coord_key = None
for coord_name in coords.keys():
if data[key].dims == coords[coord_name].dims:
coord_key = coord_name
break

if coord_key is None:
continue # Skip if no matching coordinate found

coord_variances = coords[coord_key].variances

# Find points with zero variances
zero_variance_mask = (data_variances == 0.0) | (coord_variances == 0.0)
num_zero_variance = np.sum(zero_variance_mask)
total_removed += num_zero_variance

if num_zero_variance > 0:
# Keep only points with non-zero variances
valid_mask = ~zero_variance_mask
data[key] = sc.array(
dims=data[key].dims,
values=data[key].values[valid_mask],
variances=data[key].variances[valid_mask],
unit=data[key].unit if hasattr(data[key], 'unit') else None
)
coords[coord_key] = sc.array(
dims=coords[coord_key].dims,
values=coords[coord_key].values[valid_mask],
variances=coords[coord_key].variances[valid_mask],
unit=coords[coord_key].unit
)

if total_removed > 0:
warnings.warn(
f"Removed {total_removed} data point(s) from '{fname}' due to zero variance.",
UserWarning
)

return sc.DataGroup(data=data, coords=coords, attrs=attrs)


Expand Down Expand Up @@ -120,12 +192,37 @@ def _load_txt(fname: Union[TextIO, str]) -> sc.DataGroup:

data_name = 'R_' + basename
coords_name = 'Qz_' + basename
data = {data_name: sc.array(dims=[coords_name], values=y, variances=np.square(e))}

# Calculate variances
y_variances = np.square(e)
x_variances = np.square(xe)

# Check for zero variances and remove those points
# Only check y_variances for zero if xe was all zeros (3-column file)
if np.all(xe == 0.0):
zero_variance_mask = (y_variances == 0.0)
else:
zero_variance_mask = (y_variances == 0.0) | (x_variances == 0.0)
num_zero_variance = np.sum(zero_variance_mask)

if num_zero_variance > 0:
warnings.warn(
f"Removed {num_zero_variance} data point(s) from '{fname}' due to zero variance.",
UserWarning
)
# Keep only points with non-zero variances
valid_mask = ~zero_variance_mask
x = x[valid_mask]
y = y[valid_mask]
y_variances = y_variances[valid_mask]
x_variances = x_variances[valid_mask]

data = {data_name: sc.array(dims=[coords_name], values=y, variances=y_variances)}
coords = {
data[data_name].dims[0]: sc.array(
dims=[coords_name],
values=x,
variances=np.square(xe),
variances=x_variances,
unit=sc.Unit('1/angstrom'),
)
}
Expand Down
193 changes: 193 additions & 0 deletions tests/_static/ref_zero_var.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,193 @@
# q(A-1), ref, err
7.6135e-03, 9.7447e-01, 2.5462e-02
7.7658e-03, 1.0139e+00, 2.5015e-02
7.9211e-03, 1.0429e+00, 2.4555e-02
8.0796e-03, 9.7839e-01, 2.2475e-02
8.2411e-03, 9.7563e-01, 2.1450e-02
8.4060e-03, 9.8295e-01, 2.0815e-02
8.5741e-03, 1.0347e+00, 2.0846e-02
8.7456e-03, 9.9588e-01, 1.9555e-02
8.9205e-03, 9.8956e-01, 1.8711e-02
9.0989e-03, 1.0045e+00, 1.8276e-02
9.2809e-03, 9.8197e-01, 1.7210e-02
9.4665e-03, 9.9510e-01, 1.6929e-02
9.6558e-03, 9.9095e-01, 1.6384e-02
9.8489e-03, 9.5620e-01, 1.5578e-02
1.0046e-02, 7.4635e-01, 1.2730e-02
1.0247e-02, 5.3653e-01, 1.0084e-02
1.0452e-02, 3.4076e-01, 7.5298e-03
1.0661e-02, 2.1115e-01, 5.5973e-03
1.0874e-02, 1.5915e-01, 4.6622e-03
1.1091e-02, 1.2474e-01, 3.9928e-03
1.1313e-02, 1.0077e-01, 3.4676e-03
1.1540e-02, 6.7939e-02, 2.7482e-03
1.1770e-02, 6.1736e-02, 2.5467e-03
1.2006e-02, 4.8561e-02, 2.2045e-03
1.2246e-02, 4.1062e-02, 1.9733e-03
1.2491e-02, 3.2515e-02, 1.7031e-03
1.2741e-02, 2.7232e-02, 1.5269e-03
1.2995e-02, 2.2746e-02, 1.3521e-03
1.3255e-02, 1.7792e-02, 1.1778e-03
1.3520e-02, 1.2697e-02, 9.7116e-04
1.3791e-02, 1.2313e-02, 9.4167e-04
1.4067e-02, 1.0335e-02, 8.7240e-04
1.4348e-02, 7.9747e-03, 7.5829e-04
1.4635e-02, 7.9688e-03, 7.5098e-04
1.4928e-02, 5.5187e-03, 6.3000e-04
1.5226e-02, 4.3436e-03, 5.7090e-04
1.5531e-02, 3.7503e-03, 5.3663e-04
1.5841e-02, 3.4079e-03, 5.2009e-04
1.6158e-02, 1.5589e-03, 3.6834e-04
1.6481e-02, 1.8615e-03, 3.9704e-04
1.6811e-02, 8.4691e-04, 2.5540e-04
1.7147e-02, 1.2625e-03, 3.0629e-04
1.7490e-02, 1.0137e-03, 2.7099e-04
1.7840e-02, 6.0481e-04, 2.0163e-04
1.8197e-02, 7.6578e-04, 2.2110e-04
1.8561e-02, 9.3992e-04, 2.4274e-04
1.8932e-02, 4.2138e-04, 1.5928e-04
1.9311e-02, 4.8892e-04, 3.9837e-05
1.9697e-02, 4.4982e-04, 3.6981e-05
2.0091e-02, 4.3653e-04, 3.5081e-05
2.0493e-02, 3.9129e-04, 3.1923e-05
2.0902e-02, 4.5988e-04, 3.3398e-05
2.1320e-02, 4.7029e-04, 3.2732e-05
2.1747e-02, 4.1789e-04, 2.9339e-05
2.2182e-02, 5.5580e-04, 3.3223e-05
2.2625e-02, 5.5040e-04, 3.2084e-05
2.3078e-02, 5.9884e-04, 3.2446e-05
2.3539e-02, 5.8326e-04, 3.0862e-05
2.4010e-02, 6.4411e-04, 3.1161e-05
2.4490e-02, 6.0736e-04, 2.9565e-05
2.4980e-02, 6.7219e-04, 3.0222e-05
2.5480e-02, 7.6819e-04, 3.1418e-05
2.5989e-02, 7.7232e-04, 3.0737e-05
2.6509e-02, 7.4941e-04, 2.9341e-05
2.7039e-02, 7.2715e-04, 2.7959e-05
2.7580e-02, 8.1394e-04, 2.9100e-05
2.8132e-02, 7.9111e-04, 2.7939e-05
2.8694e-02, 7.7717e-04, 2.6943e-05
2.9268e-02, 7.7420e-04, 2.6248e-05
2.9854e-02, 7.4343e-04, 2.5134e-05
3.0451e-02, 7.6124e-04, 2.4730e-05
3.1060e-02, 7.9219e-04, 2.4832e-05
3.1681e-02, 7.3167e-04, 2.3434e-05
3.2315e-02, 6.7479e-04, 2.2047e-05
3.2961e-02, 6.4508e-04, 2.1919e-05
3.3620e-02, 6.0626e-04, 2.0950e-05
3.4293e-02, 6.0048e-04, 2.0702e-05
3.4978e-02, 5.4601e-04, 1.9966e-05
3.5678e-02, 5.1839e-04, 1.9762e-05
3.6392e-02, 4.3221e-04, 1.8272e-05
3.7119e-02, 4.7017e-04, 1.9439e-05
3.7862e-02, 4.1933e-04, 1.9311e-05
3.8619e-02, 3.6578e-04, 1.7446e-05
3.9391e-02, 2.9619e-04, 1.4991e-05
4.0179e-02, 2.6859e-04, 1.4041e-05
4.0983e-02, 2.1978e-04, 1.2514e-05
4.1802e-02, 1.5957e-04, 6.1469e-06
4.2638e-02, 1.4973e-04, 5.6957e-06
4.3491e-02, 1.3650e-04, 5.1658e-06
4.4361e-02, 1.0894e-04, 4.3978e-06
4.5248e-02, 8.4313e-05, 3.6870e-06
4.6153e-02, 6.6209e-05, 3.1442e-06
4.7076e-02, 4.7721e-05, 2.5218e-06
4.8018e-02, 3.8550e-05, 2.1951e-06
4.8978e-02, 2.4256e-05, 1.6738e-06
4.9958e-02, 1.2668e-05, 1.1440e-06
5.0957e-02, 9.8042e-06, 9.8695e-07
5.1976e-02, 5.5653e-06, 7.2030e-07
5.3016e-02, 4.6001e-06, 6.3347e-07
5.4076e-02, 4.5340e-06, 6.0180e-07
5.5157e-02, 6.0764e-06, 6.7701e-07
5.6261e-02, 1.0837e-05, 8.8046e-07
5.7386e-02, 1.5080e-05, 1.0136e-06
5.8534e-02, 1.8215e-05, 1.0789e-06
5.9704e-02, 2.4590e-05, 1.2253e-06
6.0898e-02, 2.9010e-05, 1.3040e-06
6.2116e-02, 3.4848e-05, 1.3756e-06
6.3359e-02, 3.7815e-05, 1.4164e-06
6.4626e-02, 3.6734e-05, 1.3531e-06
6.5918e-02, 3.8991e-05, 1.3649e-06
6.7237e-02, 3.7503e-05, 1.3006e-06
6.8581e-02, 3.7938e-05, 1.2808e-06
6.9953e-02, 3.5174e-05, 1.1989e-06
7.1352e-02, 3.1424e-05, 1.1148e-06
7.2779e-02, 2.5972e-05, 9.9229e-07
7.4235e-02, 2.1881e-05, 8.9970e-07
7.5719e-02, 1.8255e-05, 8.3013e-07
7.7234e-02, 1.3071e-05, 6.9096e-07
7.8778e-02, 9.7935e-06, 5.9432e-07
8.0354e-02, 6.2634e-06, 4.8657e-07
8.1961e-02, 2.9788e-06, 3.3668e-07
8.3600e-02, 2.0870e-06, 2.8795e-07
8.5272e-02, 1.4029e-06, 2.4111e-07
8.6978e-02, 1.1454e-06, 2.2760e-07
8.8717e-02, 2.3790e-06, 3.1299e-07
9.0492e-02, 3.0687e-06, 3.4127e-07
9.2301e-02, 5.3517e-06, 4.4876e-07
9.4147e-02, 6.2487e-06, 4.7367e-07
9.6030e-02, 7.6304e-06, 5.0400e-07
9.7951e-02, 9.0528e-06, 5.4178e-07
9.9910e-02, 9.6743e-06, 5.5215e-07
1.0191e-01, 9.6967e-06, 5.4148e-07
1.0395e-01, 8.7491e-06, 5.0450e-07
1.0603e-01, 7.5989e-06, 4.5372e-07
1.0815e-01, 5.2862e-06, 3.6156e-07
1.1031e-01, 3.8920e-06, 2.9952e-07
1.1251e-01, 2.9870e-06, 2.8059e-07
1.1477e-01, 1.6669e-06, 1.8473e-07
1.1706e-01, 8.6049e-07, 1.2721e-07
1.1940e-01, 8.7034e-07, 1.2368e-07
1.2179e-01, 1.0986e-06, 1.3499e-07
1.2423e-01, 1.9180e-06, 1.7343e-07
1.2671e-01, 2.1959e-06, 1.8012e-07
1.2924e-01, 3.1488e-06, 2.1032e-07
1.3183e-01, 3.6948e-06, 2.2170e-07
1.3447e-01, 3.8395e-06, 2.2005e-07
1.3716e-01, 3.5445e-06, 2.0548e-07
1.3990e-01, 2.8194e-06, 1.7880e-07
1.4270e-01, 2.1205e-06, 1.5114e-07
1.4555e-01, 1.4023e-06, 1.1952e-07
1.4846e-01, 1.0245e-06, 9.9819e-08
1.5143e-01, 6.8465e-07, 7.9165e-08
1.5446e-01, 9.5744e-07, 9.2363e-08
1.5755e-01, 1.2631e-06, 1.0379e-07
1.6070e-01, 1.4797e-06, 1.1061e-07
1.6391e-01, 2.0772e-06, 1.3256e-07
1.6719e-01, 1.8731e-06, 1.2463e-07
1.7054e-01, 1.6925e-06, 1.1733e-07
1.7395e-01, 1.4556e-06, 1.0970e-07
1.7742e-01, 1.1049e-06, 9.7703e-08
1.8097e-01, 6.5186e-07, 7.5546e-08
1.8459e-01, 5.9841e-07, 7.3744e-08
1.8828e-01, 8.0107e-07, 8.9328e-08
1.9205e-01, 1.0381e-06, 1.0071e-07
1.9589e-01, 1.1199e-06, 9.9764e-08
1.9981e-01, 1.2928e-06, 1.0516e-07
2.0381e-01, 8.6691e-07, 8.5084e-08
2.0788e-01, 7.0954e-07, 7.4143e-08
2.1204e-01, 5.3766e-07, 6.2839e-08
2.1628e-01, 5.7456e-07, 6.4332e-08
2.2061e-01, 7.8023e-07, 0.0
2.2502e-01, 8.3610e-07, 7.4847e-08
2.2952e-01, 9.8054e-07, 7.9847e-08
2.3411e-01, 6.9297e-07, 6.5946e-08
2.3879e-01, 8.1531e-07, 7.0975e-08
2.4357e-01, 5.4552e-07, 5.7255e-08
2.4844e-01, 6.1100e-07, 5.9889e-08
2.5341e-01, 5.1126e-07, 5.4720e-08
2.5847e-01, 7.3334e-07, 6.4703e-08
2.6364e-01, 6.5027e-07, 0.0
2.6892e-01, 7.0689e-07, 6.6486e-08
2.7430e-01, 6.6355e-07, 6.3295e-08
2.7978e-01, 6.1870e-07, 6.0932e-08
2.8538e-01, 4.6847e-07, 0.0
2.9108e-01, 5.1032e-07, 5.5877e-08
2.9691e-01, 4.6251e-07, 5.3567e-08
3.0284e-01, 3.6602e-07, 4.8650e-08
3.0890e-01, 4.0211e-07, 5.2733e-08
3.1508e-01, 3.6333e-07, 5.3747e-08
3.2138e-01, 3.1715e-07, 0.0
3.2781e-01, 3.4336e-07, 0.0
3.3436e-01, 3.2231e-07, 0.0
45 changes: 45 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from easyreflectometry.data.measurement import _load_orso
from easyreflectometry.data.measurement import _load_txt
from easyreflectometry.data.measurement import load
from easyreflectometry.data.measurement import load_as_dataset

PATH_STATIC = os.path.join(os.path.dirname(easyreflectometry.__file__), '..', '..', 'tests', '_static')

Expand Down Expand Up @@ -103,3 +104,47 @@ def test_txt(self):
assert_almost_equal(er_data['coords'][coords_name].values, n_data[:, 0])
assert_almost_equal(er_data['data'][data_name].variances, np.square(n_data[:, 2]))
assert_almost_equal(er_data['coords'][coords_name].variances, np.square(n_data[:, 3]))

def test_zero_variance_filtering(self):
    """Verify that loading a file containing zero-variance points filters them out.

    Uses the ``ref_zero_var.txt`` fixture (a 3-column q/ref/err file with a
    handful of ``0.0`` error entries) and checks that:

    * a ``UserWarning`` reporting the exact number of removed points is issued,
    * all four arrays of the returned ``DataSet1D`` shrink consistently,
    * no zero variances survive,
    * the surviving points match the non-zero-error rows of the raw file.
    """
    import warnings

    fpath = os.path.join(PATH_STATIC, 'ref_zero_var.txt')

    # Derive the expected counts independently of the loader, straight from
    # the raw file, so the test does not trust the code under test.
    original_data = np.loadtxt(fpath, delimiter=',', comments='#')
    original_count = len(original_data)
    zero_variance_count = np.sum(original_data[:, 2] == 0.0)
    expected_filtered_count = original_count - zero_variance_count

    # Capture all warnings emitted while loading.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        dataset = load_as_dataset(fpath)

    # A warning naming the exact number of removed points must have fired.
    expected_fragment = f'Removed {zero_variance_count} data point(s)'
    warning_messages = [str(warning.message) for warning in caught]
    self.assertTrue(
        any(expected_fragment in msg for msg in warning_messages),
        f'Expected warning about removed points not found in: {warning_messages}',
    )

    # All four arrays are filtered in lockstep (the original test omitted xe).
    self.assertEqual(len(dataset.x), expected_filtered_count)
    self.assertEqual(len(dataset.y), expected_filtered_count)
    self.assertEqual(len(dataset.ye), expected_filtered_count)
    self.assertEqual(len(dataset.xe), expected_filtered_count)

    # No zero variances may remain after filtering.
    self.assertTrue(np.all(dataset.ye > 0.0))

    # The surviving points are exactly the rows with non-zero error, with
    # ye holding the squared error column.
    non_zero_mask = original_data[:, 2] != 0.0
    assert_almost_equal(dataset.x, original_data[non_zero_mask, 0])
    assert_almost_equal(dataset.y, original_data[non_zero_mask, 1])
    assert_almost_equal(dataset.ye, np.square(original_data[non_zero_mask, 2]))
Loading