From f6b9b0c7dbeedb376e3d0d2948f28c5daa0bb239 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:23:18 +0200 Subject: [PATCH 1/5] Update DataConverter --- flixopt/core.py | 395 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 268 insertions(+), 127 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 08171c4cb..1bd6fe52c 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -5,6 +5,7 @@ import logging import warnings +from itertools import permutations from typing import Literal, Union import numpy as np @@ -160,271 +161,411 @@ def agg_weight(self): class DataConverter: """ - Converts data into xarray.DataArray with specified coordinates. - - Supports: - - Scalars (broadcast to all dimensions) - - 1D data (np.ndarray, pd.Series, single-column DataFrame) - - Multi-dimensional arrays - - xr.DataArray (validated and potentially broadcast) - - Simple 1D data is matched to one dimension and broadcast to others. - DataArrays can have any number of dimensions. + Converts various data types into xarray.DataArray with specified target coordinates. + + This converter handles intelligent dimension matching and broadcasting to ensure + the output DataArray always conforms to the specified coordinate structure. 
+ + Supported input types: + - Scalars: int, float, np.number (broadcast to all target dimensions) + - 1D data: np.ndarray, pd.Series, single-column DataFrame (matched by length/index) + - Multi-dimensional arrays: np.ndarray, DataFrame (matched by shape) + - xr.DataArray: validated and potentially broadcast to target dimensions + + The converter uses smart matching strategies: + - Series: matched by exact index comparison + - 1D arrays: matched by length to target coordinates + - Multi-dimensional arrays: matched by shape permutation analysis + - DataArrays: validated for compatibility and broadcast as needed """ @staticmethod - def _match_series_to_dimension( - data: pd.Series, coords: dict[str, pd.Index], target_dims: tuple[str, ...] + def _match_series_by_index_alignment( + data: pd.Series, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...] ) -> xr.DataArray: """ - Match pandas Series to a dimension by comparing its index to coordinates. + Match pandas Series to target dimension by exact index comparison. + + Attempts to find a target dimension whose coordinates exactly match + the Series index values, ensuring proper alignment. Args: - data: pandas Series - coords: Available coordinates - target_dims: Target dimension names + data: pandas Series to convert + target_coords: Available target coordinates {dim_name: coordinate_index} + target_dims: Target dimension names to consider for matching Returns: - DataArray with the Series matched to appropriate dimension by index + DataArray with Series matched to the appropriate dimension Raises: - ConversionError: If Series index doesn't match any target dimension coordinates + ConversionError: If Series cannot be matched to any target dimension, + or if no target dimensions provided for multi-element Series """ + # Handle edge case: no target dimensions if len(target_dims) == 0: if len(data) != 1: raise ConversionError( - f'Cannot convert multi-element Series without target dimensions. 
Got \n{data}\n and \n{coords}' + f'Cannot convert multi-element Series without target dimensions. ' + f'Series has {len(data)} elements but no target dimensions specified.' ) return xr.DataArray(data.iloc[0]) - # Try to match Series index to coordinates + # Attempt exact index matching with each target dimension for dim_name in target_dims: - if data.index.equals(coords[dim_name]): - return xr.DataArray(data.values.copy(), coords={dim_name: coords[dim_name]}, dims=dim_name) - - # If no index matches, raise error - raise ConversionError(f'Series index does not match any target dimension coordinates: {target_dims}') + target_index = target_coords[dim_name] + if data.index.equals(target_index): + return xr.DataArray(data.values.copy(), coords={dim_name: target_index}, dims=dim_name) + + # No exact matches found + available_lengths = {dim: len(target_coords[dim]) for dim in target_dims} + raise ConversionError( + f'Series index does not match any target dimension coordinates. ' + f'Series length: {len(data)}, available coordinate lengths: {available_lengths}' + ) @staticmethod - def _match_array_to_dimension( - data: np.ndarray, coords: dict[str, pd.Index], target_dims: tuple[str, ...] + def _match_1d_array_by_length( + data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...] ) -> xr.DataArray: """ - Match 1D numpy array to a dimension by comparing its length to coordinate lengths. + Match 1D numpy array to target dimension by length comparison. + + Finds target dimensions whose coordinate length matches the array length. + Requires unique length match to avoid ambiguity. 
Args: - data: 1D numpy array - coords: Available coordinates - target_dims: Target dimension names + data: 1D numpy array to convert + target_coords: Available target coordinates {dim_name: coordinate_index} + target_dims: Target dimension names to consider for matching Returns: - DataArray with the array matched to appropriate dimension by length + DataArray with array matched to the uniquely identified dimension Raises: - ConversionError: If array length doesn't uniquely match a target dimension + ConversionError: If array length matches zero or multiple target dimensions, + or if no target dimensions provided for multi-element array """ + # Handle edge case: no target dimensions if len(target_dims) == 0: if len(data) != 1: - raise ConversionError('Cannot convert multi-element array without target dimensions') + raise ConversionError( + f'Cannot convert multi-element array without target dimensions. Array has {len(data)} elements.' + ) return xr.DataArray(data[0]) - # Find dimensions with matching lengths + # Find all dimensions with matching lengths + array_length = len(data) matching_dims = [] + coordinate_lengths = {} + for dim_name in target_dims: - if len(data) == len(coords[dim_name]): + coord_length = len(target_coords[dim_name]) + coordinate_lengths[dim_name] = coord_length + if array_length == coord_length: matching_dims.append(dim_name) + # Validate matching results if len(matching_dims) == 0: - dim_info = {dim: len(coords[dim]) for dim in target_dims} - raise ConversionError(f'Array length {len(data)} matches none of the target dimensions: {dim_info}') + raise ConversionError( + f'Array length {array_length} does not match any target dimension lengths: {coordinate_lengths}' + ) elif len(matching_dims) > 1: raise ConversionError( - f'Array length {len(data)} matches multiple dimensions: {matching_dims}. Cannot determine which ' - f'dimension to use. To avoid this error, convert the array to a DataArray with the correct dimensions ' - f'yourself.' 
+ f'Array length {array_length} matches multiple dimensions: {matching_dims}. ' + f'Cannot uniquely determine target dimension. Consider using explicit ' + f'dimension specification or converting to DataArray manually.' ) - # Match to the single matching dimension - match_dim = matching_dims[0] - return xr.DataArray(data.copy(), coords={match_dim: coords[match_dim]}, dims=match_dim) + # Create DataArray with the uniquely matched dimension + matched_dim = matching_dims[0] + return xr.DataArray(data.copy(), coords={matched_dim: target_coords[matched_dim]}, dims=matched_dim) @staticmethod - def _match_multidim_array_to_dimensions( - data: np.ndarray, coords: dict[str, pd.Index], target_dims: tuple[str, ...] + def _match_multidim_array_by_shape_permutation( + data: np.ndarray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...] ) -> xr.DataArray: """ - Match multi-dimensional numpy array to dimensions by finding the correct shape permutation. + Match multi-dimensional array to target dimensions using shape permutation analysis. + + Analyzes all possible mappings between array shape and target coordinate lengths + to find the unique valid dimension assignment. 
Args: - data: Multi-dimensional numpy array - coords: Available coordinates - target_dims: Target dimension names + data: Multi-dimensional numpy array to convert + target_coords: Available target coordinates {dim_name: coordinate_index} + target_dims: Target dimension names to consider for matching Returns: - DataArray with dimensions matched by shape + DataArray with array dimensions mapped to target dimensions by shape Raises: - ConversionError: If array dimensions cannot be uniquely matched to coordinates + ConversionError: If array shape cannot be uniquely mapped to target dimensions, + or if no target dimensions provided for multi-element array """ + # Handle edge case: no target dimensions if len(target_dims) == 0: if data.size != 1: - raise ConversionError('Cannot convert multi-element array without target dimensions') + raise ConversionError( + f'Cannot convert multi-element array without target dimensions. ' + f'Array has {data.size} elements with shape {data.shape}.' + ) return xr.DataArray(data.item()) - from itertools import permutations - array_shape = data.shape - coord_lengths = {dim: len(coords[dim]) for dim in target_dims} + coordinate_lengths = {dim: len(target_coords[dim]) for dim in target_dims} - # Find all possible dimension mappings - possible_mappings = [] - for dim_subset in permutations(target_dims, data.ndim): - if all(array_shape[i] == coord_lengths[dim_subset[i]] for i in range(len(dim_subset))): - possible_mappings.append(dim_subset) + # Find all valid dimension permutations that match the array shape + valid_mappings = [] + for dim_permutation in permutations(target_dims, data.ndim): + shape_matches = all( + array_shape[i] == coordinate_lengths[dim_permutation[i]] for i in range(len(dim_permutation)) + ) + if shape_matches: + valid_mappings.append(dim_permutation) - if len(possible_mappings) == 0: - shape_info = f'Array shape: {array_shape}, Coordinate lengths: {coord_lengths}' - raise ConversionError(f'Array dimensions do not match 
any coordinate lengths. {shape_info}') + # Validate mapping results + if len(valid_mappings) == 0: + raise ConversionError( + f'Array shape {array_shape} cannot be mapped to any combination of target ' + f'coordinate lengths: {coordinate_lengths}. Consider reshaping the array ' + f'or adjusting target coordinates.' + ) - if len(possible_mappings) > 1: + if len(valid_mappings) > 1: raise ConversionError( - f'Array shape {array_shape} matches multiple dimension orders: {possible_mappings}. ' - 'Cannot uniquely determine dimension mapping.' + f'Array shape {array_shape} matches multiple dimension combinations: ' + f'{valid_mappings}. Cannot uniquely determine dimension mapping. ' + f'Consider using explicit dimension specification.' ) - matched_dims = possible_mappings[0] - matched_coords = {dim: coords[dim] for dim in matched_dims} + # Create DataArray with the uniquely determined mapping + matched_dims = valid_mappings[0] + matched_coords = {dim: target_coords[dim] for dim in matched_dims} return xr.DataArray(data.copy(), coords=matched_coords, dims=matched_dims) @staticmethod - def _broadcast_to_target( - data: xr.DataArray, coords: dict[str, pd.Index], target_dims: tuple[str, ...] + def _broadcast_dataarray_to_target_specification( + source_data: xr.DataArray, target_coords: dict[str, pd.Index], target_dims: tuple[str, ...] ) -> xr.DataArray: """ - Broadcast DataArray to target dimensions with validation. + Broadcast DataArray to conform to target coordinate and dimension specification. + + Performs comprehensive validation and broadcasting to ensure the result exactly + matches the target specification. Handles scalar expansion, dimension validation, + coordinate compatibility checking, and broadcasting to additional dimensions. 
+ + Args: + source_data: Source DataArray to broadcast + target_coords: Target coordinates {dim_name: coordinate_index} + target_dims: Target dimension names in desired order + + Returns: + DataArray broadcast to target specification with proper dimension ordering - Handles all cases: scalar expansion, dimension validation, coordinate matching, - and broadcasting to additional dimensions using xarray's capabilities. + Raises: + ConversionError: If broadcasting is impossible due to incompatible dimensions + or coordinate mismatches """ - # Cannot reduce dimensions of data - if len(data.dims) > len(target_dims): - raise ConversionError(f'Cannot reduce DataArray from {len(data.dims)} to {len(target_dims)} dimensions') + # Validate: cannot reduce dimensions + if len(source_data.dims) > len(target_dims): + raise ConversionError( + f'Cannot reduce DataArray dimensionality from {len(source_data.dims)} ' + f'to {len(target_dims)} dimensions. Source dims: {source_data.dims}, ' + f'target dims: {target_dims}' + ) - # Validate coordinate compatibility - for dim in data.dims: - if dim not in target_dims: - raise ConversionError(f'Source dimension "{dim}" not found in target dimensions {target_dims}') + # Validate: all source dimensions must exist in target + missing_dims = set(source_data.dims) - set(target_dims) + if missing_dims: + raise ConversionError( + f'Source DataArray has dimensions {missing_dims} not present in target dimensions {target_dims}' + ) - if not np.array_equal(data.coords[dim].values, coords[dim].values): - raise ConversionError(f'DataArray {dim} coordinates do not match target coordinates') + # Validate: coordinate compatibility for overlapping dimensions + for dim in source_data.dims: + if dim in source_data.coords and dim in target_coords: + source_coords = source_data.coords[dim] + target_coords_for_dim = target_coords[dim] - # Use xarray's broadcast_like for efficient expansion and broadcasting - target_template = xr.DataArray( - 
np.empty([len(coords[dim]) for dim in target_dims]), coords=coords, dims=target_dims - ) - return data.broadcast_like(target_template).transpose(*target_dims) + if not np.array_equal(source_coords.values, target_coords_for_dim.values): + raise ConversionError( + f'Coordinate mismatch for dimension "{dim}". ' + f'Source and target coordinates have different values.' + ) + + # Create target template for broadcasting + target_shape = [len(target_coords[dim]) for dim in target_dims] + target_template = xr.DataArray(np.empty(target_shape), coords=target_coords, dims=target_dims) + + # Perform broadcasting and ensure proper dimension ordering + broadcasted = source_data.broadcast_like(target_template) + return broadcasted.transpose(*target_dims) @classmethod def to_dataarray( cls, - data: float | int | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray, + data: int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray, coords: dict[str, pd.Index] | None = None, ) -> xr.DataArray: """ - Convert various data types to xarray.DataArray with specified coordinates. + Convert various data types to xarray.DataArray with specified target coordinates. + + This is the main conversion method that intelligently handles different input types + and ensures the result conforms to the specified coordinate structure through + smart dimension matching and broadcasting. Args: - data: Data to convert (scalar, array, Series, DataFrame, or DataArray) - coords: Dictionary mapping dimension names to coordinate indices + data: Input data to convert. Supported types: + - Scalars: int, float, np.integer, np.floating + - Arrays: np.ndarray (1D and multi-dimensional) + - Pandas: pd.Series, pd.DataFrame + - xarray: xr.DataArray + coords: Target coordinate specification as {dimension_name: coordinate_index}. + All coordinate indices must be pandas.Index objects. 
Returns: - DataArray with the converted data broadcast to target dimensions + DataArray conforming to the target coordinate specification, + with input data appropriately matched and broadcast Raises: - ConversionError: If data cannot be converted or dimensions are ambiguous + ConversionError: If data type is unsupported, conversion fails, + or broadcasting to target coordinates is impossible + + Examples: + # Scalar broadcasting + >>> coords = {'x': pd.Index([1, 2, 3]), 'y': pd.Index(['a', 'b'])} + >>> converter.to_dataarray(42, coords) + # Returns: DataArray with shape (3, 2), all values = 42 + + # Series index matching + >>> series = pd.Series([10, 20, 30], index=[1, 2, 3]) + >>> converter.to_dataarray(series, coords) + # Returns: DataArray matched to 'x' dimension, broadcast to 'y' + + # Array shape matching + >>> array = np.array([[1, 2], [3, 4], [5, 6]]) # Shape (3, 2) + >>> converter.to_dataarray(array, coords) + # Returns: DataArray with dimensions ('x', 'y') based on shape """ + # Prepare and validate target specification if coords is None: coords = {} - validated_coords, target_dims = cls._prepare_coordinates(coords) + validated_coords, target_dims = cls._validate_and_prepare_target_coordinates(coords) - # Step 1: Convert input data to initial DataArray + # Convert input data to intermediate DataArray based on type if isinstance(data, (int, float, np.integer, np.floating)): - # Scalar values + # Scalar values - create scalar DataArray intermediate = xr.DataArray(data.item() if hasattr(data, 'item') else data) elif isinstance(data, np.ndarray): - if data.ndim == 1: - intermediate = cls._match_array_to_dimension(data, validated_coords, target_dims) + # NumPy arrays - dispatch based on dimensionality + if data.ndim == 0: + # 0-dimensional array (scalar) + intermediate = xr.DataArray(data.item()) + elif data.ndim == 1: + # 1-dimensional array + intermediate = cls._match_1d_array_by_length(data, validated_coords, target_dims) else: - intermediate = 
cls._match_multidim_array_to_dimensions(data, validated_coords, target_dims) + # Multi-dimensional array + intermediate = cls._match_multidim_array_by_shape_permutation(data, validated_coords, target_dims) elif isinstance(data, pd.Series): + # Pandas Series - validate and match by index if isinstance(data.index, pd.MultiIndex): - raise ConversionError( - 'Series index must be a single level Index. Multi-index Series are not supported.' - ) - intermediate = cls._match_series_to_dimension(data, validated_coords, target_dims) + raise ConversionError('MultiIndex Series are not supported. Please use a single-level index.') + intermediate = cls._match_series_by_index_alignment(data, validated_coords, target_dims) elif isinstance(data, pd.DataFrame): + # Pandas DataFrame - validate and convert if isinstance(data.index, pd.MultiIndex): - raise ConversionError( - 'DataFrame index must be a single level Index. Multi-index DataFrames are not supported.' - ) + raise ConversionError('MultiIndex DataFrames are not supported. 
Please use a single-level index.') if len(data.columns) == 0 or data.empty: - raise ConversionError('DataFrame must have at least one column.') + raise ConversionError('DataFrame must have at least one column and cannot be empty.') if len(data.columns) == 1: # Single-column DataFrame - treat as Series - intermediate = cls._match_series_to_dimension(data.iloc[:, 0], validated_coords, target_dims) + series_data = data.iloc[:, 0] + intermediate = cls._match_series_by_index_alignment(series_data, validated_coords, target_dims) else: # Multi-column DataFrame - treat as multi-dimensional array - intermediate = cls._match_multidim_array_to_dimensions(data.to_numpy(), validated_coords, target_dims) + intermediate = cls._match_multidim_array_by_shape_permutation( + data.to_numpy(), validated_coords, target_dims + ) elif isinstance(data, xr.DataArray): + # Existing DataArray - use as-is intermediate = data.copy() else: - raise ConversionError(f'Unsupported data type: {type(data).__name__}.') + # Unsupported data type + supported_types = [ + 'int', + 'float', + 'np.integer', + 'np.floating', + 'np.ndarray', + 'pd.Series', + 'pd.DataFrame', + 'xr.DataArray', + ] + raise ConversionError( + f'Unsupported data type: {type(data).__name__}. Supported types: {", ".join(supported_types)}' + ) - # Step 2: Broadcast to target dimensions - return cls._broadcast_to_target(intermediate, validated_coords, target_dims) + # Broadcast intermediate result to target specification + return cls._broadcast_dataarray_to_target_specification(intermediate, validated_coords, target_dims) @staticmethod - def _prepare_coordinates(coords: dict[str, pd.Index]) -> tuple[dict[str, pd.Index], tuple[str, ...]]: + def _validate_and_prepare_target_coordinates( + coords: dict[str, pd.Index], + ) -> tuple[dict[str, pd.Index], tuple[str, ...]]: """ - Validate coordinates and prepare them for DataArray creation. + Validate and prepare target coordinate specification for DataArray creation. 
+ + Performs comprehensive validation of coordinate inputs and prepares them + for use in DataArray construction with appropriate naming and type checking. Args: - coords: Dictionary mapping dimension names to coordinate indices + coords: Raw coordinate specification {dimension_name: coordinate_index} Returns: - Tuple of (validated coordinates dict, dimensions tuple) + Tuple of (validated_coordinates_dict, dimension_names_tuple) Raises: - ConversionError: If coordinates are invalid + ConversionError: If any coordinates are invalid, improperly typed, + or have inconsistent naming """ validated_coords = {} - dims = [] + dimension_names = [] for dim_name, coord_index in coords.items(): - # Basic validation - if not isinstance(coord_index, pd.Index) or len(coord_index) == 0: - raise ConversionError(f'{dim_name} coordinates must be a non-empty pandas Index') + # Type validation + if not isinstance(coord_index, pd.Index): + raise ConversionError( + f'Coordinate for dimension "{dim_name}" must be a pandas.Index, got {type(coord_index).__name__}' + ) + + # Non-empty validation + if len(coord_index) == 0: + raise ConversionError(f'Coordinate for dimension "{dim_name}" cannot be empty') - # Ensure coordinate index has the correct name + # Ensure coordinate index has consistent naming if coord_index.name != dim_name: coord_index = coord_index.rename(dim_name) - # Special validation for time dimension + # Special validation for time dimensions (common pattern) if dim_name == 'time' and not isinstance(coord_index, pd.DatetimeIndex): - raise ConversionError('time coordinates must be a DatetimeIndex') + raise ConversionError( + f'Dimension named "time" should use DatetimeIndex for proper ' + f'time-series functionality, got {type(coord_index).__name__}' + ) validated_coords[dim_name] = coord_index - dims.append(dim_name) + dimension_names.append(dim_name) - return validated_coords, tuple(dims) + return validated_coords, tuple(dimension_names) def get_dataarray_stats(arr: 
xr.DataArray) -> dict: From 64612a54b8fff232be8bf436bbdc04ca419c26d5 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:26:33 +0200 Subject: [PATCH 2/5] Update tests of error messages --- tests/test_dataconverter.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index aab04fd15..5451e3d8e 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -351,7 +351,7 @@ def test_2d_array_ambiguous_dimensions_error(self): } data_2d = np.random.rand(3, 3) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(data_2d, coords=coords_ambiguous) def test_multid_array_no_coords(self): @@ -376,7 +376,7 @@ def test_array_no_matching_dimensions_error(self, standard_coords): 'scenario': standard_coords['scenario'], # length 3 } - with pytest.raises(ConversionError, match='Array dimensions do not match any coordinate lengths'): + with pytest.raises(ConversionError, match='Array shape'): DataConverter.to_dataarray(data_2d, coords=coords_2d) def test_multid_array_special_values(self, standard_coords): @@ -600,7 +600,7 @@ def test_time_coord_validation(self): """Time coordinates must be DatetimeIndex.""" # Non-datetime index with name 'time' should fail wrong_time = pd.Index([1, 2, 3], name='time') - with pytest.raises(ConversionError, match='time coordinates must be a DatetimeIndex'): + with pytest.raises(ConversionError, match='DatetimeIndex'): DataConverter.to_dataarray(42, coords={'time': wrong_time}) def test_coord_naming(self, time_coords): @@ -633,7 +633,7 @@ def test_multidimensional_array_dimension_count_mismatch(self, standard_coords): """Array with wrong number of dimensions should fail with clear error.""" # 4D array with 3D coordinates data_4d = 
np.random.rand(5, 3, 2, 4) - with pytest.raises(ConversionError, match='matches multiple dimension orders|Array dimensions do not match'): + with pytest.raises(ConversionError, match='matches multiple dimensions|Array dimensions do not match'): DataConverter.to_dataarray(data_4d, coords=standard_coords) def test_error_message_quality(self, standard_coords): @@ -809,12 +809,12 @@ def test_ambiguous_length_handling(self): # 2D array - should fail arr_2d = np.random.rand(3, 3) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_2d, coords=coords_3x3x3) # 3D array - should fail arr_3d = np.random.rand(3, 3, 3) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_3d, coords=coords_3x3x3) def test_mixed_broadcasting_scenarios(self): @@ -903,7 +903,7 @@ def test_2d_array_ambiguous_dimensions_both_same(self): # 3x3 array - could be any combination of the three dimensions arr_2d = np.random.rand(3, 3) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_2d, coords=coords_3x3x3) def test_2d_array_one_dimension_ambiguous(self): @@ -919,7 +919,7 @@ def test_2d_array_one_dimension_ambiguous(self): # but second dimension could be scenario or region (both length 3) arr_5x3 = np.random.rand(5, 3) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_5x3, coords=coords_mixed) # 5x2 array should work - dimensions are unambiguous @@ -943,7 +943,7 @@ def test_3d_array_all_dimensions_ambiguous(self): # 2x2x2 array - could be any combination of 3 
dimensions from the 4 available arr_3d = np.random.rand(2, 2, 2) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_3d, coords=coords_2x2x2x2) def test_3d_array_partial_ambiguity(self): @@ -959,7 +959,7 @@ def test_3d_array_partial_ambiguity(self): # This should still fail because middle dimension (length 3) could be scenario or region arr_4x3x2 = np.random.rand(4, 3, 2) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_4x3x2, coords=coords_partial) def test_pandas_series_ambiguous_dimensions(self): @@ -1000,12 +1000,12 @@ def test_edge_case_many_same_lengths(self): # 2D array arr_2d = np.random.rand(2, 2) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_2d, coords=coords_many) # 3D array arr_3d = np.random.rand(2, 2, 2) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_3d, coords=coords_many) def test_mixed_lengths_with_duplicates(self): @@ -1047,7 +1047,7 @@ def test_dataframe_with_ambiguous_dimensions(self): df = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': [7, 8, 9]}) # 3x3 DataFrame # Should fail due to ambiguous dimensions - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(df, coords=coords_ambiguous) def test_error_message_quality_for_ambiguous_dimensions(self): @@ -1077,7 +1077,7 @@ def test_error_message_quality_for_ambiguous_dimensions(self): raise 
AssertionError('Should have raised ConversionError') except ConversionError as e: error_msg = str(e) - assert 'matches multiple dimension orders' in error_msg + assert 'matches multiple dimensions' in error_msg assert '(3, 3)' in error_msg def test_ambiguous_with_broadcasting_target(self): @@ -1099,7 +1099,7 @@ def test_ambiguous_with_broadcasting_target(self): # 2D array with one ambiguous dimension arr_5x3 = np.random.rand(5, 3) # 5 is unique (time), 3 is ambiguous (scenario/region) - with pytest.raises(ConversionError, match='matches multiple dimension orders'): + with pytest.raises(ConversionError, match='matches multiple dimensions'): DataConverter.to_dataarray(arr_5x3, coords=coords_ambiguous_plus) def test_time_dimension_ambiguity(self): From b3db175857591c23265a241656f3cada6a4b136f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:36:29 +0200 Subject: [PATCH 3/5] Update tests of error messages --- tests/test_dataconverter.py | 54 ++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 5451e3d8e..4f76ccf24 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -135,7 +135,7 @@ def test_1d_array_ambiguous_length(self): } arr = np.array([1, 2, 3]) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr, coords=coords_3x3) def test_1d_array_broadcast_to_many_dimensions(self, standard_coords): @@ -351,7 +351,7 @@ def test_2d_array_ambiguous_dimensions_error(self): } data_2d = np.random.rand(3, 3) - with pytest.raises(ConversionError, match='matches multiple dimension'): + with pytest.raises(ConversionError, match='matches multiple dimension combinations'): DataConverter.to_dataarray(data_2d, coords=coords_ambiguous) def 
test_multid_array_no_coords(self): @@ -376,7 +376,7 @@ def test_array_no_matching_dimensions_error(self, standard_coords): 'scenario': standard_coords['scenario'], # length 3 } - with pytest.raises(ConversionError, match='Array shape'): + with pytest.raises(ConversionError, match='cannot be mapped to any combination'): DataConverter.to_dataarray(data_2d, coords=coords_2d) def test_multid_array_special_values(self, standard_coords): @@ -626,14 +626,14 @@ def test_dimension_mismatch_messages(self, time_coords, scenario_coords): """Error messages should be informative.""" # Array with wrong length wrong_arr = np.array([1, 2]) # Length 2, but no dimension has length 2 - with pytest.raises(ConversionError, match='matches none of the target dimensions'): + with pytest.raises(ConversionError, match='does not match any target dimension lengths'): DataConverter.to_dataarray(wrong_arr, coords={'time': time_coords, 'scenario': scenario_coords}) def test_multidimensional_array_dimension_count_mismatch(self, standard_coords): """Array with wrong number of dimensions should fail with clear error.""" # 4D array with 3D coordinates data_4d = np.random.rand(5, 3, 2, 4) - with pytest.raises(ConversionError, match='matches multiple dimensions|Array dimensions do not match'): + with pytest.raises(ConversionError, match='cannot be mapped to any combination'): DataConverter.to_dataarray(data_4d, coords=standard_coords) def test_error_message_quality(self, standard_coords): @@ -650,8 +650,8 @@ def test_error_message_quality(self, standard_coords): raise AssertionError('Should have raised ConversionError') except ConversionError as e: error_msg = str(e) - assert 'Array shape: (7, 8)' in error_msg - assert 'Coordinate lengths:' in error_msg + assert 'Array shape (7, 8)' in error_msg + assert 'target coordinate lengths:' in error_msg class TestDataIntegrity: @@ -804,17 +804,17 @@ def test_ambiguous_length_handling(self): # 1D array - should fail arr_1d = np.array([1, 2, 3]) - with 
pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_1d, coords=coords_3x3x3) # 2D array - should fail arr_2d = np.random.rand(3, 3) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_2d, coords=coords_3x3x3) # 3D array - should fail arr_3d = np.random.rand(3, 3, 3) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_3d, coords=coords_3x3x3) def test_mixed_broadcasting_scenarios(self): @@ -866,7 +866,7 @@ def test_1d_array_ambiguous_dimensions_simple(self): arr_1d = np.array([1, 2, 3]) # length 3 - matches both dimensions - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_1d, coords=coords_ambiguous) def test_1d_array_ambiguous_dimensions_complex(self): @@ -882,7 +882,7 @@ def test_1d_array_ambiguous_dimensions_complex(self): # Array matching the ambiguous length arr_1d = np.array([10, 20, 30, 40]) # length 4 - matches time, scenario, region - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_1d, coords=coords_4x4x4) # Array matching the unique length should work @@ -903,7 +903,7 @@ def test_2d_array_ambiguous_dimensions_both_same(self): # 3x3 array - could be any combination of the three dimensions arr_2d = np.random.rand(3, 3) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_2d, coords=coords_3x3x3) 
def test_2d_array_one_dimension_ambiguous(self): @@ -919,7 +919,7 @@ def test_2d_array_one_dimension_ambiguous(self): # but second dimension could be scenario or region (both length 3) arr_5x3 = np.random.rand(5, 3) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_5x3, coords=coords_mixed) # 5x2 array should work - dimensions are unambiguous @@ -943,7 +943,7 @@ def test_3d_array_all_dimensions_ambiguous(self): # 2x2x2 array - could be any combination of 3 dimensions from the 4 available arr_3d = np.random.rand(2, 2, 2) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_3d, coords=coords_2x2x2x2) def test_3d_array_partial_ambiguity(self): @@ -959,7 +959,7 @@ def test_3d_array_partial_ambiguity(self): # This should still fail because middle dimension (length 3) could be scenario or region arr_4x3x2 = np.random.rand(4, 3, 2) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_4x3x2, coords=coords_partial) def test_pandas_series_ambiguous_dimensions(self): @@ -973,7 +973,7 @@ def test_pandas_series_ambiguous_dimensions(self): generic_series = pd.Series([10, 20, 30], index=[0, 1, 2]) # Should fail because length matches multiple dimensions and index doesn't match any - with pytest.raises(ConversionError, match='index does not match any target dimension'): + with pytest.raises(ConversionError, match='Series index does not match any target dimension coordinates'): DataConverter.to_dataarray(generic_series, coords=coords_ambiguous) # Series with index that matches one of the ambiguous coordinates should work @@ -995,17 +995,17 @@ def test_edge_case_many_same_lengths(self): # 1D array 
arr_1d = np.array([1, 2]) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_1d, coords=coords_many) # 2D array arr_2d = np.random.rand(2, 2) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_2d, coords=coords_many) # 3D array arr_3d = np.random.rand(2, 2, 2) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_3d, coords=coords_many) def test_mixed_lengths_with_duplicates(self): @@ -1033,7 +1033,7 @@ def test_mixed_lengths_with_duplicates(self): # Arrays with ambiguous length should fail arr_3 = np.array([1, 2, 3]) # matches both scenario and region - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_3, coords=coords_mixed) def test_dataframe_with_ambiguous_dimensions(self): @@ -1047,7 +1047,7 @@ def test_dataframe_with_ambiguous_dimensions(self): df = pd.DataFrame({'col1': [1, 2, 3], 'col2': [4, 5, 6], 'col3': [7, 8, 9]}) # 3x3 DataFrame # Should fail due to ambiguous dimensions - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(df, coords=coords_ambiguous) def test_error_message_quality_for_ambiguous_dimensions(self): @@ -1065,7 +1065,7 @@ def test_error_message_quality_for_ambiguous_dimensions(self): raise AssertionError('Should have raised ConversionError') except ConversionError as e: error_msg = str(e) - assert 'matches multiple dimensions' in error_msg + assert 'matches multiple dimension' in error_msg assert 'scenario' 
in error_msg assert 'region' in error_msg assert 'technology' in error_msg @@ -1077,7 +1077,7 @@ def test_error_message_quality_for_ambiguous_dimensions(self): raise AssertionError('Should have raised ConversionError') except ConversionError as e: error_msg = str(e) - assert 'matches multiple dimensions' in error_msg + assert 'matches multiple dimension combinations' in error_msg assert '(3, 3)' in error_msg def test_ambiguous_with_broadcasting_target(self): @@ -1093,13 +1093,13 @@ def test_ambiguous_with_broadcasting_target(self): arr_3 = np.array([10, 20, 30]) # length 3, matches scenario and region # Should fail even though it would broadcast to other dimensions - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_3, coords=coords_ambiguous_plus) # 2D array with one ambiguous dimension arr_5x3 = np.random.rand(5, 3) # 5 is unique (time), 3 is ambiguous (scenario/region) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(arr_5x3, coords=coords_ambiguous_plus) def test_time_dimension_ambiguity(self): @@ -1119,7 +1119,7 @@ def test_time_dimension_ambiguity(self): # But generic array with length 3 should still fail generic_array = np.array([100, 200, 300]) - with pytest.raises(ConversionError, match='matches multiple dimensions'): + with pytest.raises(ConversionError, match='matches multiple dimension'): DataConverter.to_dataarray(generic_array, coords=coords_time_ambiguous) From dd720b8a1b0328e0aa03cdc99eb4cd84453e89ef Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 25 Sep 2025 14:51:49 +0200 Subject: [PATCH 4/5] Update Dataconverter to allow bool values --- flixopt/core.py | 17 +++- tests/test_dataconverter.py | 156 ++++++++++++++++++++++++++++++++++++ 2 files 
changed, 170 insertions(+), 3 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 1bd6fe52c..3376b2c5f 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -405,7 +405,16 @@ def _broadcast_dataarray_to_target_specification( @classmethod def to_dataarray( cls, - data: int | float | np.integer | np.floating | np.ndarray | pd.Series | pd.DataFrame | xr.DataArray, + data: int + | float + | bool + | np.integer + | np.floating + | np.bool_ + | np.ndarray + | pd.Series + | pd.DataFrame + | xr.DataArray, coords: dict[str, pd.Index] | None = None, ) -> xr.DataArray: """ @@ -417,7 +426,7 @@ def to_dataarray( Args: data: Input data to convert. Supported types: - - Scalars: int, float, np.integer, np.floating + - Scalars: int, float, bool, np.integer, np.floating, np.bool_ - Arrays: np.ndarray (1D and multi-dimensional) - Pandas: pd.Series, pd.DataFrame - xarray: xr.DataArray @@ -455,7 +464,7 @@ def to_dataarray( validated_coords, target_dims = cls._validate_and_prepare_target_coordinates(coords) # Convert input data to intermediate DataArray based on type - if isinstance(data, (int, float, np.integer, np.floating)): + if isinstance(data, (int, float, bool, np.integer, np.floating, np.bool_)): # Scalar values - create scalar DataArray intermediate = xr.DataArray(data.item() if hasattr(data, 'item') else data) @@ -503,8 +512,10 @@ def to_dataarray( supported_types = [ 'int', 'float', + 'bool', 'np.integer', 'np.floating', + 'np.bool_', 'np.ndarray', 'pd.Series', 'pd.DataFrame', diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 4f76ccf24..8ea8250aa 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -516,6 +516,33 @@ def test_timeseries_data_broadcast(self, time_coords, scenario_coords): assert np.array_equal(result.sel(scenario=scenario).values, [10, 20, 30, 40, 50]) +class TestAsDataArrayAlias: + """Test that as_dataarray works as an alias for to_dataarray.""" + + def test_as_dataarray_is_alias(self, 
time_coords, scenario_coords): + """as_dataarray should work identically to to_dataarray.""" + # Test with scalar + result_to = DataConverter.to_dataarray(42, coords={'time': time_coords}) + result_as = DataConverter.as_dataarray(42, coords={'time': time_coords}) + assert np.array_equal(result_to.values, result_as.values) + assert result_to.dims == result_as.dims + assert result_to.shape == result_as.shape + + # Test with array + arr = np.array([10, 20, 30, 40, 50]) + result_to_arr = DataConverter.to_dataarray(arr, coords={'time': time_coords}) + result_as_arr = DataConverter.as_dataarray(arr, coords={'time': time_coords}) + assert np.array_equal(result_to_arr.values, result_as_arr.values) + assert result_to_arr.dims == result_as_arr.dims + + # Test with Series + series = pd.Series([100, 200, 300, 400, 500], index=time_coords) + result_to_series = DataConverter.to_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords}) + result_as_series = DataConverter.as_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords}) + assert np.array_equal(result_to_series.values, result_as_series.values) + assert result_to_series.dims == result_as_series.dims + + class TestCustomDimensions: """Test with custom dimension names beyond time/scenario.""" @@ -704,6 +731,135 @@ def test_multid_array_copy_independence(self, standard_coords): assert original_data[0, 0] != 999 +class TestBooleanValues: + """Test handling of boolean values and arrays.""" + + def test_scalar_boolean_to_dataarray(self, time_coords): + """Scalar boolean values should work with to_dataarray.""" + result_true = DataConverter.to_dataarray(True, coords={'time': time_coords}) + assert result_true.shape == (5,) + assert result_true.dtype == bool + assert np.all(result_true.values) + + result_false = DataConverter.to_dataarray(False, coords={'time': time_coords}) + assert result_false.shape == (5,) + assert result_false.dtype == bool + assert not np.any(result_false.values) + + 
def test_scalar_boolean_as_dataarray(self, time_coords): + """Scalar boolean values should work with as_dataarray.""" + result_true = DataConverter.as_dataarray(True, coords={'time': time_coords}) + assert result_true.shape == (5,) + assert result_true.dtype == bool + assert np.all(result_true.values) + + result_false = DataConverter.as_dataarray(False, coords={'time': time_coords}) + assert result_false.shape == (5,) + assert result_false.dtype == bool + assert not np.any(result_false.values) + + def test_numpy_boolean_scalar(self, time_coords): + """Numpy boolean scalars should work.""" + result_np_true = DataConverter.to_dataarray(np.bool_(True), coords={'time': time_coords}) + assert result_np_true.shape == (5,) + assert result_np_true.dtype == bool + assert np.all(result_np_true.values) + + result_np_false = DataConverter.as_dataarray(np.bool_(False), coords={'time': time_coords}) + assert result_np_false.shape == (5,) + assert result_np_false.dtype == bool + assert not np.any(result_np_false.values) + + def test_boolean_array_to_dataarray(self, time_coords): + """Boolean arrays should work with to_dataarray.""" + bool_arr = np.array([True, False, True, False, True]) + result = DataConverter.to_dataarray(bool_arr, coords={'time': time_coords}) + assert result.shape == (5,) + assert result.dims == ('time',) + assert result.dtype == bool + assert np.array_equal(result.values, bool_arr) + + def test_boolean_array_as_dataarray(self, time_coords): + """Boolean arrays should work with as_dataarray.""" + bool_arr = np.array([True, False, True, False, True]) + result = DataConverter.as_dataarray(bool_arr, coords={'time': time_coords}) + assert result.shape == (5,) + assert result.dims == ('time',) + assert result.dtype == bool + assert np.array_equal(result.values, bool_arr) + + def test_boolean_no_coords(self): + """Boolean scalar without coordinates should create 0D DataArray.""" + result = DataConverter.to_dataarray(True) + assert result.shape == () + assert 
result.dims == () + assert result.item() + + result_as = DataConverter.as_dataarray(False) + assert result_as.shape == () + assert result_as.dims == () + assert not result_as.item() + + def test_boolean_multidimensional_broadcast(self, standard_coords): + """Boolean values should broadcast to multiple dimensions.""" + result = DataConverter.to_dataarray(True, coords=standard_coords) + assert result.shape == (5, 3, 2) + assert result.dims == ('time', 'scenario', 'region') + assert result.dtype == bool + assert np.all(result.values) + + result_as = DataConverter.as_dataarray(False, coords=standard_coords) + assert result_as.shape == (5, 3, 2) + assert result_as.dims == ('time', 'scenario', 'region') + assert result_as.dtype == bool + assert not np.any(result_as.values) + + def test_boolean_series(self, time_coords): + """Boolean Series should work.""" + bool_series = pd.Series([True, False, True, False, True], index=time_coords) + result = DataConverter.to_dataarray(bool_series, coords={'time': time_coords}) + assert result.shape == (5,) + assert result.dtype == bool + assert np.array_equal(result.values, bool_series.values) + + result_as = DataConverter.as_dataarray(bool_series, coords={'time': time_coords}) + assert result_as.shape == (5,) + assert result_as.dtype == bool + assert np.array_equal(result_as.values, bool_series.values) + + def test_boolean_dataframe(self, time_coords): + """Boolean DataFrame should work.""" + bool_df = pd.DataFrame({'values': [True, False, True, False, True]}, index=time_coords) + result = DataConverter.to_dataarray(bool_df, coords={'time': time_coords}) + assert result.shape == (5,) + assert result.dtype == bool + assert np.array_equal(result.values, bool_df['values'].values) + + result_as = DataConverter.as_dataarray(bool_df, coords={'time': time_coords}) + assert result_as.shape == (5,) + assert result_as.dtype == bool + assert np.array_equal(result_as.values, bool_df['values'].values) + + def 
test_multidimensional_boolean_array(self, standard_coords): + """Multi-dimensional boolean arrays should work.""" + bool_data = np.array( + [[True, False, True], [False, True, False], [True, True, False], [False, False, True], [True, False, True]] + ) + result = DataConverter.to_dataarray( + bool_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']} + ) + assert result.shape == (5, 3) + assert result.dtype == bool + assert np.array_equal(result.values, bool_data) + + result_as = DataConverter.as_dataarray( + bool_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']} + ) + assert result_as.shape == (5, 3) + assert result_as.dtype == bool + assert np.array_equal(result_as.values, bool_data) + + class TestSpecialValues: """Test handling of special numeric values.""" From 5e51d7b3bdd69bd3c7db721372593436772a05aa Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Thu, 25 Sep 2025 15:19:43 +0200 Subject: [PATCH 5/5] fix tests --- tests/test_dataconverter.py | 45 ++++++++++--------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 8ea8250aa..798324436 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -517,13 +517,13 @@ def test_timeseries_data_broadcast(self, time_coords, scenario_coords): class TestAsDataArrayAlias: - """Test that as_dataarray works as an alias for to_dataarray.""" + """Test that to_dataarray returns consistent results across repeated calls.""" - def test_as_dataarray_is_alias(self, time_coords, scenario_coords): - """as_dataarray should work identically to to_dataarray.""" + def test_to_dataarray_is_alias(self, time_coords, scenario_coords): + """Repeated to_dataarray calls with the same input should give identical results.""" # Test with scalar result_to = DataConverter.to_dataarray(42, coords={'time': time_coords}) - result_as = DataConverter.as_dataarray(42, 
coords={'time': time_coords}) + result_as = DataConverter.to_dataarray(42, coords={'time': time_coords}) assert np.array_equal(result_to.values, result_as.values) assert result_to.dims == result_as.dims assert result_to.shape == result_as.shape @@ -531,14 +531,14 @@ def test_as_dataarray_is_alias(self, time_coords, scenario_coords): # Test with array arr = np.array([10, 20, 30, 40, 50]) result_to_arr = DataConverter.to_dataarray(arr, coords={'time': time_coords}) - result_as_arr = DataConverter.as_dataarray(arr, coords={'time': time_coords}) + result_as_arr = DataConverter.to_dataarray(arr, coords={'time': time_coords}) assert np.array_equal(result_to_arr.values, result_as_arr.values) assert result_to_arr.dims == result_as_arr.dims # Test with Series series = pd.Series([100, 200, 300, 400, 500], index=time_coords) result_to_series = DataConverter.to_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords}) - result_as_series = DataConverter.as_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords}) + result_as_series = DataConverter.to_dataarray(series, coords={'time': time_coords, 'scenario': scenario_coords}) assert np.array_equal(result_to_series.values, result_as_series.values) assert result_to_series.dims == result_as_series.dims @@ -746,18 +746,6 @@ def test_scalar_boolean_to_dataarray(self, time_coords): assert result_false.dtype == bool assert not np.any(result_false.values) - def test_scalar_boolean_as_dataarray(self, time_coords): - """Scalar boolean values should work with as_dataarray.""" - result_true = DataConverter.as_dataarray(True, coords={'time': time_coords}) - assert result_true.shape == (5,) - assert result_true.dtype == bool - assert np.all(result_true.values) - - result_false = DataConverter.as_dataarray(False, coords={'time': time_coords}) - assert result_false.shape == (5,) - assert result_false.dtype == bool - assert not np.any(result_false.values) - def test_numpy_boolean_scalar(self, time_coords): 
"""Numpy boolean scalars should work.""" result_np_true = DataConverter.to_dataarray(np.bool_(True), coords={'time': time_coords}) @@ -765,7 +753,7 @@ def test_numpy_boolean_scalar(self, time_coords): assert result_np_true.dtype == bool assert np.all(result_np_true.values) - result_np_false = DataConverter.as_dataarray(np.bool_(False), coords={'time': time_coords}) + result_np_false = DataConverter.to_dataarray(np.bool_(False), coords={'time': time_coords}) assert result_np_false.shape == (5,) assert result_np_false.dtype == bool assert not np.any(result_np_false.values) @@ -779,15 +767,6 @@ def test_boolean_array_to_dataarray(self, time_coords): assert result.dtype == bool assert np.array_equal(result.values, bool_arr) - def test_boolean_array_as_dataarray(self, time_coords): - """Boolean arrays should work with as_dataarray.""" - bool_arr = np.array([True, False, True, False, True]) - result = DataConverter.as_dataarray(bool_arr, coords={'time': time_coords}) - assert result.shape == (5,) - assert result.dims == ('time',) - assert result.dtype == bool - assert np.array_equal(result.values, bool_arr) - def test_boolean_no_coords(self): """Boolean scalar without coordinates should create 0D DataArray.""" result = DataConverter.to_dataarray(True) @@ -795,7 +774,7 @@ def test_boolean_no_coords(self): assert result.dims == () assert result.item() - result_as = DataConverter.as_dataarray(False) + result_as = DataConverter.to_dataarray(False) assert result_as.shape == () assert result_as.dims == () assert not result_as.item() @@ -808,7 +787,7 @@ def test_boolean_multidimensional_broadcast(self, standard_coords): assert result.dtype == bool assert np.all(result.values) - result_as = DataConverter.as_dataarray(False, coords=standard_coords) + result_as = DataConverter.to_dataarray(False, coords=standard_coords) assert result_as.shape == (5, 3, 2) assert result_as.dims == ('time', 'scenario', 'region') assert result_as.dtype == bool @@ -822,7 +801,7 @@ def 
test_boolean_series(self, time_coords): assert result.dtype == bool assert np.array_equal(result.values, bool_series.values) - result_as = DataConverter.as_dataarray(bool_series, coords={'time': time_coords}) + result_as = DataConverter.to_dataarray(bool_series, coords={'time': time_coords}) assert result_as.shape == (5,) assert result_as.dtype == bool assert np.array_equal(result_as.values, bool_series.values) @@ -835,7 +814,7 @@ def test_boolean_dataframe(self, time_coords): assert result.dtype == bool assert np.array_equal(result.values, bool_df['values'].values) - result_as = DataConverter.as_dataarray(bool_df, coords={'time': time_coords}) + result_as = DataConverter.to_dataarray(bool_df, coords={'time': time_coords}) assert result_as.shape == (5,) assert result_as.dtype == bool assert np.array_equal(result_as.values, bool_df['values'].values) @@ -852,7 +831,7 @@ def test_multidimensional_boolean_array(self, standard_coords): assert result.dtype == bool assert np.array_equal(result.values, bool_data) - result_as = DataConverter.as_dataarray( + result_as = DataConverter.to_dataarray( bool_data, coords={'time': standard_coords['time'], 'scenario': standard_coords['scenario']} ) assert result_as.shape == (5, 3)