From 803d6809253a467184fd16b6c04bec4bc28ee21f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:28:22 +0200 Subject: [PATCH 01/55] Update core.py to support an additional scenario dimension --- flixopt/core.py | 27 +++++++++++++++++++----- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 379828554..dac5a6a18 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -46,16 +46,25 @@ class DataConverter: """ @staticmethod - def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray: + def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None) -> xr.DataArray: """Convert data to xarray.DataArray with specified timesteps index.""" if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') if not timesteps.name == 'time': raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}') - coords = [timesteps] - dims = ['time'] - expected_shape = (len(timesteps),) + if scenarios is not None: + if not isinstance(scenarios, pd.Index) or len(scenarios) == 0: + raise ValueError(f'Scenarios must be a non-empty Index, got {type(scenarios).__name__}') + if not scenarios.name == 'scenario': + raise ConversionError(f'Scenarios Index is not named correctly. Must be named "scenario", got {scenarios.name=}') + coords = [scenarios, timesteps] + dims = ['scenario', 'time'] + expected_shape = (len(scenarios), len(timesteps)) + else: + coords = [timesteps] + dims = ['time'] + expected_shape = (len(timesteps),) try: if isinstance(data, (int, float, np.integer, np.floating)): @@ -155,6 +166,7 @@ def from_datasource( data: NumericData, name: str, timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index] = None, aggregation_weight: Optional[float] = None, aggregation_group: Optional[str] = None, needs_extra_timestep: bool = False, @@ -166,6 +178,7 @@ def from_datasource( data: The time series data name: The name of the TimeSeries timesteps: The timesteps of the TimeSeries + scenarios: The scenarios of the TimeSeries aggregation_weight: The weight in aggregation calculations aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing needs_extra_timestep: Whether this series requires an extra timestep @@ -174,7 +187,7 @@ def from_datasource( A new TimeSeries instance """ return cls( - DataConverter.as_dataarray(data, timesteps), + DataConverter.as_dataarray(data, timesteps, scenarios), name, aggregation_weight, aggregation_group, @@ -478,12 +491,14 @@ class TimeSeriesCollection: def __init__( self, timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index] = None, hours_of_last_timestep: Optional[float] = None, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None, ): """ Args: timesteps: The timesteps of the Collection. + scenarios: The scenarios of the Collection. hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified hours_of_previous_timesteps: The duration of previous timesteps. If None, the first time increment of time_series is used.
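A minimal usage sketch of the new optional `scenarios` argument (the index names 'time' and 'scenario' are required by the validation above; the concrete values here are illustrative):

    import pandas as pd
    from flixopt.core import DataConverter

    timesteps = pd.date_range('2024-01-01', periods=4, freq='h', name='time')
    scenarios = pd.Index(['low', 'high'], name='scenario')

    # A scalar is broadcast by xarray over both dimensions
    da = DataConverter.as_dataarray(42, timesteps, scenarios)
    assert da.dims == ('scenario', 'time')
    assert da.shape == (2, 4)

Passing an index named anything other than 'time' or 'scenario' raises a ConversionError, as enforced above.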
@@ -506,6 +521,10 @@ def __init__( self._active_timesteps_extra = None self._active_hours_per_timestep = None + # Scenarios + self.all_scenarios = scenarios + self._active_scenarios = None + # Dictionary of time series by name self.time_series_data: Dict[str, TimeSeries] = {} From a83f2b1cecf9960ee29f17e712bbd4d56fbbbc5c Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:33:26 +0200 Subject: [PATCH 02/55] Add scenarios to TimeSeries --- flixopt/core.py | 56 +++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index dac5a6a18..690d4fd88 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -247,12 +247,14 @@ def __init__( needs_extra_timestep: Whether this series requires an extra timestep Raises: - ValueError: If data doesn't have a 'time' index or has more than 1 dimension + ValueError: If data doesn't have a 'time' index or has unsupported dimensions """ if 'time' not in data.indexes: raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}') - if data.ndim > 1: - raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}') + + allowed_dims = {'time', 'scenario'} + if not set(data.dims).issubset(allowed_dims): + raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}') self.name = name self.aggregation_weight = aggregation_weight @@ -263,14 +265,21 @@ def __init__( self._stored_data = data.copy(deep=True) self._backup = self._stored_data.copy(deep=True) self._active_timesteps = self._stored_data.indexes['time'] + + # Handle scenarios if present + self._has_scenarios = 'scenario' in data.dims + self._active_scenarios = self._stored_data.indexes.get('scenario', None) + self._active_data = None self._update_active_data() def reset(self): """ - Reset active timesteps to the full set of stored timesteps. + Reset active timesteps and scenarios to the full set of stored data. """ self.active_timesteps = None + if self._has_scenarios: + self.active_scenarios = None def restore_data(self): """ @@ -320,9 +329,12 @@ def stats(self) -> str: def _update_active_data(self): """ - Update the active data based on active_timesteps. + Update the active data based on active_timesteps and active_scenarios. """ - self._active_data = self._stored_data.sel(time=self.active_timesteps) + if self._has_scenarios and self._active_scenarios is not None: + self._active_data = self._stored_data.sel(time=self.active_timesteps, scenario=self._active_scenarios) + else: + self._active_data = self._stored_data.sel(time=self.active_timesteps) @property def all_equal(self) -> bool: @@ -354,6 +366,38 @@ def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]): self._update_active_data() + @property + def active_scenarios(self) -> Optional[pd.Index]: + """Get the current active scenarios.""" + return self._active_scenarios + + @active_scenarios.setter + def active_scenarios(self, scenarios: Optional[pd.Index]): + """ + Set active_scenarios and refresh active_data. + + Args: + scenarios: New scenarios to activate, or None to use all stored scenarios + + Raises: + TypeError: If scenarios is not a pandas Index or None + ValueError: If scenarios is not a subset of stored scenarios + """ + if not self._has_scenarios: + logger.warning('This TimeSeries does not have scenarios dimension. 
Ignoring scenarios setting.') + return + + if scenarios is None: + self._active_scenarios = self.stored_data.indexes.get('scenario', None) + elif isinstance(scenarios, pd.Index): + if not scenarios.isin(self.stored_data.indexes['scenario']).all(): + raise ValueError('active_scenarios must be a subset of the stored scenarios') + self._active_scenarios = scenarios + else: + raise TypeError('active_scenarios must be a pandas Index or None') + + self._update_active_data() + @property def active_data(self) -> xr.DataArray: """Get a view of stored_data based on active_timesteps.""" From 8b064544e17e3cf3e48fb7ff9d06478d4b434749 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:40:57 +0200 Subject: [PATCH 03/55] Update TimeSeriesCollection --- flixopt/core.py | 74 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 54 insertions(+), 20 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 690d4fd88..24b124a40 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -594,11 +594,9 @@ def create_time_series( data: The data to create the TimeSeries from. name: The name of the TimeSeries. needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps. - The data to create the TimeSeries from. Returns: The created TimeSeries. - """ # Check for duplicate name if name in self.time_series_data: @@ -613,6 +611,7 @@ def create_time_series( name=name, data=data.data, timesteps=timesteps_to_use, + scenarios=self.scenarios, aggregation_weight=data.agg_weight, aggregation_group=data.agg_group, needs_extra_timestep=needs_extra_timestep, @@ -621,7 +620,11 @@ def create_time_series( data.label = name else: time_series = TimeSeries.from_datasource( - name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep + name=name, + data=data, + timesteps=timesteps_to_use, + scenarios=self.scenarios, + needs_extra_timestep=needs_extra_timestep, ) # Add to the collection @@ -639,36 +642,54 @@ def calculate_aggregation_weights(self) -> Dict[str, float]: return self.weights - def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None): + def activate_timesteps( # TODO: rename + self, active_timesteps: Optional[pd.DatetimeIndex] = None, active_scenarios: Optional[pd.Index] = None + ): """ - Update active timesteps for the collection and all time series. - If no arguments are provided, the active timesteps are reset. + Update active timesteps and scenarios for the collection and all time series. + If no arguments are provided, the active states are reset. Args: active_timesteps: The active timesteps of the model. - If None, the all timesteps of the TimeSeriesCollection are taken. + If None, all timesteps of the TimeSeriesCollection are taken. + active_scenarios: The active scenarios of the model. + If None, all scenarios of the TimeSeriesCollection are taken. 
""" - if active_timesteps is None: + if active_timesteps is None and active_scenarios is None: return self.reset() - if not np.all(np.isin(active_timesteps, self.all_timesteps)): - raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection') + # Handle timesteps + if active_timesteps is not None: + if not np.all(np.isin(active_timesteps, self.all_timesteps)): + raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection') + + # Calculate derived timesteps + self._active_timesteps = active_timesteps + first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0] + last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0] + self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2] + self._active_hours_per_timestep = self.all_hours_per_timestep.isel( + time=slice(first_ts_index, last_ts_idx + 1) + ) - # Calculate derived timesteps - self._active_timesteps = active_timesteps - first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0] - last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0] - self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2] - self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1)) + # Handle scenarios + if active_scenarios is not None: + if self.all_scenarios is None: + logger.warning('This TimeSeriesCollection does not have scenarios. Ignoring scenarios setting.') + else: + if not np.all(np.isin(active_scenarios, self.all_scenarios)): + raise ValueError('active_scenarios must be a subset of the scenarios of the TimeSeriesCollection') + self._active_scenarios = active_scenarios # Update all time series - self._update_time_series_timesteps() + self._update_time_series_active_states() def reset(self): - """Reset active timesteps to defaults for all time series.""" + """Reset active timesteps and scenarios to defaults for all time series.""" self._active_timesteps = None self._active_timesteps_extra = None self._active_hours_per_timestep = None + self._active_scenarios = None for time_series in self.time_series_data.values(): time_series.reset() @@ -782,6 +803,10 @@ def to_dataset(self, include_constants: bool = True) -> xr.Dataset: # Ensure the correct time coordinates ds = ds.reindex(time=self.timesteps_extra) + # Add scenarios dimension if present + if self.scenarios is not None: + ds = ds.reindex(scenario=self.scenarios) + ds.attrs.update( { 'timesteps_extra': f'{self.timesteps_extra[0]} ... 
{self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}', @@ -791,13 +816,17 @@ def to_dataset(self, include_constants: bool = True) -> xr.Dataset: return ds - def _update_time_series_timesteps(self): - """Update active timesteps for all time series.""" + def _update_time_series_active_states(self): + """Update active timesteps and scenarios for all time series.""" for ts in self.time_series_data.values(): + # Set timesteps if ts.needs_extra_timestep: ts.active_timesteps = self.timesteps_extra else: ts.active_timesteps = self.timesteps + # Set scenarios + if self.scenarios is not None: + ts.active_scenarios = self.scenarios @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): @@ -941,6 +970,11 @@ def hours_of_last_timestep(self) -> float: """Get the duration of the last timestep.""" return float(self.hours_per_timestep[-1].item()) + @property + def scenarios(self) -> Optional[pd.Index]: + """Get the active scenarios.""" + return self.all_scenarios if self._active_scenarios is None else self._active_scenarios + def __repr__(self): return f'TimeSeriesCollection:\n{self.to_dataset()}' From 74c18c2816fd5315e20d9ebbc80ded56b0d7bbc6 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:42:53 +0200 Subject: [PATCH 04/55] Update get_numeric_stats() to return values per scenario --- flixopt/core.py | 38 ++++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 24b124a40..bce4bf66f 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -998,14 +998,48 @@ def __str__(self): ) -def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str: - """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.""" +def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str: + """ + Calculates the mean, median, min, max, and standard deviation of a numeric DataArray. 
+ + Args: + data: The DataArray to analyze + decimals: Number of decimal places to show + padd: Padding for alignment + by_scenario: Whether to break down stats by scenario + + Returns: + String representation of data statistics + """ format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f' + + # If by_scenario is True and there's a scenario dimension with multiple values + if by_scenario and 'scenario' in data.dims and data.sizes['scenario'] > 1: + results = [] + for scenario in data.coords['scenario'].values: + scenario_data = data.sel(scenario=scenario) + if np.unique(scenario_data).size == 1: + results.append(f'  {scenario}: {scenario_data.max().item():{format_spec}} (constant)') + else: + mean = scenario_data.mean().item() + median = scenario_data.median().item() + min_val = scenario_data.min().item() + max_val = scenario_data.max().item() + std = scenario_data.std().item() + results.append( + f'  {scenario}: {mean:{format_spec}} (mean), {median:{format_spec}} (median), ' + f'{min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)' + ) + return '\n'.join(['By scenario:'] + results) + + # Standard logic for non-scenario data or aggregated stats if np.unique(data).size == 1: return f'{data.max().item():{format_spec}} (constant)' + mean = data.mean().item() median = data.median().item() min_val = data.min().item() max_val = data.max().item() std = data.std().item() + return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
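A short example of the per-scenario breakdown (illustrative data; the printed layout depends on `padd` and is shown here approximately):

    import numpy as np
    import pandas as pd
    import xarray as xr
    from flixopt.core import get_numeric_stats

    da = xr.DataArray(
        np.array([[1.0, 1.0, 1.0], [2.0, 3.0, 4.0]]),
        coords={
            'scenario': pd.Index(['low', 'high'], name='scenario'),
            'time': pd.date_range('2024-01-01', periods=3, freq='h', name='time'),
        },
        dims=('scenario', 'time'),
    )

    print(get_numeric_stats(da, decimals=2, padd=0, by_scenario=True))
    # By scenario:
    #   low: 1.00 (constant)
    #   high: 3.00 (mean), 3.00 (median), 2.00 (min), 4.00 (max), 0.82 (std)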
From 3927e29efc77ebd274625e6a2222a6165e3ecd3c Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:48:17 +0200 Subject: [PATCH 05/55] Update repr and str --- flixopt/core.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index bce4bf66f..df2ba4547 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -511,6 +511,13 @@ def __repr__(self): 'shape': self.active_data.shape, 'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}', } + + # Add scenario information if present + if self._has_scenarios: + attrs['scenarios'] = f'{len(self.active_scenarios)} scenarios' + else: + attrs['scenarios'] = 'No scenarios' + attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items()) return f'TimeSeries({attr_str})' @@ -979,12 +986,13 @@ def __repr__(self): return f'TimeSeriesCollection:\n{self.to_dataset()}' def __str__(self): - longest_name = max([time_series.name for time_series in self.time_series_data], key=len) + """Get a human-readable string representation.""" + longest_name = max([len(time_series.name) for time_series in self.time_series_data.values()]) stats_summary = '\n'.join( [ - f'  - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}' - for time_series in self.time_series_data + f'  - {time_series.name:<{longest_name}}: {get_numeric_stats(time_series.active_data)}' + for time_series in self.time_series_data.values() ] ) return ( f'TimeSeriesCollection with {len(self.time_series_data)} series\n' f'  Time Range: {self.timesteps[0]} → {self.timesteps[-1]}\n' f'  No. of timesteps: {len(self.timesteps)} + 1 extra\n' + f'  No. of scenarios: {len(self.scenarios) if self.scenarios is not None else "No Scenarios"}\n' f'  Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n' f'  Time Series Data:\n' f'{stats_summary}' ) From 132c1199995e116cfa2907095b01cd0d7fe70a6f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:50:47 +0200 Subject: [PATCH 06/55] Improve stats --- flixopt/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index df2ba4547..9403a907e 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -325,7 +325,7 @@ def stats(self) -> str: Returns: String representation of data statistics """ - return get_numeric_stats(self.active_data, padd=0) + return get_numeric_stats(self.active_data, padd=0, by_scenario=(self._has_scenarios and len(self.active_scenarios) > 1)) From 89eaa4e81ab895baabdf472ad72bec999dc9f75a Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:52:50 +0200 Subject: [PATCH 07/55] Add utility methods to analyze data --- flixopt/core.py | 92 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/flixopt/core.py b/flixopt/core.py index 9403a907e..acf528192 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -823,6 +823,98 @@ def to_dataset(self, include_constants: bool = True) -> xr.Dataset: return ds + def get_scenario_data(self, scenario_name): + """ + Extract data for a specific scenario as a DataFrame. + + Args: + scenario_name: Name of the scenario to extract + + Returns: + DataFrame containing all time series data for the specified scenario + + Raises: + ValueError: If scenario_name doesn't exist or collection doesn't have scenarios + """ + if self.scenarios is None: + raise ValueError("This TimeSeriesCollection doesn't have scenarios") + + if scenario_name not in self.scenarios: + raise ValueError(f"Scenario '{scenario_name}' not found in collection") + + # Create a DataFrame with data from all time series for this scenario + data_dict = {} + for name, ts in self.time_series_data.items(): + if hasattr(ts, '_has_scenarios') and ts._has_scenarios: + data_dict[name] = ts.active_data.sel(scenario=scenario_name).values + else: + # For time series without scenarios, use the same data for all scenarios + data_dict[name] = ts.active_data.values + + # Create DataFrame with the right index + df = pd.DataFrame(data_dict, index=self.timesteps) + return df + + def compare_scenarios(self, scenario1, scenario2, time_series_names=None): + """ + Compare data between two scenarios and return the differences. + + Args: + scenario1: First scenario to compare + scenario2: Second scenario to compare + time_series_names: Optional list of time series names to include (default: all) + + Returns: + DataFrame with differences between scenarios + """ + if self.scenarios is None: + raise ValueError("This TimeSeriesCollection doesn't have scenarios") + + if scenario1 not in self.scenarios or scenario2 not in self.scenarios: + raise ValueError(f"Scenarios '{scenario1}' and '{scenario2}' must exist in collection") + + # Get DataFrames for each scenario + df1 = self.get_scenario_data(scenario1) + df2 = self.get_scenario_data(scenario2) + + # Filter to specified time series if provided + if time_series_names is not None: + df1 = df1[time_series_names] + df2 = df2[time_series_names] + + # Calculate differences + diff_df = df1 - df2 + diff_df.name = f'Difference ({scenario1} - {scenario2})' + + return diff_df + + def scenario_summary(self): + """ + Generate a summary of all scenarios in the collection. + + Returns: + DataFrame with statistics for each time series by scenario + """ + if self.scenarios is None or len(self.scenarios) <= 1: + raise ValueError("This TimeSeriesCollection doesn't have multiple scenarios") + + # Create multi-level columns for the summary + columns = pd.MultiIndex.from_product([self.time_series_data.keys(), ['mean', 'min', 'max', 'std']]) + summary = pd.DataFrame(index=self.scenarios, columns=columns) + + # Calculate statistics for each time series in each scenario + for scenario in self.scenarios: + df = self.get_scenario_data(scenario) + + for ts_name in self.time_series_data.keys(): + if ts_name in df.columns: + summary.loc[scenario, (ts_name, 'mean')] = df[ts_name].mean() + summary.loc[scenario, (ts_name, 'min')] = df[ts_name].min() + summary.loc[scenario, (ts_name, 'max')] = df[ts_name].max() + summary.loc[scenario, (ts_name, 'std')] = df[ts_name].std() + + return summary def _update_time_series_active_states(self): """Update active timesteps and scenarios for all time series.""" for ts in self.time_series_data.values():
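How the three helpers compose, assuming a collection built with a scenario index (argument names follow the docstrings above; the demand value is made up):

    import pandas as pd
    from flixopt.core import TimeSeriesCollection

    timesteps = pd.date_range('2024-01-01', periods=24, freq='h', name='time')
    scenarios = pd.Index(['base', 'peak'], name='scenario')

    collection = TimeSeriesCollection(timesteps, scenarios=scenarios)
    collection.create_time_series(data=20.0, name='demand')

    base_df = collection.get_scenario_data('base')          # one DataFrame column per series
    diff_df = collection.compare_scenarios('base', 'peak')  # element-wise base - peak
    summary = collection.scenario_summary()                 # mean/min/max/std per scenario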
From b2aba8f79c8174e712ea00b36f47cc0a28951587 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 12:55:46 +0200 Subject: [PATCH 08/55] Move tests into a class --- tests/test_dataconverter.py | 122 +++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 59 deletions(-) diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 49f1438e7..2884c10a1 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -7,65 +7,69 @@ @pytest.fixture -def sample_time_index(request): - index = pd.date_range('2024-01-01', periods=5, freq='D', name='time') - return index +def sample_time_index(): + return pd.date_range('2024-01-01', periods=5, freq='D', name='time') + + + +class TestSingleDimensionConversion: + """Tests for converting data without scenarios (1D: time only)""" + + def test_scalar_conversion(self, sample_time_index): + # Test scalar conversion + result = DataConverter.as_dataarray(42, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (len(sample_time_index),) + assert result.dims == ('time',) + assert np.all(result.values == 42) + + def test_series_conversion(self, sample_time_index): + series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) + + # Test Series conversion + result = DataConverter.as_dataarray(series, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, series.values) + + def test_dataframe_conversion(self, sample_time_index): + # Create a single-column DataFrame + df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) + + # Test DataFrame conversion + result =
DataConverter.as_dataarray(df, sample_time_index) - assert isinstance(result, xr.DataArray) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values.flatten(), df['A'].values) - - -def test_ndarray_conversion(sample_time_index): - # Test 1D array conversion - arr_1d = np.array([1, 2, 3, 4, 5]) - result = DataConverter.as_dataarray(arr_1d, sample_time_index) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values, arr_1d) - - -def test_dataarray_conversion(sample_time_index): - # Create a DataArray - original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) - - # Test DataArray conversion - result = DataConverter.as_dataarray(original, sample_time_index) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values, original.values) - - # Ensure it's a copy - result[0] = 999 - assert original[0].item() == 1 # Original should be unchanged +def sample_time_index(): + return pd.date_range('2024-01-01', periods=5, freq='D', name='time') + + + + +class TestSingleDimensionConversion: + """Tests for converting data without scenarios (1D: time only)""" + + def test_scalar_conversion(self, sample_time_index): + # Test scalar conversion + result = DataConverter.as_dataarray(42, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (len(sample_time_index),) + assert result.dims == ('time',) + assert np.all(result.values == 42) + + def test_series_conversion(self, sample_time_index): + series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) + + # Test Series conversion + result = DataConverter.as_dataarray(series, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, series.values) + + def test_dataframe_conversion(self, sample_time_index): + # Create a single-column DataFrame + df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) + + # Test DataFrame conversion + result = DataConverter.as_dataarray(df, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values.flatten(), df['A'].values) + + def test_ndarray_conversion(self, sample_time_index): + # Test 1D array conversion + arr_1d = np.array([1, 2, 3, 4, 5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, arr_1d) + + def test_dataarray_conversion(self, sample_time_index): + # Create a DataArray + original = xr.DataArray( + data=np.array([1, 2, 3, 4, 5]), + coords={'time': sample_time_index}, + dims=['time'] + ) + + # Test DataArray conversion + result = DataConverter.as_dataarray(original, sample_time_index) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, original.values) + + # Ensure it's a copy + result[0] = 999 + assert original[0].item() == 1 # Original should be unchanged def test_invalid_inputs(sample_time_index): From 7993df7932ee18033cdf242331c679027b15ed59 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:13:21 +0200 Subject: [PATCH 09/55] Improve DataConverter --- flixopt/core.py | 308 +++++++++++++++++++++++++++++++----- tests/test_dataconverter.py | 244 
++++++++++++++++++++++++++++ 2 files changed, 509 insertions(+), 43 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index acf528192..4fd98a3e7 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -40,67 +40,289 @@ class ConversionError(Exception): class DataConverter: """ - Converts various data types into xarray.DataArray with a timesteps index. - - Supports: scalars, arrays, Series, DataFrames, and DataArrays. + Converts various data types into xarray.DataArray with timesteps and optional scenarios dimensions. + + Supports: + - Scalar values (broadcast to all timesteps/scenarios) + - 1D arrays (mapped to timesteps, broadcast to scenarios if provided) + - 2D arrays (mapped to scenarios × timesteps if dimensions match) + - Series with time index (broadcast to scenarios if provided) + - DataFrames with time index and a single column (broadcast to scenarios if provided) + - Series/DataFrames with MultiIndex (scenario, time) + - Existing DataArrays """ @staticmethod - def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None) -> xr.DataArray: - """Convert data to xarray.DataArray with specified timesteps index.""" - if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: - raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') - if not timesteps.name == 'time': - raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}') + def as_dataarray( + data: NumericData, timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None + ) -> xr.DataArray: + """ + Convert data to xarray.DataArray with specified timesteps and optional scenarios dimensions. - if scenarios is not None: - if not isinstance(scenarios, pd.Index) or len(scenarios) == 0: - raise ValueError(f'Scenarios must be a non-empty Index, got {type(scenarios).__name__}') - if not scenarios.name == 'scenario': - raise ConversionError(f'Scenarios Index is not named correctly. 
Must be named "scenario", got {scenarios.name=}') + Args: + data: The data to convert (scalar, array, Series, DataFrame, or DataArray) + timesteps: DatetimeIndex representing the time dimension (must be named 'time') + scenarios: Optional Index representing scenarios (must be named 'scenario') + + Returns: + DataArray with the converted data + Raises: + ValueError: If timesteps or scenarios are invalid + ConversionError: If the data cannot be converted to the expected dimensions + """ + # Validate inputs + DataConverter._validate_timesteps(timesteps) if scenarios is not None: - coords = [scenarios, timesteps] - dims = ['scenario', 'time'] - expected_shape = (len(scenarios), len(timesteps)) - else: - coords = [timesteps] - dims = ['time'] - expected_shape = (len(timesteps),) + DataConverter._validate_scenarios(scenarios) + + # Determine dimensions and coordinates + coords, dims, expected_shape = DataConverter._get_dimensions(timesteps, scenarios) try: + # Convert different data types using specialized methods if isinstance(data, (int, float, np.integer, np.floating)): - return xr.DataArray(data, coords=coords, dims=dims) + return DataConverter._convert_scalar(data, coords, dims) + elif isinstance(data, pd.DataFrame): - if not data.index.equals(timesteps): - raise ConversionError("DataFrame index doesn't match timesteps index") - if not len(data.columns) == 1: - raise ConversionError('DataFrame must have exactly one column') - return xr.DataArray(data.values.flatten(), coords=coords, dims=dims) + return DataConverter._convert_dataframe(data, timesteps, scenarios, coords, dims) + elif isinstance(data, pd.Series): - if not data.index.equals(timesteps): - raise ConversionError("Series index doesn't match timesteps index") - return xr.DataArray(data.values, coords=coords, dims=dims) + return DataConverter._convert_series(data, timesteps, scenarios, coords, dims) + elif isinstance(data, np.ndarray): - if data.ndim != 1: - raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}') - elif data.shape[0] != expected_shape[0]: - raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}") - return xr.DataArray(data, coords=coords, dims=dims) + return DataConverter._convert_ndarray(data, timesteps, scenarios, coords, dims, expected_shape) + elif isinstance(data, xr.DataArray): - if data.dims != tuple(dims): - raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}") - if data.sizes[dims[0]] != len(coords[0]): - raise ConversionError( - f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}" - ) - return data.copy(deep=True) + return DataConverter._convert_dataarray(data, timesteps, scenarios, coords, dims) + else: raise ConversionError(f'Unsupported type: {type(data).__name__}') + except Exception as e: if isinstance(e, ConversionError): raise - raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e + raise ConversionError(f'Converting {type(data)} to DataArray raised an error: {str(e)}') from e + + @staticmethod + def _validate_timesteps(timesteps: pd.DatetimeIndex) -> None: + """Validate that timesteps is a properly named non-empty DatetimeIndex.""" + if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: + raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') + if timesteps.name != 'time': + raise ConversionError(f'DatetimeIndex must be named "time", got {timesteps.name=}') + + @staticmethod + 
def _validate_scenarios(scenarios: pd.Index) -> None: + """Validate that scenarios is a properly named non-empty Index.""" + if not isinstance(scenarios, pd.Index) or len(scenarios) == 0: + raise ValueError(f'Scenarios must be a non-empty Index, got {type(scenarios).__name__}') + if scenarios.name != 'scenario': + raise ConversionError(f'Scenarios Index must be named "scenario", got {scenarios.name=}') + + @staticmethod + def _get_dimensions(timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None): + """Create the coordinates, dimensions, and expected shape for the output DataArray.""" + if scenarios is not None: + coords = {'scenario': scenarios, 'time': timesteps} + dims = ('scenario', 'time') + expected_shape = (len(scenarios), len(timesteps)) + else: + coords = {'time': timesteps} + dims = ('time',) + expected_shape = (len(timesteps),) + + return coords, dims, expected_shape + + @staticmethod + def _convert_scalar(data, coords, dims): + """Convert a scalar value to a DataArray.""" + return xr.DataArray(data, coords=coords, dims=dims) + + @staticmethod + def _convert_dataframe(df, timesteps, scenarios, coords, dims): + """Convert a DataFrame to a DataArray.""" + # Case 1: DataFrame with MultiIndex (scenario, time) + if ( + isinstance(df.index, pd.MultiIndex) + and len(df.index.names) == 2 + and 'scenario' in df.index.names + and 'time' in df.index.names + and scenarios is not None + ): + return DataConverter._convert_multi_index_dataframe(df, timesteps, scenarios, coords, dims) + + # Case 2: Standard DataFrame with time index + elif not isinstance(df.index, pd.MultiIndex): + return DataConverter._convert_standard_dataframe(df, timesteps, scenarios, coords, dims) + + else: + raise ConversionError('Unsupported DataFrame index structure') + + @staticmethod + def _convert_multi_index_dataframe(df, timesteps, scenarios, coords, dims): + """Convert a DataFrame with MultiIndex (scenario, time) to a DataArray.""" + # Validate that the index contains the expected values + if not set(df.index.get_level_values('time')).issubset(set(timesteps)): + raise ConversionError("DataFrame time index doesn't match or isn't a subset of timesteps") + if not set(df.index.get_level_values('scenario')).issubset(set(scenarios)): + raise ConversionError("DataFrame scenario index doesn't match or isn't a subset of scenarios") + + # Ensure single column + if len(df.columns) != 1: + raise ConversionError('DataFrame must have exactly one column') + + # Reindex to ensure complete coverage and correct order + multi_idx = pd.MultiIndex.from_product([scenarios, timesteps], names=['scenario', 'time']) + reindexed = df.reindex(multi_idx).iloc[:, 0] + + # Reshape to 2D array + reshaped = reindexed.values.reshape(len(scenarios), len(timesteps)) + return xr.DataArray(reshaped, coords=coords, dims=dims) + + @staticmethod + def _convert_standard_dataframe(df, timesteps, scenarios, coords, dims): + """Convert a standard DataFrame with time index to a DataArray.""" + if not df.index.equals(timesteps): + raise ConversionError("DataFrame index doesn't match timesteps index") + if len(df.columns) != 1: + raise ConversionError('DataFrame must have exactly one column') + + # Get values + values = df.values.flatten() + + if scenarios is not None: + # Broadcast to scenarios dimension + values = np.tile(values, (len(scenarios), 1)) + + return xr.DataArray(values, coords=coords, dims=dims) + + @staticmethod + def _convert_series(series, timesteps, scenarios, coords, dims): + """Convert a Series to a DataArray.""" + # Case 1: 
Series with MultiIndex (scenario, time) + if ( + isinstance(series.index, pd.MultiIndex) + and len(series.index.names) == 2 + and 'scenario' in series.index.names + and 'time' in series.index.names + and scenarios is not None + ): + return DataConverter._convert_multi_index_series(series, timesteps, scenarios, coords, dims) + + # Case 2: Standard Series with time index + elif not isinstance(series.index, pd.MultiIndex): + return DataConverter._convert_standard_series(series, timesteps, scenarios, coords, dims) + + else: + raise ConversionError('Unsupported Series index structure') + + @staticmethod + def _convert_multi_index_series(series, timesteps, scenarios, coords, dims): + """Convert a Series with MultiIndex (scenario, time) to a DataArray.""" + # Validate that the index contains the expected values + if not set(series.index.get_level_values('time')).issubset(set(timesteps)): + raise ConversionError("Series time index doesn't match or isn't a subset of timesteps") + if not set(series.index.get_level_values('scenario')).issubset(set(scenarios)): + raise ConversionError("Series scenario index doesn't match or isn't a subset of scenarios") + + # Reindex to ensure complete coverage and correct order + multi_idx = pd.MultiIndex.from_product([scenarios, timesteps], names=['scenario', 'time']) + reindexed = series.reindex(multi_idx) + + # Reshape to 2D array + reshaped = reindexed.values.reshape(len(scenarios), len(timesteps)) + return xr.DataArray(reshaped, coords=coords, dims=dims) + + @staticmethod + def _convert_standard_series(series, timesteps, scenarios, coords, dims): + """Convert a standard Series with time index to a DataArray.""" + if not series.index.equals(timesteps): + raise ConversionError("Series index doesn't match timesteps index") + + # Get values + values = series.values + + if scenarios is not None: + # Broadcast to scenarios dimension + values = np.tile(values, (len(scenarios), 1)) + + return xr.DataArray(values, coords=coords, dims=dims) + + @staticmethod + def _convert_ndarray(arr, timesteps, scenarios, coords, dims, expected_shape): + """Convert a numpy array to a DataArray.""" + # Case 1: With scenarios - array can be 1D or 2D + if scenarios is not None: + return DataConverter._convert_ndarray_with_scenarios( + arr, timesteps, scenarios, coords, dims, expected_shape + ) + + # Case 2: Without scenarios - array must be 1D + else: + return DataConverter._convert_ndarray_without_scenarios(arr, timesteps, coords, dims) + + @staticmethod + def _convert_ndarray_with_scenarios(arr, timesteps, scenarios, coords, dims, expected_shape): + """Convert a numpy array to a DataArray with scenarios dimension.""" + if arr.ndim == 1: + # 1D array should match timesteps and be broadcast to scenarios + if arr.shape[0] != len(timesteps): + raise ConversionError(f"1D array length {arr.shape[0]} doesn't match timesteps length {len(timesteps)}") + # Broadcast to scenarios + values = np.tile(arr, (len(scenarios), 1)) + return xr.DataArray(values, coords=coords, dims=dims) + + elif arr.ndim == 2: + # 2D array should match (scenarios, timesteps) + if arr.shape != expected_shape: + raise ConversionError(f"2D array shape {arr.shape} doesn't match expected shape {expected_shape}") + return xr.DataArray(arr, coords=coords, dims=dims) + + else: + raise ConversionError(f'Array must be 1D or 2D, got {arr.ndim}D') + + @staticmethod + def _convert_ndarray_without_scenarios(arr, timesteps, coords, dims): + """Convert a numpy array to a DataArray without scenarios dimension.""" + if arr.ndim != 1: + 
raise ConversionError(f'Without scenarios, array must be 1D, got {arr.ndim}D') + if arr.shape[0] != len(timesteps): + raise ConversionError(f"Array shape {arr.shape} doesn't match expected length {len(timesteps)}") + return xr.DataArray(arr, coords=coords, dims=dims) + + @staticmethod + def _convert_dataarray(da, timesteps, scenarios, coords, dims): + """Convert an existing DataArray to a new DataArray with the desired dimensions.""" + # Case 1: DataArray with only time dimension when scenarios are provided + if scenarios is not None and set(da.dims) == {'time'}: + return DataConverter._broadcast_time_only_dataarray(da, timesteps, scenarios, coords, dims) + + # Case 2: DataArray dimensions should match expected + elif set(da.dims) != set(dims): + raise ConversionError(f"DataArray dimensions {da.dims} don't match expected {dims}") + + # Validate dimensions sizes + for dim in dims: + if dim in da.dims and da.sizes[dim] != len(coords[dim]): + raise ConversionError( + f"DataArray dimension '{dim}' length {da.sizes[dim]} doesn't match expected {len(coords[dim])}" + ) + + # Create a new DataArray with our coordinates to ensure consistency + result = xr.DataArray(da.values, coords=coords, dims=dims) + return result + + @staticmethod + def _broadcast_time_only_dataarray(da, timesteps, scenarios, coords, dims): + """Broadcast a time-only DataArray to include the scenarios dimension.""" + # Ensure the time dimension is compatible + if not set(da.coords['time'].values).issubset(set(timesteps)): + raise ConversionError("DataArray time coordinates aren't compatible with timesteps") + + # Broadcast to scenarios + values = np.tile(da.values, (len(scenarios), 1)) + return xr.DataArray(values, coords=coords, dims=dims) class TimeSeriesData: diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 2884c10a1..19f9c47e5 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -11,6 +11,9 @@ def sample_time_index(): return pd.date_range('2024-01-01', periods=5, freq='D', name='time') +@pytest.fixture +def sample_scenario_index(): + return pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario') class TestSingleDimensionConversion: @@ -72,6 +75,247 @@ def test_dataarray_conversion(self, sample_time_index): assert original[0].item() == 1 # Original should be unchanged +class TestMultiDimensionConversion: + """Tests for converting data with scenarios (2D: scenario × time)""" + + def test_scalar_with_scenarios(self, sample_time_index, sample_scenario_index): + # Convert scalar with scenario dimension + result = DataConverter.as_dataarray(42, sample_time_index, sample_scenario_index) + + assert isinstance(result, xr.DataArray) + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert result.dims == ('scenario', 'time') + assert np.all(result.values == 42) + assert set(result.coords['scenario'].values) == set(sample_scenario_index.values) + assert set(result.coords['time'].values) == set(sample_time_index.values) + + def test_series_with_scenarios(self, sample_time_index, sample_scenario_index): + # Create time series data + series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) + + # Convert with scenario dimension + result = DataConverter.as_dataarray(series, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert result.dims == ('scenario', 'time') + + # Values should be broadcast to all scenarios + for scenario in sample_scenario_index: + scenario_slice 
= result.sel(scenario=scenario) + assert np.array_equal(scenario_slice.values, series.values) + + def test_dataframe_with_scenarios(self, sample_time_index, sample_scenario_index): + # Create a single-column DataFrame + df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) + + # Convert with scenario dimension + result = DataConverter.as_dataarray(df, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert result.dims == ('scenario', 'time') + + # Values should be broadcast to all scenarios + for scenario in sample_scenario_index: + scenario_slice = result.sel(scenario=scenario) + assert np.array_equal(scenario_slice.values, df['A'].values) + + def test_ndarray_with_scenarios(self, sample_time_index, sample_scenario_index): + # Test multi-scenario array conversion + # For multi-dimensional, the first dimension should match number of scenarios + arr_2d = np.array( + [ + [1, 2, 3, 4, 5], # baseline scenario + [6, 7, 8, 9, 10], # high_demand scenario + [11, 12, 13, 14, 15], # low_price scenario + ] + ) + + result = DataConverter.as_dataarray(arr_2d, sample_time_index, sample_scenario_index) + + assert result.shape == (3, 5) + assert result.dims == ('scenario', 'time') + + # Check that each scenario has correct values + assert np.array_equal(result.sel(scenario='baseline').values, arr_2d[0]) + assert np.array_equal(result.sel(scenario='high_demand').values, arr_2d[1]) + assert np.array_equal(result.sel(scenario='low_price').values, arr_2d[2]) + + def test_dataarray_with_scenarios(self, sample_time_index, sample_scenario_index): + # Create a multi-scenario DataArray + original = xr.DataArray( + data=np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]), + coords={'scenario': sample_scenario_index, 'time': sample_time_index}, + dims=['scenario', 'time'], + ) + + # Test conversion + result = DataConverter.as_dataarray(original, sample_time_index, sample_scenario_index) + + assert result.shape == (3, 5) + assert result.dims == ('scenario', 'time') + assert np.array_equal(result.values, original.values) + + # Ensure it's a copy + result.loc['baseline'] = 999 + assert original.sel(scenario='baseline')[0].item() == 1 # Original should be unchanged + + +class TestInvalidInputs: + """Tests for invalid inputs and error handling""" + + def test_time_index_validation(self): + # Test with unnamed index + unnamed_index = pd.date_range('2024-01-01', periods=5, freq='D') + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, unnamed_index) + + # Test with empty index + empty_index = pd.DatetimeIndex([], name='time') + with pytest.raises(ValueError): + DataConverter.as_dataarray(42, empty_index) + + # Test with non-DatetimeIndex + wrong_type_index = pd.Index([1, 2, 3, 4, 5], name='time') + with pytest.raises(ValueError): + DataConverter.as_dataarray(42, wrong_type_index) + + def test_scenario_index_validation(self, sample_time_index): + # Test with unnamed scenario index + unnamed_index = pd.Index(['baseline', 'high_demand']) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, sample_time_index, unnamed_index) + + # Test with empty scenario index + empty_index = pd.Index([], name='scenario') + with pytest.raises(ValueError): + DataConverter.as_dataarray(42, sample_time_index, empty_index) + + def test_invalid_data_types(self, sample_time_index, sample_scenario_index): + # Test invalid input type (string) + with pytest.raises(ConversionError): + 
DataConverter.as_dataarray('invalid_string', sample_time_index) + + # Test invalid input type with scenarios + with pytest.raises(ConversionError): + DataConverter.as_dataarray('invalid_string', sample_time_index, sample_scenario_index) + + # Test unsupported complex object + with pytest.raises(ConversionError): + DataConverter.as_dataarray(object(), sample_time_index) + + def test_mismatched_input_dimensions(self, sample_time_index, sample_scenario_index): + # Test mismatched Series index + mismatched_series = pd.Series( + [1, 2, 3, 4, 5, 6], + index=pd.date_range('2025-01-01', periods=6, freq='D', name='time') + ) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(mismatched_series, sample_time_index) + + # Test DataFrame with multiple columns + df_multi_col = pd.DataFrame( + {'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]}, + index=sample_time_index + ) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(df_multi_col, sample_time_index) + + # Test mismatched array shape for time-only + with pytest.raises(ConversionError): + DataConverter.as_dataarray(np.array([1, 2, 3]), sample_time_index) # Wrong length + + # Test mismatched array shape for scenario × time + # Array shape should be (n_scenarios, n_timesteps) + wrong_shape_array = np.array([ + [1, 2, 3, 4], # Missing a timestep + [5, 6, 7, 8], + [9, 10, 11, 12] + ]) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(wrong_shape_array, sample_time_index, sample_scenario_index) + + # Test array with too many dimensions + with pytest.raises(ConversionError): + # 3D array not allowed + DataConverter.as_dataarray(np.ones((3, 5, 2)), sample_time_index, sample_scenario_index) + + def test_dataarray_dimension_mismatch(self, sample_time_index, sample_scenario_index): + # Create DataArray with wrong dimensions + wrong_dims = xr.DataArray( + data=np.array([1, 2, 3, 4, 5]), + coords={'wrong_dim': range(5)}, + dims=['wrong_dim'] + ) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(wrong_dims, sample_time_index) + + # Create DataArray with scenario but no time + wrong_dims_2 = xr.DataArray( + data=np.array([1, 2, 3]), + coords={'scenario': ['a', 'b', 'c']}, + dims=['scenario'] + ) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(wrong_dims_2, sample_time_index, sample_scenario_index) + + +class TestEdgeCases: + """Tests for edge cases and special scenarios""" + + def test_single_timestep(self, sample_scenario_index): + # Test with only one timestep + single_timestep = pd.DatetimeIndex(['2024-01-01'], name='time') + + # Scalar conversion + result = DataConverter.as_dataarray(42, single_timestep) + assert result.shape == (1,) + assert result.dims == ('time',) + + # With scenarios + result_with_scenarios = DataConverter.as_dataarray(42, single_timestep, sample_scenario_index) + assert result_with_scenarios.shape == (len(sample_scenario_index), 1) + assert result_with_scenarios.dims == ('scenario', 'time') + + def test_single_scenario(self, sample_time_index): + # Test with only one scenario + single_scenario = pd.Index(['baseline'], name='scenario') + + # Scalar conversion with single scenario + result = DataConverter.as_dataarray(42, sample_time_index, single_scenario) + assert result.shape == (1, len(sample_time_index)) + assert result.dims == ('scenario', 'time') + + # Array conversion with single scenario + arr = np.array([1, 2, 3, 4, 5]) + result_arr = DataConverter.as_dataarray(arr, sample_time_index, single_scenario) + assert result_arr.shape == (1, 5) + assert 
np.array_equal(result_arr.sel(scenario='baseline').values, arr) + + def test_different_scenario_order(self, sample_time_index): + # Test that scenario order is preserved + scenarios1 = pd.Index(['a', 'b', 'c'], name='scenario') + scenarios2 = pd.Index(['c', 'b', 'a'], name='scenario') + + # Create DataArray with first order + data = np.array([ + [1, 2, 3, 4, 5], # a + [6, 7, 8, 9, 10], # b + [11, 12, 13, 14, 15] # c + ]) + + result1 = DataConverter.as_dataarray(data, sample_time_index, scenarios1) + assert np.array_equal(result1.sel(scenario='a').values, [1, 2, 3, 4, 5]) + assert np.array_equal(result1.sel(scenario='c').values, [11, 12, 13, 14, 15]) + + # Create DataArray with second order + result2 = DataConverter.as_dataarray(data, sample_time_index, scenarios2) + # First row should match 'c' now + assert np.array_equal(result2.sel(scenario='c').values, [1, 2, 3, 4, 5]) + # Last row should match 'a' now + assert np.array_equal(result2.sel(scenario='a').values, [11, 12, 13, 14, 15]) + + + def test_invalid_inputs(sample_time_index): # Test invalid input type with pytest.raises(ConversionError): From ccaac59526c4fac81b0db294998bd892c6eaf7b5 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:17:50 +0200 Subject: [PATCH 10/55] Improve DataConverter --- flixopt/core.py | 332 +++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 303 insertions(+), 29 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 4fd98a3e7..bc5ca2d82 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -106,7 +106,16 @@ def as_dataarray( @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex) -> None: - """Validate that timesteps is a properly named non-empty DatetimeIndex.""" + """ + Validate that timesteps is a properly named non-empty DatetimeIndex. + + Args: + timesteps: The DatetimeIndex to validate + + Raises: + ValueError: If timesteps is not a non-empty DatetimeIndex + ConversionError: If timesteps is not named 'time' + """ if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') if timesteps.name != 'time': @@ -114,15 +123,38 @@ def _validate_timesteps(timesteps: pd.DatetimeIndex) -> None: @staticmethod def _validate_scenarios(scenarios: pd.Index) -> None: - """Validate that scenarios is a properly named non-empty Index.""" + """ + Validate that scenarios is a properly named non-empty Index. + + Args: + scenarios: The Index to validate + + Raises: + ValueError: If scenarios is not a non-empty Index + ConversionError: If scenarios is not named 'scenario' + """ if not isinstance(scenarios, pd.Index) or len(scenarios) == 0: raise ValueError(f'Scenarios must be a non-empty Index, got {type(scenarios).__name__}') if scenarios.name != 'scenario': raise ConversionError(f'Scenarios Index must be named "scenario", got {scenarios.name=}') @staticmethod - def _get_dimensions(timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None): - """Create the coordinates, dimensions, and expected shape for the output DataArray.""" + def _get_dimensions( + timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None + ) -> Tuple[Dict[str, pd.Index], Tuple[str, ...], Tuple[int, ...]]: + """ + Create the coordinates, dimensions, and expected shape for the output DataArray. 
+ + Args: + timesteps: The time index + scenarios: Optional scenario index + + Returns: + Tuple containing: + - Dict mapping dimension names to coordinate indexes + - Tuple of dimension names + - Tuple of expected shape + """ if scenarios is not None: coords = {'scenario': scenarios, 'time': timesteps} dims = ('scenario', 'time') @@ -135,13 +167,46 @@ def _get_dimensions(timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = return coords, dims, expected_shape @staticmethod - def _convert_scalar(data, coords, dims): - """Convert a scalar value to a DataArray.""" + def _convert_scalar( + data: Union[int, float, np.integer, np.floating], coords: Dict[str, pd.Index], dims: Tuple[str, ...] + ) -> xr.DataArray: + """ + Convert a scalar value to a DataArray. + + Args: + data: The scalar value to convert + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray with the scalar value broadcast to all coordinates + """ return xr.DataArray(data, coords=coords, dims=dims) @staticmethod - def _convert_dataframe(df, timesteps, scenarios, coords, dims): - """Convert a DataFrame to a DataArray.""" + def _convert_dataframe( + df: pd.DataFrame, + timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index], + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert a DataFrame to a DataArray. + + Args: + df: The DataFrame to convert + timesteps: The time index + scenarios: Optional scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the DataFrame + + Raises: + ConversionError: If the DataFrame cannot be converted to the expected dimensions + """ # Case 1: DataFrame with MultiIndex (scenario, time) if ( isinstance(df.index, pd.MultiIndex) @@ -157,11 +222,32 @@ def _convert_dataframe(df, timesteps, scenarios, coords, dims): return DataConverter._convert_standard_dataframe(df, timesteps, scenarios, coords, dims) else: - raise ConversionError('Unsupported DataFrame index structure') + raise ConversionError(f'Unsupported DataFrame index structure: {df}') @staticmethod - def _convert_multi_index_dataframe(df, timesteps, scenarios, coords, dims): - """Convert a DataFrame with MultiIndex (scenario, time) to a DataArray.""" + def _convert_multi_index_dataframe( + df: pd.DataFrame, + timesteps: pd.DatetimeIndex, + scenarios: pd.Index, + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert a DataFrame with MultiIndex (scenario, time) to a DataArray. 
+ + Args: + df: The DataFrame with MultiIndex to convert + timesteps: The time index + scenarios: The scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the MultiIndex DataFrame + + Raises: + ConversionError: If the DataFrame's index doesn't match expected or has multiple columns + """ # Validate that the index contains the expected values if not set(df.index.get_level_values('time')).issubset(set(timesteps)): raise ConversionError("DataFrame time index doesn't match or isn't a subset of timesteps") @@ -181,8 +267,29 @@ def _convert_multi_index_dataframe(df, timesteps, scenarios, coords, dims): return xr.DataArray(reshaped, coords=coords, dims=dims) @staticmethod - def _convert_standard_dataframe(df, timesteps, scenarios, coords, dims): - """Convert a standard DataFrame with time index to a DataArray.""" + def _convert_standard_dataframe( + df: pd.DataFrame, + timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index], + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert a standard DataFrame with time index to a DataArray. + + Args: + df: The DataFrame to convert + timesteps: The time index + scenarios: Optional scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the DataFrame + + Raises: + ConversionError: If the DataFrame's index doesn't match timesteps or has multiple columns + """ if not df.index.equals(timesteps): raise ConversionError("DataFrame index doesn't match timesteps index") if len(df.columns) != 1: @@ -198,8 +305,29 @@ def _convert_standard_dataframe(df, timesteps, scenarios, coords, dims): return xr.DataArray(values, coords=coords, dims=dims) @staticmethod - def _convert_series(series, timesteps, scenarios, coords, dims): - """Convert a Series to a DataArray.""" + def _convert_series( + series: pd.Series, + timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index], + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert a Series to a DataArray. + + Args: + series: The Series to convert + timesteps: The time index + scenarios: Optional scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the Series + + Raises: + ConversionError: If the Series cannot be converted to the expected dimensions + """ # Case 1: Series with MultiIndex (scenario, time) if ( isinstance(series.index, pd.MultiIndex) @@ -218,8 +346,29 @@ def _convert_series(series, timesteps, scenarios, coords, dims): raise ConversionError('Unsupported Series index structure') @staticmethod - def _convert_multi_index_series(series, timesteps, scenarios, coords, dims): - """Convert a Series with MultiIndex (scenario, time) to a DataArray.""" + def _convert_multi_index_series( + series: pd.Series, + timesteps: pd.DatetimeIndex, + scenarios: pd.Index, + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert a Series with MultiIndex (scenario, time) to a DataArray. 
+ + Args: + series: The Series with MultiIndex to convert + timesteps: The time index + scenarios: The scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the MultiIndex Series + + Raises: + ConversionError: If the Series' index doesn't match expected + """ # Validate that the index contains the expected values if not set(series.index.get_level_values('time')).issubset(set(timesteps)): raise ConversionError("Series time index doesn't match or isn't a subset of timesteps") @@ -235,8 +384,29 @@ def _convert_multi_index_series(series, timesteps, scenarios, coords, dims): return xr.DataArray(reshaped, coords=coords, dims=dims) @staticmethod - def _convert_standard_series(series, timesteps, scenarios, coords, dims): - """Convert a standard Series with time index to a DataArray.""" + def _convert_standard_series( + series: pd.Series, + timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index], + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert a standard Series with time index to a DataArray. + + Args: + series: The Series to convert + timesteps: The time index + scenarios: Optional scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the Series + + Raises: + ConversionError: If the Series' index doesn't match timesteps + """ if not series.index.equals(timesteps): raise ConversionError("Series index doesn't match timesteps index") @@ -250,8 +420,31 @@ def _convert_standard_series(series, timesteps, scenarios, coords, dims): return xr.DataArray(values, coords=coords, dims=dims) @staticmethod - def _convert_ndarray(arr, timesteps, scenarios, coords, dims, expected_shape): - """Convert a numpy array to a DataArray.""" + def _convert_ndarray( + arr: np.ndarray, + timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index], + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + expected_shape: Tuple[int, ...], + ) -> xr.DataArray: + """ + Convert a numpy array to a DataArray. + + Args: + arr: The numpy array to convert + timesteps: The time index + scenarios: Optional scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + expected_shape: Expected shape of the resulting array + + Returns: + DataArray created from the numpy array + + Raises: + ConversionError: If the array cannot be converted to the expected dimensions + """ # Case 1: With scenarios - array can be 1D or 2D if scenarios is not None: return DataConverter._convert_ndarray_with_scenarios( @@ -263,8 +456,31 @@ def _convert_ndarray(arr, timesteps, scenarios, coords, dims, expected_shape): return DataConverter._convert_ndarray_without_scenarios(arr, timesteps, coords, dims) @staticmethod - def _convert_ndarray_with_scenarios(arr, timesteps, scenarios, coords, dims, expected_shape): - """Convert a numpy array to a DataArray with scenarios dimension.""" + def _convert_ndarray_with_scenarios( + arr: np.ndarray, + timesteps: pd.DatetimeIndex, + scenarios: pd.Index, + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + expected_shape: Tuple[int, ...], + ) -> xr.DataArray: + """ + Convert a numpy array to a DataArray with scenarios dimension. 
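+
+        Example:
+            A minimal sketch (index values invented for illustration; shown
+            via the public as_dataarray entry point, which dispatches here):
+
+            >>> scenarios = pd.Index(['s1', 's2'], name='scenario')
+            >>> timesteps = pd.date_range('2024-01-01', periods=3, freq='D', name='time')
+            >>> # A 1D array of len(timesteps) is broadcast to every scenario:
+            >>> DataConverter.as_dataarray(np.array([1.0, 2.0, 3.0]), timesteps, scenarios).shape
+            (2, 3)
+            >>> # A 2D array must already be (n_scenarios, n_timesteps):
+            >>> DataConverter.as_dataarray(np.arange(6).reshape(2, 3), timesteps, scenarios).shape
+            (2, 3)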
+ + Args: + arr: The numpy array to convert + timesteps: The time index + scenarios: The scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + expected_shape: Expected shape (scenarios, timesteps) + + Returns: + DataArray created from the numpy array + + Raises: + ConversionError: If the array dimensions don't match expected + """ if arr.ndim == 1: # 1D array should match timesteps and be broadcast to scenarios if arr.shape[0] != len(timesteps): @@ -283,8 +499,24 @@ def _convert_ndarray_with_scenarios(arr, timesteps, scenarios, coords, dims, exp raise ConversionError(f'Array must be 1D or 2D, got {arr.ndim}D') @staticmethod - def _convert_ndarray_without_scenarios(arr, timesteps, coords, dims): - """Convert a numpy array to a DataArray without scenarios dimension.""" + def _convert_ndarray_without_scenarios( + arr: np.ndarray, timesteps: pd.DatetimeIndex, coords: Dict[str, pd.Index], dims: Tuple[str, ...] + ) -> xr.DataArray: + """ + Convert a numpy array to a DataArray without scenarios dimension. + + Args: + arr: The numpy array to convert + timesteps: The time index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray created from the numpy array + + Raises: + ConversionError: If the array isn't 1D or doesn't match timesteps length + """ if arr.ndim != 1: raise ConversionError(f'Without scenarios, array must be 1D, got {arr.ndim}D') if arr.shape[0] != len(timesteps): @@ -292,8 +524,29 @@ def _convert_ndarray_without_scenarios(arr, timesteps, coords, dims): return xr.DataArray(arr, coords=coords, dims=dims) @staticmethod - def _convert_dataarray(da, timesteps, scenarios, coords, dims): - """Convert an existing DataArray to a new DataArray with the desired dimensions.""" + def _convert_dataarray( + da: xr.DataArray, + timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index], + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Convert an existing DataArray to a new DataArray with the desired dimensions. + + Args: + da: The DataArray to convert + timesteps: The time index + scenarios: Optional scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + New DataArray with the specified coordinates and dimensions + + Raises: + ConversionError: If the DataArray dimensions don't match expected + """ # Case 1: DataArray with only time dimension when scenarios are provided if scenarios is not None and set(da.dims) == {'time'}: return DataConverter._broadcast_time_only_dataarray(da, timesteps, scenarios, coords, dims) @@ -314,8 +567,29 @@ def _convert_dataarray(da, timesteps, scenarios, coords, dims): return result @staticmethod - def _broadcast_time_only_dataarray(da, timesteps, scenarios, coords, dims): - """Broadcast a time-only DataArray to include the scenarios dimension.""" + def _broadcast_time_only_dataarray( + da: xr.DataArray, + timesteps: pd.DatetimeIndex, + scenarios: pd.Index, + coords: Dict[str, pd.Index], + dims: Tuple[str, ...], + ) -> xr.DataArray: + """ + Broadcast a time-only DataArray to include the scenarios dimension. 
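+
+        Example:
+            A minimal sketch (labels invented for illustration; shown via
+            the public as_dataarray entry point, which routes time-only
+            DataArrays through this helper when scenarios are requested):
+
+            >>> scenarios = pd.Index(['s1', 's2'], name='scenario')
+            >>> timesteps = pd.date_range('2024-01-01', periods=3, freq='D', name='time')
+            >>> da = xr.DataArray([1, 2, 3], coords={'time': timesteps}, dims=['time'])
+            >>> out = DataConverter.as_dataarray(da, timesteps, scenarios)
+            >>> out.shape
+            (2, 3)
+            >>> bool((out.sel(scenario='s1') == out.sel(scenario='s2')).all())
+            True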
+ + Args: + da: The DataArray with only time dimension + timesteps: The time index + scenarios: The scenario index + coords: Dictionary mapping dimension names to coordinate indexes + dims: Tuple of dimension names + + Returns: + DataArray with the data broadcast to include scenarios dimension + + Raises: + ConversionError: If the DataArray time coordinates aren't compatible with timesteps + """ # Ensure the time dimension is compatible if not set(da.coords['time'].values).issubset(set(timesteps)): raise ConversionError("DataArray time coordinates aren't compatible with timesteps") From 91a1bb89e1ed87b87a72d1573f2bb660ff701e1f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:34:20 +0200 Subject: [PATCH 11/55] Improve conversion and copying --- flixopt/core.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index bc5ca2d82..91cd3ab62 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -557,13 +557,11 @@ def _convert_dataarray( # Validate dimensions sizes for dim in dims: - if dim in da.dims and da.sizes[dim] != len(coords[dim]): - raise ConversionError( - f"DataArray dimension '{dim}' length {da.sizes[dim]} doesn't match expected {len(coords[dim])}" - ) + if not np.array_equal(da.coords[dim].values, coords[dim].values): + raise ConversionError(f"DataArray dimension '{dim}' doesn't match expected {coords[dim]}") # Create a new DataArray with our coordinates to ensure consistency - result = xr.DataArray(da.values, coords=coords, dims=dims) + result = xr.DataArray(da.values.copy(), coords=coords, dims=dims) return result @staticmethod @@ -595,7 +593,7 @@ def _broadcast_time_only_dataarray( raise ConversionError("DataArray time coordinates aren't compatible with timesteps") # Broadcast to scenarios - values = np.tile(da.values, (len(scenarios), 1)) + values = np.tile(da.values.copy(), (len(scenarios), 1)) return xr.DataArray(values, coords=coords, dims=dims) From 108afd3d504ad162716862a00c6471cd8e4aa7fe Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:36:27 +0200 Subject: [PATCH 12/55] Improve conversion and copying --- flixopt/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 91cd3ab62..9e3444b4f 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -589,7 +589,7 @@ def _broadcast_time_only_dataarray( ConversionError: If the DataArray time coordinates aren't compatible with timesteps """ # Ensure the time dimension is compatible - if not set(da.coords['time'].values).issubset(set(timesteps)): + if not np.array_equal(da.coords['time'].values, timesteps.values): raise ConversionError("DataArray time coordinates aren't compatible with timesteps") # Broadcast to scenarios From a1ce245358a718e75f1ab487d2a48cf0ec0c7ad3 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:39:01 +0200 Subject: [PATCH 13/55] Update tests --- tests/test_dataconverter.py | 627 +++++++++++++++++++++++++++++++++--- 1 file changed, 577 insertions(+), 50 deletions(-) diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 19f9c47e5..2afd2c547 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -3,7 +3,7 @@ import pytest import xarray as xr -from flixopt.core import ConversionError, DataConverter # Adjust this import to match your project structure +from flixopt.core import 
ConversionError, DataConverter, TimeSeries # Adjust this import to match your project structure @pytest.fixture @@ -16,55 +16,102 @@ def sample_scenario_index(): return pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario') +@pytest.fixture +def multi_index(sample_time_index, sample_scenario_index): + """Create a sample MultiIndex combining scenarios and times.""" + return pd.MultiIndex.from_product([sample_scenario_index, sample_time_index], names=['scenario', 'time']) + + class TestSingleDimensionConversion: - """Tests for converting data without scenarios (1D: time only)""" + """Tests for converting data without scenarios (1D: time only).""" def test_scalar_conversion(self, sample_time_index): - # Test scalar conversion + """Test converting a scalar value.""" + # Test with integer result = DataConverter.as_dataarray(42, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (len(sample_time_index),) assert result.dims == ('time',) assert np.all(result.values == 42) + # Test with float + result = DataConverter.as_dataarray(42.5, sample_time_index) + assert np.all(result.values == 42.5) + + # Test with numpy scalar types + result = DataConverter.as_dataarray(np.int64(42), sample_time_index) + assert np.all(result.values == 42) + result = DataConverter.as_dataarray(np.float32(42.5), sample_time_index) + assert np.all(result.values == 42.5) + def test_series_conversion(self, sample_time_index): + """Test converting a pandas Series.""" + # Test with integer values series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) - - # Test Series conversion result = DataConverter.as_dataarray(series, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values, series.values) + # Test with float values + series = pd.Series([1.1, 2.2, 3.3, 4.4, 5.5], index=sample_time_index) + result = DataConverter.as_dataarray(series, sample_time_index) + assert np.array_equal(result.values, series.values) + + # Test with mixed NA values + series = pd.Series([1, np.nan, 3, None, 5], index=sample_time_index) + result = DataConverter.as_dataarray(series, sample_time_index) + assert np.array_equal(np.isnan(result.values), np.isnan(series.values)) + assert np.array_equal(result.values[~np.isnan(result.values)], series.values[~np.isnan(series.values)]) + def test_dataframe_conversion(self, sample_time_index): - # Create a single-column DataFrame + """Test converting a pandas DataFrame.""" + # Test with a single-column DataFrame df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) - - # Test DataFrame conversion result = DataConverter.as_dataarray(df, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values.flatten(), df['A'].values) + # Test with float values + df = pd.DataFrame({'A': [1.1, 2.2, 3.3, 4.4, 5.5]}, index=sample_time_index) + result = DataConverter.as_dataarray(df, sample_time_index) + assert np.array_equal(result.values.flatten(), df['A'].values) + + # Test with NA values + df = pd.DataFrame({'A': [1, np.nan, 3, None, 5]}, index=sample_time_index) + result = DataConverter.as_dataarray(df, sample_time_index) + assert np.array_equal(np.isnan(result.values), np.isnan(df['A'].values)) + assert np.array_equal(result.values[~np.isnan(result.values)], df['A'].values[~np.isnan(df['A'].values)]) + def test_ndarray_conversion(self, sample_time_index): - # Test 1D array 
conversion + """Test converting a numpy ndarray.""" + # Test with integer 1D array arr_1d = np.array([1, 2, 3, 4, 5]) result = DataConverter.as_dataarray(arr_1d, sample_time_index) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values, arr_1d) + # Test with float 1D array + arr_1d = np.array([1.1, 2.2, 3.3, 4.4, 5.5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index) + assert np.array_equal(result.values, arr_1d) + + # Test with array containing NaN + arr_1d = np.array([1, np.nan, 3, np.nan, 5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index) + assert np.array_equal(np.isnan(result.values), np.isnan(arr_1d)) + assert np.array_equal(result.values[~np.isnan(result.values)], arr_1d[~np.isnan(arr_1d)]) + def test_dataarray_conversion(self, sample_time_index): - # Create a DataArray - original = xr.DataArray( - data=np.array([1, 2, 3, 4, 5]), - coords={'time': sample_time_index}, - dims=['time'] - ) + """Test converting an existing xarray DataArray.""" + # Create original DataArray + original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) - # Test DataArray conversion + # Convert and check result = DataConverter.as_dataarray(original, sample_time_index) assert result.shape == (5,) assert result.dims == ('time',) @@ -74,12 +121,21 @@ def test_dataarray_conversion(self, sample_time_index): result[0] = 999 assert original[0].item() == 1 # Original should be unchanged + # Test with different time coordinates but same length + different_times = pd.date_range('2025-01-01', periods=5, freq='D', name='time') + original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': different_times}, dims=['time']) + + # Should raise an error for mismatched time coordinates + with pytest.raises(ConversionError): + DataConverter.as_dataarray(original, sample_time_index) + class TestMultiDimensionConversion: - """Tests for converting data with scenarios (2D: scenario × time)""" + """Tests for converting data with scenarios (2D: scenario × time).""" def test_scalar_with_scenarios(self, sample_time_index, sample_scenario_index): - # Convert scalar with scenario dimension + """Test converting scalar values with scenario dimension.""" + # Test with integer result = DataConverter.as_dataarray(42, sample_time_index, sample_scenario_index) assert isinstance(result, xr.DataArray) @@ -89,7 +145,12 @@ def test_scalar_with_scenarios(self, sample_time_index, sample_scenario_index): assert set(result.coords['scenario'].values) == set(sample_scenario_index.values) assert set(result.coords['time'].values) == set(sample_time_index.values) + # Test with float + result = DataConverter.as_dataarray(42.5, sample_time_index, sample_scenario_index) + assert np.all(result.values == 42.5) + def test_series_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting Series with scenario dimension.""" # Create time series data series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) @@ -104,7 +165,71 @@ def test_series_with_scenarios(self, sample_time_index, sample_scenario_index): scenario_slice = result.sel(scenario=scenario) assert np.array_equal(scenario_slice.values, series.values) + # Test with series containing NaN + series = pd.Series([1, np.nan, 3, np.nan, 5], index=sample_time_index) + result = DataConverter.as_dataarray(series, sample_time_index, sample_scenario_index) + + # Each scenario should have the same pattern of NaNs + for scenario in sample_scenario_index: + 
scenario_slice = result.sel(scenario=scenario) + assert np.array_equal(np.isnan(scenario_slice.values), np.isnan(series.values)) + assert np.array_equal( + scenario_slice.values[~np.isnan(scenario_slice.values)], series.values[~np.isnan(series.values)] + ) + + def test_multi_index_series(self, sample_time_index, sample_scenario_index, multi_index): + """Test converting a Series with MultiIndex (scenario, time).""" + # Create a MultiIndex Series with scenario-specific values + values = [ + # baseline scenario + 10, + 20, + 30, + 40, + 50, + # high_demand scenario + 15, + 25, + 35, + 45, + 55, + # low_price scenario + 5, + 15, + 25, + 35, + 45, + ] + series_multi = pd.Series(values, index=multi_index) + + # Convert the MultiIndex Series + result = DataConverter.as_dataarray(series_multi, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert result.dims == ('scenario', 'time') + + # Check values for each scenario + baseline_values = result.sel(scenario='baseline').values + assert np.array_equal(baseline_values, [10, 20, 30, 40, 50]) + + high_demand_values = result.sel(scenario='high_demand').values + assert np.array_equal(high_demand_values, [15, 25, 35, 45, 55]) + + low_price_values = result.sel(scenario='low_price').values + assert np.array_equal(low_price_values, [5, 15, 25, 35, 45]) + + # Test with some missing values in the MultiIndex + incomplete_index = multi_index[:-2] # Remove last two entries + incomplete_values = values[:-2] # Remove corresponding values + incomplete_series = pd.Series(incomplete_values, index=incomplete_index) + + result = DataConverter.as_dataarray(incomplete_series, sample_time_index, sample_scenario_index) + + # The last value of low_price scenario should be NaN + assert np.isnan(result.sel(scenario='low_price').values[-1]) + def test_dataframe_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting DataFrame with scenario dimension.""" # Create a single-column DataFrame df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) @@ -119,9 +244,82 @@ def test_dataframe_with_scenarios(self, sample_time_index, sample_scenario_index scenario_slice = result.sel(scenario=scenario) assert np.array_equal(scenario_slice.values, df['A'].values) - def test_ndarray_with_scenarios(self, sample_time_index, sample_scenario_index): - # Test multi-scenario array conversion - # For multi-dimensional, the first dimension should match number of scenarios + def test_multi_index_dataframe(self, sample_time_index, sample_scenario_index, multi_index): + """Test converting a DataFrame with MultiIndex (scenario, time).""" + # Create a MultiIndex DataFrame with scenario-specific values + values = [ + # baseline scenario + 10, + 20, + 30, + 40, + 50, + # high_demand scenario + 15, + 25, + 35, + 45, + 55, + # low_price scenario + 5, + 15, + 25, + 35, + 45, + ] + df_multi = pd.DataFrame({'A': values}, index=multi_index) + + # Convert the MultiIndex DataFrame + result = DataConverter.as_dataarray(df_multi, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert result.dims == ('scenario', 'time') + + # Check values for each scenario + baseline_values = result.sel(scenario='baseline').values + assert np.array_equal(baseline_values, [10, 20, 30, 40, 50]) + + high_demand_values = result.sel(scenario='high_demand').values + assert np.array_equal(high_demand_values, [15, 25, 35, 45, 55]) + + low_price_values = 
result.sel(scenario='low_price').values + assert np.array_equal(low_price_values, [5, 15, 25, 35, 45]) + + # Test with missing values + incomplete_index = multi_index[:-2] # Remove last two entries + incomplete_values = values[:-2] # Remove corresponding values + incomplete_df = pd.DataFrame({'A': incomplete_values}, index=incomplete_index) + + result = DataConverter.as_dataarray(incomplete_df, sample_time_index, sample_scenario_index) + + # The last value of low_price scenario should be NaN + assert np.isnan(result.sel(scenario='low_price').values[-1]) + + # Test with multiple columns (should raise error) + df_multi_col = pd.DataFrame({'A': values, 'B': [v * 2 for v in values]}, index=multi_index) + + with pytest.raises(ConversionError): + DataConverter.as_dataarray(df_multi_col, sample_time_index, sample_scenario_index) + + def test_1d_array_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting 1D array with scenario dimension (broadcasting).""" + # Create 1D array matching timesteps length + arr_1d = np.array([1, 2, 3, 4, 5]) + + # Convert with scenarios + result = DataConverter.as_dataarray(arr_1d, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert result.dims == ('scenario', 'time') + + # Each scenario should have the same values (broadcasting) + for scenario in sample_scenario_index: + scenario_slice = result.sel(scenario=scenario) + assert np.array_equal(scenario_slice.values, arr_1d) + + def test_2d_array_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting 2D array with scenario dimension.""" + # Create 2D array with different values per scenario arr_2d = np.array( [ [1, 2, 3, 4, 5], # baseline scenario @@ -130,6 +328,7 @@ def test_ndarray_with_scenarios(self, sample_time_index, sample_scenario_index): ] ) + # Convert to DataArray result = DataConverter.as_dataarray(arr_2d, sample_time_index, sample_scenario_index) assert result.shape == (3, 5) @@ -141,6 +340,7 @@ def test_ndarray_with_scenarios(self, sample_time_index, sample_scenario_index): assert np.array_equal(result.sel(scenario='low_price').values, arr_2d[2]) def test_dataarray_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting an existing DataArray with scenarios.""" # Create a multi-scenario DataArray original = xr.DataArray( data=np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]), @@ -159,11 +359,27 @@ def test_dataarray_with_scenarios(self, sample_time_index, sample_scenario_index result.loc['baseline'] = 999 assert original.sel(scenario='baseline')[0].item() == 1 # Original should be unchanged + def test_time_only_dataarray_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test broadcasting a time-only DataArray to scenarios.""" + # Create a DataArray with only time dimension + time_only = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) + + # Convert with scenarios - should broadcast to all scenarios + result = DataConverter.as_dataarray(time_only, sample_time_index, sample_scenario_index) + + assert result.shape == (3, 5) + assert result.dims == ('scenario', 'time') + + # Each scenario should have same values + for scenario in sample_scenario_index: + assert np.array_equal(result.sel(scenario=scenario).values, time_only.values) + class TestInvalidInputs: - """Tests for invalid inputs and error handling""" + """Tests for invalid inputs and error handling.""" def 
test_time_index_validation(self): + """Test validation of time index.""" # Test with unnamed index unnamed_index = pd.date_range('2024-01-01', periods=5, freq='D') with pytest.raises(ConversionError): @@ -180,6 +396,7 @@ def test_time_index_validation(self): DataConverter.as_dataarray(42, wrong_type_index) def test_scenario_index_validation(self, sample_time_index): + """Test validation of scenario index.""" # Test with unnamed scenario index unnamed_index = pd.Index(['baseline', 'high_demand']) with pytest.raises(ConversionError): @@ -190,7 +407,12 @@ def test_scenario_index_validation(self, sample_time_index): with pytest.raises(ValueError): DataConverter.as_dataarray(42, sample_time_index, empty_index) + # Test with non-Index scenario + with pytest.raises(ValueError): + DataConverter.as_dataarray(42, sample_time_index, ['baseline', 'high_demand']) + def test_invalid_data_types(self, sample_time_index, sample_scenario_index): + """Test handling of invalid data types.""" # Test invalid input type (string) with pytest.raises(ConversionError): DataConverter.as_dataarray('invalid_string', sample_time_index) @@ -203,20 +425,21 @@ def test_invalid_data_types(self, sample_time_index, sample_scenario_index): with pytest.raises(ConversionError): DataConverter.as_dataarray(object(), sample_time_index) + # Test None value + with pytest.raises(ConversionError): + DataConverter.as_dataarray(None, sample_time_index) + def test_mismatched_input_dimensions(self, sample_time_index, sample_scenario_index): + """Test handling of mismatched input dimensions.""" # Test mismatched Series index mismatched_series = pd.Series( - [1, 2, 3, 4, 5, 6], - index=pd.date_range('2025-01-01', periods=6, freq='D', name='time') + [1, 2, 3, 4, 5, 6], index=pd.date_range('2025-01-01', periods=6, freq='D', name='time') ) with pytest.raises(ConversionError): DataConverter.as_dataarray(mismatched_series, sample_time_index) # Test DataFrame with multiple columns - df_multi_col = pd.DataFrame( - {'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]}, - index=sample_time_index - ) + df_multi_col = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]}, index=sample_time_index) with pytest.raises(ConversionError): DataConverter.as_dataarray(df_multi_col, sample_time_index) @@ -226,11 +449,13 @@ def test_mismatched_input_dimensions(self, sample_time_index, sample_scenario_in # Test mismatched array shape for scenario × time # Array shape should be (n_scenarios, n_timesteps) - wrong_shape_array = np.array([ - [1, 2, 3, 4], # Missing a timestep - [5, 6, 7, 8], - [9, 10, 11, 12] - ]) + wrong_shape_array = np.array( + [ + [1, 2, 3, 4], # Missing a timestep + [5, 6, 7, 8], + [9, 10, 11, 12], + ] + ) with pytest.raises(ConversionError): DataConverter.as_dataarray(wrong_shape_array, sample_time_index, sample_scenario_index) @@ -240,29 +465,35 @@ def test_mismatched_input_dimensions(self, sample_time_index, sample_scenario_in DataConverter.as_dataarray(np.ones((3, 5, 2)), sample_time_index, sample_scenario_index) def test_dataarray_dimension_mismatch(self, sample_time_index, sample_scenario_index): + """Test handling of mismatched DataArray dimensions.""" # Create DataArray with wrong dimensions - wrong_dims = xr.DataArray( - data=np.array([1, 2, 3, 4, 5]), - coords={'wrong_dim': range(5)}, - dims=['wrong_dim'] - ) + wrong_dims = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'wrong_dim': range(5)}, dims=['wrong_dim']) with pytest.raises(ConversionError): DataConverter.as_dataarray(wrong_dims, sample_time_index) # Create DataArray with 
scenario but no time - wrong_dims_2 = xr.DataArray( - data=np.array([1, 2, 3]), - coords={'scenario': ['a', 'b', 'c']}, - dims=['scenario'] - ) + wrong_dims_2 = xr.DataArray(data=np.array([1, 2, 3]), coords={'scenario': ['a', 'b', 'c']}, dims=['scenario']) with pytest.raises(ConversionError): DataConverter.as_dataarray(wrong_dims_2, sample_time_index, sample_scenario_index) + # Create DataArray with right dims but wrong length + wrong_length = xr.DataArray( + data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]), + coords={ + 'scenario': sample_scenario_index, + 'time': pd.date_range('2024-01-01', periods=3, freq='D', name='time'), + }, + dims=['scenario', 'time'], + ) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(wrong_length, sample_time_index, sample_scenario_index) + class TestEdgeCases: - """Tests for edge cases and special scenarios""" + """Tests for edge cases and special scenarios.""" def test_single_timestep(self, sample_scenario_index): + """Test with a single timestep.""" # Test with only one timestep single_timestep = pd.DatetimeIndex(['2024-01-01'], name='time') @@ -277,6 +508,7 @@ def test_single_timestep(self, sample_scenario_index): assert result_with_scenarios.dims == ('scenario', 'time') def test_single_scenario(self, sample_time_index): + """Test with a single scenario.""" # Test with only one scenario single_scenario = pd.Index(['baseline'], name='scenario') @@ -291,17 +523,26 @@ def test_single_scenario(self, sample_time_index): assert result_arr.shape == (1, 5) assert np.array_equal(result_arr.sel(scenario='baseline').values, arr) + # 2D array with single scenario + arr_2d = np.array([[1, 2, 3, 4, 5]]) # Note the extra dimension + result_arr_2d = DataConverter.as_dataarray(arr_2d, sample_time_index, single_scenario) + assert result_arr_2d.shape == (1, 5) + assert np.array_equal(result_arr_2d.sel(scenario='baseline').values, arr_2d[0]) + def test_different_scenario_order(self, sample_time_index): - # Test that scenario order is preserved + """Test that scenario order is preserved.""" + # Test with different scenario orders scenarios1 = pd.Index(['a', 'b', 'c'], name='scenario') scenarios2 = pd.Index(['c', 'b', 'a'], name='scenario') # Create DataArray with first order - data = np.array([ - [1, 2, 3, 4, 5], # a - [6, 7, 8, 9, 10], # b - [11, 12, 13, 14, 15] # c - ]) + data = np.array( + [ + [1, 2, 3, 4, 5], # a + [6, 7, 8, 9, 10], # b + [11, 12, 13, 14, 15], # c + ] + ) result1 = DataConverter.as_dataarray(data, sample_time_index, scenarios1) assert np.array_equal(result1.sel(scenario='a').values, [1, 2, 3, 4, 5]) @@ -314,6 +555,292 @@ def test_different_scenario_order(self, sample_time_index): # Last row should match 'a' now assert np.array_equal(result2.sel(scenario='a').values, [11, 12, 13, 14, 15]) + def test_all_nan_data(self, sample_time_index, sample_scenario_index): + """Test handling of all-NaN data.""" + # Create array of all NaNs + all_nan_array = np.full(5, np.nan) + result = DataConverter.as_dataarray(all_nan_array, sample_time_index) + assert np.all(np.isnan(result.values)) + + # With scenarios + result = DataConverter.as_dataarray(all_nan_array, sample_time_index, sample_scenario_index) + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + assert np.all(np.isnan(result.values)) + + # Series of all NaNs + all_nan_series = pd.Series([np.nan, np.nan, np.nan, np.nan, np.nan], index=sample_time_index) + result = DataConverter.as_dataarray(all_nan_series, sample_time_index, sample_scenario_index) + assert 
np.all(np.isnan(result.values)) + + def test_subset_index_multiindex(self, sample_time_index, sample_scenario_index): + """Test handling of MultiIndex Series/DataFrames with subset of expected indices.""" + # Create a subset of the expected indexes + subset_time = sample_time_index[1:4] # Middle subset + subset_scenarios = sample_scenario_index[0:2] # First two scenarios + + # Create MultiIndex with subset + subset_multi_index = pd.MultiIndex.from_product([subset_scenarios, subset_time], names=['scenario', 'time']) + + # Create Series with subset of data + values = [ + # baseline (3 values) + 20, + 30, + 40, + # high_demand (3 values) + 25, + 35, + 45, + ] + subset_series = pd.Series(values, index=subset_multi_index) + + # Convert and test + result = DataConverter.as_dataarray(subset_series, sample_time_index, sample_scenario_index) + + # Shape should be full size + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + + # Check values - present values should match + assert result.sel(scenario='baseline', time=subset_time[0]).item() == 20 + assert result.sel(scenario='high_demand', time=subset_time[1]).item() == 35 + + # Missing values should be NaN + assert np.isnan(result.sel(scenario='baseline', time=sample_time_index[0]).item()) + assert np.isnan(result.sel(scenario='low_price', time=sample_time_index[2]).item()) + + def test_mixed_data_types(self, sample_time_index, sample_scenario_index): + """Test conversion of mixed integer and float data.""" + # Create array with mixed types + mixed_array = np.array([1, 2.5, 3, 4.5, 5]) + result = DataConverter.as_dataarray(mixed_array, sample_time_index) + + # Result should be float dtype + assert np.issubdtype(result.dtype, np.floating) + assert np.array_equal(result.values, mixed_array) + + # With scenarios + result = DataConverter.as_dataarray(mixed_array, sample_time_index, sample_scenario_index) + assert np.issubdtype(result.dtype, np.floating) + for scenario in sample_scenario_index: + assert np.array_equal(result.sel(scenario=scenario).values, mixed_array) + + +class TestFunctionalUseCase: + """Tests for realistic use cases combining multiple features.""" + + def test_multiindex_with_nans_and_partial_data(self, sample_time_index, sample_scenario_index): + """Test MultiIndex Series with partial data and NaN values.""" + # Create a MultiIndex Series with missing values and partial coverage + time_subset = sample_time_index[1:4] # Middle 3 timestamps only + + # Build index with holes + idx_tuples = [] + for scenario in sample_scenario_index: + for time in time_subset: + # Skip some combinations to create holes + if scenario == 'baseline' and time == time_subset[0]: + continue + if scenario == 'high_demand' and time == time_subset[2]: + continue + idx_tuples.append((scenario, time)) + + partial_idx = pd.MultiIndex.from_tuples(idx_tuples, names=['scenario', 'time']) + + # Create values with some NaNs + values = [ + # baseline (2 values, skipping first) + 30, + 40, + # high_demand (2 values, skipping last) + 25, + 35, + # low_price (3 values) + 15, + np.nan, + 35, + ] + + # Create Series + partial_series = pd.Series(values, index=partial_idx) + + # Convert and test + result = DataConverter.as_dataarray(partial_series, sample_time_index, sample_scenario_index) + + # Shape should be full size + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + + # Check specific values + assert result.sel(scenario='baseline', time=time_subset[1]).item() == 30 + assert result.sel(scenario='high_demand', 
time=time_subset[0]).item() == 25 + assert np.isnan(result.sel(scenario='low_price', time=time_subset[1]).item()) + + # All skipped combinations should be NaN + assert np.isnan(result.sel(scenario='baseline', time=time_subset[0]).item()) + assert np.isnan(result.sel(scenario='high_demand', time=time_subset[2]).item()) + + # First and last timestamps should all be NaN (not in original subset) + assert np.all(np.isnan(result.sel(time=sample_time_index[0]).values)) + assert np.all(np.isnan(result.sel(time=sample_time_index[-1]).values)) + + def test_scenario_broadcast_with_nan_values(self, sample_time_index, sample_scenario_index): + """Test broadcasting a Series with NaN values to scenarios.""" + # Create Series with some NaN values + series = pd.Series([1, np.nan, 3, np.nan, 5], index=sample_time_index) + + # Convert with scenario broadcasting + result = DataConverter.as_dataarray(series, sample_time_index, sample_scenario_index) + + # All scenarios should have the same pattern of NaN values + for scenario in sample_scenario_index: + scenario_data = result.sel(scenario=scenario) + assert np.isnan(scenario_data[1].item()) + assert np.isnan(scenario_data[3].item()) + assert scenario_data[0].item() == 1 + assert scenario_data[2].item() == 3 + assert scenario_data[4].item() == 5 + + def test_large_dataset(self, sample_scenario_index): + """Test with a larger dataset to ensure performance.""" + # Create a larger timestep array (e.g., hourly for a year) + large_timesteps = pd.date_range( + '2024-01-01', + periods=8760, # Hours in a year + freq='H', + name='time', + ) + + # Create large 2D array (3 scenarios × 8760 hours) + large_data = np.random.rand(len(sample_scenario_index), len(large_timesteps)) + + # Convert and check + result = DataConverter.as_dataarray(large_data, large_timesteps, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(large_timesteps)) + assert result.dims == ('scenario', 'time') + assert np.array_equal(result.values, large_data) + + +class TestMultiScenarioArrayConversion: + """Tests specifically focused on array conversion with scenarios.""" + + def test_1d_array_broadcasting(self, sample_time_index, sample_scenario_index): + """Test that 1D arrays are properly broadcast to all scenarios.""" + arr_1d = np.array([1, 2, 3, 4, 5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + + # Each scenario should have identical values + for i, scenario in enumerate(sample_scenario_index): + assert np.array_equal(result.sel(scenario=scenario).values, arr_1d) + + # Modify one scenario's values + result.loc[dict(scenario=scenario)] = np.ones(len(sample_time_index)) * i + + # Ensure modifications are isolated to each scenario + for i, scenario in enumerate(sample_scenario_index): + assert np.all(result.sel(scenario=scenario).values == i) + + def test_2d_array_different_shapes(self, sample_time_index): + """Test different scenario shapes with 2D arrays.""" + # Test with 1 scenario + single_scenario = pd.Index(['baseline'], name='scenario') + arr_1_scenario = np.array([[1, 2, 3, 4, 5]]) + + result = DataConverter.as_dataarray(arr_1_scenario, sample_time_index, single_scenario) + assert result.shape == (1, len(sample_time_index)) + + # Test with 2 scenarios + two_scenarios = pd.Index(['baseline', 'high_demand'], name='scenario') + arr_2_scenarios = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) + + result = 
DataConverter.as_dataarray(arr_2_scenarios, sample_time_index, two_scenarios) + assert result.shape == (2, len(sample_time_index)) + assert np.array_equal(result.sel(scenario='baseline').values, arr_2_scenarios[0]) + assert np.array_equal(result.sel(scenario='high_demand').values, arr_2_scenarios[1]) + + # Test mismatched scenarios count + three_scenarios = pd.Index(['a', 'b', 'c'], name='scenario') + with pytest.raises(ConversionError): + DataConverter.as_dataarray(arr_2_scenarios, sample_time_index, three_scenarios) + + def test_array_handling_edge_cases(self, sample_time_index, sample_scenario_index): + """Test array edge cases.""" + # Test with boolean array + bool_array = np.array([True, False, True, False, True]) + result = DataConverter.as_dataarray(bool_array, sample_time_index, sample_scenario_index) + assert result.dtype == bool + assert result.shape == (len(sample_scenario_index), len(sample_time_index)) + + # Test with array containing infinite values + inf_array = np.array([1, np.inf, 3, -np.inf, 5]) + result = DataConverter.as_dataarray(inf_array, sample_time_index, sample_scenario_index) + for scenario in sample_scenario_index: + scenario_data = result.sel(scenario=scenario) + assert np.isinf(scenario_data[1].item()) + assert np.isinf(scenario_data[3].item()) + assert scenario_data[3].item() < 0 # Negative infinity + + +class TestScenarioReindexing: + """Tests for reindexing and coordinate preservation in DataConverter.""" + + def test_preserving_scenario_order(self, sample_time_index): + """Test that scenario order is preserved in converted DataArrays.""" + # Define scenarios in a specific order + scenarios = pd.Index(['scenario3', 'scenario1', 'scenario2'], name='scenario') + + # Create 2D array + data = np.array( + [ + [1, 2, 3, 4, 5], # scenario3 + [6, 7, 8, 9, 10], # scenario1 + [11, 12, 13, 14, 15], # scenario2 + ] + ) + + # Convert to DataArray + result = DataConverter.as_dataarray(data, sample_time_index, scenarios) + + # Verify order of scenarios is preserved + assert list(result.coords['scenario'].values) == list(scenarios) + + # Verify data for each scenario + assert np.array_equal(result.sel(scenario='scenario3').values, data[0]) + assert np.array_equal(result.sel(scenario='scenario1').values, data[1]) + assert np.array_equal(result.sel(scenario='scenario2').values, data[2]) + + def test_multiindex_reindexing(self, sample_time_index): + """Test reindexing of MultiIndex Series.""" + # Create scenarios with intentional different order + scenarios = pd.Index(['z_scenario', 'a_scenario', 'm_scenario'], name='scenario') + + # Create MultiIndex with different order than the target + source_scenarios = pd.Index(['a_scenario', 'm_scenario', 'z_scenario'], name='scenario') + multi_idx = pd.MultiIndex.from_product([source_scenarios, sample_time_index], names=['scenario', 'time']) + + # Create values - order should match the source index + values = [] + for i, scenario in enumerate(source_scenarios): + values.extend([i * 10 + j for j in range(1, len(sample_time_index) + 1)]) + + # Create Series + series = pd.Series(values, index=multi_idx) + + # Convert using the target scenario order + result = DataConverter.as_dataarray(series, sample_time_index, scenarios) + + # Verify scenario order matches the target + assert list(result.coords['scenario'].values) == list(scenarios) + + # Verify values are correctly indexed + assert np.array_equal(result.sel(scenario='a_scenario').values, [1, 2, 3, 4, 5]) + assert np.array_equal(result.sel(scenario='m_scenario').values, [11, 12, 13, 
14, 15]) + assert np.array_equal(result.sel(scenario='z_scenario').values, [21, 22, 23, 24, 25]) + + +if __name__ == '__main__': + pytest.main() def test_invalid_inputs(sample_time_index): From 7ae381400a4104e7e889e0ec221600b8c560ea1b Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:43:01 +0200 Subject: [PATCH 14/55] Update test --- tests/test_timeseries.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index 48c7ab7b2..bc9cd2fc1 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -73,7 +73,7 @@ def test_initialization_validation(self, sample_timesteps): multi_dim_data = xr.DataArray( [[1, 2, 3], [4, 5, 6]], coords={'dim1': [0, 1], 'time': sample_timesteps[:3]}, dims=['dim1', 'time'] ) - with pytest.raises(ValueError, match='dimensions of DataArray must be 1'): + with pytest.raises(ValueError, match='DataArray dimensions must be subset of'): TimeSeries(multi_dim_data, name='Multi-dim Series') def test_active_timesteps_getter_setter(self, sample_timeseries, sample_timesteps): From 23e5088bdb9f08bef4fb875626305ad9d8f1d231 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:55:34 +0200 Subject: [PATCH 15/55] Bugfix stats --- flixopt/core.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 9e3444b4f..3fad77a8c 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -819,7 +819,7 @@ def stats(self) -> str: Returns: String representation of data statistics """ - return get_numeric_stats(self.active_data, padd=0, by_scenario=(self._has_scenarios and len(self.active_scenarios) > 1)) + return get_numeric_stats(self.active_data, padd=0, by_scenario=True) def _update_active_data(self): """ @@ -1612,7 +1612,7 @@ def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_ for scenario in data.coords['scenario'].values: scenario_data = data.sel(scenario=scenario) if np.unique(scenario_data).size == 1: - results.append(f' {scenario}: {scenario_data.item():{format_spec}} (constant)') + results.append(f' {scenario}: {scenario_data.max().item():{format_spec}} (constant)') else: mean = scenario_data.mean().item() median = scenario_data.median().item() From d734a58e4c6026fbe4f2e63d9b97bcb3ad0848c0 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 13:57:05 +0200 Subject: [PATCH 16/55] Bugfix stored_data.setter --- flixopt/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 3fad77a8c..1c47e5277 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -910,7 +910,7 @@ def stored_data(self, value: NumericData): Args: value: New data to store """ - new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps) + new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps, scenarios=self.active_scenarios) # Skip if data is unchanged to avoid overwriting backup if new_data.equals(self._stored_data): From 0de50150dae77a5d04c8021fd1bc05b369136037 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 14:02:49 +0200 Subject: [PATCH 17/55] Improve __str__ of TimeSeries --- flixopt/core.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 1c47e5277..00647ed00 100644 --- 
a/flixopt/core.py +++ b/flixopt/core.py @@ -4,6 +4,7 @@ """ import inspect +import textwrap import json import logging import pathlib @@ -1022,7 +1023,7 @@ def __str__(self): Returns: Descriptive string with statistics """ - return f"TimeSeries '{self.name}': {self.stats}" + return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}' class TimeSeriesCollection: From f827d9196d10aa4a5c31d1360b3a05b89932c20d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 14:14:17 +0200 Subject: [PATCH 18/55] Bugfixes --- flixopt/core.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 00647ed00..0d8777168 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1340,11 +1340,7 @@ def get_scenario_data(self, scenario_name): # Create a DataFrame with data from all time series for this scenario data_dict = {} for name, ts in self.time_series_data.items(): - if hasattr(ts, '_has_scenarios') and ts._has_scenarios: - data_dict[name] = ts.select_scenario(scenario_name).values - else: - # For time series without scenarios, use the same data for all scenarios - data_dict[name] = ts.active_data.values + data_dict[name] = ts.active_data.sel(scenario=scenario_name).values # Create DataFrame with the right index df = pd.DataFrame(data_dict, index=self.timesteps) @@ -1407,6 +1403,7 @@ def scenario_summary(self): summary.loc[scenario, (ts_name, 'min')] = df[ts_name].min() summary.loc[scenario, (ts_name, 'max')] = df[ts_name].max() summary.loc[scenario, (ts_name, 'std')] = df[ts_name].std() + return summary def _update_time_series_active_states(self): """Update active timesteps and scenarios for all time series.""" From 63e59b222dea3871c7b154ab8519535c9c28acd2 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 14:14:32 +0200 Subject: [PATCH 19/55] Add tests --- tests/test_timeseries.py | 884 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 884 insertions(+) diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index bc9cd2fc1..562ebdf8b 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -603,3 +603,887 @@ def test_validation_and_errors(self, sample_timesteps): with pytest.raises(ValueError, match='must be a subset'): collection.activate_timesteps(invalid_timesteps) + + + +@pytest.fixture +def sample_scenario_index(): + """Create a sample scenario index with the required 'scenario' name.""" + return pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario') + + +@pytest.fixture +def sample_multi_index(sample_timesteps, sample_scenario_index): + """Create a sample MultiIndex with scenarios and timesteps.""" + return pd.MultiIndex.from_product( + [sample_scenario_index, sample_timesteps], + names=['scenario', 'time'] + ) + + +@pytest.fixture +def simple_scenario_dataarray(sample_timesteps, sample_scenario_index): + """Create a DataArray with both scenario and time dimensions.""" + data = np.array([ + [10, 20, 30, 40, 50], # baseline + [15, 25, 35, 45, 55], # high_demand + [5, 15, 25, 35, 45] # low_price + ]) + return xr.DataArray( + data=data, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'] + ) + + +@pytest.fixture +def sample_scenario_timeseries(simple_scenario_dataarray): + """Create a sample TimeSeries object with scenario dimension.""" + return TimeSeries(simple_scenario_dataarray, name='Test Scenario Series') + + +@pytest.fixture +def 
sample_scenario_collection(sample_timesteps, sample_scenario_index): + """Create a sample TimeSeriesCollection with scenarios.""" + return TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) + + +class TestTimeSeriesWithScenarios: + """Test suite for TimeSeries class with scenarios.""" + + def test_initialization_with_scenarios(self, simple_scenario_dataarray): + """Test initialization of TimeSeries with scenario dimension.""" + ts = TimeSeries(simple_scenario_dataarray, name='Scenario Series') + + # Check basic properties + assert ts.name == 'Scenario Series' + assert ts._has_scenarios is True + assert ts.active_scenarios is not None + assert len(ts.active_scenarios) == len(simple_scenario_dataarray.coords['scenario']) + + # Check data initialization + assert isinstance(ts.stored_data, xr.DataArray) + assert ts.stored_data.equals(simple_scenario_dataarray) + assert ts.active_data.equals(simple_scenario_dataarray) + + # Check backup was created + assert ts._backup.equals(simple_scenario_dataarray) + + # Check active timesteps and scenarios + assert ts.active_timesteps.equals(simple_scenario_dataarray.indexes['time']) + assert ts.active_scenarios.equals(simple_scenario_dataarray.indexes['scenario']) + + def test_reset_with_scenarios(self, sample_scenario_timeseries): + """Test reset method with scenarios.""" + # Get original full indexes + full_timesteps = sample_scenario_timeseries.active_timesteps + full_scenarios = sample_scenario_timeseries.active_scenarios + + # Set to subset timesteps and scenarios + subset_timesteps = full_timesteps[1:3] + subset_scenarios = full_scenarios[:2] + + sample_scenario_timeseries.active_timesteps = subset_timesteps + sample_scenario_timeseries.active_scenarios = subset_scenarios + + # Verify subsets were set + assert sample_scenario_timeseries.active_timesteps.equals(subset_timesteps) + assert sample_scenario_timeseries.active_scenarios.equals(subset_scenarios) + assert sample_scenario_timeseries.active_data.shape == (len(subset_scenarios), len(subset_timesteps)) + + # Reset + sample_scenario_timeseries.reset() + + # Should be back to full indexes + assert sample_scenario_timeseries.active_timesteps.equals(full_timesteps) + assert sample_scenario_timeseries.active_scenarios.equals(full_scenarios) + assert sample_scenario_timeseries.active_data.shape == (len(full_scenarios), len(full_timesteps)) + + def test_active_scenarios_getter_setter(self, sample_scenario_timeseries, sample_scenario_index): + """Test active_scenarios getter and setter.""" + # Initial state should use all scenarios + assert sample_scenario_timeseries.active_scenarios.equals(sample_scenario_index) + + # Set to a subset + subset_index = sample_scenario_index[:2] # First two scenarios + sample_scenario_timeseries.active_scenarios = subset_index + assert sample_scenario_timeseries.active_scenarios.equals(subset_index) + + # Active data should reflect the subset + assert sample_scenario_timeseries.active_data.equals( + sample_scenario_timeseries.stored_data.sel(scenario=subset_index) + ) + + # Reset to full index + sample_scenario_timeseries.active_scenarios = None + assert sample_scenario_timeseries.active_scenarios.equals(sample_scenario_index) + + # Test invalid type + with pytest.raises(TypeError, match='must be a pandas Index'): + sample_scenario_timeseries.active_scenarios = 'invalid' + + # Test invalid scenario names + invalid_scenarios = pd.Index(['invalid1', 'invalid2'], name='scenario') + with pytest.raises(ValueError, match='must be a subset'): + 
sample_scenario_timeseries.active_scenarios = invalid_scenarios + + def test_scenario_selection_methods(self, sample_scenario_timeseries): + """Test scenario selection helper methods.""" + # Test select_scenario + baseline_data = sample_scenario_timeseries.sel(scenario='baseline') + assert baseline_data.dims == ('time',) + assert np.array_equal(baseline_data.values, [10, 20, 30, 40, 50]) + + # Test with non-existent scenario + with pytest.raises(KeyError): + sample_scenario_timeseries.sel(scenario='nonexistent') + + # Test get_scenario_names + scenario_names = sample_scenario_timeseries.active_scenarios + assert len(scenario_names) == 3 + assert set(scenario_names) == {'baseline', 'high_demand', 'low_price'} + + def test_all_equal_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test all_equal property with scenarios.""" + # All values equal across all scenarios + equal_data = np.full((3, 5), 5) # All values are 5 + equal_dataarray = xr.DataArray( + data=equal_data, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'] + ) + ts_equal = TimeSeries(equal_dataarray, 'Equal Scenario Series') + assert ts_equal.all_equal is True + + # Equal within each scenario but different between scenarios + per_scenario_equal = np.array([ + [5, 5, 5, 5, 5], # baseline - all 5 + [10, 10, 10, 10, 10], # high_demand - all 10 + [15, 15, 15, 15, 15] # low_price - all 15 + ]) + per_scenario_dataarray = xr.DataArray( + data=per_scenario_equal, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'] + ) + ts_per_scenario = TimeSeries(per_scenario_dataarray, 'Per-Scenario Equal Series') + assert ts_per_scenario.all_equal is False + + # Not equal within at least one scenario + unequal_data = np.array([ + [5, 5, 5, 5, 5], # baseline - all equal + [10, 10, 10, 10, 10], # high_demand - all equal + [15, 15, 20, 15, 15] # low_price - not all equal + ]) + unequal_dataarray = xr.DataArray( + data=unequal_data, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'] + ) + ts_unequal = TimeSeries(unequal_dataarray, 'Unequal Scenario Series') + assert ts_unequal.all_equal is False + + def test_stats_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test stats property with scenarios.""" + # Create data with different patterns in each scenario + data = np.array([ + [10, 20, 30, 40, 50], # baseline - increasing + [100, 100, 100, 100, 100], # high_demand - constant + [50, 40, 30, 20, 10] # low_price - decreasing + ]) + dataarray = xr.DataArray( + data=data, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'] + ) + ts = TimeSeries(dataarray, 'Mixed Stats Series') + + # Get stats string + stats_str = ts.stats + + # Should include scenario information + assert "By scenario" in stats_str + assert "baseline" in stats_str + assert "high_demand" in stats_str + assert "low_price" in stats_str + + # Should include actual statistics + assert "mean" in stats_str + assert "min" in stats_str + assert "max" in stats_str + assert "std" in stats_str + assert "constant" in stats_str + + # Test with single active scenario + ts.active_scenarios = pd.Index(['baseline'], name='scenario') + single_stats_str = ts.stats + + # Should not include scenario breakdown + assert "By scenario" not in single_stats_str + assert "mean" in single_stats_str # Still has regular stats + + def test_stored_data_setter_with_scenarios(self, 
sample_scenario_timeseries, sample_timesteps, sample_scenario_index):
+        """Test stored_data setter with different scenario data types."""
+        # Test with 2D array
+        array_data = np.array([
+            [1, 2, 3, 4, 5],
+            [6, 7, 8, 9, 10],
+            [11, 12, 13, 14, 15]
+        ])
+        sample_scenario_timeseries.stored_data = array_data
+        assert np.array_equal(sample_scenario_timeseries.stored_data.values, array_data)
+
+        # Test with scalar (should broadcast to all scenarios and timesteps)
+        sample_scenario_timeseries.stored_data = 42
+        assert np.all(sample_scenario_timeseries.stored_data.values == 42)
+
+        # Test with another scenario DataArray
+        another_dataarray = xr.DataArray(
+            data=np.random.rand(3, 5),
+            coords={'scenario': sample_scenario_index, 'time': sample_timesteps},
+            dims=['scenario', 'time']
+        )
+        sample_scenario_timeseries.stored_data = another_dataarray
+        assert sample_scenario_timeseries.stored_data.equals(another_dataarray)
+
+        # Test with MultiIndex Series
+        multi_idx = pd.MultiIndex.from_product(
+            [sample_scenario_index, sample_timesteps],
+            names=['scenario', 'time']
+        )
+        series_values = np.arange(15)  # 15 = 3 scenarios * 5 timesteps
+        multi_series = pd.Series(series_values, index=multi_idx)
+
+        sample_scenario_timeseries.stored_data = multi_series
+        assert sample_scenario_timeseries.stored_data.shape == (3, 5)
+        # Verify the first scenario's values
+        assert np.array_equal(
+            sample_scenario_timeseries.sel(scenario='baseline').values,
+            series_values[:5]
+        )
+
+    def test_from_datasource_with_scenarios(self, sample_timesteps, sample_scenario_index):
+        """Test from_datasource class method with scenarios."""
+        # Test with 2D array
+        data = np.array([
+            [1, 2, 3, 4, 5],
+            [6, 7, 8, 9, 10],
+            [11, 12, 13, 14, 15]
+        ])
+        ts_array = TimeSeries.from_datasource(data, 'Array Series', sample_timesteps, scenarios=sample_scenario_index)
+        assert ts_array._has_scenarios
+        assert np.array_equal(ts_array.stored_data.values, data)
+
+        # Test with scalar
+        ts_scalar = TimeSeries.from_datasource(42, 'Scalar Series', sample_timesteps, scenarios=sample_scenario_index)
+        assert ts_scalar._has_scenarios
+        assert np.all(ts_scalar.stored_data.values == 42)
+
+        # TODO: Test with TimeSeriesData including scenarios
+
+    def test_to_json_from_json_with_scenarios(self, sample_scenario_timeseries):
+        """Test to_json and from_json methods with scenarios."""
+        # Test to_json (dictionary only)
+        json_dict = sample_scenario_timeseries.to_json()
+        assert json_dict['name'] == sample_scenario_timeseries.name
+        assert 'data' in json_dict
+        assert 'coords' in json_dict['data']
+        assert 'time' in json_dict['data']['coords']
+        assert 'scenario' in json_dict['data']['coords']
+
+        # Test to_json with file saving
+        with tempfile.TemporaryDirectory() as tmpdirname:
+            filepath = Path(tmpdirname) / 'scenario_timeseries.json'
+            sample_scenario_timeseries.to_json(filepath)
+            assert filepath.exists()
+
+            # Test from_json with file loading
+            loaded_ts = TimeSeries.from_json(path=filepath)
+            assert loaded_ts.name == sample_scenario_timeseries.name
+            assert loaded_ts._has_scenarios
+            assert np.array_equal(loaded_ts.stored_data.values, sample_scenario_timeseries.stored_data.values)
+            assert loaded_ts.active_scenarios.equals(sample_scenario_timeseries.active_scenarios)
+
+        # Test from_json with dictionary
+        loaded_ts_dict = TimeSeries.from_json(data=json_dict)
+        assert loaded_ts_dict.name == sample_scenario_timeseries.name
+        assert loaded_ts_dict._has_scenarios
+        assert 
np.array_equal(loaded_ts_dict.stored_data.values, sample_scenario_timeseries.stored_data.values) + assert loaded_ts_dict.active_scenarios.equals(sample_scenario_timeseries.active_scenarios) + + def test_arithmetic_with_scenarios(self, sample_scenario_timeseries, sample_timesteps, sample_scenario_index): + """Test arithmetic operations with scenarios.""" + # Create a second TimeSeries with scenarios + data2 = np.ones((3, 5)) # All ones + second_dataarray = xr.DataArray( + data=data2, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'] + ) + ts2 = TimeSeries(second_dataarray, 'Second Series') + + # Test operations between two scenario TimeSeries objects + result = sample_scenario_timeseries + ts2 + assert result.shape == (3, 5) + assert result.dims == ('scenario', 'time') + + # First scenario values should be increased by 1 + baseline_original = sample_scenario_timeseries.sel(scenario='baseline').values + baseline_result = result.sel(scenario='baseline').values + assert np.array_equal(baseline_result, baseline_original + 1) + + # Test operation with scalar + result_scalar = sample_scenario_timeseries * 2 + assert result_scalar.shape == (3, 5) + # All values should be doubled + assert np.array_equal( + result_scalar.sel(scenario='baseline').values, + baseline_original * 2 + ) + + def test_repr_and_str(self, sample_scenario_timeseries): + """Test __repr__ and __str__ methods with scenarios.""" + # Test __repr__ + repr_str = repr(sample_scenario_timeseries) + assert 'scenarios' in repr_str + assert str(len(sample_scenario_timeseries.active_scenarios)) in repr_str + + # Test __str__ + str_repr = str(sample_scenario_timeseries) + assert 'By scenario' in str_repr + # Should include the name + assert sample_scenario_timeseries.name in str_repr + + +class TestTimeSeriesCollectionWithScenarios: + """Test suite for TimeSeriesCollection with scenarios.""" + + def test_initialization_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test initialization with scenarios.""" + collection = TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) + + assert collection.all_timesteps.equals(sample_timesteps) + assert collection.all_scenarios.equals(sample_scenario_index) + assert len(collection) == 0 + + def test_create_time_series_with_scenarios(self, sample_scenario_collection): + """Test creating time series with scenarios.""" + # Test scalar (broadcasts to all scenarios) + ts1 = sample_scenario_collection.create_time_series(42, 'scalar_series') + assert ts1._has_scenarios + assert ts1.name == 'scalar_series' + assert ts1.active_data.shape == (3, 5) # 3 scenarios, 5 timesteps + assert np.all(ts1.active_data.values == 42) + + # Test 1D array (broadcasts to all scenarios) + data = np.array([1, 2, 3, 4, 5]) + ts2 = sample_scenario_collection.create_time_series(data, 'array_series') + assert ts2._has_scenarios + assert ts2.active_data.shape == (3, 5) + # Each scenario should have the same values + for scenario in sample_scenario_collection.scenarios: + assert np.array_equal(ts2.sel(scenario=scenario).values, data) + + # Test 2D array (one row per scenario) + data_2d = np.array([ + [10, 20, 30, 40, 50], + [15, 25, 35, 45, 55], + [5, 15, 25, 35, 45] + ]) + ts3 = sample_scenario_collection.create_time_series(data_2d, 'scenario_specific_series') + assert ts3._has_scenarios + assert ts3.active_data.shape == (3, 5) + # Each scenario should have its own values + assert np.array_equal(ts3.sel(scenario='baseline').values, data_2d[0]) + 
assert np.array_equal(ts3.sel(scenario='high_demand').values, data_2d[1])
+        assert np.array_equal(ts3.sel(scenario='low_price').values, data_2d[2])
+
+    def test_activate_scenarios(self, sample_scenario_collection, sample_scenario_index):
+        """Test activating scenarios."""
+        # Add some time series
+        sample_scenario_collection.create_time_series(42, 'scalar_series')
+        sample_scenario_collection.create_time_series(
+            np.array([
+                [1, 2, 3, 4, 5],
+                [6, 7, 8, 9, 10],
+                [11, 12, 13, 14, 15]
+            ]),
+            'array_series'
+        )
+
+        # Activate a subset of scenarios
+        subset_scenarios = sample_scenario_index[:2]  # First two scenarios
+        sample_scenario_collection.activate_timesteps(active_scenarios=subset_scenarios)
+
+        # Collection should have the subset
+        assert sample_scenario_collection.scenarios.equals(subset_scenarios)
+
+        # Time series should have the subset too
+        assert sample_scenario_collection['scalar_series'].active_scenarios.equals(subset_scenarios)
+        assert sample_scenario_collection['array_series'].active_scenarios.equals(subset_scenarios)
+
+        # Active data should reflect the subset
+        assert sample_scenario_collection['array_series'].active_data.shape == (2, 5)  # 2 scenarios, 5 timesteps
+
+        # Reset scenarios
+        sample_scenario_collection.reset()
+        assert sample_scenario_collection.scenarios.equals(sample_scenario_index)
+        assert sample_scenario_collection['scalar_series'].active_scenarios.equals(sample_scenario_index)
+
+    def test_to_dataframe_with_scenarios(self, sample_scenario_collection):
+        """Test conversion to DataFrame with scenarios."""
+        # Add some time series
+        sample_scenario_collection.create_time_series(42, 'constant_series')
+        sample_scenario_collection.create_time_series(
+            np.array([
+                [10, 20, 30, 40, 50],  # baseline
+                [15, 25, 35, 45, 55],  # high_demand
+                [5, 15, 25, 35, 45]    # low_price
+            ]),
+            'varying_series'
+        )
+
+        # Convert to DataFrame
+        df = sample_scenario_collection.to_dataframe('all')
+
+        # DataFrame should have MultiIndex with (scenario, time)
+        assert isinstance(df.index, pd.MultiIndex)
+        assert df.index.names == ['scenario', 'time']
+
+        # Should have correct number of rows (scenarios * timesteps incl. extra timestep)
+        assert len(df) == 18  # 3 scenarios * 6 timesteps (5 + 1 extra)
+
+        # Should have both series as columns
+        assert 'constant_series' in df.columns
+        assert 'varying_series' in df.columns
+
+        # Check values for specific scenario and time
+        baseline_t0 = df.loc[('baseline', sample_scenario_collection.timesteps[0])]
+        assert baseline_t0['constant_series'] == 42
+        assert baseline_t0['varying_series'] == 10
+
+    def test_to_dataset_with_scenarios(self, sample_scenario_collection):
+        """Test conversion to Dataset with scenarios."""
+        # Add some time series
+        sample_scenario_collection.create_time_series(42, 'constant_series')
+        sample_scenario_collection.create_time_series(
+            np.array([
+                [10, 20, 30, 40, 50],
+                [15, 25, 35, 45, 55],
+                [5, 15, 25, 35, 45]
+            ]),
+            'varying_series'
+        )
+
+        # Convert to Dataset
+        ds = sample_scenario_collection.to_dataset()
+
+        # Dataset should have both dimensions
+        assert 'scenario' in ds.dims
+        assert 'time' in ds.dims
+
+        # Should have both series as variables
+        assert 'constant_series' in ds
+        assert 'varying_series' in ds
+
+        # Check values for specific scenario and time
+        assert ds['varying_series'].sel(
+            scenario='baseline',
+            time=sample_scenario_collection.timesteps[0]
+        ).item() == 10
+
+    def test_get_scenario_data(self, sample_scenario_collection):
+        """Test get_scenario_data method."""
+        # Add some time series
+
sample_scenario_collection.create_time_series(42, 'constant_series') + sample_scenario_collection.create_time_series( + np.array([ + [10, 20, 30, 40, 50], + [15, 25, 35, 45, 55], + [5, 15, 25, 35, 45] + ]), + 'varying_series' + ) + + # Get data for one scenario + baseline_df = sample_scenario_collection.get_scenario_data('baseline') + + # Should be a DataFrame with time index + assert isinstance(baseline_df, pd.DataFrame) + assert baseline_df.index.name == 'time' + assert len(baseline_df) == 5 # 5 timesteps + + # Should have both series as columns + assert 'constant_series' in baseline_df.columns + assert 'varying_series' in baseline_df.columns + + # Check specific values + assert baseline_df['constant_series'].iloc[0] == 42 + assert baseline_df['varying_series'].iloc[0] == 10 + + # Test with invalid scenario + with pytest.raises(ValueError, match="Scenario 'invalid' not found"): + sample_scenario_collection.get_scenario_data('invalid') + + def test_compare_scenarios(self, sample_scenario_collection): + """Test compare_scenarios method.""" + # Add some time series + sample_scenario_collection.create_time_series( + np.array([ + [10, 20, 30, 40, 50], # baseline + [15, 25, 35, 45, 55], # high_demand + [5, 15, 25, 35, 45] # low_price + ]), + 'varying_series' + ) + + # Compare two scenarios + diff_df = sample_scenario_collection.compare_scenarios('baseline', 'high_demand') + + # Should be a DataFrame with time index + assert isinstance(diff_df, pd.DataFrame) + assert diff_df.index.name == 'time' + + # Should show differences (baseline - high_demand) + assert np.array_equal(diff_df['varying_series'].values, np.array([-5, -5, -5, -5, -5])) + + # Compare with specific time series + diff_specific = sample_scenario_collection.compare_scenarios( + 'baseline', 'low_price', time_series_names=['varying_series'] + ) + + # Should only include the specified time series + assert list(diff_specific.columns) == ['varying_series'] + + # Should show correct differences (baseline - low_price) + assert np.array_equal(diff_specific['varying_series'].values, np.array([5, 5, 5, 5, 5])) + + def test_scenario_summary(self, sample_scenario_collection): + """Test scenario_summary method.""" + # Add some time series with different patterns + sample_scenario_collection.create_time_series( + np.array([ + [10, 20, 30, 40, 50], # baseline - increasing + [100, 100, 100, 100, 100], # high_demand - constant + [50, 40, 30, 20, 10] # low_price - decreasing + ]), + 'varying_series' + ) + + # Get summary + summary = sample_scenario_collection.scenario_summary() + + # Should be a DataFrame with scenario index and MultiIndex columns + assert isinstance(summary, pd.DataFrame) + assert summary.index.name == 'scenario' + assert isinstance(summary.columns, pd.MultiIndex) + + # Should include statistics for each time series and scenario + assert ('varying_series', 'mean') in summary.columns + assert ('varying_series', 'min') in summary.columns + assert ('varying_series', 'max') in summary.columns + + # Check specific statistics + # Baseline (increasing): 10,20,30,40,50 + assert summary.loc['baseline', ('varying_series', 'mean')] == 30 + assert summary.loc['baseline', ('varying_series', 'min')] == 10 + assert summary.loc['baseline', ('varying_series', 'max')] == 50 + + # high_demand (constant): 100,100,100,100,100 + assert summary.loc['high_demand', ('varying_series', 'mean')] == 100 + assert summary.loc['high_demand', ('varying_series', 'std')] == 0 + + # low_price (decreasing): 50,40,30,20,10 + assert summary.loc['low_price', 
('varying_series', 'mean')] == 30 + assert summary.loc['low_price', ('varying_series', 'min')] == 10 + assert summary.loc['low_price', ('varying_series', 'max')] == 50 + + def test_insert_new_data_with_scenarios(self, sample_scenario_collection, sample_timesteps, sample_scenario_index): + """Test inserting new data with scenarios.""" + # Add some time series + sample_scenario_collection.create_time_series(42, 'constant_series') + sample_scenario_collection.create_time_series( + np.array([ + [10, 20, 30, 40, 50], + [15, 25, 35, 45, 55], + [5, 15, 25, 35, 45] + ]), + 'varying_series' + ) + + # Create new data with MultiIndex (scenario, time) + multi_idx = pd.MultiIndex.from_product( + [sample_scenario_index, sample_timesteps], + names=['scenario', 'time'] + ) + + new_data = pd.DataFrame( + { + 'constant_series': [100] * 15, # 3 scenarios * 5 timesteps + 'varying_series': np.arange(15) # Different value for each scenario-time combination + }, + index=multi_idx + ) + + # Insert data + sample_scenario_collection.insert_new_data(new_data) + + # Verify constant series updated + for scenario in sample_scenario_index: + assert np.all( + sample_scenario_collection['constant_series'] + .select_scenario(scenario) + .values == 100 + ) + + # Verify varying series updated with scenario-specific values + baseline_values = sample_scenario_collection['varying_series'].select_scenario('baseline').values + assert np.array_equal(baseline_values, np.arange(0, 5)) + + high_demand_values = sample_scenario_collection['varying_series'].select_scenario('high_demand').values + assert np.array_equal(high_demand_values, np.arange(5, 10)) + + low_price_values = sample_scenario_collection['varying_series'].select_scenario('low_price').values + assert np.array_equal(low_price_values, np.arange(10, 15)) + + # Test with partial data (missing some scenarios) + partial_idx = pd.MultiIndex.from_product( + [sample_scenario_index[:2], sample_timesteps], # Only first two scenarios + names=['scenario', 'time'] + ) + + partial_data = pd.DataFrame( + { + 'constant_series': [200] * 10, # 2 scenarios * 5 timesteps + 'varying_series': np.arange(100, 110) + }, + index=partial_idx + ) + + # Insert partial data + sample_scenario_collection.insert_new_data(partial_data) + + # First two scenarios should be updated + assert np.all( + sample_scenario_collection['constant_series'] + .select_scenario('baseline') + .values == 200 + ) + + assert np.all( + sample_scenario_collection['constant_series'] + .select_scenario('high_demand') + .values == 200 + ) + + # Last scenario should remain unchanged + assert np.all( + sample_scenario_collection['constant_series'] + .select_scenario('low_price') + .values == 100 + ) + + # Test with mismatched index + bad_scenarios = pd.Index(['s1', 's2', 's3'], name='scenario') + bad_idx = pd.MultiIndex.from_product( + [bad_scenarios, sample_timesteps], + names=['scenario', 'time'] + ) + + bad_data = pd.DataFrame( + {'constant_series': [1] * 15}, + index=bad_idx + ) + + with pytest.raises(ValueError, match="scenario index doesn't match"): + sample_scenario_collection.insert_new_data(bad_data) + + def test_with_scenarios_class_method(self): + """Test the with_scenarios class method.""" + collection = TimeSeriesCollection.with_scenarios( + start_time=pd.Timestamp('2023-01-01'), + periods=24, + freq='H', + scenario_names=['baseline', 'high', 'low'], + hours_per_step=1 + ) + + assert len(collection.timesteps) == 24 + assert collection.scenarios is not None + assert len(collection.scenarios) == 3 + assert 
list(collection.scenarios) == ['baseline', 'high', 'low'] + assert collection.hours_of_previous_timesteps == 1 + assert (collection.timesteps[1] - collection.timesteps[0]) == pd.Timedelta(hours=1) + + def test_string_representation_with_scenarios(self, sample_scenario_collection): + """Test string representation with scenarios.""" + # Add some time series + sample_scenario_collection.create_time_series(42, 'constant_series') + + # Get string representation + str_repr = str(sample_scenario_collection) + + # Should include scenario information + assert 'scenarios' in str_repr + assert str(len(sample_scenario_collection.scenarios)) in str_repr + + # Should include time series information + assert 'constant_series' in str_repr + + def test_restore_data_with_scenarios(self, sample_scenario_collection): + """Test restoring original data with scenarios.""" + # Add some time series + sample_scenario_collection.create_time_series( + np.array([ + [10, 20, 30, 40, 50], + [15, 25, 35, 45, 55], + [5, 15, 25, 35, 45] + ]), + 'varying_series' + ) + + # Capture original data + original_baseline = sample_scenario_collection['varying_series'].select_scenario('baseline').values.copy() + + # Modify data + sample_scenario_collection['varying_series'].stored_data = 999 + + # Verify data was changed + assert np.all(sample_scenario_collection['varying_series'].select_scenario('baseline').values == 999) + + # Restore data + sample_scenario_collection.restore_data() + + # Verify data was restored + assert np.array_equal( + sample_scenario_collection['varying_series'].select_scenario('baseline').values, + original_baseline + ) + + # Verify scenarios were preserved + assert sample_scenario_collection['varying_series']._has_scenarios + assert len(sample_scenario_collection['varying_series'].active_scenarios) == 3 + + +class TestIntegrationWithDataConverter: + """Test integration between DataConverter and TimeSeries with scenarios.""" + + def test_from_dataarray_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test creating TimeSeries from DataArray with scenarios.""" + # Create a DataArray with scenarios using DataConverter + data = np.array([ + [1, 2, 3, 4, 5], + [6, 7, 8, 9, 10], + [11, 12, 13, 14, 15] + ]) + + da = DataConverter.as_dataarray(data, sample_timesteps, sample_scenario_index) + + # Create TimeSeries from the DataArray + ts = TimeSeries(da, name="Converted Series") + + # Verify scenarios were preserved + assert ts._has_scenarios + assert ts.active_scenarios.equals(sample_scenario_index) + assert np.array_equal(ts.stored_data.values, data) + + # Test with different shapes + # Scalar should broadcast to all scenarios and timesteps + scalar_da = DataConverter.as_dataarray(42, sample_timesteps, sample_scenario_index) + scalar_ts = TimeSeries(scalar_da, name="Scalar Series") + + assert scalar_ts._has_scenarios + assert scalar_ts.active_scenarios.equals(sample_scenario_index) + assert np.all(scalar_ts.stored_data.values == 42) + + # 1D array should broadcast to all scenarios + array_1d = np.array([5, 10, 15, 20, 25]) + array_da = DataConverter.as_dataarray(array_1d, sample_timesteps, sample_scenario_index) + array_ts = TimeSeries(array_da, name="Array Series") + + assert array_ts._has_scenarios + for scenario in sample_scenario_index: + assert np.array_equal(array_ts.select_scenario(scenario).values, array_1d) + + def test_multiindex_series_to_timeseries(self, sample_timesteps, sample_scenario_index, sample_multi_index): + """Test creating TimeSeries from MultiIndex Series.""" + # Create a 
MultiIndex Series + series_values = np.arange(15) # 3 scenarios * 5 timesteps + multi_series = pd.Series(series_values, index=sample_multi_index) + + # Convert to DataArray + da = DataConverter.as_dataarray(multi_series, sample_timesteps, sample_scenario_index) + + # Create TimeSeries + ts = TimeSeries(da, name="From MultiIndex Series") + + # Verify scenarios and data + assert ts._has_scenarios + assert ts.active_scenarios.equals(sample_scenario_index) + + # Verify the first scenario's values (first 5 values) + baseline_values = ts.select_scenario('baseline').values + assert np.array_equal(baseline_values, series_values[:5]) + + # Verify the second scenario's values (second 5 values) + high_demand_values = ts.select_scenario('high_demand').values + assert np.array_equal(high_demand_values, series_values[5:10]) + + # Verify the third scenario's values (last 5 values) + low_price_values = ts.select_scenario('low_price').values + assert np.array_equal(low_price_values, series_values[10:15]) + + def test_dataconverter_to_timeseriescollection(self, sample_timesteps, sample_scenario_index): + """Test end-to-end DataConverter to TimeSeriesCollection flow.""" + # Create a collection with scenarios + collection = TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) + + # 1. Test with scalar + scalar_da = DataConverter.as_dataarray(42, sample_timesteps, sample_scenario_index) + collection.add_time_series(TimeSeries(scalar_da, name="scalar_series")) + + # 2. Test with 1D array + array_1d = np.array([5, 10, 15, 20, 25]) + array_da = DataConverter.as_dataarray(array_1d, sample_timesteps, sample_scenario_index) + collection.add_time_series(TimeSeries(array_da, name="array_series")) + + # 3. Test with 2D array + array_2d = np.array([ + [1, 2, 3, 4, 5], + [6, 7, 8, 9, 10], + [11, 12, 13, 14, 15] + ]) + array_2d_da = DataConverter.as_dataarray(array_2d, sample_timesteps, sample_scenario_index) + collection.add_time_series(TimeSeries(array_2d_da, name="array_2d_series")) + + # 4. 
Test with MultiIndex Series + multi_idx = pd.MultiIndex.from_product( + [sample_scenario_index, sample_timesteps], + names=['scenario', 'time'] + ) + series_values = np.arange(15) + multi_series = pd.Series(series_values, index=multi_idx) + series_da = DataConverter.as_dataarray(multi_series, sample_timesteps, sample_scenario_index) + collection.add_time_series(TimeSeries(series_da, name="multi_series")) + + # Verify all series were added with scenarios + assert len(collection) == 4 + assert all(ts._has_scenarios for ts in collection) + + # Try getting scenario-specific data + baseline_df = collection.get_scenario_data('baseline') + assert len(baseline_df) == 5 # 5 timesteps + assert len(baseline_df.columns) == 4 # 4 series + + # Values should match expected values for 'baseline' scenario + assert baseline_df['scalar_series'].iloc[0] == 42 + assert baseline_df['array_series'].iloc[0] == 5 + assert baseline_df['array_2d_series'].iloc[0] == 1 + assert baseline_df['multi_series'].iloc[0] == 0 + + +if __name__ == '__main__': + pytest.main() From fc339d8832653da4b8ca39353f8f8db42f68f7ec Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 19:21:48 +0200 Subject: [PATCH 20/55] Temp --- flixopt/core.py | 2 ++ flixopt/flow_system.py | 3 +++ 2 files changed, 5 insertions(+) diff --git a/flixopt/core.py b/flixopt/core.py index 0d8777168..fc862d7a9 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -53,6 +53,8 @@ class DataConverter: - Existing DataArrays """ + #TODO: Allow DataFrame with scenarios as columns + @staticmethod def as_dataarray( data: NumericData, timesteps: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index 93720de60..79ff44429 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -35,12 +35,14 @@ class FlowSystem: def __init__( self, timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index] = None, hours_of_last_timestep: Optional[float] = None, hours_of_previous_timesteps: Optional[Union[int, float, np.ndarray]] = None, ): """ Args: timesteps: The timesteps of the model. + scenarios: The scenarios of the model. hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified hours_of_previous_timesteps: The duration of previous timesteps. If None, the first time increment of time_series is used. @@ -49,6 +51,7 @@ def __init__( """ self.time_series_collection = TimeSeriesCollection( timesteps=timesteps, + scenarios=scenarios, hours_of_last_timestep=hours_of_last_timestep, hours_of_previous_timesteps=hours_of_previous_timesteps, ) From 2bfa397809d8fa95cbc4dd15d38697ebed06d48b Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 19:44:37 +0200 Subject: [PATCH 21/55] Simplify the TImeSeriesCollection --- flixopt/core.py | 534 +++++++++--------------------------------------- 1 file changed, 95 insertions(+), 439 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index fc862d7a9..73cbe5d23 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1028,12 +1028,12 @@ def __str__(self): return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}' -class TimeSeriesCollection: +class TimeSeriesAllocator: """ - Collection of TimeSeries objects with shared timestep management. + Simplified central manager for time series data with reference tracking. 
- TimeSeriesCollection handles multiple TimeSeries objects with synchronized - timesteps, provides operations on collections, and manages extra timesteps. + Provides a way to store time series data and work with subsets of dimensions + that automatically update all references when changed. """ def __init__( @@ -1043,17 +1043,7 @@ def __init__( hours_of_last_timestep: Optional[float] = None, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None, ): - """ - Args: - timesteps: The timesteps of the Collection. - scenarios: The scenarios of the Collection. - hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified - hours_of_previous_timesteps: The duration of previous timesteps. - If None, the first time increment of time_series is used. - This is needed to calculate previous durations (for example consecutive_on_hours). - If you use an array, take care that its long enough to cover all previous values! - """ - # Prepare and validate timesteps + """Initialize a TimeSeriesAllocator.""" self._validate_timesteps(timesteps) self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( timesteps, hours_of_previous_timesteps @@ -1064,360 +1054,147 @@ def __init__( self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra) - # Active timestep tracking - self._active_timesteps = None - self._active_timesteps_extra = None - self._active_hours_per_timestep = None - - # Scenarios self.all_scenarios = scenarios - self._active_scenarios = None - # Dictionary of time series by name - self.time_series_data: Dict[str, TimeSeries] = {} + # Storage for all data arrays + self._data_arrays: Dict[str, xr.DataArray] = {} - # Aggregation - self.group_weights: Dict[str, float] = {} - self.weights: Dict[str, float] = {} + # Series that need extra timestep + self._has_extra_timestep: Dict[str, bool] = {} - @classmethod - def with_uniform_timesteps( - cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None - ) -> 'TimeSeriesCollection': - """Create a collection with uniform timesteps.""" - timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time') - return cls(timesteps, hours_of_previous_timesteps=hours_per_step) + # Active subset selectors + self._selection: Dict[str, Any] = {} - def create_time_series( - self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False - ) -> TimeSeries: + def add_data_array( + self, + name: str, + data: NumericData, + needs_extra_timestep: bool = False, + ) -> xr.DataArray: """ - Creates a TimeSeries from the given data and adds it to the collection. + Add a new data array to the allocator. Args: - data: The data to create the TimeSeries from. - name: The name of the TimeSeries. - needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps. + name: Unique name for the data array + data: Data values + needs_extra_timestep: Whether this series requires an extra timestep Returns: - The created TimeSeries. 
- """ - # Check for duplicate name - if name in self.time_series_data: - raise ValueError(f"TimeSeries '{name}' already exists in this collection") - - # Determine which timesteps to use - timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps - - # Create the time series - if isinstance(data, TimeSeriesData): - time_series = TimeSeries.from_datasource( - name=name, - data=data.data, - timesteps=timesteps_to_use, - scenarios=self.scenarios, - aggregation_weight=data.agg_weight, - aggregation_group=data.agg_group, - needs_extra_timestep=needs_extra_timestep, - ) - # Connect the user time series to the created TimeSeries - data.label = name - else: - time_series = TimeSeries.from_datasource( - name=name, - data=data, - timesteps=timesteps_to_use, - scenarios=self.scenarios, - needs_extra_timestep=needs_extra_timestep, - ) - - # Add to the collection - self.add_time_series(time_series) - - return time_series - - def calculate_aggregation_weights(self) -> Dict[str, float]: - """Calculate and return aggregation weights for all time series.""" - self.group_weights = self._calculate_group_weights() - self.weights = self._calculate_weights() + Reference to the added data array + """ + data_array = DataConverter.as_dataarray( + data, + self.all_timesteps_extra if needs_extra_timestep else self.all_timesteps, + self.all_scenarios + ) - if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)): - logger.info('All Aggregation weights were set to 1') + # Store the data array + self._data_arrays[name] = data_array + self._has_extra_timestep[name] = needs_extra_timestep - return self.weights + # Return reference to the stored data + return self.get_reference(name) - def activate_timesteps( # TODO: rename - self, active_timesteps: Optional[pd.DatetimeIndex] = None, active_scenarios: Optional[pd.Index] = None - ): + def get_reference(self, name: str) -> xr.DataArray: """ - Update active timesteps and scenarios for the collection and all time series. - If no arguments are provided, the active states are reset. + Get a reference to a data array, applying the active subset. Args: - active_timesteps: The active timesteps of the model. - If None, all timesteps of the TimeSeriesCollection are taken. - active_scenarios: The active scenarios of the model. - If None, all scenarios of the TimeSeriesCollection are taken. - """ - if active_timesteps is None and active_scenarios is None: - return self.reset() - - # Handle timesteps - if active_timesteps is not None: - if not np.all(np.isin(active_timesteps, self.all_timesteps)): - raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection') - - # Calculate derived timesteps - self._active_timesteps = active_timesteps - first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0] - last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0] - self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2] - self._active_hours_per_timestep = self.all_hours_per_timestep.isel( - time=slice(first_ts_index, last_ts_idx + 1) - ) - - # Handle scenarios - if active_scenarios is not None: - if self.all_scenarios is None: - logger.warning('This TimeSeriesCollection does not have scenarios. 
Ignoring scenarios setting.') - else: - if not np.all(np.isin(active_scenarios, self.all_scenarios)): - raise ValueError('active_scenarios must be a subset of the scenarios of the TimeSeriesCollection') - self._active_scenarios = active_scenarios - - # Update all time series - self._update_time_series_active_states() + name: Name of the data array - def reset(self): - """Reset active timesteps and scenarios to defaults for all time series.""" - self._active_timesteps = None - self._active_timesteps_extra = None - self._active_hours_per_timestep = None - self._active_scenarios = None + Returns: + DataArray reference with active subset applied + """ + if name not in self._data_arrays: + raise KeyError(f"Data array '{name}' not found in allocator") - for time_series in self.time_series_data.values(): - time_series.reset() + data_array = self._data_arrays[name] - def restore_data(self): - """Restore original data for all time series.""" - for time_series in self.time_series_data.values(): - time_series.restore_data() + # Apply the active subset if any + if self._selection: + # Filter selector to only include dimensions in this data array + valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in data_array.dims} + if valid_selector: + return data_array.sel(**valid_selector) - def add_time_series(self, time_series: TimeSeries): - """Add an existing TimeSeries to the collection.""" - if time_series.name in self.time_series_data: - raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection") + return data_array - self.time_series_data[time_series.name] = time_series - def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False): + def set_selection(self, dimension: str, selector: Any): """ - Update time series with new data from a DataFrame. + Set active subset for a specific dimension. 
Args: - data: DataFrame containing new data with timestamps as index - include_extra_timestep: Whether the provided data already includes the extra timestep, by default False + dimension: Name of dimension to filter + selector: Value or slice to select """ - if not isinstance(data, pd.DataFrame): - raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}') + self._selection[dimension] = selector - # Check if the DataFrame index matches the expected timesteps - expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps - if not data.index.equals(expected_timesteps): - raise ValueError( - f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}' - ) - - for name, ts in self.time_series_data.items(): - if name in data.columns: - if not ts.needs_extra_timestep: - # For time series without extra timestep - if include_extra_timestep: - # If data includes extra timestep but series doesn't need it, exclude the last point - ts.stored_data = data[name].iloc[:-1] - else: - # Use data as is - ts.stored_data = data[name] - else: - # For time series with extra timestep - if include_extra_timestep: - # Data already includes extra timestep - ts.stored_data = data[name] - else: - # Need to add extra timestep - extrapolate from the last value - extra_step_value = data[name].iloc[-1] - extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time') - extra_step_series = pd.Series([extra_step_value], index=extra_step_index) - - # Combine the regular data with the extra timestep - ts.stored_data = pd.concat([data[name], extra_step_series]) - - logger.debug(f'Updated data for {name}') - - def to_dataframe( - self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True - ) -> pd.DataFrame: - """ - Convert collection to DataFrame with optional filtering and timestep control. + def clear_selection(self, dimension: Optional[str] = None): + """ + Clear active subset for a dimension or all dimensions. Args: - filtered: Filter time series by variability, by default 'non_constant' - include_extra_timestep: Whether to include the extra timestep in the result, by default True - - Returns: - DataFrame representation of the collection + dimension: Specific dimension to clear, or None to clear all """ - include_constants = filtered != 'non_constant' - ds = self.to_dataset(include_constants=include_constants) - - if not include_extra_timestep: - ds = ds.isel(time=slice(None, -1)) - - df = ds.to_dataframe() - - # Apply filtering - if filtered == 'all': - return df - elif filtered == 'constant': - return df.loc[:, df.nunique() == 1] - elif filtered == 'non_constant': - return df.loc[:, df.nunique() > 1] - else: - raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'") + if dimension is None: + self._selection = {} + elif dimension in self._selection: + del self._selection[dimension] - def to_dataset(self, include_constants: bool = True) -> xr.Dataset: + def update_data(self, name: str, new_data: NumericData): """ - Combine all time series into a single Dataset with all timesteps. + Update an existing data array with new values. 
Args: - include_constants: Whether to include time series with constant values, by default True - - Returns: - Dataset containing all selected time series with all timesteps + name: Name of the data array to update + new_data: New data values """ - # Determine which series to include - if include_constants: - series_to_include = self.time_series_data.values() - else: - series_to_include = self.non_constants - - # Create individual datasets and merge them - ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include]) - - # Ensure the correct time coordinates - ds = ds.reindex(time=self.timesteps_extra) - - # Add scenarios dimension if present - if self.scenarios is not None: - ds = ds.reindex(scenario=self.scenarios) + if name not in self._data_arrays: + raise KeyError(f"Data array '{name}' not found in allocator") - ds.attrs.update( - { - 'timesteps_extra': f'{self.timesteps_extra[0]} ... {self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}', - 'hours_per_timestep': self._format_stats(self.hours_per_timestep), - } - ) + # Handle different data types + if isinstance(new_data, xr.DataArray): + # Check if dimensions match + if new_data.dims != self._data_arrays[name].dims: + raise ValueError(f'Dimension mismatch: {new_data.dims} != {self._data_arrays[name].dims}') - return ds + # Update values + self._data_arrays[name].values = new_data.values + else: + # For other types, just update the values + self._data_arrays[name].values = np.asarray(new_data) - def get_scenario_data(self, scenario_name): + def activate_timesteps(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): """ - Extract data for a specific scenario as a DataFrame. + Set active subset for timesteps and scenarios. Args: - scenario_name: Name of the scenario to extract - - Returns: - DataFrame containing all time series data for the specified scenario - - Raises: - ValueError: If scenario_name doesn't exist or collection doesn't have scenarios + timesteps: Timesteps to activate, or None to clear + scenarios: Scenarios to activate, or None to clear """ - if self.scenarios is None: - raise ValueError("This TimeSeriesCollection doesn't have scenarios") - - if scenario_name not in self.scenarios: - raise ValueError(f"Scenario '{scenario_name}' not found in collection") - - # Create a DataFrame with data from all time series for this scenario - data_dict = {} - for name, ts in self.time_series_data.items(): - data_dict[name] = ts.active_data.sel(scenario=scenario_name).values + if timesteps is None: + self.clear_selection('time') + else: + self.set_selection('time', timesteps) - # Create DataFrame with the right index - df = pd.DataFrame(data_dict, index=self.timesteps) - return df + if scenarios is None: + self.clear_selection('scenario') + else: + self.set_selection('scenario', scenarios) - def compare_scenarios(self, scenario1, scenario2, time_series_names=None): + def __getitem__(self, name: str) -> xr.DataArray: """ - Compare data between two scenarios and return the differences. + Get a reference to a data array by name. 
Args: - scenario1: First scenario to compare - scenario2: Second scenario to compare - time_series_names: Optional list of time series names to include (default: all) + name: Name of the data array Returns: - DataFrame with differences between scenarios - """ - if self.scenarios is None: - raise ValueError("This TimeSeriesCollection doesn't have scenarios") - - if scenario1 not in self.scenarios or scenario2 not in self.scenarios: - raise ValueError(f'Scenarios must exist in collection') - - # Get DataFrames for each scenario - df1 = self.get_scenario_data(scenario1) - df2 = self.get_scenario_data(scenario2) - - # Filter to specified time series if provided - if time_series_names is not None: - df1 = df1[time_series_names] - df2 = df2[time_series_names] - - # Calculate differences - diff_df = df1 - df2 - diff_df.name = f'Difference ({scenario1} - {scenario2})' - - return diff_df - - def scenario_summary(self): + DataArray reference with active subset applied """ - Generate a summary of all scenarios in the collection. + return self.get_reference(name) - Returns: - DataFrame with statistics for each time series by scenario - """ - if self.scenarios is None or len(self.scenarios) <= 1: - raise ValueError("This TimeSeriesCollection doesn't have multiple scenarios") - - # Create multi-level columns for the summary - index = pd.MultiIndex.from_product([self.time_series_data.keys(), ['mean', 'min', 'max', 'std']]) - summary = pd.DataFrame(index=self.scenarios, columns=index) - - # Calculate statistics for each time series in each scenario - for scenario in self.scenarios: - df = self.get_scenario_data(scenario) - - for ts_name in self.time_series_data.keys(): - if ts_name in df.columns: - summary.loc[scenario, (ts_name, 'mean')] = df[ts_name].mean() - summary.loc[scenario, (ts_name, 'min')] = df[ts_name].min() - summary.loc[scenario, (ts_name, 'max')] = df[ts_name].max() - summary.loc[scenario, (ts_name, 'std')] = df[ts_name].std() - return summary - - def _update_time_series_active_states(self): - """Update active timesteps and scenarios for all time series.""" - for ts in self.time_series_data.values(): - # Set timesteps - if ts.needs_extra_timestep: - ts.active_timesteps = self.timesteps_extra - else: - ts.active_timesteps = self.timesteps - # Set scenarios - if self.scenarios is not None: - ts.active_scenarios = self.scenarios @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): @@ -1470,127 +1247,6 @@ def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataAr data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step' ) - def _calculate_group_weights(self) -> Dict[str, float]: - """Calculate weights for aggregation groups.""" - # Count series in each group - groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None] - group_counts = Counter(groups) - - # Calculate weight for each group (1/count) - return {group: 1 / count for group, count in group_counts.items()} - - def _calculate_weights(self) -> Dict[str, float]: - """Calculate weights for all time series.""" - # Calculate weight for each time series - weights = {} - for name, ts in self.time_series_data.items(): - if ts.aggregation_group is not None: - # Use group weight - weights[name] = self.group_weights.get(ts.aggregation_group, 1) - else: - # Use individual weight or default to 1 - weights[name] = ts.aggregation_weight or 1 - - return weights - - def _format_stats(self, data) -> str: - """Format statistics 
for a data array.""" - if hasattr(data, 'values'): - values = data.values - else: - values = np.asarray(data) - - mean_val = np.mean(values) - min_val = np.min(values) - max_val = np.max(values) - - return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}' - - def __getitem__(self, name: str) -> TimeSeries: - """Get a TimeSeries by name.""" - try: - return self.time_series_data[name] - except KeyError as e: - raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e - - def __iter__(self) -> Iterator[TimeSeries]: - """Iterate through all TimeSeries in the collection.""" - return iter(self.time_series_data.values()) - - def __len__(self) -> int: - """Get the number of TimeSeries in the collection.""" - return len(self.time_series_data) - - def __contains__(self, item: Union[str, TimeSeries]) -> bool: - """Check if a TimeSeries exists in the collection.""" - if isinstance(item, str): - return item in self.time_series_data - elif isinstance(item, TimeSeries): - return item in self.time_series_data.values() - return False - - @property - def non_constants(self) -> List[TimeSeries]: - """Get time series with varying values.""" - return [ts for ts in self.time_series_data.values() if not ts.all_equal] - - @property - def constants(self) -> List[TimeSeries]: - """Get time series with constant values.""" - return [ts for ts in self.time_series_data.values() if ts.all_equal] - - @property - def timesteps(self) -> pd.DatetimeIndex: - """Get the active timesteps.""" - return self.all_timesteps if self._active_timesteps is None else self._active_timesteps - - @property - def timesteps_extra(self) -> pd.DatetimeIndex: - """Get the active timesteps with extra step.""" - return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra - - @property - def hours_per_timestep(self) -> xr.DataArray: - """Get the duration of each active timestep.""" - return ( - self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep - ) - - @property - def hours_of_last_timestep(self) -> float: - """Get the duration of the last timestep.""" - return float(self.hours_per_timestep[-1].item()) - - @property - def scenarios(self) -> Optional[pd.Index]: - """Get the active scenarios.""" - return self.all_scenarios if self._active_scenarios is None else self._active_scenarios - - def __repr__(self): - return f'TimeSeriesCollection:\n{self.to_dataset()}' - - def __str__(self): - """Get a human-readable string representation.""" - longest_name = max([len(time_series.name) for time_series in self.time_series_data.values()]) - - stats_summary = '\n'.join( - [ - f' - {time_series.name:<{longest_name}}: {get_numeric_stats(time_series.active_data)}' - for time_series in self.time_series_data.values() - ] - ) - - return ( - f'TimeSeriesCollection with {len(self.time_series_data)} series\n' - f' Time Range: {self.timesteps[0]} → {self.timesteps[-1]}\n' - f' No. of timesteps: {len(self.timesteps)} + 1 extra\n' - f' No. of scenarios: {len(self.scenarios) if self.scenarios is not None else "No Scenarios"}\n' - f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n' - f' Time Series Data:\n' - f'{stats_summary}' - ) - - def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str: """ Calculates the mean, median, min, max, and standard deviation of a numeric DataArray. 
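A minimal usage sketch of the TimeSeriesAllocator API as introduced in the patch above. The names and values here are illustrative only and are not part of the patch series; the selection API is reworked in the following patches, and PATCH 23 below adds a fuller demo script.

# Sketch only: 'price', 'low' and 'high' are made-up example names.
import numpy as np
import pandas as pd

from flixopt.core import TimeSeriesAllocator

timesteps = pd.date_range('2025-01-01', periods=8, freq='h', name='time')
scenarios = pd.Index(['low', 'high'], name='scenario')

allocator = TimeSeriesAllocator(timesteps, scenarios=scenarios)

# 1D data is broadcast over the scenario dimension by DataConverter.
price = allocator.add_data_array('price', np.linspace(10.0, 80.0, 8))
assert price.dims == ('scenario', 'time')

# Narrow the active view; later lookups only see the subset.
allocator.set_selection('scenario', ['low'])
assert allocator['price'].sizes['scenario'] == 1

# Clear all selections to get back the full data.
allocator.clear_selection()
assert allocator['price'].sizes['scenario'] == 2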
From d7acaf02c47a4cc5305a8250ae564b7e5480c2c2 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 20:02:13 +0200 Subject: [PATCH 22/55] Simplify the TImeSeriesCollection --- flixopt/core.py | 132 ++++++++++++++++++------------------------------ 1 file changed, 50 insertions(+), 82 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 73cbe5d23..bf0b68565 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1048,16 +1048,20 @@ def __init__( self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( timesteps, hours_of_previous_timesteps ) + self.timesteps = timesteps + self.timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) + self.hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra) - # Set up timesteps and hours - self.all_timesteps = timesteps - self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra) - - self.all_scenarios = scenarios + self.scenarios = scenarios # Storage for all data arrays - self._data_arrays: Dict[str, xr.DataArray] = {} + if scenarios is None: + self._dataset = xr.Dataset(coords={'time': self.timesteps}) + self._dataset_extra = xr.Dataset(coords={'time': self.timesteps_extra}) # For series that need extra timestep + + else: + self._dataset = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps}) + self._dataset_extra = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) # For series that need extra timestep # Series that need extra timestep self._has_extra_timestep: Dict[str, bool] = {} @@ -1073,99 +1077,63 @@ def add_data_array( ) -> xr.DataArray: """ Add a new data array to the allocator. + """ + if name in self._dataset or name in self._dataset_extra: + raise KeyError(f"Data array '{name}' already exists in allocator") - Args: - name: Unique name for the data array - data: Data values - needs_extra_timestep: Whether this series requires an extra timestep + # Choose which dataset to use + target_dataset = self._dataset_extra if needs_extra_timestep else self._dataset + target_timesteps = self.timesteps_extra if needs_extra_timestep else self.timesteps - Returns: - Reference to the added data array - """ - data_array = DataConverter.as_dataarray( - data, - self.all_timesteps_extra if needs_extra_timestep else self.all_timesteps, - self.all_scenarios - ) + # Convert to DataArray + data_array = DataConverter.as_dataarray(data, target_timesteps, self.scenarios) + + # Add to the appropriate dataset + target_dataset[name] = data_array - # Store the data array - self._data_arrays[name] = data_array + # Track if it needs extra timestep self._has_extra_timestep[name] = needs_extra_timestep - # Return reference to the stored data + # Return reference return self.get_reference(name) def get_reference(self, name: str) -> xr.DataArray: """ Get a reference to a data array, applying the active subset. 
- - Args: - name: Name of the data array - - Returns: - DataArray reference with active subset applied """ - if name not in self._data_arrays: + # Check which dataset contains this variable + if name in self._dataset: + dataset = self._dataset + elif name in self._dataset_extra: + dataset = self._dataset_extra + else: raise KeyError(f"Data array '{name}' not found in allocator") - data_array = self._data_arrays[name] - # Apply the active subset if any if self._selection: - # Filter selector to only include dimensions in this data array - valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in data_array.dims} + # Filter selector to only include dimensions in this dataset + valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in dataset.dims} if valid_selector: - return data_array.sel(**valid_selector) - - return data_array - - - def set_selection(self, dimension: str, selector: Any): - """ - Set active subset for a specific dimension. - - Args: - dimension: Name of dimension to filter - selector: Value or slice to select - """ - self._selection[dimension] = selector - - def clear_selection(self, dimension: Optional[str] = None): - """ - Clear active subset for a dimension or all dimensions. + # Get the subset of the dataset then extract the variable + return dataset.sel(**valid_selector)[name] - Args: - dimension: Specific dimension to clear, or None to clear all - """ - if dimension is None: - self._selection = {} - elif dimension in self._selection: - del self._selection[dimension] + # Return the variable directly + return dataset[name] - def update_data(self, name: str, new_data: NumericData): + def clear_selection(self, timesteps: bool = True, scenarios: bool = True): """ - Update an existing data array with new values. + Clear selection for timesteps and/or scenarios. Args: - name: Name of the data array to update - new_data: New data values + timesteps: Whether to clear timesteps selection + scenarios: Whether to clear scenarios selection """ - if name not in self._data_arrays: - raise KeyError(f"Data array '{name}' not found in allocator") - - # Handle different data types - if isinstance(new_data, xr.DataArray): - # Check if dimensions match - if new_data.dims != self._data_arrays[name].dims: - raise ValueError(f'Dimension mismatch: {new_data.dims} != {self._data_arrays[name].dims}') + if timesteps: + self._selection['time'] = None + if scenarios: + self._selection['scenario'] = None - # Update values - self._data_arrays[name].values = new_data.values - else: - # For other types, just update the values - self._data_arrays[name].values = np.asarray(new_data) - - def activate_timesteps(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): """ Set active subset for timesteps and scenarios. 
@@ -1174,14 +1142,14 @@ def activate_timesteps(self, timesteps: Optional[pd.DatetimeIndex] = None, scena scenarios: Scenarios to activate, or None to clear """ if timesteps is None: - self.clear_selection('time') + self.clear_selection(timesteps=True, scenarios=False) else: - self.set_selection('time', timesteps) + self._selection['time'] = timesteps if scenarios is None: - self.clear_selection('scenario') + self.clear_selection(timesteps=False, scenarios=True) else: - self.set_selection('scenario', scenarios) + self._selection['scenario'] = scenarios def __getitem__(self, name: str) -> xr.DataArray: """ @@ -1195,7 +1163,6 @@ def __getitem__(self, name: str) -> xr.DataArray: """ return self.get_reference(name) - @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): """Validate timesteps format and rename if needed.""" @@ -1247,6 +1214,7 @@ def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataAr data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step' ) + def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str: """ Calculates the mean, median, min, max, and standard deviation of a numeric DataArray. From 70808d1e5d3e8d2c5b6366748f1c678f0511ed7e Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Sun, 30 Mar 2025 20:16:29 +0200 Subject: [PATCH 23/55] Add test script --- time_series_alloc.py | 168 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) create mode 100644 time_series_alloc.py diff --git a/time_series_alloc.py b/time_series_alloc.py new file mode 100644 index 000000000..89fa899c4 --- /dev/null +++ b/time_series_alloc.py @@ -0,0 +1,168 @@ + +import numpy as np +import xarray as xr +import pandas as pd + +from flixopt.core import DataConverter, TimeSeriesAllocator + +class Element: + def __init__(self, name: str, data: xr.DataArray): + self.name = name + self.data = data + + +# Example script to demonstrate both classes +def main(): + print("Demonstrating DataConverter and TimeSeriesAllocator Classes") + print("=" * 70) + + # Create timesteps for our examples + start_date = pd.Timestamp('2025-01-01') + dates = [start_date + pd.Timedelta(days=i) for i in range(10)] + timesteps = pd.DatetimeIndex(dates, name='time') + + # Create scenarios for our examples + scenario_names = ['low', 'medium', 'high'] + scenarios = pd.Index(scenario_names, name='scenario') + + print(f"Created {len(timesteps)} timesteps from {timesteps[0]} to {timesteps[-1]}") + print(f"Created {len(scenarios)} scenarios: {', '.join(scenarios)}") + print("\n") + + # Part 1: Demonstrate DataConverter with different types + print("Part 1: DataConverter Examples") + print("-" * 30) + + # Example 1: Converting a scalar value + print("Example 1: Converting a scalar value (42)") + scalar_value = 42 + scalar_da = DataConverter.as_dataarray(scalar_value, timesteps) + print(f" Shape: {scalar_da.shape}, Dimensions: {scalar_da.dims}") + print(f" First few values: {scalar_da.values[:3]}") + print(f" All values are the same: {np.all(scalar_da.values == scalar_value)}") + print() + + # Example 2: Converting a 1D numpy array + print("Example 2: Converting a 1D numpy array") + array_1d = np.arange(len(timesteps)) * 10 + array_da = DataConverter.as_dataarray(array_1d, timesteps) + print(f" Shape: {array_da.shape}, Dimensions: {array_da.dims}") + print(f" First few values: {array_da.values[:3]}") + print(f" Values match input: 
{np.all(array_da.values == array_1d)}") + print() + + # Example 3: Converting a pandas Series with time index + print("Example 3: Converting a pandas Series with time index") + series = pd.Series(np.random.rand(len(timesteps)) * 100, index=timesteps) + series_da = DataConverter.as_dataarray(series, timesteps) + print(f" Shape: {series_da.shape}, Dimensions: {series_da.dims}") + print(f" First few values: {series_da.values[:3]}") + print(f" Values match input: {np.all(series_da.values == series.values)}") + print() + + # Example 4: Converting with scenarios + print("Example 4: Converting data with scenarios") + # Create 2D array with shape (scenarios, timesteps) + array_2d = np.random.rand(len(scenarios), len(timesteps)) * 100 + array_2d_da = DataConverter.as_dataarray(array_2d, timesteps, scenarios) + print(f" Shape: {array_2d_da.shape}, Dimensions: {array_2d_da.dims}") + print(f" Values for first scenario: {array_2d_da.sel(scenario='low').values[:3]}") + print(f" Values match input: {np.all(array_2d_da.values == array_2d)}") + print() + + # Example 5: Broadcasting a 1D array to scenarios + print("Example 5: Broadcasting a 1D array to scenarios") + broadcast_da = DataConverter.as_dataarray(array_1d, timesteps, scenarios) + print(f" Shape: {broadcast_da.shape}, Dimensions: {broadcast_da.dims}") + print(f" Original shape: {array_1d.shape}") + print(f" All scenarios have identical values: {np.all(broadcast_da.sel(scenario='low').values == broadcast_da.sel(scenario='medium').values)}") + print("\n") + + # Part 2: Demonstrate TimeSeriesAllocator + print("Part 2: TimeSeriesAllocator Examples") + print("-" * 35) + + # Create a TimeSeriesAllocator instance + print("Creating TimeSeriesAllocator with timesteps and scenarios") + allocator = TimeSeriesAllocator(timesteps, scenarios) + print(f" Regular timesteps: {len(allocator.timesteps)}") + print(f" Extended timesteps: {len(allocator.timesteps_extra)}") + print(f" Added extra timestep: {allocator.timesteps_extra[-1]}") + print(f" Hours per timestep: {allocator.hours_per_timestep.values[0]:.1f} hours") + print() + + # Add data arrays to the allocator + print("Adding data arrays to the allocator") + + # Example 1: Add a scalar value (broadcast to all timesteps and scenarios) + constant_val = 42 + constant_da = allocator.add_data_array("constant", constant_val) + print(" Added 'constant' (scalar value 42)") + print(f" Shape: {constant_da.shape}") + print(f" Values: All {constant_val}") + print() + + # Example 2: Add a 1D array (mapped to timesteps, broadcast to scenarios) + ramp_values = np.linspace(10, 100, len(timesteps)) + ramp_da = allocator.add_data_array("ramp", ramp_values) + print(" Added 'ramp' (linear values from 10 to 100)") + print(f" Shape: {ramp_da.shape}") + print(f" First few values: {ramp_da.sel(scenario='low').values[:3]}") + print() + + # Example 3: Add a 2D array (scenarios × timesteps) + demand_values = np.zeros((len(scenarios), len(timesteps))) + # Low scenario: constant demand + demand_values[0, :] = 50 + # Medium scenario: linearly increasing + demand_values[1, :] = np.linspace(50, 100, len(timesteps)) + # High scenario: exponentially increasing + demand_values[2, :] = 50 * np.exp(np.linspace(0, 1, len(timesteps))) + + demand_da = allocator.add_data_array("demand", demand_values) + print(" Added 'demand' (different profile per scenario)") + print(f" Shape: {demand_da.shape}") + for i, scenario in enumerate(scenarios): + print(f" {scenario} scenario first value: {demand_da.sel(scenario=scenario).values[0]:.1f}") + print() + + # 
Example 4: Add data with extra timestep
    forecast_values = np.random.normal(size=(len(scenarios), len(timesteps) + 1)) * 10 + 100
    forecast_da = allocator.add_data_array("forecast", forecast_values, needs_extra_timestep=True)
    print("  Added 'forecast' (with extra timestep)")
    print(f"    Shape: {forecast_da.shape}")
    print(f"    Last regular timestep: {timesteps[-1]}")
    print(f"    Extra timestep: {allocator.timesteps_extra[-1]}")
    print()

    # Demonstrate selection functionality
    print("Demonstrating selection functionality")
    # Select a subset of timesteps
    subset_timesteps = timesteps[3:7]
    print(f"  Selecting timesteps from {subset_timesteps[0]} to {subset_timesteps[-1]}")
    allocator.set_selection(timesteps=subset_timesteps)

    # Access data with the selection applied
    demand_subset = allocator["demand"]
    print(f"  Original demand shape: {demand_da.shape}")
    print(f"  Selected demand shape: {demand_subset.shape}")
    print()

    # Select a single scenario
    print("  Selecting only the 'high' scenario")
    allocator.set_selection(scenarios=pd.Index(['high'], name='scenario'))
    demand_high = allocator["demand"]
    print(f"  Shape after scenario selection: {demand_high.shape}")
    print()

    # Clear the selection
    print("  Clearing all selections")
    allocator.clear_selection()
    demand_full = allocator["demand"]
    print(f"  Shape after clearing selection: {demand_full.shape}")
    print()

    print("Examples completed successfully!")

if __name__ == "__main__":
    main()

From d21dd50e40f18e7c099bd5e0186184279f060dbe Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Sun, 30 Mar 2025 20:45:42 +0200
Subject: [PATCH 24/55] Improve TimeSeriesAllocator

---
 flixopt/core.py | 55 +++++++++++++++++++++----------------------------
 1 file changed, 24 insertions(+), 31 deletions(-)

diff --git a/flixopt/core.py b/flixopt/core.py
index bf0b68565..7aa0dbc9f 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -4,10 +4,10 @@
 """
 
 import inspect
-import textwrap
 import json
 import logging
 import pathlib
+import textwrap
 from collections import Counter
 from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union
 
@@ -1057,11 +1057,11 @@ def __init__(
         # Storage for all data arrays
         if scenarios is None:
             self._dataset = xr.Dataset(coords={'time': self.timesteps})
-            self._dataset_extra = xr.Dataset(coords={'time': self.timesteps_extra})  # For series that need extra timestep
+            self._dataset_extra = xr.Dataset(coords={'time': self.timesteps_extra})
         else:
             self._dataset = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps})
-            self._dataset_extra = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra})  # For series that need extra timestep
+            self._dataset_extra = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra})
 
         # Series that need extra timestep
         self._has_extra_timestep: Dict[str, bool] = {}
@@ -1095,30 +1095,7 @@ def add_data_array(
         self._has_extra_timestep[name] = needs_extra_timestep
 
         # Return reference
-        return self.get_reference(name)
-
-    def get_reference(self, name: str) -> xr.DataArray:
-        """
-        Get a reference to a data array, applying the active subset.
- """ - # Check which dataset contains this variable - if name in self._dataset: - dataset = self._dataset - elif name in self._dataset_extra: - dataset = self._dataset_extra - else: - raise KeyError(f"Data array '{name}' not found in allocator") - - # Apply the active subset if any - if self._selection: - # Filter selector to only include dimensions in this dataset - valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in dataset.dims} - if valid_selector: - # Get the subset of the dataset then extract the variable - return dataset.sel(**valid_selector)[name] - - # Return the variable directly - return dataset[name] + return self[name] def clear_selection(self, timesteps: bool = True, scenarios: bool = True): """ @@ -1129,9 +1106,9 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): scenarios: Whether to clear scenarios selection """ if timesteps: - self._selection['time'] = None + self._selection['time'] = slice(None, None) if scenarios: - self._selection['scenario'] = None + self._selection['scenario'] = slice(None, None) def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): """ @@ -1153,7 +1130,7 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: def __getitem__(self, name: str) -> xr.DataArray: """ - Get a reference to a data array by name. + Get the selected data of a data array. Args: name: Name of the data array @@ -1161,7 +1138,23 @@ def __getitem__(self, name: str) -> xr.DataArray: Returns: DataArray reference with active subset applied """ - return self.get_reference(name) + if name in self._dataset: + dataset = self._dataset + elif name in self._dataset_extra: + dataset = self._dataset_extra + else: + raise KeyError(f"Data array '{name}' not found in allocator") + + # Apply the active subset if any + if self._selection: + # Filter selector to only include dimensions in this dataset + valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in dataset.dims} + if valid_selector: + # Get the subset of the dataset then extract the variable + return dataset.sel(**valid_selector)[name] + + # Return the variable directly + return dataset[name] @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): From 5dc3c78e7eb7146b5fa0b69899fae15ee0eca391 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 10:57:30 +0200 Subject: [PATCH 25/55] Update TimeSeries --- flixopt/core.py | 166 +++++++++++++++++++++++++++--------------------- 1 file changed, 95 insertions(+), 71 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 7aa0dbc9f..3ec044d36 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -761,22 +761,19 @@ def __init__( # Data management self._stored_data = data.copy(deep=True) self._backup = self._stored_data.copy(deep=True) - self._active_timesteps = self._stored_data.indexes['time'] - # Handle scenarios if present - self._has_scenarios = 'scenario' in data.dims - self._active_scenarios = self._stored_data.indexes.get('scenario', None) + # Selection state - use dictionaries for consistency with TimeSeriesAllocator + self._selection = {} - self._active_data = None - self._update_active_data() + # Flag for whether this series has scenarios + self._has_scenarios = 'scenario' in data.dims def reset(self): """ - Reset active timesteps and scenarios to the full set of stored data. + Reset selections to include all timesteps and scenarios. 
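
To make the reworked selection workflow concrete, here is a small standalone usage sketch; it assumes the TimeSeries API introduced in this series (from_datasource, set_selection, clear_selection) and uses made-up demand numbers:

import numpy as np
import pandas as pd
from flixopt.core import TimeSeries

timesteps = pd.date_range('2025-01-01', periods=4, freq='D', name='time')
ts = TimeSeries.from_datasource(np.array([1.0, 2.0, 3.0, 4.0]), 'demand', timesteps)

ts.set_selection(timesteps=timesteps[:2])  # activate the first two days only
assert ts.active_data.shape == (2,)
ts.reset()                                 # same effect as ts.clear_selection()
assert ts.active_data.shape == (4,)
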
+ This is equivalent to clearing all selections. """ - self.active_timesteps = None - if self._has_scenarios: - self.active_scenarios = None + self.clear_selection() def restore_data(self): """ @@ -824,15 +821,6 @@ def stats(self) -> str: """ return get_numeric_stats(self.active_data, padd=0, by_scenario=True) - def _update_active_data(self): - """ - Update the active data based on active_timesteps and active_scenarios. - """ - if self._has_scenarios and self._active_scenarios is not None: - self._active_data = self._stored_data.sel(time=self.active_timesteps, scenario=self._active_scenarios) - else: - self._active_data = self._stored_data.sel(time=self.active_timesteps) - @property def all_equal(self) -> bool: """Check if all values in the series are equal.""" @@ -841,64 +829,37 @@ def all_equal(self) -> bool: @property def active_timesteps(self) -> pd.DatetimeIndex: """Get the current active timesteps.""" - return self._active_timesteps - - @active_timesteps.setter - def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]): - """ - Set active_timesteps and refresh active_data. - - Args: - timesteps: New timesteps to activate, or None to use all stored timesteps - - Raises: - TypeError: If timesteps is not a pandas DatetimeIndex or None - """ - if timesteps is None: - self._active_timesteps = self.stored_data.indexes['time'] - elif isinstance(timesteps, pd.DatetimeIndex): - self._active_timesteps = timesteps - else: - raise TypeError('active_timesteps must be a pandas DatetimeIndex or None') - - self._update_active_data() + # If no selection is active, return all timesteps + if 'time' not in self._selection: + return self._stored_data.indexes['time'] + return self._selection['time'] @property def active_scenarios(self) -> Optional[pd.Index]: """Get the current active scenarios.""" - return self._active_scenarios - - @active_scenarios.setter - def active_scenarios(self, scenarios: Optional[pd.Index]): - """ - Set active_scenarios and refresh active_data. - - Args: - scenarios: New scenarios to activate, or None to use all stored scenarios - - Raises: - TypeError: If scenarios is not a pandas Index or None - ValueError: If scenarios is not a subset of stored scenarios - """ if not self._has_scenarios: - logger.warning('This TimeSeries does not have scenarios dimension. Ignoring scenarios setting.') - return + return None - if scenarios is None: - self._active_scenarios = self.stored_data.indexes.get('scenario', None) - elif isinstance(scenarios, pd.Index): - if not scenarios.isin(self.stored_data.indexes['scenario']).all(): - raise ValueError('active_scenarios must be a subset of the stored scenarios') - self._active_scenarios = scenarios - else: - raise TypeError('active_scenarios must be a pandas Index or None') - - self._update_active_data() + # If no selection is active, return all scenarios + if 'scenario' not in self._selection: + return self._stored_data.indexes.get('scenario', None) + return self._selection['scenario'] @property def active_data(self) -> xr.DataArray: - """Get a view of stored_data based on active_timesteps.""" - return self._active_data + """ + Get a view of stored_data based on current selections. + This computes the view dynamically based on the current selection state. 
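
Underneath, the dynamic view is ordinary xarray label selection with the selector filtered down to dimensions the array actually has; a standalone sketch of that idea (names and data are illustrative):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2025-01-01', periods=4, freq='D', name='time')
stored = xr.DataArray(np.arange(4.0), coords={'time': time}, dims=['time'])

selection = {'time': time[:2], 'scenario': pd.Index(['high'], name='scenario')}
valid = {dim: sel for dim, sel in selection.items() if dim in stored.dims}
view = stored.sel(**valid)    # 'scenario' is dropped: the array has no such dim
assert view.shape == (2,)
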
+ """ + # Start with stored data + result = self._stored_data + + # Apply selections if they exist + valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in result.dims} + if valid_selector: + result = result.sel(**valid_selector) + + return result @property def stored_data(self) -> xr.DataArray: @@ -913,21 +874,83 @@ def stored_data(self, value: NumericData): Args: value: New data to store """ - new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps, scenarios=self.active_scenarios) + # Get current timesteps and scenarios + timesteps = self.active_timesteps + scenarios = self.active_scenarios if self._has_scenarios else None + + new_data = DataConverter.as_dataarray(value, timesteps=timesteps, scenarios=scenarios) # Skip if data is unchanged to avoid overwriting backup if new_data.equals(self._stored_data): return self._stored_data = new_data - self.active_timesteps = None # Reset to full timeline + self.clear_selection() # Reset selections to full dataset + + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): + """ + Set active subset for timesteps and/or scenarios. + + Args: + timesteps: Timesteps to activate, or None to clear timestep selection + scenarios: Scenarios to activate, or None to clear scenario selection + + This method follows the same API as TimeSeriesAllocator for consistency. + """ + # Handle timesteps selection + if timesteps is None: + # Clear timestep selection + if 'time' in self._selection: + del self._selection['time'] + else: + # Validate and set timestep selection + if not isinstance(timesteps, pd.DatetimeIndex): + raise TypeError('timesteps must be a pandas DatetimeIndex') + self._selection['time'] = timesteps + + # Handle scenarios selection + if scenarios is None: + # Clear scenario selection + if 'scenario' in self._selection: + del self._selection['scenario'] + elif self._has_scenarios: + # Validate and set scenario selection + if not isinstance(scenarios, pd.Index): + raise TypeError('scenarios must be a pandas Index') + + # Check if scenarios are valid + stored_scenarios = self._stored_data.indexes['scenario'] + if not scenarios.isin(stored_scenarios).all(): + raise ValueError('scenarios must be a subset of the stored scenarios') + + self._selection['scenario'] = scenarios + elif scenarios is not None and not self._has_scenarios: + logger.warning('This TimeSeries does not have scenarios dimension. Ignoring scenarios selection.') + + def clear_selection(self, timesteps: bool = True, scenarios: bool = True): + """ + Clear selection for timesteps and/or scenarios. + + Args: + timesteps: Whether to clear timesteps selection + scenarios: Whether to clear scenarios selection + + This method follows the same API as TimeSeriesAllocator for consistency. 
+ """ + if timesteps and 'time' in self._selection: + del self._selection['time'] + + if scenarios and 'scenario' in self._selection and self._has_scenarios: + del self._selection['scenario'] @property def sel(self): + """Direct access to the active_data's sel method for convenience.""" return self.active_data.sel @property def isel(self): + """Direct access to the active_data's isel method for convenience.""" return self.active_data.isel def _apply_operation(self, other, op): @@ -1011,7 +1034,8 @@ def __repr__(self): # Add scenario information if present if self._has_scenarios: - attrs['scenarios'] = f'{len(self.active_scenarios)} scenarios' + scenarios = self.active_scenarios + attrs['scenarios'] = f'{len(scenarios)} scenarios' if scenarios is not None else 'All scenarios' else: attrs['scenarios'] = 'No scenarios' From 6edc87d0cf9410fc1134eb4b460ed35f470fb5c4 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:00:29 +0200 Subject: [PATCH 26/55] Update TimeSeries --- flixopt/core.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 3ec044d36..80407a3b6 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -866,19 +866,18 @@ def stored_data(self) -> xr.DataArray: """Get a copy of the full stored data.""" return self._stored_data.copy() - @stored_data.setter - def stored_data(self, value: NumericData): + def update_stored_data(self, value: NumericData): """ Update stored_data and refresh active_data. Args: value: New data to store """ - # Get current timesteps and scenarios - timesteps = self.active_timesteps - scenarios = self.active_scenarios if self._has_scenarios else None - - new_data = DataConverter.as_dataarray(value, timesteps=timesteps, scenarios=scenarios) + new_data = DataConverter.as_dataarray( + value, + timesteps=self.active_timesteps, + scenarios=self.active_scenarios if self._has_scenarios else None + ) # Skip if data is unchanged to avoid overwriting backup if new_data.equals(self._stored_data): From 168aa39879dac6eae199db1b8fe0728568652528 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:06:41 +0200 Subject: [PATCH 27/55] Update selection --- flixopt/core.py | 43 ++++++++++--------------------------------- 1 file changed, 10 insertions(+), 33 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 80407a3b6..3492568aa 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -888,43 +888,21 @@ def update_stored_data(self, value: NumericData): def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): """ - Set active subset for timesteps and/or scenarios. + Set active subset for timesteps and scenarios. Args: - timesteps: Timesteps to activate, or None to clear timestep selection - scenarios: Scenarios to activate, or None to clear scenario selection - - This method follows the same API as TimeSeriesAllocator for consistency. 
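
Patch 27 represents a cleared selection as slice(None, None); xarray treats a full slice as "take everything", so applying it needs no special-casing. A minimal check:

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2025-01-01', periods=3, freq='D', name='time')
da = xr.DataArray(np.ones(3), coords={'time': time}, dims=['time'])

assert da.sel(time=slice(None, None)).identical(da)  # full slice selects all
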
+ timesteps: Timesteps to activate, or None to clear + scenarios: Scenarios to activate, or None to clear """ - # Handle timesteps selection if timesteps is None: - # Clear timestep selection - if 'time' in self._selection: - del self._selection['time'] + self.clear_selection(timesteps=True, scenarios=False) else: - # Validate and set timestep selection - if not isinstance(timesteps, pd.DatetimeIndex): - raise TypeError('timesteps must be a pandas DatetimeIndex') self._selection['time'] = timesteps - # Handle scenarios selection if scenarios is None: - # Clear scenario selection - if 'scenario' in self._selection: - del self._selection['scenario'] - elif self._has_scenarios: - # Validate and set scenario selection - if not isinstance(scenarios, pd.Index): - raise TypeError('scenarios must be a pandas Index') - - # Check if scenarios are valid - stored_scenarios = self._stored_data.indexes['scenario'] - if not scenarios.isin(stored_scenarios).all(): - raise ValueError('scenarios must be a subset of the stored scenarios') - + self.clear_selection(timesteps=False, scenarios=True) + else: self._selection['scenario'] = scenarios - elif scenarios is not None and not self._has_scenarios: - logger.warning('This TimeSeries does not have scenarios dimension. Ignoring scenarios selection.') def clear_selection(self, timesteps: bool = True, scenarios: bool = True): """ @@ -936,11 +914,10 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): This method follows the same API as TimeSeriesAllocator for consistency. """ - if timesteps and 'time' in self._selection: - del self._selection['time'] - - if scenarios and 'scenario' in self._selection and self._has_scenarios: - del self._selection['scenario'] + if timesteps: + self._selection['time'] = slice(None, None) + if scenarios: + self._selection['scenario'] = slice(None, None) @property def sel(self): From 3cc2b4136dda0c8a5b71cb7508f349bf5d45ec4a Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:09:04 +0200 Subject: [PATCH 28/55] Renaming --- flixopt/calculation.py | 22 +++++++++++----------- flixopt/effects.py | 2 +- flixopt/elements.py | 2 +- flixopt/flow_system.py | 22 +++++++++++----------- flixopt/results.py | 6 +++--- flixopt/structure.py | 14 +++++++------- 6 files changed, 34 insertions(+), 34 deletions(-) diff --git a/flixopt/calculation.py b/flixopt/calculation.py index c7367cad2..8f39f32ad 100644 --- a/flixopt/calculation.py +++ b/flixopt/calculation.py @@ -119,7 +119,7 @@ def main_results(self) -> Dict[str, Union[Scalar, Dict]]: def summary(self): return { 'Name': self.name, - 'Number of timesteps': len(self.flow_system.time_series_collection.timesteps), + 'Number of timesteps': len(self.flow_system.time_series_allocator.timesteps), 'Calculation Type': self.__class__.__name__, 'Constraints': self.model.constraints.ncons, 'Variables': self.model.variables.nvars, @@ -183,7 +183,7 @@ def solve(self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_ma def _activate_time_series(self): self.flow_system.transform_data() - self.flow_system.time_series_collection.activate_timesteps( + self.flow_system.time_series_allocator.activate_timesteps( active_timesteps=self.active_timesteps, ) @@ -245,8 +245,8 @@ def _perform_aggregation(self): # Validation dt_min, dt_max = ( - np.min(self.flow_system.time_series_collection.hours_per_timestep), - np.max(self.flow_system.time_series_collection.hours_per_timestep), + 
np.min(self.flow_system.time_series_allocator.hours_per_timestep), + np.max(self.flow_system.time_series_allocator.hours_per_timestep), ) if not dt_min == dt_max: raise ValueError( @@ -255,11 +255,11 @@ def _perform_aggregation(self): ) steps_per_period = ( self.aggregation_parameters.hours_per_period - / self.flow_system.time_series_collection.hours_per_timestep.max() + / self.flow_system.time_series_allocator.hours_per_timestep.max() ) is_integer = ( self.aggregation_parameters.hours_per_period - % self.flow_system.time_series_collection.hours_per_timestep.max() + % self.flow_system.time_series_allocator.hours_per_timestep.max() ).item() == 0 if not (steps_per_period.size == 1 and is_integer): raise ValueError( @@ -272,13 +272,13 @@ def _perform_aggregation(self): # Aggregation - creation of aggregated timeseries: self.aggregation = Aggregation( - original_data=self.flow_system.time_series_collection.to_dataframe( + original_data=self.flow_system.time_series_allocator.to_dataframe( include_extra_timestep=False ), # Exclude last row (NaN) hours_per_time_step=float(dt_min), hours_per_period=self.aggregation_parameters.hours_per_period, nr_of_periods=self.aggregation_parameters.nr_of_periods, - weights=self.flow_system.time_series_collection.calculate_aggregation_weights(), + weights=self.flow_system.time_series_allocator.calculate_aggregation_weights(), time_series_for_high_peaks=self.aggregation_parameters.labels_for_high_peaks, time_series_for_low_peaks=self.aggregation_parameters.labels_for_low_peaks, ) @@ -286,7 +286,7 @@ def _perform_aggregation(self): self.aggregation.cluster() self.aggregation.plot(show=True, save=self.folder / 'aggregation.html') if self.aggregation_parameters.aggregate_data_and_fix_non_binary_vars: - self.flow_system.time_series_collection.insert_new_data( + self.flow_system.time_series_allocator.insert_new_data( self.aggregation.aggregated_data, include_extra_timestep=False ) self.durations['aggregation'] = round(timeit.default_timer() - t_start_agg, 2) @@ -327,8 +327,8 @@ def __init__( self.nr_of_previous_values = nr_of_previous_values self.sub_calculations: List[FullCalculation] = [] - self.all_timesteps = self.flow_system.time_series_collection.all_timesteps - self.all_timesteps_extra = self.flow_system.time_series_collection.all_timesteps_extra + self.all_timesteps = self.flow_system.time_series_allocator.all_timesteps + self.all_timesteps_extra = self.flow_system.time_series_allocator.all_timesteps_extra self.segment_names = [ f'Segment_{i + 1}' for i in range(math.ceil(len(self.all_timesteps) / self.timesteps_per_segment)) diff --git a/flixopt/effects.py b/flixopt/effects.py index 82aa63a43..3b2f32311 100644 --- a/flixopt/effects.py +++ b/flixopt/effects.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd -from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesCollection +from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesAllocator from .features import ShareAllocationModel from .structure import Element, ElementModel, Interface, Model, SystemModel, register_class_for_io diff --git a/flixopt/elements.py b/flixopt/elements.py index 05898d4e5..378a5fab1 100644 --- a/flixopt/elements.py +++ b/flixopt/elements.py @@ -10,7 +10,7 @@ import numpy as np from .config import CONFIG -from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesCollection +from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesAllocator from .effects import EffectValuesUser from 
.features import InvestmentModel, OnOffModel, PreventSimultaneousUsageModel from .interface import InvestParameters, OnOffParameters diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index 79ff44429..a0529dd91 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -16,7 +16,7 @@ from rich.pretty import Pretty from . import io as fx_io -from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesCollection, TimeSeriesData +from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesAllocator, TimeSeriesData from .effects import Effect, EffectCollection, EffectTimeSeries, EffectValuesDict, EffectValuesUser from .elements import Bus, Component, Flow from .structure import CLASS_REGISTRY, Element, SystemModel, get_compact_representation, get_str_representation @@ -49,7 +49,7 @@ def __init__( This is needed to calculate previous durations (for example consecutive_on_hours). If you use an array, take care that its long enough to cover all previous values! """ - self.time_series_collection = TimeSeriesCollection( + self.time_series_allocator = TimeSeriesAllocator( timesteps=timesteps, scenarios=scenarios, hours_of_last_timestep=hours_of_last_timestep, @@ -67,7 +67,7 @@ def __init__( @classmethod def from_dataset(cls, ds: xr.Dataset): timesteps_extra = pd.DatetimeIndex(ds.attrs['timesteps_extra'], name='time') - hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() + hours_of_last_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() flow_system = FlowSystem( timesteps=timesteps_extra[:-1], @@ -92,7 +92,7 @@ def from_dict(cls, data: Dict) -> 'FlowSystem': data: Dictionary containing the FlowSystem data. """ timesteps_extra = pd.DatetimeIndex(data['timesteps_extra'], name='time') - hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() + hours_of_last_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() flow_system = FlowSystem( timesteps=timesteps_extra[:-1], @@ -171,8 +171,8 @@ def as_dict(self, data_mode: Literal['data', 'name', 'stats'] = 'data') -> Dict: effect.label: effect.to_dict() for effect in sorted(self.effects, key=lambda effect: effect.label.upper()) }, - 'timesteps_extra': [date.isoformat() for date in self.time_series_collection.timesteps_extra], - 'hours_of_previous_timesteps': self.time_series_collection.hours_of_previous_timesteps, + 'timesteps_extra': [date.isoformat() for date in self.time_series_allocator.timesteps_extra], + 'hours_of_previous_timesteps': self.time_series_allocator.hours_of_previous_timesteps, } if data_mode == 'data': return fx_io.replace_timeseries(data, 'data') @@ -187,7 +187,7 @@ def as_dataset(self, constants_in_dataset: bool = False) -> xr.Dataset: Args: constants_in_dataset: If True, constants are included as Dataset variables. 
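
Recovering hours_of_last_timestep from the stored extra timestep is plain timedelta arithmetic; a standalone sketch of the equivalent computation, assuming calculate_hours_per_timestep is the simple time-difference it appears to be (dates are illustrative):

import numpy as np
import pandas as pd

timesteps_extra = pd.DatetimeIndex(
    ['2025-01-01 00:00', '2025-01-02 00:00', '2025-01-03 12:00'], name='time'
)
hours_per_step = np.diff(timesteps_extra) / np.timedelta64(1, 'h')
hours_of_last_timestep = float(hours_per_step[-1])
assert hours_of_last_timestep == 36.0  # 2025-01-02 00:00 -> 2025-01-03 12:00
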
""" - ds = self.time_series_collection.to_dataset(include_constants=constants_in_dataset) + ds = self.time_series_allocator.to_dataset(include_constants=constants_in_dataset) ds.attrs = self.as_dict(data_mode='name') return ds @@ -281,7 +281,7 @@ def create_time_series( needs_extra_timestep: bool = False, ) -> Optional[TimeSeries]: """ - Tries to create a TimeSeries from NumericData Data and adds it to the time_series_collection + Tries to create a TimeSeries from NumericData Data and adds it to the time_series_allocator If the data already is a TimeSeries, nothing happens and the TimeSeries gets reset and returned If the data is a TimeSeriesData, it is converted to a TimeSeries, and the aggregation weights are applied. If the data is None, nothing happens. @@ -291,12 +291,12 @@ def create_time_series( return None elif isinstance(data, TimeSeries): data.restore_data() - if data in self.time_series_collection: + if data in self.time_series_allocator: return data - return self.time_series_collection.create_time_series( + return self.time_series_allocator.create_time_series( data=data.active_data, name=name, needs_extra_timestep=needs_extra_timestep ) - return self.time_series_collection.create_time_series( + return self.time_series_allocator.create_time_series( data=data, name=name, needs_extra_timestep=needs_extra_timestep ) diff --git a/flixopt/results.py b/flixopt/results.py index d9eb5a654..90a86d1b2 100644 --- a/flixopt/results.py +++ b/flixopt/results.py @@ -14,7 +14,7 @@ from . import io as fx_io from . import plotting -from .core import TimeSeriesCollection +from .core import TimeSeriesAllocator if TYPE_CHECKING: import pyvis @@ -160,7 +160,7 @@ def __init__( } self.timesteps_extra = self.solution.indexes['time'] - self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.timesteps_extra) + self.hours_per_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(self.timesteps_extra) def __getitem__(self, key: str) -> Union['ComponentResults', 'BusResults', 'EffectResults']: if key in self.components: @@ -684,7 +684,7 @@ def __init__( self.overlap_timesteps = overlap_timesteps self.name = name self.folder = pathlib.Path(folder) if folder is not None else pathlib.Path.cwd() / 'results' - self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.all_timesteps) + self.hours_per_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(self.all_timesteps) @property def meta_data(self) -> Dict[str, Union[int, List[str]]]: diff --git a/flixopt/structure.py b/flixopt/structure.py index e7f1c62a4..4e3b26acf 100644 --- a/flixopt/structure.py +++ b/flixopt/structure.py @@ -19,7 +19,7 @@ from rich.pretty import Pretty from .config import CONFIG -from .core import NumericData, Scalar, TimeSeries, TimeSeriesCollection, TimeSeriesData +from .core import NumericData, Scalar, TimeSeries, TimeSeriesAllocator, TimeSeriesData if TYPE_CHECKING: # for type checking and preventing circular imports from .effects import EffectCollectionModel @@ -56,7 +56,7 @@ def __init__(self, flow_system: 'FlowSystem'): """ super().__init__(force_dim_names=True) self.flow_system = flow_system - self.time_series_collection = flow_system.time_series_collection + self.time_series_allocator = flow_system.time_series_allocator self.effects: Optional[EffectCollectionModel] = None def do_modeling(self): @@ -88,23 +88,23 @@ def solution(self): for effect in sorted(self.flow_system.effects, key=lambda effect: effect.label_full.upper()) }, } - return 
solution.reindex(time=self.time_series_collection.timesteps_extra) + return solution.reindex(time=self.time_series_allocator.timesteps_extra) @property def hours_per_step(self): - return self.time_series_collection.hours_per_timestep + return self.time_series_allocator.hours_per_timestep @property def hours_of_previous_timesteps(self): - return self.time_series_collection.hours_of_previous_timesteps + return self.time_series_allocator.hours_of_previous_timesteps @property def coords(self) -> Tuple[pd.DatetimeIndex]: - return (self.time_series_collection.timesteps,) + return (self.time_series_allocator.timesteps,) @property def coords_extra(self) -> Tuple[pd.DatetimeIndex]: - return (self.time_series_collection.timesteps_extra,) + return (self.time_series_allocator.timesteps_extra,) class Interface: From e69631b31d31b5d32f53e262b260f9f4fe04ac3a Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:18:07 +0200 Subject: [PATCH 29/55] Update TimeSeriesAllocator --- flixopt/core.py | 149 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 112 insertions(+), 37 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 3492568aa..578a28e81 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1058,7 +1058,6 @@ def __init__( if scenarios is None: self._dataset = xr.Dataset(coords={'time': self.timesteps}) self._dataset_extra = xr.Dataset(coords={'time': self.timesteps_extra}) - else: self._dataset = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps}) self._dataset_extra = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) @@ -1066,36 +1065,80 @@ def __init__( # Series that need extra timestep self._has_extra_timestep: Dict[str, bool] = {} + # Storage for TimeSeries objects + self._time_series: Dict[str, TimeSeries] = {} + # Active subset selectors - self._selection: Dict[str, Any] = {} + self._selection: Dict[str, Any] = {'time': slice(None, None), 'scenario': slice(None, None)} - def add_data_array( + def add_time_series( self, name: str, - data: NumericData, + data: Union[NumericData, TimeSeries], + aggregation_weight: Optional[float] = None, + aggregation_group: Optional[str] = None, needs_extra_timestep: bool = False, - ) -> xr.DataArray: + ) -> TimeSeries: """ - Add a new data array to the allocator. + Add a new TimeSeries to the allocator. 
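
Patch 29 below turns the allocator into a registry of TimeSeries objects. A hedged usage sketch under the API shown here (add_time_series plus selection propagation); the scenario setup and values are made up:

import numpy as np
import pandas as pd
from flixopt.core import TimeSeriesAllocator

timesteps = pd.date_range('2025-01-01', periods=5, freq='h', name='time')
scenarios = pd.Index(['low', 'high'], name='scenario')
allocator = TimeSeriesAllocator(timesteps, scenarios=scenarios)

price = allocator.add_time_series('price', np.linspace(20.0, 40.0, 5))
assert price.active_data.shape == (2, 5)   # 1D input broadcast to scenarios

allocator.set_selection(timesteps=timesteps[:3])
assert price.active_data.shape == (2, 3)   # selection reaches the TimeSeries
allocator.clear_selection()
assert price.active_data.shape == (2, 5)
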
+ + Args: + name: Name of the time series + data: Data for the time series (can be raw data or an existing TimeSeries) + aggregation_weight: Weight used for aggregation + aggregation_group: Group name for shared aggregation weighting + needs_extra_timestep: Whether this series needs an extra timestep + + Returns: + The created TimeSeries object """ - if name in self._dataset or name in self._dataset_extra: - raise KeyError(f"Data array '{name}' already exists in allocator") + if name in self._time_series: + raise KeyError(f"TimeSeries '{name}' already exists in allocator") - # Choose which dataset to use - target_dataset = self._dataset_extra if needs_extra_timestep else self._dataset + # Choose which timesteps to use target_timesteps = self.timesteps_extra if needs_extra_timestep else self.timesteps - # Convert to DataArray - data_array = DataConverter.as_dataarray(data, target_timesteps, self.scenarios) + # Create or adapt the TimeSeries object + if isinstance(data, TimeSeries): + # Use the existing TimeSeries but update its parameters + time_series = data + # Update the stored data to use our timesteps and scenarios + data_array = DataConverter.as_dataarray( + time_series.stored_data, timesteps=target_timesteps, scenarios=self.scenarios + ) + time_series = TimeSeries( + data=data_array, + name=name, + aggregation_weight=aggregation_weight or time_series.aggregation_weight, + aggregation_group=aggregation_group or time_series.aggregation_group, + needs_extra_timestep=needs_extra_timestep or time_series.needs_extra_timestep, + ) + else: + # Create a new TimeSeries from raw data + time_series = TimeSeries.from_datasource( + data=data, + name=name, + timesteps=target_timesteps, + scenarios=self.scenarios, + aggregation_weight=aggregation_weight, + aggregation_group=aggregation_group, + needs_extra_timestep=needs_extra_timestep, + ) + + # Add to storage + self._time_series[name] = time_series - # Add to the appropriate dataset - target_dataset[name] = data_array + # Also add to internal dataset for selection management + if needs_extra_timestep: + self._dataset_extra[name] = time_series.stored_data + else: + self._dataset[name] = time_series.stored_data # Track if it needs extra timestep self._has_extra_timestep[name] = needs_extra_timestep - # Return reference - return self[name] + # Return the TimeSeries object + return time_series def clear_selection(self, timesteps: bool = True, scenarios: bool = True): """ @@ -1110,6 +1153,9 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): if scenarios: self._selection['scenario'] = slice(None, None) + # Apply the selection to all TimeSeries objects + self._propagate_selection_to_time_series() + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): """ Set active subset for timesteps and scenarios. 
@@ -1128,33 +1174,62 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios:
         else:
             self._selection['scenario'] = scenarios
 
-    def __getitem__(self, name: str) -> xr.DataArray:
+        # Apply the selection to all TimeSeries objects
+        self._propagate_selection_to_time_series()
+
+    def _propagate_selection_to_time_series(self):
+        """Apply the current selection to all TimeSeries objects."""
+        timesteps = self._selection['time']
+        scenarios = self._selection['scenario']
+        for ts in self._time_series.values():
+            ts.set_selection(timesteps=timesteps, scenarios=scenarios)
+
+    def __getitem__(self, name: str) -> TimeSeries:
         """
-        Get the selected data of a data array.
+        Get a reference to a registered time series.
 
         Args:
-            name: Name of the data array
+            name: Name of the time series
 
         Returns:
-            DataArray reference with active subset applied
+            The TimeSeries object; a ValueError is raised if no series with this name exists
         """
-        if name in self._dataset:
-            dataset = self._dataset
-        elif name in self._dataset_extra:
-            dataset = self._dataset_extra
-        else:
-            raise KeyError(f"Data array '{name}' not found in allocator")
-
-        # Apply the active subset if any
-        if self._selection:
-            # Filter selector to only include dimensions in this dataset
-            valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in dataset.dims}
-            if valid_selector:
-                # Get the subset of the dataset then extract the variable
-                return dataset.sel(**valid_selector)[name]
-
-        # Return the variable directly
-        return dataset[name]
+        # First check if this is a TimeSeries
+        if name in self._time_series:
+            # Return the TimeSeries object (it will handle selection internally)
+            return self._time_series[name]
+        raise ValueError(f'No TimeSeries named "{name}" found')
+
+    def update_time_series(self, name: str, data: NumericData) -> TimeSeries:
+        """
+        Update an existing TimeSeries with new data.
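
A short sketch of updating a registered series in place, assuming the update_time_series/update_stored_data pair from these patches (values are made up):

import numpy as np
import pandas as pd
from flixopt.core import TimeSeriesAllocator

timesteps = pd.date_range('2025-01-01', periods=4, freq='h', name='time')
allocator = TimeSeriesAllocator(timesteps)
demand = allocator.add_time_series('demand', np.zeros(4))

allocator.update_time_series('demand', np.full(4, 7.5))
assert float(demand.active_data[0]) == 7.5   # same TimeSeries object, new values
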
+ + Args: + name: Name of the TimeSeries to update + data: New data to assign + + Returns: + The updated TimeSeries + + Raises: + KeyError: If no TimeSeries with the given name exists + """ + if name not in self._time_series: + raise KeyError(f"No TimeSeries named '{name}' found") + + # Get the TimeSeries + ts = self._time_series[name] + + # Choose appropriate timesteps + target_timesteps = self.timesteps_extra if self._has_extra_timestep[name] else self.timesteps + + # Convert data to proper format + data_array = DataConverter.as_dataarray(data, target_timesteps, self.scenarios) + + # Update the TimeSeries + ts.update_stored_data(data_array) + + return ts @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): From 6988e2aef98a70982ab3c8b7e90963faeb05672a Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:21:21 +0200 Subject: [PATCH 30/55] Update TimeSeriesAllocator --- flixopt/core.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 578a28e81..675e385b6 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1054,14 +1054,6 @@ def __init__( self.scenarios = scenarios - # Storage for all data arrays - if scenarios is None: - self._dataset = xr.Dataset(coords={'time': self.timesteps}) - self._dataset_extra = xr.Dataset(coords={'time': self.timesteps_extra}) - else: - self._dataset = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps}) - self._dataset_extra = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) - # Series that need extra timestep self._has_extra_timestep: Dict[str, bool] = {} @@ -1128,12 +1120,6 @@ def add_time_series( # Add to storage self._time_series[name] = time_series - # Also add to internal dataset for selection management - if needs_extra_timestep: - self._dataset_extra[name] = time_series.stored_data - else: - self._dataset[name] = time_series.stored_data - # Track if it needs extra timestep self._has_extra_timestep[name] = needs_extra_timestep @@ -1177,6 +1163,20 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() + def as_dataset(self) -> xr.Dataset: + """ + Convert the TimeSeriesAllocator to a xarray Dataset, containing the data of each TimeSeries. 
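
as_dataset gathers the active view of every registered series into a single Dataset; a minimal sketch under the same assumed API:

import numpy as np
import pandas as pd
from flixopt.core import TimeSeriesAllocator

timesteps = pd.date_range('2025-01-01', periods=3, freq='h', name='time')
allocator = TimeSeriesAllocator(timesteps)
allocator.add_time_series('a', np.arange(3.0))
allocator.add_time_series('b', 42)

ds = allocator.as_dataset()
assert set(ds.data_vars) == {'a', 'b'}
assert 'time' in ds.coords
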
+ """ + if self.scenarios is None: + ds = xr.Dataset(coords={'time': self.timesteps_extra}) + else: + ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) + + for ts in self._time_series.values(): + ds[ts.name] = ts.active_data + + return ds + def _propagate_selection_to_time_series(self): """Apply the current selection to all TimeSeries objects.""" timesteps = self._selection['time'] From acf869c8ff89c91d9f786e37976cb1f2475d8335 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 11:39:32 +0200 Subject: [PATCH 31/55] Update TimeSeriesAllocator --- flixopt/core.py | 79 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 74 insertions(+), 5 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 675e385b6..c99b71ace 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1045,14 +1045,15 @@ def __init__( ): """Initialize a TimeSeriesAllocator.""" self._validate_timesteps(timesteps) - self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( + self._original_hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( timesteps, hours_of_previous_timesteps ) - self.timesteps = timesteps - self.timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - self.hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra) - self.scenarios = scenarios + self._full_timesteps = timesteps + self._full_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) + self._full_hours_per_timestep = self.calculate_hours_per_timestep(self._full_timesteps_extra) + + self._full_scenarios = scenarios # Series that need extra timestep self._has_extra_timestep: Dict[str, bool] = {} @@ -1177,6 +1178,70 @@ def as_dataset(self) -> xr.Dataset: return ds + @property + def timesteps(self): + """Get the current active timesteps.""" + time_sel = self._selection['time'] + if isinstance(time_sel, slice) and time_sel == slice(None, None): + return self._full_timesteps + return self._full_timesteps[self._selection['time']] + + @property + def timesteps_extra(self): + """Get the current active timesteps with extra timestep.""" + # Handle the extra timestep appropriately when selection is applied + if isinstance(self._selection['time'], slice) and self._selection['time'] == slice(None, None): + return self._full_timesteps_extra + + # If there's a timestep selection, we need to include the extra timestep properly + selected_timesteps = self._full_timesteps[self._selection['time']] + if selected_timesteps[-1] == self._full_timesteps[-1]: + # Include the extra timestep if selection includes the last regular timestep + return pd.DatetimeIndex(list(selected_timesteps) + [self._full_timesteps_extra[-1]], name='time') + return selected_timesteps + + @property + def hours_per_timestep(self): + """Get the current active hours per timestep.""" + time_sel = self._selection['time'] + if isinstance(time_sel, slice) and time_sel == slice(None, None): + return self._full_hours_per_timestep + + # Select the corresponding hours per timestep + indices = np.where(np.isin(self._full_timesteps, self.timesteps))[0] + return self._full_hours_per_timestep.isel(time=indices) + + @property + def hours_of_previous_timesteps(self): + """ + Get the duration of previous timesteps. + + When no selection is active, returns the original hours of previous timesteps. 
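
The properties above all follow one pattern: keep the full range under a private name and fall back to it whenever no selection is active. A tiny standalone model of that pattern (patch 33 later simplifies the sentinel from a full slice to None, which is what this sketch uses):

import pandas as pd

class Window:
    """Minimal model of the full-range/selected-range fallback."""

    def __init__(self, full_timesteps: pd.DatetimeIndex):
        self._full_timesteps = full_timesteps
        self._selected_timesteps = None   # None means 'no selection active'

    @property
    def timesteps(self) -> pd.DatetimeIndex:
        if self._selected_timesteps is None:
            return self._full_timesteps
        return self._selected_timesteps

window = Window(pd.date_range('2025-01-01', periods=4, freq='D', name='time'))
assert len(window.timesteps) == 4
window._selected_timesteps = window.timesteps[:2]
assert len(window.timesteps) == 2
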
+ When a selection is active, returns the hours per timestep for the time period + right before the first timestep in the selection. + """ + time_sel = self._selection['time'] + + # If no selection or default selection, return the original value + if isinstance(time_sel, slice) and time_sel == slice(None, None): + return self._original_hours_of_previous_timesteps + + # Find the index of the first selected timestep + first_selected_idx = np.where(self._full_timesteps == self._full_timesteps[time_sel][0])[0][0] + + # Return the hours per timestep for the timestep right before the first selected one + return self._full_hours_per_timestep.sel(time=self._full_timesteps[first_selected_idx - 1]).item() + + @property + def scenarios(self): + """Get the current active scenarios.""" + if self._full_scenarios is None: + return None + scenario_sel = self._selection['scenario'] + if isinstance(scenario_sel, slice) and scenario_sel == slice(None, None): + return self._full_scenarios + return self._full_scenarios[self._selection['scenario']] + def _propagate_selection_to_time_series(self): """Apply the current selection to all TimeSeries objects.""" timesteps = self._selection['time'] @@ -1231,6 +1296,10 @@ def update_time_series(self, name: str, data: NumericData) -> TimeSeries: return ts + def _update_internal_dataset(self): + """Update the internal dataset with the index data""" + + @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): """Validate timesteps format and rename if needed.""" From c3f5b00fa08857ae66eb7a9a60eede60f8eb6986 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:23:02 +0200 Subject: [PATCH 32/55] Update TimeSeriesAllocator --- flixopt/core.py | 31 +++++-------------------------- 1 file changed, 5 insertions(+), 26 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index c99b71ace..b8b7681e5 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1045,9 +1045,9 @@ def __init__( ): """Initialize a TimeSeriesAllocator.""" self._validate_timesteps(timesteps) - self._original_hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( + self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( timesteps, hours_of_previous_timesteps - ) + ) #TODO: Make dynamic self._full_timesteps = timesteps self._full_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) @@ -1179,7 +1179,7 @@ def as_dataset(self) -> xr.Dataset: return ds @property - def timesteps(self): + def timesteps(self) -> pd.DatetimeIndex: """Get the current active timesteps.""" time_sel = self._selection['time'] if isinstance(time_sel, slice) and time_sel == slice(None, None): @@ -1187,7 +1187,7 @@ def timesteps(self): return self._full_timesteps[self._selection['time']] @property - def timesteps_extra(self): + def timesteps_extra(self) -> pd.DatetimeIndex: """Get the current active timesteps with extra timestep.""" # Handle the extra timestep appropriately when selection is applied if isinstance(self._selection['time'], slice) and self._selection['time'] == slice(None, None): @@ -1201,7 +1201,7 @@ def timesteps_extra(self): return selected_timesteps @property - def hours_per_timestep(self): + def hours_per_timestep(self) -> xr.DataArray: """Get the current active hours per timestep.""" time_sel = self._selection['time'] if isinstance(time_sel, slice) and time_sel == slice(None, None): @@ -1211,27 +1211,6 @@ def hours_per_timestep(self): indices = 
np.where(np.isin(self._full_timesteps, self.timesteps))[0] return self._full_hours_per_timestep.isel(time=indices) - @property - def hours_of_previous_timesteps(self): - """ - Get the duration of previous timesteps. - - When no selection is active, returns the original hours of previous timesteps. - When a selection is active, returns the hours per timestep for the time period - right before the first timestep in the selection. - """ - time_sel = self._selection['time'] - - # If no selection or default selection, return the original value - if isinstance(time_sel, slice) and time_sel == slice(None, None): - return self._original_hours_of_previous_timesteps - - # Find the index of the first selected timestep - first_selected_idx = np.where(self._full_timesteps == self._full_timesteps[time_sel][0])[0][0] - - # Return the hours per timestep for the timestep right before the first selected one - return self._full_hours_per_timestep.sel(time=self._full_timesteps[first_selected_idx - 1]).item() - @property def scenarios(self): """Get the current active scenarios.""" From 84715c325a0150b6138723757c7322c2fd4218af Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:30:47 +0200 Subject: [PATCH 33/55] Update selection --- flixopt/core.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index b8b7681e5..48be532af 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1062,7 +1062,7 @@ def __init__( self._time_series: Dict[str, TimeSeries] = {} # Active subset selectors - self._selection: Dict[str, Any] = {'time': slice(None, None), 'scenario': slice(None, None)} + self._selection: Dict[str, Any] = {'time': None, 'scenario': None} def add_time_series( self, @@ -1136,9 +1136,9 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): scenarios: Whether to clear scenarios selection """ if timesteps: - self._selection['time'] = slice(None, None) + self._selection['time'] = None if scenarios: - self._selection['scenario'] = slice(None, None) + self._selection['scenario'] = None # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() @@ -1182,9 +1182,9 @@ def as_dataset(self) -> xr.Dataset: def timesteps(self) -> pd.DatetimeIndex: """Get the current active timesteps.""" time_sel = self._selection['time'] - if isinstance(time_sel, slice) and time_sel == slice(None, None): + if time_sel is None: return self._full_timesteps - return self._full_timesteps[self._selection['time']] + return self._full_timesteps[time_sel] @property def timesteps_extra(self) -> pd.DatetimeIndex: @@ -1223,10 +1223,8 @@ def scenarios(self): def _propagate_selection_to_time_series(self): """Apply the current selection to all TimeSeries objects.""" - timesteps = self._selection['time'] - scenarios = self._selection['scenario'] for ts in self._time_series.values(): - ts.set_selection(timesteps=timesteps, scenarios=scenarios) + ts.set_selection(timesteps=self._selection['time'], scenarios=self._selection['scenario']) def __getitem__(self, name: str) -> TimeSeries: """ From f9d38408abe6aacb3ccb9649ec7411376975f051 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:47:20 +0200 Subject: [PATCH 34/55] Improve selection --- flixopt/core.py | 56 +++++++++++++++++++------------------------------ 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 
48be532af..03321039f 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1062,7 +1062,10 @@ def __init__( self._time_series: Dict[str, TimeSeries] = {} # Active subset selectors - self._selection: Dict[str, Any] = {'time': None, 'scenario': None} + self._selected_timesteps: Optional[pd.DatetimeIndex] = None + self._selected_scenarios: Optional[pd.Index] = None + self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None + self._selected_hours_per_timestep: Optional[xr.DataArray] = None def add_time_series( self, @@ -1136,9 +1139,9 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): scenarios: Whether to clear scenarios selection """ if timesteps: - self._selection['time'] = None + self._selected_timesteps = None if scenarios: - self._selection['scenario'] = None + self._selected_scenarios = None # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() @@ -1154,12 +1157,16 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: if timesteps is None: self.clear_selection(timesteps=True, scenarios=False) else: - self._selection['time'] = timesteps + self._selected_timesteps = timesteps + self._selected_hours_per_timestep = self._full_hours_per_timestep.isel(time=timesteps) + self._selected_timesteps_extra = self._create_timesteps_with_extra( + timesteps, self._selected_hours_per_timestep.isel(time=-1).max().item() + ) if scenarios is None: self.clear_selection(timesteps=False, scenarios=True) else: - self._selection['scenario'] = scenarios + self._selected_scenarios = scenarios # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() @@ -1181,50 +1188,35 @@ def as_dataset(self) -> xr.Dataset: @property def timesteps(self) -> pd.DatetimeIndex: """Get the current active timesteps.""" - time_sel = self._selection['time'] - if time_sel is None: + if self._selected_timesteps is None: return self._full_timesteps - return self._full_timesteps[time_sel] + return self._selected_timesteps @property def timesteps_extra(self) -> pd.DatetimeIndex: """Get the current active timesteps with extra timestep.""" - # Handle the extra timestep appropriately when selection is applied - if isinstance(self._selection['time'], slice) and self._selection['time'] == slice(None, None): + if self._selected_timesteps is None: return self._full_timesteps_extra - - # If there's a timestep selection, we need to include the extra timestep properly - selected_timesteps = self._full_timesteps[self._selection['time']] - if selected_timesteps[-1] == self._full_timesteps[-1]: - # Include the extra timestep if selection includes the last regular timestep - return pd.DatetimeIndex(list(selected_timesteps) + [self._full_timesteps_extra[-1]], name='time') - return selected_timesteps + return self._selected_timesteps @property def hours_per_timestep(self) -> xr.DataArray: """Get the current active hours per timestep.""" - time_sel = self._selection['time'] - if isinstance(time_sel, slice) and time_sel == slice(None, None): + if self._selected_hours_per_timestep is None: return self._full_hours_per_timestep - - # Select the corresponding hours per timestep - indices = np.where(np.isin(self._full_timesteps, self.timesteps))[0] - return self._full_hours_per_timestep.isel(time=indices) + return self._selected_hours_per_timestep @property - def scenarios(self): + def scenarios(self) -> Optional[pd.Index]: """Get the current active scenarios.""" - if self._full_scenarios is None: - return None - scenario_sel = 
self._selection['scenario'] - if isinstance(scenario_sel, slice) and scenario_sel == slice(None, None): + if self._selected_scenarios is None: return self._full_scenarios - return self._full_scenarios[self._selection['scenario']] + return self._selected_scenarios def _propagate_selection_to_time_series(self): """Apply the current selection to all TimeSeries objects.""" for ts in self._time_series.values(): - ts.set_selection(timesteps=self._selection['time'], scenarios=self._selection['scenario']) + ts.set_selection(timesteps=self._selected_timesteps, scenarios=self._selected_scenarios) def __getitem__(self, name: str) -> TimeSeries: """ @@ -1273,10 +1265,6 @@ def update_time_series(self, name: str, data: NumericData) -> TimeSeries: return ts - def _update_internal_dataset(self): - """Update the internal dataset with the index data""" - - @staticmethod def _validate_timesteps(timesteps: pd.DatetimeIndex): """Validate timesteps format and rename if needed.""" From 8c9a859934245e6cd6b09b4a969f5702bca1a4c5 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 12:52:04 +0200 Subject: [PATCH 35/55] Improve validation of Timesteps --- flixopt/core.py | 33 ++++++++++++++++++++++++++++++--- 1 file changed, 30 insertions(+), 3 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 03321039f..0b2391890 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1157,8 +1157,10 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: if timesteps is None: self.clear_selection(timesteps=True, scenarios=False) else: + self._validate_timesteps(timesteps, self._full_timesteps) + self._selected_timesteps = timesteps - self._selected_hours_per_timestep = self._full_hours_per_timestep.isel(time=timesteps) + self._selected_hours_per_timestep = self._full_hours_per_timestep.sel(time=timesteps) self._selected_timesteps_extra = self._create_timesteps_with_extra( timesteps, self._selected_hours_per_timestep.isel(time=-1).max().item() ) @@ -1266,8 +1268,21 @@ def update_time_series(self, name: str, data: NumericData) -> TimeSeries: return ts @staticmethod - def _validate_timesteps(timesteps: pd.DatetimeIndex): - """Validate timesteps format and rename if needed.""" + def _validate_timesteps(timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None): + """ + Validate timesteps format and rename if needed. 
+        Args:
+            timesteps: The timesteps to validate
+            present_timesteps: The timesteps that are present in the dataset
+
+        Raises:
+            TypeError: If timesteps is not a pandas DatetimeIndex
+            ValueError: If timesteps contains fewer than 2 timestamps
+            ValueError: If timesteps is not sorted
+            ValueError: If timesteps contains duplicates
+            ValueError: If timesteps is not a subset of present_timesteps
+            (An index not named 'time' is renamed with a warning, not rejected.)
+        """
         if not isinstance(timesteps, pd.DatetimeIndex):
             raise TypeError('timesteps must be a pandas DatetimeIndex')
 
@@ -1279,6 +1294,18 @@ def _validate_timesteps(timesteps: pd.DatetimeIndex):
             logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name)
             timesteps.name = 'time'
 
+        # Ensure timesteps is sorted
+        if not timesteps.is_monotonic_increasing:
+            raise ValueError('timesteps must be sorted')
+
+        # Ensure timesteps has no duplicates
+        if len(timesteps) != len(timesteps.drop_duplicates()):
+            raise ValueError('timesteps must not contain duplicates')
+
+        # Ensure timesteps is a subset of present_timesteps
+        if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
+            raise ValueError('timesteps must be a subset of present_timesteps')
+
     @staticmethod
     def _create_timesteps_with_extra(
         timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float]

From 580e99a1f68434e0d16a5bd6e1288a3d1d1e07d8 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 14:13:39 +0200
Subject: [PATCH 36/55] Improve TimeSeries

---
 flixopt/core.py | 74 +++++++++++++------------------------------------
 1 file changed, 19 insertions(+), 55 deletions(-)

diff --git a/flixopt/core.py b/flixopt/core.py
index 0b2391890..cc53c0f29 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -762,8 +762,9 @@ def __init__(
         self._stored_data = data.copy(deep=True)
         self._backup = self._stored_data.copy(deep=True)
 
-        # Selection state - use dictionaries for consistency with TimeSeriesAllocator
-        self._selection = {}
+        # Selection state
+        self._selected_timesteps: Optional[pd.DatetimeIndex] = None
+        self._selected_scenarios: Optional[pd.Index] = None
 
         # Flag for whether this series has scenarios
         self._has_scenarios = 'scenario' in data.dims
@@ -826,40 +827,13 @@ def all_equal(self) -> bool:
         """Check if all values in the series are equal."""
         return np.unique(self.active_data.values).size == 1
 
-    @property
-    def active_timesteps(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps."""
-        # If no selection is active, return all timesteps
-        if 'time' not in self._selection:
-            return self._stored_data.indexes['time']
-        return self._selection['time']
-
-    @property
-    def active_scenarios(self) -> Optional[pd.Index]:
-        """Get the current active scenarios."""
-        if not self._has_scenarios:
-            return None
-
-        # If no selection is active, return all scenarios
-        if 'scenario' not in self._selection:
-            return self._stored_data.indexes.get('scenario', None)
-        return self._selection['scenario']
-
     @property
     def active_data(self) -> xr.DataArray:
         """
         Get a view of stored_data based on current selections.
         This computes the view dynamically based on the current selection state.
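
The stricter validation in patch 35 is easiest to see by feeding it a bad index; a hedged sketch that calls the private helper directly (assuming it keeps this name and staticmethod form):

import pandas as pd
from flixopt.core import TimeSeriesAllocator

good = pd.date_range('2025-01-01', periods=3, freq='D', name='time')
TimeSeriesAllocator._validate_timesteps(good)            # passes silently

try:
    TimeSeriesAllocator._validate_timesteps(good[::-1])  # reversed -> not sorted
except ValueError as err:
    print(err)                                           # 'timesteps must be sorted'
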
""" - # Start with stored data - result = self._stored_data - - # Apply selections if they exist - valid_selector = {dim: sel for dim, sel in self._selection.items() if dim in result.dims} - if valid_selector: - result = result.sel(**valid_selector) - - return result + return self._stored_data.sel(**self._valid_selector) @property def stored_data(self) -> xr.DataArray: @@ -886,38 +860,22 @@ def update_stored_data(self, value: NumericData): self._stored_data = new_data self.clear_selection() # Reset selections to full dataset - def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): - """ - Set active subset for timesteps and scenarios. + def clear_selection(self, timesteps: bool = True, scenarios: bool = True): + if timesteps: + self._selected_timesteps = None + if scenarios: + self._selected_scenarios = None - Args: - timesteps: Timesteps to activate, or None to clear - scenarios: Scenarios to activate, or None to clear - """ + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): if timesteps is None: self.clear_selection(timesteps=True, scenarios=False) else: - self._selection['time'] = timesteps + self._selected_timesteps = timesteps if scenarios is None: self.clear_selection(timesteps=False, scenarios=True) else: - self._selection['scenario'] = scenarios - - def clear_selection(self, timesteps: bool = True, scenarios: bool = True): - """ - Clear selection for timesteps and/or scenarios. - - Args: - timesteps: Whether to clear timesteps selection - scenarios: Whether to clear scenarios selection - - This method follows the same API as TimeSeriesAllocator for consistency. - """ - if timesteps: - self._selection['time'] = slice(None, None) - if scenarios: - self._selection['scenario'] = slice(None, None) + self._selected_scenarios = scenarios @property def sel(self): @@ -929,6 +887,13 @@ def isel(self): """Direct access to the active_data's isel method for convenience.""" return self.active_data.isel + @property + def _valid_selector(self) -> Dict[str, pd.Index]: + """Get the current selection as a dictionary.""" + full_selection = {'time': self._selected_timesteps, 'scenario': self._selected_scenarios} + return {dim: sel for dim, sel in full_selection.items() if dim in self._stored_data.dims and sel is not None} + + def _apply_operation(self, other, op): """Apply an operation between this TimeSeries and another object.""" if isinstance(other, TimeSeries): @@ -1035,7 +1000,6 @@ class TimeSeriesAllocator: Provides a way to store time series data and work with subsets of dimensions that automatically update all references when changed. 
""" - def __init__( self, timesteps: pd.DatetimeIndex, From 9a93e923e299fe71450c8041214cb3d2b11d6cf7 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:03:26 +0200 Subject: [PATCH 37/55] Improve TimeSeriesAllocator --- flixopt/core.py | 69 ++++++++++++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 26 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index cc53c0f29..773f76186 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -970,16 +970,8 @@ def __repr__(self): 'aggregation_group': self.aggregation_group, 'needs_extra_timestep': self.needs_extra_timestep, 'shape': self.active_data.shape, - 'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}', } - # Add scenario information if present - if self._has_scenarios: - scenarios = self.active_scenarios - attrs['scenarios'] = f'{len(scenarios)} scenarios' if scenarios is not None else 'All scenarios' - else: - attrs['scenarios'] = 'No scenarios' - attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items()) return f'TimeSeries({attr_str})' @@ -1020,7 +1012,7 @@ def __init__( self._full_scenarios = scenarios # Series that need extra timestep - self._has_extra_timestep: Dict[str, bool] = {} + self._has_extra_timestep: set = set() # Storage for TimeSeries objects self._time_series: Dict[str, TimeSeries] = {} @@ -1089,7 +1081,8 @@ def add_time_series( self._time_series[name] = time_series # Track if it needs extra timestep - self._has_extra_timestep[name] = needs_extra_timestep + if needs_extra_timestep: + self._has_extra_timestep.add(name) # Return the TimeSeries object return time_series @@ -1103,7 +1096,7 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): scenarios: Whether to clear scenarios selection """ if timesteps: - self._selected_timesteps = None + self._update_selected_timesteps(timesteps=None) if scenarios: self._selected_scenarios = None @@ -1121,13 +1114,7 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: if timesteps is None: self.clear_selection(timesteps=True, scenarios=False) else: - self._validate_timesteps(timesteps, self._full_timesteps) - - self._selected_timesteps = timesteps - self._selected_hours_per_timestep = self._full_hours_per_timestep.sel(time=timesteps) - self._selected_timesteps_extra = self._create_timesteps_with_extra( - timesteps, self._selected_hours_per_timestep.isel(time=-1).max().item() - ) + self._update_selected_timesteps(timesteps) if scenarios is None: self.clear_selection(timesteps=False, scenarios=True) @@ -1137,6 +1124,24 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() + def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]): + """ + Updates the timestep and related metrics (timesteps_extra, hours_per_timestep) based on the current selection. 
+ """ + if timesteps is None: + self._selected_timesteps = None + self._selected_timesteps_extra = None + self._selected_hours_per_timestep = None + return + + self._validate_timesteps(timesteps, self._full_timesteps) + + self._selected_timesteps = timesteps + self._selected_hours_per_timestep = self._full_hours_per_timestep.sel(time=timesteps) + self._selected_timesteps_extra = self._create_timesteps_with_extra( + timesteps, self._selected_hours_per_timestep.isel(time=-1).max().item() + ) + def as_dataset(self) -> xr.Dataset: """ Convert the TimeSeriesAllocator to a xarray Dataset, containing the data of each TimeSeries. @@ -1161,9 +1166,9 @@ def timesteps(self) -> pd.DatetimeIndex: @property def timesteps_extra(self) -> pd.DatetimeIndex: """Get the current active timesteps with extra timestep.""" - if self._selected_timesteps is None: + if self._selected_timesteps_extra is None: return self._full_timesteps_extra - return self._selected_timesteps + return self._selected_timesteps_extra @property def hours_per_timestep(self) -> xr.DataArray: @@ -1181,8 +1186,12 @@ def scenarios(self) -> Optional[pd.Index]: def _propagate_selection_to_time_series(self): """Apply the current selection to all TimeSeries objects.""" - for ts in self._time_series.values(): - ts.set_selection(timesteps=self._selected_timesteps, scenarios=self._selected_scenarios) + for ts_name, ts in self._time_series.items(): + timesteps = self._selected_timesteps_extra if ts_name in self._has_extra_timestep else self._selected_timesteps + ts.set_selection( + timesteps=timesteps, + scenarios=self._selected_scenarios + ) def __getitem__(self, name: str) -> TimeSeries: """ @@ -1200,6 +1209,13 @@ def __getitem__(self, name: str) -> TimeSeries: return self._time_series[name] raise ValueError(f'No TimeSeries named "{name}" found') + def __contains__(self, value): + if isinstance(value, str): + return value in self._time_series + elif isinstance(value, TimeSeries): + return value.name in self._time_series + raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}') + def update_time_series(self, name: str, data: NumericData) -> TimeSeries: """ Update an existing TimeSeries with new data. 
@@ -1220,11 +1236,12 @@ def update_time_series(self, name: str, data: NumericData) -> TimeSeries: # Get the TimeSeries ts = self._time_series[name] - # Choose appropriate timesteps - target_timesteps = self.timesteps_extra if self._has_extra_timestep[name] else self.timesteps - # Convert data to proper format - data_array = DataConverter.as_dataarray(data, target_timesteps, self.scenarios) + data_array = DataConverter.as_dataarray( + data, + self.timesteps_extra if name in self._has_extra_timestep else self.timesteps, + self.scenarios + ) # Update the TimeSeries ts.update_stored_data(data_array) From 14b4f58d1506f2c2c744e932e409f2f5065df871 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:03:51 +0200 Subject: [PATCH 38/55] Update calculation and FlowSystem --- flixopt/calculation.py | 4 ++-- flixopt/flow_system.py | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/flixopt/calculation.py b/flixopt/calculation.py index 8f39f32ad..fe8a503bf 100644 --- a/flixopt/calculation.py +++ b/flixopt/calculation.py @@ -183,8 +183,8 @@ def solve(self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_ma def _activate_time_series(self): self.flow_system.transform_data() - self.flow_system.time_series_allocator.activate_timesteps( - active_timesteps=self.active_timesteps, + self.flow_system.time_series_allocator.set_selection( + timesteps=self.active_timesteps ) diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index a0529dd91..dee7023e8 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -187,7 +187,7 @@ def as_dataset(self, constants_in_dataset: bool = False) -> xr.Dataset: Args: constants_in_dataset: If True, constants are included as Dataset variables. 
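+
+        Example (illustrative; flow_system is an assumed, fully set-up instance):
+
+            >>> ds = flow_system.as_dataset()  # one data variable per TimeSeries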
""" - ds = self.time_series_allocator.to_dataset(include_constants=constants_in_dataset) + ds = self.time_series_allocator.as_dataset() ds.attrs = self.as_dict(data_mode='name') return ds @@ -293,10 +293,10 @@ def create_time_series( data.restore_data() if data in self.time_series_allocator: return data - return self.time_series_allocator.create_time_series( + return self.time_series_allocator.add_time_series( data=data.active_data, name=name, needs_extra_timestep=needs_extra_timestep ) - return self.time_series_allocator.create_time_series( + return self.time_series_allocator.add_time_series( data=data, name=name, needs_extra_timestep=needs_extra_timestep ) From 0d3fc2eb0d3b003c07a4a9bd7ab29a5823cc68e1 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:09:38 +0200 Subject: [PATCH 39/55] rename active_data to selected_data --- flixopt/components.py | 20 +++++------ flixopt/core.py | 47 +++++++++++++------------- flixopt/effects.py | 6 ++-- flixopt/elements.py | 8 ++--- flixopt/features.py | 4 +-- flixopt/flow_system.py | 2 +- flixopt/io.py | 2 +- flixopt/structure.py | 2 +- tests/test_timeseries.py | 72 ++++++++++++++++++++-------------------- 9 files changed, 81 insertions(+), 82 deletions(-) diff --git a/flixopt/components.py b/flixopt/components.py index d5d1df12d..5835afd31 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -342,7 +342,7 @@ def __init__(self, model: SystemModel, element: Transmission): def do_modeling(self): """Initiates all FlowModels""" # Force On Variable if absolute losses are present - if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses.active_data != 0): + if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses.selected_data != 0): for flow in self.element.inputs + self.element.outputs: if flow.on_off_parameters is None: flow.on_off_parameters = OnOffParameters() @@ -379,14 +379,14 @@ def create_transmission_equation(self, name: str, in_flow: Flow, out_flow: Flow) # eq: out(t) + on(t)*loss_abs(t) = in(t)*(1 - loss_rel(t)) con_transmission = self.add( self._model.add_constraints( - out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses.active_data - 1), + out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses.selected_data - 1), name=f'{self.label_full}|{name}', ), name, ) if self.element.absolute_losses is not None: - con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses.active_data + con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses.selected_data return con_transmission @@ -413,8 +413,8 @@ def do_modeling(self): self.add( self._model.add_constraints( - sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_inputs]) - == sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_outputs]), + sum([flow.model.flow_rate * conv_factors[flow.label].selected_data for flow in used_inputs]) + == sum([flow.model.flow_rate * conv_factors[flow.label].selected_data for flow in used_outputs]), name=f'{self.label_full}|conversion_{i}', ) ) @@ -474,12 +474,12 @@ def do_modeling(self): ) charge_state = self.charge_state - rel_loss = self.element.relative_loss_per_hour.active_data + rel_loss = self.element.relative_loss_per_hour.selected_data hours_per_step = self._model.hours_per_step charge_rate = self.element.charging.model.flow_rate discharge_rate = 
self.element.discharging.model.flow_rate - eff_charge = self.element.eta_charge.active_data - eff_discharge = self.element.eta_discharge.active_data + eff_charge = self.element.eta_charge.selected_data + eff_discharge = self.element.eta_discharge.selected_data self.add( self._model.add_constraints( @@ -565,8 +565,8 @@ def absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: @property def relative_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: return ( - self.element.relative_minimum_charge_state.active_data, - self.element.relative_maximum_charge_state.active_data, + self.element.relative_minimum_charge_state.selected_data, + self.element.relative_maximum_charge_state.selected_data, ) diff --git a/flixopt/core.py b/flixopt/core.py index 773f76186..57d1871e7 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -798,7 +798,7 @@ def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]: 'aggregation_weight': self.aggregation_weight, 'aggregation_group': self.aggregation_group, 'needs_extra_timestep': self.needs_extra_timestep, - 'data': self.active_data.to_dict(), + 'data': self.selected_data.to_dict(), } # Convert datetime objects to ISO strings @@ -820,15 +820,15 @@ def stats(self) -> str: Returns: String representation of data statistics """ - return get_numeric_stats(self.active_data, padd=0, by_scenario=True) + return get_numeric_stats(self.selected_data, padd=0, by_scenario=True) @property def all_equal(self) -> bool: """Check if all values in the series are equal.""" - return np.unique(self.active_data.values).size == 1 + return np.unique(self.selected_data.values).size == 1 @property - def active_data(self) -> xr.DataArray: + def selected_data(self) -> xr.DataArray: """ Get a view of stored_data based on current selections. This computes the view dynamically based on the current selection state. @@ -842,7 +842,7 @@ def stored_data(self) -> xr.DataArray: def update_stored_data(self, value: NumericData): """ - Update stored_data and refresh active_data. + Update stored_data and refresh selected_data. 
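+
+        Any active selection is cleared afterwards, so the new data is fully visible.
+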
Args: value: New data to store @@ -879,13 +879,13 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: @property def sel(self): - """Direct access to the active_data's sel method for convenience.""" - return self.active_data.sel + """Direct access to the selected_data's sel method for convenience.""" + return self.selected_data.sel @property def isel(self): - """Direct access to the active_data's isel method for convenience.""" - return self.active_data.isel + """Direct access to the selected_data's isel method for convenience.""" + return self.selected_data.isel @property def _valid_selector(self) -> Dict[str, pd.Index]: @@ -893,12 +893,11 @@ def _valid_selector(self) -> Dict[str, pd.Index]: full_selection = {'time': self._selected_timesteps, 'scenario': self._selected_scenarios} return {dim: sel for dim, sel in full_selection.items() if dim in self._stored_data.dims and sel is not None} - def _apply_operation(self, other, op): """Apply an operation between this TimeSeries and another object.""" if isinstance(other, TimeSeries): - other = other.active_data - return op(self.active_data, other) + other = other.selected_data + return op(self.selected_data, other) def __add__(self, other): return self._apply_operation(other, lambda x, y: x + y) @@ -913,25 +912,25 @@ def __truediv__(self, other): return self._apply_operation(other, lambda x, y: x / y) def __radd__(self, other): - return other + self.active_data + return other + self.selected_data def __rsub__(self, other): - return other - self.active_data + return other - self.selected_data def __rmul__(self, other): - return other * self.active_data + return other * self.selected_data def __rtruediv__(self, other): - return other / self.active_data + return other / self.selected_data def __neg__(self) -> xr.DataArray: - return -self.active_data + return -self.selected_data def __pos__(self) -> xr.DataArray: - return +self.active_data + return +self.selected_data def __abs__(self) -> xr.DataArray: - return abs(self.active_data) + return abs(self.selected_data) def __gt__(self, other): """ @@ -944,7 +943,7 @@ def __gt__(self, other): True if all values in this TimeSeries are greater than other """ if isinstance(other, TimeSeries): - return (self.active_data > other.active_data).all().item() + return (self.selected_data > other.selected_data).all().item() return NotImplemented def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): @@ -953,8 +952,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): This allows NumPy functions to work with TimeSeries objects. 
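+
+        Example (illustrative; ts is any TimeSeries instance):
+
+            >>> result = np.add(ts, 5)  # same as np.add(ts.selected_data, 5)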
""" - # Convert any TimeSeries inputs to their active_data - inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs] + # Convert any TimeSeries inputs to their selected_data + inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs] return getattr(ufunc, method)(*inputs, **kwargs) def __repr__(self): @@ -969,7 +968,7 @@ def __repr__(self): 'aggregation_weight': self.aggregation_weight, 'aggregation_group': self.aggregation_group, 'needs_extra_timestep': self.needs_extra_timestep, - 'shape': self.active_data.shape, + 'shape': self.selected_data.shape, } attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items()) @@ -1152,7 +1151,7 @@ def as_dataset(self) -> xr.Dataset: ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) for ts in self._time_series.values(): - ds[ts.name] = ts.active_data + ds[ts.name] = ts.selected_data return ds diff --git a/flixopt/effects.py b/flixopt/effects.py index 3b2f32311..1b5745a0a 100644 --- a/flixopt/effects.py +++ b/flixopt/effects.py @@ -137,10 +137,10 @@ def __init__(self, model: SystemModel, element: Effect): label_full=f'{self.label_full}(operation)', total_max=self.element.maximum_operation, total_min=self.element.minimum_operation, - min_per_hour=self.element.minimum_operation_per_hour.active_data + min_per_hour=self.element.minimum_operation_per_hour.selected_data if self.element.minimum_operation_per_hour is not None else None, - max_per_hour=self.element.maximum_operation_per_hour.active_data + max_per_hour=self.element.maximum_operation_per_hour.selected_data if self.element.maximum_operation_per_hour is not None else None, ) @@ -376,7 +376,7 @@ def _add_share_between_effects(self): for target_effect, time_series in origin_effect.specific_share_to_other_effects_operation.items(): self.effects[target_effect].model.operation.add_share( origin_effect.model.operation.label_full, - origin_effect.model.operation.total_per_timestep * time_series.active_data, + origin_effect.model.operation.total_per_timestep * time_series.selected_data, ) # 2. 
invest: -> hier ist es Scalar (share) for target_effect, factor in origin_effect.specific_share_to_other_effects_invest.items(): diff --git a/flixopt/elements.py b/flixopt/elements.py index 378a5fab1..605554e5c 100644 --- a/flixopt/elements.py +++ b/flixopt/elements.py @@ -374,7 +374,7 @@ def _create_shares(self): self._model.effects.add_share_to_effects( name=self.label_full, # Use the full label of the element expressions={ - effect: self.flow_rate * self._model.hours_per_step * factor.active_data + effect: self.flow_rate * self._model.hours_per_step * factor.selected_data for effect, factor in self.element.effects_per_flow_hour.items() }, target='operation', @@ -429,8 +429,8 @@ def relative_flow_rate_bounds(self) -> Tuple[NumericData, NumericData]: """Returns relative flow rate bounds.""" fixed_profile = self.element.fixed_relative_profile if fixed_profile is None: - return self.element.relative_minimum.active_data, self.element.relative_maximum.active_data - return fixed_profile.active_data, fixed_profile.active_data + return self.element.relative_minimum.selected_data, self.element.relative_maximum.selected_data + return fixed_profile.selected_data, fixed_profile.selected_data class BusModel(ElementModel): @@ -451,7 +451,7 @@ def do_modeling(self) -> None: # Fehlerplus/-minus: if self.element.with_excess: excess_penalty = np.multiply( - self._model.hours_per_step, self.element.excess_penalty_per_flow_hour.active_data + self._model.hours_per_step, self.element.excess_penalty_per_flow_hour.selected_data ) self.excess_input = self.add( self._model.add_variables(lower=0, coords=self._model.coords, name=f'{self.label_full}|excess_input'), diff --git a/flixopt/features.py b/flixopt/features.py index 92caf9dc2..32c382486 100644 --- a/flixopt/features.py +++ b/flixopt/features.py @@ -441,7 +441,7 @@ def _get_duration_in_hours( if previous_duration + self._model.hours_per_step[0] > first_step_max: logger.warning( - f'The maximum duration of "{variable_name}" is set to {maximum_duration.active_data}h, ' + f'The maximum duration of "{variable_name}" is set to {maximum_duration.selected_data}h, ' f'but the consecutive_duration previous to this model is {previous_duration}h. ' f'This forces "{binary_variable.name} = 0" in the first time step ' f'(dt={self._model.hours_per_step[0]}h)!' 
@@ -450,7 +450,7 @@ def _get_duration_in_hours( duration_in_hours = self.add( self._model.add_variables( lower=0, - upper=maximum_duration.active_data if maximum_duration is not None else mega, + upper=maximum_duration.selected_data if maximum_duration is not None else mega, coords=self._model.coords, name=f'{self.label_full}|{variable_name}', ), diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index dee7023e8..800b9462c 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -294,7 +294,7 @@ def create_time_series( if data in self.time_series_allocator: return data return self.time_series_allocator.add_time_series( - data=data.active_data, name=name, needs_extra_timestep=needs_extra_timestep + data=data.selected_data, name=name, needs_extra_timestep=needs_extra_timestep ) return self.time_series_allocator.add_time_series( data=data, name=name, needs_extra_timestep=needs_extra_timestep diff --git a/flixopt/io.py b/flixopt/io.py index 35d927136..5cc353836 100644 --- a/flixopt/io.py +++ b/flixopt/io.py @@ -23,7 +23,7 @@ def replace_timeseries(obj, mode: Literal['name', 'stats', 'data'] = 'name'): return [replace_timeseries(v, mode) for v in obj] elif isinstance(obj, TimeSeries): # Adjust this based on the actual class if obj.all_equal: - return obj.active_data.values[0].item() + return obj.selected_data.values[0].item() elif mode == 'name': return f'::::{obj.name}' elif mode == 'stats': diff --git a/flixopt/structure.py b/flixopt/structure.py index 4e3b26acf..adabdfb80 100644 --- a/flixopt/structure.py +++ b/flixopt/structure.py @@ -534,7 +534,7 @@ def copy_and_convert_datatypes(data: Any, use_numpy: bool = True, use_element_la return copy_and_convert_datatypes(data.tolist(), use_numpy, use_element_label) elif isinstance(data, TimeSeries): - return copy_and_convert_datatypes(data.active_data, use_numpy, use_element_label) + return copy_and_convert_datatypes(data.selected_data, use_numpy, use_element_label) elif isinstance(data, TimeSeriesData): return copy_and_convert_datatypes(data.data, use_numpy, use_element_label) diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index 562ebdf8b..7f1dc1e8d 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -44,7 +44,7 @@ def test_initialization(self, simple_dataarray): # Check data initialization assert isinstance(ts.stored_data, xr.DataArray) assert ts.stored_data.equals(simple_dataarray) - assert ts.active_data.equals(simple_dataarray) + assert ts.selected_data.equals(simple_dataarray) # Check backup was created assert ts._backup.equals(simple_dataarray) @@ -87,7 +87,7 @@ def test_active_timesteps_getter_setter(self, sample_timeseries, sample_timestep assert sample_timeseries.active_timesteps.equals(subset_index) # Active data should reflect the subset - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) + assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) # Reset to full index sample_timeseries.active_timesteps = None @@ -108,7 +108,7 @@ def test_reset(self, sample_timeseries, sample_timesteps): # Should be back to full index assert sample_timeseries.active_timesteps.equals(sample_timesteps) - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data) + assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data) def test_restore_data(self, sample_timeseries, simple_dataarray): """Test restore_data method.""" @@ -127,7 +127,7 @@ def test_restore_data(self, 
sample_timeseries, simple_dataarray): # Should be back to original data assert sample_timeseries.stored_data.equals(original_data) - assert sample_timeseries.active_data.equals(original_data) + assert sample_timeseries.selected_data.equals(original_data) def test_stored_data_setter(self, sample_timeseries, sample_timesteps): """Test stored_data setter with different data types.""" @@ -234,30 +234,30 @@ def test_arithmetic_operations(self, sample_timeseries): # Test operations between two TimeSeries objects assert np.array_equal( - (sample_timeseries + ts2).values, sample_timeseries.active_data.values + ts2.active_data.values + (sample_timeseries + ts2).values, sample_timeseries.selected_data.values + ts2.selected_data.values ) assert np.array_equal( - (sample_timeseries - ts2).values, sample_timeseries.active_data.values - ts2.active_data.values + (sample_timeseries - ts2).values, sample_timeseries.selected_data.values - ts2.selected_data.values ) assert np.array_equal( - (sample_timeseries * ts2).values, sample_timeseries.active_data.values * ts2.active_data.values + (sample_timeseries * ts2).values, sample_timeseries.selected_data.values * ts2.selected_data.values ) assert np.array_equal( - (sample_timeseries / ts2).values, sample_timeseries.active_data.values / ts2.active_data.values + (sample_timeseries / ts2).values, sample_timeseries.selected_data.values / ts2.selected_data.values ) # Test operations with DataArrays - assert np.array_equal((sample_timeseries + data2).values, sample_timeseries.active_data.values + data2.values) - assert np.array_equal((data2 + sample_timeseries).values, data2.values + sample_timeseries.active_data.values) + assert np.array_equal((sample_timeseries + data2).values, sample_timeseries.selected_data.values + data2.values) + assert np.array_equal((data2 + sample_timeseries).values, data2.values + sample_timeseries.selected_data.values) # Test operations with scalars - assert np.array_equal((sample_timeseries + 5).values, sample_timeseries.active_data.values + 5) - assert np.array_equal((5 + sample_timeseries).values, 5 + sample_timeseries.active_data.values) + assert np.array_equal((sample_timeseries + 5).values, sample_timeseries.selected_data.values + 5) + assert np.array_equal((5 + sample_timeseries).values, 5 + sample_timeseries.selected_data.values) # Test unary operations - assert np.array_equal((-sample_timeseries).values, -sample_timeseries.active_data.values) - assert np.array_equal((+sample_timeseries).values, +sample_timeseries.active_data.values) - assert np.array_equal((abs(sample_timeseries)).values, abs(sample_timeseries.active_data.values)) + assert np.array_equal((-sample_timeseries).values, -sample_timeseries.selected_data.values) + assert np.array_equal((+sample_timeseries).values, +sample_timeseries.selected_data.values) + assert np.array_equal((abs(sample_timeseries)).values, abs(sample_timeseries.selected_data.values)) def test_comparison_operations(self, sample_timesteps): """Test comparison operations.""" @@ -279,10 +279,10 @@ def test_comparison_operations(self, sample_timesteps): def test_numpy_ufunc(self, sample_timeseries): """Test numpy ufunc compatibility.""" # Test basic numpy functions - assert np.array_equal(np.add(sample_timeseries, 5).values, np.add(sample_timeseries.active_data, 5).values) + assert np.array_equal(np.add(sample_timeseries, 5).values, np.add(sample_timeseries.selected_data, 5).values) assert np.array_equal( - np.multiply(sample_timeseries, 2).values, np.multiply(sample_timeseries.active_data, 2).values + 
np.multiply(sample_timeseries, 2).values, np.multiply(sample_timeseries.selected_data, 2).values ) # Test with two TimeSeries objects @@ -290,18 +290,18 @@ def test_numpy_ufunc(self, sample_timeseries): ts2 = TimeSeries(data2, 'Second Series') assert np.array_equal( - np.add(sample_timeseries, ts2).values, np.add(sample_timeseries.active_data, ts2.active_data).values + np.add(sample_timeseries, ts2).values, np.add(sample_timeseries.selected_data, ts2.selected_data).values ) def test_sel_and_isel_properties(self, sample_timeseries): """Test sel and isel properties.""" # Test that sel property works selected = sample_timeseries.sel(time=sample_timeseries.active_timesteps[0]) - assert selected.item() == sample_timeseries.active_data.values[0] + assert selected.item() == sample_timeseries.selected_data.values[0] # Test that isel property works indexed = sample_timeseries.isel(time=0) - assert indexed.item() == sample_timeseries.active_data.values[0] + assert indexed.item() == sample_timeseries.selected_data.values[0] @pytest.fixture @@ -372,12 +372,12 @@ def test_create_time_series(self, sample_collection): # Test scalar ts1 = sample_collection.create_time_series(42, 'scalar_series') assert ts1.name == 'scalar_series' - assert np.all(ts1.active_data.values == 42) + assert np.all(ts1.selected_data.values == 42) # Test numpy array data = np.array([1, 2, 3, 4, 5]) ts2 = sample_collection.create_time_series(data, 'array_series') - assert np.array_equal(ts2.active_data.values, data) + assert np.array_equal(ts2.selected_data.values, data) # Test with TimeSeriesData ts3 = sample_collection.create_time_series(TimeSeriesData(10, agg_weight=0.7), 'weighted_series') @@ -386,7 +386,7 @@ def test_create_time_series(self, sample_collection): # Test with extra timestep ts4 = sample_collection.create_time_series(5, 'extra_series', needs_extra_timestep=True) assert ts4.needs_extra_timestep - assert len(ts4.active_data) == len(sample_collection.timesteps_extra) + assert len(ts4.selected_data) == len(sample_collection.timesteps_extra) # Test duplicate name with pytest.raises(ValueError, match='already exists'): @@ -509,12 +509,12 @@ def test_insert_new_data(self, populated_collection, sample_timesteps): populated_collection.insert_new_data(new_data) # Verify updates - assert np.all(populated_collection['constant_series'].active_data.values == 100) - assert np.array_equal(populated_collection['varying_series'].active_data.values, np.array([5, 10, 15, 20, 25])) + assert np.all(populated_collection['constant_series'].selected_data.values == 100) + assert np.array_equal(populated_collection['varying_series'].selected_data.values, np.array([5, 10, 15, 20, 25])) # Series not in the DataFrame should be unchanged assert np.array_equal( - populated_collection['extra_timestep_series'].active_data.values[:-1], np.array([1, 2, 3, 4, 5]) + populated_collection['extra_timestep_series'].selected_data.values[:-1], np.array([1, 2, 3, 4, 5]) ) # Test with mismatched index @@ -542,7 +542,7 @@ def test_restore_data(self, populated_collection): populated_collection.insert_new_data(new_data) # Verify data was changed - assert np.all(populated_collection['constant_series'].active_data.values == 999) + assert np.all(populated_collection['constant_series'].selected_data.values == 999) # Restore data populated_collection.restore_data() @@ -664,7 +664,7 @@ def test_initialization_with_scenarios(self, simple_scenario_dataarray): # Check data initialization assert isinstance(ts.stored_data, xr.DataArray) assert 
ts.stored_data.equals(simple_scenario_dataarray) - assert ts.active_data.equals(simple_scenario_dataarray) + assert ts.selected_data.equals(simple_scenario_dataarray) # Check backup was created assert ts._backup.equals(simple_scenario_dataarray) @@ -689,7 +689,7 @@ def test_reset_with_scenarios(self, sample_scenario_timeseries): # Verify subsets were set assert sample_scenario_timeseries.active_timesteps.equals(subset_timesteps) assert sample_scenario_timeseries.active_scenarios.equals(subset_scenarios) - assert sample_scenario_timeseries.active_data.shape == (len(subset_scenarios), len(subset_timesteps)) + assert sample_scenario_timeseries.selected_data.shape == (len(subset_scenarios), len(subset_timesteps)) # Reset sample_scenario_timeseries.reset() @@ -697,7 +697,7 @@ def test_reset_with_scenarios(self, sample_scenario_timeseries): # Should be back to full indexes assert sample_scenario_timeseries.active_timesteps.equals(full_timesteps) assert sample_scenario_timeseries.active_scenarios.equals(full_scenarios) - assert sample_scenario_timeseries.active_data.shape == (len(full_scenarios), len(full_timesteps)) + assert sample_scenario_timeseries.selected_data.shape == (len(full_scenarios), len(full_timesteps)) def test_active_scenarios_getter_setter(self, sample_scenario_timeseries, sample_scenario_index): """Test active_scenarios getter and setter.""" @@ -710,7 +710,7 @@ def test_active_scenarios_getter_setter(self, sample_scenario_timeseries, sample assert sample_scenario_timeseries.active_scenarios.equals(subset_index) # Active data should reflect the subset - assert sample_scenario_timeseries.active_data.equals( + assert sample_scenario_timeseries.selected_data.equals( sample_scenario_timeseries.stored_data.sel(scenario=subset_index) ) @@ -974,14 +974,14 @@ def test_create_time_series_with_scenarios(self, sample_scenario_collection): ts1 = sample_scenario_collection.create_time_series(42, 'scalar_series') assert ts1._has_scenarios assert ts1.name == 'scalar_series' - assert ts1.active_data.shape == (3, 5) # 3 scenarios, 5 timesteps - assert np.all(ts1.active_data.values == 42) + assert ts1.selected_data.shape == (3, 5) # 3 scenarios, 5 timesteps + assert np.all(ts1.selected_data.values == 42) # Test 1D array (broadcasts to all scenarios) data = np.array([1, 2, 3, 4, 5]) ts2 = sample_scenario_collection.create_time_series(data, 'array_series') assert ts2._has_scenarios - assert ts2.active_data.shape == (3, 5) + assert ts2.selected_data.shape == (3, 5) # Each scenario should have the same values for scenario in sample_scenario_collection.scenarios: assert np.array_equal(ts2.sel(scenario=scenario).values, data) @@ -994,7 +994,7 @@ def test_create_time_series_with_scenarios(self, sample_scenario_collection): ]) ts3 = sample_scenario_collection.create_time_series(data_2d, 'scenario_specific_series') assert ts3._has_scenarios - assert ts3.active_data.shape == (3, 5) + assert ts3.selected_data.shape == (3, 5) # Each scenario should have its own values assert np.array_equal(ts3.sel(scenario='baseline').values, data_2d[0]) assert np.array_equal(ts3.sel(scenario='high_demand').values, data_2d[1]) @@ -1025,7 +1025,7 @@ def test_activate_scenarios(self, sample_scenario_collection, sample_scenario_in assert sample_scenario_collection['array_series'].active_scenarios.equals(subset_scenarios) # Active data should reflect the subset - assert sample_scenario_collection['array_series'].active_data.shape == (2, 5) # 2 scenarios, 5 timesteps + assert 
sample_scenario_collection['array_series'].selected_data.shape == (2, 5) # 2 scenarios, 5 timesteps # Reset scenarios sample_scenario_collection.reset() From fb281dfff6d2c6de2acbcb495b5d049c66a0b577 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:13:14 +0200 Subject: [PATCH 40/55] Add property --- flixopt/core.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 57d1871e7..575bfa093 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -835,12 +835,28 @@ def selected_data(self) -> xr.DataArray: """ return self._stored_data.sel(**self._valid_selector) + @property + def active_timesteps(self) -> pd.DatetimeIndex: + """Get the current active timesteps.""" + if self._selected_timesteps is None: + return self._stored_data.indexes['time'] + return self._selected_timesteps + + @property + def active_scenarios(self) -> Optional[pd.Index]: + """Get the current active scenarios.""" + if not self._has_scenarios: + return None + if self._selected_scenarios is None: + return self._stored_data.indexes['scenario'] + return self._selected_scenarios + @property def stored_data(self) -> xr.DataArray: """Get a copy of the full stored data.""" return self._stored_data.copy() - def update_stored_data(self, value: NumericData): + def update_stored_data(self, value: xr.DataArray) -> None: """ Update stored_data and refresh selected_data. From 30994d56f980cb6d8415dc0df7deb9d21357ab35 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:14:18 +0200 Subject: [PATCH 41/55] Improve type hints --- flixopt/core.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 575bfa093..4018f157a 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1224,13 +1224,17 @@ def __getitem__(self, name: str) -> TimeSeries: return self._time_series[name] raise ValueError(f'No TimeSeries named "{name}" found') - def __contains__(self, value): + def __contains__(self, value) -> bool: if isinstance(value, str): return value in self._time_series elif isinstance(value, TimeSeries): return value.name in self._time_series raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}') + def __iter__(self) -> Iterator[TimeSeries]: + """Iterate over TimeSeries objects.""" + return iter(self._time_series.values()) + def update_time_series(self, name: str, data: NumericData) -> TimeSeries: """ Update an existing TimeSeries with new data. From af697fe6a649aeb3a5b3aab333e043d87308ab06 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:17:02 +0200 Subject: [PATCH 42/55] Improve type hints --- flixopt/core.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/flixopt/core.py b/flixopt/core.py index 4018f157a..4198c63d1 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -769,14 +769,14 @@ def __init__( # Flag for whether this series has scenarios self._has_scenarios = 'scenario' in data.dims - def reset(self): + def reset(self) -> None: """ Reset selections to include all timesteps and scenarios. This is equivalent to clearing all selections. """ self.clear_selection() - def restore_data(self): + def restore_data(self) -> None: """ Restore stored_data from the backup and reset active timesteps. 
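+
+        Example (illustrative; ts and new_data are assumed to exist):
+
+            >>> ts.update_stored_data(new_data)  # overwrite the stored values
+            >>> ts.restore_data()                # revert to the backed-up values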
""" @@ -876,13 +876,13 @@ def update_stored_data(self, value: xr.DataArray) -> None: self._stored_data = new_data self.clear_selection() # Reset selections to full dataset - def clear_selection(self, timesteps: bool = True, scenarios: bool = True): + def clear_selection(self, timesteps: bool = True, scenarios: bool = True) -> None: if timesteps: self._selected_timesteps = None if scenarios: self._selected_scenarios = None - def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None: if timesteps is None: self.clear_selection(timesteps=True, scenarios=False) else: @@ -1102,7 +1102,7 @@ def add_time_series( # Return the TimeSeries object return time_series - def clear_selection(self, timesteps: bool = True, scenarios: bool = True): + def clear_selection(self, timesteps: bool = True, scenarios: bool = True) -> None: """ Clear selection for timesteps and/or scenarios. @@ -1118,7 +1118,7 @@ def clear_selection(self, timesteps: bool = True, scenarios: bool = True): # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() - def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None): + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None: """ Set active subset for timesteps and scenarios. @@ -1139,7 +1139,7 @@ def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: # Apply the selection to all TimeSeries objects self._propagate_selection_to_time_series() - def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]): + def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]) -> None: """ Updates the timestep and related metrics (timesteps_extra, hours_per_timestep) based on the current selection. """ @@ -1199,7 +1199,7 @@ def scenarios(self) -> Optional[pd.Index]: return self._full_scenarios return self._selected_scenarios - def _propagate_selection_to_time_series(self): + def _propagate_selection_to_time_series(self) -> None: """Apply the current selection to all TimeSeries objects.""" for ts_name, ts in self._time_series.items(): timesteps = self._selected_timesteps_extra if ts_name in self._has_extra_timestep else self._selected_timesteps From f2402685a9811334073c03e31bf2c428f5d4140f Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:38:34 +0200 Subject: [PATCH 43/55] Add options to get data without extra timestep --- flixopt/core.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 4198c63d1..16453a796 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1157,7 +1157,7 @@ def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]) -> N timesteps, self._selected_hours_per_timestep.isel(time=-1).max().item() ) - def as_dataset(self) -> xr.Dataset: + def as_dataset(self, without_extra_timestep: bool = False) -> xr.Dataset: """ Convert the TimeSeriesAllocator to a xarray Dataset, containing the data of each TimeSeries. 
""" @@ -1169,6 +1169,9 @@ def as_dataset(self) -> xr.Dataset: for ts in self._time_series.values(): ds[ts.name] = ts.selected_data + if without_extra_timestep: + return ds.sel(time=self.timesteps) + return ds @property From 054d0fcdcd4d51b9177b8c50b7a59c25c9ec3d87 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 15:40:55 +0200 Subject: [PATCH 44/55] Rename --- flixopt/components.py | 4 +- flixopt/core.py | 32 +- flixopt/flow_system.py | 6 +- tests/test_timeseries.py | 1312 +++++++++----------------------------- time_series_alloc.py | 2 +- 5 files changed, 308 insertions(+), 1048 deletions(-) diff --git a/flixopt/components.py b/flixopt/components.py index 5835afd31..2a69c6165 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -194,12 +194,12 @@ def transform_data(self, flow_system: 'FlowSystem') -> None: self.relative_minimum_charge_state = flow_system.create_time_series( f'{self.label_full}|relative_minimum_charge_state', self.relative_minimum_charge_state, - needs_extra_timestep=True, + has_extra_timestep=True, ) self.relative_maximum_charge_state = flow_system.create_time_series( f'{self.label_full}|relative_maximum_charge_state', self.relative_maximum_charge_state, - needs_extra_timestep=True, + has_extra_timestep=True, ) self.eta_charge = flow_system.create_time_series(f'{self.label_full}|eta_charge', self.eta_charge) self.eta_discharge = flow_system.create_time_series(f'{self.label_full}|eta_discharge', self.eta_discharge) diff --git a/flixopt/core.py b/flixopt/core.py index 16453a796..fdd99c613 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -654,7 +654,7 @@ class TimeSeries: name (str): The name of the time series aggregation_weight (Optional[float]): Weight used for aggregation aggregation_group (Optional[str]): Group name for shared aggregation weighting - needs_extra_timestep (bool): Whether this series needs an extra timestep + has_extra_timestep (bool): Whether this series needs an extra timestep """ @classmethod @@ -666,7 +666,7 @@ def from_datasource( scenarios: Optional[pd.Index] = None, aggregation_weight: Optional[float] = None, aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, + has_extra_timestep: bool = False, ) -> 'TimeSeries': """ Initialize the TimeSeries from multiple data sources. @@ -678,7 +678,7 @@ def from_datasource( scenarios: The scenarios of the TimeSeries aggregation_weight: The weight in aggregation calculations aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing - needs_extra_timestep: Whether this series requires an extra timestep + has_extra_timestep: Whether this series requires an extra timestep Returns: A new TimeSeries instance @@ -688,7 +688,7 @@ def from_datasource( name, aggregation_weight, aggregation_group, - needs_extra_timestep, + has_extra_timestep, ) @classmethod @@ -722,7 +722,7 @@ def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = name=data['name'], aggregation_weight=data['aggregation_weight'], aggregation_group=data['aggregation_group'], - needs_extra_timestep=data['needs_extra_timestep'], + has_extra_timestep=data['has_extra_timestep'], ) def __init__( @@ -731,7 +731,7 @@ def __init__( name: str, aggregation_weight: Optional[float] = None, aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, + has_extra_timestep: bool = False, ): """ Initialize a TimeSeries with a DataArray. 
@@ -741,7 +741,7 @@ def __init__( name: The name of the TimeSeries aggregation_weight: The weight in aggregation calculations aggregation_group: Group this TimeSeries belongs to for weight sharing - needs_extra_timestep: Whether this series requires an extra timestep + has_extra_timestep: Whether this series requires an extra timestep Raises: ValueError: If data doesn't have a 'time' index or has unsupported dimensions @@ -756,7 +756,7 @@ def __init__( self.name = name self.aggregation_weight = aggregation_weight self.aggregation_group = aggregation_group - self.needs_extra_timestep = needs_extra_timestep + self.has_extra_timestep = has_extra_timestep # Data management self._stored_data = data.copy(deep=True) @@ -797,7 +797,7 @@ def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]: 'name': self.name, 'aggregation_weight': self.aggregation_weight, 'aggregation_group': self.aggregation_group, - 'needs_extra_timestep': self.needs_extra_timestep, + 'has_extra_timestep': self.has_extra_timestep, 'data': self.selected_data.to_dict(), } @@ -983,7 +983,7 @@ def __repr__(self): 'name': self.name, 'aggregation_weight': self.aggregation_weight, 'aggregation_group': self.aggregation_group, - 'needs_extra_timestep': self.needs_extra_timestep, + 'has_extra_timestep': self.has_extra_timestep, 'shape': self.selected_data.shape, } @@ -1044,7 +1044,7 @@ def add_time_series( data: Union[NumericData, TimeSeries], aggregation_weight: Optional[float] = None, aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, + has_extra_timestep: bool = False, ) -> TimeSeries: """ Add a new TimeSeries to the allocator. @@ -1054,7 +1054,7 @@ def add_time_series( data: Data for the time series (can be raw data or an existing TimeSeries) aggregation_weight: Weight used for aggregation aggregation_group: Group name for shared aggregation weighting - needs_extra_timestep: Whether this series needs an extra timestep + has_extra_timestep: Whether this series needs an extra timestep Returns: The created TimeSeries object @@ -1063,7 +1063,7 @@ def add_time_series( raise KeyError(f"TimeSeries '{name}' already exists in allocator") # Choose which timesteps to use - target_timesteps = self.timesteps_extra if needs_extra_timestep else self.timesteps + target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps # Create or adapt the TimeSeries object if isinstance(data, TimeSeries): @@ -1078,7 +1078,7 @@ def add_time_series( name=name, aggregation_weight=aggregation_weight or time_series.aggregation_weight, aggregation_group=aggregation_group or time_series.aggregation_group, - needs_extra_timestep=needs_extra_timestep or time_series.needs_extra_timestep, + has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep, ) else: # Create a new TimeSeries from raw data @@ -1089,14 +1089,14 @@ def add_time_series( scenarios=self.scenarios, aggregation_weight=aggregation_weight, aggregation_group=aggregation_group, - needs_extra_timestep=needs_extra_timestep, + has_extra_timestep=has_extra_timestep, ) # Add to storage self._time_series[name] = time_series # Track if it needs extra timestep - if needs_extra_timestep: + if has_extra_timestep: self._has_extra_timestep.add(name) # Return the TimeSeries object diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index 800b9462c..142a8850d 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -278,7 +278,7 @@ def create_time_series( self, name: str, data: Optional[Union[NumericData, TimeSeriesData, 
TimeSeries]], - needs_extra_timestep: bool = False, + has_extra_timestep: bool = False, ) -> Optional[TimeSeries]: """ Tries to create a TimeSeries from NumericData Data and adds it to the time_series_allocator @@ -294,10 +294,10 @@ def create_time_series( if data in self.time_series_allocator: return data return self.time_series_allocator.add_time_series( - data=data.selected_data, name=name, needs_extra_timestep=needs_extra_timestep + data=data.selected_data, name=name, has_extra_timestep=has_extra_timestep ) return self.time_series_allocator.add_time_series( - data=data, name=name, needs_extra_timestep=needs_extra_timestep + data=data, name=name, has_extra_timestep=has_extra_timestep ) def create_effect_time_series( diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index 7f1dc1e8d..c8f18c88f 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -8,7 +8,7 @@ import pytest import xarray as xr -from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesCollection, TimeSeriesData +from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesAllocator @pytest.fixture @@ -50,7 +50,7 @@ def test_initialization(self, simple_dataarray): assert ts._backup.equals(simple_dataarray) # Check active timesteps - assert ts.active_timesteps.equals(simple_dataarray.indexes['time']) + assert ts._valid_selector == {} # No selections initially def test_initialization_with_aggregation_params(self, simple_dataarray): """Test initialization with aggregation parameters.""" @@ -76,50 +76,48 @@ def test_initialization_validation(self, sample_timesteps): with pytest.raises(ValueError, match='DataArray dimensions must be subset of'): TimeSeries(multi_dim_data, name='Multi-dim Series') - def test_active_timesteps_getter_setter(self, sample_timeseries, sample_timesteps): - """Test active_timesteps getter and setter.""" - # Initial state should use all timesteps - assert sample_timeseries.active_timesteps.equals(sample_timesteps) + def test_selection_methods(self, sample_timeseries, sample_timesteps): + """Test selection methods.""" + # Initial state should have no selections + assert sample_timeseries._selected_timesteps is None + assert sample_timeseries._selected_scenarios is None # Set to a subset subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index - assert sample_timeseries.active_timesteps.equals(subset_index) + sample_timeseries.set_selection(timesteps=subset_index) + assert sample_timeseries._selected_timesteps.equals(subset_index) # Active data should reflect the subset assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) - # Reset to full index - sample_timeseries.active_timesteps = None - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - - # Test invalid type - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): - sample_timeseries.active_timesteps = 'invalid' + # Clear selection + sample_timeseries.clear_selection() + assert sample_timeseries._selected_timesteps is None + assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data) def test_reset(self, sample_timeseries, sample_timesteps): """Test reset method.""" # Set to subset first subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index + sample_timeseries.set_selection(timesteps=subset_index) # Reset sample_timeseries.reset() - # Should be back to full index - assert 
sample_timeseries.active_timesteps.equals(sample_timesteps) + # Should be back to full index (all selections cleared) + assert sample_timeseries._selected_timesteps is None assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data) def test_restore_data(self, sample_timeseries, simple_dataarray): """Test restore_data method.""" # Modify the stored data - new_data = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) + new_data = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.stored_data.coords['time']}, dims=['time']) # Store original data for comparison original_data = sample_timeseries.stored_data - # Set new data - sample_timeseries.stored_data = new_data + # Update data + sample_timeseries.update_stored_data(new_data) assert sample_timeseries.stored_data.equals(new_data) # Restore from backup @@ -129,40 +127,40 @@ def test_restore_data(self, sample_timeseries, simple_dataarray): assert sample_timeseries.stored_data.equals(original_data) assert sample_timeseries.selected_data.equals(original_data) - def test_stored_data_setter(self, sample_timeseries, sample_timesteps): - """Test stored_data setter with different data types.""" + def test_update_stored_data(self, sample_timeseries, sample_timesteps): + """Test update_stored_data method with different data types.""" # Test with a Series series_data = pd.Series([5, 6, 7, 8, 9], index=sample_timesteps) - sample_timeseries.stored_data = series_data + sample_timeseries.update_stored_data(series_data) assert np.array_equal(sample_timeseries.stored_data.values, series_data.values) # Test with a single-column DataFrame df_data = pd.DataFrame({'col1': [15, 16, 17, 18, 19]}, index=sample_timesteps) - sample_timeseries.stored_data = df_data + sample_timeseries.update_stored_data(df_data) assert np.array_equal(sample_timeseries.stored_data.values, df_data['col1'].values) # Test with a NumPy array array_data = np.array([25, 26, 27, 28, 29]) - sample_timeseries.stored_data = array_data + sample_timeseries.update_stored_data(array_data) assert np.array_equal(sample_timeseries.stored_data.values, array_data) # Test with a scalar - sample_timeseries.stored_data = 42 + sample_timeseries.update_stored_data(42) assert np.all(sample_timeseries.stored_data.values == 42) # Test with another DataArray another_dataarray = xr.DataArray([30, 31, 32, 33, 34], coords={'time': sample_timesteps}, dims=['time']) - sample_timeseries.stored_data = another_dataarray + sample_timeseries.update_stored_data(another_dataarray) assert sample_timeseries.stored_data.equals(another_dataarray) def test_stored_data_setter_no_change(self, sample_timeseries): - """Test stored_data setter when data doesn't change.""" + """Test update_stored_data method when data doesn't change.""" # Get current data current_data = sample_timeseries.stored_data current_backup = sample_timeseries._backup # Set the same data - sample_timeseries.stored_data = current_data + sample_timeseries.update_stored_data(current_data) # Backup shouldn't change assert sample_timeseries._backup is current_backup # Should be the same object @@ -229,7 +227,7 @@ def test_all_equal(self, sample_timesteps): def test_arithmetic_operations(self, sample_timeseries): """Test arithmetic operations.""" # Create a second TimeSeries for testing - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) + data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': 
sample_timeseries.stored_data.coords['time']}, dims=['time']) ts2 = TimeSeries(data2, 'Second Series') # Test operations between two TimeSeries objects @@ -286,7 +284,7 @@ def test_numpy_ufunc(self, sample_timeseries): ) # Test with two TimeSeries objects - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) + data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.stored_data.coords['time']}, dims=['time']) ts2 = TimeSeries(data2, 'Second Series') assert np.array_equal( @@ -296,7 +294,7 @@ def test_numpy_ufunc(self, sample_timeseries): def test_sel_and_isel_properties(self, sample_timeseries): """Test sel and isel properties.""" # Test that sel property works - selected = sample_timeseries.sel(time=sample_timeseries.active_timesteps[0]) + selected = sample_timeseries.sel(time=sample_timeseries.stored_data.coords['time'][0]) assert selected.item() == sample_timeseries.selected_data.values[0] # Test that isel property works @@ -304,323 +302,12 @@ def test_sel_and_isel_properties(self, sample_timeseries): assert indexed.item() == sample_timeseries.selected_data.values[0] -@pytest.fixture -def sample_collection(sample_timesteps): - """Create a sample TimeSeriesCollection.""" - return TimeSeriesCollection(sample_timesteps) - - -@pytest.fixture -def populated_collection(sample_collection): - """Create a TimeSeriesCollection with test data.""" - # Add a constant time series - sample_collection.create_time_series(42, 'constant_series') - - # Add a varying time series - varying_data = np.array([10, 20, 30, 40, 50]) - sample_collection.create_time_series(varying_data, 'varying_series') - - # Add a time series with extra timestep - sample_collection.create_time_series( - np.array([1, 2, 3, 4, 5, 6]), 'extra_timestep_series', needs_extra_timestep=True - ) - - # Add series with aggregation settings - sample_collection.create_time_series( - TimeSeriesData(np.array([5, 5, 5, 5, 5]), agg_group='group1'), 'group1_series1' - ) - sample_collection.create_time_series( - TimeSeriesData(np.array([6, 6, 6, 6, 6]), agg_group='group1'), 'group1_series2' - ) - sample_collection.create_time_series( - TimeSeriesData(np.array([10, 10, 10, 10, 10]), agg_weight=0.5), 'weighted_series' - ) - - return sample_collection - - -class TestTimeSeriesCollection: - """Test suite for TimeSeriesCollection.""" - - def test_initialization(self, sample_timesteps): - """Test basic initialization.""" - collection = TimeSeriesCollection(sample_timesteps) - - assert collection.all_timesteps.equals(sample_timesteps) - assert len(collection.all_timesteps_extra) == len(sample_timesteps) + 1 - assert isinstance(collection.all_hours_per_timestep, xr.DataArray) - assert len(collection) == 0 - - def test_initialization_with_custom_hours(self, sample_timesteps): - """Test initialization with custom hour settings.""" - # Test with last timestep duration - last_timestep_hours = 12 - collection = TimeSeriesCollection(sample_timesteps, hours_of_last_timestep=last_timestep_hours) - - # Verify the last timestep duration - extra_step_delta = collection.all_timesteps_extra[-1] - collection.all_timesteps_extra[-2] - assert extra_step_delta == pd.Timedelta(hours=last_timestep_hours) - - # Test with previous timestep duration - hours_per_step = 8 - collection2 = TimeSeriesCollection(sample_timesteps, hours_of_previous_timesteps=hours_per_step) - - assert collection2.hours_of_previous_timesteps == hours_per_step - - def test_create_time_series(self, sample_collection): - """Test creating 
time series.""" - # Test scalar - ts1 = sample_collection.create_time_series(42, 'scalar_series') - assert ts1.name == 'scalar_series' - assert np.all(ts1.selected_data.values == 42) - - # Test numpy array - data = np.array([1, 2, 3, 4, 5]) - ts2 = sample_collection.create_time_series(data, 'array_series') - assert np.array_equal(ts2.selected_data.values, data) - - # Test with TimeSeriesData - ts3 = sample_collection.create_time_series(TimeSeriesData(10, agg_weight=0.7), 'weighted_series') - assert ts3.aggregation_weight == 0.7 - - # Test with extra timestep - ts4 = sample_collection.create_time_series(5, 'extra_series', needs_extra_timestep=True) - assert ts4.needs_extra_timestep - assert len(ts4.selected_data) == len(sample_collection.timesteps_extra) - - # Test duplicate name - with pytest.raises(ValueError, match='already exists'): - sample_collection.create_time_series(1, 'scalar_series') - - def test_access_time_series(self, populated_collection): - """Test accessing time series.""" - # Test __getitem__ - ts = populated_collection['varying_series'] - assert ts.name == 'varying_series' - - # Test __contains__ with string - assert 'constant_series' in populated_collection - assert 'nonexistent_series' not in populated_collection - - # Test __contains__ with TimeSeries object - assert populated_collection['varying_series'] in populated_collection - - # Test __iter__ - names = [ts.name for ts in populated_collection] - assert len(names) == 6 - assert 'varying_series' in names - - # Test access to non-existent series - with pytest.raises(KeyError): - populated_collection['nonexistent_series'] - - def test_constants_and_non_constants(self, populated_collection): - """Test constants and non_constants properties.""" - # Test constants - constants = populated_collection.constants - assert len(constants) == 4 # constant_series, group1_series1, group1_series2, weighted_series - assert all(ts.all_equal for ts in constants) - - # Test non_constants - non_constants = populated_collection.non_constants - assert len(non_constants) == 2 # varying_series, extra_timestep_series - assert all(not ts.all_equal for ts in non_constants) - - # Test modifying a series changes the results - populated_collection['constant_series'].stored_data = np.array([1, 2, 3, 4, 5]) - updated_constants = populated_collection.constants - assert len(updated_constants) == 3 # One less constant - assert 'constant_series' not in [ts.name for ts in updated_constants] - - def test_timesteps_properties(self, populated_collection, sample_timesteps): - """Test timestep-related properties.""" - # Test default (all) timesteps - assert populated_collection.timesteps.equals(sample_timesteps) - assert len(populated_collection.timesteps_extra) == len(sample_timesteps) + 1 - - # Test activating a subset - subset = sample_timesteps[1:3] - populated_collection.activate_timesteps(subset) - - assert populated_collection.timesteps.equals(subset) - assert len(populated_collection.timesteps_extra) == len(subset) + 1 - - # Check that time series were updated - assert populated_collection['varying_series'].active_timesteps.equals(subset) - assert populated_collection['extra_timestep_series'].active_timesteps.equals( - populated_collection.timesteps_extra - ) - - # Test reset - populated_collection.reset() - assert populated_collection.timesteps.equals(sample_timesteps) - - def test_to_dataframe_and_dataset(self, populated_collection): - """Test conversion to DataFrame and Dataset.""" - # Test to_dataset - ds = populated_collection.to_dataset() - 
assert isinstance(ds, xr.Dataset) - assert len(ds.data_vars) == 6 - - # Test to_dataframe with different filters - df_all = populated_collection.to_dataframe(filtered='all') - assert len(df_all.columns) == 6 - - df_constant = populated_collection.to_dataframe(filtered='constant') - assert len(df_constant.columns) == 4 - - df_non_constant = populated_collection.to_dataframe(filtered='non_constant') - assert len(df_non_constant.columns) == 2 - - # Test invalid filter - with pytest.raises(ValueError): - populated_collection.to_dataframe(filtered='invalid') - - def test_calculate_aggregation_weights(self, populated_collection): - """Test aggregation weight calculation.""" - weights = populated_collection.calculate_aggregation_weights() - - # Group weights should be 0.5 each (1/2) - assert populated_collection.group_weights['group1'] == 0.5 - - # Series in group1 should have weight 0.5 - assert weights['group1_series1'] == 0.5 - assert weights['group1_series2'] == 0.5 - - # Series with explicit weight should have that weight - assert weights['weighted_series'] == 0.5 - - # Series without group or weight should have weight 1 - assert weights['constant_series'] == 1 - - def test_insert_new_data(self, populated_collection, sample_timesteps): - """Test inserting new data.""" - # Create new data - new_data = pd.DataFrame( - { - 'constant_series': [100, 100, 100, 100, 100], - 'varying_series': [5, 10, 15, 20, 25], - # extra_timestep_series is omitted to test partial updates - }, - index=sample_timesteps, - ) - - # Insert data - populated_collection.insert_new_data(new_data) - - # Verify updates - assert np.all(populated_collection['constant_series'].selected_data.values == 100) - assert np.array_equal(populated_collection['varying_series'].selected_data.values, np.array([5, 10, 15, 20, 25])) - - # Series not in the DataFrame should be unchanged - assert np.array_equal( - populated_collection['extra_timestep_series'].selected_data.values[:-1], np.array([1, 2, 3, 4, 5]) - ) - - # Test with mismatched index - bad_index = pd.date_range('2023-02-01', periods=5, freq='D', name='time') - bad_data = pd.DataFrame({'constant_series': [1, 1, 1, 1, 1]}, index=bad_index) - - with pytest.raises(ValueError, match='must match collection timesteps'): - populated_collection.insert_new_data(bad_data) - - def test_restore_data(self, populated_collection): - """Test restoring original data.""" - # Capture original data - original_values = {name: ts.stored_data.copy() for name, ts in populated_collection.time_series_data.items()} - - # Modify data - new_data = pd.DataFrame( - { - name: np.ones(len(populated_collection.timesteps)) * 999 - for name in populated_collection.time_series_data - if not populated_collection[name].needs_extra_timestep - }, - index=populated_collection.timesteps, - ) - - populated_collection.insert_new_data(new_data) - - # Verify data was changed - assert np.all(populated_collection['constant_series'].selected_data.values == 999) - - # Restore data - populated_collection.restore_data() - - # Verify data was restored - for name, original in original_values.items(): - restored = populated_collection[name].stored_data - assert np.array_equal(restored.values, original.values) - - def test_class_method_with_uniform_timesteps(self): - """Test the with_uniform_timesteps class method.""" - collection = TimeSeriesCollection.with_uniform_timesteps( - start_time=pd.Timestamp('2023-01-01'), periods=24, freq='H', hours_per_step=1 - ) - - assert len(collection.timesteps) == 24 - assert 
collection.hours_of_previous_timesteps == 1 - assert (collection.timesteps[1] - collection.timesteps[0]) == pd.Timedelta(hours=1) - - def test_hours_per_timestep(self, populated_collection): - """Test hours_per_timestep calculation.""" - # Standard case - uniform timesteps - hours = populated_collection.hours_per_timestep.values - assert np.allclose(hours, 24) # Default is daily timesteps - - # Create non-uniform timesteps - non_uniform_times = pd.DatetimeIndex( - [ - pd.Timestamp('2023-01-01'), - pd.Timestamp('2023-01-02'), - pd.Timestamp('2023-01-03 12:00:00'), # 1.5 days from previous - pd.Timestamp('2023-01-04'), # 0.5 days from previous - pd.Timestamp('2023-01-06'), # 2 days from previous - ], - name='time', - ) - - collection = TimeSeriesCollection(non_uniform_times) - hours = collection.hours_per_timestep.values - - # Expected hours between timestamps - expected = np.array([24, 36, 12, 48, 48]) - assert np.allclose(hours, expected) - - def test_validation_and_errors(self, sample_timesteps): - """Test validation and error handling.""" - # Test non-DatetimeIndex - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): - TimeSeriesCollection(pd.Index([1, 2, 3, 4, 5])) - - # Test too few timesteps - with pytest.raises(ValueError, match='must contain at least 2 timestamps'): - TimeSeriesCollection(pd.DatetimeIndex([pd.Timestamp('2023-01-01')], name='time')) - - # Test invalid active_timesteps - collection = TimeSeriesCollection(sample_timesteps) - invalid_timesteps = pd.date_range('2024-01-01', periods=3, freq='D', name='time') - - with pytest.raises(ValueError, match='must be a subset'): - collection.activate_timesteps(invalid_timesteps) - - - @pytest.fixture def sample_scenario_index(): """Create a sample scenario index with the required 'scenario' name.""" return pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario') -@pytest.fixture -def sample_multi_index(sample_timesteps, sample_scenario_index): - """Create a sample MultiIndex with scenarios and timesteps.""" - return pd.MultiIndex.from_product( - [sample_scenario_index, sample_timesteps], - names=['scenario', 'time'] - ) - - @pytest.fixture def simple_scenario_dataarray(sample_timesteps, sample_scenario_index): """Create a DataArray with both scenario and time dimensions.""" @@ -643,9 +330,15 @@ def sample_scenario_timeseries(simple_scenario_dataarray): @pytest.fixture -def sample_scenario_collection(sample_timesteps, sample_scenario_index): - """Create a sample TimeSeriesCollection with scenarios.""" - return TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) +def sample_allocator(sample_timesteps): + """Create a sample TimeSeriesAllocator.""" + return TimeSeriesAllocator(sample_timesteps) + + +@pytest.fixture +def sample_scenario_allocator(sample_timesteps, sample_scenario_index): + """Create a sample TimeSeriesAllocator with scenarios.""" + return TimeSeriesAllocator(sample_timesteps, scenarios=sample_scenario_index) class TestTimeSeriesWithScenarios: @@ -658,8 +351,7 @@ def test_initialization_with_scenarios(self, simple_scenario_dataarray): # Check basic properties assert ts.name == 'Scenario Series' assert ts._has_scenarios is True - assert ts.active_scenarios is not None - assert len(ts.active_scenarios) == len(simple_scenario_dataarray.coords['scenario']) + assert ts._selected_scenarios is None # No selection initially # Check data initialization assert isinstance(ts.stored_data, xr.DataArray) @@ -669,79 +361,49 @@ def test_initialization_with_scenarios(self, 
simple_scenario_dataarray): # Check backup was created assert ts._backup.equals(simple_scenario_dataarray) - # Check active timesteps and scenarios - assert ts.active_timesteps.equals(simple_scenario_dataarray.indexes['time']) - assert ts.active_scenarios.equals(simple_scenario_dataarray.indexes['scenario']) - - def test_reset_with_scenarios(self, sample_scenario_timeseries): + def test_reset_with_scenarios(self, sample_scenario_timeseries, simple_scenario_dataarray): """Test reset method with scenarios.""" # Get original full indexes - full_timesteps = sample_scenario_timeseries.active_timesteps - full_scenarios = sample_scenario_timeseries.active_scenarios + full_timesteps = simple_scenario_dataarray.coords['time'] + full_scenarios = simple_scenario_dataarray.coords['scenario'] # Set to subset timesteps and scenarios subset_timesteps = full_timesteps[1:3] subset_scenarios = full_scenarios[:2] - sample_scenario_timeseries.active_timesteps = subset_timesteps - sample_scenario_timeseries.active_scenarios = subset_scenarios + sample_scenario_timeseries.set_selection(timesteps=subset_timesteps, scenarios=subset_scenarios) # Verify subsets were set - assert sample_scenario_timeseries.active_timesteps.equals(subset_timesteps) - assert sample_scenario_timeseries.active_scenarios.equals(subset_scenarios) + assert sample_scenario_timeseries._selected_timesteps.equals(subset_timesteps) + assert sample_scenario_timeseries._selected_scenarios.equals(subset_scenarios) assert sample_scenario_timeseries.selected_data.shape == (len(subset_scenarios), len(subset_timesteps)) # Reset sample_scenario_timeseries.reset() # Should be back to full indexes - assert sample_scenario_timeseries.active_timesteps.equals(full_timesteps) - assert sample_scenario_timeseries.active_scenarios.equals(full_scenarios) + assert sample_scenario_timeseries._selected_timesteps is None + assert sample_scenario_timeseries._selected_scenarios is None assert sample_scenario_timeseries.selected_data.shape == (len(full_scenarios), len(full_timesteps)) - def test_active_scenarios_getter_setter(self, sample_scenario_timeseries, sample_scenario_index): - """Test active_scenarios getter and setter.""" + def test_scenario_selection(self, sample_scenario_timeseries, sample_scenario_index): + """Test scenario selection.""" # Initial state should use all scenarios - assert sample_scenario_timeseries.active_scenarios.equals(sample_scenario_index) + assert sample_scenario_timeseries._selected_scenarios is None # Set to a subset subset_index = sample_scenario_index[:2] # First two scenarios - sample_scenario_timeseries.active_scenarios = subset_index - assert sample_scenario_timeseries.active_scenarios.equals(subset_index) + sample_scenario_timeseries.set_selection(scenarios=subset_index) + assert sample_scenario_timeseries._selected_scenarios.equals(subset_index) # Active data should reflect the subset assert sample_scenario_timeseries.selected_data.equals( sample_scenario_timeseries.stored_data.sel(scenario=subset_index) ) - # Reset to full index - sample_scenario_timeseries.active_scenarios = None - assert sample_scenario_timeseries.active_scenarios.equals(sample_scenario_index) - - # Test invalid type - with pytest.raises(TypeError, match='must be a pandas Index'): - sample_scenario_timeseries.active_scenarios = 'invalid' - - # Test invalid scenario names - invalid_scenarios = pd.Index(['invalid1', 'invalid2'], name='scenario') - with pytest.raises(ValueError, match='must be a subset'): - sample_scenario_timeseries.active_scenarios = 
invalid_scenarios - - def test_scenario_selection_methods(self, sample_scenario_timeseries): - """Test scenario selection helper methods.""" - # Test select_scenario - baseline_data = sample_scenario_timeseries.sel(scenario='baseline') - assert baseline_data.dims == ('time',) - assert np.array_equal(baseline_data.values, [10, 20, 30, 40, 50]) - - # Test with non-existent scenario - with pytest.raises(KeyError): - sample_scenario_timeseries.sel(scenario='nonexistent') - - # Test get_scenario_names - scenario_names = sample_scenario_timeseries.active_scenarios - assert len(scenario_names) == 3 - assert set(scenario_names) == {'baseline', 'high_demand', 'low_price'} + # Clear selection + sample_scenario_timeseries.clear_selection(timesteps=False, scenarios=True) + assert sample_scenario_timeseries._selected_scenarios is None def test_all_equal_with_scenarios(self, sample_timesteps, sample_scenario_index): """Test all_equal property with scenarios.""" @@ -769,150 +431,6 @@ def test_all_equal_with_scenarios(self, sample_timesteps, sample_scenario_index) ts_per_scenario = TimeSeries(per_scenario_dataarray, 'Per-Scenario Equal Series') assert ts_per_scenario.all_equal is False - # Not equal within at least one scenario - unequal_data = np.array([ - [5, 5, 5, 5, 5], # baseline - all equal - [10, 10, 10, 10, 10], # high_demand - all equal - [15, 15, 20, 15, 15] # low_price - not all equal - ]) - unequal_dataarray = xr.DataArray( - data=unequal_data, - coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, - dims=['scenario', 'time'] - ) - ts_unequal = TimeSeries(unequal_dataarray, 'Unequal Scenario Series') - assert ts_unequal.all_equal is False - - def test_stats_with_scenarios(self, sample_timesteps, sample_scenario_index): - """Test stats property with scenarios.""" - # Create data with different patterns in each scenario - data = np.array([ - [10, 20, 30, 40, 50], # baseline - increasing - [100, 100, 100, 100, 100], # high_demand - constant - [50, 40, 30, 20, 10] # low_price - decreasing - ]) - dataarray = xr.DataArray( - data=data, - coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, - dims=['scenario', 'time'] - ) - ts = TimeSeries(dataarray, 'Mixed Stats Series') - - # Get stats string - stats_str = ts.stats - - # Should include scenario information - assert "By scenario" in stats_str - assert "baseline" in stats_str - assert "high_demand" in stats_str - assert "low_price" in stats_str - - # Should include actual statistics - assert "mean" in stats_str - assert "min" in stats_str - assert "max" in stats_str - assert "std" in stats_str - assert "constant" in stats_str - - # Test with single active scenario - ts.active_scenarios = pd.Index(['baseline'], name='scenario') - single_stats_str = ts.stats - - # Should not include scenario breakdown - assert "By scenario" not in single_stats_str - assert "mean" in single_stats_str # Still has regular stats - - def test_stored_data_setter_with_scenarios(self, sample_scenario_timeseries, sample_timesteps, sample_scenario_index): - """Test stored_data setter with different scenario data types.""" - # Test with 2D array - array_data = np.array([ - [1, 2, 3, 4, 5], - [6, 7, 8, 9, 10], - [11, 12, 13, 14, 15] - ]) - sample_scenario_timeseries.stored_data = array_data - assert np.array_equal(sample_scenario_timeseries.stored_data.values, array_data) - - # Test with scalar (should broadcast to all scenarios and timesteps) - sample_scenario_timeseries.stored_data = 42 - assert 
np.all(sample_scenario_timeseries.stored_data.values == 42) - - # Test with another scenario DataArray - another_dataarray = xr.DataArray( - data=np.random.rand(3, 5), - coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, - dims=['scenario', 'time'] - ) - sample_scenario_timeseries.stored_data = another_dataarray - assert sample_scenario_timeseries.stored_data.equals(another_dataarray) - - # Test with MultiIndex Series - multi_idx = pd.MultiIndex.from_product( - [sample_scenario_index, sample_timesteps], - names=['scenario', 'time'] - ) - series_values = np.arange(15) # 15 = 3 scenarios * 5 timesteps - multi_series = pd.Series(series_values, index=multi_idx) - - sample_scenario_timeseries.stored_data = multi_series - assert sample_scenario_timeseries.stored_data.shape == (3, 5) - # Verify the first scenario's values - assert np.array_equal( - sample_scenario_timeseries.sel(scenario='baseline').values, - series_values[:5] - ) - - def test_from_datasource_with_scenarios(self, sample_timesteps, sample_scenario_index): - """Test from_datasource class method with scenarios.""" - # Test with 2D array - data = np.array([ - [1, 2, 3, 4, 5], - [6, 7, 8, 9, 10], - [11, 12, 13, 14, 15] - ]) - ts_array = TimeSeries.from_datasource(data, 'Array Series', sample_timesteps, scenarios=sample_scenario_index) - assert ts_array._has_scenarios - assert np.array_equal(ts_array.stored_data.values, data) - - # Test with scalar - ts_scalar = TimeSeries.from_datasource(42, 'Scalar Series', sample_timesteps, scenarios=sample_scenario_index) - assert ts_scalar._has_scenarios - assert np.all(ts_scalar.stored_data.values == 42) - - # Test with TimeSeriesData including scenarios - - #TODO: Test with TimeSeriesData including scenarios - - def test_to_json_from_json_with_scenarios(self, sample_scenario_timeseries): - """Test to_json and from_json methods with scenarios.""" - # Test to_json (dictionary only) - json_dict = sample_scenario_timeseries.to_json() - assert json_dict['name'] == sample_scenario_timeseries.name - assert 'data' in json_dict - assert 'coords' in json_dict['data'] - assert 'time' in json_dict['data']['coords'] - assert 'scenario' in json_dict['data']['coords'] - - # Test to_json with file saving - with tempfile.TemporaryDirectory() as tmpdirname: - filepath = Path(tmpdirname) / 'scenario_timeseries.json' - sample_scenario_timeseries.to_json(filepath) - assert filepath.exists() - - # Test from_json with file loading - loaded_ts = TimeSeries.from_json(path=filepath) - assert loaded_ts.name == sample_scenario_timeseries.name - assert loaded_ts._has_scenarios - assert np.array_equal(loaded_ts.stored_data.values, sample_scenario_timeseries.stored_data.values) - assert loaded_ts.active_scenarios.equals(sample_scenario_timeseries.active_scenarios) - - # Test from_json with dictionary - loaded_ts_dict = TimeSeries.from_json(data=json_dict) - assert loaded_ts_dict.name == sample_scenario_timeseries.name - assert loaded_ts_dict._has_scenarios - assert np.array_equal(loaded_ts_dict.stored_data.values, sample_scenario_timeseries.stored_data.values) - assert loaded_ts_dict.active_scenarios.equals(sample_scenario_timeseries.active_scenarios) - def test_arithmetic_with_scenarios(self, sample_scenario_timeseries, sample_timesteps, sample_scenario_index): """Test arithmetic operations with scenarios.""" # Create a second TimeSeries with scenarios @@ -934,555 +452,297 @@ def test_arithmetic_with_scenarios(self, sample_scenario_timeseries, sample_time baseline_result = 
result.sel(scenario='baseline').values assert np.array_equal(baseline_result, baseline_original + 1) - # Test operation with scalar - result_scalar = sample_scenario_timeseries * 2 - assert result_scalar.shape == (3, 5) - # All values should be doubled - assert np.array_equal( - result_scalar.sel(scenario='baseline').values, - baseline_original * 2 - ) - def test_repr_and_str(self, sample_scenario_timeseries): - """Test __repr__ and __str__ methods with scenarios.""" - # Test __repr__ - repr_str = repr(sample_scenario_timeseries) - assert 'scenarios' in repr_str - assert str(len(sample_scenario_timeseries.active_scenarios)) in repr_str +class TestTimeSeriesAllocator: + """Test suite for TimeSeriesAllocator class.""" - # Test __str__ - str_repr = str(sample_scenario_timeseries) - assert 'By scenario' in str_repr - # Should include the name - assert sample_scenario_timeseries.name in str_repr + def test_initialization(self, sample_timesteps): + """Test basic initialization.""" + allocator = TimeSeriesAllocator(sample_timesteps) + assert allocator.timesteps.equals(sample_timesteps) + assert len(allocator.timesteps_extra) == len(sample_timesteps) + 1 + assert isinstance(allocator.hours_per_timestep, xr.DataArray) + assert len(allocator._time_series) == 0 -class TestTimeSeriesCollectionWithScenarios: - """Test suite for TimeSeriesCollection with scenarios.""" + def test_initialization_with_custom_hours(self, sample_timesteps): + """Test initialization with custom hour settings.""" + # Test with last timestep duration + last_timestep_hours = 12 + allocator = TimeSeriesAllocator(sample_timesteps, hours_of_last_timestep=last_timestep_hours) - def test_initialization_with_scenarios(self, sample_timesteps, sample_scenario_index): - """Test initialization with scenarios.""" - collection = TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) + # Verify the last timestep duration + extra_step_delta = allocator.timesteps_extra[-1] - allocator.timesteps_extra[-2] + assert extra_step_delta == pd.Timedelta(hours=last_timestep_hours) - assert collection.all_timesteps.equals(sample_timesteps) - assert collection.all_scenarios.equals(sample_scenario_index) - assert len(collection) == 0 + # Test with previous timestep duration + hours_per_step = 8 + allocator2 = TimeSeriesAllocator(sample_timesteps, hours_of_previous_timesteps=hours_per_step) - def test_create_time_series_with_scenarios(self, sample_scenario_collection): - """Test creating time series with scenarios.""" - # Test scalar (broadcasts to all scenarios) - ts1 = sample_scenario_collection.create_time_series(42, 'scalar_series') - assert ts1._has_scenarios + assert allocator2.hours_of_previous_timesteps == hours_per_step + + def test_add_time_series(self, sample_allocator, sample_timesteps): + """Test adding time series.""" + # Test scalar + ts1 = sample_allocator.add_time_series('scalar_series', 42) assert ts1.name == 'scalar_series' - assert ts1.selected_data.shape == (3, 5) # 3 scenarios, 5 timesteps assert np.all(ts1.selected_data.values == 42) - # Test 1D array (broadcasts to all scenarios) + # Test numpy array data = np.array([1, 2, 3, 4, 5]) - ts2 = sample_scenario_collection.create_time_series(data, 'array_series') - assert ts2._has_scenarios - assert ts2.selected_data.shape == (3, 5) - # Each scenario should have the same values - for scenario in sample_scenario_collection.scenarios: - assert np.array_equal(ts2.sel(scenario=scenario).values, data) - - # Test 2D array (one row per scenario) - data_2d = np.array([ - [10, 20, 
30, 40, 50], - [15, 25, 35, 45, 55], - [5, 15, 25, 35, 45] - ]) - ts3 = sample_scenario_collection.create_time_series(data_2d, 'scenario_specific_series') - assert ts3._has_scenarios - assert ts3.selected_data.shape == (3, 5) - # Each scenario should have its own values - assert np.array_equal(ts3.sel(scenario='baseline').values, data_2d[0]) - assert np.array_equal(ts3.sel(scenario='high_demand').values, data_2d[1]) - assert np.array_equal(ts3.sel(scenario='low_price').values, data_2d[2]) - - def test_activate_scenarios(self, sample_scenario_collection, sample_scenario_index): - """Test activating scenarios.""" - # Add some time series - sample_scenario_collection.create_time_series(42, 'scalar_series') - sample_scenario_collection.create_time_series( - np.array([ - [1, 2, 3, 4, 5], - [6, 7, 8, 9, 10], - [11, 12, 13, 14, 15] - ]), - 'array_series' - ) - - # Activate a subset of scenarios - subset_scenarios = sample_scenario_index[:2] # First two scenarios - sample_scenario_collection.activate_timesteps(active_scenarios=subset_scenarios) - - # Collection should have the subset - assert sample_scenario_collection.scenarios.equals(subset_scenarios) - - # Time series should have the subset too - assert sample_scenario_collection['scalar_series'].active_scenarios.equals(subset_scenarios) - assert sample_scenario_collection['array_series'].active_scenarios.equals(subset_scenarios) - - # Active data should reflect the subset - assert sample_scenario_collection['array_series'].selected_data.shape == (2, 5) # 2 scenarios, 5 timesteps + ts2 = sample_allocator.add_time_series('array_series', data) + assert np.array_equal(ts2.selected_data.values, data) - # Reset scenarios - sample_scenario_collection.reset() - assert sample_scenario_collection.scenarios.equals(sample_scenario_index) - assert sample_scenario_collection['scalar_series'].active_scenarios.equals(sample_scenario_index) + # Test with existing TimeSeries + existing_ts = TimeSeries.from_datasource(10, 'original_name', sample_timesteps, aggregation_weight=0.7) + ts3 = sample_allocator.add_time_series('weighted_series', existing_ts) + assert ts3.name == 'weighted_series' # Name changed + assert ts3.aggregation_weight == 0.7 # Weight preserved - def test_to_dataframe_with_scenarios(self, sample_scenario_collection): - """Test conversion to DataFrame with scenarios.""" - # Add some time series - sample_scenario_collection.create_time_series(42, 'constant_series') - sample_scenario_collection.create_time_series( - np.array([ - [10, 20, 30, 40, 50], # baseline - [15, 25, 35, 45, 55], # high_demand - [5, 15, 25, 35, 45] # low_price - ]), - 'varying_series' - ) + # Test with extra timestep + ts4 = sample_allocator.add_time_series('extra_series', 5, has_extra_timestep=True) + assert ts4.name == 'extra_series' + assert ts4.has_extra_timestep + assert len(ts4.selected_data) == len(sample_allocator.timesteps_extra) - # Convert to DataFrame - df = sample_scenario_collection.to_dataframe('all') + # Test duplicate name + with pytest.raises(KeyError, match='already exists'): + sample_allocator.add_time_series('scalar_series', 1) - # DataFrame should have MultiIndex with (scenario, time) - assert isinstance(df.index, pd.MultiIndex) - assert df.index.names == ['scenario', 'time'] + def test_access_time_series(self, sample_allocator): + """Test accessing time series.""" + # Add a few time series + sample_allocator.add_time_series('series1', 42) + sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) - # Should have correct number of rows 
(scenarios * timesteps) - assert len(df) == 18 # 3 scenarios * 5 timesteps (+1) + # Test __getitem__ + ts = sample_allocator['series1'] + assert ts.name == 'series1' - # Should have both series as columns - assert 'constant_series' in df.columns - assert 'varying_series' in df.columns + # Test __contains__ with string + assert 'series1' in sample_allocator + assert 'nonexistent_series' not in sample_allocator - # Check values for specific scenario and time - baseline_t0 = df.loc[('baseline', sample_scenario_collection.timesteps[0])] - assert baseline_t0['constant_series'] == 42 - assert baseline_t0['varying_series'] == 10 + # Test __contains__ with TimeSeries object + assert sample_allocator['series2'] in sample_allocator - def test_to_dataset_with_scenarios(self, sample_scenario_collection): - """Test conversion to Dataset with scenarios.""" - # Add some time series - sample_scenario_collection.create_time_series(42, 'constant_series') - sample_scenario_collection.create_time_series( - np.array([ - [10, 20, 30, 40, 50], - [15, 25, 35, 45, 55], - [5, 15, 25, 35, 45] - ]), - 'varying_series' - ) + # Test access to non-existent series + with pytest.raises(ValueError): + sample_allocator['nonexistent_series'] - # Convert to Dataset - ds = sample_scenario_collection.to_dataset() + def test_selection_propagation(self, sample_allocator, sample_timesteps): + """Test that selections propagate to TimeSeries.""" + # Add a few time series + ts1 = sample_allocator.add_time_series('series1', 42) + ts2 = sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) + ts3 = sample_allocator.add_time_series('series3', 5, has_extra_timestep=True) - # Dataset should have both dimensions - assert 'scenario' in ds.dims - assert 'time' in ds.dims + # Initially no selections + assert ts1._selected_timesteps is None + assert ts2._selected_timesteps is None + assert ts3._selected_timesteps is None - # Should have both series as variables - assert 'constant_series' in ds - assert 'varying_series' in ds + # Apply selection + subset_timesteps = sample_timesteps[1:3] + sample_allocator.set_selection(timesteps=subset_timesteps) + # Check selection propagated to regular time series + assert ts1._selected_timesteps.equals(subset_timesteps) + assert ts2._selected_timesteps.equals(subset_timesteps) - # Check values for specific scenario and time - assert ds['varying_series'].sel( - scenario='baseline', - time=sample_scenario_collection.timesteps[0] - ).item() == 10 + # Check selection with extra timestep + assert ts3._selected_timesteps is not None + assert len(ts3._selected_timesteps) == len(subset_timesteps) + 1 - def test_get_scenario_data(self, sample_scenario_collection): - """Test get_scenario_data method.""" - # Add some time series - sample_scenario_collection.create_time_series(42, 'constant_series') - sample_scenario_collection.create_time_series( - np.array([ - [10, 20, 30, 40, 50], - [15, 25, 35, 45, 55], - [5, 15, 25, 35, 45] - ]), - 'varying_series' - ) + # Clear selection + sample_allocator.clear_selection() - # Get data for one scenario - baseline_df = sample_scenario_collection.get_scenario_data('baseline') + # Check selection cleared + assert ts1._selected_timesteps is None + assert ts2._selected_timesteps is None + assert ts3._selected_timesteps is None - # Should be a DataFrame with time index - assert isinstance(baseline_df, pd.DataFrame) - assert baseline_df.index.name == 'time' - assert len(baseline_df) == 5 # 5 timesteps + def test_update_time_series(self, sample_allocator): + """Test 
updating a time series.""" + # Add a time series + ts = sample_allocator.add_time_series('series', 42) - # Should have both series as columns - assert 'constant_series' in baseline_df.columns - assert 'varying_series' in baseline_df.columns + # Update it + sample_allocator.update_time_series('series', np.array([1, 2, 3, 4, 5])) - # Check specific values - assert baseline_df['constant_series'].iloc[0] == 42 - assert baseline_df['varying_series'].iloc[0] == 10 + # Check update was applied + assert np.array_equal(ts.selected_data.values, np.array([1, 2, 3, 4, 5])) - # Test with invalid scenario - with pytest.raises(ValueError, match="Scenario 'invalid' not found"): - sample_scenario_collection.get_scenario_data('invalid') + # Test updating non-existent series + with pytest.raises(KeyError): + sample_allocator.update_time_series('nonexistent', 42) - def test_compare_scenarios(self, sample_scenario_collection): - """Test compare_scenarios method.""" + def test_as_dataset(self, sample_allocator): + """Test as_dataset method.""" # Add some time series - sample_scenario_collection.create_time_series( - np.array([ - [10, 20, 30, 40, 50], # baseline - [15, 25, 35, 45, 55], # high_demand - [5, 15, 25, 35, 45] # low_price - ]), - 'varying_series' - ) - - # Compare two scenarios - diff_df = sample_scenario_collection.compare_scenarios('baseline', 'high_demand') + sample_allocator.add_time_series('series1', 42) + sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) - # Should be a DataFrame with time index - assert isinstance(diff_df, pd.DataFrame) - assert diff_df.index.name == 'time' - - # Should show differences (baseline - high_demand) - assert np.array_equal(diff_df['varying_series'].values, np.array([-5, -5, -5, -5, -5])) - - # Compare with specific time series - diff_specific = sample_scenario_collection.compare_scenarios( - 'baseline', 'low_price', time_series_names=['varying_series'] - ) - - # Should only include the specified time series - assert list(diff_specific.columns) == ['varying_series'] - - # Should show correct differences (baseline - low_price) - assert np.array_equal(diff_specific['varying_series'].values, np.array([5, 5, 5, 5, 5])) - - def test_scenario_summary(self, sample_scenario_collection): - """Test scenario_summary method.""" - # Add some time series with different patterns - sample_scenario_collection.create_time_series( - np.array([ - [10, 20, 30, 40, 50], # baseline - increasing - [100, 100, 100, 100, 100], # high_demand - constant - [50, 40, 30, 20, 10] # low_price - decreasing - ]), - 'varying_series' - ) - - # Get summary - summary = sample_scenario_collection.scenario_summary() - - # Should be a DataFrame with scenario index and MultiIndex columns - assert isinstance(summary, pd.DataFrame) - assert summary.index.name == 'scenario' - assert isinstance(summary.columns, pd.MultiIndex) - - # Should include statistics for each time series and scenario - assert ('varying_series', 'mean') in summary.columns - assert ('varying_series', 'min') in summary.columns - assert ('varying_series', 'max') in summary.columns - - # Check specific statistics - # Baseline (increasing): 10,20,30,40,50 - assert summary.loc['baseline', ('varying_series', 'mean')] == 30 - assert summary.loc['baseline', ('varying_series', 'min')] == 10 - assert summary.loc['baseline', ('varying_series', 'max')] == 50 - - # high_demand (constant): 100,100,100,100,100 - assert summary.loc['high_demand', ('varying_series', 'mean')] == 100 - assert summary.loc['high_demand', ('varying_series', 
'std')] == 0 - - # low_price (decreasing): 50,40,30,20,10 - assert summary.loc['low_price', ('varying_series', 'mean')] == 30 - assert summary.loc['low_price', ('varying_series', 'min')] == 10 - assert summary.loc['low_price', ('varying_series', 'max')] == 50 - - def test_insert_new_data_with_scenarios(self, sample_scenario_collection, sample_timesteps, sample_scenario_index): - """Test inserting new data with scenarios.""" - # Add some time series - sample_scenario_collection.create_time_series(42, 'constant_series') - sample_scenario_collection.create_time_series( - np.array([ - [10, 20, 30, 40, 50], - [15, 25, 35, 45, 55], - [5, 15, 25, 35, 45] - ]), - 'varying_series' - ) + # Get dataset + ds = sample_allocator.as_dataset(without_extra_timestep=True) - # Create new data with MultiIndex (scenario, time) - multi_idx = pd.MultiIndex.from_product( - [sample_scenario_index, sample_timesteps], - names=['scenario', 'time'] - ) - - new_data = pd.DataFrame( - { - 'constant_series': [100] * 15, # 3 scenarios * 5 timesteps - 'varying_series': np.arange(15) # Different value for each scenario-time combination - }, - index=multi_idx - ) - - # Insert data - sample_scenario_collection.insert_new_data(new_data) - - # Verify constant series updated - for scenario in sample_scenario_index: - assert np.all( - sample_scenario_collection['constant_series'] - .select_scenario(scenario) - .values == 100 - ) - - # Verify varying series updated with scenario-specific values - baseline_values = sample_scenario_collection['varying_series'].select_scenario('baseline').values - assert np.array_equal(baseline_values, np.arange(0, 5)) - - high_demand_values = sample_scenario_collection['varying_series'].select_scenario('high_demand').values - assert np.array_equal(high_demand_values, np.arange(5, 10)) - - low_price_values = sample_scenario_collection['varying_series'].select_scenario('low_price').values - assert np.array_equal(low_price_values, np.arange(10, 15)) - - # Test with partial data (missing some scenarios) - partial_idx = pd.MultiIndex.from_product( - [sample_scenario_index[:2], sample_timesteps], # Only first two scenarios - names=['scenario', 'time'] - ) - - partial_data = pd.DataFrame( - { - 'constant_series': [200] * 10, # 2 scenarios * 5 timesteps - 'varying_series': np.arange(100, 110) - }, - index=partial_idx - ) + # Check dataset contents + assert isinstance(ds, xr.Dataset) + assert 'series1' in ds + assert 'series2' in ds + assert np.all(ds['series1'].values == 42) + assert np.array_equal(ds['series2'].values, np.array([1, 2, 3, 4, 5])) - # Insert partial data - sample_scenario_collection.insert_new_data(partial_data) - # First two scenarios should be updated - assert np.all( - sample_scenario_collection['constant_series'] - .select_scenario('baseline') - .values == 200 - ) - - assert np.all( - sample_scenario_collection['constant_series'] - .select_scenario('high_demand') - .values == 200 - ) +class TestTimeSeriesAllocatorWithScenarios: + """Test suite for TimeSeriesAllocator with scenarios.""" - # Last scenario should remain unchanged - assert np.all( - sample_scenario_collection['constant_series'] - .select_scenario('low_price') - .values == 100 - ) + def test_initialization_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test initialization with scenarios.""" + allocator = TimeSeriesAllocator(sample_timesteps, scenarios=sample_scenario_index) - # Test with mismatched index - bad_scenarios = pd.Index(['s1', 's2', 's3'], name='scenario') - bad_idx = 
pd.MultiIndex.from_product( - [bad_scenarios, sample_timesteps], - names=['scenario', 'time'] - ) + assert allocator.timesteps.equals(sample_timesteps) + assert allocator.scenarios.equals(sample_scenario_index) + assert len(allocator._time_series) == 0 - bad_data = pd.DataFrame( - {'constant_series': [1] * 15}, - index=bad_idx - ) + def test_add_time_series_with_scenarios(self, sample_scenario_allocator): + """Test creating time series with scenarios.""" + # Test scalar (broadcasts to all scenarios) + ts1 = sample_scenario_allocator.add_time_series('scalar_series', 42) + assert ts1._has_scenarios + assert ts1.name == 'scalar_series' + assert ts1.selected_data.shape == (3, 5) # 3 scenarios, 5 timesteps + assert np.all(ts1.selected_data.values == 42) - with pytest.raises(ValueError, match="scenario index doesn't match"): - sample_scenario_collection.insert_new_data(bad_data) - - def test_with_scenarios_class_method(self): - """Test the with_scenarios class method.""" - collection = TimeSeriesCollection.with_scenarios( - start_time=pd.Timestamp('2023-01-01'), - periods=24, - freq='H', - scenario_names=['baseline', 'high', 'low'], - hours_per_step=1 - ) + # Test 1D array (broadcasts to all scenarios) + data = np.array([1, 2, 3, 4, 5]) + ts2 = sample_scenario_allocator.add_time_series('array_series', data) + assert ts2._has_scenarios + assert ts2.selected_data.shape == (3, 5) + # Each scenario should have the same values + for scenario in sample_scenario_allocator.scenarios: + assert np.array_equal(ts2.sel(scenario=scenario).values, data) - assert len(collection.timesteps) == 24 - assert collection.scenarios is not None - assert len(collection.scenarios) == 3 - assert list(collection.scenarios) == ['baseline', 'high', 'low'] - assert collection.hours_of_previous_timesteps == 1 - assert (collection.timesteps[1] - collection.timesteps[0]) == pd.Timedelta(hours=1) + # Test 2D array (one row per scenario) + data_2d = np.array([ + [10, 20, 30, 40, 50], + [15, 25, 35, 45, 55], + [5, 15, 25, 35, 45] + ]) + ts3 = sample_scenario_allocator.add_time_series('scenario_specific_series', data_2d) + assert ts3._has_scenarios + assert ts3.selected_data.shape == (3, 5) + # Each scenario should have its own values + assert np.array_equal(ts3.sel(scenario='baseline').values, data_2d[0]) + assert np.array_equal(ts3.sel(scenario='high_demand').values, data_2d[1]) + assert np.array_equal(ts3.sel(scenario='low_price').values, data_2d[2]) - def test_string_representation_with_scenarios(self, sample_scenario_collection): - """Test string representation with scenarios.""" + def test_selection_propagation_with_scenarios(self, sample_scenario_allocator, sample_timesteps, sample_scenario_index): + """Test scenario selection propagation.""" # Add some time series - sample_scenario_collection.create_time_series(42, 'constant_series') - - # Get string representation - str_repr = str(sample_scenario_collection) - - # Should include scenario information - assert 'scenarios' in str_repr - assert str(len(sample_scenario_collection.scenarios)) in str_repr - - # Should include time series information - assert 'constant_series' in str_repr - - def test_restore_data_with_scenarios(self, sample_scenario_collection): - """Test restoring original data with scenarios.""" + ts1 = sample_scenario_allocator.add_time_series('series1', 42) + ts2 = sample_scenario_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) + + # Initial state - no selections + assert ts1._selected_scenarios is None + assert ts2._selected_scenarios is None 
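+        # Note: selection state lives on the allocator, not on the individual
+        # TimeSeries; set_selection() below is expected to push the chosen
+        # subsets down into every registered TimeSeries.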
+ + # Select scenarios + subset_scenarios = sample_scenario_index[:2] + sample_scenario_allocator.set_selection(scenarios=subset_scenarios) + + # Check selections propagated + assert ts1._selected_scenarios.equals(subset_scenarios) + assert ts2._selected_scenarios.equals(subset_scenarios) + + # Check data is filtered + assert ts1.selected_data.shape == (2, 5) # 2 scenarios, 5 timesteps + assert ts2.selected_data.shape == (2, 5) + + # Apply combined selection + subset_timesteps = sample_timesteps[1:3] + sample_scenario_allocator.set_selection(timesteps=subset_timesteps, scenarios=subset_scenarios) + + # Check combined selection applied + assert ts1._selected_timesteps.equals(subset_timesteps) + assert ts1._selected_scenarios.equals(subset_scenarios) + assert ts1.selected_data.shape == (2, 2) # 2 scenarios, 2 timesteps + + # Clear selections + sample_scenario_allocator.clear_selection() + assert ts1._selected_timesteps is None + assert ts1._selected_scenarios is None + assert ts1.selected_data.shape == (3, 5) # Back to full shape + + def test_as_dataset_with_scenarios(self, sample_scenario_allocator): + """Test as_dataset method with scenarios.""" # Add some time series - sample_scenario_collection.create_time_series( + sample_scenario_allocator.add_time_series('scalar_series', 42) + sample_scenario_allocator.add_time_series( + 'varying_series', np.array([ [10, 20, 30, 40, 50], [15, 25, 35, 45, 55], [5, 15, 25, 35, 45] - ]), - 'varying_series' + ]) ) - # Capture original data - original_baseline = sample_scenario_collection['varying_series'].select_scenario('baseline').values.copy() - - # Modify data - sample_scenario_collection['varying_series'].stored_data = 999 - - # Verify data was changed - assert np.all(sample_scenario_collection['varying_series'].select_scenario('baseline').values == 999) - - # Restore data - sample_scenario_collection.restore_data() + # Get dataset + ds = sample_scenario_allocator.as_dataset() - # Verify data was restored - assert np.array_equal( - sample_scenario_collection['varying_series'].select_scenario('baseline').values, - original_baseline - ) + # Check dataset dimensions + assert 'scenario' in ds.dims + assert 'time' in ds.dims + assert ds.dims['scenario'] == 3 + assert ds.dims['time'] == 5 - # Verify scenarios were preserved - assert sample_scenario_collection['varying_series']._has_scenarios - assert len(sample_scenario_collection['varying_series'].active_scenarios) == 3 + # Check dataset variables + assert 'scalar_series' in ds + assert 'varying_series' in ds + # Check values + assert np.all(ds['scalar_series'].values == 42) + baseline_values = ds['varying_series'].sel(scenario='baseline').values + assert np.array_equal(baseline_values, np.array([10, 20, 30, 40, 50])) -class TestIntegrationWithDataConverter: - """Test integration between DataConverter and TimeSeries with scenarios.""" + def test_contains_and_iteration(self, sample_scenario_allocator): + """Test __contains__ and __iter__ methods.""" + # Add some time series + ts1 = sample_scenario_allocator.add_time_series('series1', 42) + ts2 = sample_scenario_allocator.add_time_series('series2', 10) + + # Test __contains__ + assert 'series1' in sample_scenario_allocator + assert ts1 in sample_scenario_allocator + assert 'nonexistent' not in sample_scenario_allocator + + # Test behavior with invalid type + with pytest.raises(TypeError): + 42 in sample_scenario_allocator + + def test_update_time_series_with_scenarios(self, sample_scenario_allocator, sample_scenario_index): + """Test updating a time series 
with scenarios.""" + # Add a time series + ts = sample_scenario_allocator.add_time_series('series', 42) + assert ts._has_scenarios + assert np.all(ts.selected_data.values == 42) - def test_from_dataarray_with_scenarios(self, sample_timesteps, sample_scenario_index): - """Test creating TimeSeries from DataArray with scenarios.""" - # Create a DataArray with scenarios using DataConverter - data = np.array([ + # Update with scenario-specific data + new_data = np.array([ [1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15] ]) + sample_scenario_allocator.update_time_series('series', new_data) - da = DataConverter.as_dataarray(data, sample_timesteps, sample_scenario_index) - - # Create TimeSeries from the DataArray - ts = TimeSeries(da, name="Converted Series") - - # Verify scenarios were preserved + # Check update was applied + assert np.array_equal(ts.selected_data.values, new_data) assert ts._has_scenarios - assert ts.active_scenarios.equals(sample_scenario_index) - assert np.array_equal(ts.stored_data.values, data) - - # Test with different shapes - # Scalar should broadcast to all scenarios and timesteps - scalar_da = DataConverter.as_dataarray(42, sample_timesteps, sample_scenario_index) - scalar_ts = TimeSeries(scalar_da, name="Scalar Series") - assert scalar_ts._has_scenarios - assert scalar_ts.active_scenarios.equals(sample_scenario_index) - assert np.all(scalar_ts.stored_data.values == 42) - - # 1D array should broadcast to all scenarios - array_1d = np.array([5, 10, 15, 20, 25]) - array_da = DataConverter.as_dataarray(array_1d, sample_timesteps, sample_scenario_index) - array_ts = TimeSeries(array_da, name="Array Series") - - assert array_ts._has_scenarios - for scenario in sample_scenario_index: - assert np.array_equal(array_ts.select_scenario(scenario).values, array_1d) - - def test_multiindex_series_to_timeseries(self, sample_timesteps, sample_scenario_index, sample_multi_index): - """Test creating TimeSeries from MultiIndex Series.""" - # Create a MultiIndex Series - series_values = np.arange(15) # 3 scenarios * 5 timesteps - multi_series = pd.Series(series_values, index=sample_multi_index) - - # Convert to DataArray - da = DataConverter.as_dataarray(multi_series, sample_timesteps, sample_scenario_index) - - # Create TimeSeries - ts = TimeSeries(da, name="From MultiIndex Series") - - # Verify scenarios and data - assert ts._has_scenarios - assert ts.active_scenarios.equals(sample_scenario_index) - - # Verify the first scenario's values (first 5 values) - baseline_values = ts.select_scenario('baseline').values - assert np.array_equal(baseline_values, series_values[:5]) - - # Verify the second scenario's values (second 5 values) - high_demand_values = ts.select_scenario('high_demand').values - assert np.array_equal(high_demand_values, series_values[5:10]) - - # Verify the third scenario's values (last 5 values) - low_price_values = ts.select_scenario('low_price').values - assert np.array_equal(low_price_values, series_values[10:15]) - - def test_dataconverter_to_timeseriescollection(self, sample_timesteps, sample_scenario_index): - """Test end-to-end DataConverter to TimeSeriesCollection flow.""" - # Create a collection with scenarios - collection = TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) - - # 1. Test with scalar - scalar_da = DataConverter.as_dataarray(42, sample_timesteps, sample_scenario_index) - collection.add_time_series(TimeSeries(scalar_da, name="scalar_series")) - - # 2. 
Test with 1D array
-        array_1d = np.array([5, 10, 15, 20, 25])
-        array_da = DataConverter.as_dataarray(array_1d, sample_timesteps, sample_scenario_index)
-        collection.add_time_series(TimeSeries(array_da, name="array_series"))
-
-        # 3. Test with 2D array
-        array_2d = np.array([
-            [1, 2, 3, 4, 5],
-            [6, 7, 8, 9, 10],
-            [11, 12, 13, 14, 15]
-        ])
-        array_2d_da = DataConverter.as_dataarray(array_2d, sample_timesteps, sample_scenario_index)
-        collection.add_time_series(TimeSeries(array_2d_da, name="array_2d_series"))
-
-        # 4. Test with MultiIndex Series
-        multi_idx = pd.MultiIndex.from_product(
-            [sample_scenario_index, sample_timesteps],
-            names=['scenario', 'time']
-        )
-        series_values = np.arange(15)
-        multi_series = pd.Series(series_values, index=multi_idx)
-        series_da = DataConverter.as_dataarray(multi_series, sample_timesteps, sample_scenario_index)
-        collection.add_time_series(TimeSeries(series_da, name="multi_series"))
-
-        # Verify all series were added with scenarios
-        assert len(collection) == 4
-        assert all(ts._has_scenarios for ts in collection)
-
-        # Try getting scenario-specific data
-        baseline_df = collection.get_scenario_data('baseline')
-        assert len(baseline_df) == 5  # 5 timesteps
-        assert len(baseline_df.columns) == 4  # 4 series
-
-        # Values should match expected values for 'baseline' scenario
-        assert baseline_df['scalar_series'].iloc[0] == 42
-        assert baseline_df['array_series'].iloc[0] == 5
-        assert baseline_df['array_2d_series'].iloc[0] == 1
-        assert baseline_df['multi_series'].iloc[0] == 0
 
 
 if __name__ == '__main__':
diff --git a/time_series_alloc.py b/time_series_alloc.py
index 89fa899c4..01876873b 100644
--- a/time_series_alloc.py
+++ b/time_series_alloc.py
@@ -128,7 +128,7 @@ def main():
 
     # Example 4: Add data with extra timestep
     forecast_values = np.random.normal(size=(len(scenarios), len(timesteps) + 1)) * 10 + 100
-    forecast_da = allocator.add_data_array("forecast", forecast_values, needs_extra_timestep=True)
+    forecast_da = allocator.add_data_array("forecast", forecast_values, has_extra_timestep=True)
     print("  Added 'forecast' (with extra timestep)")
     print(f"  Shape: {forecast_da.shape}")
     print(f"  Last regular timestep: {timesteps[-1]}")

From 1b320b02a0a438f6a38ff9878949b04fdd5b07f8 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 15:43:26 +0200
Subject: [PATCH 45/55] Update tests

---
 tests/test_timeseries.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py
index c8f18c88f..ace2fdb41 100644
--- a/tests/test_timeseries.py
+++ b/tests/test_timeseries.py
@@ -688,7 +688,7 @@ def test_as_dataset_with_scenarios(self, sample_scenario_allocator):
         )
 
         # Get dataset
-        ds = sample_scenario_allocator.as_dataset()
+        ds = sample_scenario_allocator.as_dataset(without_extra_timestep=True)
 
         # Check dataset dimensions
         assert 'scenario' in ds.dims

From f96a81541948fe2bb553cbdc44676b0d93c1a0ac Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 16:00:37 +0200
Subject: [PATCH 46/55] Bugfix: handle TimeSeriesData in create_time_series
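
TimeSeriesData bundles numeric data with aggregation metadata (agg_weight,
agg_group), but FlowSystem.create_time_series had no branch for it, so that
metadata never reached the allocator. A rough sketch of the now-working path
(a hedged example; the `flow_system` fixture and 5-timestep data are
illustrative, not part of this diff):

    # hypothetical usage, assuming a FlowSystem whose timesteps match the data
    data = TimeSeriesData(np.array([5, 5, 5, 5, 5]), agg_group='group1')
    ts = flow_system.create_time_series(data=data, name='demand')
    assert ts.aggregation_group == 'group1'  # metadata now reaches the allocator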
---
 flixopt/core.py        | 4 ++--
 flixopt/flow_system.py | 9 +++++++++
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/flixopt/core.py b/flixopt/core.py
index fdd99c613..08af4d6a5 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -660,7 +660,7 @@ class TimeSeries:
     @classmethod
     def from_datasource(
         cls,
-        data: NumericData,
+        data: NumericDataTS,
         name: str,
         timesteps: pd.DatetimeIndex,
         scenarios: Optional[pd.Index] = None,
@@ -1041,7 +1041,7 @@ def __init__(
     def add_time_series(
         self,
         name: str,
-        data: Union[NumericData, TimeSeries],
+        data: Union[NumericDataTS, TimeSeries],
         aggregation_weight: Optional[float] = None,
         aggregation_group: Optional[str] = None,
         has_extra_timestep: bool = False,
diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py
index 142a8850d..a26ceb32c 100644
--- a/flixopt/flow_system.py
+++ b/flixopt/flow_system.py
@@ -296,6 +296,15 @@ def create_time_series(
             return self.time_series_allocator.add_time_series(
                 data=data.selected_data, name=name, has_extra_timestep=has_extra_timestep
             )
+        elif isinstance(data, TimeSeriesData):
+            data.name = name
+            return self.time_series_allocator.add_time_series(
+                data=data.data,
+                name=name,
+                has_extra_timestep=has_extra_timestep,
+                aggregation_weight=data.agg_weight,
+                aggregation_group=data.agg_group
+            )
         return self.time_series_allocator.add_time_series(
             data=data, name=name, has_extra_timestep=has_extra_timestep
         )

From 8c7b986e0cd718cdc4d37810a28bb92d7b044957 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 16:56:29 +0200
Subject: [PATCH 47/55] Update calculation.py to the new TimeSeriesAllocator API
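
Aggregation now pulls its input through as_dataset(...).to_dataframe() and
writes aggregated values back via update_time_series(). For the weights, a
small sketch of the behaviour implemented by calculate_aggregation_weights
below (values are illustrative):

    # series sharing an aggregation_group each get weight 1/len(group);
    # an explicit aggregation_weight is kept as-is, everything else defaults to 1
    allocator.add_time_series('a', 1.0, aggregation_group='g')
    allocator.add_time_series('b', 2.0, aggregation_group='g')
    allocator.add_time_series('c', 3.0, aggregation_weight=0.7)
    allocator.calculate_aggregation_weights()  # -> {'a': 0.5, 'b': 0.5, 'c': 0.7}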
From 8c7b986e0cd718cdc4d37810a28bb92d7b044957 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 16:56:29 +0200
Subject: [PATCH 47/55] Update calculation.py

---
 flixopt/calculation.py | 18 ++++++++++--------
 flixopt/core.py        | 27 +++++++++++++++++++++++++++
 2 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/flixopt/calculation.py b/flixopt/calculation.py
index fe8a503bf..322dce047 100644
--- a/flixopt/calculation.py
+++ b/flixopt/calculation.py
@@ -217,6 +217,8 @@ def __init__(
                 list with indices, which should be used for calculation. If None, then all timesteps are used.
             folder: folder where results should be saved. If None, then the current working directory is used.
         """
+        if flow_system.time_series_allocator.scenarios is not None:
+            raise ValueError('Aggregation is not supported for scenarios yet. Please use FullCalculation instead.')
         super().__init__(name, flow_system, active_timesteps, folder=folder)
         self.aggregation_parameters = aggregation_parameters
         self.components_to_clusterize = components_to_clusterize
@@ -272,9 +274,7 @@ def _perform_aggregation(self):
 
         # Aggregation - creation of aggregated timeseries:
         self.aggregation = Aggregation(
-            original_data=self.flow_system.time_series_allocator.to_dataframe(
-                include_extra_timestep=False
-            ),  # Exclude last row (NaN)
+            original_data=self.flow_system.time_series_allocator.as_dataset(without_extra_timestep=True).to_dataframe(),
             hours_per_time_step=float(dt_min),
             hours_per_period=self.aggregation_parameters.hours_per_period,
             nr_of_periods=self.aggregation_parameters.nr_of_periods,
@@ -286,9 +286,11 @@ def _perform_aggregation(self):
         self.aggregation.cluster()
         self.aggregation.plot(show=True, save=self.folder / 'aggregation.html')
         if self.aggregation_parameters.aggregate_data_and_fix_non_binary_vars:
-            self.flow_system.time_series_allocator.insert_new_data(
-                self.aggregation.aggregated_data, include_extra_timestep=False
-            )
+            for col in self.aggregation.aggregated_data.columns:
+                data = self.aggregation.aggregated_data[col].values
+                if col in self.flow_system.time_series_allocator._has_extra_timestep:
+                    data = np.append(data, data[-1])
+                self.flow_system.time_series_allocator.update_time_series(col, data)
 
         self.durations['aggregation'] = round(timeit.default_timer() - t_start_agg, 2)
 
@@ -327,8 +329,8 @@ def __init__(
         self.nr_of_previous_values = nr_of_previous_values
         self.sub_calculations: List[FullCalculation] = []
 
-        self.all_timesteps = self.flow_system.time_series_allocator.all_timesteps
-        self.all_timesteps_extra = self.flow_system.time_series_allocator.all_timesteps_extra
+        self.all_timesteps = self.flow_system.time_series_allocator._full_timesteps
+        self.all_timesteps_extra = self.flow_system.time_series_allocator._full_timesteps_extra
 
         self.segment_names = [
             f'Segment_{i + 1}' for i in range(math.ceil(len(self.all_timesteps) / self.timesteps_per_segment))
diff --git a/flixopt/core.py b/flixopt/core.py
index 08af4d6a5..007c2696c 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -1270,6 +1270,33 @@ def update_time_series(self, name: str, data: NumericData) -> TimeSeries:
 
         return ts
 
+    def calculate_aggregation_weights(self) -> Dict[str, float]:
+        """Calculate and return aggregation weights for all time series."""
+        group_weights = self._calculate_group_weights()
+
+        weights = {}
+        for name, ts in self._time_series.items():
+            if ts.aggregation_group is not None:
+                # Use group weight
+                weights[name] = group_weights.get(ts.aggregation_group, 1)
+            else:
+                # Use individual weight or default to 1
+                weights[name] = ts.aggregation_weight or 1
+
+        if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
+            logger.info('All Aggregation weights were set to 1')
+
+        return weights
+
+    def _calculate_group_weights(self) -> Dict[str, float]:
+        """Calculate weights for aggregation groups."""
+        # Count series in each group
+        groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None]
+        group_counts = Counter(groups)
+
+        # Calculate weight for each group (1/count)
+        return {group: 1 / count for group, count in group_counts.items()}
+
     @staticmethod
     def _validate_timesteps(timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None):
         """
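
Note: a minimal, self-contained sketch of the 1/count group-weight rule that
calculate_aggregation_weights and _calculate_group_weights (added above) implement.
The series and group names below are made up for the illustration:

    from collections import Counter

    # Two series share the aggregation_group 'demand'; a third uses an individual weight.
    groups = ['demand', 'demand']
    group_weights = {group: 1 / count for group, count in Counter(groups).items()}  # {'demand': 0.5}

    weights = {
        'heat_demand': group_weights['demand'],   # 0.5, shared across the group
        'power_demand': group_weights['demand'],  # 0.5, shared across the group
        'spot_price': 0.8,                        # individual aggregation_weight (defaults to 1)
    }

Members of a group thus always contribute one unit of weight in total, no matter how
many series the group contains.
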
From 57cf231b2da13205588d666f84bf0714fafde715 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 16:56:38 +0200
Subject: [PATCH 48/55] Bugfix

---
 flixopt/flow_system.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py
index a26ceb32c..e70180cce 100644
--- a/flixopt/flow_system.py
+++ b/flixopt/flow_system.py
@@ -297,7 +297,7 @@ def create_time_series(
             data=data.selected_data, name=name, has_extra_timestep=has_extra_timestep
         )
     elif isinstance(data, TimeSeriesData):
-        data.name = name
+        data.label = name
         return self.time_series_allocator.add_time_series(
             data=data.data,
             name=name,
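
Note: this one-line fix works because TimeSeriesData carries its identifier as `label`,
not `name`; the previous assignment silently created an unused attribute. A hypothetical
sketch (only the attribute names `label`, `data`, `agg_weight`, `agg_group` appear in the
diffs; the constructor arguments are assumed here):

    import numpy as np
    from flixopt.core import TimeSeriesData

    tsd = TimeSeriesData(np.linspace(20, 60, 24), agg_group='prices')  # signature assumed
    tsd.label = 'electricity_price'  # the attribute create_time_series now sets
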
""" if self.scenarios is None: ds = xr.Dataset(coords={'time': self.timesteps_extra}) @@ -1167,9 +1172,11 @@ def as_dataset(self, without_extra_timestep: bool = False) -> xr.Dataset: ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) for ts in self._time_series.values(): + if not with_constants and ts.all_equal: + continue ds[ts.name] = ts.selected_data - if without_extra_timestep: + if not with_extra_timestep: return ds.sel(time=self.timesteps) return ds From 0376fca58ca8a91b577f4d5db18bb8160bfe98c8 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:10:45 +0200 Subject: [PATCH 50/55] Bugfix --- flixopt/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flixopt/core.py b/flixopt/core.py index 3f71ddcfc..ba9b91bb9 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -1157,7 +1157,7 @@ def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]) -> N timesteps, self._selected_hours_per_timestep.isel(time=-1).max().item() ) - def as_dataset(self, with_extra_timestep: bool = False, with_constants: bool = True) -> xr.Dataset: + def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset: """ Convert the TimeSeriesAllocator to a xarray Dataset, containing the data of each TimeSeries. From 28fac88e412e2e25fc2b51e862ae0a2c85479391 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:11:02 +0200 Subject: [PATCH 51/55] Update test --- tests/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 72aa1dee1..da0dc2564 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -290,8 +290,8 @@ def flow_system_segments_of_flows_2(flow_system_complex) -> fx.FlowSystem: { 'P_el': fx.Piecewise( [ - fx.Piece(np.linspace(5, 6, len(flow_system.time_series_collection.timesteps)), 30), - fx.Piece(40, np.linspace(60, 70, len(flow_system.time_series_collection.timesteps))), + fx.Piece(np.linspace(5, 6, len(flow_system.time_series_allocator.timesteps)), 30), + fx.Piece(40, np.linspace(60, 70, len(flow_system.time_series_allocator.timesteps))), ] ), 'Q_th': fx.Piecewise([fx.Piece(6, 35), fx.Piece(45, 100)]), From 6e2d7bfc669409817c0681b03be6ce04ed8138c0 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Mon, 31 Mar 2025 17:19:44 +0200 Subject: [PATCH 52/55] Remove test script --- time_series_alloc.py | 168 ------------------------------------------- 1 file changed, 168 deletions(-) delete mode 100644 time_series_alloc.py diff --git a/time_series_alloc.py b/time_series_alloc.py deleted file mode 100644 index 01876873b..000000000 --- a/time_series_alloc.py +++ /dev/null @@ -1,168 +0,0 @@ - -import numpy as np -import xarray as xr -import pandas as pd - -from flixopt.core import DataConverter, TimeSeriesAllocator - -class Element: - def __init__(self, name: str, data: xr.DataArray): - self.name = name - self.data = data - - -# Example script to demonstrate both classes -def main(): - print("Demonstrating DataConverter and TimeSeriesAllocator Classes") - print("=" * 70) - - # Create timesteps for our examples - start_date = pd.Timestamp('2025-01-01') - dates = [start_date + pd.Timedelta(days=i) for i in range(10)] - timesteps = pd.DatetimeIndex(dates, name='time') - - # Create scenarios for our examples - scenario_names = ['low', 'medium', 'high'] - scenarios = pd.Index(scenario_names, 
From 6e2d7bfc669409817c0681b03be6ce04ed8138c0 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:19:44 +0200
Subject: [PATCH 52/55] Remove test script

---
 time_series_alloc.py | 168 -------------------------------------------
 1 file changed, 168 deletions(-)
 delete mode 100644 time_series_alloc.py

diff --git a/time_series_alloc.py b/time_series_alloc.py
deleted file mode 100644
index 01876873b..000000000
--- a/time_series_alloc.py
+++ /dev/null
@@ -1,168 +0,0 @@
-
-import numpy as np
-import xarray as xr
-import pandas as pd
-
-from flixopt.core import DataConverter, TimeSeriesAllocator
-
-class Element:
-    def __init__(self, name: str, data: xr.DataArray):
-        self.name = name
-        self.data = data
-
-
-# Example script to demonstrate both classes
-def main():
-    print("Demonstrating DataConverter and TimeSeriesAllocator Classes")
-    print("=" * 70)
-
-    # Create timesteps for our examples
-    start_date = pd.Timestamp('2025-01-01')
-    dates = [start_date + pd.Timedelta(days=i) for i in range(10)]
-    timesteps = pd.DatetimeIndex(dates, name='time')
-
-    # Create scenarios for our examples
-    scenario_names = ['low', 'medium', 'high']
-    scenarios = pd.Index(scenario_names, name='scenario')
-
-    print(f"Created {len(timesteps)} timesteps from {timesteps[0]} to {timesteps[-1]}")
-    print(f"Created {len(scenarios)} scenarios: {', '.join(scenarios)}")
-    print("\n")
-
-    # Part 1: Demonstrate DataConverter with different types
-    print("Part 1: DataConverter Examples")
-    print("-" * 30)
-
-    # Example 1: Converting a scalar value
-    print("Example 1: Converting a scalar value (42)")
-    scalar_value = 42
-    scalar_da = DataConverter.as_dataarray(scalar_value, timesteps)
-    print(f"  Shape: {scalar_da.shape}, Dimensions: {scalar_da.dims}")
-    print(f"  First few values: {scalar_da.values[:3]}")
-    print(f"  All values are the same: {np.all(scalar_da.values == scalar_value)}")
-    print()
-
-    # Example 2: Converting a 1D numpy array
-    print("Example 2: Converting a 1D numpy array")
-    array_1d = np.arange(len(timesteps)) * 10
-    array_da = DataConverter.as_dataarray(array_1d, timesteps)
-    print(f"  Shape: {array_da.shape}, Dimensions: {array_da.dims}")
-    print(f"  First few values: {array_da.values[:3]}")
-    print(f"  Values match input: {np.all(array_da.values == array_1d)}")
-    print()
-
-    # Example 3: Converting a pandas Series with time index
-    print("Example 3: Converting a pandas Series with time index")
-    series = pd.Series(np.random.rand(len(timesteps)) * 100, index=timesteps)
-    series_da = DataConverter.as_dataarray(series, timesteps)
-    print(f"  Shape: {series_da.shape}, Dimensions: {series_da.dims}")
-    print(f"  First few values: {series_da.values[:3]}")
-    print(f"  Values match input: {np.all(series_da.values == series.values)}")
-    print()
-
-    # Example 4: Converting with scenarios
-    print("Example 4: Converting data with scenarios")
-    # Create 2D array with shape (scenarios, timesteps)
-    array_2d = np.random.rand(len(scenarios), len(timesteps)) * 100
-    array_2d_da = DataConverter.as_dataarray(array_2d, timesteps, scenarios)
-    print(f"  Shape: {array_2d_da.shape}, Dimensions: {array_2d_da.dims}")
-    print(f"  Values for first scenario: {array_2d_da.sel(scenario='low').values[:3]}")
-    print(f"  Values match input: {np.all(array_2d_da.values == array_2d)}")
-    print()
-
-    # Example 5: Broadcasting a 1D array to scenarios
-    print("Example 5: Broadcasting a 1D array to scenarios")
-    broadcast_da = DataConverter.as_dataarray(array_1d, timesteps, scenarios)
-    print(f"  Shape: {broadcast_da.shape}, Dimensions: {broadcast_da.dims}")
-    print(f"  Original shape: {array_1d.shape}")
-    print(f"  All scenarios have identical values: {np.all(broadcast_da.sel(scenario='low').values == broadcast_da.sel(scenario='medium').values)}")
-    print("\n")
-
-    # Part 2: Demonstrate TimeSeriesAllocator
-    print("Part 2: TimeSeriesAllocator Examples")
-    print("-" * 35)
-
-    # Create a TimeSeriesAllocator instance
-    print("Creating TimeSeriesAllocator with timesteps and scenarios")
-    allocator = TimeSeriesAllocator(timesteps, scenarios)
-    print(f"  Regular timesteps: {len(allocator.timesteps)}")
-    print(f"  Extended timesteps: {len(allocator.timesteps_extra)}")
-    print(f"  Added extra timestep: {allocator.timesteps_extra[-1]}")
-    print(f"  Hours per timestep: {allocator.hours_per_timestep.values[0]:.1f} hours")
-    print()
-
-    # Add data arrays to the allocator
-    print("Adding data arrays to the allocator")
-
-    # Example 1: Add a scalar value (broadcast to all timesteps and scenarios)
-    constant_val = 42
-    constant_da = allocator.add_data_array("constant", constant_val)
-    print("  Added 'constant' (scalar value 42)")
-    print(f"  Shape: {constant_da.shape}")
-    print(f"  Values: All {constant_val}")
-    print()
-
-    # Example 2: Add a 1D array (mapped to timesteps, broadcast to scenarios)
-    ramp_values = np.linspace(10, 100, len(timesteps))
-    ramp_da = allocator.add_data_array("ramp", ramp_values)
-    print("  Added 'ramp' (linear values from 10 to 100)")
-    print(f"  Shape: {ramp_da.shape}")
-    print(f"  First few values: {ramp_da.sel(scenario='low').values[:3]}")
-    print()
-
-    # Example 3: Add a 2D array (scenarios × timesteps)
-    demand_values = np.zeros((len(scenarios), len(timesteps)))
-    # Low scenario: constant demand
-    demand_values[0, :] = 50
-    # Medium scenario: linearly increasing
-    demand_values[1, :] = np.linspace(50, 100, len(timesteps))
-    # High scenario: exponentially increasing
-    demand_values[2, :] = 50 * np.exp(np.linspace(0, 1, len(timesteps)))
-
-    demand_da = allocator.add_data_array("demand", demand_values)
-    print("  Added 'demand' (different profile per scenario)")
-    print(f"  Shape: {demand_da.shape}")
-    for i, scenario in enumerate(scenarios):
-        print(f"  {scenario} scenario first value: {demand_da.sel(scenario=scenario).values[0]:.1f}")
-    print()
-
-    # Example 4: Add data with extra timestep
-    forecast_values = np.random.normal(size=(len(scenarios), len(timesteps) + 1)) * 10 + 100
-    forecast_da = allocator.add_data_array("forecast", forecast_values, has_extra_timestep=True)
-    print("  Added 'forecast' (with extra timestep)")
-    print(f"  Shape: {forecast_da.shape}")
-    print(f"  Last regular timestep: {timesteps[-1]}")
-    print(f"  Extra timestep: {allocator.timesteps_extra[-1]}")
-    print()
-
-    # Demonstrate selection functionality
-    print("Demonstrating selection functionality")
-    # Select a subset of timesteps
-    subset_timesteps = timesteps[3:7]
-    print(f"  Selecting timesteps from {subset_timesteps[0]} to {subset_timesteps[-1]}")
-    allocator.set_selection(timesteps=subset_timesteps)
-
-    # Access data with the selection applied
-    demand_subset = allocator["demand"]
-    print(f"  Original demand shape: {demand_da.shape}")
-    print(f"  Selected demand shape: {demand_subset.shape}")
-    print()
-
-    # Select a single scenario
-    print("  Selecting only the 'high' scenario")
-    allocator.set_selection(scenarios=pd.Index(['high'], name='scenario'))
-    demand_high = allocator["demand"]
-    print(f"  Shape after scenario selection: {demand_high.shape}")
-    print()
-
-    # Clear the selection
-    print("  Clearing all selections")
-    allocator.clear_selection()
-    demand_full = allocator["demand"]
-    print(f"  Shape after clearing selection: {demand_full.shape}")
-    print()
-
-    print("Examples completed successfully!")
-
-if __name__ == "__main__":
-    main()
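
Note: the removed script was the only end-to-end demo of the converter. Its core
conversion behavior, for reference, fits in a few lines (API exactly as exercised by
the deleted file):

    import numpy as np
    import pandas as pd
    from flixopt.core import DataConverter

    timesteps = pd.date_range('2025-01-01', periods=10, freq='D', name='time')
    scenarios = pd.Index(['low', 'medium', 'high'], name='scenario')

    # A 1D array is mapped to 'time' and broadcast over 'scenario':
    da = DataConverter.as_dataarray(np.arange(10) * 10.0, timesteps, scenarios)
    assert da.dims == ('scenario', 'time') and da.shape == (3, 10)
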
From 28b81994273b23bc991af2bc1fc4a0c1f6c65392 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:22:32 +0200
Subject: [PATCH 53/55] ruff check

---
 tests/test_dataconverter.py | 8 ++++++--
 tests/test_timeseries.py    | 4 ++--
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py
index 2afd2c547..579de9c00 100644
--- a/tests/test_dataconverter.py
+++ b/tests/test_dataconverter.py
@@ -3,7 +3,11 @@
 import pytest
 import xarray as xr
 
-from flixopt.core import ConversionError, DataConverter, TimeSeries  # Adjust this import to match your project structure
+from flixopt.core import (  # Adjust this import to match your project structure
+    ConversionError,
+    DataConverter,
+    TimeSeries,
+)
 
 
 @pytest.fixture
@@ -821,7 +825,7 @@ def test_multiindex_reindexing(self, sample_time_index):
 
         # Create values - order should match the source index
         values = []
-        for i, scenario in enumerate(source_scenarios):
+        for i, _ in enumerate(source_scenarios):
             values.extend([i * 10 + j for j in range(1, len(sample_time_index) + 1)])
 
         # Create Series
diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py
index ace2fdb41..a314ab06a 100644
--- a/tests/test_timeseries.py
+++ b/tests/test_timeseries.py
@@ -709,7 +709,7 @@ def test_contains_and_iteration(self, sample_scenario_allocator):
         """Test __contains__ and __iter__ methods."""
         # Add some time series
         ts1 = sample_scenario_allocator.add_time_series('series1', 42)
-        ts2 = sample_scenario_allocator.add_time_series('series2', 10)
+        sample_scenario_allocator.add_time_series('series2', 10)
 
         # Test __contains__
         assert 'series1' in sample_scenario_allocator
@@ -718,7 +718,7 @@ def test_contains_and_iteration(self, sample_scenario_allocator):
 
         # Test behavior with invalid type
         with pytest.raises(TypeError):
-            42 in sample_scenario_allocator
+            assert 42 in sample_scenario_allocator
 
     def test_update_time_series_with_scenarios(self, sample_scenario_allocator, sample_scenario_index):
         """Test updating a time series with scenarios."""
From 02f57f0c58ca8d37810a28bb92d7b044957 Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 17:24:06 +0200
Subject: [PATCH 54/55] Revert some renaming

---
 flixopt/calculation.py   | 26 +++++++++++++-------------
 flixopt/core.py          |  6 +++---
 flixopt/effects.py       |  2 +-
 flixopt/elements.py      |  2 +-
 flixopt/flow_system.py   | 24 ++++++++++++------------
 flixopt/results.py       |  6 +++---
 flixopt/structure.py     | 14 +++++++-------
 tests/conftest.py        |  4 ++--
 tests/test_timeseries.py | 22 +++++++++++-----------
 9 files changed, 53 insertions(+), 53 deletions(-)

diff --git a/flixopt/calculation.py b/flixopt/calculation.py
index d5665b31a..03cf8b9a6 100644
--- a/flixopt/calculation.py
+++ b/flixopt/calculation.py
@@ -119,7 +119,7 @@ def main_results(self) -> Dict[str, Union[Scalar, Dict]]:
     def summary(self):
         return {
             'Name': self.name,
-            'Number of timesteps': len(self.flow_system.time_series_allocator.timesteps),
+            'Number of timesteps': len(self.flow_system.time_series_collection.timesteps),
             'Calculation Type': self.__class__.__name__,
             'Constraints': self.model.constraints.ncons,
             'Variables': self.model.variables.nvars,
@@ -183,7 +183,7 @@ def solve(self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_ma
 
     def _activate_time_series(self):
         self.flow_system.transform_data()
-        self.flow_system.time_series_allocator.set_selection(
+        self.flow_system.time_series_collection.set_selection(
            timesteps=self.active_timesteps
        )

@@ -217,7 +217,7 @@ def __init__(
                 list with indices, which should be used for calculation. If None, then all timesteps are used.
             folder: folder where results should be saved. If None, then the current working directory is used.
         """
-        if flow_system.time_series_allocator.scenarios is not None:
+        if flow_system.time_series_collection.scenarios is not None:
             raise ValueError('Aggregation is not supported for scenarios yet. Please use FullCalculation instead.')
         super().__init__(name, flow_system, active_timesteps, folder=folder)
         self.aggregation_parameters = aggregation_parameters
@@ -247,8 +247,8 @@ def _perform_aggregation(self):
 
         # Validation
         dt_min, dt_max = (
-            np.min(self.flow_system.time_series_allocator.hours_per_timestep),
-            np.max(self.flow_system.time_series_allocator.hours_per_timestep),
+            np.min(self.flow_system.time_series_collection.hours_per_timestep),
+            np.max(self.flow_system.time_series_collection.hours_per_timestep),
         )
         if not dt_min == dt_max:
             raise ValueError(
@@ -257,11 +257,11 @@ def _perform_aggregation(self):
             )
         steps_per_period = (
             self.aggregation_parameters.hours_per_period
-            / self.flow_system.time_series_allocator.hours_per_timestep.max()
+            / self.flow_system.time_series_collection.hours_per_timestep.max()
         )
         is_integer = (
             self.aggregation_parameters.hours_per_period
-            % self.flow_system.time_series_allocator.hours_per_timestep.max()
+            % self.flow_system.time_series_collection.hours_per_timestep.max()
         ).item() == 0
         if not (steps_per_period.size == 1 and is_integer):
             raise ValueError(
@@ -274,13 +274,13 @@ def _perform_aggregation(self):
 
         # Aggregation - creation of aggregated timeseries:
         self.aggregation = Aggregation(
-            original_data=self.flow_system.time_series_allocator.as_dataset(
+            original_data=self.flow_system.time_series_collection.as_dataset(
                 with_extra_timestep=False, with_constants=False
             ).to_dataframe(),
             hours_per_time_step=float(dt_min),
             hours_per_period=self.aggregation_parameters.hours_per_period,
             nr_of_periods=self.aggregation_parameters.nr_of_periods,
-            weights=self.flow_system.time_series_allocator.calculate_aggregation_weights(),
+            weights=self.flow_system.time_series_collection.calculate_aggregation_weights(),
             time_series_for_high_peaks=self.aggregation_parameters.labels_for_high_peaks,
             time_series_for_low_peaks=self.aggregation_parameters.labels_for_low_peaks,
         )
@@ -290,9 +290,9 @@ def _perform_aggregation(self):
         if self.aggregation_parameters.aggregate_data_and_fix_non_binary_vars:
             for col in self.aggregation.aggregated_data.columns:
                 data = self.aggregation.aggregated_data[col].values
-                if col in self.flow_system.time_series_allocator._has_extra_timestep:
+                if col in self.flow_system.time_series_collection._has_extra_timestep:
                     data = np.append(data, data[-1])
-                self.flow_system.time_series_allocator.update_time_series(col, data)
+                self.flow_system.time_series_collection.update_time_series(col, data)
 
         self.durations['aggregation'] = round(timeit.default_timer() - t_start_agg, 2)
 
@@ -331,8 +331,8 @@ def __init__(
         self.nr_of_previous_values = nr_of_previous_values
         self.sub_calculations: List[FullCalculation] = []
 
-        self.all_timesteps = self.flow_system.time_series_allocator._full_timesteps
-        self.all_timesteps_extra = self.flow_system.time_series_allocator._full_timesteps_extra
+        self.all_timesteps = self.flow_system.time_series_collection._full_timesteps
+        self.all_timesteps_extra = self.flow_system.time_series_collection._full_timesteps_extra
 
         self.segment_names = [
             f'Segment_{i + 1}' for i in range(math.ceil(len(self.all_timesteps) / self.timesteps_per_segment))
diff --git a/flixopt/core.py b/flixopt/core.py
index ba9b91bb9..d2a8edd59 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -1000,7 +1000,7 @@ def __str__(self):
         return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, "  ")}'
 
 
-class TimeSeriesAllocator:
+class TimeSeriesCollection:
     """
     Simplified central manager for time series data with reference tracking.
 
@@ -1014,7 +1014,7 @@ def __init__(
         hours_of_last_timestep: Optional[float] = None,
         hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
     ):
-        """Initialize a TimeSeriesAllocator."""
+        """Initialize a TimeSeriesCollection."""
         self._validate_timesteps(timesteps)
         self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps(
             timesteps, hours_of_previous_timesteps
@@ -1159,7 +1159,7 @@ def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]) -> N
 
     def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset:
         """
-        Convert the TimeSeriesAllocator to a xarray Dataset, containing the data of each TimeSeries.
+        Convert the TimeSeriesCollection to a xarray Dataset, containing the data of each TimeSeries.
 
         Args:
             with_extra_timestep: Whether to include the extra timestep.
diff --git a/flixopt/effects.py b/flixopt/effects.py
index 1b5745a0a..9b5ea41d6 100644
--- a/flixopt/effects.py
+++ b/flixopt/effects.py
@@ -13,7 +13,7 @@
 import numpy as np
 import pandas as pd
 
-from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesAllocator
+from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesCollection
 from .features import ShareAllocationModel
 from .structure import Element, ElementModel, Interface, Model, SystemModel, register_class_for_io
 
diff --git a/flixopt/elements.py b/flixopt/elements.py
index 605554e5c..95536b910 100644
--- a/flixopt/elements.py
+++ b/flixopt/elements.py
@@ -10,7 +10,7 @@
 import numpy as np
 
 from .config import CONFIG
-from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesAllocator
+from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesCollection
 from .effects import EffectValuesUser
 from .features import InvestmentModel, OnOffModel, PreventSimultaneousUsageModel
 from .interface import InvestParameters, OnOffParameters
diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py
index e70180cce..e39d71e94 100644
--- a/flixopt/flow_system.py
+++ b/flixopt/flow_system.py
@@ -16,7 +16,7 @@
 from rich.pretty import Pretty
 
 from . import io as fx_io
-from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesAllocator, TimeSeriesData
+from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesCollection, TimeSeriesData
 from .effects import Effect, EffectCollection, EffectTimeSeries, EffectValuesDict, EffectValuesUser
 from .elements import Bus, Component, Flow
 from .structure import CLASS_REGISTRY, Element, SystemModel, get_compact_representation, get_str_representation
@@ -49,7 +49,7 @@ def __init__(
                 This is needed to calculate previous durations (for example consecutive_on_hours).
                 If you use an array, take care that it's long enough to cover all previous values!
         """
-        self.time_series_allocator = TimeSeriesAllocator(
+        self.time_series_collection = TimeSeriesCollection(
             timesteps=timesteps,
             scenarios=scenarios,
             hours_of_last_timestep=hours_of_last_timestep,
@@ -67,7 +67,7 @@ def __init__(
     @classmethod
     def from_dataset(cls, ds: xr.Dataset):
         timesteps_extra = pd.DatetimeIndex(ds.attrs['timesteps_extra'], name='time')
-        hours_of_last_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item()
+        hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item()
 
         flow_system = FlowSystem(
             timesteps=timesteps_extra[:-1],
@@ -92,7 +92,7 @@ def from_dict(cls, data: Dict) -> 'FlowSystem':
             data: Dictionary containing the FlowSystem data.
         """
         timesteps_extra = pd.DatetimeIndex(data['timesteps_extra'], name='time')
-        hours_of_last_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item()
+        hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item()
 
         flow_system = FlowSystem(
             timesteps=timesteps_extra[:-1],
@@ -171,8 +171,8 @@ def as_dict(self, data_mode: Literal['data', 'name', 'stats'] = 'data') -> Dict:
                 effect.label: effect.to_dict()
                 for effect in sorted(self.effects, key=lambda effect: effect.label.upper())
             },
-            'timesteps_extra': [date.isoformat() for date in self.time_series_allocator.timesteps_extra],
-            'hours_of_previous_timesteps': self.time_series_allocator.hours_of_previous_timesteps,
+            'timesteps_extra': [date.isoformat() for date in self.time_series_collection.timesteps_extra],
+            'hours_of_previous_timesteps': self.time_series_collection.hours_of_previous_timesteps,
         }
         if data_mode == 'data':
             return fx_io.replace_timeseries(data, 'data')
@@ -187,7 +187,7 @@ def as_dataset(self, constants_in_dataset: bool = False) -> xr.Dataset:
         Args:
             constants_in_dataset: If True, constants are included as Dataset variables.
         """
-        ds = self.time_series_allocator.as_dataset()
+        ds = self.time_series_collection.as_dataset()
         ds.attrs = self.as_dict(data_mode='name')
         return ds
 
@@ -281,7 +281,7 @@ def create_time_series(
         has_extra_timestep: bool = False,
     ) -> Optional[TimeSeries]:
         """
-        Tries to create a TimeSeries from NumericData Data and adds it to the time_series_allocator
+        Tries to create a TimeSeries from NumericData and adds it to the time_series_collection
 
         If the data already is a TimeSeries, nothing happens and the TimeSeries gets reset and returned
         If the data is a TimeSeriesData, it is converted to a TimeSeries, and the aggregation weights are applied.
         If the data is None, nothing happens.
@@ -291,21 +291,21 @@ def create_time_series(
             return None
         elif isinstance(data, TimeSeries):
             data.restore_data()
-            if data in self.time_series_allocator:
+            if data in self.time_series_collection:
                 return data
-            return self.time_series_allocator.add_time_series(
+            return self.time_series_collection.add_time_series(
                 data=data.selected_data, name=name, has_extra_timestep=has_extra_timestep
             )
         elif isinstance(data, TimeSeriesData):
             data.label = name
-            return self.time_series_allocator.add_time_series(
+            return self.time_series_collection.add_time_series(
                 data=data.data,
                 name=name,
                 has_extra_timestep=has_extra_timestep,
                 aggregation_weight=data.agg_weight,
                 aggregation_group=data.agg_group
             )
-        return self.time_series_allocator.add_time_series(
+        return self.time_series_collection.add_time_series(
             data=data, name=name, has_extra_timestep=has_extra_timestep
         )
diff --git a/flixopt/results.py b/flixopt/results.py
index 90a86d1b2..d9eb5a654 100644
--- a/flixopt/results.py
+++ b/flixopt/results.py
@@ -14,7 +14,7 @@
 
 from . import io as fx_io
 from . import plotting
-from .core import TimeSeriesAllocator
+from .core import TimeSeriesCollection
 
 if TYPE_CHECKING:
     import pyvis
@@ -160,7 +160,7 @@ def __init__(
         }
 
         self.timesteps_extra = self.solution.indexes['time']
-        self.hours_per_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(self.timesteps_extra)
+        self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.timesteps_extra)
 
     def __getitem__(self, key: str) -> Union['ComponentResults', 'BusResults', 'EffectResults']:
         if key in self.components:
@@ -684,7 +684,7 @@ def __init__(
         self.overlap_timesteps = overlap_timesteps
         self.name = name
         self.folder = pathlib.Path(folder) if folder is not None else pathlib.Path.cwd() / 'results'
-        self.hours_per_timestep = TimeSeriesAllocator.calculate_hours_per_timestep(self.all_timesteps)
+        self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.all_timesteps)
 
     @property
     def meta_data(self) -> Dict[str, Union[int, List[str]]]:
diff --git a/flixopt/structure.py b/flixopt/structure.py
index adabdfb80..2e136c652 100644
--- a/flixopt/structure.py
+++ b/flixopt/structure.py
@@ -19,7 +19,7 @@
 from rich.pretty import Pretty
 
 from .config import CONFIG
-from .core import NumericData, Scalar, TimeSeries, TimeSeriesAllocator, TimeSeriesData
+from .core import NumericData, Scalar, TimeSeries, TimeSeriesCollection, TimeSeriesData
 
 if TYPE_CHECKING:  # for type checking and preventing circular imports
     from .effects import EffectCollectionModel
@@ -56,7 +56,7 @@ def __init__(self, flow_system: 'FlowSystem'):
         """
         super().__init__(force_dim_names=True)
         self.flow_system = flow_system
-        self.time_series_allocator = flow_system.time_series_allocator
+        self.time_series_collection = flow_system.time_series_collection
         self.effects: Optional[EffectCollectionModel] = None
 
     def do_modeling(self):
@@ -88,23 +88,23 @@ def solution(self):
                 for effect in sorted(self.flow_system.effects, key=lambda effect: effect.label_full.upper())
             },
         }
-        return solution.reindex(time=self.time_series_allocator.timesteps_extra)
+        return solution.reindex(time=self.time_series_collection.timesteps_extra)
 
     @property
     def hours_per_step(self):
-        return self.time_series_allocator.hours_per_timestep
+        return self.time_series_collection.hours_per_timestep
 
     @property
     def hours_of_previous_timesteps(self):
-        return self.time_series_allocator.hours_of_previous_timesteps
+        return self.time_series_collection.hours_of_previous_timesteps
 
     @property
     def coords(self) -> Tuple[pd.DatetimeIndex]:
-        return (self.time_series_allocator.timesteps,)
+        return (self.time_series_collection.timesteps,)
 
     @property
     def coords_extra(self) -> Tuple[pd.DatetimeIndex]:
-        return (self.time_series_allocator.timesteps_extra,)
+        return (self.time_series_collection.timesteps_extra,)
 
 
 class Interface:
diff --git a/tests/conftest.py b/tests/conftest.py
index da0dc2564..72aa1dee1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -290,8 +290,8 @@ def flow_system_segments_of_flows_2(flow_system_complex) -> fx.FlowSystem:
         {
             'P_el': fx.Piecewise(
                 [
-                    fx.Piece(np.linspace(5, 6, len(flow_system.time_series_allocator.timesteps)), 30),
-                    fx.Piece(40, np.linspace(60, 70, len(flow_system.time_series_allocator.timesteps))),
+                    fx.Piece(np.linspace(5, 6, len(flow_system.time_series_collection.timesteps)), 30),
+                    fx.Piece(40, np.linspace(60, 70, len(flow_system.time_series_collection.timesteps))),
                 ]
             ),
             'Q_th': fx.Piecewise([fx.Piece(6, 35), fx.Piece(45, 100)]),
diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py
index a314ab06a..9ba9d1847 100644
--- a/tests/test_timeseries.py
+++ b/tests/test_timeseries.py
@@ -8,7 +8,7 @@
 import pytest
 import xarray as xr
 
-from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesAllocator
+from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesCollection
 
 
 @pytest.fixture
@@ -331,14 +331,14 @@ def sample_scenario_timeseries(simple_scenario_dataarray):
 
 @pytest.fixture
 def sample_allocator(sample_timesteps):
-    """Create a sample TimeSeriesAllocator."""
-    return TimeSeriesAllocator(sample_timesteps)
+    """Create a sample TimeSeriesCollection."""
+    return TimeSeriesCollection(sample_timesteps)
 
 
 @pytest.fixture
 def sample_scenario_allocator(sample_timesteps, sample_scenario_index):
-    """Create a sample TimeSeriesAllocator with scenarios."""
-    return TimeSeriesAllocator(sample_timesteps, scenarios=sample_scenario_index)
+    """Create a sample TimeSeriesCollection with scenarios."""
+    return TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index)
 
 
 class TestTimeSeriesWithScenarios:
@@ -454,11 +454,11 @@ def test_arithmetic_with_scenarios(self, sample_scenario_timeseries, sample_time
 
 
 class TestTimeSeriesAllocator:
-    """Test suite for TimeSeriesAllocator class."""
+    """Test suite for TimeSeriesCollection class."""
 
     def test_initialization(self, sample_timesteps):
         """Test basic initialization."""
-        allocator = TimeSeriesAllocator(sample_timesteps)
+        allocator = TimeSeriesCollection(sample_timesteps)
 
         assert allocator.timesteps.equals(sample_timesteps)
         assert len(allocator.timesteps_extra) == len(sample_timesteps) + 1
@@ -469,7 +469,7 @@ def test_initialization_with_custom_hours(self, sample_timesteps):
         """Test initialization with custom hour settings."""
         # Test with last timestep duration
         last_timestep_hours = 12
-        allocator = TimeSeriesAllocator(sample_timesteps, hours_of_last_timestep=last_timestep_hours)
+        allocator = TimeSeriesCollection(sample_timesteps, hours_of_last_timestep=last_timestep_hours)
 
         # Verify the last timestep duration
         extra_step_delta = allocator.timesteps_extra[-1] - allocator.timesteps_extra[-2]
@@ -477,7 +477,7 @@ def test_initialization_with_custom_hours(self, sample_timesteps):
 
         # Test with previous timestep duration
         hours_per_step = 8
-        allocator2 = TimeSeriesAllocator(sample_timesteps, hours_of_previous_timesteps=hours_per_step)
+        allocator2 = TimeSeriesCollection(sample_timesteps, hours_of_previous_timesteps=hours_per_step)
 
         assert allocator2.hours_of_previous_timesteps == hours_per_step
@@ -595,11 +595,11 @@ def test_as_dataset(self, sample_allocator):
 
 
 class TestTimeSeriesAllocatorWithScenarios:
-    """Test suite for TimeSeriesAllocator with scenarios."""
+    """Test suite for TimeSeriesCollection with scenarios."""
 
     def test_initialization_with_scenarios(self, sample_timesteps, sample_scenario_index):
         """Test initialization with scenarios."""
-        allocator = TimeSeriesAllocator(sample_timesteps, scenarios=sample_scenario_index)
+        allocator = TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index)
 
         assert allocator.timesteps.equals(sample_timesteps)
         assert allocator.scenarios.equals(sample_scenario_index)

From 9c66de523f29398282904e7bc2dae37e0eac36bc Mon Sep 17 00:00:00 2001
From: FBumann <117816358+FBumann@users.noreply.github.com>
Date: Mon, 31 Mar 2025 18:29:09 +0200
Subject: [PATCH 55/55] Bugfix in test

---
 tests/test_timeseries.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py
index 9ba9d1847..50136536b 100644
--- a/tests/test_timeseries.py
+++ b/tests/test_timeseries.py
@@ -584,7 +584,7 @@ def test_as_dataset(self, sample_allocator):
         sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5]))
 
         # Get dataset
-        ds = sample_allocator.as_dataset(without_extra_timestep=True)
+        ds = sample_allocator.as_dataset(with_extra_timestep=False)
 
         # Check dataset contents
         assert isinstance(ds, xr.Dataset)
@@ -688,7 +688,7 @@ def test_as_dataset_with_scenarios(self, sample_scenario_allocator):
         )
 
         # Get dataset
-        ds = sample_scenario_allocator.as_dataset(without_extra_timestep=True)
+        ds = sample_scenario_allocator.as_dataset(with_extra_timestep=False)
 
         # Check dataset dimensions
         assert 'scenario' in ds.dims
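
Note: after the rename in PATCH 54 and the keyword fix above, the end-to-end usage reads
as follows (a minimal sketch mirroring the updated tests):

    import pandas as pd
    from flixopt.core import TimeSeriesCollection

    timesteps = pd.date_range('2025-01-01', periods=5, freq='h', name='time')
    scenarios = pd.Index(['base', 'high'], name='scenario')

    collection = TimeSeriesCollection(timesteps, scenarios=scenarios)
    collection.add_time_series('price', 42)  # scalar broadcast over both dimensions

    ds = collection.as_dataset(with_extra_timestep=False)
    assert 'scenario' in ds.dims
    assert len(ds.indexes['time']) == len(timesteps)  # the extra timestep is dropped
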