diff --git a/docs/user-guide/Mathematical Notation/Investment.md b/docs/user-guide/Mathematical Notation/Investment.md
new file mode 100644
index 000000000..64467261d
--- /dev/null
+++ b/docs/user-guide/Mathematical Notation/Investment.md
@@ -0,0 +1,115 @@
+# Investments
+
+## Current state
+$$
+\beta_{\text{invest}} \cdot \max(\epsilon, V^{\text{L}}) \leq V \leq \beta_{\text{invest}} \cdot V^{\text{U}}
+$$
+With:
+- $V$ = size
+- $V^{\text{L}}$ = minimum size
+- $V^{\text{U}}$ = maximum size
+- $\epsilon$ = epsilon, a small number (such as $10^{-5}$)
+- $\beta_{\text{invest}} \in \{0,1\}$ = whether the size is invested in or not
+
+_Please edit the use cases as needed_
+
+## Quickfix 1: Optimize the single best size overall
+### Single variable
+This is already possible and should stay possible, as it is a needed use case.
+An additional factor indicating when the size is actually available might be practical (it would encode the (fixed) time of investment).
+### Math
+$$
+V(p) = V \cdot a(p)
+$$
+with:
+- $V$ = size
+- $a(p)$ = availability factor per period
+
+Factor $a(p)$ is simply multiplied with the relative minimum or maximum per timestep. This is already possible by doing it yourself.
+Effectively, the relative minimum or maximum are altered before using the same constraints as before.
+This might lead to issues regarding the minimum load factor (among others), as the size is not 0 in a scenario where the component can't produce.
+**Therefore this might not be the best choice. See [Variable per Scenario](#variable-per-scenario).**
+
+## Variable per Scenario
+- **size** and **invest** as variables per scenario: $V(s)$ and $\beta_{\text{invest}}(s)$
+- with scenario $s \in S$
+
+### Usecase 1: Optimize the size for each Scenario independently
+Restrictions apply separately to each scenario.
+No changes needed. This could be the default behaviour.
+
+### Usecase 2: Optimize ONE size for ALL scenarios
+The size is the same globally, but modeled not as a scalar but as a variable per scenario $V(s)$.
+#### 2a: The same size in all scenarios
+$$
+V(s) = V(s') \quad \forall s,s' \in S
+$$
+
+With:
+- $V(s)$ and $V(s')$ = size
+- $S$ = set of scenarios
+
+#### 2b: The same size, but can be 0 prior to the first increment
+- Find the optimal time of investment.
+- Force an investment in a certain scenario (optional parameter as a list/array of booleans)
+- Combine optional and minimum/maximum size to force an investment inside a range of scenarios
+
+$$
+\beta_{\text{invest}}(s) \leq \beta_{\text{invest}}(s+1) \quad \forall s \in \{1,2,\ldots,S-1\}
+$$
+
+$$
+V(s') - V(s) \leq M \cdot (2 - \beta_{\text{invest}}(s) - \beta_{\text{invest}}(s')) \quad \forall s, s' \in S
+$$
+$$
+V(s') - V(s) \geq -M \cdot (2 - \beta_{\text{invest}}(s) - \beta_{\text{invest}}(s')) \quad \forall s, s' \in S
+$$
+
+This could be the default behaviour (which would be consistent with other variables).
+
+### Switch
+
+$$
+\begin{aligned}
+& \text{SWITCH}_s \in \{0,1\} \quad \forall s \in \{1,2,\ldots,S\} \\
+& \sum_{s=1}^{S} \text{SWITCH}_s = 1 \\
+& \beta_{\text{invest}}(s) = \sum_{s'=1}^{s} \text{SWITCH}_{s'} \quad \forall s \in \{1,2,\ldots,S\} \\
+\end{aligned}
+$$
+
+$$
+\begin{aligned}
+& V(s) \leq V_{\text{actual}} \quad \forall s \in \{1,2,\ldots,S\} \\
+& V(s) \geq V_{\text{actual}} - M \cdot (1 - \beta_{\text{invest}}(s)) \quad \forall s \in \{1,2,\ldots,S\}
+\end{aligned}
+$$
+
+A code sketch of this formulation is included under Usecase 3 below.
+
+### Usecase 3: Find the best scenario to increment the size (Timing of the investment)
+The size can only increment once (based on a starting point). This allows optimizing the timing of an investment.
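+
+The switch formulation from the previous section implements exactly this mechanism. A minimal, self-contained MILP sketch follows; it uses PuLP for brevity (flixopt itself builds its models with linopy), and all names and numbers are illustrative assumptions, not flixopt API:
+
+```python
+import pulp
+
+S = 4      # number of scenarios (illustrative)
+M = 100.0  # big-M constant, must be at least the maximum size V_U
+
+prob = pulp.LpProblem('investment_timing', pulp.LpMinimize)
+switch = pulp.LpVariable.dicts('switch', range(S), cat='Binary')
+invest = pulp.LpVariable.dicts('invest', range(S), cat='Binary')
+size = pulp.LpVariable.dicts('size', range(S), lowBound=0, upBound=M)
+size_actual = pulp.LpVariable('size_actual', lowBound=0, upBound=M)
+
+# Placeholder objective; a real model would minimize total costs.
+prob += size_actual
+
+# The switch fires in exactly one scenario ...
+prob += pulp.lpSum(switch[s] for s in range(S)) == 1
+# ... and invest(s) stays 1 from that scenario onwards.
+for s in range(S):
+    prob += invest[s] == pulp.lpSum(switch[t] for t in range(s + 1))
+
+# size(s) is forced to 0 before the investment and to size_actual afterwards.
+for s in range(S):
+    prob += size[s] <= M * invest[s]
+    prob += size[s] <= size_actual
+    prob += size[s] >= size_actual - M * (1 - invest[s])
+```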
+#### Math
+Treat $\beta_{\text{invest}}$ like an ON/OFF variable and introduce a SwitchOn that can only be active once.
+
+*Thoughts:*
+- Treating $\beta_{\text{invest}}$ like an ON/OFF variable suggests reusing the already present constraints linked to the OnOffModel
+- The timing could be constrained to be first in scenario x, or last in scenario y
+- Restrict the number of consecutive scenarios
+
+This might require the OnOffModel to be more generic (HOURS). Further, the span between scenarios needs to be weighted (like dt_in_hours), or the scenarios need to be measurable (integers).
+
+### Others
+
+#### Usecase 4: Only increase/decrease the size
+Start from a certain size. For each scenario, the size can increase, but never decrease (or the other way around).
+This would mean that a size expansion is possible, but not a reduction (or vice versa).
+
+#### Usecase 5: Restrict the increment in size per scenario
+Restrict how much the size can increase/decrease in a scenario, based on the prior scenario.
+
+Many more are possible.
diff --git a/examples/04_Scenarios/scenario_example.py b/examples/04_Scenarios/scenario_example.py
new file mode 100644
index 000000000..03c2a5be0
--- /dev/null
+++ b/examples/04_Scenarios/scenario_example.py
@@ -0,0 +1,123 @@
+"""
+This script shows how to use the flixopt framework to model a simple energy system.
+"""
+
+import numpy as np
+import pandas as pd
+from rich.pretty import pprint  # Used for pretty printing
+
+import flixopt as fx
+
+if __name__ == '__main__':
+    # Create datetime array starting from '2020-01-01' for the given time period
+    timesteps = pd.date_range('2020-01-01', periods=9, freq='h')
+    scenarios = pd.Index(['Base Case', 'High Demand'])
+
+    # --- Create Time Series Data ---
+    # Heat demand profile (e.g., kW) over time and corresponding power prices
+    heat_demand_per_h = pd.DataFrame(
+        {'Base Case': [30, 0, 90, 110, 110, 20, 20, 20, 20],
+         'High Demand': [30, 0, 100, 118, 125, 20, 20, 20, 20]},
+        index=timesteps,
+    )
+    power_prices = np.array([0.08, 0.09])
+
+    flow_system = fx.FlowSystem(timesteps=timesteps, scenarios=scenarios)
+
+    # --- Define Energy Buses ---
+    # These represent nodes where the used media are balanced (electricity, heat, and gas)
+    flow_system.add_elements(fx.Bus(label='Strom'), fx.Bus(label='Fernwärme'), fx.Bus(label='Gas'))
+
+    # --- Define Effects (Objective and CO2 Emissions) ---
+    # Cost effect: used as the optimization objective --> minimizing costs
+    costs = fx.Effect(
+        label='costs',
+        unit='€',
+        description='Kosten',
+        is_standard=True,  # standard effect: no explicit value needed for costs
+        is_objective=True,  # Minimizing costs as the optimization objective
+    )
+
+    # CO2 emissions effect with an associated cost impact
+    CO2 = fx.Effect(
+        label='CO2',
+        unit='kg',
+        description='CO2_e-Emissionen',
+        specific_share_to_other_effects_operation={costs.label: 0.2},
+        maximum_operation_per_hour=1000,  # Max CO2 emissions per hour
+    )
+
+    # --- Define Flow System Components ---
+    # Boiler: Converts fuel (gas) into thermal energy (heat)
+    boiler = fx.linear_converters.Boiler(
+        label='Boiler',
+        eta=0.5,
+        Q_th=fx.Flow(label='Q_th', bus='Fernwärme', size=50, relative_minimum=0.1, relative_maximum=1),
+        Q_fu=fx.Flow(label='Q_fu', bus='Gas'),
+    )
+
+    # Combined Heat and Power (CHP): Generates both electricity and heat from fuel
+    chp = fx.linear_converters.CHP(
+        label='CHP',
+        eta_th=0.5,
+        eta_el=0.4,
+        P_el=fx.Flow('P_el', bus='Strom', size=60, relative_minimum=5 / 60),
+        Q_th=fx.Flow('Q_th', bus='Fernwärme'),
+        Q_fu=fx.Flow('Q_fu', bus='Gas'),
+    )
+
+    # Storage:
Energy storage system with charging and discharging capabilities + storage = fx.Storage( + label='Storage', + charging=fx.Flow('Q_th_load', bus='Fernwärme', size=1000), + discharging=fx.Flow('Q_th_unload', bus='Fernwärme', size=1000), + capacity_in_flow_hours=fx.InvestParameters(fix_effects=20, fixed_size=30, optional=False), + initial_charge_state=0, # Initial storage state: empty + relative_maximum_charge_state=np.array([80, 70, 80, 80, 80, 80, 80, 80, 80, 80]) * 0.01, + eta_charge=0.9, + eta_discharge=1, # Efficiency factors for charging/discharging + relative_loss_per_hour=0.08, # 8% loss per hour. Absolute loss depends on current charge state + prevent_simultaneous_charge_and_discharge=True, # Prevent charging and discharging at the same time + ) + + # Heat Demand Sink: Represents a fixed heat demand profile + heat_sink = fx.Sink( + label='Heat Demand', + sink=fx.Flow(label='Q_th_Last', bus='Fernwärme', size=1, fixed_relative_profile=heat_demand_per_h), + ) + + # Gas Source: Gas tariff source with associated costs and CO2 emissions + gas_source = fx.Source( + label='Gastarif', + source=fx.Flow(label='Q_Gas', bus='Gas', size=1000, effects_per_flow_hour={costs.label: 0.04, CO2.label: 0.3}), + ) + + # Power Sink: Represents the export of electricity to the grid + power_sink = fx.Sink( + label='Einspeisung', sink=fx.Flow(label='P_el', bus='Strom', effects_per_flow_hour=-1 * power_prices) + ) + + # --- Build the Flow System --- + # Add all defined components and effects to the flow system + flow_system.add_elements(costs, CO2, boiler, storage, chp, heat_sink, gas_source, power_sink) + + # Visualize the flow system for validation purposes + flow_system.plot_network(show=True) + + # --- Define and Run Calculation --- + # Create a calculation object to model the Flow System + calculation = fx.FullCalculation(name='Sim1', flow_system=flow_system) + calculation.do_modeling() # Translate the model to a solvable form, creating equations and Variables + + # --- Solve the Calculation and Save Results --- + calculation.solve(fx.solvers.HighsSolver(mip_gap=0, time_limit_seconds=30)) + + # --- Analyze Results --- + calculation.results['Fernwärme'].plot_node_balance_pie() + calculation.results['Fernwärme'].plot_node_balance() + calculation.results['Storage'].plot_node_balance() + calculation.results.plot_heatmap('CHP(Q_th)|flow_rate') + + # Convert the results for the storage component to a dataframe and display + df = calculation.results['Storage'].node_balance_with_charge_state() + print(df) + + # Save results to file for later usage + calculation.results.to_file() diff --git a/flixopt/calculation.py b/flixopt/calculation.py index c7367cad2..2024739ea 100644 --- a/flixopt/calculation.py +++ b/flixopt/calculation.py @@ -44,19 +44,22 @@ def __init__( name: str, flow_system: FlowSystem, active_timesteps: Optional[pd.DatetimeIndex] = None, + selected_scenarios: Optional[pd.Index] = None, folder: Optional[pathlib.Path] = None, ): """ Args: name: name of calculation flow_system: flow_system which should be calculated - active_timesteps: list with indices, which should be used for calculation. If None, then all timesteps are used. + active_timesteps: timesteps which should be used for calculation. If None, then all timesteps are used. + selected_scenarios: scenarios which should be used for calculation. If None, then all scenarios are used. folder: folder where results should be saved. If None, then the current working directory is used. 
""" self.name = name self.flow_system = flow_system self.model: Optional[SystemModel] = None self.active_timesteps = active_timesteps + self.selected_scenarios = selected_scenarios self.durations = {'modeling': 0.0, 'solving': 0.0, 'saving': 0.0} self.folder = pathlib.Path.cwd() / 'results' if folder is None else pathlib.Path(folder) @@ -74,47 +77,49 @@ def __init__( def main_results(self) -> Dict[str, Union[Scalar, Dict]]: from flixopt.features import InvestmentModel - return { + main_results = { 'Objective': self.model.objective.value, - 'Penalty': float(self.model.effects.penalty.total.solution.values), + 'Penalty': self.model.effects.penalty.total.solution.values, 'Effects': { f'{effect.label} [{effect.unit}]': { - 'operation': float(effect.model.operation.total.solution.values), - 'invest': float(effect.model.invest.total.solution.values), - 'total': float(effect.model.total.solution.values), + 'operation': effect.model.operation.total.solution.values, + 'invest': effect.model.invest.total.solution.values, + 'total': effect.model.total.solution.values, } for effect in self.flow_system.effects }, 'Invest-Decisions': { 'Invested': { - model.label_of_element: float(model.size.solution) + model.label_of_element: model.size.solution for component in self.flow_system.components.values() for model in component.model.all_sub_models - if isinstance(model, InvestmentModel) and float(model.size.solution) >= CONFIG.modeling.EPSILON + if isinstance(model, InvestmentModel) and model.size.solution.max() >= CONFIG.modeling.EPSILON }, 'Not invested': { - model.label_of_element: float(model.size.solution) + model.label_of_element: model.size.solution for component in self.flow_system.components.values() for model in component.model.all_sub_models - if isinstance(model, InvestmentModel) and float(model.size.solution) < CONFIG.modeling.EPSILON + if isinstance(model, InvestmentModel) and model.size.solution.max() < CONFIG.modeling.EPSILON }, }, 'Buses with excess': [ { bus.label_full: { - 'input': float(np.sum(bus.model.excess_input.solution.values)), - 'output': float(np.sum(bus.model.excess_output.solution.values)), + 'input': bus.model.excess_input.solution.sum('time'), + 'output': bus.model.excess_output.solution.sum('time'), } } for bus in self.flow_system.buses.values() if bus.with_excess and ( - float(np.sum(bus.model.excess_input.solution.values)) > 1e-3 - or float(np.sum(bus.model.excess_output.solution.values)) > 1e-3 + bus.model.excess_input.solution.sum() > 1e-3 + or bus.model.excess_output.solution.sum() > 1e-3 ) ], } + return utils.round_floats(main_results) + @property def summary(self): return { @@ -183,8 +188,8 @@ def solve(self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_ma def _activate_time_series(self): self.flow_system.transform_data() - self.flow_system.time_series_collection.activate_timesteps( - active_timesteps=self.active_timesteps, + self.flow_system.time_series_collection.set_selection( + timesteps=self.active_timesteps, scenarios=self.selected_scenarios ) @@ -217,6 +222,8 @@ def __init__( list with indices, which should be used for calculation. If None, then all timesteps are used. folder: folder where results should be saved. If None, then the current working directory is used. """ + if flow_system.time_series_collection.scenarios is not None: + raise ValueError('Aggregation is not supported for scenarios yet. 
Please use FullCalculation instead.') super().__init__(name, flow_system, active_timesteps, folder=folder) self.aggregation_parameters = aggregation_parameters self.components_to_clusterize = components_to_clusterize @@ -272,9 +279,9 @@ def _perform_aggregation(self): # Aggregation - creation of aggregated timeseries: self.aggregation = Aggregation( - original_data=self.flow_system.time_series_collection.to_dataframe( - include_extra_timestep=False - ), # Exclude last row (NaN) + original_data=self.flow_system.time_series_collection.as_dataset( + with_extra_timestep=False, with_constants=False + ).to_dataframe(), hours_per_time_step=float(dt_min), hours_per_period=self.aggregation_parameters.hours_per_period, nr_of_periods=self.aggregation_parameters.nr_of_periods, @@ -286,9 +293,11 @@ def _perform_aggregation(self): self.aggregation.cluster() self.aggregation.plot(show=True, save=self.folder / 'aggregation.html') if self.aggregation_parameters.aggregate_data_and_fix_non_binary_vars: - self.flow_system.time_series_collection.insert_new_data( - self.aggregation.aggregated_data, include_extra_timestep=False - ) + for col in self.aggregation.aggregated_data.columns: + data = self.aggregation.aggregated_data[col].values + if col in self.flow_system.time_series_collection._has_extra_timestep: + data = np.append(data, data[-1]) + self.flow_system.time_series_collection.update_time_series(col, data) self.durations['aggregation'] = round(timeit.default_timer() - t_start_agg, 2) @@ -327,8 +336,8 @@ def __init__( self.nr_of_previous_values = nr_of_previous_values self.sub_calculations: List[FullCalculation] = [] - self.all_timesteps = self.flow_system.time_series_collection.all_timesteps - self.all_timesteps_extra = self.flow_system.time_series_collection.all_timesteps_extra + self.all_timesteps = self.flow_system.time_series_collection._full_timesteps + self.all_timesteps_extra = self.flow_system.time_series_collection._full_timesteps_extra self.segment_names = [ f'Segment_{i + 1}' for i in range(math.ceil(len(self.all_timesteps) / self.timesteps_per_segment)) diff --git a/flixopt/components.py b/flixopt/components.py index d5d1df12d..598ff06ab 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -9,7 +9,7 @@ import numpy as np from . 
import utils -from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeries +from .core import PlausibilityError, Scalar, ScenarioData, TimeSeries, TimestepData from .elements import Component, ComponentModel, Flow from .features import InvestmentModel, OnOffModel, PiecewiseModel from .interface import InvestParameters, OnOffParameters, PiecewiseConversion @@ -34,7 +34,7 @@ def __init__( inputs: List[Flow], outputs: List[Flow], on_off_parameters: OnOffParameters = None, - conversion_factors: List[Dict[str, NumericDataTS]] = None, + conversion_factors: List[Dict[str, TimestepData]] = None, piecewise_conversion: Optional[PiecewiseConversion] = None, meta_data: Optional[Dict] = None, ): @@ -92,6 +92,7 @@ def transform_data(self, flow_system: 'FlowSystem'): if self.conversion_factors: self.conversion_factors = self._transform_conversion_factors(flow_system) if self.piecewise_conversion: + self.piecewise_conversion.has_time_dim = True self.piecewise_conversion.transform_data(flow_system, f'{self.label_full}|PiecewiseConversion') def _transform_conversion_factors(self, flow_system: 'FlowSystem') -> List[Dict[str, TimeSeries]]: @@ -124,14 +125,14 @@ def __init__( charging: Flow, discharging: Flow, capacity_in_flow_hours: Union[Scalar, InvestParameters], - relative_minimum_charge_state: NumericData = 0, - relative_maximum_charge_state: NumericData = 1, - initial_charge_state: Union[Scalar, Literal['lastValueOfSim']] = 0, - minimal_final_charge_state: Optional[Scalar] = None, - maximal_final_charge_state: Optional[Scalar] = None, - eta_charge: NumericData = 1, - eta_discharge: NumericData = 1, - relative_loss_per_hour: NumericData = 0, + relative_minimum_charge_state: TimestepData = 0, + relative_maximum_charge_state: TimestepData = 1, + initial_charge_state: Union[ScenarioData, Literal['lastValueOfSim']] = 0, + minimal_final_charge_state: Optional[ScenarioData] = None, + maximal_final_charge_state: Optional[ScenarioData] = None, + eta_charge: TimestepData = 1, + eta_discharge: TimestepData = 1, + relative_loss_per_hour: TimestepData = 0, prevent_simultaneous_charge_and_discharge: bool = True, meta_data: Optional[Dict] = None, ): @@ -172,16 +173,16 @@ def __init__( self.charging = charging self.discharging = discharging self.capacity_in_flow_hours = capacity_in_flow_hours - self.relative_minimum_charge_state: NumericDataTS = relative_minimum_charge_state - self.relative_maximum_charge_state: NumericDataTS = relative_maximum_charge_state + self.relative_minimum_charge_state: TimestepData = relative_minimum_charge_state + self.relative_maximum_charge_state: TimestepData = relative_maximum_charge_state self.initial_charge_state = initial_charge_state self.minimal_final_charge_state = minimal_final_charge_state self.maximal_final_charge_state = maximal_final_charge_state - self.eta_charge: NumericDataTS = eta_charge - self.eta_discharge: NumericDataTS = eta_discharge - self.relative_loss_per_hour: NumericDataTS = relative_loss_per_hour + self.eta_charge: TimestepData = eta_charge + self.eta_discharge: TimestepData = eta_discharge + self.relative_loss_per_hour: TimestepData = relative_loss_per_hour self.prevent_simultaneous_charge_and_discharge = prevent_simultaneous_charge_and_discharge def create_model(self, model: SystemModel) -> 'StorageModel': @@ -194,26 +195,38 @@ def transform_data(self, flow_system: 'FlowSystem') -> None: self.relative_minimum_charge_state = flow_system.create_time_series( f'{self.label_full}|relative_minimum_charge_state', 
self.relative_minimum_charge_state,
-            needs_extra_timestep=True,
+            has_extra_timestep=True,
         )
         self.relative_maximum_charge_state = flow_system.create_time_series(
             f'{self.label_full}|relative_maximum_charge_state',
             self.relative_maximum_charge_state,
-            needs_extra_timestep=True,
+            has_extra_timestep=True,
         )
         self.eta_charge = flow_system.create_time_series(f'{self.label_full}|eta_charge', self.eta_charge)
         self.eta_discharge = flow_system.create_time_series(f'{self.label_full}|eta_discharge', self.eta_discharge)
         self.relative_loss_per_hour = flow_system.create_time_series(
             f'{self.label_full}|relative_loss_per_hour', self.relative_loss_per_hour
         )
+        if self.initial_charge_state != 'lastValueOfSim':
+            self.initial_charge_state = flow_system.create_time_series(
+                f'{self.label_full}|initial_charge_state', self.initial_charge_state, has_time_dim=False
+            )
+        self.minimal_final_charge_state = flow_system.create_time_series(
+            f'{self.label_full}|minimal_final_charge_state', self.minimal_final_charge_state, has_time_dim=False
+        )
+        self.maximal_final_charge_state = flow_system.create_time_series(
+            f'{self.label_full}|maximal_final_charge_state', self.maximal_final_charge_state, has_time_dim=False
+        )
         if isinstance(self.capacity_in_flow_hours, InvestParameters):
-            self.capacity_in_flow_hours.transform_data(flow_system)
+            self.capacity_in_flow_hours.transform_data(flow_system, f'{self.label_full}|InvestParameters')

     def _plausibility_checks(self) -> None:
         """
         Check for infeasible or uncommon combinations of parameters
         """
-        if utils.is_number(self.initial_charge_state):
+        if isinstance(self.initial_charge_state, str):
+            # 'lastValueOfSim' is the only permitted string; the numeric checks below do not apply to it
+            if self.initial_charge_state != 'lastValueOfSim':
+                raise PlausibilityError(f'initial_charge_state has undefined value: {self.initial_charge_state}')
+        else:
             if isinstance(self.capacity_in_flow_hours, InvestParameters):
                 if self.capacity_in_flow_hours.fixed_size is None:
                     maximum_capacity = self.capacity_in_flow_hours.maximum_size
@@ -229,19 +242,18 @@
             minimum_inital_capacity = maximum_capacity * self.relative_minimum_charge_state.isel(time=1)
             # initial capacity <= allowed max for minimum_size:
             maximum_inital_capacity = minimum_capacity * self.relative_maximum_charge_state.isel(time=1)
+            # TODO: index=1 ???
I think index 0 - if self.initial_charge_state > maximum_inital_capacity: + if (self.initial_charge_state > maximum_inital_capacity).any(): raise ValueError( f'{self.label_full}: {self.initial_charge_state=} ' f'is above allowed maximum charge_state {maximum_inital_capacity}' ) - if self.initial_charge_state < minimum_inital_capacity: + if (self.initial_charge_state < minimum_inital_capacity).any(): raise ValueError( f'{self.label_full}: {self.initial_charge_state=} ' f'is below allowed minimum charge_state {minimum_inital_capacity}' ) - elif self.initial_charge_state != 'lastValueOfSim': - raise ValueError(f'{self.label_full}: {self.initial_charge_state=} has an invalid value') @register_class_for_io @@ -259,8 +271,8 @@ def __init__( out1: Flow, in2: Optional[Flow] = None, out2: Optional[Flow] = None, - relative_losses: Optional[NumericDataTS] = None, - absolute_losses: Optional[NumericDataTS] = None, + relative_losses: Optional[TimestepData] = None, + absolute_losses: Optional[TimestepData] = None, on_off_parameters: OnOffParameters = None, prevent_simultaneous_flows_in_both_directions: bool = True, meta_data: Optional[Dict] = None, @@ -342,7 +354,7 @@ def __init__(self, model: SystemModel, element: Transmission): def do_modeling(self): """Initiates all FlowModels""" # Force On Variable if absolute losses are present - if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses.active_data != 0): + if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses.selected_data != 0): for flow in self.element.inputs + self.element.outputs: if flow.on_off_parameters is None: flow.on_off_parameters = OnOffParameters() @@ -379,14 +391,14 @@ def create_transmission_equation(self, name: str, in_flow: Flow, out_flow: Flow) # eq: out(t) + on(t)*loss_abs(t) = in(t)*(1 - loss_rel(t)) con_transmission = self.add( self._model.add_constraints( - out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses.active_data - 1), + out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses.selected_data - 1), name=f'{self.label_full}|{name}', ), name, ) if self.element.absolute_losses is not None: - con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses.active_data + con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses.selected_data return con_transmission @@ -413,8 +425,10 @@ def do_modeling(self): self.add( self._model.add_constraints( - sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_inputs]) - == sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_outputs]), + sum([flow.model.flow_rate * conv_factors[flow.label].selected_data for flow in used_inputs]) + == sum( + [flow.model.flow_rate * conv_factors[flow.label].selected_data for flow in used_outputs] + ), name=f'{self.label_full}|conversion_{i}', ) ) @@ -454,12 +468,15 @@ def do_modeling(self): lb, ub = self.absolute_charge_state_bounds self.charge_state = self.add( self._model.add_variables( - lower=lb, upper=ub, coords=self._model.coords_extra, name=f'{self.label_full}|charge_state' + lower=lb, + upper=ub, + coords=self._model.get_coords(extra_timestep=True), + name=f'{self.label_full}|charge_state', ), 'charge_state', ) self.netto_discharge = self.add( - self._model.add_variables(coords=self._model.coords, name=f'{self.label_full}|netto_discharge'), + self._model.add_variables(coords=self._model.get_coords(), 
name=f'{self.label_full}|netto_discharge'), 'netto_discharge', ) # netto_discharge: @@ -474,12 +491,12 @@ def do_modeling(self): ) charge_state = self.charge_state - rel_loss = self.element.relative_loss_per_hour.active_data + rel_loss = self.element.relative_loss_per_hour.selected_data hours_per_step = self._model.hours_per_step charge_rate = self.element.charging.model.flow_rate discharge_rate = self.element.discharging.model.flow_rate - eff_charge = self.element.eta_charge.active_data - eff_discharge = self.element.eta_discharge.active_data + eff_charge = self.element.eta_charge.selected_data + eff_discharge = self.element.eta_discharge.selected_data self.add( self._model.add_constraints( @@ -511,24 +528,20 @@ def _initial_and_final_charge_state(self): name_short = 'initial_charge_state' name = f'{self.label_full}|{name_short}' - if utils.is_number(self.element.initial_charge_state): + if self.element.initial_charge_state == 'lastValueOfSim': self.add( self._model.add_constraints( - self.charge_state.isel(time=0) == self.element.initial_charge_state, name=name + self.charge_state.isel(time=0) == self.charge_state.isel(time=-1), name=name ), name_short, ) - elif self.element.initial_charge_state == 'lastValueOfSim': + else: self.add( self._model.add_constraints( - self.charge_state.isel(time=0) == self.charge_state.isel(time=-1), name=name + self.charge_state.isel(time=0) == self.element.initial_charge_state, name=name ), name_short, ) - else: # TODO: Validation in Storage Class, not in Model - raise PlausibilityError( - f'initial_charge_state has undefined value: {self.element.initial_charge_state}' - ) if self.element.maximal_final_charge_state is not None: self.add( @@ -549,7 +562,7 @@ def _initial_and_final_charge_state(self): ) @property - def absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: + def absolute_charge_state_bounds(self) -> Tuple[TimestepData, TimestepData]: relative_lower_bound, relative_upper_bound = self.relative_charge_state_bounds if not isinstance(self.element.capacity_in_flow_hours, InvestParameters): return ( @@ -563,10 +576,10 @@ def absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: ) @property - def relative_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: + def relative_charge_state_bounds(self) -> Tuple[TimestepData, TimestepData]: return ( - self.element.relative_minimum_charge_state.active_data, - self.element.relative_maximum_charge_state.active_data, + self.element.relative_minimum_charge_state, + self.element.relative_maximum_charge_state, ) diff --git a/flixopt/core.py b/flixopt/core.py index 379828554..304048201 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -7,6 +7,7 @@ import json import logging import pathlib +import textwrap from collections import Counter from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union @@ -25,6 +26,12 @@ NumericDataTS = Union[NumericData, 'TimeSeriesData'] """Represents either standard numeric data or TimeSeriesData.""" +TimestepData = NumericData +"""Represents any form of numeric data that corresponds to timesteps.""" + +ScenarioData = NumericData +"""Represents any form of numeric data that corresponds to scenarios.""" + class PlausibilityError(Exception): """Error for a failing Plausibility check.""" @@ -40,61 +47,443 @@ class ConversionError(Exception): class DataConverter: """ - Converts various data types into xarray.DataArray with a timesteps index. 
+    Converts various data types into xarray.DataArray with optional time and scenario dimension.

-    Supports: scalars, arrays, Series, DataFrames, and DataArrays.
+    Current implementation handles:
+    - Scalar values
+    - NumPy arrays
+    - pandas Series and DataFrames
+    - xarray.DataArray
     """

     @staticmethod
-    def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray:
-        """Convert data to xarray.DataArray with specified timesteps index."""
+    def as_dataarray(
+        data: TimestepData, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None
+    ) -> xr.DataArray:
+        """
+        Convert data to xarray.DataArray with specified dimensions.
+
+        Args:
+            data: The data to convert (scalar, array, Series, DataFrame, or DataArray)
+            timesteps: Optional DatetimeIndex for time dimension
+            scenarios: Optional Index for scenario dimension
+
+        Returns:
+            DataArray with the converted data
+        """
+        # Prepare dimensions and coordinates
+        coords, dims = DataConverter._prepare_dimensions(timesteps, scenarios)
+
+        # Select appropriate converter based on data type
+        if isinstance(data, (int, float, np.integer, np.floating)):
+            return DataConverter._convert_scalar(data, coords, dims)
+
+        elif isinstance(data, xr.DataArray):
+            return DataConverter._convert_dataarray(data, coords, dims)
+
+        elif isinstance(data, np.ndarray):
+            return DataConverter._convert_ndarray(data, coords, dims)
+
+        elif isinstance(data, pd.Series):
+            return DataConverter._convert_series(data, coords, dims)
+
+        elif isinstance(data, pd.DataFrame):
+            return DataConverter._convert_dataframe(data, coords, dims)
+
+        else:
+            raise ConversionError(f'Unsupported data type: {type(data).__name__}')
+
+    @staticmethod
+    def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex:
+        """
+        Validate and prepare time index.
+
+        Args:
+            timesteps: The time index to validate
+
+        Returns:
+            Validated time index
+        """
         if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
-            raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}')
+            raise ConversionError('Timesteps must be a non-empty DatetimeIndex')
+
         if not timesteps.name == 'time':
-            raise ConversionError(f'DatetimeIndex is not named correctly.
Must be named "time", got {timesteps.name=}')
-
-        coords = [timesteps]
-        dims = ['time']
-        expected_shape = (len(timesteps),)
-
-        try:
-            if isinstance(data, (int, float, np.integer, np.floating)):
-                return xr.DataArray(data, coords=coords, dims=dims)
-            elif isinstance(data, pd.DataFrame):
-                if not data.index.equals(timesteps):
-                    raise ConversionError("DataFrame index doesn't match timesteps index")
-                if not len(data.columns) == 1:
-                    raise ConversionError('DataFrame must have exactly one column')
-                return xr.DataArray(data.values.flatten(), coords=coords, dims=dims)
-            elif isinstance(data, pd.Series):
-                if not data.index.equals(timesteps):
-                    raise ConversionError("Series index doesn't match timesteps index")
-                return xr.DataArray(data.values, coords=coords, dims=dims)
-            elif isinstance(data, np.ndarray):
-                if data.ndim != 1:
-                    raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}')
-                elif data.shape[0] != expected_shape[0]:
-                    raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}")
-                return xr.DataArray(data, coords=coords, dims=dims)
-            elif isinstance(data, xr.DataArray):
-                if data.dims != tuple(dims):
-                    raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}")
-                if data.sizes[dims[0]] != len(coords[0]):
-                    raise ConversionError(
-                        f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
-                    )
-                return data.copy(deep=True)
+            raise ConversionError(f'Timesteps must be named "time", got "{timesteps.name}"')
+
+        return timesteps
+
+    @staticmethod
+    def _validate_scenarios(scenarios: pd.Index) -> pd.Index:
+        """
+        Validate and prepare scenario index.
+
+        Args:
+            scenarios: The scenario index to validate
+
+        Returns:
+            Validated scenario index
+        """
+        if not isinstance(scenarios, pd.Index) or len(scenarios) == 0:
+            raise ConversionError('Scenarios must be a non-empty Index')
+
+        if not scenarios.name == 'scenario':
+            raise ConversionError(f'Scenarios must be named "scenario", got "{scenarios.name}"')
+
+        return scenarios
+
+    @staticmethod
+    def _prepare_dimensions(
+        timesteps: Optional[pd.DatetimeIndex], scenarios: Optional[pd.Index]
+    ) -> Tuple[Dict[str, pd.Index], Tuple[str, ...]]:
+        """
+        Prepare coordinates and dimensions for the DataArray.
+
+        Args:
+            timesteps: Optional time index
+            scenarios: Optional scenario index
+
+        Returns:
+            Tuple of (coordinates dict, dimensions tuple)
+        """
+        # Validate inputs if provided
+        if timesteps is not None:
+            timesteps = DataConverter._validate_timesteps(timesteps)
+
+        if scenarios is not None:
+            scenarios = DataConverter._validate_scenarios(scenarios)
+
+        # Build coordinates and dimensions
+        coords = {}
+        dims = []
+
+        if timesteps is not None:
+            coords['time'] = timesteps
+            dims.append('time')
+
+        if scenarios is not None:
+            coords['scenario'] = scenarios
+            dims.append('scenario')
+
+        return coords, tuple(dims)
+
+    @staticmethod
+    def _convert_scalar(
+        data: Union[int, float, np.integer, np.floating], coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Convert a scalar value to a DataArray.
+
+        Args:
+            data: The scalar value
+            coords: Coordinate dictionary
+            dims: Dimension names
+
+        Returns:
+            DataArray with the scalar value
+        """
+        if isinstance(data, (np.integer, np.floating)):
+            data = data.item()
+        return xr.DataArray(data, coords=coords, dims=dims)
+
+    @staticmethod
+    def _convert_dataarray(data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert an existing DataArray to desired dimensions.
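+
+        Example (illustrative sketch; the index names 'time' and 'scenario' are
+        required by the validators above, all other names are placeholders):
+
+            >>> times = pd.date_range('2020-01-01', periods=3, freq='h', name='time')
+            >>> scens = pd.Index(['a', 'b'], name='scenario')
+            >>> da = xr.DataArray(np.zeros(3), coords={'time': times}, dims=('time',))
+            >>> DataConverter.as_dataarray(da, times, scens).dims
+            ('time', 'scenario')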
+
+        Args:
+            data: The source DataArray
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray with the target dimensions
+        """
+        # No dimensions case
+        if len(dims) == 0:
+            if data.size != 1:
+                raise ConversionError('When converting to dimensionless DataArray, source must be scalar')
+            return xr.DataArray(data.values.item())
+
+        # Check if data already has matching dimensions and coordinates
+        if set(data.dims) == set(dims):
+            # Check if coordinates match
+            is_compatible = True
+            for dim in dims:
+                if dim in data.dims and not np.array_equal(data.coords[dim].values, coords[dim].values):
+                    is_compatible = False
+                    break
+
+            if is_compatible:
+                # Ensure dimensions are in the correct order
+                if data.dims != dims:
+                    # Transpose to get dimensions in the right order
+                    return data.transpose(*dims).copy(deep=True)
+                else:
+                    # Return existing DataArray if compatible and order is correct
+                    return data.copy(deep=True)
+
+        # Handle dimension broadcasting
+        if len(data.dims) == 1 and len(dims) == 2:
+            # Single dimension to two dimensions
+            if data.dims[0] == 'time' and 'scenario' in dims:
+                # Broadcast time dimension to include scenarios
+                return DataConverter._broadcast_time_to_scenarios(data, coords, dims)
+
+            elif data.dims[0] == 'scenario' and 'time' in dims:
+                # Broadcast scenario dimension to include time
+                return DataConverter._broadcast_scenario_to_time(data, coords, dims)
+
+        raise ConversionError(
+            f'Cannot convert {data.dims} to {dims}. Source coordinates: {data.coords}, Target coordinates: {coords}'
+        )
+
+    @staticmethod
+    def _broadcast_time_to_scenarios(
+        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Broadcast a time-only DataArray to include scenarios.
+
+        Args:
+            data: The time-indexed DataArray
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray with time and scenario dimensions
+        """
+        # Check compatibility
+        if not np.array_equal(data.coords['time'].values, coords['time'].values):
+            raise ConversionError("Source time coordinates don't match target time coordinates")
+
+        # Broadcast values; the result must have shape (time, scenario) to match dims
+        values = np.tile(data.values[:, np.newaxis], (1, len(coords['scenario'])))
+        return xr.DataArray(values.copy(), coords=coords, dims=dims)
+
+    @staticmethod
+    def _broadcast_scenario_to_time(
+        data: xr.DataArray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]
+    ) -> xr.DataArray:
+        """
+        Broadcast a scenario-only DataArray to include time.
+
+        Args:
+            data: The scenario-indexed DataArray
+            coords: Target coordinates
+            dims: Target dimensions
+
+        Returns:
+            DataArray with time and scenario dimensions
+        """
+        # Check compatibility
+        if not np.array_equal(data.coords['scenario'].values, coords['scenario'].values):
+            raise ConversionError("Source scenario coordinates don't match target scenario coordinates")
+
+        # Broadcast values; the result must have shape (time, scenario) to match dims
+        values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0)
+        return xr.DataArray(values.copy(), coords=coords, dims=dims)
+
+    @staticmethod
+    def _convert_ndarray(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray:
+        """
+        Convert a NumPy array to a DataArray.
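+
+        Example (illustrative sketch; a 1D array whose length matches the time
+        axis is repeated for every scenario):
+
+            >>> times = pd.date_range('2020-01-01', periods=3, freq='h', name='time')
+            >>> scens = pd.Index(['a', 'b'], name='scenario')
+            >>> DataConverter.as_dataarray(np.arange(3.0), times, scens).shape
+            (3, 2)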
+ + Args: + data: The NumPy array + coords: Target coordinates + dims: Target dimensions + + Returns: + DataArray from the NumPy array + """ + # Handle dimensionless case + if len(dims) == 0: + if data.size != 1: + raise ConversionError('Without dimensions, can only convert scalar arrays') + return xr.DataArray(data.item()) + + # Handle single dimension + elif len(dims) == 1: + return DataConverter._convert_ndarray_single_dim(data, coords, dims) + + # Handle two dimensions + elif len(dims) == 2: + return DataConverter._convert_ndarray_two_dims(data, coords, dims) + + else: + raise ConversionError('Maximum 2 dimensions supported') + + @staticmethod + def _convert_ndarray_single_dim( + data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...] + ) -> xr.DataArray: + """ + Convert a NumPy array to a single-dimension DataArray. + + Args: + data: The NumPy array + coords: Target coordinates + dims: Target dimensions (length 1) + + Returns: + DataArray with single dimension + """ + dim_name = dims[0] + dim_length = len(coords[dim_name]) + + if data.ndim == 1: + # 1D array must match dimension length + if data.shape[0] != dim_length: + raise ConversionError(f"Array length {data.shape[0]} doesn't match {dim_name} length {dim_length}") + return xr.DataArray(data, coords=coords, dims=dims) + else: + raise ConversionError(f'Expected 1D array for single dimension, got {data.ndim}D') + + @staticmethod + def _convert_ndarray_two_dims(data: np.ndarray, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray: + """ + Convert a NumPy array to a two-dimension DataArray. + + Args: + data: The NumPy array + coords: Target coordinates + dims: Target dimensions (length 2) + + Returns: + DataArray with two dimensions + """ + scenario_length = len(coords['scenario']) + time_length = len(coords['time']) + + if data.ndim == 1: + # For 1D array, create 2D array based on which dimension it matches + if data.shape[0] == time_length: + # Broadcast across scenarios + values = np.repeat(data[:, np.newaxis], scenario_length, axis=1) + return xr.DataArray(values, coords=coords, dims=dims) + elif data.shape[0] == scenario_length: + # Broadcast across time + values = np.tile(data, (time_length, 1)) + return xr.DataArray(values, coords=coords, dims=dims) + else: + raise ConversionError(f"1D array length {data.shape[0]} doesn't match either dimension") + + elif data.ndim == 2: + # For 2D array, shape must match dimensions + expected_shape = (time_length, scenario_length) + if data.shape != expected_shape: + raise ConversionError(f"2D array shape {data.shape} doesn't match expected shape {expected_shape}") + return xr.DataArray(data, coords=coords, dims=dims) + + else: + raise ConversionError(f'Expected 1D or 2D array for two dimensions, got {data.ndim}D') + + @staticmethod + def _convert_series(data: pd.Series, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray: + """ + Convert pandas Series to xarray DataArray. 
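+
+        Example (illustrative sketch; the Series index must equal the target
+        'time' or 'scenario' coordinates):
+
+            >>> times = pd.date_range('2020-01-01', periods=3, freq='h', name='time')
+            >>> DataConverter.as_dataarray(pd.Series([1.0, 2.0, 3.0], index=times), times).shape
+            (3,)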
+ + Args: + data: pandas Series to convert + coords: Target coordinates + dims: Target dimensions + + Returns: + DataArray from the pandas Series + """ + # Handle single dimension case + if len(dims) == 1: + dim_name = dims[0] + + # Check if series index matches the dimension + if data.index.equals(coords[dim_name]): + return xr.DataArray(data.values.copy(), coords=coords, dims=dims) + else: + raise ConversionError( + f"Series index doesn't match {dim_name} coordinates.\n" + f'Series index: {data.index}\n' + f'Target {dim_name} coordinates: {coords[dim_name]}' + ) + + # Handle two dimensions case + elif len(dims) == 2: + # Check if dimensions are time and scenario + if dims != ('time', 'scenario'): + raise ConversionError( + f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}' + ) + + # Case 1: Series is indexed by time + if data.index.equals(coords['time']): + # Broadcast across scenarios + values = np.tile(data.values[:, np.newaxis], (1, len(coords['scenario']))) + return xr.DataArray(values.copy(), coords=coords, dims=dims) + + # Case 2: Series is indexed by scenario + elif data.index.equals(coords['scenario']): + # Broadcast across time + values = np.repeat(data.values[np.newaxis, :], len(coords['time']), axis=0) + return xr.DataArray(values.copy(), coords=coords, dims=dims) + + else: + raise ConversionError( + "Series index must match either 'time' or 'scenario' coordinates.\n" + f'Series index: {data.index}\n' + f'Target time coordinates: {coords["time"]}\n' + f'Target scenario coordinates: {coords["scenario"]}' + ) + + else: + raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}') + + @staticmethod + def _convert_dataframe(data: pd.DataFrame, coords: Dict[str, pd.Index], dims: Tuple[str, ...]) -> xr.DataArray: + """ + Convert pandas DataFrame to xarray DataArray. + Only allows time as index and scenarios as columns. 
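+
+        Example (illustrative sketch; rows map to timesteps, columns to scenarios):
+
+            >>> times = pd.date_range('2020-01-01', periods=3, freq='h', name='time')
+            >>> scens = pd.Index(['a', 'b'], name='scenario')
+            >>> df = pd.DataFrame(np.ones((3, 2)), index=times, columns=scens)
+            >>> DataConverter.as_dataarray(df, times, scens).dims
+            ('time', 'scenario')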
+ + Args: + data: pandas DataFrame to convert + coords: Target coordinates + dims: Target dimensions + + Returns: + DataArray from the pandas DataFrame + """ + # Single dimension case + if len(dims) == 1: + # If DataFrame has one column, treat it like a Series + if len(data.columns) == 1: + series = data.iloc[:, 0] + return DataConverter._convert_series(series, coords, dims) + + raise ConversionError( + f'When converting DataFrame to single-dimension DataArray, DataFrame must have exactly one column, got {len(data.columns)}' + ) + + # Two dimensions case + elif len(dims) == 2: + # Check if dimensions are time and scenario + if dims != ('time', 'scenario'): + raise ConversionError( + f'Two-dimensional conversion only supports time and scenario dimensions, got {dims}' + ) + + # DataFrame must have time as index and scenarios as columns + if data.index.equals(coords['time']) and data.columns.equals(coords['scenario']): + # Create DataArray with proper dimension order + return xr.DataArray(data.values.copy(), coords=coords, dims=dims) else: - raise ConversionError(f'Unsupported type: {type(data).__name__}') - except Exception as e: - if isinstance(e, ConversionError): - raise - raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e + raise ConversionError( + 'DataFrame must have time as index and scenarios as columns.\n' + f'DataFrame index: {data.index}\n' + f'DataFrame columns: {data.columns}\n' + f'Target time coordinates: {coords["time"]}\n' + f'Target scenario coordinates: {coords["scenario"]}' + ) + + else: + raise ConversionError(f'Maximum 2 dimensions supported, got {len(dims)}') class TimeSeriesData: # TODO: Move to Interface.py - def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None): + def __init__(self, data: TimestepData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None): """ timeseries class for transmit timeseries AND special characteristics of timeseries, i.g. to define weights needed in calculation_type 'aggregated' @@ -146,18 +535,19 @@ class TimeSeries: name (str): The name of the time series aggregation_weight (Optional[float]): Weight used for aggregation aggregation_group (Optional[str]): Group name for shared aggregation weighting - needs_extra_timestep (bool): Whether this series needs an extra timestep + has_extra_timestep (bool): Whether this series needs an extra timestep """ @classmethod def from_datasource( cls, - data: NumericData, + data: NumericDataTS, name: str, timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index] = None, aggregation_weight: Optional[float] = None, aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, + has_extra_timestep: bool = False, ) -> 'TimeSeries': """ Initialize the TimeSeries from multiple data sources. 
@@ -166,19 +556,20 @@ def from_datasource( data: The time series data name: The name of the TimeSeries timesteps: The timesteps of the TimeSeries + scenarios: The scenarios of the TimeSeries aggregation_weight: The weight in aggregation calculations aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing - needs_extra_timestep: Whether this series requires an extra timestep + has_extra_timestep: Whether this series requires an extra timestep Returns: A new TimeSeries instance """ return cls( - DataConverter.as_dataarray(data, timesteps), + DataConverter.as_dataarray(data, timesteps, scenarios), name, aggregation_weight, aggregation_group, - needs_extra_timestep, + has_extra_timestep, ) @classmethod @@ -212,7 +603,7 @@ def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = name=data['name'], aggregation_weight=data['aggregation_weight'], aggregation_group=data['aggregation_group'], - needs_extra_timestep=data['needs_extra_timestep'], + has_extra_timestep=data['has_extra_timestep'], ) def __init__( @@ -221,7 +612,7 @@ def __init__( name: str, aggregation_weight: Optional[float] = None, aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, + has_extra_timestep: bool = False, ): """ Initialize a TimeSeries with a DataArray. @@ -231,35 +622,40 @@ def __init__( name: The name of the TimeSeries aggregation_weight: The weight in aggregation calculations aggregation_group: Group this TimeSeries belongs to for weight sharing - needs_extra_timestep: Whether this series requires an extra timestep + has_extra_timestep: Whether this series requires an extra timestep Raises: - ValueError: If data doesn't have a 'time' index or has more than 1 dimension + ValueError: If data has unsupported dimensions """ - if 'time' not in data.indexes: - raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}') - if data.ndim > 1: - raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}') + allowed_dims = {'time', 'scenario'} + if not set(data.dims).issubset(allowed_dims): + raise ValueError(f'DataArray dimensions must be subset of {allowed_dims}. Got {data.dims}') self.name = name self.aggregation_weight = aggregation_weight self.aggregation_group = aggregation_group - self.needs_extra_timestep = needs_extra_timestep + self.has_extra_timestep = has_extra_timestep # Data management self._stored_data = data.copy(deep=True) self._backup = self._stored_data.copy(deep=True) - self._active_timesteps = self._stored_data.indexes['time'] - self._active_data = None - self._update_active_data() - def reset(self): + # Selection state + self._selected_timesteps: Optional[pd.DatetimeIndex] = None + self._selected_scenarios: Optional[pd.Index] = None + + # Flag for whether this series has various dimensions + self.has_time_dim = 'time' in data.dims + self.has_scenario_dim = 'scenario' in data.dims + + def reset(self) -> None: """ - Reset active timesteps to the full set of stored timesteps. + Reset selections to include all timesteps and scenarios. + This is equivalent to clearing all selections. """ - self.active_timesteps = None + self.clear_selection() - def restore_data(self): + def restore_data(self) -> None: """ Restore stored_data from the backup and reset active timesteps. 
""" @@ -280,8 +676,8 @@ def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]: 'name': self.name, 'aggregation_weight': self.aggregation_weight, 'aggregation_group': self.aggregation_group, - 'needs_extra_timestep': self.needs_extra_timestep, - 'data': self.active_data.to_dict(), + 'has_extra_timestep': self.has_extra_timestep, + 'data': self.selected_data.to_dict(), } # Convert datetime objects to ISO strings @@ -303,84 +699,122 @@ def stats(self) -> str: Returns: String representation of data statistics """ - return get_numeric_stats(self.active_data, padd=0) - - def _update_active_data(self): - """ - Update the active data based on active_timesteps. - """ - self._active_data = self._stored_data.sel(time=self.active_timesteps) + return get_numeric_stats(self.selected_data, padd=0, by_scenario=True) @property def all_equal(self) -> bool: """Check if all values in the series are equal.""" - return np.unique(self.active_data.values).size == 1 + return np.unique(self.selected_data.values).size == 1 @property - def active_timesteps(self) -> pd.DatetimeIndex: - """Get the current active timesteps.""" - return self._active_timesteps - - @active_timesteps.setter - def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]): + def selected_data(self) -> xr.DataArray: """ - Set active_timesteps and refresh active_data. - - Args: - timesteps: New timesteps to activate, or None to use all stored timesteps - - Raises: - TypeError: If timesteps is not a pandas DatetimeIndex or None + Get a view of stored_data based on current selections. + This computes the view dynamically based on the current selection state. """ - if timesteps is None: - self._active_timesteps = self.stored_data.indexes['time'] - elif isinstance(timesteps, pd.DatetimeIndex): - self._active_timesteps = timesteps - else: - raise TypeError('active_timesteps must be a pandas DatetimeIndex or None') + return self._stored_data.sel(**self._valid_selector) - self._update_active_data() + @property + def active_timesteps(self) -> Optional[pd.DatetimeIndex]: + """Get the current active timesteps, or None if no time dimension.""" + if not self.has_time_dim: + return None + if self._selected_timesteps is None: + return self._stored_data.indexes['time'] + return self._selected_timesteps @property - def active_data(self) -> xr.DataArray: - """Get a view of stored_data based on active_timesteps.""" - return self._active_data + def active_scenarios(self) -> Optional[pd.Index]: + """Get the current active scenarios, or None if no scenario dimension.""" + if not self.has_scenario_dim: + return None + if self._selected_scenarios is None: + return self._stored_data.indexes['scenario'] + return self._selected_scenarios @property def stored_data(self) -> xr.DataArray: """Get a copy of the full stored data.""" return self._stored_data.copy() - @stored_data.setter - def stored_data(self, value: NumericData): + def update_stored_data(self, value: xr.DataArray) -> None: """ - Update stored_data and refresh active_data. + Update stored_data and refresh selected_data. 
Args: value: New data to store """ - new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps) + new_data = DataConverter.as_dataarray( + value, + timesteps=self.active_timesteps if self.has_time_dim else None, + scenarios=self.active_scenarios if self.has_scenario_dim else None, + ) # Skip if data is unchanged to avoid overwriting backup if new_data.equals(self._stored_data): return self._stored_data = new_data - self.active_timesteps = None # Reset to full timeline + self.clear_selection() # Reset selections to full dataset + + def clear_selection(self, timesteps: bool = True, scenarios: bool = True) -> None: + if timesteps: + self._selected_timesteps = None + if scenarios: + self._selected_scenarios = None + + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None: + """ + Set active subset for timesteps and scenarios. + + Args: + timesteps: Timesteps to activate, or None to clear. Ignored if series has no time dimension. + scenarios: Scenarios to activate, or None to clear. Ignored if series has no scenario dimension. + """ + # Only update timesteps if the series has time dimension + if self.has_time_dim: + if timesteps is None: + self.clear_selection(timesteps=True, scenarios=False) + else: + self._selected_timesteps = timesteps + + # Only update scenarios if the series has scenario dimension + if self.has_scenario_dim: + if scenarios is None: + self.clear_selection(timesteps=False, scenarios=True) + else: + self._selected_scenarios = scenarios @property def sel(self): - return self.active_data.sel + """Direct access to the selected_data's sel method for convenience.""" + return self.selected_data.sel @property def isel(self): - return self.active_data.isel + """Direct access to the selected_data's isel method for convenience.""" + return self.selected_data.isel + + @property + def _valid_selector(self) -> Dict[str, pd.Index]: + """Get the current selection as a dictionary.""" + selector = {} + + # Only include time in selector if series has time dimension + if self.has_time_dim and self._selected_timesteps is not None: + selector['time'] = self._selected_timesteps + + # Only include scenario in selector if series has scenario dimension + if self.has_scenario_dim and self._selected_scenarios is not None: + selector['scenario'] = self._selected_scenarios + + return selector def _apply_operation(self, other, op): """Apply an operation between this TimeSeries and another object.""" if isinstance(other, TimeSeries): - other = other.active_data - return op(self.active_data, other) + other = other.selected_data + return op(self.selected_data, other) def __add__(self, other): return self._apply_operation(other, lambda x, y: x + y) @@ -395,25 +829,25 @@ def __truediv__(self, other): return self._apply_operation(other, lambda x, y: x / y) def __radd__(self, other): - return other + self.active_data + return other + self.selected_data def __rsub__(self, other): - return other - self.active_data + return other - self.selected_data def __rmul__(self, other): - return other * self.active_data + return other * self.selected_data def __rtruediv__(self, other): - return other / self.active_data + return other / self.selected_data def __neg__(self) -> xr.DataArray: - return -self.active_data + return -self.selected_data def __pos__(self) -> xr.DataArray: - return +self.active_data + return +self.selected_data def __abs__(self) -> xr.DataArray: - return abs(self.active_data) + return abs(self.selected_data) def 
__gt__(self, other): """ @@ -426,7 +860,7 @@ def __gt__(self, other): True if all values in this TimeSeries are greater than other """ if isinstance(other, TimeSeries): - return (self.active_data > other.active_data).all().item() + return (self.selected_data > other.selected_data).all().item() return NotImplemented def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): @@ -435,8 +869,8 @@ def __array_ufunc__(self, ufunc, method, *inputs, **kwargs): This allows NumPy functions to work with TimeSeries objects. """ - # Convert any TimeSeries inputs to their active_data - inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs] + # Convert any TimeSeries inputs to their selected_data + inputs = [x.selected_data if isinstance(x, TimeSeries) else x for x in inputs] return getattr(ufunc, method)(*inputs, **kwargs) def __repr__(self): @@ -450,10 +884,10 @@ def __repr__(self): 'name': self.name, 'aggregation_weight': self.aggregation_weight, 'aggregation_group': self.aggregation_group, - 'needs_extra_timestep': self.needs_extra_timestep, - 'shape': self.active_data.shape, - 'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}', + 'has_extra_timestep': self.has_extra_timestep, + 'shape': self.selected_data.shape, } + attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items()) return f'TimeSeries({attr_str})' @@ -464,281 +898,354 @@ def __str__(self): Returns: Descriptive string with statistics """ - return f"TimeSeries '{self.name}': {self.stats}" + return f'TimeSeries "{self.name}":\n{textwrap.indent(self.stats, " ")}' class TimeSeriesCollection: """ - Collection of TimeSeries objects with shared timestep management. + Simplified central manager for time series data with reference tracking. - TimeSeriesCollection handles multiple TimeSeries objects with synchronized - timesteps, provides operations on collections, and manages extra timesteps. + Provides a way to store time series data and work with subsets of dimensions + that automatically update all references when changed. """ def __init__( self, timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index] = None, hours_of_last_timestep: Optional[float] = None, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None, ): - """ - Args: - timesteps: The timesteps of the Collection. - hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified - hours_of_previous_timesteps: The duration of previous timesteps. - If None, the first time increment of time_series is used. - This is needed to calculate previous durations (for example consecutive_on_hours). - If you use an array, take care that its long enough to cover all previous values! 
- """ - # Prepare and validate timesteps - self._validate_timesteps(timesteps) - self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( - timesteps, hours_of_previous_timesteps + """Initialize a TimeSeriesCollection.""" + self._full_timesteps = self._validate_timesteps(timesteps) + self._full_scenarios = self._validate_scenarios(scenarios) + + self._full_timesteps_extra = self._create_timesteps_with_extra( + self._full_timesteps, + self._calculate_hours_of_final_timestep( + self._full_timesteps, hours_of_final_timestep=hours_of_last_timestep + ), + ) + self._full_hours_per_timestep = self.calculate_hours_per_timestep( + self._full_timesteps_extra, self._full_scenarios ) - # Set up timesteps and hours - self.all_timesteps = timesteps - self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra) + self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( + timesteps, hours_of_previous_timesteps + ) # TODO: Make dynamic - # Active timestep tracking - self._active_timesteps = None - self._active_timesteps_extra = None - self._active_hours_per_timestep = None + # Series that need extra timestep + self._has_extra_timestep: set = set() - # Dictionary of time series by name - self.time_series_data: Dict[str, TimeSeries] = {} + # Storage for TimeSeries objects + self._time_series: Dict[str, TimeSeries] = {} - # Aggregation - self.group_weights: Dict[str, float] = {} - self.weights: Dict[str, float] = {} + # Active subset selectors + self._selected_timesteps: Optional[pd.DatetimeIndex] = None + self._selected_scenarios: Optional[pd.Index] = None + self._selected_timesteps_extra: Optional[pd.DatetimeIndex] = None + self._selected_hours_per_timestep: Optional[xr.DataArray] = None - @classmethod - def with_uniform_timesteps( - cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None - ) -> 'TimeSeriesCollection': - """Create a collection with uniform timesteps.""" - timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time') - return cls(timesteps, hours_of_previous_timesteps=hours_per_step) - - def create_time_series( - self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False + def add_time_series( + self, + name: str, + data: Union[NumericDataTS, TimeSeries], + has_time_dim: bool = True, + has_scenario_dim: bool = True, + aggregation_weight: Optional[float] = None, + aggregation_group: Optional[str] = None, + has_extra_timestep: bool = False, ) -> TimeSeries: """ - Creates a TimeSeries from the given data and adds it to the collection. + Add a new TimeSeries to the allocator. Args: - data: The data to create the TimeSeries from. - name: The name of the TimeSeries. - needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps. - The data to create the TimeSeries from. + name: Name of the time series + data: Data for the time series (can be raw data or an existing TimeSeries) + has_time_dim: Whether the TimeSeries has a time dimension + has_scenario_dim: Whether the TimeSeries has a scenario dimension + aggregation_weight: Weight used for aggregation + aggregation_group: Group name for shared aggregation weighting + has_extra_timestep: Whether this series needs an extra timestep Returns: - The created TimeSeries. 
- + The created TimeSeries object """ - # Check for duplicate name - if name in self.time_series_data: - raise ValueError(f"TimeSeries '{name}' already exists in this collection") + if name in self._time_series: + raise KeyError(f"TimeSeries '{name}' already exists in allocator") + if not has_time_dim and has_extra_timestep: + raise ValueError('A not time-indexed TimeSeries cannot have an extra timestep') + + # Choose which timesteps to use + if has_time_dim: + target_timesteps = self.timesteps_extra if has_extra_timestep else self.timesteps + else: + target_timesteps = None - # Determine which timesteps to use - timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps + target_scenarios = self.scenarios if has_scenario_dim else None - # Create the time series - if isinstance(data, TimeSeriesData): - time_series = TimeSeries.from_datasource( + # Create or adapt the TimeSeries object + if isinstance(data, TimeSeries): + # Use the existing TimeSeries but update its parameters + time_series = data + # Update the stored data to use our timesteps and scenarios + data_array = DataConverter.as_dataarray( + time_series.stored_data, timesteps=target_timesteps, scenarios=target_scenarios + ) + time_series = TimeSeries( + data=data_array, name=name, - data=data.data, - timesteps=timesteps_to_use, - aggregation_weight=data.agg_weight, - aggregation_group=data.agg_group, - needs_extra_timestep=needs_extra_timestep, + aggregation_weight=aggregation_weight or time_series.aggregation_weight, + aggregation_group=aggregation_group or time_series.aggregation_group, + has_extra_timestep=has_extra_timestep or time_series.has_extra_timestep, ) - # Connect the user time series to the created TimeSeries - data.label = name else: + # Create a new TimeSeries from raw data time_series = TimeSeries.from_datasource( - name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep + data=data, + name=name, + timesteps=target_timesteps, + scenarios=target_scenarios, + aggregation_weight=aggregation_weight, + aggregation_group=aggregation_group, + has_extra_timestep=has_extra_timestep, ) - # Add to the collection - self.add_time_series(time_series) + # Add to storage + self._time_series[name] = time_series + + # Track if it needs extra timestep + if has_extra_timestep: + self._has_extra_timestep.add(name) + # Return the TimeSeries object return time_series - def calculate_aggregation_weights(self) -> Dict[str, float]: - """Calculate and return aggregation weights for all time series.""" - self.group_weights = self._calculate_group_weights() - self.weights = self._calculate_weights() + def clear_selection(self, timesteps: bool = True, scenarios: bool = True) -> None: + """ + Clear selection for timesteps and/or scenarios. - if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)): - logger.info('All Aggregation weights were set to 1') + Args: + timesteps: Whether to clear timesteps selection + scenarios: Whether to clear scenarios selection + """ + if timesteps: + self._update_selected_timesteps(timesteps=None) + if scenarios: + self._selected_scenarios = None - return self.weights + for ts in self._time_series.values(): + ts.clear_selection(timesteps=timesteps, scenarios=scenarios) - def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None): + def set_selection(self, timesteps: Optional[pd.DatetimeIndex] = None, scenarios: Optional[pd.Index] = None) -> None: """ - Update active timesteps for the collection and all time series. 
-        If no arguments are provided, the active timesteps are reset.
+        Set active subset for timesteps and scenarios.

        Args:
-            active_timesteps: The active timesteps of the model.
-                If None, the all timesteps of the TimeSeriesCollection are taken.
+            timesteps: Timesteps to activate, or None to clear
+            scenarios: Scenarios to activate, or None to clear
        """
-        if active_timesteps is None:
-            return self.reset()
+        if timesteps is None:
+            self.clear_selection(timesteps=True, scenarios=False)
+        else:
+            self._update_selected_timesteps(timesteps)

-        if not np.all(np.isin(active_timesteps, self.all_timesteps)):
-            raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection')
+        if scenarios is None:
+            self.clear_selection(timesteps=False, scenarios=True)
+        else:
+            self._selected_scenarios = scenarios

-        # Calculate derived timesteps
-        self._active_timesteps = active_timesteps
-        first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0]
-        last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0]
-        self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2]
-        self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1))
+        # Apply the selection to all TimeSeries objects
+        self._propagate_selection_to_time_series()

-        # Update all time series
-        self._update_time_series_timesteps()
+    def _update_selected_timesteps(self, timesteps: Optional[pd.DatetimeIndex]) -> None:
+        """
+        Update the selected timesteps and the metrics derived from them (timesteps_extra, hours_per_timestep).
+        """
+        if timesteps is None:
+            self._selected_timesteps = None
+            self._selected_timesteps_extra = None
+            self._selected_hours_per_timestep = None
+            return

-    def reset(self):
-        """Reset active timesteps to defaults for all time series."""
-        self._active_timesteps = None
-        self._active_timesteps_extra = None
-        self._active_hours_per_timestep = None
+        self._selected_timesteps = self._validate_timesteps(timesteps, self._full_timesteps)
+        self._selected_timesteps_extra = self._create_timesteps_with_extra(
+            timesteps, self._calculate_hours_of_final_timestep(timesteps, self._full_timesteps)
+        )
+        self._selected_hours_per_timestep = self.calculate_hours_per_timestep(
+            self._selected_timesteps_extra, self._selected_scenarios
+        )

-        for time_series in self.time_series_data.values():
-            time_series.reset()
+    def as_dataset(self, with_extra_timestep: bool = True, with_constants: bool = True) -> xr.Dataset:
+        """
+        Convert the TimeSeriesCollection to an xarray Dataset containing the data of each TimeSeries.

-    def restore_data(self):
-        """Restore original data for all time series."""
-        for time_series in self.time_series_data.values():
-            time_series.restore_data()
+        Args:
+            with_extra_timestep: Whether to include the extra timestep.
+                Excluding it removes the last timestep of certain TimeSeries, but avoids NaNs in the others.
+            with_constants: Whether to include TimeSeries with a constant value in the dataset.
+ """ + if self.scenarios is None: + ds = xr.Dataset(coords={'time': self.timesteps_extra}) + else: + ds = xr.Dataset(coords={'scenario': self.scenarios, 'time': self.timesteps_extra}) - def add_time_series(self, time_series: TimeSeries): - """Add an existing TimeSeries to the collection.""" - if time_series.name in self.time_series_data: - raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection") + for ts in self._time_series.values(): + if not with_constants and ts.all_equal: + continue + ds[ts.name] = ts.selected_data - self.time_series_data[time_series.name] = time_series + if not with_extra_timestep: + return ds.sel(time=self.timesteps) - def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False): - """ - Update time series with new data from a DataFrame. + return ds - Args: - data: DataFrame containing new data with timestamps as index - include_extra_timestep: Whether the provided data already includes the extra timestep, by default False - """ - if not isinstance(data, pd.DataFrame): - raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}') + @property + def timesteps(self) -> pd.DatetimeIndex: + """Get the current active timesteps.""" + if self._selected_timesteps is None: + return self._full_timesteps + return self._selected_timesteps - # Check if the DataFrame index matches the expected timesteps - expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps - if not data.index.equals(expected_timesteps): - raise ValueError( - f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}' - ) + @property + def timesteps_extra(self) -> pd.DatetimeIndex: + """Get the current active timesteps with extra timestep.""" + if self._selected_timesteps_extra is None: + return self._full_timesteps_extra + return self._selected_timesteps_extra - for name, ts in self.time_series_data.items(): - if name in data.columns: - if not ts.needs_extra_timestep: - # For time series without extra timestep - if include_extra_timestep: - # If data includes extra timestep but series doesn't need it, exclude the last point - ts.stored_data = data[name].iloc[:-1] - else: - # Use data as is - ts.stored_data = data[name] - else: - # For time series with extra timestep - if include_extra_timestep: - # Data already includes extra timestep - ts.stored_data = data[name] - else: - # Need to add extra timestep - extrapolate from the last value - extra_step_value = data[name].iloc[-1] - extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time') - extra_step_series = pd.Series([extra_step_value], index=extra_step_index) + @property + def hours_per_timestep(self) -> xr.DataArray: + """Get the current active hours per timestep.""" + if self._selected_hours_per_timestep is None: + return self._full_hours_per_timestep + return self._selected_hours_per_timestep - # Combine the regular data with the extra timestep - ts.stored_data = pd.concat([data[name], extra_step_series]) + @property + def scenarios(self) -> Optional[pd.Index]: + """Get the current active scenarios.""" + if self._selected_scenarios is None: + return self._full_scenarios + return self._selected_scenarios + + def _propagate_selection_to_time_series(self) -> None: + """Apply the current selection to all TimeSeries objects.""" + for ts_name, ts in self._time_series.items(): + if ts.has_time_dim: + timesteps = self.timesteps_extra if ts_name in self._has_extra_timestep else 
self.timesteps
+            else:
+                timesteps = None

-            logger.debug(f'Updated data for {name}')
+            ts.set_selection(timesteps=timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None)

-    def to_dataframe(
-        self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True
-    ) -> pd.DataFrame:
+    def __getitem__(self, name: str) -> TimeSeries:
        """
-        Convert collection to DataFrame with optional filtering and timestep control.
+        Get a TimeSeries by name.

        Args:
-            filtered: Filter time series by variability, by default 'non_constant'
-            include_extra_timestep: Whether to include the extra timestep in the result, by default True
+            name: Name of the time series

        Returns:
-            DataFrame representation of the collection
+            The TimeSeries object (it applies the current selection internally)

+        Raises:
+            KeyError: If no TimeSeries with the given name exists
        """
-        include_constants = filtered != 'non_constant'
-        ds = self.to_dataset(include_constants=include_constants)
-
-        if not include_extra_timestep:
-            ds = ds.isel(time=slice(None, -1))
-
-        df = ds.to_dataframe()
+        # First check if this is a TimeSeries
+        if name in self._time_series:
+            # Return the TimeSeries object (it will handle selection internally)
+            return self._time_series[name]
+        raise KeyError(f'No TimeSeries named "{name}" found')
+
+    def __contains__(self, value) -> bool:
+        if isinstance(value, str):
+            return value in self._time_series
+        elif isinstance(value, TimeSeries):
+            return value.name in self._time_series
+        raise TypeError(f'Invalid type for __contains__ of {self.__class__.__name__}: {type(value)}')

-        # Apply filtering
-        if filtered == 'all':
-            return df
-        elif filtered == 'constant':
-            return df.loc[:, df.nunique() == 1]
-        elif filtered == 'non_constant':
-            return df.loc[:, df.nunique() > 1]
-        else:
-            raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'")
+    def __iter__(self) -> Iterator[TimeSeries]:
+        """Iterate over TimeSeries objects."""
+        return iter(self._time_series.values())

-    def to_dataset(self, include_constants: bool = True) -> xr.Dataset:
+    def update_time_series(self, name: str, data: TimestepData) -> TimeSeries:
        """
-        Combine all time series into a single Dataset with all timesteps.
+        Update an existing TimeSeries with new data.

        Args:
-            include_constants: Whether to include time series with constant values, by default True
+            name: Name of the TimeSeries to update
+            data: New data to assign

        Returns:
-            Dataset containing all selected time series with all timesteps
+            The updated TimeSeries
+
+        Raises:
+            KeyError: If no TimeSeries with the given name exists
        """
-        # Determine which series to include
-        if include_constants:
-            series_to_include = self.time_series_data.values()
-        else:
-            series_to_include = self.non_constants
+        if name not in self._time_series:
+            raise KeyError(f"No TimeSeries named '{name}' found")

-        # Create individual datasets and merge them
-        ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include])
+        # Get the TimeSeries
+        ts = self._time_series[name]

-        # Ensure the correct time coordinates
-        ds = ds.reindex(time=self.timesteps_extra)
+        # Determine which timesteps to use if the series has a time dimension
+        if ts.has_time_dim:
+            target_timesteps = self.timesteps_extra if name in self._has_extra_timestep else self.timesteps
+        else:
+            target_timesteps = None

-        ds.attrs.update(
-            {
-                'timesteps_extra': f'{self.timesteps_extra[0]} ...
{self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}', - 'hours_per_timestep': self._format_stats(self.hours_per_timestep), - } + # Convert data to proper format + data_array = DataConverter.as_dataarray( + data, timesteps=target_timesteps, scenarios=self.scenarios if ts.has_scenario_dim else None ) - return ds + # Update the TimeSeries + ts.update_stored_data(data_array) + + return ts + + def calculate_aggregation_weights(self) -> Dict[str, float]: + """Calculate and return aggregation weights for all time series.""" + group_weights = self._calculate_group_weights() - def _update_time_series_timesteps(self): - """Update active timesteps for all time series.""" - for ts in self.time_series_data.values(): - if ts.needs_extra_timestep: - ts.active_timesteps = self.timesteps_extra + weights = {} + for name, ts in self._time_series.items(): + if ts.aggregation_group is not None: + # Use group weight + weights[name] = group_weights.get(ts.aggregation_group, 1) else: - ts.active_timesteps = self.timesteps + # Use individual weight or default to 1 + weights[name] = ts.aggregation_weight or 1 + + if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)): + logger.info('All Aggregation weights were set to 1') + + return weights + + def _calculate_group_weights(self) -> Dict[str, float]: + """Calculate weights for aggregation groups.""" + # Count series in each group + groups = [ts.aggregation_group for ts in self._time_series.values() if ts.aggregation_group is not None] + group_counts = Counter(groups) + + # Calculate weight for each group (1/count) + return {group: 1 / count for group, count in group_counts.items()} @staticmethod - def _validate_timesteps(timesteps: pd.DatetimeIndex): - """Validate timesteps format and rename if needed.""" + def _validate_timesteps( + timesteps: pd.DatetimeIndex, present_timesteps: Optional[pd.DatetimeIndex] = None + ) -> pd.DatetimeIndex: + """ + Validate timesteps format and rename if needed. 
+        Args:
+            timesteps: The timesteps to validate
+            present_timesteps: The timesteps that are present in the dataset
+
+        Raises:
+            TypeError: If timesteps is not a pandas DatetimeIndex
+            ValueError: If timesteps does not contain at least 2 timestamps
+            ValueError: If timesteps is not sorted
+            ValueError: If timesteps contains duplicates
+            ValueError: If timesteps is not a subset of present_timesteps
+        """
        if not isinstance(timesteps, pd.DatetimeIndex):
            raise TypeError('timesteps must be a pandas DatetimeIndex')
@@ -747,22 +1254,65 @@ def _validate_timesteps(timesteps: pd.DatetimeIndex):
        # Ensure timesteps has the required name
        if timesteps.name != 'time':
-            logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name)
+            logger.debug('Renamed timesteps to "time" (was "%s")', timesteps.name)
            timesteps.name = 'time'

+        # Ensure timesteps is sorted
+        if not timesteps.is_monotonic_increasing:
+            raise ValueError('timesteps must be sorted')
+
+        # Ensure timesteps has no duplicates
+        if len(timesteps) != len(timesteps.drop_duplicates()):
+            raise ValueError('timesteps must not contain duplicates')
+
+        # Ensure timesteps is a subset of present_timesteps
+        if present_timesteps is not None and not set(timesteps).issubset(set(present_timesteps)):
+            raise ValueError('timesteps must be a subset of present_timesteps')
+
+        return timesteps
+
    @staticmethod
-    def _create_timesteps_with_extra(
-        timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float]
-    ) -> pd.DatetimeIndex:
-        """Create timesteps with an extra step at the end."""
-        if hours_of_last_timestep is not None:
-            # Create the extra timestep using the specified duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
-        else:
-            # Use the last interval as the extra timestep duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time')
+    def _validate_scenarios(scenarios: Optional[pd.Index], present_scenarios: Optional[pd.Index] = None) -> Optional[pd.Index]:
+        """
+        Validate scenarios format and rename if needed.

+        Args:
+            scenarios: The scenarios to validate
+            present_scenarios: The scenarios that are present in the dataset

-        # Combine with original timesteps
+        Raises:
+            ValueError: If scenarios contains fewer than 2 scenarios
+            ValueError: If scenarios is not a subset of present_scenarios
+        """
+        if scenarios is None:
+            return None
+
+        if not isinstance(scenarios, pd.Index):
+            logger.warning('Converting scenarios to pandas.Index')
+            scenarios = pd.Index(scenarios, name='scenario')
+
+        if len(scenarios) < 2:
+            raise ValueError('scenarios must contain at least 2 scenarios')
+
+        # Ensure scenarios has the required name
+        if scenarios.name != 'scenario':
+            logger.debug('Renamed scenarios to "scenario" (was "%s")', scenarios.name)
+            scenarios.name = 'scenario'
+
+        # Ensure scenarios is a subset of present_scenarios
+        if present_scenarios is not None and not set(scenarios).issubset(set(present_scenarios)):
+            raise ValueError('scenarios must be a subset of present_scenarios')
+
+        return scenarios
+
+    @staticmethod
+    def _create_timesteps_with_extra(timesteps: pd.DatetimeIndex, hours_of_last_timestep: float) -> pd.DatetimeIndex:
+        """Create timesteps with an extra step at the end."""
+        last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
        return pd.DatetimeIndex(timesteps.append(last_date), name='time')

    @staticmethod
@@ -778,137 +1328,105 @@ def _calculate_hours_of_previous_timesteps(
        return first_interval.total_seconds() / 3600  # Convert to hours

    @staticmethod
-    def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray:
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
+    def _calculate_hours_of_final_timestep(
+        timesteps: pd.DatetimeIndex,
+        timesteps_superset: Optional[pd.DatetimeIndex] = None,
+        hours_of_final_timestep: Optional[float] = None,
+    ) -> float:
+        """
+        Calculate the duration of the final timestep.
+        If timesteps_superset is provided, the duration of the final timestep of `timesteps` is derived from
+        the superset. hours_of_final_timestep is only used if the duration can't be determined from the
+        timesteps themselves.

-        return xr.DataArray(
-            data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step'
-        )
+        Args:
+            timesteps: The timesteps whose final duration is calculated
+            timesteps_superset: The full timesteps that contain `timesteps` as a subset
+            hours_of_final_timestep: The duration of the final timestep, if already known

-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
+        Returns:
+            The duration of the final timestep in hours

-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
+        Raises:
+            ValueError: If the provided timesteps end after the timesteps_superset
+        """
+        if timesteps_superset is None:
+            if hours_of_final_timestep is not None:
+                return hours_of_final_timestep
+            return (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1)

-    def _calculate_weights(self) -> Dict[str, float]:
-        """Calculate weights for all time series."""
-        # Calculate weight for each time series
-        weights = {}
-        for name, ts in self.time_series_data.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = self.group_weights.get(ts.aggregation_group, 1)
-            else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
+        final_timestep = timesteps[-1]

-        return weights
+        if timesteps_superset[-1] == final_timestep:
+            if hours_of_final_timestep is not None:
+                return hours_of_final_timestep
+            return (timesteps_superset[-1] - timesteps_superset[-2]) / pd.Timedelta(hours=1)

-    def _format_stats(self, data) -> str:
-        """Format statistics for a data array."""
-        if hasattr(data, 'values'):
-            values = data.values
+        elif timesteps_superset[-1] < final_timestep:
+            raise ValueError(
+                f'The provided timesteps ({timesteps}) end after the provided timesteps_superset ({timesteps_superset})'
+            )
        else:
-            values = np.asarray(data)
-
-        mean_val = np.mean(values)
-        min_val = np.min(values)
-        max_val = np.max(values)
-
-        return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
-
-    def __getitem__(self, name: str) -> TimeSeries:
-        """Get a TimeSeries by name."""
-        try:
-            return self.time_series_data[name]
-        except KeyError as e:
-            raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate through all TimeSeries in the collection."""
-        return iter(self.time_series_data.values())
+            # Get the first timestep in the superset that is after the final timestep of the subset
+            extra_timestep = timesteps_superset[timesteps_superset > final_timestep].min()
+            return (extra_timestep - final_timestep) / pd.Timedelta(hours=1)

-    def __len__(self) -> int:
-        """Get the number of TimeSeries in the collection."""
-        return len(self.time_series_data)
-
-    def __contains__(self, item: Union[str, TimeSeries]) -> bool:
-        """Check if a TimeSeries exists in the collection."""
-        if isinstance(item, str):
-            return item in self.time_series_data
-        elif isinstance(item, TimeSeries):
-            return item in self.time_series_data.values()
-        return False
-
-    @property
-    def non_constants(self) -> List[TimeSeries]:
-        """Get time series with varying values."""
-        return [ts
for ts in self.time_series_data.values() if ts.all_equal] - - @property - def timesteps(self) -> pd.DatetimeIndex: - """Get the active timesteps.""" - return self.all_timesteps if self._active_timesteps is None else self._active_timesteps - - @property - def timesteps_extra(self) -> pd.DatetimeIndex: - """Get the active timesteps with extra step.""" - return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra - - @property - def hours_per_timestep(self) -> xr.DataArray: - """Get the duration of each active timestep.""" - return ( - self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep - ) - - @property - def hours_of_last_timestep(self) -> float: - """Get the duration of the last timestep.""" - return float(self.hours_per_timestep[-1].item()) - - def __repr__(self): - return f'TimeSeriesCollection:\n{self.to_dataset()}' + @staticmethod + def calculate_hours_per_timestep( + timesteps_extra: pd.DatetimeIndex, scenarios: Optional[pd.Index] = None + ) -> xr.DataArray: + """Calculate duration of each timestep.""" + # Calculate differences between consecutive timestamps + hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1) - def __str__(self): - longest_name = max([time_series.name for time_series in self.time_series_data], key=len) + return DataConverter.as_dataarray( + hours_per_step, + timesteps=timesteps_extra[:-1], + scenarios=scenarios, + ).rename('hours_per_step') - stats_summary = '\n'.join( - [ - f' - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}' - for time_series in self.time_series_data - ] - ) - return ( - f'TimeSeriesCollection with {len(self.time_series_data)} series\n' - f' Time Range: {self.timesteps[0]} → {self.timesteps[-1]}\n' - f' No. of timesteps: {len(self.timesteps)} + 1 extra\n' - f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n' - f' Time Series Data:\n' - f'{stats_summary}' - ) +def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10, by_scenario: bool = False) -> str: + """ + Calculates the mean, median, min, max, and standard deviation of a numeric DataArray. 
+ Args: + data: The DataArray to analyze + decimals: Number of decimal places to show + padd: Padding for alignment + by_scenario: Whether to break down stats by scenario -def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str: - """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.""" + Returns: + String representation of data statistics + """ format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f' + + # If by_scenario is True and there's a scenario dimension with multiple values + if by_scenario and 'scenario' in data.dims and data.sizes['scenario'] > 1: + results = [] + for scenario in data.coords['scenario'].values: + scenario_data = data.sel(scenario=scenario) + if np.unique(scenario_data).size == 1: + results.append(f' {scenario}: {scenario_data.max().item():{format_spec}} (constant)') + else: + mean = scenario_data.mean().item() + median = scenario_data.median().item() + min_val = scenario_data.min().item() + max_val = scenario_data.max().item() + std = scenario_data.std().item() + results.append( + f' {scenario}: {mean:{format_spec}} (mean), {median:{format_spec}} (median), ' + f'{min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)' + ) + return '\n'.join(['By scenario:'] + results) + + # Standard logic for non-scenario data or aggregated stats if np.unique(data).size == 1: return f'{data.max().item():{format_spec}} (constant)' + mean = data.mean().item() median = data.median().item() min_val = data.min().item() max_val = data.max().item() std = data.std().item() + return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)' diff --git a/flixopt/effects.py b/flixopt/effects.py index 82aa63a43..0cf165d66 100644 --- a/flixopt/effects.py +++ b/flixopt/effects.py @@ -11,9 +11,8 @@ import linopy import numpy as np -import pandas as pd -from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesCollection +from .core import NumericDataTS, ScenarioData, TimeSeries, TimeSeriesCollection, TimestepData from .features import ShareAllocationModel from .structure import Element, ElementModel, Interface, Model, SystemModel, register_class_for_io @@ -38,16 +37,16 @@ def __init__( meta_data: Optional[Dict] = None, is_standard: bool = False, is_objective: bool = False, - specific_share_to_other_effects_operation: Optional['EffectValuesUser'] = None, - specific_share_to_other_effects_invest: Optional['EffectValuesUser'] = None, - minimum_operation: Optional[Scalar] = None, - maximum_operation: Optional[Scalar] = None, - minimum_invest: Optional[Scalar] = None, - maximum_invest: Optional[Scalar] = None, + specific_share_to_other_effects_operation: Optional['EffectValuesUserTimestep'] = None, + specific_share_to_other_effects_invest: Optional['EffectValuesUserScenario'] = None, + minimum_operation: Optional[ScenarioData] = None, + maximum_operation: Optional[ScenarioData] = None, + minimum_invest: Optional[ScenarioData] = None, + maximum_invest: Optional[ScenarioData] = None, minimum_operation_per_hour: Optional[NumericDataTS] = None, maximum_operation_per_hour: Optional[NumericDataTS] = None, - minimum_total: Optional[Scalar] = None, - maximum_total: Optional[Scalar] = None, + minimum_total: Optional[ScenarioData] = None, + maximum_total: Optional[ScenarioData] = None, ): """ Args: @@ -76,10 +75,12 @@ def __init__( self.description = description self.is_standard = is_standard 
self.is_objective = is_objective
-        self.specific_share_to_other_effects_operation: EffectValuesUser = (
+        self.specific_share_to_other_effects_operation: EffectValuesUserTimestep = (
            specific_share_to_other_effects_operation or {}
        )
-        self.specific_share_to_other_effects_invest: EffectValuesUser = specific_share_to_other_effects_invest or {}
+        self.specific_share_to_other_effects_invest: EffectValuesUserScenario = (
+            specific_share_to_other_effects_invest or {}
+        )
        self.minimum_operation = minimum_operation
        self.maximum_operation = maximum_operation
        self.minimum_operation_per_hour: NumericDataTS = minimum_operation_per_hour
@@ -118,10 +119,11 @@ def __init__(self, model: SystemModel, element: Effect):
        self.total: Optional[linopy.Variable] = None
        self.invest: ShareAllocationModel = self.add(
            ShareAllocationModel(
-                self._model,
-                False,
-                self.label_of_element,
-                'invest',
+                model=self._model,
+                has_time_dim=False,
+                has_scenario_dim=True,
+                label_of_element=self.label_of_element,
+                label='invest',
                label_full=f'{self.label_full}(invest)',
                total_max=self.element.maximum_invest,
                total_min=self.element.minimum_invest,
            )
        )
        self.operation: ShareAllocationModel = self.add(
            ShareAllocationModel(
-                self._model,
-                True,
-                self.label_of_element,
-                'operation',
+                model=self._model,
+                has_time_dim=True,
+                has_scenario_dim=True,
+                label_of_element=self.label_of_element,
+                label='operation',
                label_full=f'{self.label_full}(operation)',
                total_max=self.element.maximum_operation,
                total_min=self.element.minimum_operation,
-                min_per_hour=self.element.minimum_operation_per_hour.active_data
+                min_per_hour=self.element.minimum_operation_per_hour.selected_data
                if self.element.minimum_operation_per_hour is not None
                else None,
-                max_per_hour=self.element.maximum_operation_per_hour.active_data
+                max_per_hour=self.element.maximum_operation_per_hour.selected_data
                if self.element.maximum_operation_per_hour is not None
                else None,
            )
@@ -154,7 +157,7 @@ def do_modeling(self):
            self._model.add_variables(
                lower=self.element.minimum_total if self.element.minimum_total is not None else -np.inf,
                upper=self.element.maximum_total if self.element.maximum_total is not None else np.inf,
-                coords=None,
+                coords=self._model.get_coords(time_dim=False),
                name=f'{self.label_full}|total',
            ),
            'total',
        )

        self.add(
            self._model.add_constraints(
-                self.total == self.operation.total.sum() + self.invest.total.sum(), name=f'{self.label_full}|total'
+                self.total == self.operation.total + self.invest.total, name=f'{self.label_full}|total'
            ),
            'total',
        )
@@ -171,11 +174,12 @@
EffectValuesExpr = Dict[str, linopy.LinearExpression]  # Used to create Shares
EffectTimeSeries = Dict[str, TimeSeries]  # Used internally to index values
EffectValuesDict = Dict[str, NumericDataTS]  # How effect values are stored
-EffectValuesUser = Union[NumericDataTS, Dict[str, NumericDataTS]]  # User-specified Shares to Effects
-""" This datatype is used to define the share to an effect by a certain attribute. """
-EffectValuesUserScalar = Union[Scalar, Dict[str, Scalar]]  # User-specified Shares to Effects
-""" This datatype is used to define the share to an effect by a certain attribute. Only scalars are allowed. """
+EffectValuesUserScenario = Union[ScenarioData, Dict[str, ScenarioData]]
+""" This datatype is used to define the share to an effect for every scenario.
""" + +EffectValuesUserTimestep = Union[TimestepData, Dict[str, TimestepData]] +""" This datatype is used to define the share to an effect for every timestep. """ class EffectCollection: @@ -207,7 +211,9 @@ def add_effects(self, *effects: Effect) -> None: self._effects[effect.label] = effect logger.info(f'Registered new Effect: {effect.label}') - def create_effect_values_dict(self, effect_values_user: EffectValuesUser) -> Optional[EffectValuesDict]: + def create_effect_values_dict( + self, effect_values_user: Union[EffectValuesUserScenario, EffectValuesUserTimestep] + ) -> Optional[EffectValuesDict]: """ Converts effect values into a dictionary. If a scalar is provided, it is associated with a default effect type. @@ -346,29 +352,42 @@ def add_share_to_effects( ) -> None: for effect, expression in expressions.items(): if target == 'operation': - self.effects[effect].model.operation.add_share(name, expression) + self.effects[effect].model.operation.add_share( + name, + expression, + has_time_dim=True, + has_scenario_dim=True, + ) elif target == 'invest': - self.effects[effect].model.invest.add_share(name, expression) + self.effects[effect].model.invest.add_share( + name, + expression, + has_time_dim=False, + has_scenario_dim=True, + ) else: raise ValueError(f'Target {target} not supported!') def add_share_to_penalty(self, name: str, expression: linopy.LinearExpression) -> None: if expression.ndim != 0: raise TypeError(f'Penalty shares must be scalar expressions! ({expression.ndim=})') - self.penalty.add_share(name, expression) + self.penalty.add_share(name, expression, has_time_dim=False, has_scenario_dim=False) def do_modeling(self): for effect in self.effects: effect.create_model(self._model) self.penalty = self.add( - ShareAllocationModel(self._model, shares_are_time_series=False, label_of_element='Penalty') + ShareAllocationModel(self._model, has_time_dim=False, has_scenario_dim=False, label_of_element='Penalty') ) for model in [effect.model for effect in self.effects] + [self.penalty]: model.do_modeling() self._add_share_between_effects() - self._model.add_objective(self.effects.objective_effect.model.total + self.penalty.total) + self._model.add_objective( + (self.effects.objective_effect.model.total * self._model.scenario_weights).sum() + + self.penalty.total.sum() + ) def _add_share_between_effects(self): for origin_effect in self.effects: @@ -376,11 +395,15 @@ def _add_share_between_effects(self): for target_effect, time_series in origin_effect.specific_share_to_other_effects_operation.items(): self.effects[target_effect].model.operation.add_share( origin_effect.model.operation.label_full, - origin_effect.model.operation.total_per_timestep * time_series.active_data, + origin_effect.model.operation.total_per_timestep * time_series.selected_data, + has_time_dim=True, + has_scenario_dim=True, ) # 2. 
invest: -> here it is a scalar (share)
        for target_effect, factor in origin_effect.specific_share_to_other_effects_invest.items():
            self.effects[target_effect].model.invest.add_share(
                origin_effect.model.invest.label_full,
                origin_effect.model.invest.total * factor,
+                has_time_dim=False,
+                has_scenario_dim=True,
            )
diff --git a/flixopt/elements.py b/flixopt/elements.py
index 05898d4e5..aa1c8e69b 100644
--- a/flixopt/elements.py
+++ b/flixopt/elements.py
@@ -10,8 +10,8 @@
import numpy as np

from .config import CONFIG
-from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesCollection
-from .effects import EffectValuesUser
+from .core import NumericDataTS, PlausibilityError, Scalar, ScenarioData, TimestepData
+from .effects import EffectValuesUserTimestep
from .features import InvestmentModel, OnOffModel, PreventSimultaneousUsageModel
from .interface import InvestParameters, OnOffParameters
from .structure import Element, ElementModel, SystemModel, register_class_for_io
@@ -148,16 +148,16 @@ def __init__(
        self,
        label: str,
        bus: str,
        size: Union[Scalar, InvestParameters] = None,
-        fixed_relative_profile: Optional[NumericDataTS] = None,
-        relative_minimum: NumericDataTS = 0,
-        relative_maximum: NumericDataTS = 1,
-        effects_per_flow_hour: Optional[EffectValuesUser] = None,
+        fixed_relative_profile: Optional[TimestepData] = None,
+        relative_minimum: TimestepData = 0,
+        relative_maximum: TimestepData = 1,
+        effects_per_flow_hour: Optional[EffectValuesUserTimestep] = None,
        on_off_parameters: Optional[OnOffParameters] = None,
-        flow_hours_total_max: Optional[Scalar] = None,
-        flow_hours_total_min: Optional[Scalar] = None,
-        load_factor_min: Optional[Scalar] = None,
-        load_factor_max: Optional[Scalar] = None,
-        previous_flow_rate: Optional[NumericData] = None,
+        flow_hours_total_max: Optional[ScenarioData] = None,
+        flow_hours_total_min: Optional[ScenarioData] = None,
+        load_factor_min: Optional[ScenarioData] = None,
+        load_factor_max: Optional[ScenarioData] = None,
+        previous_flow_rate: Optional[ScenarioData] = None,
        meta_data: Optional[Dict] = None,
    ):
        r"""
@@ -240,10 +240,23 @@ def transform_data(self, flow_system: 'FlowSystem'):
        self.effects_per_flow_hour = flow_system.create_effect_time_series(
            self.label_full, self.effects_per_flow_hour, 'per_flow_hour'
        )
+        self.flow_hours_total_max = flow_system.create_time_series(
+            f'{self.label_full}|flow_hours_total_max', self.flow_hours_total_max, has_time_dim=False
+        )
+        self.flow_hours_total_min = flow_system.create_time_series(
+            f'{self.label_full}|flow_hours_total_min', self.flow_hours_total_min, has_time_dim=False
+        )
+        self.load_factor_max = flow_system.create_time_series(
+            f'{self.label_full}|load_factor_max', self.load_factor_max, has_time_dim=False
+        )
+        self.load_factor_min = flow_system.create_time_series(
+            f'{self.label_full}|load_factor_min', self.load_factor_min, has_time_dim=False
+        )
+
        if self.on_off_parameters is not None:
            self.on_off_parameters.transform_data(flow_system, self.label_full)
        if isinstance(self.size, InvestParameters):
-            self.size.transform_data(flow_system)
+            self.size.transform_data(flow_system, self.label_full)

    def infos(self, use_numpy: bool = True, use_element_label: bool = False) -> Dict:
        infos = super().infos(use_numpy, use_element_label)
@@ -308,7 +321,7 @@ def do_modeling(self):
            self._model.add_variables(
                lower=self.absolute_flow_rate_bounds[0] if self.element.on_off_parameters is None else 0,
                upper=self.absolute_flow_rate_bounds[1],
-                coords=self._model.coords,
+                coords=self._model.get_coords(),
name=f'{self.label_full}|flow_rate', ), 'flow_rate', @@ -348,7 +361,7 @@ def do_modeling(self): self._model.add_variables( lower=self.element.flow_hours_total_min if self.element.flow_hours_total_min is not None else -np.inf, upper=self.element.flow_hours_total_max if self.element.flow_hours_total_max is not None else np.inf, - coords=None, + coords=self._model.get_coords(time_dim=False), name=f'{self.label_full}|total_flow_hours', ), 'total_flow_hours', @@ -356,7 +369,7 @@ def do_modeling(self): self.add( self._model.add_constraints( - self.total_flow_hours == (self.flow_rate * self._model.hours_per_step).sum(), + self.total_flow_hours == (self.flow_rate * self._model.hours_per_step).sum('time'), name=f'{self.label_full}|total_flow_hours', ), 'total_flow_hours', @@ -374,7 +387,7 @@ def _create_shares(self): self._model.effects.add_share_to_effects( name=self.label_full, # Use the full label of the element expressions={ - effect: self.flow_rate * self._model.hours_per_step * factor.active_data + effect: self.flow_rate * self._model.hours_per_step * factor.selected_data for effect, factor in self.element.effects_per_flow_hour.items() }, target='operation', @@ -386,7 +399,7 @@ def _create_bounds_for_load_factor(self): # eq: var_sumFlowHours <= size * dt_tot * load_factor_max if self.element.load_factor_max is not None: name_short = 'load_factor_max' - flow_hours_per_size_max = self._model.hours_per_step.sum() * self.element.load_factor_max + flow_hours_per_size_max = self._model.hours_per_step.sum('time') * self.element.load_factor_max size = self.element.size if self._investment is None else self._investment.size if self._investment is not None: @@ -401,7 +414,7 @@ def _create_bounds_for_load_factor(self): # eq: size * sum(dt)* load_factor_min <= var_sumFlowHours if self.element.load_factor_min is not None: name_short = 'load_factor_min' - flow_hours_per_size_min = self._model.hours_per_step.sum() * self.element.load_factor_min + flow_hours_per_size_min = self._model.hours_per_step.sum('time') * self.element.load_factor_min size = self.element.size if self._investment is None else self._investment.size if self._investment is not None: @@ -414,23 +427,23 @@ def _create_bounds_for_load_factor(self): ) @property - def absolute_flow_rate_bounds(self) -> Tuple[NumericData, NumericData]: + def absolute_flow_rate_bounds(self) -> Tuple[TimestepData, TimestepData]: """Returns absolute flow rate bounds. 
Important for OnOffModel""" relative_minimum, relative_maximum = self.relative_flow_rate_bounds size = self.element.size if not isinstance(size, InvestParameters): return relative_minimum * size, relative_maximum * size if size.fixed_size is not None: - return relative_minimum * size.fixed_size, relative_maximum * size.fixed_size - return relative_minimum * size.minimum_size, relative_maximum * size.maximum_size + return size.fixed_size * relative_minimum, size.fixed_size * relative_maximum + return size.minimum_size * relative_minimum, size.maximum_size * relative_maximum @property - def relative_flow_rate_bounds(self) -> Tuple[NumericData, NumericData]: + def relative_flow_rate_bounds(self) -> Tuple[TimestepData, TimestepData]: """Returns relative flow rate bounds.""" fixed_profile = self.element.fixed_relative_profile if fixed_profile is None: - return self.element.relative_minimum.active_data, self.element.relative_maximum.active_data - return fixed_profile.active_data, fixed_profile.active_data + return self.element.relative_minimum.selected_data, self.element.relative_maximum.selected_data + return fixed_profile.selected_data, fixed_profile.selected_data class BusModel(ElementModel): @@ -451,14 +464,18 @@ def do_modeling(self) -> None: # Fehlerplus/-minus: if self.element.with_excess: excess_penalty = np.multiply( - self._model.hours_per_step, self.element.excess_penalty_per_flow_hour.active_data + self._model.hours_per_step, self.element.excess_penalty_per_flow_hour.selected_data ) self.excess_input = self.add( - self._model.add_variables(lower=0, coords=self._model.coords, name=f'{self.label_full}|excess_input'), + self._model.add_variables( + lower=0, coords=self._model.get_coords(), name=f'{self.label_full}|excess_input' + ), 'excess_input', ) self.excess_output = self.add( - self._model.add_variables(lower=0, coords=self._model.coords, name=f'{self.label_full}|excess_output'), + self._model.add_variables( + lower=0, coords=self._model.get_coords(), name=f'{self.label_full}|excess_output' + ), 'excess_output', ) eq_bus_balance.lhs -= -self.excess_input + self.excess_output diff --git a/flixopt/features.py b/flixopt/features.py index 92caf9dc2..425f9382a 100644 --- a/flixopt/features.py +++ b/flixopt/features.py @@ -9,9 +9,8 @@ import linopy import numpy as np -from . 
import utils
from .config import CONFIG
-from .core import NumericData, Scalar, TimeSeries
+from .core import Scalar, TimeSeries, TimestepData
from .interface import InvestParameters, OnOffParameters, Piece, Piecewise, PiecewiseConversion, PiecewiseEffects
from .structure import Model, SystemModel
@@ -27,13 +26,14 @@ def __init__(
        self,
        label_of_element: str,
        parameters: InvestParameters,
        defining_variable: [linopy.Variable],
-        relative_bounds_of_defining_variable: Tuple[NumericData, NumericData],
+        relative_bounds_of_defining_variable: Tuple[TimestepData, TimestepData],
        label: Optional[str] = None,
        on_variable: Optional[linopy.Variable] = None,
    ):
        super().__init__(model, label_of_element, label)
        self.size: Optional[Union[Scalar, linopy.Variable]] = None
        self.is_invested: Optional[linopy.Variable] = None
+        self.scenario_of_investment: Optional[linopy.Variable] = None

        self.piecewise_effects: Optional[PiecewiseEffectsModel] = None
@@ -43,31 +43,32 @@ def __init__(
        self.parameters = parameters

    def do_modeling(self):
-        if self.parameters.fixed_size and not self.parameters.optional:
-            self.size = self.add(
-                self._model.add_variables(
-                    lower=self.parameters.fixed_size, upper=self.parameters.fixed_size, name=f'{self.label_full}|size'
-                ),
-                'size',
-            )
-        else:
-            self.size = self.add(
-                self._model.add_variables(
-                    lower=0 if self.parameters.optional else self.parameters.minimum_size,
-                    upper=self.parameters.maximum_size,
-                    name=f'{self.label_full}|size',
-                ),
-                'size',
-            )
+        self.size = self.add(
+            self._model.add_variables(
+                # '* 1' turns a TimeSeries bound into its selected DataArray (TimeSeries arithmetic returns plain data)
+                lower=0 if self.parameters.optional else self.parameters.minimum_size * 1,
+                upper=self.parameters.maximum_size * 1,
+                name=f'{self.label_full}|size',
+                coords=self._model.get_coords(time_dim=False),
+            ),
+            'size',
+        )

        # Optional
        if self.parameters.optional:
            self.is_invested = self.add(
-                self._model.add_variables(binary=True, name=f'{self.label_full}|is_invested'), 'is_invested'
+                self._model.add_variables(
+                    binary=True,
+                    name=f'{self.label_full}|is_invested',
+                    coords=self._model.get_coords(time_dim=False),
+                ),
+                'is_invested',
            )
            self._create_bounds_for_optional_investment()

+        if self._model.time_series_collection.scenarios is not None:
+            self._create_bounds_for_scenarios()
+
        # Bounds for defining variable
        self._create_bounds_for_defining_variable()
@@ -182,7 +183,7 @@ def _create_bounds_for_defining_variable(self):
            # ... with mega = relative_maximum * maximum_size
            # equivalent to:
            # eq: - defining_variable(t) + mega * On(t) + size * relative_minimum(t) <= + mega
-            mega = lb_relative * self.parameters.maximum_size
+            mega = self.parameters.maximum_size * lb_relative
            on = self._on_variable
            self.add(
                self._model.add_constraints(
@@ -192,6 +193,73 @@
            )
            # note: in the special case relative_minimum = 0, this equation may be redundant to OnOff??
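+    # A sketch of the scenario coupling intended below for size_per_scenario == 'increment_once'
+    # (SWITCH is called scenario_of_investment in this module; big_m is the maximum size):
+    #   SWITCH(s) = is_invested(s) - is_invested(s-1)   -> 1 only in the single scenario of investment
+    #   size(s) - size(s-1) <=  SWITCH(s) * big_m
+    #   size(s) - size(s-1) >= -SWITCH(s) * big_m
+    # With SWITCH(s) = 0 both inequalities collapse to size(s) == size(s-1), so the size can
+    # only change in the one scenario where the investment takes place.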
+    def _create_bounds_for_scenarios(self):
+        if self.parameters.size_per_scenario == 'equal':
+            self.add(
+                self._model.add_constraints(
+                    self.size.isel(scenario=slice(None, -1)) == self.size.isel(scenario=slice(1, None)),
+                    name=f'{self.label_full}|equalize_size_per_scenario',
+                ),
+                'equalize_size_per_scenario',
+            )
+        elif self.parameters.size_per_scenario == 'increment_once':
+            if not self.parameters.optional:
+                raise ValueError('Increment once can only be used if the Investment is optional')
+
+            self.scenario_of_investment = self.add(
+                self._model.add_variables(
+                    binary=True,
+                    name=f'{self.label_full}|scenario_of_investment',
+                    coords=self._model.get_coords(time_dim=False),
+                ),
+                'scenario_of_investment',
+            )
+
+            # eq: scenario_of_investment(s) = is_invested(s) - is_invested(s-1)
+            self.add(
+                self._model.add_constraints(
+                    self.scenario_of_investment.isel(scenario=slice(1, None))
+                    == self.is_invested.isel(scenario=slice(1, None)) - self.is_invested.isel(scenario=slice(None, -1)),
+                    name=f'{self.label_full}|scenario_of_investment',
+                ),
+                'scenario_of_investment',
+            )
+
+            # eq: scenario_of_investment(s=0) = is_invested(s=0)
+            self.add(
+                self._model.add_constraints(
+                    self.scenario_of_investment.isel(scenario=0)
+                    == self.is_invested.isel(scenario=0),
+                    name=f'{self.label_full}|initial_scenario_of_investment',
+                ),
+                'initial_scenario_of_investment',
+            )
+
+            big_m = self.parameters.maximum_size.isel(scenario=slice(1, None))
+
+            # eq: size(s) - size(s-1) <= scenario_of_investment(s) * big_m
+            self.add(
+                self._model.add_constraints(
+                    self.size.isel(scenario=slice(1, None)) - self.size.isel(scenario=slice(None, -1))
+                    <= self.scenario_of_investment.isel(scenario=slice(1, None)) * big_m,
+                    name=f'{self.label_full}|invest_once_1a',
+                ),
+                'invest_once_1a',
+            )
+
+            # eq: size(s) - size(s-1) >= -scenario_of_investment(s) * big_m
+            # (the lower bound needs -big_m; otherwise, together with 1a, the size would be
+            # forced to jump by exactly big_m in the scenario of investment)
+            self.add(
+                self._model.add_constraints(
+                    self.size.isel(scenario=slice(1, None)) - self.size.isel(scenario=slice(None, -1))
+                    >= -self.scenario_of_investment.isel(scenario=slice(1, None)) * big_m,
+                    name=f'{self.label_full}|invest_once_1b',
+                ),
+                'invest_once_1b',
+            )
+
+        elif self.parameters.size_per_scenario == 'individual':
+            pass
+        else:
+            raise ValueError(f'Invalid value for size_per_scenario: {self.parameters.size_per_scenario}')
+

class OnOffModel(Model):
    """
@@ -205,8 +273,8 @@ def __init__(
        self,
        model: SystemModel,
        on_off_parameters: OnOffParameters,
        label_of_element: str,
        defining_variables: List[linopy.Variable],
-        defining_bounds: List[Tuple[NumericData, NumericData]],
-        previous_values: List[Optional[NumericData]],
+        defining_bounds: List[Tuple[TimestepData, TimestepData]],
+        previous_values: List[Optional[TimestepData]],
        label: Optional[str] = None,
    ):
        """
@@ -246,7 +314,7 @@ def do_modeling(self):
            self._model.add_variables(
                name=f'{self.label_full}|on',
                binary=True,
-                coords=self._model.coords,
+                coords=self._model.get_coords(),
            ),
            'on',
        )
@@ -255,6 +323,7 @@
            self._model.add_variables(
                lower=self.parameters.on_hours_total_min if self.parameters.on_hours_total_min is not None else 0,
                upper=self.parameters.on_hours_total_max if self.parameters.on_hours_total_max is not None else np.inf,
+                coords=self._model.get_coords(time_dim=False),
                name=f'{self.label_full}|on_hours_total',
            ),
            'on_hours_total',
        )

        self.add(
            self._model.add_constraints(
-                self.total_on_hours == (self.on * self._model.hours_per_step).sum(),
+                self.total_on_hours == (self.on * self._model.hours_per_step).sum('time'),
                name=f'{self.label_full}|on_hours_total',
            ),
            'on_hours_total',
        )
@@ -275,7 +344,7 @@ def do_modeling(self):
            self._model.add_variables(
name=f'{self.label_full}|off', binary=True, - coords=self._model.coords, + coords=self._model.get_coords(), ), 'off', ) @@ -303,12 +372,16 @@ def do_modeling(self): if self.parameters.use_switch_on: self.switch_on = self.add( - self._model.add_variables(binary=True, name=f'{self.label_full}|switch_on', coords=self._model.coords), + self._model.add_variables( + binary=True, name=f'{self.label_full}|switch_on', coords=self._model.get_coords() + ), 'switch_on', ) self.switch_off = self.add( - self._model.add_variables(binary=True, name=f'{self.label_full}|switch_off', coords=self._model.coords), + self._model.add_variables( + binary=True, name=f'{self.label_full}|switch_off', coords=self._model.get_coords() + ), 'switch_off', ) @@ -434,14 +507,14 @@ def _get_duration_in_hours( """ assert binary_variable is not None, f'Duration Variable of {self.label_full} must be defined to add constraints' - mega = self._model.hours_per_step.sum() + previous_duration + mega = self._model.hours_per_step.sum('time') + previous_duration if maximum_duration is not None: first_step_max: Scalar = maximum_duration.isel(time=0) if previous_duration + self._model.hours_per_step[0] > first_step_max: logger.warning( - f'The maximum duration of "{variable_name}" is set to {maximum_duration.active_data}h, ' + f'The maximum duration of "{variable_name}" is set to {maximum_duration.selected_data}h, ' f'but the consecutive_duration previous to this model is {previous_duration}h. ' f'This forces "{binary_variable.name} = 0" in the first time step ' f'(dt={self._model.hours_per_step[0]}h)!' @@ -450,8 +523,8 @@ def _get_duration_in_hours( duration_in_hours = self.add( self._model.add_variables( lower=0, - upper=maximum_duration.active_data if maximum_duration is not None else mega, - coords=self._model.coords, + upper=maximum_duration.selected_data if maximum_duration is not None else mega, + coords=self._model.get_coords(), name=f'{self.label_full}|{variable_name}', ), variable_name, @@ -579,7 +652,7 @@ def _add_switch_constraints(self): # eq: nrSwitchOn = sum(SwitchOn(t)) self.add( self._model.add_constraints( - self.switch_on_nr == self.switch_on.sum(), name=f'{self.label_full}|switch_on_nr' + self.switch_on_nr == self.switch_on.sum('time'), name=f'{self.label_full}|switch_on_nr' ), 'switch_on_nr', ) @@ -623,7 +696,7 @@ def previous_consecutive_off_hours(self) -> Scalar: return self.compute_consecutive_duration(self.previous_off_values, self._model.hours_per_step) @staticmethod - def compute_previous_on_states(previous_values: List[Optional[NumericData]], epsilon: float = 1e-5) -> np.ndarray: + def compute_previous_on_states(previous_values: List[Optional[TimestepData]], epsilon: float = 1e-5) -> np.ndarray: """ Computes the previous 'on' states {0, 1} of defining variables as a binary array from their previous values. @@ -647,7 +720,7 @@ def compute_previous_on_states(previous_values: List[Optional[NumericData]], eps @staticmethod def compute_consecutive_duration( - binary_values: NumericData, hours_per_timestep: Union[int, float, np.ndarray] + binary_values: TimestepData, hours_per_timestep: Union[int, float, np.ndarray] ) -> Scalar: """ Computes the final consecutive duration in State 'on' (=1) in hours, from a binary. 
@@ -716,7 +789,7 @@ def do_modeling(self):
             self._model.add_variables(
                 binary=True,
                 name=f'{self.label_full}|inside_piece',
-                coords=self._model.coords if self._as_time_series else None,
+                coords=self._model.get_coords(time_dim=self._as_time_series),
             ),
             'inside_piece',
         )
@@ -726,7 +799,7 @@ def do_modeling(self):
                 lower=0,
                 upper=1,
                 name=f'{self.label_full}|lambda0',
-                coords=self._model.coords if self._as_time_series else None,
+                coords=self._model.get_coords(time_dim=self._as_time_series),
             ),
             'lambda0',
         )
@@ -736,7 +809,7 @@ def do_modeling(self):
                 lower=0,
                 upper=1,
                 name=f'{self.label_full}|lambda1',
-                coords=self._model.coords if self._as_time_series else None,
+                coords=self._model.get_coords(time_dim=self._as_time_series),
             ),
             'lambda1',
         )
@@ -820,7 +893,7 @@ def do_modeling(self):
         elif self._zero_point is True:
             self.zero_point = self.add(
                 self._model.add_variables(
-                    coords=self._model.coords, binary=True, name=f'{self.label_full}|zero_point'
+                    coords=self._model.get_coords(), binary=True, name=f'{self.label_full}|zero_point'
                 ),
                 'zero_point',
             )
@@ -841,19 +914,20 @@ class ShareAllocationModel(Model):
     def __init__(
         self,
         model: SystemModel,
-        shares_are_time_series: bool,
+        has_time_dim: bool,
+        has_scenario_dim: bool,
         label_of_element: Optional[str] = None,
         label: Optional[str] = None,
         label_full: Optional[str] = None,
         total_max: Optional[Scalar] = None,
         total_min: Optional[Scalar] = None,
-        max_per_hour: Optional[NumericData] = None,
-        min_per_hour: Optional[NumericData] = None,
+        max_per_hour: Optional[TimestepData] = None,
+        min_per_hour: Optional[TimestepData] = None,
     ):
         super().__init__(model, label_of_element=label_of_element, label=label, label_full=label_full)
-        if not shares_are_time_series:  # If the condition is True
+        if not has_time_dim:
             assert max_per_hour is None and min_per_hour is None, (
-                'Both max_per_hour and min_per_hour cannot be used when shares_are_time_series is False'
+                'Both max_per_hour and min_per_hour cannot be used when has_time_dim is False'
             )
         self.total_per_timestep: Optional[linopy.Variable] = None
         self.total: Optional[linopy.Variable] = None
@@ -864,7 +938,8 @@ def __init__(
         self._eq_total: Optional[linopy.Constraint] = None

         # Parameters
-        self._shares_are_time_series = shares_are_time_series
+        self._has_time_dim = has_time_dim
+        self._has_scenario_dim = has_scenario_dim
-        self._total_max = total_max if total_min is not None else np.inf
+        self._total_max = total_max if total_max is not None else np.inf
         self._total_min = total_min if total_min is not None else -np.inf
         self._max_per_hour = max_per_hour if max_per_hour is not None else np.inf
@@ -873,7 +948,10 @@ def do_modeling(self):
         self.total = self.add(
             self._model.add_variables(
-                lower=self._total_min, upper=self._total_max, coords=None, name=f'{self.label_full}|total'
+                lower=self._total_min,
+                upper=self._total_max,
+                coords=self._model.get_coords(time_dim=False, scenario_dim=self._has_scenario_dim),
+                name=f'{self.label_full}|total',
             ),
             'total',
         )
@@ -882,16 +960,12 @@ def do_modeling(self):
             self._model.add_constraints(self.total == 0, name=f'{self.label_full}|total'), 'total'
         )

-        if self._shares_are_time_series:
+        if self._has_time_dim:
             self.total_per_timestep = self.add(
                 self._model.add_variables(
-                    lower=-np.inf
-                    if (self._min_per_hour is None)
-                    else np.multiply(self._min_per_hour, self._model.hours_per_step),
-                    upper=np.inf
-                    if (self._max_per_hour is None)
-                    else np.multiply(self._max_per_hour, self._model.hours_per_step),
-                    coords=self._model.coords,
+                    lower=-np.inf if (self._min_per_hour is None) else self._min_per_hour * self._model.hours_per_step,
+                    upper=np.inf if (self._max_per_hour is None) else self._max_per_hour * self._model.hours_per_step,
+                    coords=self._model.get_coords(time_dim=True, scenario_dim=self._has_scenario_dim),
                     name=f'{self.label_full}|total_per_timestep',
                 ),
                 'total_per_timestep',
             )
@@ -903,12 +977,14 @@ def do_modeling(self):
             )

             # Add it to the total
-            self._eq_total.lhs -= self.total_per_timestep.sum()
+            self._eq_total.lhs -= self.total_per_timestep.sum(dim='time')

     def add_share(
         self,
         name: str,
         expression: linopy.LinearExpression,
+        has_time_dim: bool,
+        has_scenario_dim: bool,
     ):
         """
         Add a share to the share allocation model. If the share already exists, the expression is added to the existing share.
@@ -920,16 +996,17 @@ def add_share(
             name: The name of the share.
             expression: The expression of the share. Added to the right hand side of the constraint.
+            has_time_dim: Whether the share has a time dimension.
+            has_scenario_dim: Whether the share has a scenario dimension.
         """
+        if has_time_dim and not self._has_time_dim:
+            raise ValueError('Cannot add share with time_dim=True to a model without time_dim')
+        if has_scenario_dim and not self._has_scenario_dim:
+            raise ValueError('Cannot add share with scenario_dim=True to a model without scenario_dim')
+
         if name in self.shares:
             self.share_constraints[name].lhs -= expression
         else:
             self.shares[name] = self.add(
                 self._model.add_variables(
-                    coords=None
-                    if isinstance(expression, linopy.LinearExpression)
-                    and expression.ndim == 0
-                    or not isinstance(expression, linopy.LinearExpression)
-                    else self._model.coords,
+                    coords=self._model.get_coords(time_dim=has_time_dim, scenario_dim=has_scenario_dim),
                     name=f'{name}->{self.label_full}',
                 ),
                 name,
             )
             self.share_constraints[name] = self.add(
                 self._model.add_constraints(self.shares[name] == expression, name=f'{name}->{self.label_full}'), name
             )
-        if self.shares[name].ndim == 0:
+        if not has_time_dim:
             self._eq_total.lhs -= self.shares[name]
         else:
             self._eq_total_per_timestep.lhs -= self.shares[name]
@@ -966,7 +1043,12 @@ def do_modeling(self):
     def do_modeling(self):
         self.shares = {
-            effect: self.add(self._model.add_variables(coords=None, name=f'{self.label_full}|{effect}'), f'{effect}')
+            effect: self.add(
+                self._model.add_variables(
+                    coords=self._model.get_coords(time_dim=False), name=f'{self.label_full}|{effect}'
+                ),
+                f'{effect}',
+            )
             for effect in self._piecewise_shares
         }

diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py
index 93720de60..d62f018bf 100644
--- a/flixopt/flow_system.py
+++ b/flixopt/flow_system.py
@@ -16,10 +16,17 @@
 from rich.pretty import Pretty

 from .
import io as fx_io -from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesCollection, TimeSeriesData -from .effects import Effect, EffectCollection, EffectTimeSeries, EffectValuesDict, EffectValuesUser +from .core import Scalar, ScenarioData, TimeSeries, TimeSeriesCollection, TimeSeriesData, TimestepData +from .effects import ( + Effect, + EffectCollection, + EffectTimeSeries, + EffectValuesDict, + EffectValuesUserScenario, + EffectValuesUserTimestep, +) from .elements import Bus, Component, Flow -from .structure import CLASS_REGISTRY, Element, SystemModel, get_compact_representation, get_str_representation +from .structure import CLASS_REGISTRY, Element, SystemModel if TYPE_CHECKING: import pyvis @@ -35,23 +42,29 @@ class FlowSystem: def __init__( self, timesteps: pd.DatetimeIndex, + scenarios: Optional[pd.Index] = None, hours_of_last_timestep: Optional[float] = None, hours_of_previous_timesteps: Optional[Union[int, float, np.ndarray]] = None, + scenario_weights: Optional[ScenarioData] = None, ): """ Args: timesteps: The timesteps of the model. + scenarios: The scenarios of the model. hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified hours_of_previous_timesteps: The duration of previous timesteps. If None, the first time increment of time_series is used. This is needed to calculate previous durations (for example consecutive_on_hours). If you use an array, take care that its long enough to cover all previous values! + scenario_weights: The weights of the scenarios. If None, all scenarios have the same weight. All weights are normalized to 1. """ self.time_series_collection = TimeSeriesCollection( timesteps=timesteps, + scenarios=scenarios, hours_of_last_timestep=hours_of_last_timestep, hours_of_previous_timesteps=hours_of_previous_timesteps, ) + self.scenario_weights = scenario_weights # defaults: self.components: Dict[str, Component] = {} @@ -184,7 +197,7 @@ def as_dataset(self, constants_in_dataset: bool = False) -> xr.Dataset: Args: constants_in_dataset: If True, constants are included as Dataset variables. """ - ds = self.time_series_collection.to_dataset(include_constants=constants_in_dataset) + ds = self.time_series_collection.as_dataset() ds.attrs = self.as_dict(data_mode='name') return ds @@ -268,41 +281,80 @@ def network_infos(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, def transform_data(self): if not self._connected: self._connect_network() + self.scenario_weights = self.create_time_series( + 'scenario_weights', self.scenario_weights, has_time_dim=False, has_scenario_dim=True + ) for element in self.all_elements.values(): element.transform_data(self) def create_time_series( self, name: str, - data: Optional[Union[NumericData, TimeSeriesData, TimeSeries]], - needs_extra_timestep: bool = False, - ) -> Optional[TimeSeries]: + data: Optional[Union[TimestepData, TimeSeriesData, TimeSeries]], + has_time_dim: bool = True, + has_scenario_dim: bool = True, + has_extra_timestep: bool = False, + ) -> Optional[Union[Scalar, TimeSeries]]: """ Tries to create a TimeSeries from NumericData Data and adds it to the time_series_collection If the data already is a TimeSeries, nothing happens and the TimeSeries gets reset and returned If the data is a TimeSeriesData, it is converted to a TimeSeries, and the aggregation weights are applied. If the data is None, nothing happens. 
+ + Args: + name: The name of the TimeSeries + data: The data to create a TimeSeries from + has_time_dim: Whether the data has a time dimension + has_scenario_dim: Whether the data has a scenario dimension + has_extra_timestep: Whether the data has an extra timestep """ + if not has_time_dim and not has_scenario_dim: + raise ValueError('At least one of the dimensions must be present') if data is None: return None - elif isinstance(data, TimeSeries): + + if not has_time_dim and self.time_series_collection.scenarios is None: + return data + + if isinstance(data, TimeSeries): data.restore_data() if data in self.time_series_collection: return data - return self.time_series_collection.create_time_series( - data=data.active_data, name=name, needs_extra_timestep=needs_extra_timestep + return self.time_series_collection.add_time_series( + data=data.selected_data, + name=name, + has_time_dim=has_time_dim, + has_scenario_dim=has_scenario_dim, + has_extra_timestep=has_extra_timestep, + ) + elif isinstance(data, TimeSeriesData): + data.label = name + return self.time_series_collection.add_time_series( + data=data.data, + name=name, + has_time_dim=has_time_dim, + has_scenario_dim=has_scenario_dim, + has_extra_timestep=has_extra_timestep, + aggregation_weight=data.agg_weight, + aggregation_group=data.agg_group, ) - return self.time_series_collection.create_time_series( - data=data, name=name, needs_extra_timestep=needs_extra_timestep + return self.time_series_collection.add_time_series( + data=data, + name=name, + has_time_dim=has_time_dim, + has_scenario_dim=has_scenario_dim, + has_extra_timestep=has_extra_timestep, ) def create_effect_time_series( self, label_prefix: Optional[str], - effect_values: EffectValuesUser, + effect_values: Union[EffectValuesUserScenario, EffectValuesUserTimestep], label_suffix: Optional[str] = None, - ) -> Optional[EffectTimeSeries]: + has_time_dim: bool = True, + has_scenario_dim: bool = True, + ) -> Optional[Union[EffectTimeSeries, EffectValuesDict]]: """ Transform EffectValues to EffectTimeSeries. Creates a TimeSeries for each key in the nested_values dictionary, using the value as the data. @@ -310,13 +362,31 @@ def create_effect_time_series( The resulting label of the TimeSeries is the label of the parent_element, followed by the label of the Effect in the nested_values and the label_suffix. 
If the key in the EffectValues is None, the alias 'Standard_Effect' is used + + Args: + label_prefix: Prefix for the TimeSeries name + effect_values: Dictionary of EffectValues + label_suffix: Suffix for the TimeSeries name + has_time_dim: Whether the data has a time dimension + has_scenario_dim: Whether the data has a scenario dimension """ + if not has_time_dim and not has_scenario_dim: + raise ValueError('At least one of the dimensions must be present') + effect_values: Optional[EffectValuesDict] = self.effects.create_effect_values_dict(effect_values) if effect_values is None: return None + if not has_time_dim and self.time_series_collection.scenarios is None: + return effect_values + return { - effect: self.create_time_series('|'.join(filter(None, [label_prefix, effect, label_suffix])), value) + effect: self.create_time_series( + name='|'.join(filter(None, [label_prefix, effect, label_suffix])), + data=value, + has_time_dim=has_time_dim, + has_scenario_dim=has_scenario_dim, + ) for effect, value in effect_values.items() } diff --git a/flixopt/interface.py b/flixopt/interface.py index f9dbeb518..a7b254fb6 100644 --- a/flixopt/interface.py +++ b/flixopt/interface.py @@ -4,14 +4,14 @@ """ import logging -from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union +from typing import TYPE_CHECKING, Dict, Iterator, List, Literal, Optional, Union from .config import CONFIG -from .core import NumericData, NumericDataTS, Scalar +from .core import NumericDataTS, Scalar, ScenarioData, TimestepData from .structure import Interface, register_class_for_io if TYPE_CHECKING: # for type checking and preventing circular imports - from .effects import EffectValuesUser, EffectValuesUserScalar + from .effects import EffectValuesUserScenario, EffectValuesUserTimestep from .flow_system import FlowSystem @@ -20,7 +20,7 @@ @register_class_for_io class Piece(Interface): - def __init__(self, start: NumericData, end: NumericData): + def __init__(self, start: TimestepData, end: TimestepData): """ Define a Piece, which is part of a Piecewise object. @@ -30,10 +30,15 @@ def __init__(self, start: NumericData, end: NumericData): """ self.start = start self.end = end + self.has_time_dim = False def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): - self.start = flow_system.create_time_series(f'{name_prefix}|start', self.start) - self.end = flow_system.create_time_series(f'{name_prefix}|end', self.end) + self.start = flow_system.create_time_series( + name=f'{name_prefix}|start', data=self.start, has_time_dim=self.has_time_dim, has_scenario_dim=True + ) + self.end = flow_system.create_time_series( + name=f'{name_prefix}|end', data=self.end, has_time_dim=self.has_time_dim, has_scenario_dim=True + ) @register_class_for_io @@ -46,6 +51,17 @@ def __init__(self, pieces: List[Piece]): pieces: The pieces of the piecewise. """ self.pieces = pieces + self._has_time_dim = False + + @property + def has_time_dim(self): + return self._has_time_dim + + @has_time_dim.setter + def has_time_dim(self, value): + self._has_time_dim = value + for piece in self.pieces: + piece.has_time_dim = value def __len__(self): return len(self.pieces) @@ -73,6 +89,18 @@ def __init__(self, piecewises: Dict[str, Piecewise]): piecewises: Dict of Piecewises defining the conversion factors. 
flow labels as keys, piecewise as values
         """
         self.piecewises = piecewises
+        self._has_time_dim = True
+        self.has_time_dim = True  # Initial propagation
+
+    @property
+    def has_time_dim(self):
+        return self._has_time_dim
+
+    @has_time_dim.setter
+    def has_time_dim(self, value):
+        self._has_time_dim = value
+        for piecewise in self.piecewises.values():
+            piecewise.has_time_dim = value

     def items(self):
         return self.piecewises.items()
@@ -94,12 +122,24 @@ def __init__(self, piecewise_origin: Piecewise, piecewise_shares: Dict[str, Piec
         """
         self.piecewise_origin = piecewise_origin
         self.piecewise_shares = piecewise_shares
+        self._has_time_dim = False
+        self.has_time_dim = False  # Initial propagation
+
+    @property
+    def has_time_dim(self):
+        return self._has_time_dim
+
+    @has_time_dim.setter
+    def has_time_dim(self, value):
+        self._has_time_dim = value
+        self.piecewise_origin.has_time_dim = value
+        for piecewise in self.piecewise_shares.values():
+            piecewise.has_time_dim = value

     def transform_data(self, flow_system: 'FlowSystem', name_prefix: str):
-        raise NotImplementedError('PiecewiseEffects is not yet implemented for non scalar shares')
-        # self.piecewise_origin.transform_data(flow_system, f'{name_prefix}|PiecewiseEffects|origin')
-        # for name, piecewise in self.piecewise_shares.items():
-        #     piecewise.transform_data(flow_system, f'{name_prefix}|PiecewiseEffects|{name}')
+        self.piecewise_origin.transform_data(flow_system, f'{name_prefix}|PiecewiseEffects|origin')
+        for effect, piecewise in self.piecewise_shares.items():
+            piecewise.transform_data(flow_system, f'{name_prefix}|PiecewiseEffects|{effect}')


 @register_class_for_io
@@ -110,14 +150,15 @@ class InvestParameters(Interface):
     def __init__(
         self,
-        fixed_size: Optional[Union[int, float]] = None,
-        minimum_size: Union[int, float] = 0,  # TODO: Use EPSILON?
-        maximum_size: Optional[Union[int, float]] = None,
+        fixed_size: Optional[ScenarioData] = None,
+        minimum_size: ScenarioData = 0,  # TODO: Use EPSILON?
+        maximum_size: Optional[ScenarioData] = None,
         optional: bool = True,  # the investment is optional
-        fix_effects: Optional['EffectValuesUserScalar'] = None,
-        specific_effects: Optional['EffectValuesUserScalar'] = None,  # costs per Flow-Unit/Storage-Size/...
+        fix_effects: Optional['EffectValuesUserScenario'] = None,
+        specific_effects: Optional['EffectValuesUserScenario'] = None,  # costs per Flow-Unit/Storage-Size/...
         piecewise_effects: Optional[PiecewiseEffects] = None,
-        divest_effects: Optional['EffectValuesUserScalar'] = None,
+        divest_effects: Optional['EffectValuesUserScenario'] = None,
+        size_per_scenario: Literal['equal', 'individual', 'increment_once'] = 'equal',
     ):
         """
         Args:
@@ -128,58 +169,84 @@ def __init__(
             specific_effects: Specific costs, e.g., in €/kW_nominal or €/m²_nominal.
                 Example: {costs: 3, CO2: 0.3} with costs and CO2 representing an Object of class Effect
                 (Attention: Annualize costs to chosen period!)
-            piecewise_effects: Linear piecewise relation [invest_pieces, cost_pieces].
-                Example 1:
-                    [   [5, 25, 25, 100],          # size in kW
-                        {costs: [50,250,250,800],  # €
-                         PE: [5, 25, 25, 100]      # kWh_PrimaryEnergy
-                        }
-                    ]
-                Example 2 (if only standard-effect):
-                    [   [5, 25, 25, 100],  # kW  # size in kW
-                        [50,250,250,800]   # value for standart effect, typically €
-                    ]  # €
-                (Attention: Annualize costs to chosen period!)
-                (Args 'specific_effects' and 'fix_effects' can be used in parallel to Investsizepieces)
-            minimum_size: Min nominal value (only if: size_is_fixed = False).
-            maximum_size: Max nominal value (only if: size_is_fixed = False).
+ piecewise_effects: Define the effects of the investment as a piecewise function of the size of the investment. + minimum_size: Minimum possible size of the investment. + maximum_size: Maximum possible size of the investment. + size_per_scenario: How to treat the size in each scenario + - 'equal': Equalize the size of all scenarios + - 'individual': Optimize the size of each scenario individually + - 'increment_once': Allow the size to increase only once. This is useful if the scenarios are related to + different periods (years, months). Tune the timing by setting the maximum size to 0 in the first scenarios. """ - self.fix_effects: EffectValuesUser = fix_effects or {} - self.divest_effects: EffectValuesUser = divest_effects or {} + self.fix_effects: EffectValuesUserScenario = fix_effects if fix_effects is not None else {} + self.divest_effects: EffectValuesUserScenario = divest_effects if divest_effects is not None else {} self.fixed_size = fixed_size self.optional = optional - self.specific_effects: EffectValuesUser = specific_effects or {} + self.specific_effects: EffectValuesUserScenario = specific_effects if specific_effects is not None else {} self.piecewise_effects = piecewise_effects self._minimum_size = minimum_size - self._maximum_size = maximum_size or CONFIG.modeling.BIG # default maximum + self._maximum_size = CONFIG.modeling.BIG if maximum_size is None else maximum_size # default maximum + self.size_per_scenario = size_per_scenario - def transform_data(self, flow_system: 'FlowSystem'): - self.fix_effects = flow_system.effects.create_effect_values_dict(self.fix_effects) - self.divest_effects = flow_system.effects.create_effect_values_dict(self.divest_effects) - self.specific_effects = flow_system.effects.create_effect_values_dict(self.specific_effects) + def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): + self.fix_effects = flow_system.create_effect_time_series( + label_prefix=name_prefix, + effect_values=self.fix_effects, + label_suffix='fix_effects', + has_time_dim=False, + has_scenario_dim=True, + ) + self.divest_effects = flow_system.create_effect_time_series( + label_prefix=name_prefix, + effect_values=self.divest_effects, + label_suffix='divest_effects', + has_time_dim=False, + has_scenario_dim=True, + ) + self.specific_effects = flow_system.create_effect_time_series( + label_prefix=name_prefix, + effect_values=self.specific_effects, + label_suffix='specific_effects', + has_time_dim=False, + has_scenario_dim=True, + ) + if self.piecewise_effects is not None: + self.piecewise_effects.has_time_dim = False + self.piecewise_effects.transform_data(flow_system, f'{name_prefix}|PiecewiseEffects') + + self._minimum_size = flow_system.create_time_series( + f'{name_prefix}|minimum_size', self.minimum_size, has_time_dim=False, has_scenario_dim=True + ) + self._maximum_size = flow_system.create_time_series( + f'{name_prefix}|maximum_size', self.maximum_size, has_time_dim=False, has_scenario_dim=True + ) + if self.fixed_size is not None: + self.fixed_size = flow_system.create_time_series( + f'{name_prefix}|fixed_size', self.fixed_size, has_time_dim=False, has_scenario_dim=True + ) @property def minimum_size(self): - return self.fixed_size or self._minimum_size + return self.fixed_size if self.fixed_size is not None else self._minimum_size @property def maximum_size(self): - return self.fixed_size or self._maximum_size + return self.fixed_size if self.fixed_size is not None else self._maximum_size @register_class_for_io class OnOffParameters(Interface): def 
__init__( self, - effects_per_switch_on: Optional['EffectValuesUser'] = None, - effects_per_running_hour: Optional['EffectValuesUser'] = None, - on_hours_total_min: Optional[int] = None, - on_hours_total_max: Optional[int] = None, - consecutive_on_hours_min: Optional[NumericData] = None, - consecutive_on_hours_max: Optional[NumericData] = None, - consecutive_off_hours_min: Optional[NumericData] = None, - consecutive_off_hours_max: Optional[NumericData] = None, - switch_on_total_max: Optional[int] = None, + effects_per_switch_on: Optional['EffectValuesUserTimestep'] = None, + effects_per_running_hour: Optional['EffectValuesUserTimestep'] = None, + on_hours_total_min: Optional[ScenarioData] = None, + on_hours_total_max: Optional[ScenarioData] = None, + consecutive_on_hours_min: Optional[TimestepData] = None, + consecutive_on_hours_max: Optional[TimestepData] = None, + consecutive_off_hours_min: Optional[TimestepData] = None, + consecutive_off_hours_max: Optional[TimestepData] = None, + switch_on_total_max: Optional[ScenarioData] = None, force_switch_on: bool = False, ): """ @@ -202,8 +269,8 @@ def __init__( switch_on_total_max: max nr of switchOn operations force_switch_on: force creation of switch on variable, even if there is no switch_on_total_max """ - self.effects_per_switch_on: EffectValuesUser = effects_per_switch_on or {} - self.effects_per_running_hour: EffectValuesUser = effects_per_running_hour or {} + self.effects_per_switch_on: EffectValuesUserTimestep = effects_per_switch_on or {} + self.effects_per_running_hour: EffectValuesUserTimestep = effects_per_running_hour or {} self.on_hours_total_min: Scalar = on_hours_total_min self.on_hours_total_max: Scalar = on_hours_total_max self.consecutive_on_hours_min: NumericDataTS = consecutive_on_hours_min @@ -232,6 +299,15 @@ def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): self.consecutive_off_hours_max = flow_system.create_time_series( f'{name_prefix}|consecutive_off_hours_max', self.consecutive_off_hours_max ) + self.on_hours_total_max = flow_system.create_time_series( + f'{name_prefix}|on_hours_total_max', self.on_hours_total_max, has_time_dim=False + ) + self.on_hours_total_min = flow_system.create_time_series( + f'{name_prefix}|on_hours_total_min', self.on_hours_total_min, has_time_dim=False + ) + self.switch_on_total_max = flow_system.create_time_series( + f'{name_prefix}|switch_on_total_max', self.switch_on_total_max, has_time_dim=False + ) @property def use_off(self) -> bool: diff --git a/flixopt/io.py b/flixopt/io.py index 35d927136..1ef9578e5 100644 --- a/flixopt/io.py +++ b/flixopt/io.py @@ -23,7 +23,7 @@ def replace_timeseries(obj, mode: Literal['name', 'stats', 'data'] = 'name'): return [replace_timeseries(v, mode) for v in obj] elif isinstance(obj, TimeSeries): # Adjust this based on the actual class if obj.all_equal: - return obj.active_data.values[0].item() + return obj.selected_data.values.max().item() elif mode == 'name': return f'::::{obj.name}' elif mode == 'stats': @@ -44,7 +44,7 @@ def insert_dataarray(obj, ds: xr.Dataset): return [insert_dataarray(v, ds) for v in obj] elif isinstance(obj, str) and obj.startswith('::::'): da = ds[obj[4:]] - if da.isel(time=-1).isnull(): + if 'time' in da.dims and da.isel(time=-1).isnull().any().item(): return da.isel(time=slice(0, -1)) return da else: diff --git a/flixopt/results.py b/flixopt/results.py index d9eb5a654..ae54b9e2e 100644 --- a/flixopt/results.py +++ b/flixopt/results.py @@ -161,6 +161,7 @@ def __init__( self.timesteps_extra = 
self.solution.indexes['time']
         self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.timesteps_extra)
+        self.scenarios = self.solution.indexes['scenario'] if 'scenario' in self.solution.indexes else None

     def __getitem__(self, key: str) -> Union['ComponentResults', 'BusResults', 'EffectResults']:
         if key in self.components:
@@ -196,19 +197,39 @@ def constraints(self) -> linopy.Constraints:
         return self.model.constraints

     def filter_solution(
-        self, variable_dims: Optional[Literal['scalar', 'time']] = None, element: Optional[str] = None
+        self,
+        variable_dims: Optional[Literal['scalar', 'time', 'scenario', 'timeonly', 'scenarioonly']] = None,
+        element: Optional[str] = None,
+        timesteps: Optional[pd.DatetimeIndex] = None,
+        scenarios: Optional[pd.Index] = None,
     ) -> xr.Dataset:
         """
         Filter the solution to a specific variable dimension and element.
         If no element is specified, all elements are included.

         Args:
-            variable_dims: The dimension of the variables to filter for.
+            variable_dims: The dimensions to filter variables by.
+                - 'scalar': Get scalar variables (without dimensions)
+                - 'time': Get time-dependent variables (with a time dimension)
+                - 'scenario': Get scenario-dependent variables (with a scenario dimension)
+                - 'timeonly': Get time-dependent variables (with ONLY a time dimension)
+                - 'scenarioonly': Get scenario-dependent variables (with ONLY a scenario dimension)
             element: The element to filter for.
+            timesteps: Optional time indexes to select. Can be:
+                - pd.DatetimeIndex: Multiple timesteps
+                - str/pd.Timestamp: Single timestep
+                Defaults to all available timesteps.
+            scenarios: Optional scenario indexes to select. Can be:
+                - pd.Index: Multiple scenarios
+                - str/int: Single scenario (int is treated as a label, not an index position)
+                Defaults to all available scenarios.
         """
-        if element is not None:
-            return filter_dataset(self[element].solution, variable_dims)
-        return filter_dataset(self.solution, variable_dims)
+        return filter_dataset(
+            self.solution if element is None else self[element].solution,
+            variable_dims=variable_dims,
+            timesteps=timesteps,
+            scenarios=scenarios,
+        )

     def plot_heatmap(
         self,
@@ -219,10 +240,32 @@ def plot_heatmap(
         save: Union[bool, pathlib.Path] = False,
         show: bool = True,
         engine: plotting.PlottingEngine = 'plotly',
+        scenario: Optional[Union[str, int]] = None,
     ) -> Union[plotly.graph_objs.Figure, Tuple[plt.Figure, plt.Axes]]:
+        """
+        Plots a heatmap of the solution of a variable.
+
+        Args:
+            variable_name: The name of the variable to plot.
+            heatmap_timeframes: The timeframes to use for the heatmap.
+            heatmap_timesteps_per_frame: The timesteps per frame to use for the heatmap.
+            color_map: The color map to use for the heatmap.
+            save: Whether to save the plot or not. If a path is provided, the plot will be saved at that location.
+            show: Whether to show the plot or not.
+            engine: The engine to use for plotting. Can be either 'plotly' or 'matplotlib'.
+            scenario: The scenario to plot. Defaults to the first scenario. Has no effect without scenarios present.
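+
+        Example (illustrative; variable and scenario labels assumed):
+            results.plot_heatmap('Boiler(Q_th)|flow_rate', scenario='Base Case')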
        """
+        dataarray = self.solution[variable_name]
+
+        scenario_suffix = ''
+        if 'scenario' in dataarray.indexes:
+            chosen_scenario = scenario or self.scenarios[0]
+            dataarray = dataarray.sel(scenario=chosen_scenario).drop_vars('scenario')
+            scenario_suffix = f'--{chosen_scenario}'
+
         return plot_heatmap(
-            dataarray=self.solution[variable_name],
-            name=variable_name,
+            dataarray=dataarray,
+            name=f'{variable_name}{scenario_suffix}',
             folder=self.folder,
             heatmap_timeframes=heatmap_timeframes,
             heatmap_timesteps_per_frame=heatmap_timesteps_per_frame,
@@ -345,14 +388,38 @@ def constraints(self) -> linopy.Constraints:
             raise ValueError('The linopy model is not available.')
         return self._calculation_results.model.constraints[self._variable_names]

-    def filter_solution(self, variable_dims: Optional[Literal['scalar', 'time']] = None) -> xr.Dataset:
+    def filter_solution(
+        self,
+        variable_dims: Optional[Literal['scalar', 'time', 'scenario', 'timeonly', 'scenarioonly']] = None,
+        timesteps: Optional[pd.DatetimeIndex] = None,
+        scenarios: Optional[pd.Index] = None,
+    ) -> xr.Dataset:
         """
-        Filter the solution of the element by dimension.
+        Filter the solution of this element by variable dimension, and optionally
+        select specific timesteps and scenarios.

         Args:
-            variable_dims: The dimension of the variables to filter for.
+            variable_dims: The dimensions to filter variables by.
+                - 'scalar': Get scalar variables (without dimensions)
+                - 'time': Get time-dependent variables (with a time dimension)
+                - 'scenario': Get scenario-dependent variables (with a scenario dimension)
+                - 'timeonly': Get time-dependent variables (with ONLY a time dimension)
+                - 'scenarioonly': Get scenario-dependent variables (with ONLY a scenario dimension)
+            timesteps: Optional time indexes to select. Can be:
+                - pd.DatetimeIndex: Multiple timesteps
+                - str/pd.Timestamp: Single timestep
+                Defaults to all available timesteps.
+            scenarios: Optional scenario indexes to select. Can be:
+                - pd.Index: Multiple scenarios
+                - str/int: Single scenario (int is treated as a label, not an index position)
+                Defaults to all available scenarios.
         """
-        return filter_dataset(self.solution, variable_dims)
+        return filter_dataset(
+            self.solution,
+            variable_dims=variable_dims,
+            timesteps=timesteps,
+            scenarios=scenarios,
+        )


 class _NodeResults(_ElementResults):
@@ -386,28 +453,46 @@ def plot_node_balance(
         show: bool = True,
         colors: plotting.ColorType = 'viridis',
         engine: plotting.PlottingEngine = 'plotly',
+        scenario: Optional[Union[str, int]] = None,
+        mode: Literal['flow_rate', 'flow_hours'] = 'flow_rate',
+        drop_suffix: bool = True,
     ) -> Union[plotly.graph_objs.Figure, Tuple[plt.Figure, plt.Axes]]:
         """
         Plots the node balance of the Component or Bus.

         Args:
             save: Whether to save the plot or not. If a path is provided, the plot will be saved at that location.
             show: Whether to show the plot or not.
+            colors: The colors to use for the plot. See `flixopt.plotting.ColorType` for options.
             engine: The engine to use for plotting. Can be either 'plotly' or 'matplotlib'.
+            scenario: The scenario to plot. Defaults to the first scenario. Has no effect without scenarios present.
+            mode: The mode to use for the dataset. Can be 'flow_rate' or 'flow_hours'.
+                - 'flow_rate': Returns the flow_rates of the Node.
+                - 'flow_hours': Returns the flow_hours of the Node. [flow_hours(t) = flow_rate(t) * dt(t)]. Renames suffixes to |flow_hours.
+            drop_suffix: Whether to drop the suffix from the variable names.
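+
+        Example (illustrative; element label assumed):
+            results['Fernwärme'].plot_node_balance(mode='flow_hours', scenario='Base Case')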
""" + ds = self.node_balance(with_last_timestep=True, mode=mode, drop_suffix=drop_suffix) + + title = f'{self.label} (flow rates)' if mode == 'flow_rate' else f'{self.label} (flow hours)' + + if 'scenario' in ds.indexes: + chosen_scenario = scenario or self._calculation_results.scenarios[0] + ds = ds.sel(scenario=chosen_scenario).drop_vars('scenario') + title = f'{title} - {chosen_scenario}' + if engine == 'plotly': figure_like = plotting.with_plotly( - self.node_balance(with_last_timestep=True).to_dataframe(), + ds.to_dataframe(), colors=colors, mode='area', - title=f'Flow rates of {self.label}', + title=title, ) default_filetype = '.html' elif engine == 'matplotlib': figure_like = plotting.with_matplotlib( - self.node_balance(with_last_timestep=True).to_dataframe(), + ds.to_dataframe(), colors=colors, mode='bar', - title=f'Flow rates of {self.label}', + title=title, ) default_filetype = '.png' else: @@ -415,7 +500,7 @@ def plot_node_balance( return plotting.export_figure( figure_like=figure_like, - default_path=self._calculation_results.folder / f'{self.label} (flow rates)', + default_path=self._calculation_results.folder / title, default_filetype=default_filetype, user_path=None if isinstance(save, bool) else pathlib.Path(save), show=show, @@ -430,6 +515,8 @@ def plot_node_balance_pie( save: Union[bool, pathlib.Path] = False, show: bool = True, engine: plotting.PlottingEngine = 'plotly', + scenario: Optional[Union[str, int]] = None, + drop_suffix: bool = True, ) -> plotly.graph_objects.Figure: """ Plots a pie chart of the flow hours of the inputs and outputs of buses or components. @@ -441,32 +528,45 @@ def plot_node_balance_pie( save: Whether to save the figure. show: Whether to show the figure. engine: Plotting engine to use. Only 'plotly' is implemented atm. + scenario: If scenarios are present: The scenario to plot. If None, the first scenario is used. + drop_suffix: Whether to drop the suffix from the variable names. 
""" - inputs = ( - sanitize_dataset( - ds=self.solution[self.inputs], - threshold=1e-5, - drop_small_vars=True, - zero_small_values=True, - ) - * self._calculation_results.hours_per_timestep + inputs = sanitize_dataset( + ds=self.solution[self.inputs], + threshold=1e-5, + drop_small_vars=True, + zero_small_values=True, ) - outputs = ( - sanitize_dataset( - ds=self.solution[self.outputs], - threshold=1e-5, - drop_small_vars=True, - zero_small_values=True, - ) - * self._calculation_results.hours_per_timestep + outputs = sanitize_dataset( + ds=self.solution[self.outputs], + threshold=1e-5, + drop_small_vars=True, + zero_small_values=True, ) + inputs = inputs.sum('time') + outputs = outputs.sum('time') + + title = f'{self.label} (total flow hours)' + + if 'scenario' in inputs.indexes: + chosen_scenario = scenario or self._calculation_results.scenarios[0] + inputs = inputs.sel(scenario=chosen_scenario).drop_vars('scenario') + outputs = outputs.sel(scenario=chosen_scenario).drop_vars('scenario') + title = f'{title} - {chosen_scenario}' + + if drop_suffix: + inputs = inputs.rename_vars({var: var.split('|flow_rate')[0] for var in inputs}) + outputs = outputs.rename_vars({var: var.split('|flow_rate')[0] for var in outputs}) + else: + inputs = inputs.rename_vars({var: var.replace('flow_rate', 'flow_hours') for var in inputs}) + outputs = outputs.rename_vars({var: var.replace('flow_rate', 'flow_hours') for var in outputs}) if engine == 'plotly': figure_like = plotting.dual_pie_with_plotly( - inputs.to_dataframe().sum(), - outputs.to_dataframe().sum(), + data_left=inputs.to_pandas(), + data_right=outputs.to_pandas(), colors=colors, - title=f'Flow hours of {self.label}', + title=title, text_info=text_info, subtitles=('Inputs', 'Outputs'), legend_title='Flows', @@ -476,10 +576,10 @@ def plot_node_balance_pie( elif engine == 'matplotlib': logger.debug('Parameter text_info is not supported for matplotlib') figure_like = plotting.dual_pie_with_matplotlib( - inputs.to_dataframe().sum(), - outputs.to_dataframe().sum(), + data_left=inputs.to_pandas(), + data_right=outputs.to_pandas(), colors=colors, - title=f'Total flow hours of {self.label}', + title=title, subtitles=('Inputs', 'Outputs'), legend_title='Flows', lower_percentage_group=lower_percentage_group, @@ -490,7 +590,7 @@ def plot_node_balance_pie( return plotting.export_figure( figure_like=figure_like, - default_path=self._calculation_results.folder / f'{self.label} (total flow hours)', + default_path=self._calculation_results.folder / title, default_filetype=default_filetype, user_path=None if isinstance(save, bool) else pathlib.Path(save), show=show, @@ -503,9 +603,29 @@ def node_balance( negate_outputs: bool = False, threshold: Optional[float] = 1e-5, with_last_timestep: bool = False, + mode: Literal['flow_rate', 'flow_hours'] = 'flow_rate', + drop_suffix: bool = False, ) -> xr.Dataset: + """ + Returns a dataset with the node balance of the Component or Bus. + Args: + negate_inputs: Whether to negate the input flow_rates of the Node. + negate_outputs: Whether to negate the output flow_rates of the Node. + threshold: The threshold for small values. Variables with all values below the threshold are dropped. + with_last_timestep: Whether to include the last timestep in the dataset. + mode: The mode to use for the dataset. Can be 'flow_rate' or 'flow_hours'. + - 'flow_rate': Returns the flow_rates of the Node. + - 'flow_hours': Returns the flow_hours of the Node. [flow_hours(t) = flow_rate(t) * dt(t)]. Renames suffixes to |flow_hours. 
+            drop_suffix: Whether to drop the suffix from the variable names.
+        """
+        ds = self.solution[self.inputs + self.outputs]
+        if mode == 'flow_hours':
+            ds = ds * self._calculation_results.hours_per_timestep
+            ds = ds.rename_vars({var: var.replace('flow_rate', 'flow_hours') for var in ds.data_vars})
+        if drop_suffix:
+            ds = ds.rename_vars({var: var.split('|')[0] for var in ds.data_vars})
         return sanitize_dataset(
-            ds=self.solution[self.inputs + self.outputs],
+            ds=ds,
             threshold=threshold,
             timesteps=self._calculation_results.timesteps_extra if with_last_timestep else None,
             negate=(
@@ -548,6 +668,7 @@ def plot_charge_state(
         show: bool = True,
         colors: plotting.ColorType = 'viridis',
         engine: plotting.PlottingEngine = 'plotly',
+        scenario: Optional[Union[str, int]] = None,
     ) -> plotly.graph_objs.Figure:
         """
         Plots the charge state of a Storage.

             save: Whether to save the plot or not. If a path is provided, the plot will be saved at that location.
             show: Whether to show the plot or not.
-            colors: The c
+            colors: The colors to use for the plot.
             engine: Plotting engine to use. Only 'plotly' is implemented atm.
+            scenario: The scenario to plot. Defaults to the first scenario. Has no effect without scenarios present.

         Raises:
             ValueError: If the Component is not a Storage.
@@ -568,16 +690,26 @@ def plot_charge_state(
         if not self.is_storage:
             raise ValueError(f'Cant plot charge_state. "{self.label}" is not a storage')

+        ds = self.node_balance(with_last_timestep=True)
+        charge_state = self.charge_state
+
+        scenario_suffix = ''
+        if 'scenario' in ds.indexes:
+            chosen_scenario = scenario or self._calculation_results.scenarios[0]
+            ds = ds.sel(scenario=chosen_scenario).drop_vars('scenario')
+            charge_state = charge_state.sel(scenario=chosen_scenario).drop_vars('scenario')
+            scenario_suffix = f'--{chosen_scenario}'
+
         fig = plotting.with_plotly(
-            self.node_balance(with_last_timestep=True).to_dataframe(),
+            ds.to_dataframe(),
             colors=colors,
             mode='area',
-            title=f'Operation Balance of {self.label}',
+            title=f'Operation Balance of {self.label}{scenario_suffix}',
         )

         # TODO: Use colors for charge state?
-        charge_state = self.charge_state.to_dataframe()
+        charge_state = charge_state.to_dataframe()
         fig.add_trace(
             plotly.graph_objs.Scatter(
                 x=charge_state.index, y=charge_state.values.flatten(), mode='lines', name=self._charge_state
@@ -586,7 +718,7 @@ def plot_charge_state(

         return plotting.export_figure(
             fig,
-            default_path=self._calculation_results.folder / f'{self.label} (charge state)',
+            default_path=self._calculation_results.folder / f'{self.label} (charge state){scenario_suffix}',
             default_filetype='.html',
             user_path=None if isinstance(save, bool) else pathlib.Path(save),
             show=show,
@@ -878,21 +1010,71 @@ def sanitize_dataset(

 def filter_dataset(
     ds: xr.Dataset,
-    variable_dims: Optional[Literal['scalar', 'time']] = None,
+    variable_dims: Optional[Literal['scalar', 'time', 'scenario', 'timeonly', 'scenarioonly']] = None,
+    timesteps: Optional[Union[pd.DatetimeIndex, str, pd.Timestamp]] = None,
+    scenarios: Optional[Union[pd.Index, str, int]] = None,
 ) -> xr.Dataset:
     """
-    Filters a dataset by its dimensions.
+    Filters a dataset by its dimensions and optionally selects specific indexes.

     Args:
         ds: The dataset to filter.
-        variable_dims: The dimension of the variables to filter for.
+        variable_dims: The dimensions to filter variables by.
+            - 'scalar': Get scalar variables (without dimensions)
+            - 'time': Get time-dependent variables (with a time dimension)
+            - 'scenario': Get scenario-dependent variables (with a scenario dimension)
+            - 'timeonly': Get time-dependent variables (with ONLY a time dimension)
+            - 'scenarioonly': Get scenario-dependent variables (with ONLY a scenario dimension)
+        timesteps: Optional time indexes to select. Can be:
+            - pd.DatetimeIndex: Multiple timesteps
+            - str/pd.Timestamp: Single timestep
+            Defaults to all available timesteps.
+        scenarios: Optional scenario indexes to select. Can be:
+            - pd.Index: Multiple scenarios
+            - str/int: Single scenario (int is treated as a label, not an index position)
+            Defaults to all available scenarios.
+
+    Returns:
+        Filtered dataset with specified variables and indexes.
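+
+    Example (illustrative; assumes a solved model with a scenario dimension):
+        filter_dataset(results.solution, variable_dims='scenarioonly')
+        filter_dataset(results.solution, variable_dims='time', scenarios='Base Case')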
    """
+    # Keep the full dataset if variable_dims is not restricted
     if variable_dims is None:
-        return ds
-
-    if variable_dims == 'scalar':
-        return ds[[name for name, da in ds.data_vars.items() if len(da.dims) == 0]]
+        pass
+    elif variable_dims == 'scalar':
+        ds = ds[[v for v in ds.data_vars if not ds[v].dims]]
     elif variable_dims == 'time':
-        return ds[[name for name, da in ds.data_vars.items() if 'time' in da.dims]]
+        ds = ds[[v for v in ds.data_vars if 'time' in ds[v].dims]]
+    elif variable_dims == 'scenario':
+        ds = ds[[v for v in ds.data_vars if 'scenario' in ds[v].dims]]
+    elif variable_dims == 'timeonly':
+        ds = ds[[v for v in ds.data_vars if ds[v].dims == ('time',)]]
+    elif variable_dims == 'scenarioonly':
+        ds = ds[[v for v in ds.data_vars if ds[v].dims == ('scenario',)]]
     else:
-        raise ValueError(f'Not allowed value for "filter_dataset()": {variable_dims=}')
+        raise ValueError(f'Unknown variable_dims "{variable_dims}" for filter_dataset')
+
+    # Handle time selection if needed
+    if timesteps is not None and 'time' in ds.dims:
+        try:
+            ds = ds.sel(time=timesteps)
+        except KeyError as e:
+            available_times = set(ds.indexes['time'])
+            requested_times = set([timesteps]) if not isinstance(timesteps, pd.Index) else set(timesteps)
+            missing_times = requested_times - available_times
+            raise ValueError(
+                f'Timesteps not found in dataset: {missing_times}. Available times: {available_times}'
+            ) from e
+
+    # Handle scenario selection if needed
+    if scenarios is not None and 'scenario' in ds.dims:
+        try:
+            ds = ds.sel(scenario=scenarios)
+        except KeyError as e:
+            available_scenarios = set(ds.indexes['scenario'])
+            requested_scenarios = set([scenarios]) if not isinstance(scenarios, pd.Index) else set(scenarios)
+            missing_scenarios = requested_scenarios - available_scenarios
+            raise ValueError(
+                f'Scenarios not found in dataset: {missing_scenarios}. Available scenarios: {available_scenarios}'
+            ) from e
+
+    return ds
diff --git a/flixopt/structure.py b/flixopt/structure.py
index e7f1c62a4..a1c9ffa0d 100644
--- a/flixopt/structure.py
+++ b/flixopt/structure.py
@@ -19,7 +19,7 @@
 from rich.pretty import Pretty

 from .config import CONFIG
-from .core import NumericData, Scalar, TimeSeries, TimeSeriesCollection, TimeSeriesData
+from .core import Scalar, TimeSeries, TimeSeriesCollection, TimeSeriesData, TimestepData

 if TYPE_CHECKING:  # for type checking and preventing circular imports
     from .effects import EffectCollectionModel
@@ -58,6 +58,7 @@ def __init__(self, flow_system: 'FlowSystem'):
         self.flow_system = flow_system
         self.time_series_collection = flow_system.time_series_collection
         self.effects: Optional[EffectCollectionModel] = None
+        self.scenario_weights = self._calculate_scenario_weights(flow_system.scenario_weights)

     def do_modeling(self):
         self.effects = self.flow_system.effects.create_model(self)
@@ -69,6 +70,24 @@ def do_modeling(self):
         for bus_model in bus_models:  # Buses after Components, because FlowModels are created in ComponentModels
             bus_model.do_modeling()

+    def _calculate_scenario_weights(self, weights: Optional[TimeSeries] = None) -> xr.DataArray:
+        """Calculates the weights of the scenarios. If None, all scenarios have the same weight.
+        All weights are normalized to 1. If no scenarios are present, a single weight of 1 is returned.
+        """
+        if weights is not None and not isinstance(weights, TimeSeries):
+            raise TypeError(f'Weights must be a TimeSeries or None, got {type(weights)}')
+        if self.time_series_collection.scenarios is None:
+            return xr.DataArray(1)
+        if weights is None:
+            weights = xr.DataArray(
+                np.ones(len(self.time_series_collection.scenarios)),
+                coords={'scenario': self.time_series_collection.scenarios}
+            )
+        elif isinstance(weights, TimeSeries):
+            weights = weights.selected_data
+
+        return weights / weights.sum()
+
     @property
     def solution(self):
         solution = super().solution
@@ -98,13 +117,40 @@ def hours_per_step(self):
     def hours_of_previous_timesteps(self):
         return self.time_series_collection.hours_of_previous_timesteps

-    @property
-    def coords(self) -> Tuple[pd.DatetimeIndex]:
-        return (self.time_series_collection.timesteps,)
+    def get_coords(
+        self, scenario_dim=True, time_dim=True, extra_timestep=False
+    ) -> Optional[Union[Tuple[pd.Index], Tuple[pd.Index, pd.Index]]]:
+        """
+        Returns the coordinates of the model

-    @property
-    def coords_extra(self) -> Tuple[pd.DatetimeIndex]:
-        return (self.time_series_collection.timesteps_extra,)
+        Args:
+            scenario_dim: If True, the scenario dimension is included in the coordinates
+            time_dim: If True, the time dimension is included in the coordinates
+            extra_timestep: If True, the extra timesteps are used instead of the regular timesteps
+
+        Returns:
+            The coordinates of the model.
Might also be None if no scenarios are present and time_dim is False + """ + if not scenario_dim and not time_dim: + return None + scenarios = self.time_series_collection.scenarios + timesteps = ( + self.time_series_collection.timesteps if not extra_timestep else self.time_series_collection.timesteps_extra + ) + + if scenario_dim and time_dim: + if scenarios is None: + return (timesteps,) + return timesteps, scenarios + + if scenario_dim and not time_dim: + if scenarios is None: + return None + return (scenarios,) + if time_dim and not scenario_dim: + return (timesteps,) + + raise ValueError(f'Cannot get coordinates with both {scenario_dim=} and {time_dim=}') class Interface: @@ -534,9 +580,12 @@ def copy_and_convert_datatypes(data: Any, use_numpy: bool = True, use_element_la return copy_and_convert_datatypes(data.tolist(), use_numpy, use_element_label) elif isinstance(data, TimeSeries): - return copy_and_convert_datatypes(data.active_data, use_numpy, use_element_label) + return copy_and_convert_datatypes(data.selected_data, use_numpy, use_element_label) elif isinstance(data, TimeSeriesData): return copy_and_convert_datatypes(data.data, use_numpy, use_element_label) + elif isinstance(data, (pd.Series, pd.DataFrame)): + #TODO: This can be improved + return copy_and_convert_datatypes(data.values, use_numpy, use_element_label) elif isinstance(data, Interface): if use_element_label and isinstance(data, Element): diff --git a/flixopt/utils.py b/flixopt/utils.py index bb6e8ec40..3e65328a4 100644 --- a/flixopt/utils.py +++ b/flixopt/utils.py @@ -11,15 +11,6 @@ logger = logging.getLogger('flixopt') -def is_number(number_alias: Union[int, float, str]): - """Returns True is string is a number.""" - try: - float(number_alias) - return True - except ValueError: - return False - - def round_floats(obj, decimals=2): if isinstance(obj, dict): return {k: round_floats(v, decimals) for k, v in obj.items()} @@ -27,6 +18,12 @@ def round_floats(obj, decimals=2): return [round_floats(v, decimals) for v in obj] elif isinstance(obj, float): return round(obj, decimals) + elif isinstance(obj, int): + return obj + elif isinstance(obj, np.ndarray): + return np.round(obj, decimals).tolist() + elif isinstance(obj, xr.DataArray): + return obj.round(decimals).values.tolist() return obj diff --git a/site/release-notes/_template.txt b/site/release-notes/_template.txt deleted file mode 100644 index fe85a0554..000000000 --- a/site/release-notes/_template.txt +++ /dev/null @@ -1,32 +0,0 @@ -# Release v{version} - -**Release Date:** YYYY-MM-DD - -## What's New - -* Feature 1 - Description -* Feature 2 - Description - -## Improvements - -* Improvement 1 - Description -* Improvement 2 - Description - -## Bug Fixes - -* Fixed issue with X -* Resolved problem with Y - -## Breaking Changes - -* Change 1 - Migration instructions -* Change 2 - Migration instructions - -## Deprecations - -* Feature X will be removed in v{next_version} - -## Dependencies - -* Added dependency X v1.2.3 -* Updated dependency Y to v2.0.0 \ No newline at end of file diff --git a/tests/run_all_tests.py b/tests/run_all_tests.py index 5597a47f3..83b6dfacf 100644 --- a/tests/run_all_tests.py +++ b/tests/run_all_tests.py @@ -7,4 +7,4 @@ import pytest if __name__ == '__main__': - pytest.main(['test_functional.py', '--disable-warnings']) + pytest.main(['test_integration.py', '--disable-warnings']) diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 49f1438e7..a50754301 100644 --- a/tests/test_dataconverter.py +++ 
b/tests/test_dataconverter.py @@ -3,69 +3,673 @@ import pytest import xarray as xr -from flixopt.core import ConversionError, DataConverter # Adjust this import to match your project structure +from flixopt.core import ( # Adjust this import to match your project structure + ConversionError, + DataConverter, + TimeSeries, +) @pytest.fixture -def sample_time_index(request): - index = pd.date_range('2024-01-01', periods=5, freq='D', name='time') - return index +def sample_time_index(): + return pd.date_range('2024-01-01', periods=5, freq='D', name='time') -def test_scalar_conversion(sample_time_index): - # Test scalar conversion - result = DataConverter.as_dataarray(42, sample_time_index) - assert isinstance(result, xr.DataArray) - assert result.shape == (len(sample_time_index),) - assert result.dims == ('time',) - assert np.all(result.values == 42) +@pytest.fixture +def sample_scenario_index(): + return pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario') + + +@pytest.fixture +def multi_index(sample_time_index, sample_scenario_index): + """Create a sample MultiIndex combining scenarios and times.""" + return pd.MultiIndex.from_product([sample_scenario_index, sample_time_index], names=['scenario', 'time']) + + +class TestSingleDimensionConversion: + """Tests for converting data without scenarios (1D: time only).""" + + def test_scalar_conversion(self, sample_time_index): + """Test converting a scalar value.""" + # Test with integer + result = DataConverter.as_dataarray(42, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (len(sample_time_index),) + assert result.dims == ('time',) + assert np.all(result.values == 42) + + # Test with float + result = DataConverter.as_dataarray(42.5, sample_time_index) + assert np.all(result.values == 42.5) + + # Test with numpy scalar types + result = DataConverter.as_dataarray(np.int64(42), sample_time_index) + assert np.all(result.values == 42) + result = DataConverter.as_dataarray(np.float32(42.5), sample_time_index) + assert np.all(result.values == 42.5) + + def test_ndarray_conversion(self, sample_time_index): + """Test converting a numpy ndarray.""" + # Test with integer 1D array + arr_1d = np.array([1, 2, 3, 4, 5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, arr_1d) + + # Test with float 1D array + arr_1d = np.array([1.1, 2.2, 3.3, 4.4, 5.5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index) + assert np.array_equal(result.values, arr_1d) + + # Test with array containing NaN + arr_1d = np.array([1, np.nan, 3, np.nan, 5]) + result = DataConverter.as_dataarray(arr_1d, sample_time_index) + assert np.array_equal(np.isnan(result.values), np.isnan(arr_1d)) + assert np.array_equal(result.values[~np.isnan(result.values)], arr_1d[~np.isnan(arr_1d)]) + + def test_dataarray_conversion(self, sample_time_index): + """Test converting an existing xarray DataArray.""" + # Create original DataArray + original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) + + # Convert and check + result = DataConverter.as_dataarray(original, sample_time_index) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, original.values) + + # Ensure it's a copy + result[0] = 999 + assert original[0].item() == 1 # Original should be unchanged + + # Test with different time coordinates but same length + 
different_times = pd.date_range('2025-01-01', periods=5, freq='D', name='time') + original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': different_times}, dims=['time']) + + # Should raise an error for mismatched time coordinates + with pytest.raises(ConversionError): + DataConverter.as_dataarray(original, sample_time_index) + + +class TestMultiDimensionConversion: + """Tests for converting data with scenarios (2D: scenario × time).""" + + def test_scalar_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting scalar values with scenario dimension.""" + # Test with integer + result = DataConverter.as_dataarray(42, sample_time_index, sample_scenario_index) + + assert isinstance(result, xr.DataArray) + assert result.shape == (len(sample_time_index), len(sample_scenario_index)) + assert result.dims == ('time', 'scenario') + assert np.all(result.values == 42) + assert set(result.coords['scenario'].values) == set(sample_scenario_index.values) + assert set(result.coords['time'].values) == set(sample_time_index.values) + + # Test with float + result = DataConverter.as_dataarray(42.5, sample_time_index, sample_scenario_index) + assert np.all(result.values == 42.5) + + def test_1d_array_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting 1D array with scenario dimension (broadcasting).""" + # Create 1D array matching timesteps length + arr_1d = np.array([1, 2, 3, 4, 5]) + + # Convert with scenarios + result = DataConverter.as_dataarray(arr_1d, sample_time_index, sample_scenario_index) + + assert result.shape == (len(sample_time_index), len(sample_scenario_index)) + assert result.dims == ('time', 'scenario') + + # Each scenario should have the same values (broadcasting) + for scenario in sample_scenario_index: + scenario_slice = result.sel(scenario=scenario) + assert np.array_equal(scenario_slice.values, arr_1d) + + def test_2d_array_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting 2D array with scenario dimension.""" + # Create 2D array with different values per scenario + arr_2d = np.array( + [ + [1, 2, 3, 4, 5], # baseline scenario + [6, 7, 8, 9, 10], # high_demand scenario + [11, 12, 13, 14, 15], # low_price scenario + ] + ) + + # Convert to DataArray + result = DataConverter.as_dataarray(arr_2d.T, sample_time_index, sample_scenario_index) + + assert result.shape == (5, 3) + assert result.dims == ('time', 'scenario') + + # Check that each scenario has correct values + assert np.array_equal(result.sel(scenario='baseline').values, arr_2d[0]) + assert np.array_equal(result.sel(scenario='high_demand').values, arr_2d[1]) + assert np.array_equal(result.sel(scenario='low_price').values, arr_2d[2]) + + def test_dataarray_with_scenarios(self, sample_time_index, sample_scenario_index): + """Test converting an existing DataArray with scenarios.""" + # Create a multi-scenario DataArray + original = xr.DataArray( + data=np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]), + coords={'scenario': sample_scenario_index, 'time': sample_time_index}, + dims=['scenario', 'time'], + ) + + # Test conversion + result = DataConverter.as_dataarray(original, sample_time_index, sample_scenario_index) + + assert result.shape == (5, 3) + assert result.dims == ('time', 'scenario') + assert np.array_equal(result.values, original.values.T) + + # Ensure it's a copy + result.loc[:, 'baseline'] = 999 + assert original.sel(scenario='baseline')[0].item() == 1 # Original should be unchanged + + +class 
TestSeriesConversion:
+    """Tests for converting pandas Series to DataArray."""
+
+    def test_series_single_dimension(self, sample_time_index):
+        """Test converting a pandas Series with a time or a scenario index."""
+        # Create a Series with matching time index
+        series = pd.Series([10, 20, 30, 40, 50], index=sample_time_index)
+
+        # Convert and check
+        result = DataConverter.as_dataarray(series, sample_time_index)
+        assert isinstance(result, xr.DataArray)
+        assert result.shape == (5,)
+        assert result.dims == ('time',)
+        assert np.array_equal(result.values, series.values)
+        assert np.array_equal(result.coords['time'].values, sample_time_index.values)
+
+        # Test with scenario index
+        scenario_index = pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario')
+        series = pd.Series([100, 200, 300], index=scenario_index)
+
+        result = DataConverter.as_dataarray(series, scenarios=scenario_index)
+        assert result.shape == (3,)
+        assert result.dims == ('scenario',)
+        assert np.array_equal(result.values, series.values)
+        assert np.array_equal(result.coords['scenario'].values, scenario_index.values)
+
+    def test_series_mismatched_index(self, sample_time_index):
+        """Test converting a Series with mismatched index."""
+        # Create Series with different time index
+        different_times = pd.date_range('2025-01-01', periods=5, freq='D', name='time')
+        series = pd.Series([10, 20, 30, 40, 50], index=different_times)
+
+        # Should raise error for mismatched index
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(series, sample_time_index)
+
+    def test_series_broadcast_to_scenarios(self, sample_time_index, sample_scenario_index):
+        """Test broadcasting a time-indexed Series across scenarios."""
+        # Create a Series with time index
+        series = pd.Series([10, 20, 30, 40, 50], index=sample_time_index)
+
+        # Convert with scenarios
+        result = DataConverter.as_dataarray(series, sample_time_index, sample_scenario_index)
+
+        assert result.shape == (5, 3)
+        assert result.dims == ('time', 'scenario')
+
+        # Check broadcasting - each scenario should have the same values
+        for scenario in sample_scenario_index:
+            scenario_slice = result.sel(scenario=scenario)
+            assert np.array_equal(scenario_slice.values, series.values)
+
+    def test_series_broadcast_to_time(self, sample_time_index, sample_scenario_index):
+        """Test broadcasting a scenario-indexed Series across time."""
+        # Create a Series with scenario index
+        series = pd.Series([100, 200, 300], index=sample_scenario_index)
+
+        # Convert with time
+        result = DataConverter.as_dataarray(series, sample_time_index, sample_scenario_index)
+
+        assert result.shape == (5, 3)
+        assert result.dims == ('time', 'scenario')
+
+        # Check broadcasting - each time should have the same scenario values
+        for time in sample_time_index:
+            time_slice = result.sel(time=time)
+            assert np.array_equal(time_slice.values, series.values)
+
+    def test_series_dimension_order(self, sample_time_index, sample_scenario_index):
+        """Test that a reversed ('scenario', 'time') dimension order is rejected for Series."""
+        # Build a dims tuple in the reversed, unsupported order
+        dims = ('scenario', 'time',)
+        coords = {'time': sample_time_index, 'scenario': sample_scenario_index}
+
+        # Time-indexed series
+        series = pd.Series([10, 20, 30, 40, 50], index=sample_time_index)
+        with pytest.raises(ConversionError, match="only supports time and scenario dimensions"):
+            _ = DataConverter._convert_series(series, coords, dims)
+
+        # Scenario-indexed series
+        series = pd.Series([100, 200, 300], index=sample_scenario_index)
+        with 
pytest.raises(ConversionError, match="only supports time and scenario dimensions"): + _ = DataConverter._convert_series(series, coords, dims) + + +class TestDataFrameConversion: + """Tests for converting pandas DataFrame to DataArray.""" + + def test_dataframe_single_column(self, sample_time_index): + """Test converting a DataFrame with a single column.""" + # Create DataFrame with one column + df = pd.DataFrame({'value': [10, 20, 30, 40, 50]}, index=sample_time_index) + + # Convert and check + result = DataConverter.as_dataarray(df, sample_time_index) + assert isinstance(result, xr.DataArray) + assert result.shape == (5,) + assert result.dims == ('time',) + assert np.array_equal(result.values, df['value'].values) + + def test_dataframe_multi_column_fails(self, sample_time_index): + """Test that converting a multi-column DataFrame to 1D fails.""" + # Create DataFrame with multiple columns + df = pd.DataFrame({'val1': [10, 20, 30, 40, 50], 'val2': [15, 25, 35, 45, 55]}, index=sample_time_index) + + # Should raise error + with pytest.raises(ConversionError): + DataConverter.as_dataarray(df, sample_time_index) + + def test_dataframe_time_scenario(self, sample_time_index, sample_scenario_index): + """Test converting a DataFrame with time index and scenario columns.""" + # Create DataFrame with time as index and scenarios as columns + data = {'baseline': [10, 20, 30, 40, 50], 'high_demand': [15, 25, 35, 45, 55], 'low_price': [5, 15, 25, 35, 45]} + df = pd.DataFrame(data, index=sample_time_index) + + # Make sure columns are named properly + df.columns.name = 'scenario' + + # Convert and check + result = DataConverter.as_dataarray(df, sample_time_index, sample_scenario_index) + + assert result.shape == (5, 3) + assert result.dims == ('time', 'scenario') + assert np.array_equal(result.values, df.values) + + # Check values for specific scenarios + assert np.array_equal(result.sel(scenario='baseline').values, df['baseline'].values) + assert np.array_equal(result.sel(scenario='high_demand').values, df['high_demand'].values) + + def test_dataframe_mismatched_coordinates(self, sample_time_index, sample_scenario_index): + """Test conversion fails with mismatched coordinates.""" + # Create DataFrame with different time index + different_times = pd.date_range('2025-01-01', periods=5, freq='D', name='time') + data = {'baseline': [10, 20, 30, 40, 50], 'high_demand': [15, 25, 35, 45, 55], 'low_price': [5, 15, 25, 35, 45]} + df = pd.DataFrame(data, index=different_times) + df.columns = sample_scenario_index + + # Should raise error + with pytest.raises(ConversionError): + DataConverter.as_dataarray(df, sample_time_index, sample_scenario_index) + # Create DataFrame with different scenario columns + different_scenarios = pd.Index(['scenario1', 'scenario2', 'scenario3'], name='scenario') + data = {'scenario1': [10, 20, 30, 40, 50], 'scenario2': [15, 25, 35, 45, 55], 'scenario3': [5, 15, 25, 35, 45]} + df = pd.DataFrame(data, index=sample_time_index) + df.columns = different_scenarios -def test_series_conversion(sample_time_index): - series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) + # Should raise error + with pytest.raises(ConversionError): + DataConverter.as_dataarray(df, sample_time_index, sample_scenario_index) - # Test Series conversion - result = DataConverter.as_dataarray(series, sample_time_index) - assert isinstance(result, xr.DataArray) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values, series.values) + def test_ensure_copy(self, 
sample_time_index, sample_scenario_index): + """Test that the returned DataArray is a copy.""" + # Create DataFrame + data = {'baseline': [10, 20, 30, 40, 50], 'high_demand': [15, 25, 35, 45, 55], 'low_price': [5, 15, 25, 35, 45]} + df = pd.DataFrame(data, index=sample_time_index) + df.columns = sample_scenario_index + # Convert + result = DataConverter.as_dataarray(df, sample_time_index, sample_scenario_index) -def test_dataframe_conversion(sample_time_index): - # Create a single-column DataFrame - df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) + # Modify the result + result.loc[dict(time=sample_time_index[0], scenario='baseline')] = 999 - # Test DataFrame conversion - result = DataConverter.as_dataarray(df, sample_time_index) - assert isinstance(result, xr.DataArray) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values.flatten(), df['A'].values) + # Original should be unchanged + assert df.loc[sample_time_index[0], 'baseline'] == 10 -def test_ndarray_conversion(sample_time_index): - # Test 1D array conversion - arr_1d = np.array([1, 2, 3, 4, 5]) - result = DataConverter.as_dataarray(arr_1d, sample_time_index) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values, arr_1d) +class TestInvalidInputs: + """Tests for invalid inputs and error handling.""" + def test_time_index_validation(self): + """Test validation of time index.""" + # Test with unnamed index + unnamed_index = pd.date_range('2024-01-01', periods=5, freq='D') + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, unnamed_index) -def test_dataarray_conversion(sample_time_index): - # Create a DataArray - original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) + # Test with empty index + empty_index = pd.DatetimeIndex([], name='time') + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, empty_index) - # Test DataArray conversion - result = DataConverter.as_dataarray(original, sample_time_index) - assert result.shape == (5,) - assert result.dims == ('time',) - assert np.array_equal(result.values, original.values) + # Test with non-DatetimeIndex + wrong_type_index = pd.Index([1, 2, 3, 4, 5], name='time') + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, wrong_type_index) - # Ensure it's a copy - result[0] = 999 - assert original[0].item() == 1 # Original should be unchanged + def test_scenario_index_validation(self, sample_time_index): + """Test validation of scenario index.""" + # Test with unnamed scenario index + unnamed_index = pd.Index(['baseline', 'high_demand']) + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, sample_time_index, unnamed_index) + + # Test with empty scenario index + empty_index = pd.Index([], name='scenario') + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, sample_time_index, empty_index) + + # Test with non-Index scenario + with pytest.raises(ConversionError): + DataConverter.as_dataarray(42, sample_time_index, ['baseline', 'high_demand']) + + def test_invalid_data_types(self, sample_time_index, sample_scenario_index): + """Test handling of invalid data types.""" + # Test invalid input type (string) + with pytest.raises(ConversionError): + DataConverter.as_dataarray('invalid_string', sample_time_index) + + # Test invalid input type with scenarios + with pytest.raises(ConversionError): + DataConverter.as_dataarray('invalid_string', 
sample_time_index, sample_scenario_index)
+
+        # Test unsupported complex object
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(object(), sample_time_index)
+
+        # Test None value
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(None, sample_time_index)
+
+    def test_mismatched_input_dimensions(self, sample_time_index, sample_scenario_index):
+        """Test handling of mismatched input dimensions."""
+        # Test mismatched Series index
+        mismatched_series = pd.Series(
+            [1, 2, 3, 4, 5, 6], index=pd.date_range('2025-01-01', periods=6, freq='D', name='time')
+        )
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(mismatched_series, sample_time_index)
+
+        # Test DataFrame with multiple columns
+        df_multi_col = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]}, index=sample_time_index)
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(df_multi_col, sample_time_index)
+
+        # Test mismatched array shape for time-only
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(np.array([1, 2, 3]), sample_time_index)  # Wrong length
+
+        # Test mismatched array shape for scenario × time
+        # 2D input must be shaped (n_timesteps, n_scenarios); this (3, 4) array matches neither axis
+        wrong_shape_array = np.array(
+            [
+                [1, 2, 3, 4],  # Missing a timestep
+                [5, 6, 7, 8],
+                [9, 10, 11, 12],
+            ]
+        )
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(wrong_shape_array, sample_time_index, sample_scenario_index)
+
+        # Test array with too many dimensions
+        with pytest.raises(ConversionError):
+            # 3D array not allowed
+            DataConverter.as_dataarray(np.ones((3, 5, 2)), sample_time_index, sample_scenario_index)
+
+    def test_dataarray_dimension_mismatch(self, sample_time_index, sample_scenario_index):
+        """Test handling of mismatched DataArray dimensions."""
+        # Create DataArray with wrong dimensions
+        wrong_dims = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'wrong_dim': range(5)}, dims=['wrong_dim'])
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(wrong_dims, sample_time_index)
+
+        # Create DataArray with scenario but no time
+        wrong_dims_2 = xr.DataArray(data=np.array([1, 2, 3]), coords={'scenario': ['a', 'b', 'c']}, dims=['scenario'])
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(wrong_dims_2, sample_time_index, sample_scenario_index)
+
+        # Create DataArray with right dims but wrong length
+        wrong_length = xr.DataArray(
+            data=np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
+            coords={
+                'scenario': sample_scenario_index,
+                'time': pd.date_range('2024-01-01', periods=3, freq='D', name='time'),
+            },
+            dims=['scenario', 'time'],
+        )
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(wrong_length, sample_time_index, sample_scenario_index)
+
+
+class TestEdgeCases:
+    """Tests for edge cases and special scenarios."""
+
+    def test_single_timestep(self, sample_scenario_index):
+        """Test with a single timestep."""
+        # Test with only one timestep
+        single_timestep = pd.DatetimeIndex(['2024-01-01'], name='time')
+
+        # Scalar conversion
+        result = DataConverter.as_dataarray(42, single_timestep)
+        assert result.shape == (1,)
+        assert result.dims == ('time',)
+
+        # With scenarios
+        result_with_scenarios = DataConverter.as_dataarray(42, single_timestep, sample_scenario_index)
+        assert result_with_scenarios.shape == (1, len(sample_scenario_index))
+        assert result_with_scenarios.dims == ('time', 'scenario')
+
+    def test_single_scenario(self, sample_time_index):
+        """Test with a single scenario."""
+        # Test with 
only one scenario
+        single_scenario = pd.Index(['baseline'], name='scenario')
+
+        # Scalar conversion with single scenario
+        result = DataConverter.as_dataarray(42, sample_time_index, single_scenario)
+        assert result.shape == (len(sample_time_index), 1)
+        assert result.dims == ('time', 'scenario')
+
+        # Array conversion with single scenario
+        arr = np.array([1, 2, 3, 4, 5])
+        result_arr = DataConverter.as_dataarray(arr, sample_time_index, single_scenario)
+        assert result_arr.shape == (5, 1)
+        assert np.array_equal(result_arr.sel(scenario='baseline').values, arr)
+
+        # 2D array with single scenario
+        arr_2d = np.array([[1, 2, 3, 4, 5]])  # Note the extra dimension
+        result_arr_2d = DataConverter.as_dataarray(arr_2d.T, sample_time_index, single_scenario)
+        assert result_arr_2d.shape == (5, 1)
+        assert np.array_equal(result_arr_2d.sel(scenario='baseline').values, arr_2d[0])
+
+    def test_different_scenario_order(self, sample_time_index):
+        """Test that scenario order is preserved."""
+        # Test with different scenario orders
+        scenarios1 = pd.Index(['a', 'b', 'c'], name='scenario')
+        scenarios2 = pd.Index(['c', 'b', 'a'], name='scenario')
+
+        # Create DataArray with first order
+        data = np.array(
+            [
+                [1, 2, 3, 4, 5],  # a
+                [6, 7, 8, 9, 10],  # b
+                [11, 12, 13, 14, 15],  # c
+            ]
+        ).T
+
+        result1 = DataConverter.as_dataarray(data, sample_time_index, scenarios1)
+        assert np.array_equal(result1.sel(scenario='a').values, [1, 2, 3, 4, 5])
+        assert np.array_equal(result1.sel(scenario='c').values, [11, 12, 13, 14, 15])
+
+        # Create DataArray with second order
+        result2 = DataConverter.as_dataarray(data, sample_time_index, scenarios2)
+        # The first data row (1..5) should now map to scenario 'c'
+        assert np.array_equal(result2.sel(scenario='c').values, [1, 2, 3, 4, 5])
+        # The last data row (11..15) should now map to scenario 'a'
+        assert np.array_equal(result2.sel(scenario='a').values, [11, 12, 13, 14, 15])
+
+    def test_all_nan_data(self, sample_time_index, sample_scenario_index):
+        """Test handling of all-NaN data."""
+        # Create array of all NaNs
+        all_nan_array = np.full(5, np.nan)
+        result = DataConverter.as_dataarray(all_nan_array, sample_time_index)
+        assert np.all(np.isnan(result.values))
+
+        # With scenarios
+        result = DataConverter.as_dataarray(all_nan_array, sample_time_index, sample_scenario_index)
+        assert result.shape == (len(sample_time_index), len(sample_scenario_index))
+        assert np.all(np.isnan(result.values))
+
+        # Explicit all-NaN array, broadcast across scenarios
+        result = DataConverter.as_dataarray(
+            np.array([np.nan, np.nan, np.nan, np.nan, np.nan]), sample_time_index, sample_scenario_index
+        )
+        assert np.all(np.isnan(result.values))
+
+    def test_mixed_data_types(self, sample_time_index, sample_scenario_index):
+        """Test conversion of mixed integer and float data."""
+        # Create array with mixed types
+        mixed_array = np.array([1, 2.5, 3, 4.5, 5])
+        result = DataConverter.as_dataarray(mixed_array, sample_time_index)
+
+        # Result should be float dtype
+        assert np.issubdtype(result.dtype, np.floating)
+        assert np.array_equal(result.values, mixed_array)
+
+        # With scenarios
+        result = DataConverter.as_dataarray(mixed_array, sample_time_index, sample_scenario_index)
+        assert np.issubdtype(result.dtype, np.floating)
+        for scenario in sample_scenario_index:
+            assert np.array_equal(result.sel(scenario=scenario).values, mixed_array)
+
+
+class TestFunctionalUseCase:
+    """Tests for realistic use cases combining multiple features."""
+
+    def test_large_dataset(self, sample_scenario_index):
+        """Test with a larger dataset to ensure the conversion scales."""
+        # Create a larger timestep 
array (e.g., hourly for a year)
+        large_timesteps = pd.date_range(
+            '2024-01-01',
+            periods=8760,  # Hours in a year
+            freq='h',
+            name='time',
+        )
+
+        # Create large 2D array (3 scenarios × 8760 hours)
+        large_data = np.random.rand(len(sample_scenario_index), len(large_timesteps))
+
+        # Convert and check
+        result = DataConverter.as_dataarray(large_data.T, large_timesteps, sample_scenario_index)
+
+        assert result.shape == (len(large_timesteps), len(sample_scenario_index))
+        assert result.dims == ('time', 'scenario')
+        assert np.array_equal(result.values, large_data.T)
+
+
+class TestMultiScenarioArrayConversion:
+    """Tests specifically focused on array conversion with scenarios."""
+
+    def test_1d_array_broadcasting(self, sample_time_index, sample_scenario_index):
+        """Test that 1D arrays are properly broadcast to all scenarios."""
+        arr_1d = np.array([1, 2, 3, 4, 5])
+        result = DataConverter.as_dataarray(arr_1d, sample_time_index, sample_scenario_index)
+
+        assert result.shape == (len(sample_time_index), len(sample_scenario_index))
+
+        # Each scenario should have identical values
+        for i, scenario in enumerate(sample_scenario_index):
+            assert np.array_equal(result.sel(scenario=scenario).values, arr_1d)
+
+            # Modify one scenario's values (only scenarios already checked are touched)
+            result.loc[dict(scenario=scenario)] = np.ones(len(sample_time_index)) * i
+
+        # Ensure modifications are isolated to each scenario
+        for i, scenario in enumerate(sample_scenario_index):
+            assert np.all(result.sel(scenario=scenario).values == i)
+
+    def test_2d_array_different_shapes(self, sample_time_index):
+        """Test different scenario shapes with 2D arrays."""
+        # Test with 1 scenario
+        single_scenario = pd.Index(['baseline'], name='scenario')
+        arr_1_scenario = np.array([[1, 2, 3, 4, 5]])
+
+        result = DataConverter.as_dataarray(arr_1_scenario.T, sample_time_index, single_scenario)
+        assert result.shape == (len(sample_time_index), 1)
+
+        # Test with 2 scenarios
+        two_scenarios = pd.Index(['baseline', 'high_demand'], name='scenario')
+        arr_2_scenarios = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
+
+        result = DataConverter.as_dataarray(arr_2_scenarios.T, sample_time_index, two_scenarios)
+        assert result.shape == (len(sample_time_index), 2)
+        assert np.array_equal(result.sel(scenario='baseline').values, arr_2_scenarios[0])
+        assert np.array_equal(result.sel(scenario='high_demand').values, arr_2_scenarios[1])
+
+        # Test mismatched scenarios count
+        three_scenarios = pd.Index(['a', 'b', 'c'], name='scenario')
+        with pytest.raises(ConversionError):
+            DataConverter.as_dataarray(arr_2_scenarios, sample_time_index, three_scenarios)
+
+    def test_array_handling_edge_cases(self, sample_time_index, sample_scenario_index):
+        """Test array edge cases."""
+        # Test with boolean array
+        bool_array = np.array([True, False, True, False, True])
+        result = DataConverter.as_dataarray(bool_array, sample_time_index, sample_scenario_index)
+        assert result.dtype == bool
+        assert result.shape == (len(sample_time_index), len(sample_scenario_index))
+
+        # Test with array containing infinite values
+        inf_array = np.array([1, np.inf, 3, -np.inf, 5])
+        result = DataConverter.as_dataarray(inf_array, sample_time_index, sample_scenario_index)
+        for scenario in sample_scenario_index:
+            scenario_data = result.sel(scenario=scenario)
+            assert np.isinf(scenario_data[1].item())
+            assert np.isinf(scenario_data[3].item())
+            assert scenario_data[3].item() < 0  # Negative infinity
+
+
+class TestScenarioReindexing:
+    """Tests for reindexing and coordinate preservation in DataConverter."""
+
+    
def test_preserving_scenario_order(self, sample_time_index): + """Test that scenario order is preserved in converted DataArrays.""" + # Define scenarios in a specific order + scenarios = pd.Index(['scenario3', 'scenario1', 'scenario2'], name='scenario') + + # Create 2D array + data = np.array( + [ + [1, 2, 3, 4, 5], # scenario3 + [6, 7, 8, 9, 10], # scenario1 + [11, 12, 13, 14, 15], # scenario2 + ] + ) + + # Convert to DataArray + result = DataConverter.as_dataarray(data.T, sample_time_index, scenarios) + + # Verify order of scenarios is preserved + assert list(result.coords['scenario'].values) == list(scenarios) + + # Verify data for each scenario + assert np.array_equal(result.sel(scenario='scenario3').values, data[0]) + assert np.array_equal(result.sel(scenario='scenario1').values, data[1]) + assert np.array_equal(result.sel(scenario='scenario2').values, data[2]) + + +if __name__ == '__main__': + pytest.main() def test_invalid_inputs(sample_time_index): @@ -100,12 +704,12 @@ def test_time_index_validation(): # Test with empty index empty_index = pd.DatetimeIndex([], name='time') - with pytest.raises(ValueError): + with pytest.raises(ConversionError): DataConverter.as_dataarray(42, empty_index) # Test with non-DatetimeIndex wrong_type_index = pd.Index([1, 2, 3, 4, 5], name='time') - with pytest.raises(ValueError): + with pytest.raises(ConversionError): DataConverter.as_dataarray(42, wrong_type_index) diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py index 48c7ab7b2..8237cf293 100644 --- a/tests/test_timeseries.py +++ b/tests/test_timeseries.py @@ -8,7 +8,7 @@ import pytest import xarray as xr -from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesCollection, TimeSeriesData +from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesCollection @pytest.fixture @@ -44,13 +44,13 @@ def test_initialization(self, simple_dataarray): # Check data initialization assert isinstance(ts.stored_data, xr.DataArray) assert ts.stored_data.equals(simple_dataarray) - assert ts.active_data.equals(simple_dataarray) + assert ts.selected_data.equals(simple_dataarray) # Check backup was created assert ts._backup.equals(simple_dataarray) # Check active timesteps - assert ts.active_timesteps.equals(simple_dataarray.indexes['time']) + assert ts._valid_selector == {} # No selections initially def test_initialization_with_aggregation_params(self, simple_dataarray): """Test initialization with aggregation parameters.""" @@ -66,60 +66,60 @@ def test_initialization_validation(self, sample_timesteps): """Test validation during initialization.""" # Test missing time dimension invalid_data = xr.DataArray([1, 2, 3], dims=['invalid_dim']) - with pytest.raises(ValueError, match='must have a "time" index'): + with pytest.raises(ValueError, match='DataArray dimensions must be subset of'): TimeSeries(invalid_data, name='Invalid Series') # Test multi-dimensional data multi_dim_data = xr.DataArray( [[1, 2, 3], [4, 5, 6]], coords={'dim1': [0, 1], 'time': sample_timesteps[:3]}, dims=['dim1', 'time'] ) - with pytest.raises(ValueError, match='dimensions of DataArray must be 1'): + with pytest.raises(ValueError, match='DataArray dimensions must be subset of'): TimeSeries(multi_dim_data, name='Multi-dim Series') - def test_active_timesteps_getter_setter(self, sample_timeseries, sample_timesteps): - """Test active_timesteps getter and setter.""" - # Initial state should use all timesteps - assert sample_timeseries.active_timesteps.equals(sample_timesteps) + def 
test_selection_methods(self, sample_timeseries, sample_timesteps): + """Test selection methods.""" + # Initial state should have no selections + assert sample_timeseries._selected_timesteps is None + assert sample_timeseries._selected_scenarios is None # Set to a subset subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index - assert sample_timeseries.active_timesteps.equals(subset_index) + sample_timeseries.set_selection(timesteps=subset_index) + assert sample_timeseries._selected_timesteps.equals(subset_index) # Active data should reflect the subset - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) + assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) - # Reset to full index - sample_timeseries.active_timesteps = None - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - - # Test invalid type - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): - sample_timeseries.active_timesteps = 'invalid' + # Clear selection + sample_timeseries.clear_selection() + assert sample_timeseries._selected_timesteps is None + assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data) def test_reset(self, sample_timeseries, sample_timesteps): """Test reset method.""" # Set to subset first subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index + sample_timeseries.set_selection(timesteps=subset_index) # Reset sample_timeseries.reset() - # Should be back to full index - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data) + # Should be back to full index (all selections cleared) + assert sample_timeseries._selected_timesteps is None + assert sample_timeseries.selected_data.equals(sample_timeseries.stored_data) def test_restore_data(self, sample_timeseries, simple_dataarray): """Test restore_data method.""" # Modify the stored data - new_data = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) + new_data = xr.DataArray( + [1, 2, 3, 4, 5], coords={'time': sample_timeseries.stored_data.coords['time']}, dims=['time'] + ) # Store original data for comparison original_data = sample_timeseries.stored_data - # Set new data - sample_timeseries.stored_data = new_data + # Update data + sample_timeseries.update_stored_data(new_data) assert sample_timeseries.stored_data.equals(new_data) # Restore from backup @@ -127,42 +127,42 @@ def test_restore_data(self, sample_timeseries, simple_dataarray): # Should be back to original data assert sample_timeseries.stored_data.equals(original_data) - assert sample_timeseries.active_data.equals(original_data) + assert sample_timeseries.selected_data.equals(original_data) - def test_stored_data_setter(self, sample_timeseries, sample_timesteps): - """Test stored_data setter with different data types.""" + def test_update_stored_data(self, sample_timeseries, sample_timesteps): + """Test update_stored_data method with different data types.""" # Test with a Series series_data = pd.Series([5, 6, 7, 8, 9], index=sample_timesteps) - sample_timeseries.stored_data = series_data + sample_timeseries.update_stored_data(series_data) assert np.array_equal(sample_timeseries.stored_data.values, series_data.values) # Test with a single-column DataFrame df_data = pd.DataFrame({'col1': [15, 16, 17, 18, 19]}, index=sample_timesteps) - 
sample_timeseries.stored_data = df_data + sample_timeseries.update_stored_data(df_data) assert np.array_equal(sample_timeseries.stored_data.values, df_data['col1'].values) # Test with a NumPy array array_data = np.array([25, 26, 27, 28, 29]) - sample_timeseries.stored_data = array_data + sample_timeseries.update_stored_data(array_data) assert np.array_equal(sample_timeseries.stored_data.values, array_data) # Test with a scalar - sample_timeseries.stored_data = 42 + sample_timeseries.update_stored_data(42) assert np.all(sample_timeseries.stored_data.values == 42) # Test with another DataArray another_dataarray = xr.DataArray([30, 31, 32, 33, 34], coords={'time': sample_timesteps}, dims=['time']) - sample_timeseries.stored_data = another_dataarray + sample_timeseries.update_stored_data(another_dataarray) assert sample_timeseries.stored_data.equals(another_dataarray) def test_stored_data_setter_no_change(self, sample_timeseries): - """Test stored_data setter when data doesn't change.""" + """Test update_stored_data method when data doesn't change.""" # Get current data current_data = sample_timeseries.stored_data current_backup = sample_timeseries._backup # Set the same data - sample_timeseries.stored_data = current_data + sample_timeseries.update_stored_data(current_data) # Backup shouldn't change assert sample_timeseries._backup is current_backup # Should be the same object @@ -229,35 +229,37 @@ def test_all_equal(self, sample_timesteps): def test_arithmetic_operations(self, sample_timeseries): """Test arithmetic operations.""" # Create a second TimeSeries for testing - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) + data2 = xr.DataArray( + [1, 2, 3, 4, 5], coords={'time': sample_timeseries.stored_data.coords['time']}, dims=['time'] + ) ts2 = TimeSeries(data2, 'Second Series') # Test operations between two TimeSeries objects assert np.array_equal( - (sample_timeseries + ts2).values, sample_timeseries.active_data.values + ts2.active_data.values + (sample_timeseries + ts2).values, sample_timeseries.selected_data.values + ts2.selected_data.values ) assert np.array_equal( - (sample_timeseries - ts2).values, sample_timeseries.active_data.values - ts2.active_data.values + (sample_timeseries - ts2).values, sample_timeseries.selected_data.values - ts2.selected_data.values ) assert np.array_equal( - (sample_timeseries * ts2).values, sample_timeseries.active_data.values * ts2.active_data.values + (sample_timeseries * ts2).values, sample_timeseries.selected_data.values * ts2.selected_data.values ) assert np.array_equal( - (sample_timeseries / ts2).values, sample_timeseries.active_data.values / ts2.active_data.values + (sample_timeseries / ts2).values, sample_timeseries.selected_data.values / ts2.selected_data.values ) # Test operations with DataArrays - assert np.array_equal((sample_timeseries + data2).values, sample_timeseries.active_data.values + data2.values) - assert np.array_equal((data2 + sample_timeseries).values, data2.values + sample_timeseries.active_data.values) + assert np.array_equal((sample_timeseries + data2).values, sample_timeseries.selected_data.values + data2.values) + assert np.array_equal((data2 + sample_timeseries).values, data2.values + sample_timeseries.selected_data.values) # Test operations with scalars - assert np.array_equal((sample_timeseries + 5).values, sample_timeseries.active_data.values + 5) - assert np.array_equal((5 + sample_timeseries).values, 5 + sample_timeseries.active_data.values) + assert 
np.array_equal((sample_timeseries + 5).values, sample_timeseries.selected_data.values + 5) + assert np.array_equal((5 + sample_timeseries).values, 5 + sample_timeseries.selected_data.values) # Test unary operations - assert np.array_equal((-sample_timeseries).values, -sample_timeseries.active_data.values) - assert np.array_equal((+sample_timeseries).values, +sample_timeseries.active_data.values) - assert np.array_equal((abs(sample_timeseries)).values, abs(sample_timeseries.active_data.values)) + assert np.array_equal((-sample_timeseries).values, -sample_timeseries.selected_data.values) + assert np.array_equal((+sample_timeseries).values, +sample_timeseries.selected_data.values) + assert np.array_equal((abs(sample_timeseries)).values, abs(sample_timeseries.selected_data.values)) def test_comparison_operations(self, sample_timesteps): """Test comparison operations.""" @@ -279,327 +281,466 @@ def test_comparison_operations(self, sample_timesteps): def test_numpy_ufunc(self, sample_timeseries): """Test numpy ufunc compatibility.""" # Test basic numpy functions - assert np.array_equal(np.add(sample_timeseries, 5).values, np.add(sample_timeseries.active_data, 5).values) + assert np.array_equal(np.add(sample_timeseries, 5).values, np.add(sample_timeseries.selected_data, 5).values) assert np.array_equal( - np.multiply(sample_timeseries, 2).values, np.multiply(sample_timeseries.active_data, 2).values + np.multiply(sample_timeseries, 2).values, np.multiply(sample_timeseries.selected_data, 2).values ) # Test with two TimeSeries objects - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) + data2 = xr.DataArray( + [1, 2, 3, 4, 5], coords={'time': sample_timeseries.stored_data.coords['time']}, dims=['time'] + ) ts2 = TimeSeries(data2, 'Second Series') assert np.array_equal( - np.add(sample_timeseries, ts2).values, np.add(sample_timeseries.active_data, ts2.active_data).values + np.add(sample_timeseries, ts2).values, np.add(sample_timeseries.selected_data, ts2.selected_data).values ) def test_sel_and_isel_properties(self, sample_timeseries): """Test sel and isel properties.""" # Test that sel property works - selected = sample_timeseries.sel(time=sample_timeseries.active_timesteps[0]) - assert selected.item() == sample_timeseries.active_data.values[0] + selected = sample_timeseries.sel(time=sample_timeseries.stored_data.coords['time'][0]) + assert selected.item() == sample_timeseries.selected_data.values[0] # Test that isel property works indexed = sample_timeseries.isel(time=0) - assert indexed.item() == sample_timeseries.active_data.values[0] + assert indexed.item() == sample_timeseries.selected_data.values[0] + + +@pytest.fixture +def sample_scenario_index(): + """Create a sample scenario index with the required 'scenario' name.""" + return pd.Index(['baseline', 'high_demand', 'low_price'], name='scenario') + + +@pytest.fixture +def simple_scenario_dataarray(sample_timesteps, sample_scenario_index): + """Create a DataArray with both scenario and time dimensions.""" + data = np.array( + [ + [10, 20, 30, 40, 50], # baseline + [15, 25, 35, 45, 55], # high_demand + [5, 15, 25, 35, 45], # low_price + ] + ) + return xr.DataArray( + data=data, coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, dims=['scenario', 'time'] + ) @pytest.fixture -def sample_collection(sample_timesteps): +def sample_scenario_timeseries(simple_scenario_dataarray): + """Create a sample TimeSeries object with scenario dimension.""" + return 
TimeSeries(simple_scenario_dataarray, name='Test Scenario Series') + + +@pytest.fixture +def sample_allocator(sample_timesteps): """Create a sample TimeSeriesCollection.""" return TimeSeriesCollection(sample_timesteps) @pytest.fixture -def populated_collection(sample_collection): - """Create a TimeSeriesCollection with test data.""" - # Add a constant time series - sample_collection.create_time_series(42, 'constant_series') - - # Add a varying time series - varying_data = np.array([10, 20, 30, 40, 50]) - sample_collection.create_time_series(varying_data, 'varying_series') - - # Add a time series with extra timestep - sample_collection.create_time_series( - np.array([1, 2, 3, 4, 5, 6]), 'extra_timestep_series', needs_extra_timestep=True - ) +def sample_scenario_allocator(sample_timesteps, sample_scenario_index): + """Create a sample TimeSeriesCollection with scenarios.""" + return TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) - # Add series with aggregation settings - sample_collection.create_time_series( - TimeSeriesData(np.array([5, 5, 5, 5, 5]), agg_group='group1'), 'group1_series1' - ) - sample_collection.create_time_series( - TimeSeriesData(np.array([6, 6, 6, 6, 6]), agg_group='group1'), 'group1_series2' - ) - sample_collection.create_time_series( - TimeSeriesData(np.array([10, 10, 10, 10, 10]), agg_weight=0.5), 'weighted_series' - ) - return sample_collection +class TestTimeSeriesWithScenarios: + """Test suite for TimeSeries class with scenarios.""" + + def test_initialization_with_scenarios(self, simple_scenario_dataarray): + """Test initialization of TimeSeries with scenario dimension.""" + ts = TimeSeries(simple_scenario_dataarray, name='Scenario Series') + + # Check basic properties + assert ts.name == 'Scenario Series' + assert ts.has_scenario_dim is True + assert ts._selected_scenarios is None # No selection initially + + # Check data initialization + assert isinstance(ts.stored_data, xr.DataArray) + assert ts.stored_data.equals(simple_scenario_dataarray) + assert ts.selected_data.equals(simple_scenario_dataarray) + + # Check backup was created + assert ts._backup.equals(simple_scenario_dataarray) + + def test_reset_with_scenarios(self, sample_scenario_timeseries, simple_scenario_dataarray): + """Test reset method with scenarios.""" + # Get original full indexes + full_timesteps = simple_scenario_dataarray.coords['time'] + full_scenarios = simple_scenario_dataarray.coords['scenario'] + + # Set to subset timesteps and scenarios + subset_timesteps = full_timesteps[1:3] + subset_scenarios = full_scenarios[:2] + + sample_scenario_timeseries.set_selection(timesteps=subset_timesteps, scenarios=subset_scenarios) + + # Verify subsets were set + assert sample_scenario_timeseries._selected_timesteps.equals(subset_timesteps) + assert sample_scenario_timeseries._selected_scenarios.equals(subset_scenarios) + assert sample_scenario_timeseries.selected_data.shape == (len(subset_scenarios), len(subset_timesteps)) + + # Reset + sample_scenario_timeseries.reset() + + # Should be back to full indexes + assert sample_scenario_timeseries._selected_timesteps is None + assert sample_scenario_timeseries._selected_scenarios is None + assert sample_scenario_timeseries.selected_data.shape == (len(full_scenarios), len(full_timesteps)) + + def test_scenario_selection(self, sample_scenario_timeseries, sample_scenario_index): + """Test scenario selection.""" + # Initial state should use all scenarios + assert sample_scenario_timeseries._selected_scenarios is None + + # Set to a 
subset + subset_index = sample_scenario_index[:2] # First two scenarios + sample_scenario_timeseries.set_selection(scenarios=subset_index) + assert sample_scenario_timeseries._selected_scenarios.equals(subset_index) + + # Active data should reflect the subset + assert sample_scenario_timeseries.selected_data.equals( + sample_scenario_timeseries.stored_data.sel(scenario=subset_index) + ) + + # Clear selection + sample_scenario_timeseries.clear_selection(timesteps=False, scenarios=True) + assert sample_scenario_timeseries._selected_scenarios is None + + def test_all_equal_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test all_equal property with scenarios.""" + # All values equal across all scenarios + equal_data = np.full((3, 5), 5) # All values are 5 + equal_dataarray = xr.DataArray( + data=equal_data, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'], + ) + ts_equal = TimeSeries(equal_dataarray, 'Equal Scenario Series') + assert ts_equal.all_equal is True + + # Equal within each scenario but different between scenarios + per_scenario_equal = np.array( + [ + [5, 5, 5, 5, 5], # baseline - all 5 + [10, 10, 10, 10, 10], # high_demand - all 10 + [15, 15, 15, 15, 15], # low_price - all 15 + ] + ) + per_scenario_dataarray = xr.DataArray( + data=per_scenario_equal, + coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, + dims=['scenario', 'time'], + ) + ts_per_scenario = TimeSeries(per_scenario_dataarray, 'Per-Scenario Equal Series') + assert ts_per_scenario.all_equal is False + + def test_arithmetic_with_scenarios(self, sample_scenario_timeseries, sample_timesteps, sample_scenario_index): + """Test arithmetic operations with scenarios.""" + # Create a second TimeSeries with scenarios + data2 = np.ones((3, 5)) # All ones + second_dataarray = xr.DataArray( + data=data2, coords={'scenario': sample_scenario_index, 'time': sample_timesteps}, dims=['scenario', 'time'] + ) + ts2 = TimeSeries(second_dataarray, 'Second Series') + + # Test operations between two scenario TimeSeries objects + result = sample_scenario_timeseries + ts2 + assert result.shape == (3, 5) + assert result.dims == ('scenario', 'time') + + # First scenario values should be increased by 1 + baseline_original = sample_scenario_timeseries.sel(scenario='baseline').values + baseline_result = result.sel(scenario='baseline').values + assert np.array_equal(baseline_result, baseline_original + 1) -class TestTimeSeriesCollection: - """Test suite for TimeSeriesCollection.""" +class TestTimeSeriesAllocator: + """Test suite for TimeSeriesCollection class.""" def test_initialization(self, sample_timesteps): """Test basic initialization.""" - collection = TimeSeriesCollection(sample_timesteps) + allocator = TimeSeriesCollection(sample_timesteps) - assert collection.all_timesteps.equals(sample_timesteps) - assert len(collection.all_timesteps_extra) == len(sample_timesteps) + 1 - assert isinstance(collection.all_hours_per_timestep, xr.DataArray) - assert len(collection) == 0 + assert allocator.timesteps.equals(sample_timesteps) + assert len(allocator.timesteps_extra) == len(sample_timesteps) + 1 + assert isinstance(allocator.hours_per_timestep, xr.DataArray) + assert len(allocator._time_series) == 0 def test_initialization_with_custom_hours(self, sample_timesteps): """Test initialization with custom hour settings.""" # Test with last timestep duration last_timestep_hours = 12 - collection = TimeSeriesCollection(sample_timesteps, 
hours_of_last_timestep=last_timestep_hours) + allocator = TimeSeriesCollection(sample_timesteps, hours_of_last_timestep=last_timestep_hours) # Verify the last timestep duration - extra_step_delta = collection.all_timesteps_extra[-1] - collection.all_timesteps_extra[-2] + extra_step_delta = allocator.timesteps_extra[-1] - allocator.timesteps_extra[-2] assert extra_step_delta == pd.Timedelta(hours=last_timestep_hours) # Test with previous timestep duration hours_per_step = 8 - collection2 = TimeSeriesCollection(sample_timesteps, hours_of_previous_timesteps=hours_per_step) + allocator2 = TimeSeriesCollection(sample_timesteps, hours_of_previous_timesteps=hours_per_step) - assert collection2.hours_of_previous_timesteps == hours_per_step + assert allocator2.hours_of_previous_timesteps == hours_per_step - def test_create_time_series(self, sample_collection): - """Test creating time series.""" + def test_add_time_series(self, sample_allocator, sample_timesteps): + """Test adding time series.""" # Test scalar - ts1 = sample_collection.create_time_series(42, 'scalar_series') + ts1 = sample_allocator.add_time_series('scalar_series', 42) assert ts1.name == 'scalar_series' - assert np.all(ts1.active_data.values == 42) + assert np.all(ts1.selected_data.values == 42) # Test numpy array data = np.array([1, 2, 3, 4, 5]) - ts2 = sample_collection.create_time_series(data, 'array_series') - assert np.array_equal(ts2.active_data.values, data) + ts2 = sample_allocator.add_time_series('array_series', data) + assert np.array_equal(ts2.selected_data.values, data) - # Test with TimeSeriesData - ts3 = sample_collection.create_time_series(TimeSeriesData(10, agg_weight=0.7), 'weighted_series') - assert ts3.aggregation_weight == 0.7 + # Test with existing TimeSeries + existing_ts = TimeSeries.from_datasource(10, 'original_name', sample_timesteps, aggregation_weight=0.7) + ts3 = sample_allocator.add_time_series('weighted_series', existing_ts) + assert ts3.name == 'weighted_series' # Name changed + assert ts3.aggregation_weight == 0.7 # Weight preserved # Test with extra timestep - ts4 = sample_collection.create_time_series(5, 'extra_series', needs_extra_timestep=True) - assert ts4.needs_extra_timestep - assert len(ts4.active_data) == len(sample_collection.timesteps_extra) + ts4 = sample_allocator.add_time_series('extra_series', 5, has_extra_timestep=True) + assert ts4.name == 'extra_series' + assert ts4.has_extra_timestep + assert len(ts4.selected_data) == len(sample_allocator.timesteps_extra) # Test duplicate name - with pytest.raises(ValueError, match='already exists'): - sample_collection.create_time_series(1, 'scalar_series') + with pytest.raises(KeyError, match='already exists'): + sample_allocator.add_time_series('scalar_series', 1) - def test_access_time_series(self, populated_collection): + def test_access_time_series(self, sample_allocator): """Test accessing time series.""" + # Add a few time series + sample_allocator.add_time_series('series1', 42) + sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) + # Test __getitem__ - ts = populated_collection['varying_series'] - assert ts.name == 'varying_series' + ts = sample_allocator['series1'] + assert ts.name == 'series1' # Test __contains__ with string - assert 'constant_series' in populated_collection - assert 'nonexistent_series' not in populated_collection + assert 'series1' in sample_allocator + assert 'nonexistent_series' not in sample_allocator # Test __contains__ with TimeSeries object - assert populated_collection['varying_series'] in 
populated_collection - - # Test __iter__ - names = [ts.name for ts in populated_collection] - assert len(names) == 6 - assert 'varying_series' in names + assert sample_allocator['series2'] in sample_allocator # Test access to non-existent series - with pytest.raises(KeyError): - populated_collection['nonexistent_series'] - - def test_constants_and_non_constants(self, populated_collection): - """Test constants and non_constants properties.""" - # Test constants - constants = populated_collection.constants - assert len(constants) == 4 # constant_series, group1_series1, group1_series2, weighted_series - assert all(ts.all_equal for ts in constants) - - # Test non_constants - non_constants = populated_collection.non_constants - assert len(non_constants) == 2 # varying_series, extra_timestep_series - assert all(not ts.all_equal for ts in non_constants) - - # Test modifying a series changes the results - populated_collection['constant_series'].stored_data = np.array([1, 2, 3, 4, 5]) - updated_constants = populated_collection.constants - assert len(updated_constants) == 3 # One less constant - assert 'constant_series' not in [ts.name for ts in updated_constants] - - def test_timesteps_properties(self, populated_collection, sample_timesteps): - """Test timestep-related properties.""" - # Test default (all) timesteps - assert populated_collection.timesteps.equals(sample_timesteps) - assert len(populated_collection.timesteps_extra) == len(sample_timesteps) + 1 - - # Test activating a subset - subset = sample_timesteps[1:3] - populated_collection.activate_timesteps(subset) - - assert populated_collection.timesteps.equals(subset) - assert len(populated_collection.timesteps_extra) == len(subset) + 1 - - # Check that time series were updated - assert populated_collection['varying_series'].active_timesteps.equals(subset) - assert populated_collection['extra_timestep_series'].active_timesteps.equals( - populated_collection.timesteps_extra - ) - - # Test reset - populated_collection.reset() - assert populated_collection.timesteps.equals(sample_timesteps) + with pytest.raises(ValueError): + sample_allocator['nonexistent_series'] - def test_to_dataframe_and_dataset(self, populated_collection): - """Test conversion to DataFrame and Dataset.""" - # Test to_dataset - ds = populated_collection.to_dataset() - assert isinstance(ds, xr.Dataset) - assert len(ds.data_vars) == 6 + def test_selection_propagation(self, sample_allocator, sample_timesteps): + """Test that selections propagate to TimeSeries.""" + # Add a few time series + ts1 = sample_allocator.add_time_series('series1', 42) + ts2 = sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) + ts3 = sample_allocator.add_time_series('series3', 5, has_extra_timestep=True) - # Test to_dataframe with different filters - df_all = populated_collection.to_dataframe(filtered='all') - assert len(df_all.columns) == 6 + # Initially no selections + assert ts1._selected_timesteps is None + assert ts2._selected_timesteps is None + assert ts3._selected_timesteps is None - df_constant = populated_collection.to_dataframe(filtered='constant') - assert len(df_constant.columns) == 4 + # Apply selection + subset_timesteps = sample_timesteps[1:3] + sample_allocator.set_selection(timesteps=subset_timesteps) - df_non_constant = populated_collection.to_dataframe(filtered='non_constant') - assert len(df_non_constant.columns) == 2 + # Check selection propagated to regular time series + assert ts1._selected_timesteps.equals(subset_timesteps) + assert 
ts2._selected_timesteps.equals(subset_timesteps) - # Test invalid filter - with pytest.raises(ValueError): - populated_collection.to_dataframe(filtered='invalid') - - def test_calculate_aggregation_weights(self, populated_collection): - """Test aggregation weight calculation.""" - weights = populated_collection.calculate_aggregation_weights() - - # Group weights should be 0.5 each (1/2) - assert populated_collection.group_weights['group1'] == 0.5 - - # Series in group1 should have weight 0.5 - assert weights['group1_series1'] == 0.5 - assert weights['group1_series2'] == 0.5 - - # Series with explicit weight should have that weight - assert weights['weighted_series'] == 0.5 - - # Series without group or weight should have weight 1 - assert weights['constant_series'] == 1 - - def test_insert_new_data(self, populated_collection, sample_timesteps): - """Test inserting new data.""" - # Create new data - new_data = pd.DataFrame( - { - 'constant_series': [100, 100, 100, 100, 100], - 'varying_series': [5, 10, 15, 20, 25], - # extra_timestep_series is omitted to test partial updates - }, - index=sample_timesteps, - ) + # Check selection with extra timestep + assert ts3._selected_timesteps is not None + assert len(ts3._selected_timesteps) == len(subset_timesteps) + 1 - # Insert data - populated_collection.insert_new_data(new_data) - - # Verify updates - assert np.all(populated_collection['constant_series'].active_data.values == 100) - assert np.array_equal(populated_collection['varying_series'].active_data.values, np.array([5, 10, 15, 20, 25])) - - # Series not in the DataFrame should be unchanged - assert np.array_equal( - populated_collection['extra_timestep_series'].active_data.values[:-1], np.array([1, 2, 3, 4, 5]) - ) + # Clear selection + sample_allocator.clear_selection() - # Test with mismatched index - bad_index = pd.date_range('2023-02-01', periods=5, freq='D', name='time') - bad_data = pd.DataFrame({'constant_series': [1, 1, 1, 1, 1]}, index=bad_index) - - with pytest.raises(ValueError, match='must match collection timesteps'): - populated_collection.insert_new_data(bad_data) - - def test_restore_data(self, populated_collection): - """Test restoring original data.""" - # Capture original data - original_values = {name: ts.stored_data.copy() for name, ts in populated_collection.time_series_data.items()} - - # Modify data - new_data = pd.DataFrame( - { - name: np.ones(len(populated_collection.timesteps)) * 999 - for name in populated_collection.time_series_data - if not populated_collection[name].needs_extra_timestep - }, - index=populated_collection.timesteps, - ) + # Check selection cleared + assert ts1._selected_timesteps is None + assert ts2._selected_timesteps is None + assert ts3._selected_timesteps is None - populated_collection.insert_new_data(new_data) + def test_update_time_series(self, sample_allocator): + """Test updating a time series.""" + # Add a time series + ts = sample_allocator.add_time_series('series', 42) - # Verify data was changed - assert np.all(populated_collection['constant_series'].active_data.values == 999) + # Update it + sample_allocator.update_time_series('series', np.array([1, 2, 3, 4, 5])) - # Restore data - populated_collection.restore_data() + # Check update was applied + assert np.array_equal(ts.selected_data.values, np.array([1, 2, 3, 4, 5])) - # Verify data was restored - for name, original in original_values.items(): - restored = populated_collection[name].stored_data - assert np.array_equal(restored.values, original.values) + # Test updating 
non-existent series + with pytest.raises(KeyError): + sample_allocator.update_time_series('nonexistent', 42) - def test_class_method_with_uniform_timesteps(self): - """Test the with_uniform_timesteps class method.""" - collection = TimeSeriesCollection.with_uniform_timesteps( - start_time=pd.Timestamp('2023-01-01'), periods=24, freq='H', hours_per_step=1 - ) + def test_as_dataset(self, sample_allocator): + """Test as_dataset method.""" + # Add some time series + sample_allocator.add_time_series('series1', 42) + sample_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5])) - assert len(collection.timesteps) == 24 - assert collection.hours_of_previous_timesteps == 1 - assert (collection.timesteps[1] - collection.timesteps[0]) == pd.Timedelta(hours=1) + # Get dataset + ds = sample_allocator.as_dataset(with_extra_timestep=False) - def test_hours_per_timestep(self, populated_collection): - """Test hours_per_timestep calculation.""" - # Standard case - uniform timesteps - hours = populated_collection.hours_per_timestep.values - assert np.allclose(hours, 24) # Default is daily timesteps + # Check dataset contents + assert isinstance(ds, xr.Dataset) + assert 'series1' in ds + assert 'series2' in ds + assert np.all(ds['series1'].values == 42) + assert np.array_equal(ds['series2'].values, np.array([1, 2, 3, 4, 5])) - # Create non-uniform timesteps - non_uniform_times = pd.DatetimeIndex( - [ - pd.Timestamp('2023-01-01'), - pd.Timestamp('2023-01-02'), - pd.Timestamp('2023-01-03 12:00:00'), # 1.5 days from previous - pd.Timestamp('2023-01-04'), # 0.5 days from previous - pd.Timestamp('2023-01-06'), # 2 days from previous - ], - name='time', - ) - collection = TimeSeriesCollection(non_uniform_times) - hours = collection.hours_per_timestep.values +class TestTimeSeriesAllocatorWithScenarios: + """Test suite for TimeSeriesCollection with scenarios.""" - # Expected hours between timestamps - expected = np.array([24, 36, 12, 48, 48]) - assert np.allclose(hours, expected) + def test_initialization_with_scenarios(self, sample_timesteps, sample_scenario_index): + """Test initialization with scenarios.""" + allocator = TimeSeriesCollection(sample_timesteps, scenarios=sample_scenario_index) - def test_validation_and_errors(self, sample_timesteps): - """Test validation and error handling.""" - # Test non-DatetimeIndex - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): - TimeSeriesCollection(pd.Index([1, 2, 3, 4, 5])) + assert allocator.timesteps.equals(sample_timesteps) + assert allocator.scenarios.equals(sample_scenario_index) + assert len(allocator._time_series) == 0 - # Test too few timesteps - with pytest.raises(ValueError, match='must contain at least 2 timestamps'): - TimeSeriesCollection(pd.DatetimeIndex([pd.Timestamp('2023-01-01')], name='time')) + def test_add_time_series_with_scenarios(self, sample_scenario_allocator): + """Test creating time series with scenarios.""" + # Test scalar (broadcasts to all scenarios) + ts1 = sample_scenario_allocator.add_time_series('scalar_series', 42) + assert ts1.has_scenario_dim + assert ts1.name == 'scalar_series' + assert ts1.selected_data.shape == (5, 3) # 5 timesteps, 3 scenarios + assert np.all(ts1.selected_data.values == 42) - # Test invalid active_timesteps - collection = TimeSeriesCollection(sample_timesteps) - invalid_timesteps = pd.date_range('2024-01-01', periods=3, freq='D', name='time') + # Test 1D array (broadcasts to all scenarios) + data = np.array([1, 2, 3, 4, 5]) + ts2 = 
sample_scenario_allocator.add_time_series('array_series', data)
+        assert ts2.has_scenario_dim
+        assert ts2.selected_data.shape == (5, 3)
+        # Each scenario should have the same values
+        for scenario in sample_scenario_allocator.scenarios:
+            assert np.array_equal(ts2.sel(scenario=scenario).values, data)
+
+        # Test 2D array (one column per scenario after transposing)
+        data_2d = np.array([[10, 20, 30, 40, 50], [15, 25, 35, 45, 55], [5, 15, 25, 35, 45]]).T
+        ts3 = sample_scenario_allocator.add_time_series('scenario_specific_series', data_2d)
+        assert ts3.has_scenario_dim
+        assert ts3.selected_data.shape == (5, 3)
+        # Each scenario should have its own values
+        assert np.array_equal(ts3.sel(scenario='baseline').values, data_2d[:, 0])
+        assert np.array_equal(ts3.sel(scenario='high_demand').values, data_2d[:, 1])
+        assert np.array_equal(ts3.sel(scenario='low_price').values, data_2d[:, 2])
+
+    def test_selection_propagation_with_scenarios(
+        self, sample_scenario_allocator, sample_timesteps, sample_scenario_index
+    ):
+        """Test scenario selection propagation."""
+        # Add some time series
+        ts1 = sample_scenario_allocator.add_time_series('series1', 42)
+        ts2 = sample_scenario_allocator.add_time_series('series2', np.array([1, 2, 3, 4, 5]))
+
+        # Initial state - no selections
+        assert ts1._selected_scenarios is None
+        assert ts2._selected_scenarios is None
+
+        # Select scenarios
+        subset_scenarios = sample_scenario_index[:2]
+        sample_scenario_allocator.set_selection(scenarios=subset_scenarios)
+
+        # Check selections propagated
+        assert ts1._selected_scenarios.equals(subset_scenarios)
+        assert ts2._selected_scenarios.equals(subset_scenarios)
+
+        # Check data is filtered
+        assert ts1.selected_data.shape == (5, 2)  # 5 timesteps, 2 scenarios
+        assert ts2.selected_data.shape == (5, 2)
+
+        # Apply combined selection
+        subset_timesteps = sample_timesteps[1:3]
+        sample_scenario_allocator.set_selection(timesteps=subset_timesteps, scenarios=subset_scenarios)
+
+        # Check combined selection applied
+        assert ts1._selected_timesteps.equals(subset_timesteps)
+        assert ts1._selected_scenarios.equals(subset_scenarios)
+        assert ts1.selected_data.shape == (2, 2)  # 2 timesteps, 2 scenarios
+
+        # Clear selections
+        sample_scenario_allocator.clear_selection()
+        assert ts1._selected_timesteps is None
+        assert ts1.active_timesteps.equals(sample_scenario_allocator.timesteps)
+        assert ts1._selected_scenarios is None
+        assert ts1.active_scenarios.equals(sample_scenario_allocator.scenarios)
+        assert ts1.selected_data.shape == (5, 3)  # Back to full shape
+
+    def test_as_dataset_with_scenarios(self, sample_scenario_allocator):
+        """Test as_dataset method with scenarios."""
+        # Add some time series
+        sample_scenario_allocator.add_time_series('scalar_series', 42)
+        sample_scenario_allocator.add_time_series(
+            'varying_series', np.array([[10, 20, 30, 40, 50], [15, 25, 35, 45, 55], [5, 15, 25, 35, 45]]).T
+        )
+
+        # Get dataset
+        ds = sample_scenario_allocator.as_dataset(with_extra_timestep=False)
+
+        # Check dataset dimensions
+        assert 'scenario' in ds.dims
+        assert 'time' in ds.dims
+        assert ds.sizes['scenario'] == 3
+        assert ds.sizes['time'] == 5
+
+        # Check dataset variables
+        assert 'scalar_series' in ds
+        assert 'varying_series' in ds
+
+        # Check values
+        assert np.all(ds['scalar_series'].values == 42)
+        baseline_values = ds['varying_series'].sel(scenario='baseline').values
+        assert np.array_equal(baseline_values, np.array([10, 20, 30, 40, 50]))
+
+    
def test_contains_and_iteration(self, sample_scenario_allocator):
+        """Test __contains__ with names, TimeSeries objects, and invalid types."""
+        # Add some time series
+        ts1 = sample_scenario_allocator.add_time_series('series1', 42)
+        sample_scenario_allocator.add_time_series('series2', 10)
+
+        # Test __contains__
+        assert 'series1' in sample_scenario_allocator
+        assert ts1 in sample_scenario_allocator
+        assert 'nonexistent' not in sample_scenario_allocator
+
+        # Test behavior with invalid type
+        with pytest.raises(TypeError):
+            assert 42 in sample_scenario_allocator
+
+    def test_update_time_series_with_scenarios(self, sample_scenario_allocator, sample_scenario_index):
+        """Test updating a time series with scenarios."""
+        # Add a time series
+        ts = sample_scenario_allocator.add_time_series('series', 42)
+        assert ts.has_scenario_dim
+        assert np.all(ts.selected_data.values == 42)
+
+        # Update with scenario-specific data
+        new_data = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]]).T
+        sample_scenario_allocator.update_time_series('series', new_data)
+
+        # Check update was applied
+        assert np.array_equal(ts.selected_data.values, new_data)
+        assert ts.has_scenario_dim
+
+        # Check scenario-specific values
+        assert np.array_equal(ts.sel(scenario='baseline').values, new_data[:, 0])
+        assert np.array_equal(ts.sel(scenario='high_demand').values, new_data[:, 1])
+        assert np.array_equal(ts.sel(scenario='low_price').values, new_data[:, 2])
+
+
+if __name__ == '__main__':
+    pytest.main()
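
For readers skimming the patch, the conversion contract these tests pin down can be restated as a short usage sketch. It reuses only names that appear in the tests above (`DataConverter.as_dataarray` with an optional scenario index); treat it as illustrative rather than authoritative.

```python
# Minimal sketch of the behaviour asserted by the tests above;
# assumes DataConverter is importable as in the test suite.
import numpy as np
import pandas as pd

from flixopt.core import DataConverter

timesteps = pd.date_range('2024-01-01', periods=5, freq='D', name='time')
scenarios = pd.Index(['baseline', 'high_demand'], name='scenario')

# Scalars and 1D time-length arrays broadcast across scenarios,
# always yielding dims ('time', 'scenario').
da = DataConverter.as_dataarray(42, timesteps, scenarios)
assert da.dims == ('time', 'scenario') and da.shape == (5, 2)

# 2D input must be shaped (n_timesteps, n_scenarios): one column per scenario.
data = np.arange(10).reshape(5, 2)
da = DataConverter.as_dataarray(data, timesteps, scenarios)
assert np.array_equal(da.sel(scenario='baseline').values, data[:, 0])
```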