diff --git a/examples/01_Simple/simple_example.py b/examples/01_Simple/simple_example.py
index 45550c9cc..da10aed62 100644
--- a/examples/01_Simple/simple_example.py
+++ b/examples/01_Simple/simple_example.py
@@ -103,9 +103,14 @@
 calculation = fx.FullCalculation(name='Sim1', flow_system=flow_system)
 calculation.do_modeling()  # Translate the model to a solvable form, creating equations and Variables
 
+calculation2 = fx.FullCalculation(name='Sim2', flow_system=flow_system)
+calculation2.do_modeling()  # Translate the model to a solvable form, creating equations and Variables
+
 # --- Solve the Calculation and Save Results ---
 calculation.solve(fx.solvers.HighsSolver(mip_gap=0, time_limit_seconds=30))
 
+calculation2.solve(fx.solvers.HighsSolver(mip_gap=0, time_limit_seconds=30))
+
 # --- Analyze Results ---
 calculation.results['Fernwärme'].plot_node_balance_pie()
 calculation.results['Fernwärme'].plot_node_balance()
diff --git a/flixopt/calculation.py b/flixopt/calculation.py
index c7367cad2..251a50075 100644
--- a/flixopt/calculation.py
+++ b/flixopt/calculation.py
@@ -29,7 +29,7 @@
 from .flow_system import FlowSystem
 from .results import CalculationResults, SegmentedCalculationResults
 from .solvers import _Solver
-from .structure import SystemModel, copy_and_convert_datatypes, get_compact_representation
+from .structure import SystemModel
 
 logger = logging.getLogger('flixopt')
 
@@ -54,7 +54,13 @@ def __init__(
             folder: folder where results should be saved. If None, then the current working directory is used.
         """
         self.name = name
+        if flow_system.used_in_calculation:
+            logger.warning(f'FlowSystem {flow_system.name} is already used in a calculation. '
+                           f'Creating a copy for Calculation "{self.name}".')
+            flow_system = flow_system.copy()
+
         self.flow_system = flow_system
+        self.flow_system._used_in_calculation = True
         self.model: Optional[SystemModel] = None
         self.active_timesteps = active_timesteps
 
@@ -119,7 +125,7 @@ def main_results(self) -> Dict[str, Union[Scalar, Dict]]:
     def summary(self):
         return {
             'Name': self.name,
-            'Number of timesteps': len(self.flow_system.time_series_collection.timesteps),
+            'Number of timesteps': len(self.flow_system.timesteps),
             'Calculation Type': self.__class__.__name__,
             'Constraints': self.model.constraints.ncons,
             'Variables': self.model.variables.nvars,
@@ -136,7 +142,7 @@ class for defined way of solving a flow_system optimization
 
     def do_modeling(self) -> SystemModel:
         t_start = timeit.default_timer()
-        self._activate_time_series()
+        self.flow_system.connect_and_transform()
 
         self.model = self.flow_system.create_model()
         self.model.do_modeling()
@@ -181,12 +187,6 @@ def solve(self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_ma
 
         self.results = CalculationResults.from_calculation(self)
 
-    def _activate_time_series(self):
-        self.flow_system.transform_data()
-        self.flow_system.time_series_collection.activate_timesteps(
-            active_timesteps=self.active_timesteps,
-        )
-
 
 class AggregatedCalculation(FullCalculation):
     """
@@ -224,7 +224,7 @@ def __init__(
 
     def do_modeling(self) -> SystemModel:
         t_start = timeit.default_timer()
-        self._activate_time_series()
+        self.flow_system.connect_and_transform()
         self._perform_aggregation()
 
         # Model the System
@@ -245,8 +245,8 @@ def _perform_aggregation(self):
 
         # Validation
         dt_min, dt_max = (
-            np.min(self.flow_system.time_series_collection.hours_per_timestep),
-            np.max(self.flow_system.time_series_collection.hours_per_timestep),
+            np.min(self.flow_system.hours_per_timestep),
+            np.max(self.flow_system.hours_per_timestep),
         )
         if not dt_min == dt_max:
             raise ValueError(
@@ -255,11 +255,11 @@ def _perform_aggregation(self):
             )
         steps_per_period = (
            self.aggregation_parameters.hours_per_period
-            / self.flow_system.time_series_collection.hours_per_timestep.max()
+            / self.flow_system.hours_per_timestep.max()
         )
         is_integer = (
            self.aggregation_parameters.hours_per_period
-            % self.flow_system.time_series_collection.hours_per_timestep.max()
+            % self.flow_system.hours_per_timestep.max()
         ).item() == 0
         if not (steps_per_period.size == 1 and is_integer):
             raise ValueError(
@@ -272,13 +272,13 @@ def _perform_aggregation(self):
 
         # Aggregation - creation of aggregated timeseries:
         self.aggregation = Aggregation(
-            original_data=self.flow_system.time_series_collection.to_dataframe(
+            original_data=self.flow_system.to_dataframe(
                 include_extra_timestep=False
             ),  # Exclude last row (NaN)
             hours_per_time_step=float(dt_min),
             hours_per_period=self.aggregation_parameters.hours_per_period,
             nr_of_periods=self.aggregation_parameters.nr_of_periods,
-            weights=self.flow_system.time_series_collection.calculate_aggregation_weights(),
+            weights=self.flow_system.calculate_aggregation_weights(),
             time_series_for_high_peaks=self.aggregation_parameters.labels_for_high_peaks,
             time_series_for_low_peaks=self.aggregation_parameters.labels_for_low_peaks,
         )
@@ -286,7 +286,7 @@ def _perform_aggregation(self):
         self.aggregation.cluster()
         self.aggregation.plot(show=True, save=self.folder / 'aggregation.html')
         if self.aggregation_parameters.aggregate_data_and_fix_non_binary_vars:
-            self.flow_system.time_series_collection.insert_new_data(
+            self.flow_system.insert_new_data(
                 self.aggregation.aggregated_data, include_extra_timestep=False
             )
         self.durations['aggregation'] = round(timeit.default_timer() - t_start_agg, 2)
@@ -327,8 +327,8 @@ def __init__(
        self.nr_of_previous_values = nr_of_previous_values
        self.sub_calculations: List[FullCalculation] = []
 
-        self.all_timesteps = self.flow_system.time_series_collection.all_timesteps
-        self.all_timesteps_extra = self.flow_system.time_series_collection.all_timesteps_extra
+        self.all_timesteps = self.flow_system.all_timesteps
+        self.all_timesteps_extra = self.flow_system.all_timesteps_extra
         self.segment_names = [
             f'Segment_{i + 1}' for i in range(math.ceil(len(self.all_timesteps) / self.timesteps_per_segment))
diff --git a/flixopt/components.py b/flixopt/components.py
index 1f5fe5ece..3f41783a8 100644
--- a/flixopt/components.py
+++ b/flixopt/components.py
@@ -7,9 +7,10 @@
 
 import linopy
 import numpy as np
+import xarray as xr
 
 from . import utils
-from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeries
+from .core import NumericDataUser, PlausibilityError, Scalar
 from .elements import Component, ComponentModel, Flow
 from .features import InvestmentModel, OnOffModel, PiecewiseModel
 from .interface import InvestParameters, OnOffParameters, PiecewiseConversion
@@ -34,7 +35,7 @@ def __init__(
         inputs: List[Flow],
         outputs: List[Flow],
         on_off_parameters: OnOffParameters = None,
-        conversion_factors: List[Dict[str, NumericDataTS]] = None,
+        conversion_factors: List[Dict[str, NumericDataUser]] = None,
         piecewise_conversion: Optional[PiecewiseConversion] = None,
         meta_data: Optional[Dict] = None,
     ):
@@ -98,14 +99,14 @@
         if self.piecewise_conversion:
             self.piecewise_conversion.transform_data(flow_system, f'{self.label_full}|PiecewiseConversion')
 
-    def _transform_conversion_factors(self, flow_system: 'FlowSystem') -> List[Dict[str, TimeSeries]]:
-        """macht alle Faktoren, die nicht TimeSeries sind, zu TimeSeries"""
+    def _transform_conversion_factors(self, flow_system: 'FlowSystem') -> List[Dict[str, xr.DataArray]]:
+        """Converts all conversion factors to internal datatypes"""
         list_of_conversion_factors = []
         for idx, conversion_factor in enumerate(self.conversion_factors):
             transformed_dict = {}
             for flow, values in conversion_factor.items():
                 # TODO: Might be better to use the label of the component instead of the flow
-                transformed_dict[flow] = flow_system.create_time_series(
+                transformed_dict[flow] = flow_system.fit_to_model_coords(
                     f'{self.flows[flow].label_full}|conversion_factor{idx}', values
                 )
             list_of_conversion_factors.append(transformed_dict)
@@ -128,14 +129,14 @@ def __init__(
         charging: Flow,
         discharging: Flow,
         capacity_in_flow_hours: Union[Scalar, InvestParameters],
-        relative_minimum_charge_state: NumericData = 0,
-        relative_maximum_charge_state: NumericData = 1,
+        relative_minimum_charge_state: NumericDataUser = 0,
+        relative_maximum_charge_state: NumericDataUser = 1,
         initial_charge_state: Union[Scalar, Literal['lastValueOfSim']] = 0,
         minimal_final_charge_state: Optional[Scalar] = None,
         maximal_final_charge_state: Optional[Scalar] = None,
-        eta_charge: NumericData = 1,
-        eta_discharge: NumericData = 1,
-        relative_loss_per_hour: NumericData = 0,
+        eta_charge: NumericDataUser = 1,
+        eta_discharge: NumericDataUser = 1,
+        relative_loss_per_hour: NumericDataUser = 0,
         prevent_simultaneous_charge_and_discharge: bool = True,
         meta_data: Optional[Dict] = None,
     ):
@@ -176,16 +177,16 @@ def __init__(
         self.charging = charging
         self.discharging = discharging
         self.capacity_in_flow_hours = capacity_in_flow_hours
-        self.relative_minimum_charge_state: NumericDataTS = relative_minimum_charge_state
-        self.relative_maximum_charge_state: NumericDataTS = relative_maximum_charge_state
+        self.relative_minimum_charge_state: NumericDataUser = relative_minimum_charge_state
+        self.relative_maximum_charge_state: NumericDataUser = relative_maximum_charge_state
         self.initial_charge_state = initial_charge_state
         self.minimal_final_charge_state = minimal_final_charge_state
         self.maximal_final_charge_state = maximal_final_charge_state
-        self.eta_charge: NumericDataTS = eta_charge
-        self.eta_discharge: NumericDataTS = eta_discharge
-        self.relative_loss_per_hour: NumericDataTS = relative_loss_per_hour
+        self.eta_charge: NumericDataUser = eta_charge
+        self.eta_discharge: NumericDataUser = eta_discharge
+        self.relative_loss_per_hour: NumericDataUser = relative_loss_per_hour
         self.prevent_simultaneous_charge_and_discharge = prevent_simultaneous_charge_and_discharge
 
     def create_model(self, model: SystemModel) -> 'StorageModel':
@@ -195,19 +196,19 @@ def create_model(self, model: SystemModel) -> 'StorageModel':
 
     def transform_data(self, flow_system: 'FlowSystem') -> None:
         super().transform_data(flow_system)
-        self.relative_minimum_charge_state = flow_system.create_time_series(
+        self.relative_minimum_charge_state = flow_system.fit_to_model_coords(
             f'{self.label_full}|relative_minimum_charge_state',
             self.relative_minimum_charge_state,
             needs_extra_timestep=True,
         )
-        self.relative_maximum_charge_state = flow_system.create_time_series(
+        self.relative_maximum_charge_state = flow_system.fit_to_model_coords(
             f'{self.label_full}|relative_maximum_charge_state',
             self.relative_maximum_charge_state,
             needs_extra_timestep=True,
         )
-        self.eta_charge = flow_system.create_time_series(f'{self.label_full}|eta_charge', self.eta_charge)
-        self.eta_discharge = flow_system.create_time_series(f'{self.label_full}|eta_discharge', self.eta_discharge)
-        self.relative_loss_per_hour = flow_system.create_time_series(
+        self.eta_charge = flow_system.fit_to_model_coords(f'{self.label_full}|eta_charge', self.eta_charge)
+        self.eta_discharge = flow_system.fit_to_model_coords(f'{self.label_full}|eta_discharge', self.eta_discharge)
+        self.relative_loss_per_hour = flow_system.fit_to_model_coords(
             f'{self.label_full}|relative_loss_per_hour', self.relative_loss_per_hour
         )
         if isinstance(self.capacity_in_flow_hours, InvestParameters):
@@ -264,8 +265,8 @@ def __init__(
         out1: Flow,
         in2: Optional[Flow] = None,
         out2: Optional[Flow] = None,
-        relative_losses: Optional[NumericDataTS] = None,
-        absolute_losses: Optional[NumericDataTS] = None,
+        relative_losses: Optional[NumericDataUser] = None,
+        absolute_losses: Optional[NumericDataUser] = None,
         on_off_parameters: OnOffParameters = None,
         prevent_simultaneous_flows_in_both_directions: bool = True,
         meta_data: Optional[Dict] = None,
@@ -331,10 +332,10 @@ def create_model(self, model) -> 'TransmissionModel':
 
     def transform_data(self, flow_system: 'FlowSystem') -> None:
         super().transform_data(flow_system)
-        self.relative_losses = flow_system.create_time_series(
+        self.relative_losses = flow_system.fit_to_model_coords(
             f'{self.label_full}|relative_losses', self.relative_losses
         )
-        self.absolute_losses = flow_system.create_time_series(
+        self.absolute_losses = flow_system.fit_to_model_coords(
             f'{self.label_full}|absolute_losses', self.absolute_losses
         )
@@ -348,7 +349,7 @@ def __init__(self, model: SystemModel, element: Transmission):
     def do_modeling(self):
         """Initiates all FlowModels"""
         # Force On Variable if absolute losses are present
-        if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses.active_data != 0):
+        if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses != 0):
             for flow in self.element.inputs + self.element.outputs:
                 if flow.on_off_parameters is None:
                     flow.on_off_parameters = OnOffParameters()
@@ -385,14 +386,14 @@ def create_transmission_equation(self, name: str, in_flow: Flow, out_flow: Flow)
         # eq: out(t) + on(t)*loss_abs(t) = in(t)*(1 - loss_rel(t))
         con_transmission = self.add(
             self._model.add_constraints(
-                out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses.active_data - 1),
+                out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses - 1),
                 name=f'{self.label_full}|{name}',
             ),
             name,
         )
 
         if self.element.absolute_losses is not None:
-            con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses.active_data
+            con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses
 
         return con_transmission
@@ -420,8 +421,8 @@ def do_modeling(self):
             self.add(
                 self._model.add_constraints(
-                    sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_inputs])
-                    == sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_outputs]),
+                    sum([flow.model.flow_rate * conv_factors[flow.label] for flow in used_inputs])
+                    == sum([flow.model.flow_rate * conv_factors[flow.label] for flow in used_outputs]),
                     name=f'{self.label_full}|conversion_{i}',
                 )
             )
@@ -481,12 +482,12 @@ def do_modeling(self):
         )
 
         charge_state = self.charge_state
-        rel_loss = self.element.relative_loss_per_hour.active_data
+        rel_loss = self.element.relative_loss_per_hour
         hours_per_step = self._model.hours_per_step
         charge_rate = self.element.charging.model.flow_rate
         discharge_rate = self.element.discharging.model.flow_rate
-        eff_charge = self.element.eta_charge.active_data
-        eff_discharge = self.element.eta_discharge.active_data
+        eff_charge = self.element.eta_charge
+        eff_discharge = self.element.eta_discharge
 
         self.add(
             self._model.add_constraints(
@@ -556,7 +557,7 @@ def _initial_and_final_charge_state(self):
         )
 
     @property
-    def absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]:
+    def absolute_charge_state_bounds(self) -> Tuple[NumericDataUser, NumericDataUser]:
         relative_lower_bound, relative_upper_bound = self.relative_charge_state_bounds
         if not isinstance(self.element.capacity_in_flow_hours, InvestParameters):
             return (
@@ -570,10 +571,10 @@ def absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]:
         )
 
     @property
-    def relative_charge_state_bounds(self) -> Tuple[NumericData, NumericData]:
+    def relative_charge_state_bounds(self) -> Tuple[NumericDataUser, NumericDataUser]:
         return (
-            self.element.relative_minimum_charge_state.active_data,
-            self.element.relative_maximum_charge_state.active_data,
+            self.element.relative_minimum_charge_state,
+            self.element.relative_maximum_charge_state,
         )
diff --git a/flixopt/core.py b/flixopt/core.py
index 08be18f1d..61e951019 100644
--- a/flixopt/core.py
+++ b/flixopt/core.py
@@ -17,13 +17,13 @@
 logger = logging.getLogger('flixopt')
 
 Scalar = Union[int, float]
-"""A type representing a single number, either integer or float."""
+"""A single number, either integer or float."""
 
-NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray]
-"""Represents any form of numeric data, from simple scalars to complex data structures."""
+NumericDataUser = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray, 'TimeSeriesData']
+"""Numeric data accepted in various types. Will be converted to an xr.DataArray or Scalar internally."""
 
-NumericDataTS = Union[NumericData, 'TimeSeriesData']
-"""Represents either standard numeric data or TimeSeriesData."""
+NumericDataInternal = Union[int, float, xr.DataArray, 'TimeSeriesData']
+"""Internally used datatypes for numeric data."""
 
 
 class PlausibilityError(Exception):
@@ -38,933 +38,222 @@ class ConversionError(Exception):
     pass
 
 
-class DataConverter:
-    """
-    Converts various data types into xarray.DataArray with a timesteps index.
-
-    Supports: scalars, arrays, Series, DataFrames, and DataArrays.
- """ - - @staticmethod - def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray: - """Convert data to xarray.DataArray with specified timesteps index.""" - if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: - raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') - if not timesteps.name == 'time': - raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}') +class TimeSeriesData(xr.DataArray): + """Minimal TimeSeriesData that inherits from xr.DataArray with aggregation metadata.""" - coords = [timesteps] - dims = ['time'] - expected_shape = (len(timesteps),) + __slots__ = () # No additional instance attributes - everything goes in attrs - try: - if isinstance(data, (int, float, np.integer, np.floating)): - return xr.DataArray(data, coords=coords, dims=dims) - elif isinstance(data, pd.DataFrame): - if not data.index.equals(timesteps): - raise ConversionError("DataFrame index doesn't match timesteps index") - if not len(data.columns) == 1: - raise ConversionError('DataFrame must have exactly one column') - return xr.DataArray(data.values.flatten(), coords=coords, dims=dims) - elif isinstance(data, pd.Series): - if not data.index.equals(timesteps): - raise ConversionError("Series index doesn't match timesteps index") - return xr.DataArray(data.values, coords=coords, dims=dims) - elif isinstance(data, np.ndarray): - if data.ndim != 1: - raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}') - elif data.shape[0] != expected_shape[0]: - raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}") - return xr.DataArray(data, coords=coords, dims=dims) - elif isinstance(data, xr.DataArray): - if data.dims != tuple(dims): - raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}") - if data.sizes[dims[0]] != len(coords[0]): - raise ConversionError( - f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}" - ) - return data.copy(deep=True) - else: - raise ConversionError(f'Unsupported type: {type(data).__name__}') - except Exception as e: - if isinstance(e, ConversionError): - raise - raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e - - -class TimeSeriesData: - # TODO: Move to Interface.py - def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None): + def __init__(self, *args, agg_group: Optional[str] = None, agg_weight: Optional[float] = None, **kwargs): """ - timeseries class for transmit timeseries AND special characteristics of timeseries, - i.g. to define weights needed in calculation_type 'aggregated' - EXAMPLE solar: - you have several solar timeseries. These should not be overweighted - compared to the remaining timeseries (i.g. heat load, price)! - fixed_relative_profile_solar1 = TimeSeriesData(sol_array_1, type = 'solar') - fixed_relative_profile_solar2 = TimeSeriesData(sol_array_2, type = 'solar') - fixed_relative_profile_solar3 = TimeSeriesData(sol_array_3, type = 'solar') - --> this 3 series of same type share one weight, i.e. internally assigned each weight = 1/3 - (instead of standard weight = 1) - Args: - data: The timeseries data, which can be a scalar, array, or numpy array. - agg_group: The group this TimeSeriesData is a part of. agg_weight is split between members of a group. Default is None. 
-            agg_weight: The weight for calculation_type 'aggregated', should be between 0 and 1. Default is None.
-
-        Raises:
-            Exception: If both agg_group and agg_weight are set, an exception is raised.
+            *args: Arguments passed to DataArray
+            agg_group: Aggregation group name
+            agg_weight: Aggregation weight (0-1)
+            **kwargs: Additional arguments passed to DataArray
         """
-        self.data = data
-        self.agg_group = agg_group
-        self.agg_weight = agg_weight
         if (agg_group is not None) and (agg_weight is not None):
-            raise ValueError('Either or explicit can be used. Not both!')
-        self.label: Optional[str] = None
-
-    def __repr__(self):
-        # Get the constructor arguments and their current values
-        init_signature = inspect.signature(self.__init__)
-        init_args = init_signature.parameters
-
-        # Create a dictionary with argument names and their values
-        args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self')
-        return f'{self.__class__.__name__}({args_str})'
-
-    def __str__(self):
-        return str(self.data)
-
-
-class TimeSeries:
-    """
-    A class representing time series data with active and stored states.
-
-    TimeSeries provides a way to store time-indexed data and work with temporal subsets.
-    It supports arithmetic operations, aggregation, and JSON serialization.
-
-    Attributes:
-        name (str): The name of the time series
-        aggregation_weight (Optional[float]): Weight used for aggregation
-        aggregation_group (Optional[str]): Group name for shared aggregation weighting
-        needs_extra_timestep (bool): Whether this series needs an extra timestep
-    """
-
-    @classmethod
-    def from_datasource(
-        cls,
-        data: NumericData,
-        name: str,
-        timesteps: pd.DatetimeIndex,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        needs_extra_timestep: bool = False,
-    ) -> 'TimeSeries':
-        """
-        Initialize the TimeSeries from multiple data sources.
-
-        Args:
-            data: The time series data
-            name: The name of the TimeSeries
-            timesteps: The timesteps of the TimeSeries
-            aggregation_weight: The weight in aggregation calculations
-            aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing
-            needs_extra_timestep: Whether this series requires an extra timestep
-
-        Returns:
-            A new TimeSeries instance
-        """
-        return cls(
-            DataConverter.as_dataarray(data, timesteps),
-            name,
-            aggregation_weight,
-            aggregation_group,
-            needs_extra_timestep,
-        )
-
-    @classmethod
-    def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = None) -> 'TimeSeries':
-        """
-        Load a TimeSeries from a dictionary or json file.
-
-        Args:
-            data: Dictionary containing TimeSeries data
-            path: Path to a JSON file containing TimeSeries data
-
-        Returns:
-            A new TimeSeries instance
-
-        Raises:
-            ValueError: If both path and data are provided or neither is provided
-        """
-        if (path is None and data is None) or (path is not None and data is not None):
-            raise ValueError("Exactly one of 'path' or 'data' must be provided")
-
-        if path is not None:
-            with open(path, 'r') as f:
-                data = json.load(f)
-
-        # Convert ISO date strings to datetime objects
-        data['data']['coords']['time']['data'] = pd.to_datetime(data['data']['coords']['time']['data'])
-
-        # Create the TimeSeries instance
-        return cls(
-            data=xr.DataArray.from_dict(data['data']),
-            name=data['name'],
-            aggregation_weight=data['aggregation_weight'],
-            aggregation_group=data['aggregation_group'],
-            needs_extra_timestep=data['needs_extra_timestep'],
-        )
-
-    def __init__(
-        self,
-        data: xr.DataArray,
-        name: str,
-        aggregation_weight: Optional[float] = None,
-        aggregation_group: Optional[str] = None,
-        needs_extra_timestep: bool = False,
-    ):
-        """
-        Initialize a TimeSeries with a DataArray.
-
-        Args:
-            data: The DataArray containing time series data
-            name: The name of the TimeSeries
-            aggregation_weight: The weight in aggregation calculations
-            aggregation_group: Group this TimeSeries belongs to for weight sharing
-            needs_extra_timestep: Whether this series requires an extra timestep
-
-        Raises:
-            ValueError: If data doesn't have a 'time' index or has more than 1 dimension
-        """
-        if 'time' not in data.indexes:
-            raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}')
-        if data.ndim > 1:
-            raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}')
-
-        self.name = name
-        self.aggregation_weight = aggregation_weight
-        self.aggregation_group = aggregation_group
-        self.needs_extra_timestep = needs_extra_timestep
-
-        # Data management
-        self._stored_data = data.copy(deep=True)
-        self._backup = self._stored_data.copy(deep=True)
-        self._active_timesteps = self._stored_data.indexes['time']
-        self._active_data = None
-        self._update_active_data()
-
-    def reset(self):
-        """
-        Reset active timesteps to the full set of stored timesteps.
-        """
-        self.active_timesteps = None
-
-    def restore_data(self):
-        """
-        Restore stored_data from the backup and reset active timesteps.
-        """
-        self._stored_data = self._backup.copy(deep=True)
-        self.reset()
-
-    def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]:
-        """
-        Save the TimeSeries to a dictionary or JSON file.
-
-        Args:
-            path: Optional path to save JSON file
-
-        Returns:
-            Dictionary representation of the TimeSeries
-        """
-        data = {
-            'name': self.name,
-            'aggregation_weight': self.aggregation_weight,
-            'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'data': self.active_data.to_dict(),
-        }
+            raise ValueError('Use either agg_group or agg_weight, not both')
 
-        # Convert datetime objects to ISO strings
-        data['data']['coords']['time']['data'] = [date.isoformat() for date in data['data']['coords']['time']['data']]
+        # Let xarray handle all the initialization complexity
+        super().__init__(*args, **kwargs)
 
-        # Save to file if path is provided
-        if path is not None:
-            indent = 4 if len(self.active_timesteps) <= 480 else None
-            with open(path, 'w', encoding='utf-8') as f:
-                json.dump(data, f, indent=indent, ensure_ascii=False)
+        # Add our metadata to attrs after initialization
+        if agg_group is not None:
+            self.attrs['agg_group'] = agg_group
+        if agg_weight is not None:
+            self.attrs['agg_weight'] = agg_weight
 
-        return data
+        # Always mark as TimeSeriesData
+        self.attrs['__timeseries_data__'] = True
 
     @property
-    def stats(self) -> str:
-        """
-        Return a statistical summary of the active data.
-
-        Returns:
-            String representation of data statistics
-        """
-        return get_numeric_stats(self.active_data, padd=0)
-
-    def _update_active_data(self):
-        """
-        Update the active data based on active_timesteps.
-        """
-        self._active_data = self._stored_data.sel(time=self.active_timesteps)
+    def agg_group(self) -> Optional[str]:
+        return self.attrs.get('agg_group')
 
     @property
-    def all_equal(self) -> bool:
-        """Check if all values in the series are equal."""
-        return np.unique(self.active_data.values).size == 1
+    def agg_weight(self) -> Optional[float]:
+        return self.attrs.get('agg_weight')
 
-    @property
-    def active_timesteps(self) -> pd.DatetimeIndex:
-        """Get the current active timesteps."""
-        return self._active_timesteps
-
-    @active_timesteps.setter
-    def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]):
-        """
-        Set active_timesteps and refresh active_data.
-
-        Args:
-            timesteps: New timesteps to activate, or None to use all stored timesteps
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex or None
-        """
-        if timesteps is None:
-            self._active_timesteps = self.stored_data.indexes['time']
-        elif isinstance(timesteps, pd.DatetimeIndex):
-            self._active_timesteps = timesteps
-        else:
-            raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
-
-        self._update_active_data()
-
-    @property
-    def active_data(self) -> xr.DataArray:
-        """Get a view of stored_data based on active_timesteps."""
-        return self._active_data
-
-    @property
-    def stored_data(self) -> xr.DataArray:
-        """Get a copy of the full stored data."""
-        return self._stored_data.copy()
-
-    @stored_data.setter
-    def stored_data(self, value: NumericData):
-        """
-        Update stored_data and refresh active_data.
-
-        Args:
-            value: New data to store
-        """
-        new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps)
-
-        # Skip if data is unchanged to avoid overwriting backup
-        if new_data.equals(self._stored_data):
-            return
-
-        self._stored_data = new_data
-        self.active_timesteps = None  # Reset to full timeline
-
-    @property
-    def sel(self):
-        return self.active_data.sel
-
-    @property
-    def isel(self):
-        return self.active_data.isel
-
-    def _apply_operation(self, other, op):
-        """Apply an operation between this TimeSeries and another object."""
-        if isinstance(other, TimeSeries):
-            other = other.active_data
-        return op(self.active_data, other)
-
-    def __add__(self, other):
-        return self._apply_operation(other, lambda x, y: x + y)
-
-    def __sub__(self, other):
-        return self._apply_operation(other, lambda x, y: x - y)
-
-    def __mul__(self, other):
-        return self._apply_operation(other, lambda x, y: x * y)
-
-    def __truediv__(self, other):
-        return self._apply_operation(other, lambda x, y: x / y)
-
-    def __radd__(self, other):
-        return other + self.active_data
-
-    def __rsub__(self, other):
-        return other - self.active_data
-
-    def __rmul__(self, other):
-        return other * self.active_data
-
-    def __rtruediv__(self, other):
-        return other / self.active_data
-
-    def __neg__(self) -> xr.DataArray:
-        return -self.active_data
-
-    def __pos__(self) -> xr.DataArray:
-        return +self.active_data
-
-    def __abs__(self) -> xr.DataArray:
-        return abs(self.active_data)
-
-    def __gt__(self, other):
-        """
-        Compare if this TimeSeries is greater than another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are greater than other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data > other.active_data
-        return self.active_data > other
-
-    def __ge__(self, other):
-        """
-        Compare if this TimeSeries is greater than or equal to another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are greater than or equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data >= other.active_data
-        return self.active_data >= other
-
-    def __lt__(self, other):
-        """
-        Compare if this TimeSeries is less than another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are less than other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data < other.active_data
-        return self.active_data < other
-
-    def __le__(self, other):
-        """
-        Compare if this TimeSeries is less than or equal to another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are less than or equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data <= other.active_data
-        return self.active_data <= other
-
-    def __eq__(self, other):
-        """
-        Compare if this TimeSeries is equal to another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data == other.active_data
-        return self.active_data == other
+    @classmethod
+    def from_dataarray(cls, da: xr.DataArray, agg_group: Optional[str] = None, agg_weight: Optional[float] = None):
+        """Create TimeSeriesData from DataArray, extracting metadata from attrs."""
+        # Get aggregation metadata from attrs or parameters
+        final_agg_group = agg_group if agg_group is not None else da.attrs.get('agg_group')
+        final_agg_weight = agg_weight if agg_weight is not None else da.attrs.get('agg_weight')
 
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        """
-        Handle NumPy universal functions.
+        return cls(da, agg_group=final_agg_group, agg_weight=final_agg_weight)
 
-        This allows NumPy functions to work with TimeSeries objects.
-        """
-        # Convert any TimeSeries inputs to their active_data
-        inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs]
-        return getattr(ufunc, method)(*inputs, **kwargs)
+    @classmethod
+    def is_timeseries_data(cls, obj) -> bool:
+        """Check if an object is TimeSeriesData."""
+        return isinstance(obj, xr.DataArray) and obj.attrs.get('__timeseries_data__', False)
 
     def __repr__(self):
-        """
-        Get a string representation of the TimeSeries.
+        agg_info = []
+        if self.agg_group:
+            agg_info.append(f"agg_group='{self.agg_group}'")
+        if self.agg_weight is not None:
+            agg_info.append(f'agg_weight={self.agg_weight}')
 
-        Returns:
-            String showing TimeSeries details
-        """
-        attrs = {
-            'name': self.name,
-            'aggregation_weight': self.aggregation_weight,
-            'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'shape': self.active_data.shape,
-            'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}',
-        }
-        attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
-        return f'TimeSeries({attr_str})'
-
-    def __str__(self):
-        """
-        Get a human-readable string representation.
-
-        Returns:
-            Descriptive string with statistics
-        """
-        return f"TimeSeries '{self.name}': {self.stats}"
+        info_str = f'TimeSeriesData({", ".join(agg_info)})' if agg_info else 'TimeSeriesData'
+        return f'{info_str}\n{super().__repr__()}'
 
 
-class TimeSeriesCollection:
+class DataConverter:
     """
-    Collection of TimeSeries objects with shared timestep management.
+    Converts various data types into xarray.DataArray with a timesteps index.
 
-    TimeSeriesCollection handles multiple TimeSeries objects with synchronized
-    timesteps, provides operations on collections, and manages extra timesteps.
+    Supports: scalars, arrays, Series, DataFrames, DataArrays, and TimeSeriesData.
     """
 
-    def __init__(
-        self,
-        timesteps: pd.DatetimeIndex,
-        hours_of_last_timestep: Optional[float] = None,
-        hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None,
-    ):
-        """
-        Args:
-            timesteps: The timesteps of the Collection.
-            hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified
-            hours_of_previous_timesteps: The duration of previous timesteps.
-                If None, the first time increment of time_series is used.
-                This is needed to calculate previous durations (for example consecutive_on_hours).
-                If you use an array, take care that its long enough to cover all previous values!
- """ - # Prepare and validate timesteps - self._validate_timesteps(timesteps) - self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( - timesteps, hours_of_previous_timesteps - ) - - # Set up timesteps and hours - self.all_timesteps = timesteps - self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra) - - # Active timestep tracking - self._active_timesteps = None - self._active_timesteps_extra = None - self._active_hours_per_timestep = None - - # Dictionary of time series by name - self.time_series_data: Dict[str, TimeSeries] = {} - - # Aggregation - self.group_weights: Dict[str, float] = {} - self.weights: Dict[str, float] = {} - - @classmethod - def with_uniform_timesteps( - cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None - ) -> 'TimeSeriesCollection': - """Create a collection with uniform timesteps.""" - timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time') - return cls(timesteps, hours_of_previous_timesteps=hours_per_step) - - def create_time_series( - self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False - ) -> TimeSeries: + @staticmethod + def _fix_timeseries_data_indexing( + data: TimeSeriesData, timesteps: pd.DatetimeIndex, dims: list, coords: list + ) -> TimeSeriesData: """ - Creates a TimeSeries from the given data and adds it to the collection. + Fix TimeSeriesData indexing issues and return properly indexed TimeSeriesData. Args: - data: The data to create the TimeSeries from. - name: The name of the TimeSeries. - needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps. - The data to create the TimeSeries from. + data: TimeSeriesData that might have indexing issues + timesteps: Target timesteps + dims: Expected dimensions + coords: Expected coordinates Returns: - The created TimeSeries. - - """ - # Check for duplicate name - if name in self.time_series_data: - raise ValueError(f"TimeSeries '{name}' already exists in this collection") - - # Determine which timesteps to use - timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps - - # Create the time series - if isinstance(data, TimeSeriesData): - time_series = TimeSeries.from_datasource( - name=name, - data=data.data, - timesteps=timesteps_to_use, - aggregation_weight=data.agg_weight, - aggregation_group=data.agg_group, - needs_extra_timestep=needs_extra_timestep, - ) - # Connect the user time series to the created TimeSeries - data.label = name - else: - time_series = TimeSeries.from_datasource( - name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep - ) - - # Add to the collection - self.add_time_series(time_series) - - return time_series - - def calculate_aggregation_weights(self) -> Dict[str, float]: - """Calculate and return aggregation weights for all time series.""" - self.group_weights = self._calculate_group_weights() - self.weights = self._calculate_weights() - - if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)): - logger.info('All Aggregation weights were set to 1') - - return self.weights - - def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None): - """ - Update active timesteps for the collection and all time series. - If no arguments are provided, the active timesteps are reset. 
-
-        Args:
-            active_timesteps: The active timesteps of the model.
-                If None, the all timesteps of the TimeSeriesCollection are taken.
-        """
-        if active_timesteps is None:
-            return self.reset()
-
-        if not np.all(np.isin(active_timesteps, self.all_timesteps)):
-            raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection')
-
-        # Calculate derived timesteps
-        self._active_timesteps = active_timesteps
-        first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0]
-        last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0]
-        self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2]
-        self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1))
-
-        # Update all time series
-        self._update_time_series_timesteps()
-
-    def reset(self):
-        """Reset active timesteps to defaults for all time series."""
-        self._active_timesteps = None
-        self._active_timesteps_extra = None
-        self._active_hours_per_timestep = None
-
-        for time_series in self.time_series_data.values():
-            time_series.reset()
-
-    def restore_data(self):
-        """Restore original data for all time series."""
-        for time_series in self.time_series_data.values():
-            time_series.restore_data()
-
-    def add_time_series(self, time_series: TimeSeries):
-        """Add an existing TimeSeries to the collection."""
-        if time_series.name in self.time_series_data:
-            raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection")
-
-        self.time_series_data[time_series.name] = time_series
+            TimeSeriesData with correct indexing
 
-    def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False):
+        Raises:
+            ConversionError: If data cannot be fixed to match expected indexing
         """
-        Update time series with new data from a DataFrame.
+        expected_shape = (len(timesteps),)
 
-        Args:
-            data: DataFrame containing new data with timestamps as index
-            include_extra_timestep: Whether the provided data already includes the extra timestep, by default False
-        """
-        if not isinstance(data, pd.DataFrame):
-            raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}')
-
-        # Check if the DataFrame index matches the expected timesteps
-        expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps
-        if not data.index.equals(expected_timesteps):
-            raise ValueError(
-                f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}'
+        # Check if dimensions match
+        if data.dims != tuple(dims):
+            logger.warning(f'TimeSeriesData has dimensions {data.dims}, expected {dims}. Reshaping to match timesteps.')
+            # Try to reshape the data to match expected dimensions
+            if data.size != len(timesteps):
+                raise ConversionError(
+                    f'TimeSeriesData has {data.size} elements, cannot reshape to match {len(timesteps)} timesteps'
+                )
+            # Create new DataArray with correct coordinates, preserving metadata
+            reshaped_data = xr.DataArray(
+                data.values.reshape(expected_shape), coords=coords, dims=dims, name=data.name, attrs=data.attrs.copy()
             )
+            return TimeSeriesData(reshaped_data)
 
-        for name, ts in self.time_series_data.items():
-            if name in data.columns:
-                if not ts.needs_extra_timestep:
-                    # For time series without extra timestep
-                    if include_extra_timestep:
-                        # If data includes extra timestep but series doesn't need it, exclude the last point
-                        ts.stored_data = data[name].iloc[:-1]
-                    else:
-                        # Use data as is
-                        ts.stored_data = data[name]
-                else:
-                    # For time series with extra timestep
-                    if include_extra_timestep:
-                        # Data already includes extra timestep
-                        ts.stored_data = data[name]
-                    else:
-                        # Need to add extra timestep - extrapolate from the last value
-                        extra_step_value = data[name].iloc[-1]
-                        extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time')
-                        extra_step_series = pd.Series([extra_step_value], index=extra_step_index)
-
-                        # Combine the regular data with the extra timestep
-                        ts.stored_data = pd.concat([data[name], extra_step_series])
-
-                logger.debug(f'Updated data for {name}')
-
-    def to_dataframe(
-        self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True
-    ) -> pd.DataFrame:
-        """
-        Convert collection to DataFrame with optional filtering and timestep control.
-
-        Args:
-            filtered: Filter time series by variability, by default 'non_constant'
-            include_extra_timestep: Whether to include the extra timestep in the result, by default True
-
-        Returns:
-            DataFrame representation of the collection
-        """
-        include_constants = filtered != 'non_constant'
-        ds = self.to_dataset(include_constants=include_constants)
-
-        if not include_extra_timestep:
-            ds = ds.isel(time=slice(None, -1))
-
-        df = ds.to_dataframe()
-
-        # Apply filtering
-        if filtered == 'all':
-            return df
-        elif filtered == 'constant':
-            return df.loc[:, df.nunique() == 1]
-        elif filtered == 'non_constant':
-            return df.loc[:, df.nunique() > 1]
-        else:
-            raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'")
-
-    def to_dataset(self, include_constants: bool = True) -> xr.Dataset:
-        """
-        Combine all time series into a single Dataset with all timesteps.
+        # Check if time coordinate length matches
+        elif data.sizes[dims[0]] != len(coords[0]):
+            logger.warning(
+                f'TimeSeriesData has {data.sizes[dims[0]]} time points, '
+                f"expected {len(coords[0])}. Cannot reindex - lengths don't match."
+            )
+            raise ConversionError(
+                f"TimeSeriesData length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
+            )
 
-        Args:
-            include_constants: Whether to include time series with constant values, by default True
+        # Check if time coordinates are identical
+        elif not data.coords['time'].equals(timesteps):
+            logger.warning(
+                'TimeSeriesData has different time coordinates than expected. Replacing with provided timesteps.'
+            )
+            # Replace time coordinates while preserving data and metadata
+            recoordinated_data = xr.DataArray(
+                data.values, coords=coords, dims=dims, name=data.name, attrs=data.attrs.copy()
+            )
+            return TimeSeriesData(recoordinated_data)
 
-        Returns:
-            Dataset containing all selected time series with all timesteps
-        """
-        # Determine which series to include
-        if include_constants:
-            series_to_include = self.time_series_data.values()
         else:
-            series_to_include = self.non_constants
-
-        # Create individual datasets and merge them
-        ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include])
-
-        # Ensure the correct time coordinates
-        ds = ds.reindex(time=self.timesteps_extra)
-
-        ds.attrs.update(
-            {
-                'timesteps_extra': f'{self.timesteps_extra[0]} ... {self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}',
-                'hours_per_timestep': self._format_stats(self.hours_per_timestep),
-            }
-        )
-
-        return ds
-
-    def _update_time_series_timesteps(self):
-        """Update active timesteps for all time series."""
-        for ts in self.time_series_data.values():
-            if ts.needs_extra_timestep:
-                ts.active_timesteps = self.timesteps_extra
-            else:
-                ts.active_timesteps = self.timesteps
-
-    @staticmethod
-    def _validate_timesteps(timesteps: pd.DatetimeIndex):
-        """Validate timesteps format and rename if needed."""
-        if not isinstance(timesteps, pd.DatetimeIndex):
-            raise TypeError('timesteps must be a pandas DatetimeIndex')
-
-        if len(timesteps) < 2:
-            raise ValueError('timesteps must contain at least 2 timestamps')
-
-        # Ensure timesteps has the required name
-        if timesteps.name != 'time':
-            logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name)
-            timesteps.name = 'time'
-
-    @staticmethod
-    def _create_timesteps_with_extra(
-        timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float]
-    ) -> pd.DatetimeIndex:
-        """Create timesteps with an extra step at the end."""
-        if hours_of_last_timestep is not None:
-            # Create the extra timestep using the specified duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time')
-        else:
-            # Use the last interval as the extra timestep duration
-            last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time')
-
-        # Combine with original timesteps
-        return pd.DatetimeIndex(timesteps.append(last_date), name='time')
-
-    @staticmethod
-    def _calculate_hours_of_previous_timesteps(
-        timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]]
-    ) -> Union[float, np.ndarray]:
-        """Calculate duration of regular timesteps."""
-        if hours_of_previous_timesteps is not None:
-            return hours_of_previous_timesteps
-
-        # Calculate from the first interval
-        first_interval = timesteps[1] - timesteps[0]
-        return first_interval.total_seconds() / 3600  # Convert to hours
-
-    @staticmethod
-    def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray:
-        """Calculate duration of each timestep."""
-        # Calculate differences between consecutive timestamps
-        hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1)
-
-        return xr.DataArray(
-            data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step'
-        )
-
-    def _calculate_group_weights(self) -> Dict[str, float]:
-        """Calculate weights for aggregation groups."""
-        # Count series in each group
-        groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None]
-        group_counts = Counter(groups)
-
-        # Calculate weight for each group (1/count)
-        return {group: 1 / count for group, count in group_counts.items()}
-
-    def _calculate_weights(self) -> Dict[str, float]:
-        """Calculate weights for all time series."""
-        # Calculate weight for each time series
-        weights = {}
-        for name, ts in self.time_series_data.items():
-            if ts.aggregation_group is not None:
-                # Use group weight
-                weights[name] = self.group_weights.get(ts.aggregation_group, 1)
-            else:
-                # Use individual weight or default to 1
-                weights[name] = ts.aggregation_weight or 1
-
-        return weights
-
-    def _format_stats(self, data) -> str:
-        """Format statistics for a data array."""
-        if hasattr(data, 'values'):
-            values = data.values
-        else:
-            values = np.asarray(data)
-
-        mean_val = np.mean(values)
+            # Everything matches - return copy to avoid modifying original
+            return data.copy(deep=True)
+
+    @staticmethod
+    def to_dataarray(data: NumericDataUser, timesteps: pd.DatetimeIndex) -> xr.DataArray:
+        """Convert data to xarray.DataArray with specified timesteps index."""
+        if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0:
+            raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}')
+        if not timesteps.name == 'time':
+            raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}')
+
+        coords = [timesteps]
+        dims = ['time']
+        expected_shape = (len(timesteps),)
+
+        try:
+            # Handle TimeSeriesData first (before generic DataArray check)
+            if isinstance(data, TimeSeriesData):
+                return DataConverter._fix_timeseries_data_indexing(data, timesteps, dims, coords)
+
+            elif isinstance(data, (int, float, np.integer, np.floating)):
+                # Scalar: broadcast to all timesteps
+                scalar_data = np.full(expected_shape, data)
+                return xr.DataArray(scalar_data, coords=coords, dims=dims)
+
+            elif isinstance(data, pd.DataFrame):
+                if not data.index.equals(timesteps):
+                    raise ConversionError("DataFrame index doesn't match timesteps index")
+                if not len(data.columns) == 1:
+                    raise ConversionError('DataFrame must have exactly one column')
+                return xr.DataArray(data.values.flatten(), coords=coords, dims=dims)
+
+            elif isinstance(data, pd.Series):
+                if not data.index.equals(timesteps):
+                    raise ConversionError("Series index doesn't match timesteps index")
+                return xr.DataArray(data.values, coords=coords, dims=dims)
+
+            elif isinstance(data, np.ndarray):
+                if data.ndim != 1:
+                    raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}')
+                elif data.shape[0] != expected_shape[0]:
+                    raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}")
+                return xr.DataArray(data, coords=coords, dims=dims)
+
+            elif isinstance(data, xr.DataArray):
+                if data.dims != tuple(dims):
+                    raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}")
+                if data.sizes[dims[0]] != len(coords[0]):
+                    raise ConversionError(
+                        f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}"
+                    )
+                return data.copy(deep=True)
+
+            elif isinstance(data, list):
+                logger.warning('Converting list to DataArray. This is not recommended.')
+                if len(data) != expected_shape[0]:
+                    raise ConversionError(f"List length {len(data)} doesn't match expected {expected_shape[0]}")
+                return xr.DataArray(data, coords=coords, dims=dims)
+
+            else:
+                raise ConversionError(f'Unsupported type: {type(data).__name__}')
+
+        except Exception as e:
+            if isinstance(e, ConversionError):
+                raise
+            raise ConversionError(f'Converting data {type(data)} to xarray.DataArray raised an error: {str(e)}') from e
-        min_val = np.min(values)
-        max_val = np.max(values)
-        return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
 
-    def __getitem__(self, name: str) -> TimeSeries:
-        """Get a TimeSeries by name."""
-        try:
-            return self.time_series_data[name]
-        except KeyError as e:
-            raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate through all TimeSeries in the collection."""
-        return iter(self.time_series_data.values())
-
-    def __len__(self) -> int:
-        """Get the number of TimeSeries in the collection."""
-        return len(self.time_series_data)
-
-    def __contains__(self, item: Union[str, TimeSeries]) -> bool:
-        """Check if a TimeSeries exists in the collection."""
-        if isinstance(item, str):
-            return item in self.time_series_data
-        elif isinstance(item, TimeSeries):
-            return any([item is ts for ts in self.time_series_data.values()])
-        return False
-
-    @property
-    def non_constants(self) -> List[TimeSeries]:
-        """Get time series with varying values."""
-        return [ts for ts in self.time_series_data.values() if not ts.all_equal]
-
-    @property
-    def constants(self) -> List[TimeSeries]:
-        """Get time series with constant values."""
-        return [ts for ts in self.time_series_data.values() if ts.all_equal]
-
-    @property
-    def timesteps(self) -> pd.DatetimeIndex:
-        """Get the active timesteps."""
-        return self.all_timesteps if self._active_timesteps is None else self._active_timesteps
-
-    @property
-    def timesteps_extra(self) -> pd.DatetimeIndex:
-        """Get the active timesteps with extra step."""
-        return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra
-
-    @property
-    def hours_per_timestep(self) -> xr.DataArray:
-        """Get the duration of each active timestep."""
-        return (
-            self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep
-        )
-
-    @property
-    def hours_of_last_timestep(self) -> float:
-        """Get the duration of the last timestep."""
-        return float(self.hours_per_timestep[-1].item())
-
-    def __repr__(self):
-        return f'TimeSeriesCollection:\n{self.to_dataset()}'
-
-    def __str__(self):
-        longest_name = max([time_series.name for time_series in self.time_series_data], key=len)
-
-        stats_summary = '\n'.join(
-            [
-                f'  - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}'
-                for time_series in self.time_series_data
-            ]
-        )
-
-        return (
-            f'TimeSeriesCollection with {len(self.time_series_data)} series\n'
-            f'  Time Range: {self.timesteps[0]} → {self.timesteps[-1]}\n'
-            f'  No. of timesteps: {len(self.timesteps)} + 1 extra\n'
-            f'  Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n'
-            f'  Time Series Data:\n'
-            f'{stats_summary}'
-        )
-
-
-def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str:
-    """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray."""
-    format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f'
-    if np.unique(data).size == 1:
-        return f'{data.max().item():{format_spec}} (constant)'
-    mean = data.mean().item()
-    median = data.median().item()
-    min_val = data.min().item()
-    max_val = data.max().item()
-    std = data.std().item()
-    return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)'
+def get_dataarray_stats(arr: xr.DataArray) -> Dict:
+    """Generate statistical summary of a DataArray."""
+    stats = {}
+
+    if arr.dtype.kind in 'biufc':  # bool, int, uint, float, complex
+        try:
+            stats.update(
+                {
+                    'min': float(arr.min().values),
+                    'max': float(arr.max().values),
+                    'mean': float(arr.mean().values),
+                    'median': float(arr.median().values),
+                    'std': float(arr.std().values),
+                    'count': int(arr.count().values),  # non-null count
+                }
+            )
+
+            # Add null count only if there are nulls
+            null_count = int(arr.isnull().sum().values)
+            if null_count > 0:
+                stats['nulls'] = null_count
+
+        except Exception:
+            pass
+
+    return stats
diff --git a/flixopt/effects.py b/flixopt/effects.py
index 82aa63a43..7fa136f5b 100644
--- a/flixopt/effects.py
+++ b/flixopt/effects.py
@@ -13,7 +13,7 @@
 import numpy as np
 import pandas as pd
 
-from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesCollection
+from .core import NumericDataInternal, NumericDataUser, Scalar
 from .features import ShareAllocationModel
 from .structure import Element, ElementModel, Interface, Model, SystemModel, register_class_for_io
@@ -44,8 +44,8 @@ def __init__(
         maximum_operation: Optional[Scalar] = None,
         minimum_invest: Optional[Scalar] = None,
         maximum_invest: Optional[Scalar] = None,
-        minimum_operation_per_hour: Optional[NumericDataTS] = None,
-        maximum_operation_per_hour: Optional[NumericDataTS] = None,
+        minimum_operation_per_hour: Optional[NumericDataUser] = None,
+        maximum_operation_per_hour: Optional[NumericDataUser] = None,
         minimum_total: Optional[Scalar] = None,
         maximum_total: Optional[Scalar] = None,
     ):
@@ -82,22 +82,22 @@ def __init__(
         self.specific_share_to_other_effects_invest: EffectValuesUser = specific_share_to_other_effects_invest or {}
         self.minimum_operation = minimum_operation
         self.maximum_operation = maximum_operation
-        self.minimum_operation_per_hour: NumericDataTS = minimum_operation_per_hour
-        self.maximum_operation_per_hour: NumericDataTS = maximum_operation_per_hour
+        self.minimum_operation_per_hour: NumericDataUser = minimum_operation_per_hour
+        self.maximum_operation_per_hour: NumericDataUser = maximum_operation_per_hour
         self.minimum_invest = minimum_invest
         self.maximum_invest = maximum_invest
         self.minimum_total = minimum_total
         self.maximum_total = maximum_total
 
     def transform_data(self, flow_system: 'FlowSystem'):
-        self.minimum_operation_per_hour = flow_system.create_time_series(
+        self.minimum_operation_per_hour = flow_system.fit_to_model_coords(
             f'{self.label_full}|minimum_operation_per_hour', self.minimum_operation_per_hour
         )
-        self.maximum_operation_per_hour = flow_system.create_time_series(
+        self.maximum_operation_per_hour = flow_system.fit_to_model_coords(
             f'{self.label_full}|maximum_operation_per_hour', self.maximum_operation_per_hour, flow_system
         )
-        self.specific_share_to_other_effects_operation = flow_system.create_effect_time_series(
+        self.specific_share_to_other_effects_operation = flow_system.fit_effects_to_model_coords(
             f'{self.label_full}|operation->', self.specific_share_to_other_effects_operation, 'operation'
         )
@@ -137,10 +137,10 @@ def __init__(self, model: SystemModel, element: Effect):
             label_full=f'{self.label_full}(operation)',
             total_max=self.element.maximum_operation,
             total_min=self.element.minimum_operation,
-            min_per_hour=self.element.minimum_operation_per_hour.active_data
+            min_per_hour=self.element.minimum_operation_per_hour
             if self.element.minimum_operation_per_hour is not None
             else None,
-            max_per_hour=self.element.maximum_operation_per_hour.active_data
+            max_per_hour=self.element.maximum_operation_per_hour
             if self.element.maximum_operation_per_hour is not None
             else None,
         )
@@ -168,10 +168,9 @@ def do_modeling(self):
         )
 
-EffectValuesExpr = Dict[str, linopy.LinearExpression]  # Used to create Shares
-EffectTimeSeries = Dict[str, TimeSeries]  # Used internally to index values
-EffectValuesDict = Dict[str, NumericDataTS]  # How effect values are stored
-EffectValuesUser = Union[NumericDataTS, Dict[str, NumericDataTS]]  # User-specified Shares to Effects
+EffectExpr = Dict[str, linopy.LinearExpression]  # Used to create Shares
+EffectValuesInternal = Dict[str, NumericDataInternal]  # Used internally to index values
+EffectValuesUser = Union[NumericDataUser, Dict[str, NumericDataUser]]  # User-specified Shares to Effects
""" This datatype is used to define the share to an effect by a certain attribute. """
 EffectValuesUserScalar = Union[Scalar, Dict[str, Scalar]]  # User-specified Shares to Effects
@@ -207,7 +206,7 @@ def add_effects(self, *effects: Effect) -> None:
             self._effects[effect.label] = effect
             logger.info(f'Registered new Effect: {effect.label}')
 
-    def create_effect_values_dict(self, effect_values_user: EffectValuesUser) -> Optional[EffectValuesDict]:
+    def create_effect_values_dict(self, effect_values_user: EffectValuesUser) -> Optional[Dict[str, NumericDataUser]]:
         """
         Converts effect values into a dictionary. If a scalar is provided, it is associated with a default effect type.
@@ -233,6 +232,8 @@ def get_effect_label(eff: Union[Effect, str]) -> str:
                     stacklevel=2,
                 )
                 return eff.label_full
+            elif eff is None:
+                return self.standard_effect.label_full
             else:
                 return eff
@@ -341,7 +342,7 @@ def __init__(self, model: SystemModel, effects: EffectCollection):
     def add_share_to_effects(
         self,
         name: str,
-        expressions: EffectValuesExpr,
+        expressions: EffectExpr,
         target: Literal['operation', 'invest'],
     ) -> None:
         for effect, expression in expressions.items():
@@ -376,7 +377,7 @@ def _add_share_between_effects(self):
             for target_effect, time_series in origin_effect.specific_share_to_other_effects_operation.items():
                 self.effects[target_effect].model.operation.add_share(
                     origin_effect.model.operation.label_full,
-                    origin_effect.model.operation.total_per_timestep * time_series.active_data,
+                    origin_effect.model.operation.total_per_timestep * time_series,
                 )
             # 2. invest: -> hier ist es Scalar (share)
             for target_effect, factor in origin_effect.specific_share_to_other_effects_invest.items():
diff --git a/flixopt/elements.py b/flixopt/elements.py
index a0bd8c91f..061a00b65 100644
--- a/flixopt/elements.py
+++ b/flixopt/elements.py
@@ -10,7 +10,7 @@
 import numpy as np
 
 from .config import CONFIG
-from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesCollection
+from .core import NumericDataUser, PlausibilityError, Scalar
 from .effects import EffectValuesUser
 from .features import InvestmentModel, OnOffModel, PreventSimultaneousUsageModel
 from .interface import InvestParameters, OnOffParameters
@@ -72,12 +72,6 @@ def transform_data(self, flow_system: 'FlowSystem') -> None:
         if self.on_off_parameters is not None:
             self.on_off_parameters.transform_data(flow_system, self.label_full)
 
-    def infos(self, use_numpy=True, use_element_label: bool = False) -> Dict:
-        infos = super().infos(use_numpy, use_element_label)
-        infos['inputs'] = [flow.infos(use_numpy, use_element_label) for flow in self.inputs]
-        infos['outputs'] = [flow.infos(use_numpy, use_element_label) for flow in self.outputs]
-        return infos
-
     def _check_unique_flow_labels(self):
         all_flow_labels = [flow.label for flow in self.inputs + self.outputs]
@@ -96,7 +90,7 @@ class Bus(Element):
     """
 
     def __init__(
-        self, label: str, excess_penalty_per_flow_hour: Optional[NumericDataTS] = 1e5, meta_data: Optional[Dict] = None
+        self, label: str, excess_penalty_per_flow_hour: Optional[NumericDataUser] = 1e5, meta_data: Optional[Dict] = None
     ):
         """
         Args:
@@ -117,7 +111,7 @@ def create_model(self, model: SystemModel) -> 'BusModel':
         return self.model
 
     def transform_data(self, flow_system: 'FlowSystem'):
-        self.excess_penalty_per_flow_hour = flow_system.create_time_series(
+        self.excess_penalty_per_flow_hour = flow_system.fit_to_model_coords(
             f'{self.label_full}|excess_penalty_per_flow_hour', self.excess_penalty_per_flow_hour
         )
@@ -155,16 +149,16 @@ def __init__(
         label: str,
         bus: str,
         size: Union[Scalar, InvestParameters] = None,
-        fixed_relative_profile: Optional[NumericDataTS] = None,
-        relative_minimum: NumericDataTS = 0,
-        relative_maximum: NumericDataTS = 1,
+        fixed_relative_profile: Optional[NumericDataUser] = None,
+        relative_minimum: NumericDataUser = 0,
+        relative_maximum: NumericDataUser = 1,
         effects_per_flow_hour: Optional[EffectValuesUser] = None,
         on_off_parameters: Optional[OnOffParameters] = None,
         flow_hours_total_max: Optional[Scalar] = None,
         flow_hours_total_min: Optional[Scalar] = None,
         load_factor_min: Optional[Scalar] = None,
         load_factor_max: Optional[Scalar] = None,
-        previous_flow_rate: Optional[NumericData] = None,
+        previous_flow_rate: Optional[NumericDataUser] = None,
         meta_data: Optional[Dict] = None,
     ):
         r"""
@@ -236,16 +230,16 @@ def create_model(self, model: SystemModel) -> 'FlowModel':
         return self.model
 
     def transform_data(self, flow_system: 'FlowSystem'):
-        self.relative_minimum = flow_system.create_time_series(
+        self.relative_minimum = flow_system.fit_to_model_coords(
             f'{self.label_full}|relative_minimum', self.relative_minimum
         )
-        self.relative_maximum = flow_system.create_time_series(
+        self.relative_maximum = flow_system.fit_to_model_coords(
             f'{self.label_full}|relative_maximum', self.relative_maximum
         )
-        self.fixed_relative_profile = flow_system.create_time_series(
+        self.fixed_relative_profile = flow_system.fit_to_model_coords(
             f'{self.label_full}|fixed_relative_profile', self.fixed_relative_profile
         )
-        self.effects_per_flow_hour = 
flow_system.create_effect_time_series( + self.effects_per_flow_hour = flow_system.fit_effects_to_model_coords( self.label_full, self.effects_per_flow_hour, 'per_flow_hour' ) if self.on_off_parameters is not None: @@ -253,17 +247,6 @@ def transform_data(self, flow_system: 'FlowSystem'): if isinstance(self.size, InvestParameters): self.size.transform_data(flow_system) - def infos(self, use_numpy: bool = True, use_element_label: bool = False) -> Dict: - infos = super().infos(use_numpy, use_element_label) - infos['is_input_in_component'] = self.is_input_in_component - return infos - - def to_dict(self) -> Dict: - data = super().to_dict() - if isinstance(data.get('previous_flow_rate'), np.ndarray): - data['previous_flow_rate'] = data['previous_flow_rate'].tolist() - return data - def _plausibility_checks(self) -> None: # TODO: Incorporate into Variable? (Lower_bound can not be greater than upper bound if np.any(self.relative_minimum > self.relative_maximum): @@ -287,7 +270,7 @@ def _plausibility_checks(self) -> None: if (self.relative_minimum > 0).any() and self.on_off_parameters is None: logger.warning( - f'Flow {self.label} has a relative_minimum of {self.relative_minimum.active_data} and no on_off_parameters. ' + f'Flow {self.label} has a relative_minimum of {self.relative_minimum} and no on_off_parameters. ' f'This prevents the flow_rate from switching off (flow_rate = 0). ' f'Consider using on_off_parameters to allow the flow to be switched on and off.' ) @@ -390,7 +373,7 @@ def _create_shares(self): self._model.effects.add_share_to_effects( name=self.label_full, # Use the full label of the element expressions={ - effect: self.flow_rate * self._model.hours_per_step * factor.active_data + effect: self.flow_rate * self._model.hours_per_step * factor for effect, factor in self.element.effects_per_flow_hour.items() }, target='operation', @@ -428,7 +411,7 @@ def _create_bounds_for_load_factor(self): ) @property - def flow_rate_bounds_on(self) -> Tuple[NumericData, NumericData]: + def flow_rate_bounds_on(self) -> Tuple[NumericDataUser, NumericDataUser]: """Returns absolute flow rate bounds. Important for OnOffModel""" relative_minimum, relative_maximum = self.flow_rate_lower_bound_relative, self.flow_rate_upper_bound_relative size = self.element.size @@ -439,23 +422,23 @@ def flow_rate_bounds_on(self) -> Tuple[NumericData, NumericData]: return relative_minimum * size.minimum_size, relative_maximum * size.maximum_size @property - def flow_rate_lower_bound_relative(self) -> NumericData: + def flow_rate_lower_bound_relative(self) -> NumericDataUser: """Returns the lower bound of the flow_rate relative to its size""" fixed_profile = self.element.fixed_relative_profile if fixed_profile is None: - return self.element.relative_minimum.active_data - return fixed_profile.active_data + return self.element.relative_minimum + return fixed_profile @property - def flow_rate_upper_bound_relative(self) -> NumericData: + def flow_rate_upper_bound_relative(self) -> NumericDataUser: """ Returns the upper bound of the flow_rate relative to its size""" fixed_profile = self.element.fixed_relative_profile if fixed_profile is None: - return self.element.relative_maximum.active_data - return fixed_profile.active_data + return self.element.relative_maximum + return fixed_profile @property - def flow_rate_lower_bound(self) -> NumericData: + def flow_rate_lower_bound(self) -> NumericDataUser: """ Returns the minimum bound the flow_rate can reach. 
Further constraining might be done in OnOffModel and InvestmentModel @@ -469,7 +452,7 @@ def flow_rate_lower_bound(self) -> NumericData: return self.flow_rate_lower_bound_relative * self.element.size @property - def flow_rate_upper_bound(self) -> NumericData: + def flow_rate_upper_bound(self) -> NumericDataUser: """ Returns the maximum bound the flow_rate can reach. Further constraining might be done in OnOffModel and InvestmentModel @@ -497,7 +480,7 @@ def do_modeling(self) -> None: # Fehlerplus/-minus: if self.element.with_excess: excess_penalty = np.multiply( - self._model.hours_per_step, self.element.excess_penalty_per_flow_hour.active_data + self._model.hours_per_step, self.element.excess_penalty_per_flow_hour ) self.excess_input = self.add( self._model.add_variables(lower=0, coords=self._model.coords, name=f'{self.label_full}|excess_input'), diff --git a/flixopt/features.py b/flixopt/features.py index c2a62adb1..5bc8f7922 100644 --- a/flixopt/features.py +++ b/flixopt/features.py @@ -11,7 +11,7 @@ from . import utils from .config import CONFIG -from .core import NumericData, Scalar, TimeSeries +from .core import NumericDataUser, Scalar from .interface import InvestParameters, OnOffParameters, Piecewise from .structure import Model, SystemModel @@ -27,7 +27,7 @@ def __init__( label_of_element: str, parameters: InvestParameters, defining_variable: [linopy.Variable], - relative_bounds_of_defining_variable: Tuple[NumericData, NumericData], + relative_bounds_of_defining_variable: Tuple[NumericDataUser, NumericDataUser], label: Optional[str] = None, on_variable: Optional[linopy.Variable] = None, ): @@ -203,12 +203,12 @@ def __init__( model: SystemModel, label_of_element: str, defining_variables: List[linopy.Variable], - defining_bounds: List[Tuple[NumericData, NumericData]], - previous_values: List[Optional[NumericData]] = None, + defining_bounds: List[Tuple[NumericDataUser, NumericDataUser]], + previous_values: List[Optional[NumericDataUser]] = None, use_off: bool = True, - on_hours_total_min: Optional[NumericData] = 0, - on_hours_total_max: Optional[NumericData] = None, - effects_per_running_hour: Dict[str, NumericData] = None, + on_hours_total_min: Optional[NumericDataUser] = 0, + on_hours_total_max: Optional[NumericDataUser] = None, + effects_per_running_hour: Dict[str, NumericDataUser] = None, label: Optional[str] = None, ): """ @@ -344,7 +344,7 @@ def previous_off_states(self): return 1 - self.previous_states @staticmethod - def compute_previous_states(previous_values: List[NumericData], epsilon: float = 1e-5) -> np.ndarray: + def compute_previous_states(previous_values: List[NumericDataUser], epsilon: float = 1e-5) -> np.ndarray: """Computes the previous states {0, 1} of defining variables as a binary array from their previous values.""" if not previous_values or all([val is None for val in previous_values]): return np.array([0]) @@ -451,9 +451,9 @@ def __init__( model: SystemModel, label_of_element: str, state_variable: linopy.Variable, - minimum_duration: Optional[NumericData] = None, - maximum_duration: Optional[NumericData] = None, - previous_states: Optional[NumericData] = None, + minimum_duration: Optional[NumericDataUser] = None, + maximum_duration: Optional[NumericDataUser] = None, + previous_states: Optional[NumericDataUser] = None, label: Optional[str] = None, ): """ @@ -474,11 +474,6 @@ def __init__( self._minimum_duration = minimum_duration self._maximum_duration = maximum_duration - if isinstance(self._minimum_duration, TimeSeries): - self._minimum_duration = 
self._minimum_duration.active_data
-        if isinstance(self._maximum_duration, TimeSeries):
-            self._maximum_duration = self._maximum_duration.active_data
-
+
         self.duration = None
 
     def do_modeling(self):
@@ -575,7 +570,7 @@ def previous_duration(self) -> Scalar:
 
     @staticmethod
     def compute_consecutive_hours_in_state(
-        binary_values: NumericData, hours_per_timestep: Union[int, float, np.ndarray]
+        binary_values: NumericDataUser, hours_per_timestep: Union[int, float, np.ndarray]
     ) -> Scalar:
         """
         Computes the final consecutive duration in state 'on' (=1) in hours, from a binary array.
@@ -634,8 +629,8 @@ def __init__(
         on_off_parameters: OnOffParameters,
         label_of_element: str,
         defining_variables: List[linopy.Variable],
-        defining_bounds: List[Tuple[NumericData, NumericData]],
-        previous_values: List[Optional[NumericData]],
+        defining_bounds: List[Tuple[NumericDataUser, NumericDataUser]],
+        previous_values: List[Optional[NumericDataUser]],
         label: Optional[str] = None,
     ):
         """
@@ -923,8 +918,8 @@ def __init__(
         label_full: Optional[str] = None,
         total_max: Optional[Scalar] = None,
         total_min: Optional[Scalar] = None,
-        max_per_hour: Optional[NumericData] = None,
-        min_per_hour: Optional[NumericData] = None,
+        max_per_hour: Optional[NumericDataUser] = None,
+        min_per_hour: Optional[NumericDataUser] = None,
     ):
         super().__init__(model, label_of_element=label_of_element, label=label, label_full=label_full)
         if not shares_are_time_series:  # If the condition is True
diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py
index 93720de60..7724a9e61 100644
--- a/flixopt/flow_system.py
+++ b/flixopt/flow_system.py
@@ -16,10 +16,10 @@
 from rich.pretty import Pretty
 
 from . import io as fx_io
-from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesCollection, TimeSeriesData
-from .effects import Effect, EffectCollection, EffectTimeSeries, EffectValuesDict, EffectValuesUser
+from .core import ConversionError, DataConverter, NumericDataInternal, NumericDataUser, TimeSeriesData
+from .effects import Effect, EffectCollection, EffectValuesInternal, EffectValuesUser
 from .elements import Bus, Component, Flow
-from .structure import CLASS_REGISTRY, Element, SystemModel, get_compact_representation, get_str_representation
+from .structure import Element, Interface, SystemModel
 
 if TYPE_CHECKING:
     import pyvis
@@ -27,9 +27,20 @@
 logger = logging.getLogger('flixopt')
 
 
-class FlowSystem:
+class FlowSystem(Interface):
     """
-    A FlowSystem organizes the high level Elements (Components & Effects).
+    FlowSystem serves as the main container for energy system modeling, organizing
+    high-level elements including Components (like boilers, heat pumps, storages),
+    Buses (connection points), and Effects (system-wide influences). It handles
+    time series data management, network connectivity, and provides serialization
+    capabilities for saving and loading complete system configurations.
+
+    The system uses xarray.Dataset for efficient time series data handling. It can be exported to and restored from NetCDF.
+
+    See Also:
+        Component: Base class for system components like boilers, heat pumps.
+        Bus: Connection points for flows between components.
+        Effect: System-wide effects, like the optimization objective.
     """
 
     def __init__(
@@ -47,74 +58,290 @@ def __init__(
                 This is needed to calculate previous durations (for example consecutive_on_hours).
                 If you use an array, take care that it's long enough to cover all previous values!
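        Example:
            A minimal construction sketch (illustrative; assumes an hourly index and
            that FlowSystem is imported from flixopt.flow_system):

            >>> import pandas as pd
            >>> from flixopt.flow_system import FlowSystem
            >>> timesteps = pd.date_range('2024-01-01', periods=24, freq='h', name='time')
            >>> flow_system = FlowSystem(timesteps, hours_of_last_timestep=1)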
""" - self.time_series_collection = TimeSeriesCollection( - timesteps=timesteps, - hours_of_last_timestep=hours_of_last_timestep, - hours_of_previous_timesteps=hours_of_previous_timesteps, + # Store timing information directly + self.timesteps = self._validate_timesteps(timesteps) + self.timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) + self.hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra) + self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( + timesteps, hours_of_previous_timesteps ) - # defaults: + # Element collections self.components: Dict[str, Component] = {} self.buses: Dict[str, Bus] = {} self.effects: EffectCollection = EffectCollection() self.model: Optional[SystemModel] = None - self._connected = False + self._connected_and_transformed = False + self._used_in_calculation = False + + @staticmethod + def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex: + """Validate timesteps format and rename if needed.""" + if not isinstance(timesteps, pd.DatetimeIndex): + raise TypeError('timesteps must be a pandas DatetimeIndex') + if len(timesteps) < 2: + raise ValueError('timesteps must contain at least 2 timestamps') + if timesteps.name != 'time': + timesteps.name = 'time' + if not timesteps.is_monotonic_increasing: + raise ValueError('timesteps must be sorted') + return timesteps + + @staticmethod + def _create_timesteps_with_extra( + timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float] + ) -> pd.DatetimeIndex: + """Create timesteps with an extra step at the end.""" + if hours_of_last_timestep is None: + hours_of_last_timestep = (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1) + + last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time') + return pd.DatetimeIndex(timesteps.append(last_date), name='time') + + @staticmethod + def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray: + """Calculate duration of each timestep.""" + hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1) + return xr.DataArray( + hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=['time'], name='hours_per_timestep' + ) + + @staticmethod + def _calculate_hours_of_previous_timesteps( + timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] + ) -> Union[float, np.ndarray]: + """Calculate duration of regular timesteps.""" + if hours_of_previous_timesteps is not None: + return hours_of_previous_timesteps + # Calculate from the first interval + first_interval = timesteps[1] - timesteps[0] + return first_interval.total_seconds() / 3600 # Convert to hours + + def _create_reference_structure(self) -> Tuple[Dict, Dict[str, xr.DataArray]]: + """ + Override Interface method to handle FlowSystem-specific serialization. + Combines custom FlowSystem logic with Interface pattern for nested objects. 
+
+        Returns:
+            Tuple of (reference_structure, extracted_arrays_dict)
+        """
+        # Start with Interface base functionality for constructor parameters
+        reference_structure, all_extracted_arrays = super()._create_reference_structure()
+
+        # Override timesteps serialization (we need timesteps_extra instead of timesteps)
+        reference_structure['timesteps_extra'] = [date.isoformat() for date in self.timesteps_extra]
+
+        # Remove timesteps from structure since we're using timesteps_extra
+        reference_structure.pop('timesteps', None)
+
+        # Add timing arrays directly (not handled by Interface introspection)
+        all_extracted_arrays['hours_per_timestep'] = self.hours_per_timestep
+
+        # Extract from components
+        components_structure = {}
+        for comp_label, component in self.components.items():
+            comp_structure, comp_arrays = component._create_reference_structure()
+            all_extracted_arrays.update(comp_arrays)
+            components_structure[comp_label] = comp_structure
+        reference_structure['components'] = components_structure
+
+        # Extract from buses
+        buses_structure = {}
+        for bus_label, bus in self.buses.items():
+            bus_structure, bus_arrays = bus._create_reference_structure()
+            all_extracted_arrays.update(bus_arrays)
+            buses_structure[bus_label] = bus_structure
+        reference_structure['buses'] = buses_structure
+
+        # Extract from effects
+        effects_structure = {}
+        for effect in self.effects:
+            effect_structure, effect_arrays = effect._create_reference_structure()
+            all_extracted_arrays.update(effect_arrays)
+            effects_structure[effect.label] = effect_structure
+        reference_structure['effects'] = effects_structure
+
+        return reference_structure, all_extracted_arrays
+
+    def to_dataset(self) -> xr.Dataset:
+        """
+        Convert the FlowSystem to an xarray Dataset.
+        Ensures FlowSystem is connected before serialization.
+
+        Returns:
+            xr.Dataset: Dataset containing all DataArrays with structure in attributes
+        """
+        if not self._connected_and_transformed:
+            logger.warning('FlowSystem is not connected and transformed. Calling connect_and_transform() now.')
+            self.connect_and_transform()
+
+        return super().to_dataset()
 
     @classmethod
-    def from_dataset(cls, ds: xr.Dataset):
-        timesteps_extra = pd.DatetimeIndex(ds.attrs['timesteps_extra'], name='time')
-        hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item()
+    def from_dataset(cls, ds: xr.Dataset) -> 'FlowSystem':
+        """
+        Create a FlowSystem from an xarray Dataset.
+        Handles FlowSystem-specific reconstruction logic.
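        A round-trip sketch (illustrative; assumes a fully specified flow_system):

        >>> ds = flow_system.to_dataset()
        >>> restored = FlowSystem.from_dataset(ds)
        >>> restored == flow_system  # equality is Dataset-based, see __eq__
        True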
+ + Args: + ds: Dataset containing the FlowSystem data + + Returns: + FlowSystem instance + """ + # Get the reference structure from attrs + reference_structure = dict(ds.attrs) + + # Extract FlowSystem constructor parameters + timesteps_extra = pd.DatetimeIndex(reference_structure['timesteps_extra'], name='time') + hours_of_previous_timesteps = reference_structure['hours_of_previous_timesteps'] - flow_system = FlowSystem( + # Calculate hours_of_last_timestep from the timesteps + hours_of_last_timestep = float((timesteps_extra[-1] - timesteps_extra[-2]) / pd.Timedelta(hours=1)) + + # Create FlowSystem instance with constructor parameters + flow_system = cls( timesteps=timesteps_extra[:-1], hours_of_last_timestep=hours_of_last_timestep, - hours_of_previous_timesteps=ds.attrs['hours_of_previous_timesteps'], + hours_of_previous_timesteps=hours_of_previous_timesteps, ) - structure = fx_io.insert_dataarray({key: ds.attrs[key] for key in ['components', 'buses', 'effects']}, ds) - flow_system.add_elements( - *[Bus.from_dict(bus) for bus in structure['buses'].values()] - + [Effect.from_dict(effect) for effect in structure['effects'].values()] - + [CLASS_REGISTRY[comp['__class__']].from_dict(comp) for comp in structure['components'].values()] - ) + # Create arrays dictionary from dataset variables + arrays_dict = {name: array for name, array in ds.data_vars.items()} + + # Restore components + components_structure = reference_structure.get('components', {}) + for comp_label, comp_data in components_structure.items(): + component = cls._resolve_reference_structure(comp_data, arrays_dict) + if not isinstance(component, Component): + logger.critical(f'Restoring component {comp_label} failed.') + flow_system._add_components(component) + + # Restore buses + buses_structure = reference_structure.get('buses', {}) + for bus_label, bus_data in buses_structure.items(): + bus = cls._resolve_reference_structure(bus_data, arrays_dict) + if not isinstance(bus, Bus): + logger.critical(f'Restoring bus {bus_label} failed.') + flow_system._add_buses(bus) + + # Restore effects + effects_structure = reference_structure.get('effects', {}) + for effect_label, effect_data in effects_structure.items(): + effect = cls._resolve_reference_structure(effect_data, arrays_dict) + if not isinstance(effect, Effect): + logger.critical(f'Restoring effect {effect_label} failed.') + flow_system._add_effects(effect) + return flow_system - @classmethod - def from_dict(cls, data: Dict) -> 'FlowSystem': + def to_netcdf(self, path: Union[str, pathlib.Path], compression: int = 0): """ - Load a FlowSystem from a dictionary. + Save the FlowSystem to a NetCDF file. + Ensures FlowSystem is connected before saving. Args: - data: Dictionary containing the FlowSystem data. + path: The path to the netCDF file. + compression: The compression level to use when saving the file. """ - timesteps_extra = pd.DatetimeIndex(data['timesteps_extra'], name='time') - hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() + if not self._connected_and_transformed: + logger.warning('FlowSystem is not connected. 
Calling connect_and_transform() now.') + self.connect_and_transform() - flow_system = FlowSystem( - timesteps=timesteps_extra[:-1], - hours_of_last_timestep=hours_of_last_timestep, - hours_of_previous_timesteps=data['hours_of_previous_timesteps'], - ) + super().to_netcdf(path, compression) + logger.info(f'Saved FlowSystem to {path}') + + def get_structure(self, clean: bool = False, stats: bool = False) -> Dict: + """ + Get FlowSystem structure. + Ensures FlowSystem is connected before getting structure. + + Args: + clean: If True, remove None and empty dicts and lists. + stats: If True, replace DataArray references with statistics + """ + if not self._connected_and_transformed: + logger.warning('FlowSystem is not connected. Calling connect_and_transform() now.') + self.connect_and_transform() - flow_system.add_elements(*[Bus.from_dict(bus) for bus in data['buses'].values()]) + return super().get_structure(clean, stats) - flow_system.add_elements(*[Effect.from_dict(effect) for effect in data['effects'].values()]) + def to_json(self, path: Union[str, pathlib.Path]): + """ + Save the flow system to a JSON file. + Ensures FlowSystem is connected before saving. - flow_system.add_elements( - *[CLASS_REGISTRY[comp['__class__']].from_dict(comp) for comp in data['components'].values()] - ) + Args: + path: The path to the JSON file. + """ + if not self._connected_and_transformed: + logger.warning('FlowSystem needs to be connected and transformed before saving to JSON. Calling connect_and_transform() now.') + self.connect_and_transform() - flow_system.transform_data() + super().to_json(path) - return flow_system + def fit_to_model_coords( + self, + name: str, + data: Optional[NumericDataUser], + needs_extra_timestep: bool = False, + ) -> Optional[NumericDataInternal]: + """ + Fit data to model coordinate system (currently time, but extensible). - @classmethod - def from_netcdf(cls, path: Union[str, pathlib.Path]): + Args: + name: Name of the data + data: Data to fit to model coordinates + needs_extra_timestep: Whether to use extended time coordinates + + Returns: + xr.DataArray aligned to model coordinate system + """ + if data is None: + return None + + # Choose appropriate timesteps + target_timesteps = self.timesteps_extra if needs_extra_timestep else self.timesteps + + if isinstance(data, TimeSeriesData): + try: + return TimeSeriesData( + DataConverter.to_dataarray(data, timesteps=target_timesteps), + agg_group=data.agg_group, agg_weight=data.agg_weight + ).rename(name) + except ConversionError as e: + logger.critical(f'Could not convert time series data "{name}" to DataArray: {e}. \n' + f'Take care to use the correct (time) index.') + else: + return DataConverter.to_dataarray(data, timesteps=target_timesteps).rename(name) + + def fit_effects_to_model_coords( + self, + label_prefix: Optional[str], + effect_values: Optional[EffectValuesUser], + label_suffix: Optional[str] = None, + ) -> Optional[EffectValuesInternal]: """ - Load a FlowSystem from a netcdf file + Transform EffectValues from the user to Internal Datatypes aligned with model coordinates. 
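        A minimal mapping sketch (illustrative; assumes an Effect labelled 'costs' is
        registered and flow_system is already built):

        >>> shares = flow_system.fit_effects_to_model_coords(
        ...     'Boiler(Q_th)', {'costs': 0.2}, 'per_flow_hour'
        ... )
        >>> sorted(shares)  # one DataArray per effect, here named 'Boiler(Q_th)|costs|per_flow_hour'
        ['costs']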
""" - return cls.from_dataset(fx_io.load_dataset_from_netcdf(path)) + if effect_values is None: + return None + + effect_values_dict = self.effects.create_effect_values_dict(effect_values) + + return { + effect: self.fit_to_model_coords('|'.join(filter(None, [label_prefix, effect, label_suffix])), value) + for effect, value in effect_values_dict.items() + } + + def connect_and_transform(self): + """Transform data for all elements using the new simplified approach.""" + if not self._connected_and_transformed: + self._connect_network() + for element in self.all_elements.values(): + element.transform_data(self) + self._connected_and_transformed = True def add_elements(self, *elements: Element) -> None: """ @@ -124,12 +351,12 @@ def add_elements(self, *elements: Element) -> None: *elements: childs of Element like Boiler, HeatPump, Bus,... modeling Elements """ - if self._connected: + if self._connected_and_transformed: warnings.warn( 'You are adding elements to an already connected FlowSystem. This is not recommended (But it works).', stacklevel=2, ) - self._connected = False + self._connected_and_transformed = False for new_element in list(elements): if isinstance(new_element, Component): self._add_components(new_element) @@ -142,63 +369,11 @@ def add_elements(self, *elements: Element) -> None: f'Tried to add incompatible object to FlowSystem: {type(new_element)=}: {new_element=} ' ) - def to_json(self, path: Union[str, pathlib.Path]): - """ - Saves the flow system to a json file. - This not meant to be reloaded and recreate the object, - but rather used to document or compare the flow_system to others. - - Args: - path: The path to the json file. - """ - with open(path, 'w', encoding='utf-8') as f: - json.dump(self.as_dict('stats'), f, indent=4, ensure_ascii=False) - - def as_dict(self, data_mode: Literal['data', 'name', 'stats'] = 'data') -> Dict: - """Convert the object to a dictionary representation.""" - data = { - 'components': { - comp.label: comp.to_dict() - for comp in sorted(self.components.values(), key=lambda component: component.label.upper()) - }, - 'buses': { - bus.label: bus.to_dict() for bus in sorted(self.buses.values(), key=lambda bus: bus.label.upper()) - }, - 'effects': { - effect.label: effect.to_dict() - for effect in sorted(self.effects, key=lambda effect: effect.label.upper()) - }, - 'timesteps_extra': [date.isoformat() for date in self.time_series_collection.timesteps_extra], - 'hours_of_previous_timesteps': self.time_series_collection.hours_of_previous_timesteps, - } - if data_mode == 'data': - return fx_io.replace_timeseries(data, 'data') - elif data_mode == 'stats': - return fx_io.remove_none_and_empty(fx_io.replace_timeseries(data, data_mode)) - return fx_io.replace_timeseries(data, data_mode) - - def as_dataset(self, constants_in_dataset: bool = False) -> xr.Dataset: - """ - Convert the FlowSystem to a xarray Dataset. - - Args: - constants_in_dataset: If True, constants are included as Dataset variables. - """ - ds = self.time_series_collection.to_dataset(include_constants=constants_in_dataset) - ds.attrs = self.as_dict(data_mode='name') - return ds - - def to_netcdf(self, path: Union[str, pathlib.Path], compression: int = 0, constants_in_dataset: bool = True): - """ - Saves the FlowSystem to a netCDF file. - Args: - path: The path to the netCDF file. - compression: The compression level to use when saving the file. - constants_in_dataset: If True, constants are included as Dataset variables. 
- """ - ds = self.as_dataset(constants_in_dataset=constants_in_dataset) - fx_io.save_dataset_to_netcdf(ds, path, compression=compression) - logger.info(f'Saved FlowSystem to {path}') + def create_model(self) -> SystemModel: + if not self._connected_and_transformed: + raise RuntimeError('FlowSystem is not connected_and_transformed. Call FlowSystem.connect_and_transform() first.') + self.model = SystemModel(self) + return self.model def plot_network( self, @@ -213,28 +388,6 @@ def plot_network( ) -> Optional['pyvis.network.Network']: """ Visualizes the network structure of a FlowSystem using PyVis, saving it as an interactive HTML file. - - Args: - path: Path to save the HTML visualization. - - `False`: Visualization is created but not saved. - - `str` or `Path`: Specifies file path (default: 'flow_system.html'). - controls: UI controls to add to the visualization. - - `True`: Enables all available controls. - - `List`: Specify controls, e.g., ['nodes', 'layout']. - - Options: 'nodes', 'edges', 'layout', 'interaction', 'manipulation', 'physics', 'selection', 'renderer'. - show: Whether to open the visualization in the web browser. - - Returns: - - Optional[pyvis.network.Network]: The `Network` instance representing the visualization, or `None` if `pyvis` is not installed. - - Examples: - >>> flow_system.plot_network() - >>> flow_system.plot_network(show=False) - >>> flow_system.plot_network(path='output/custom_network.html', controls=['nodes', 'layout']) - - Notes: - - This function requires `pyvis`. If not installed, the function prints a warning and returns `None`. - - Nodes are styled based on type (e.g., circles for buses, boxes for components) and annotated with node information. """ from . import plotting @@ -242,8 +395,8 @@ def plot_network( return plotting.plot_network(node_infos, edge_infos, path, controls, show) def network_infos(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, str]]]: - if not self._connected: - self._connect_network() + if not self._connected_and_transformed: + self.connect_and_transform() nodes = { node.label_full: { 'label': node.label, @@ -265,67 +418,6 @@ def network_infos(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, return nodes, edges - def transform_data(self): - if not self._connected: - self._connect_network() - for element in self.all_elements.values(): - element.transform_data(self) - - def create_time_series( - self, - name: str, - data: Optional[Union[NumericData, TimeSeriesData, TimeSeries]], - needs_extra_timestep: bool = False, - ) -> Optional[TimeSeries]: - """ - Tries to create a TimeSeries from NumericData Data and adds it to the time_series_collection - If the data already is a TimeSeries, nothing happens and the TimeSeries gets reset and returned - If the data is a TimeSeriesData, it is converted to a TimeSeries, and the aggregation weights are applied. - If the data is None, nothing happens. 
- """ - - if data is None: - return None - elif isinstance(data, TimeSeries): - data.restore_data() - if data in self.time_series_collection: - return data - return self.time_series_collection.create_time_series( - data=data.active_data, name=name, needs_extra_timestep=needs_extra_timestep - ) - return self.time_series_collection.create_time_series( - data=data, name=name, needs_extra_timestep=needs_extra_timestep - ) - - def create_effect_time_series( - self, - label_prefix: Optional[str], - effect_values: EffectValuesUser, - label_suffix: Optional[str] = None, - ) -> Optional[EffectTimeSeries]: - """ - Transform EffectValues to EffectTimeSeries. - Creates a TimeSeries for each key in the nested_values dictionary, using the value as the data. - - The resulting label of the TimeSeries is the label of the parent_element, - followed by the label of the Effect in the nested_values and the label_suffix. - If the key in the EffectValues is None, the alias 'Standard_Effect' is used - """ - effect_values: Optional[EffectValuesDict] = self.effects.create_effect_values_dict(effect_values) - if effect_values is None: - return None - - return { - effect: self.create_time_series('|'.join(filter(None, [label_prefix, effect, label_suffix])), value) - for effect, value in effect_values.items() - } - - def create_model(self) -> SystemModel: - if not self._connected: - raise RuntimeError('FlowSystem is not connected. Call FlowSystem.connect() first.') - self.model = SystemModel(self) - return self.model - def _check_if_element_is_unique(self, element: Element) -> None: """ checks if element or label of element already exists in list @@ -387,17 +479,61 @@ def _connect_network(self): f'Connected {len(self.buses)} Buses and {len(self.components)} ' f'via {len(self.flows)} Flows inside the FlowSystem.' ) - self._connected = True - def __repr__(self): - return f'<{self.__class__.__name__} with {len(self.components)} components and {len(self.effects)} effects>' + def __repr__(self) -> str: + """Compact representation for debugging.""" + status = '✓' if self._connected_and_transformed else '⚠' + return ( + f'FlowSystem({len(self.timesteps)} timesteps ' + f'[{self.timesteps[0].strftime("%Y-%m-%d")} to {self.timesteps[-1].strftime("%Y-%m-%d")}], ' + f'{len(self.components)} Components, {len(self.buses)} Buses, {len(self.effects)} Effects, {status})' + ) + + def __str__(self) -> str: + """Structured summary for users.""" + + def format_elements(element_names: list, label: str, alignment: int = 12): + name_list = ', '.join(element_names[:3]) + if len(element_names) > 3: + name_list += f' ... 
(+{len(element_names) - 3} more)' + + suffix = f' ({name_list})' if element_names else '' + padding = alignment - len(label) - 1 # -1 for the colon + return f'{label}:{"":<{padding}} {len(element_names)}{suffix}' + + time_period = f'Time period: {self.timesteps[0].date()} to {self.timesteps[-1].date()}' + freq_str = str(self.timesteps.freq).replace('<', '').replace('>', '') if self.timesteps.freq else 'irregular' - def __str__(self): - with StringIO() as output_buffer: - console = Console(file=output_buffer, width=1000) # Adjust width as needed - console.print(Pretty(self.as_dict('stats'), expand_all=True, indent_guides=True)) - value = output_buffer.getvalue() - return value + lines = [ + 'FlowSystem Overview:', + f'{"─" * 50}', + time_period, + f'Timesteps: {len(self.timesteps)} ({freq_str})', + format_elements(list(self.components.keys()), 'Components'), + format_elements(list(self.buses.keys()), 'Buses'), + format_elements(list(self.effects.effects.keys()), 'Effects'), + f'Status: {"Connected & Transformed" if self._connected_and_transformed else "Not connected"}', + ] + + return '\n'.join(lines) + + def __eq__(self, other: 'FlowSystem'): + """Check if two FlowSystems are equal by comparing their dataset representations.""" + if not isinstance(other, FlowSystem): + raise NotImplementedError('Comparison with other types is not implemented for class FlowSystem') + + ds_me = self.to_dataset() + ds_other = other.to_dataset() + + try: + xr.testing.assert_equal(ds_me, ds_other) + except AssertionError: + return False + + if ds_me.attrs != ds_other.attrs: + return False + + return True @property def flows(self) -> Dict[str, Flow]: @@ -407,3 +543,7 @@ def flows(self) -> Dict[str, Flow]: @property def all_elements(self) -> Dict[str, Element]: return {**self.components, **self.effects.effects, **self.flows, **self.buses} + + @property + def used_in_calculation(self) -> bool: + return self._used_in_calculation diff --git a/flixopt/interface.py b/flixopt/interface.py index c38d6c619..e5ee962ed 100644 --- a/flixopt/interface.py +++ b/flixopt/interface.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union from .config import CONFIG -from .core import NumericData, NumericDataTS, Scalar +from .core import NumericDataUser, Scalar from .structure import Interface, register_class_for_io if TYPE_CHECKING: # for type checking and preventing circular imports @@ -20,7 +20,7 @@ @register_class_for_io class Piece(Interface): - def __init__(self, start: NumericData, end: NumericData): + def __init__(self, start: NumericDataUser, end: NumericDataUser): """ Define a Piece, which is part of a Piecewise object. 
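A minimal usage sketch of the reworked Piece data flow (illustrative; assumes a built FlowSystem named flow_system and that plain scalars are valid NumericDataUser values, as the signatures above suggest):

    from flixopt.interface import Piece

    piece = Piece(start=0, end=50)  # plain user data, no TimeSeries wrapper required
    # transform_data() is invoked during FlowSystem.connect_and_transform(); it replaces
    # start/end with DataArrays named '<name_prefix>|start' and '<name_prefix>|end'
    # aligned to the model's time coordinate:
    piece.transform_data(flow_system, name_prefix='Converter|piecewise')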
@@ -32,8 +32,8 @@ def __init__(self, start: NumericData, end: NumericData): self.end = end def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): - self.start = flow_system.create_time_series(f'{name_prefix}|start', self.start) - self.end = flow_system.create_time_series(f'{name_prefix}|end', self.end) + self.start = flow_system.fit_to_model_coords(f'{name_prefix}|start', self.start) + self.end = flow_system.fit_to_model_coords(f'{name_prefix}|end', self.end) @register_class_for_io @@ -175,10 +175,10 @@ def __init__( effects_per_running_hour: Optional['EffectValuesUser'] = None, on_hours_total_min: Optional[int] = None, on_hours_total_max: Optional[int] = None, - consecutive_on_hours_min: Optional[NumericData] = None, - consecutive_on_hours_max: Optional[NumericData] = None, - consecutive_off_hours_min: Optional[NumericData] = None, - consecutive_off_hours_max: Optional[NumericData] = None, + consecutive_on_hours_min: Optional[NumericDataUser] = None, + consecutive_on_hours_max: Optional[NumericDataUser] = None, + consecutive_off_hours_min: Optional[NumericDataUser] = None, + consecutive_off_hours_max: Optional[NumericDataUser] = None, switch_on_total_max: Optional[int] = None, force_switch_on: bool = False, ): @@ -206,30 +206,30 @@ def __init__( self.effects_per_running_hour: EffectValuesUser = effects_per_running_hour or {} self.on_hours_total_min: Scalar = on_hours_total_min self.on_hours_total_max: Scalar = on_hours_total_max - self.consecutive_on_hours_min: NumericDataTS = consecutive_on_hours_min - self.consecutive_on_hours_max: NumericDataTS = consecutive_on_hours_max - self.consecutive_off_hours_min: NumericDataTS = consecutive_off_hours_min - self.consecutive_off_hours_max: NumericDataTS = consecutive_off_hours_max + self.consecutive_on_hours_min: NumericDataUser = consecutive_on_hours_min + self.consecutive_on_hours_max: NumericDataUser = consecutive_on_hours_max + self.consecutive_off_hours_min: NumericDataUser = consecutive_off_hours_min + self.consecutive_off_hours_max: NumericDataUser = consecutive_off_hours_max self.switch_on_total_max: Scalar = switch_on_total_max self.force_switch_on: bool = force_switch_on def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): - self.effects_per_switch_on = flow_system.create_effect_time_series( + self.effects_per_switch_on = flow_system.fit_effects_to_model_coords( name_prefix, self.effects_per_switch_on, 'per_switch_on' ) - self.effects_per_running_hour = flow_system.create_effect_time_series( + self.effects_per_running_hour = flow_system.fit_effects_to_model_coords( name_prefix, self.effects_per_running_hour, 'per_running_hour' ) - self.consecutive_on_hours_min = flow_system.create_time_series( + self.consecutive_on_hours_min = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_on_hours_min', self.consecutive_on_hours_min ) - self.consecutive_on_hours_max = flow_system.create_time_series( + self.consecutive_on_hours_max = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_on_hours_max', self.consecutive_on_hours_max ) - self.consecutive_off_hours_min = flow_system.create_time_series( + self.consecutive_off_hours_min = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_off_hours_min', self.consecutive_off_hours_min ) - self.consecutive_off_hours_max = flow_system.create_time_series( + self.consecutive_off_hours_max = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_off_hours_max', self.consecutive_off_hours_max ) diff --git a/flixopt/io.py b/flixopt/io.py index 
35d927136..9527eb66a 100644 --- a/flixopt/io.py +++ b/flixopt/io.py @@ -10,47 +10,9 @@ import xarray as xr import yaml -from .core import TimeSeries - logger = logging.getLogger('flixopt') -def replace_timeseries(obj, mode: Literal['name', 'stats', 'data'] = 'name'): - """Recursively replaces TimeSeries objects with their names prefixed by '::::'.""" - if isinstance(obj, dict): - return {k: replace_timeseries(v, mode) for k, v in obj.items()} - elif isinstance(obj, list): - return [replace_timeseries(v, mode) for v in obj] - elif isinstance(obj, TimeSeries): # Adjust this based on the actual class - if obj.all_equal: - return obj.active_data.values[0].item() - elif mode == 'name': - return f'::::{obj.name}' - elif mode == 'stats': - return obj.stats - elif mode == 'data': - return obj - else: - raise ValueError(f'Invalid mode {mode}') - else: - return obj - - -def insert_dataarray(obj, ds: xr.Dataset): - """Recursively inserts TimeSeries objects into a dataset.""" - if isinstance(obj, dict): - return {k: insert_dataarray(v, ds) for k, v in obj.items()} - elif isinstance(obj, list): - return [insert_dataarray(v, ds) for v in obj] - elif isinstance(obj, str) and obj.startswith('::::'): - da = ds[obj[4:]] - if da.isel(time=-1).isnull(): - return da.isel(time=slice(0, -1)) - return da - else: - return obj - - def remove_none_and_empty(obj): """Recursively removes None and empty dicts and lists values from a dictionary or list.""" @@ -206,7 +168,7 @@ def save_dataset_to_netcdf( compression: int = 0, ) -> None: """ - Save a dataset to a netcdf file. Store the attrs as a json string in the 'attrs' attribute. + Save a dataset to a netcdf file. Store all attrs as JSON strings in 'attrs' attributes. Args: ds: Dataset to save. @@ -216,6 +178,7 @@ def save_dataset_to_netcdf( Raises: ValueError: If the path has an invalid file extension. """ + path = pathlib.Path(path) if path.suffix not in ['.nc', '.nc4']: raise ValueError(f'Invalid file extension for path {path}. Only .nc and .nc4 are supported') @@ -228,8 +191,20 @@ def save_dataset_to_netcdf( 'Dataset was exported without compression due to missing dependency "netcdf4".' 'Install netcdf4 via `pip install netcdf4`.' ) + ds = ds.copy(deep=True) ds.attrs = {'attrs': json.dumps(ds.attrs)} + + # Convert all DataArray attrs to JSON strings + for var_name, data_var in ds.data_vars.items(): + if data_var.attrs: # Only if there are attrs + ds[var_name].attrs = {'attrs': json.dumps(data_var.attrs)} + + # Also handle coordinate attrs if they exist + for coord_name, coord_var in ds.coords.items(): + if hasattr(coord_var, 'attrs') and coord_var.attrs: + ds[coord_name].attrs = {'attrs': json.dumps(coord_var.attrs)} + ds.to_netcdf( path, encoding=None @@ -240,16 +215,30 @@ def save_dataset_to_netcdf( def load_dataset_from_netcdf(path: Union[str, pathlib.Path]) -> xr.Dataset: """ - Load a dataset from a netcdf file. Load the attrs from the 'attrs' attribute. + Load a dataset from a netcdf file. Load all attrs from 'attrs' attributes. Args: path: Path to load the dataset from. Returns: - Dataset: Loaded dataset. + Dataset: Loaded dataset with restored attrs. 
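    A round-trip sketch (illustrative; assumes xarray is imported as xr and both
    functions are imported from flixopt.io):

    >>> ds = xr.Dataset({'price': ('time', [1.0, 2.0])})
    >>> ds.attrs['structure'] = {'nested': 'dict'}  # stored as a JSON string on save
    >>> save_dataset_to_netcdf(ds, 'system.nc')
    >>> load_dataset_from_netcdf('system.nc').attrs['structure']
    {'nested': 'dict'}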
""" ds = xr.load_dataset(path) - ds.attrs = json.loads(ds.attrs['attrs']) + + # Restore Dataset attrs + if 'attrs' in ds.attrs: + ds.attrs = json.loads(ds.attrs['attrs']) + + # Restore DataArray attrs + for var_name, data_var in ds.data_vars.items(): + if 'attrs' in data_var.attrs: + ds[var_name].attrs = json.loads(data_var.attrs['attrs']) + + # Restore coordinate attrs + for coord_name, coord_var in ds.coords.items(): + if hasattr(coord_var, 'attrs') and 'attrs' in coord_var.attrs: + ds[coord_name].attrs = json.loads(coord_var.attrs['attrs']) + return ds diff --git a/flixopt/linear_converters.py b/flixopt/linear_converters.py index 3fd032632..94463c492 100644 --- a/flixopt/linear_converters.py +++ b/flixopt/linear_converters.py @@ -8,7 +8,7 @@ import numpy as np from .components import LinearConverter -from .core import NumericDataTS, TimeSeriesData +from .core import NumericDataUser, TimeSeriesData from .elements import Flow from .interface import OnOffParameters from .structure import register_class_for_io @@ -21,7 +21,7 @@ class Boiler(LinearConverter): def __init__( self, label: str, - eta: NumericDataTS, + eta: NumericDataUser, Q_fu: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -62,7 +62,7 @@ class Power2Heat(LinearConverter): def __init__( self, label: str, - eta: NumericDataTS, + eta: NumericDataUser, P_el: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -104,7 +104,7 @@ class HeatPump(LinearConverter): def __init__( self, label: str, - COP: NumericDataTS, + COP: NumericDataUser, P_el: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -146,7 +146,7 @@ class CoolingTower(LinearConverter): def __init__( self, label: str, - specific_electricity_demand: NumericDataTS, + specific_electricity_demand: NumericDataUser, P_el: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -190,8 +190,8 @@ class CHP(LinearConverter): def __init__( self, label: str, - eta_th: NumericDataTS, - eta_el: NumericDataTS, + eta_th: NumericDataUser, + eta_el: NumericDataUser, Q_fu: Flow, P_el: Flow, Q_th: Flow, @@ -251,7 +251,7 @@ class HeatPumpWithSource(LinearConverter): def __init__( self, label: str, - COP: NumericDataTS, + COP: NumericDataUser, P_el: Flow, Q_ab: Flow, Q_th: Flow, @@ -297,11 +297,11 @@ def COP(self, value): # noqa: N802 def check_bounds( - value: NumericDataTS, + value: NumericDataUser, parameter_label: str, element_label: str, - lower_bound: NumericDataTS, - upper_bound: NumericDataTS, + lower_bound: NumericDataUser, + upper_bound: NumericDataUser, ) -> None: """ Check if the value is within the bounds. The bounds are exclusive. diff --git a/flixopt/results.py b/flixopt/results.py index 223e3708e..e13cb0785 100644 --- a/flixopt/results.py +++ b/flixopt/results.py @@ -14,7 +14,7 @@ from . import io as fx_io from . 
import plotting -from .core import TimeSeriesCollection +from .flow_system import FlowSystem if TYPE_CHECKING: import pyvis @@ -118,7 +118,7 @@ def from_calculation(cls, calculation: 'Calculation'): """ return cls( solution=calculation.model.solution, - flow_system=calculation.flow_system.as_dataset(constants_in_dataset=True), + flow_system=calculation.flow_system.to_dataset(), summary=calculation.summary, model=calculation.model, name=calculation.name, @@ -160,7 +160,7 @@ def __init__( } self.timesteps_extra = self.solution.indexes['time'] - self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.timesteps_extra) + self.hours_per_timestep = FlowSystem.calculate_hours_per_timestep(self.timesteps_extra) def __getitem__(self, key: str) -> Union['ComponentResults', 'BusResults', 'EffectResults']: if key in self.components: @@ -684,7 +684,7 @@ def __init__( self.overlap_timesteps = overlap_timesteps self.name = name self.folder = pathlib.Path(folder) if folder is not None else pathlib.Path.cwd() / 'results' - self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.all_timesteps) + self.hours_per_timestep = FlowSystem.calculate_hours_per_timestep(self.all_timesteps) @property def meta_data(self) -> Dict[str, Union[int, List[str]]]: diff --git a/flixopt/structure.py b/flixopt/structure.py index 1d0f2324f..b4fcf7d38 100644 --- a/flixopt/structure.py +++ b/flixopt/structure.py @@ -18,8 +18,9 @@ from rich.console import Console from rich.pretty import Pretty +from . import io as fx_io from .config import CONFIG -from .core import NumericData, Scalar, TimeSeries, TimeSeriesCollection, TimeSeriesData +from .core import NumericDataUser, Scalar, TimeSeriesData, get_dataarray_stats if TYPE_CHECKING: # for type checking and preventing circular imports from .effects import EffectCollectionModel @@ -56,7 +57,6 @@ def __init__(self, flow_system: 'FlowSystem'): """ super().__init__(force_dim_names=True) self.flow_system = flow_system - self.time_series_collection = flow_system.time_series_collection self.effects: Optional[EffectCollectionModel] = None def do_modeling(self): @@ -88,170 +88,500 @@ def solution(self): for effect in sorted(self.flow_system.effects, key=lambda effect: effect.label_full.upper()) }, } - return solution.reindex(time=self.time_series_collection.timesteps_extra) + return solution.reindex(time=self.flow_system.timesteps_extra) @property def hours_per_step(self): - return self.time_series_collection.hours_per_timestep + return self.flow_system.hours_per_timestep @property def hours_of_previous_timesteps(self): - return self.time_series_collection.hours_of_previous_timesteps + return self.flow_system.hours_of_previous_timesteps @property def coords(self) -> Tuple[pd.DatetimeIndex]: - return (self.time_series_collection.timesteps,) + return (self.flow_system.timesteps,) @property def coords_extra(self) -> Tuple[pd.DatetimeIndex]: - return (self.time_series_collection.timesteps_extra,) + return (self.flow_system.timesteps_extra,) class Interface: """ - This class is used to collect arguments about a Model. Its the base class for all Elements and Models in flixopt. + Base class for all Elements and Models in flixopt that provides serialization capabilities. + + This class enables automatic serialization/deserialization of objects containing xarray DataArrays + and nested Interface objects to/from xarray Datasets and NetCDF files. It uses introspection + of constructor parameters to automatically handle most serialization scenarios. 
+ + Key Features: + - Automatic extraction and restoration of xarray DataArrays + - Support for nested Interface objects + - NetCDF and JSON export/import + - Recursive handling of complex nested structures + + Subclasses must implement: + transform_data(flow_system): Transform data to match FlowSystem dimensions """ def transform_data(self, flow_system: 'FlowSystem'): - """Transforms the data of the interface to match the FlowSystem's dimensions""" - raise NotImplementedError('Every Interface needs a transform_data() method') + """Transform the data of the interface to match the FlowSystem's dimensions. + + Args: + flow_system: The FlowSystem containing timing and dimensional information - def infos(self, use_numpy: bool = True, use_element_label: bool = False) -> Dict: + Raises: + NotImplementedError: Must be implemented by subclasses """ - Generate a dictionary representation of the object's constructor arguments. - Excludes default values and empty dictionaries and lists. - Converts data to be compatible with JSON. + raise NotImplementedError('Every Interface subclass needs a transform_data() method') - Args: - use_numpy: Whether to convert NumPy arrays to lists. Defaults to True. - If True, numeric numpy arrays (`np.ndarray`) are preserved as-is. - If False, they are converted to lists. - use_element_label: Whether to use the element label instead of the infos of the element. Defaults to False. - Note that Elements used as keys in dictionaries are always converted to their labels. + def _create_reference_structure(self) -> Tuple[Dict, Dict[str, xr.DataArray]]: + """ + Convert all DataArrays to references and extract them. + This is the core method that both to_dict() and to_dataset() build upon. Returns: - A dictionary representation of the object's constructor arguments. + Tuple of (reference_structure, extracted_arrays_dict) + Raises: + ValueError: If DataArrays don't have unique names or are duplicated """ - # Get the constructor arguments and their default values - init_params = sorted( - inspect.signature(self.__init__).parameters.items(), - key=lambda x: (x[0].lower() != 'label', x[0].lower()), # Prioritize 'label' - ) - # Build a dict of attribute=value pairs, excluding defaults - details = {'class': ':'.join([cls.__name__ for cls in self.__class__.__mro__])} - for name, param in init_params: + # Get constructor parameters using caching for performance + if not hasattr(self, '_cached_init_params'): + self._cached_init_params = list(inspect.signature(self.__init__).parameters.keys()) + + # Process all constructor parameters + reference_structure = {'__class__': self.__class__.__name__} + all_extracted_arrays = {} + + for name in self._cached_init_params: if name == 'self': continue - value, default = getattr(self, name, None), param.default - # Ignore default values and empty dicts and list - if np.all(value == default) or (isinstance(value, (dict, list)) and not value): + + value = getattr(self, name, None) + if value is None: continue - details[name] = copy_and_convert_datatypes(value, use_numpy, use_element_label) - return details - def to_json(self, path: Union[str, pathlib.Path]): + # Extract arrays and get reference structure + processed_value, extracted_arrays = self._extract_dataarrays_recursive(value, name) + + # Check for array name conflicts + conflicts = set(all_extracted_arrays.keys()) & set(extracted_arrays.keys()) + if conflicts: + raise ValueError( + f'DataArray name conflicts detected: {conflicts}. ' + f'Each DataArray must have a unique name for serialization.' 
+ ) + + # Add extracted arrays to the collection + all_extracted_arrays.update(extracted_arrays) + + # Only store in structure if it's not None/empty after processing + if processed_value is not None and not self._is_empty_container(processed_value): + reference_structure[name] = processed_value + + return reference_structure, all_extracted_arrays + + @staticmethod + def _is_empty_container(obj) -> bool: + """Check if object is an empty container (dict, list, tuple, set).""" + return isinstance(obj, (dict, list, tuple, set)) and len(obj) == 0 + + def _extract_dataarrays_recursive(self, obj, context_name: str = '') -> Tuple[Any, Dict[str, xr.DataArray]]: """ - Saves the element to a json file. - This not meant to be reloaded and recreate the object, but rather used to document or compare the object. + Recursively extract DataArrays from nested structures. Args: - path: The path to the json file. + obj: Object to process + context_name: Name context for better error messages + + Returns: + Tuple of (processed_object_with_references, extracted_arrays_dict) + + Raises: + ValueError: If DataArrays don't have unique names """ - data = get_compact_representation(self.infos(use_numpy=True, use_element_label=True)) - with open(path, 'w', encoding='utf-8') as f: - json.dump(data, f, indent=4, ensure_ascii=False) + extracted_arrays = {} + + # Handle DataArrays directly - use their unique name + if isinstance(obj, xr.DataArray): + if not obj.name: + raise ValueError( + f'DataArrays must have a unique name for serialization. ' + f'Unnamed DataArray found in {context_name}. Please set array.name = "unique_name"' + ) - def to_dict(self) -> Dict: - """Convert the object to a dictionary representation.""" - data = {'__class__': self.__class__.__name__} + array_name = str(obj.name) # Ensure string type + if array_name in extracted_arrays: + raise ValueError( + f'DataArray name "{array_name}" is duplicated in {context_name}. ' + f'Each DataArray must have a unique name for serialization.' 
+ ) - # Get the constructor parameters - init_params = inspect.signature(self.__init__).parameters + extracted_arrays[array_name] = obj + return f':::{array_name}', extracted_arrays - for name in init_params: - if name == 'self': - continue + # Handle Interface objects - extract their DataArrays too + elif isinstance(obj, Interface): + try: + interface_structure, interface_arrays = obj._create_reference_structure() + extracted_arrays.update(interface_arrays) + return interface_structure, extracted_arrays + except Exception as e: + raise ValueError(f'Failed to process nested Interface object in {context_name}: {e}') from e + + # Handle sequences (lists, tuples) + elif isinstance(obj, (list, tuple)): + processed_items = [] + for i, item in enumerate(obj): + item_context = f'{context_name}[{i}]' if context_name else f'item[{i}]' + processed_item, nested_arrays = self._extract_dataarrays_recursive(item, item_context) + extracted_arrays.update(nested_arrays) + processed_items.append(processed_item) + return processed_items, extracted_arrays + + # Handle dictionaries + elif isinstance(obj, dict): + processed_dict = {} + for key, value in obj.items(): + key_context = f'{context_name}.{key}' if context_name else str(key) + processed_value, nested_arrays = self._extract_dataarrays_recursive(value, key_context) + extracted_arrays.update(nested_arrays) + processed_dict[key] = processed_value + return processed_dict, extracted_arrays + + # Handle sets (convert to list for JSON compatibility) + elif isinstance(obj, set): + processed_items = [] + for i, item in enumerate(obj): + item_context = f'{context_name}.set_item[{i}]' if context_name else f'set_item[{i}]' + processed_item, nested_arrays = self._extract_dataarrays_recursive(item, item_context) + extracted_arrays.update(nested_arrays) + processed_items.append(processed_item) + return processed_items, extracted_arrays + + # For all other types, serialize to basic types + else: + return self._serialize_to_basic_types(obj), extracted_arrays + + @classmethod + def _resolve_reference_structure(cls, structure, arrays_dict: Dict[str, xr.DataArray]): + """ + Convert reference structure back to actual objects using provided arrays. + + Args: + structure: Structure containing references (:::name) or special type markers + arrays_dict: Dictionary of available DataArrays + + Returns: + Structure with references resolved to actual DataArrays or objects + + Raises: + ValueError: If referenced arrays are not found or class is not registered + """ + # Handle DataArray references + if isinstance(structure, str) and structure.startswith(':::'): + array_name = structure[3:] # Remove ":::" prefix + if array_name not in arrays_dict: + raise ValueError(f"Referenced DataArray '{array_name}' not found in dataset") + + array = arrays_dict[array_name] + + # Handle null values with warning + if array.isnull().any(): + logger.warning(f"DataArray '{array_name}' contains null values. 
Dropping them.") + array = array.dropna(dim='time', how='all') + + # Check if this should be restored as TimeSeriesData + if TimeSeriesData.is_timeseries_data(array): + return TimeSeriesData.from_dataarray(array) + + return array + + elif isinstance(structure, list): + resolved_list = [] + for item in structure: + resolved_item = cls._resolve_reference_structure(item, arrays_dict) + if resolved_item is not None: # Filter out None values from missing references + resolved_list.append(resolved_item) + return resolved_list + + elif isinstance(structure, dict): + if structure.get('__class__'): + class_name = structure['__class__'] + if class_name not in CLASS_REGISTRY: + raise ValueError( + f"Class '{class_name}' not found in CLASS_REGISTRY. " + f'Available classes: {list(CLASS_REGISTRY.keys())}' + ) + + # This is a nested Interface object - restore it recursively + nested_class = CLASS_REGISTRY[class_name] + # Remove the __class__ key and process the rest + nested_data = {k: v for k, v in structure.items() if k != '__class__'} + # Resolve references in the nested data + resolved_nested_data = cls._resolve_reference_structure(nested_data, arrays_dict) + + try: + return nested_class(**resolved_nested_data) + except Exception as e: + raise ValueError(f'Failed to create instance of {class_name}: {e}') from e + else: + # Regular dictionary - resolve references in values + resolved_dict = {} + for key, value in structure.items(): + resolved_value = cls._resolve_reference_structure(value, arrays_dict) + if resolved_value is not None or value is None: # Keep None values if they were originally None + resolved_dict[key] = resolved_value + return resolved_dict - value = getattr(self, name, None) - data[name] = self._serialize_value(value) - - return data - - def _serialize_value(self, value: Any): - """Helper method to serialize a value based on its type.""" - if value is None: - return None - elif isinstance(value, Interface): - return value.to_dict() - elif isinstance(value, (list, tuple)): - return self._serialize_list(value) - elif isinstance(value, dict): - return self._serialize_dict(value) else: - return value + return structure - def _serialize_list(self, items): - """Serialize a list of items.""" - return [self._serialize_value(item) for item in items] + def _serialize_to_basic_types(self, obj): + """ + Convert object to basic Python types only (no DataArrays, no custom objects). 
- def _serialize_dict(self, d): - """Serialize a dictionary of items.""" - return {k: self._serialize_value(v) for k, v in d.items()} + Args: + obj: Object to serialize - @classmethod - def _deserialize_dict(cls, data: Dict) -> Union[Dict, 'Interface']: - if '__class__' in data: - class_name = data.pop('__class__') - try: - class_type = CLASS_REGISTRY[class_name] - if issubclass(class_type, Interface): - # Use _deserialize_dict to process the arguments - processed_data = {k: cls._deserialize_value(v) for k, v in data.items()} - return class_type(**processed_data) - else: - raise ValueError(f'Class "{class_name}" is not an Interface.') - except (AttributeError, KeyError) as e: - raise ValueError(f'Class "{class_name}" could not get reconstructed.') from e + Returns: + Object converted to basic Python types (str, int, float, bool, list, dict) + """ + if obj is None or isinstance(obj, (str, int, float, bool)): + return obj + elif isinstance(obj, np.integer): + return int(obj) + elif isinstance(obj, np.floating): + return float(obj) + elif isinstance(obj, np.bool_): + return bool(obj) + elif isinstance(obj, (np.ndarray, pd.Series, pd.DataFrame)): + return obj.tolist() if hasattr(obj, 'tolist') else list(obj) + elif isinstance(obj, dict): + return {k: self._serialize_to_basic_types(v) for k, v in obj.items()} + elif isinstance(obj, (list, tuple)): + return [self._serialize_to_basic_types(item) for item in obj] + elif isinstance(obj, set): + return [self._serialize_to_basic_types(item) for item in obj] + elif hasattr(obj, 'isoformat'): # datetime objects + return obj.isoformat() + elif hasattr(obj, '__dict__'): # Custom objects with attributes + logger.warning(f'Converting custom object {type(obj)} to dict representation: {obj}') + return {str(k): self._serialize_to_basic_types(v) for k, v in obj.__dict__.items()} else: - return {k: cls._deserialize_value(v) for k, v in data.items()} + # For any other object, try to convert to string as fallback + logger.warning(f'Converting unknown type {type(obj)} to string: {obj}') + return str(obj) - @classmethod - def _deserialize_list(cls, data: List) -> List: - return [cls._deserialize_value(value) for value in data] + def to_dataset(self) -> xr.Dataset: + """ + Convert the object to an xarray Dataset representation. + All DataArrays become dataset variables, everything else goes to attrs. + + Returns: + xr.Dataset: Dataset containing all DataArrays with basic objects only in attributes + + Raises: + ValueError: If serialization fails due to naming conflicts or invalid data + """ + try: + reference_structure, extracted_arrays = self._create_reference_structure() + # Create the dataset with extracted arrays as variables and structure as attrs + return xr.Dataset(extracted_arrays, attrs=reference_structure) + except Exception as e: + raise ValueError(f'Failed to convert {self.__class__.__name__} to dataset: {e}') from e + + def to_netcdf(self, path: Union[str, pathlib.Path], compression: int = 0): + """ + Save the object to a NetCDF file. 
+ + Args: + path: Path to save the NetCDF file + compression: Compression level (0-9) + + Raises: + ValueError: If serialization fails + IOError: If file cannot be written + """ + try: + ds = self.to_dataset() + fx_io.save_dataset_to_netcdf(ds, path, compression=compression) + except Exception as e: + raise IOError(f'Failed to save {self.__class__.__name__} to NetCDF file {path}: {e}') from e @classmethod - def _deserialize_value(cls, value: Any): - """Helper method to deserialize a value based on its type.""" - if value is None: - return None - elif isinstance(value, dict): - return cls._deserialize_dict(value) - elif isinstance(value, list): - return cls._deserialize_list(value) - return value + def from_dataset(cls, ds: xr.Dataset) -> 'Interface': + """ + Create an instance from an xarray Dataset. + + Args: + ds: Dataset containing the object data + + Returns: + Interface instance + + Raises: + ValueError: If dataset format is invalid or class mismatch + """ + try: + # Get class name and verify it matches + class_name = ds.attrs.get('__class__') + if class_name and class_name != cls.__name__: + logger.warning(f"Dataset class '{class_name}' doesn't match target class '{cls.__name__}'") + + # Get the reference structure from attrs + reference_structure = dict(ds.attrs) + + # Remove the class name since it's not a constructor parameter + reference_structure.pop('__class__', None) + + # Create arrays dictionary from dataset variables + arrays_dict = {name: array for name, array in ds.data_vars.items()} + + # Resolve all references using the centralized method + resolved_params = cls._resolve_reference_structure(reference_structure, arrays_dict) + + return cls(**resolved_params) + except Exception as e: + raise ValueError(f'Failed to create {cls.__name__} from dataset: {e}') from e @classmethod - def from_dict(cls, data: Dict) -> 'Interface': + def from_netcdf(cls, path: Union[str, pathlib.Path]) -> 'Interface': """ - Create an instance from a dictionary representation. + Load an instance from a NetCDF file. Args: - data: Dictionary containing the data for the object. + path: Path to the NetCDF file + + Returns: + Interface instance + + Raises: + IOError: If file cannot be read + ValueError: If file format is invalid """ - return cls._deserialize_dict(data) + try: + ds = fx_io.load_dataset_from_netcdf(path) + return cls.from_dataset(ds) + except Exception as e: + raise IOError(f'Failed to load {cls.__name__} from NetCDF file {path}: {e}') from e - def __repr__(self): - # Get the constructor arguments and their current values - init_signature = inspect.signature(self.__init__) - init_args = init_signature.parameters + def get_structure(self, clean: bool = False, stats: bool = False) -> Dict: + """ + Get object structure as a dictionary. + + Args: + clean: If True, remove None and empty dicts and lists. 
+ stats: If True, replace DataArray references with statistics - # Create a dictionary with argument names and their values - args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self') - return f'{self.__class__.__name__}({args_str})' + Returns: + Dictionary representation of the object structure + """ + reference_structure, extracted_arrays = self._create_reference_structure() + + if stats: + # Replace references with statistics + reference_structure = self._replace_references_with_stats(reference_structure, extracted_arrays) + + if clean: + return fx_io.remove_none_and_empty(reference_structure) + return reference_structure + + def _replace_references_with_stats(self, structure, arrays_dict: Dict[str, xr.DataArray]): + """Replace DataArray references with statistical summaries.""" + if isinstance(structure, str) and structure.startswith(':::'): + array_name = structure[3:] + if array_name in arrays_dict: + return get_dataarray_stats(arrays_dict[array_name]) + return structure + + elif isinstance(structure, dict): + return {k: self._replace_references_with_stats(v, arrays_dict) for k, v in structure.items()} + + elif isinstance(structure, list): + return [self._replace_references_with_stats(item, arrays_dict) for item in structure] + + return structure + + def to_json(self, path: Union[str, pathlib.Path]): + """ + Save the object to a JSON file. + This is meant for documentation and comparison, not for reloading. + + Args: + path: The path to the JSON file. + + Raises: + IOError: If file cannot be written + """ + try: + # Use the stats mode for JSON export (cleaner output) + data = self.get_structure(clean=True, stats=True) + with open(path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=4, ensure_ascii=False) + except Exception as e: + raise IOError(f'Failed to save {self.__class__.__name__} to JSON file {path}: {e}') from e + + def __repr__(self): + """Return a detailed string representation for debugging.""" + try: + # Get the constructor arguments and their current values + init_signature = inspect.signature(self.__init__) + init_args = init_signature.parameters + + # Create a dictionary with argument names and their values, with better formatting + args_parts = [] + for name in init_args: + if name == 'self': + continue + value = getattr(self, name, None) + # Truncate long representations + value_repr = repr(value) + if len(value_repr) > 50: + value_repr = value_repr[:47] + '...' + args_parts.append(f'{name}={value_repr}') + + args_str = ', '.join(args_parts) + return f'{self.__class__.__name__}({args_str})' + except Exception: + # Fallback if introspection fails + return f'{self.__class__.__name__}()' def __str__(self): - return get_str_representation(self.infos(use_numpy=True, use_element_label=True)) + """Return a user-friendly string representation.""" + try: + data = self.get_structure(clean=True, stats=True) + with StringIO() as output_buffer: + console = Console(file=output_buffer, width=1000) # Adjust width as needed + console.print(Pretty(data, expand_all=True, indent_guides=True)) + return output_buffer.getvalue() + except Exception: + # Fallback if structure generation fails + return f'{self.__class__.__name__} instance' + + def copy(self) -> 'Interface': + """ + Create a copy of the Interface object. + + Uses the existing serialization infrastructure to ensure proper copying + of all DataArrays and nested objects. + + Returns: + A new instance of the same class with copied data. 
+ """ + # Convert to dataset, copy it, and convert back + dataset = self.to_dataset().copy(deep=True) + return self.__class__.from_dataset(dataset) + + def __copy__(self): + """Support for copy.copy().""" + return self.copy() + + def __deepcopy__(self, memo): + """Support for copy.deepcopy().""" + return self.copy() class Element(Interface): @@ -454,177 +784,3 @@ def results_structure(self): 'variables': list(self.variables), 'constraints': list(self.constraints), } - - -def copy_and_convert_datatypes(data: Any, use_numpy: bool = True, use_element_label: bool = False) -> Any: - """ - Converts values in a nested data structure into JSON-compatible types while preserving or transforming numpy arrays - and custom `Element` objects based on the specified options. - - The function handles various data types and transforms them into a consistent, readable format: - - Primitive types (`int`, `float`, `str`, `bool`, `None`) are returned as-is. - - Numpy scalars are converted to their corresponding Python scalar types. - - Collections (`list`, `tuple`, `set`, `dict`) are recursively processed to ensure all elements are compatible. - - Numpy arrays are preserved or converted to lists, depending on `use_numpy`. - - Custom `Element` objects can be represented either by their `label` or their initialization parameters as a dictionary. - - Timestamps (`datetime`) are converted to ISO 8601 strings. - - Args: - data: The input data to process, which may be deeply nested and contain a mix of types. - use_numpy: If `True`, numeric numpy arrays (`np.ndarray`) are preserved as-is. If `False`, they are converted to lists. - Default is `True`. - use_element_label: If `True`, `Element` objects are represented by their `label`. If `False`, they are converted into a dictionary - based on their initialization parameters. Default is `False`. - - Returns: - A transformed version of the input data, containing only JSON-compatible types: - - `int`, `float`, `str`, `bool`, `None` - - `list`, `dict` - - `np.ndarray` (if `use_numpy=True`. This is NOT JSON-compatible) - - Raises: - TypeError: If the data cannot be converted to the specified types. - - Examples: - >>> copy_and_convert_datatypes({'a': np.array([1, 2, 3]), 'b': Element(label='example')}) - {'a': array([1, 2, 3]), 'b': {'class': 'Element', 'label': 'example'}} - - >>> copy_and_convert_datatypes({'a': np.array([1, 2, 3]), 'b': Element(label='example')}, use_numpy=False) - {'a': [1, 2, 3], 'b': {'class': 'Element', 'label': 'example'}} - - Notes: - - The function gracefully handles unexpected types by issuing a warning and returning a deep copy of the data. - - Empty collections (lists, dictionaries) and default parameter values in `Element` objects are omitted from the output. - - Numpy arrays with non-numeric data types are automatically converted to lists. - """ - if isinstance(data, np.integer): # This must be checked before checking for regular int and float! 
- return int(data) - elif isinstance(data, np.floating): - return float(data) - - elif isinstance(data, (int, float, str, bool, type(None))): - return data - elif isinstance(data, datetime): - return data.isoformat() - - elif isinstance(data, (tuple, set)): - return copy_and_convert_datatypes([item for item in data], use_numpy, use_element_label) - elif isinstance(data, dict): - return { - copy_and_convert_datatypes(key, use_numpy, use_element_label=True): copy_and_convert_datatypes( - value, use_numpy, use_element_label - ) - for key, value in data.items() - } - elif isinstance(data, list): # Shorten arrays/lists to be readable - if use_numpy and all([isinstance(value, (int, float)) for value in data]): - return np.array([item for item in data]) - else: - return [copy_and_convert_datatypes(item, use_numpy, use_element_label) for item in data] - - elif isinstance(data, np.ndarray): - if not use_numpy: - return copy_and_convert_datatypes(data.tolist(), use_numpy, use_element_label) - elif use_numpy and np.issubdtype(data.dtype, np.number): - return data - else: - logger.critical( - f'An np.array with non-numeric content was found: {data=}.It will be converted to a list instead' - ) - return copy_and_convert_datatypes(data.tolist(), use_numpy, use_element_label) - - elif isinstance(data, TimeSeries): - return copy_and_convert_datatypes(data.active_data, use_numpy, use_element_label) - elif isinstance(data, TimeSeriesData): - return copy_and_convert_datatypes(data.data, use_numpy, use_element_label) - - elif isinstance(data, Interface): - if use_element_label and isinstance(data, Element): - return data.label - return data.infos(use_numpy, use_element_label) - elif isinstance(data, xr.DataArray): - # TODO: This is a temporary basic work around - return copy_and_convert_datatypes(data.values, use_numpy, use_element_label) - else: - raise TypeError(f'copy_and_convert_datatypes() did get unexpected data of type "{type(data)}": {data=}') - - -def get_compact_representation(data: Any, array_threshold: int = 50, decimals: int = 2) -> Dict: - """ - Generate a compact json serializable representation of deeply nested data. - Numpy arrays are statistically described if they exceed a threshold and converted to lists. - - Args: - data (Any): The data to format and represent. - array_threshold (int): Maximum length of NumPy arrays to display. Longer arrays are statistically described. - decimals (int): Number of decimal places in which to describe the arrays. 
- - Returns: - Dict: A dictionary representation of the data - """ - - def format_np_array_if_found(value: Any) -> Any: - """Recursively processes the data, formatting NumPy arrays.""" - if isinstance(value, (int, float, str, bool, type(None))): - return value - elif isinstance(value, np.ndarray): - return describe_numpy_arrays(value) - elif isinstance(value, dict): - return {format_np_array_if_found(k): format_np_array_if_found(v) for k, v in value.items()} - elif isinstance(value, (list, tuple, set)): - return [format_np_array_if_found(v) for v in value] - else: - logger.warning( - f'Unexpected value found when trying to format numpy array numpy array: {type(value)=}; {value=}' - ) - return value - - def describe_numpy_arrays(arr: np.ndarray) -> Union[str, List]: - """Shortens NumPy arrays if they exceed the specified length.""" - - def normalized_center_of_mass(array: Any) -> float: - # position in array (0 bis 1 normiert) - positions = np.linspace(0, 1, len(array)) # weights w_i - # mass center - if np.sum(array) == 0: - return np.nan - else: - return np.sum(positions * array) / np.sum(array) - - if arr.size > array_threshold: # Calculate basic statistics - fmt = f'.{decimals}f' - return ( - f'Array (min={np.min(arr):{fmt}}, max={np.max(arr):{fmt}}, mean={np.mean(arr):{fmt}}, ' - f'median={np.median(arr):{fmt}}, std={np.std(arr):{fmt}}, len={len(arr)}, ' - f'center={normalized_center_of_mass(arr):{fmt}})' - ) - else: - return np.around(arr, decimals=decimals).tolist() - - # Process the data to handle NumPy arrays - formatted_data = format_np_array_if_found(copy_and_convert_datatypes(data, use_numpy=True)) - - return formatted_data - - -def get_str_representation(data: Any, array_threshold: int = 50, decimals: int = 2) -> str: - """ - Generate a string representation of deeply nested data using `rich.print`. - NumPy arrays are shortened to the specified length and converted to strings. - - Args: - data (Any): The data to format and represent. - array_threshold (int): Maximum length of NumPy arrays to display. Longer arrays are statistically described. - decimals (int): Number of decimal places in which to describe the arrays. - - Returns: - str: The formatted string representation of the data. 
- """ - - formatted_data = get_compact_representation(data, array_threshold, decimals) - - # Use Rich to format and print the data - with StringIO() as output_buffer: - console = Console(file=output_buffer, width=1000) # Adjust width as needed - console.print(Pretty(formatted_data, expand_all=True, indent_guides=True)) - return output_buffer.getvalue() diff --git a/tests/conftest.py b/tests/conftest.py index 5399be72a..43f9f8bae 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -293,8 +293,8 @@ def flow_system_segments_of_flows_2(flow_system_complex) -> fx.FlowSystem: { 'P_el': fx.Piecewise( [ - fx.Piece(np.linspace(5, 6, len(flow_system.time_series_collection.timesteps)), 30), - fx.Piece(40, np.linspace(60, 70, len(flow_system.time_series_collection.timesteps))), + fx.Piece(np.linspace(5, 6, len(flow_system.timesteps)), 30), + fx.Piece(40, np.linspace(60, 70, len(flow_system.timesteps))), ] ), 'Q_th': fx.Piecewise([fx.Piece(6, 35), fx.Piece(45, 100)]), diff --git a/tests/test_bus.py b/tests/test_bus.py index 4a41a9f9e..136f9d2cc 100644 --- a/tests/test_bus.py +++ b/tests/test_bus.py @@ -31,7 +31,7 @@ def test_bus(self, basic_flow_system_linopy): def test_bus_penalty(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps bus = fx.Bus('TestBus') flow_system.add_elements(bus, fx.Sink('WärmelastTest', sink=fx.Flow('Q_th_Last', 'TestBus')), diff --git a/tests/test_component.py b/tests/test_component.py index d87a28c29..18ceb717a 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -57,7 +57,7 @@ def test_component(self, basic_flow_system_linopy): def test_on_with_multiple_flows(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps ub_out2 = np.linspace(1, 1.5, 10).round(2) inputs = [ fx.Flow('In1', 'Fernwärme', relative_minimum=np.ones(10) * 0.1, size=100), @@ -128,7 +128,7 @@ def test_on_with_multiple_flows(self, basic_flow_system_linopy): def test_on_with_single_flow(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps inputs = [ fx.Flow('In1', 'Fernwärme', relative_minimum=np.ones(10) * 0.1, size=100), ] diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 49f1438e7..329da7f92 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -14,7 +14,7 @@ def sample_time_index(request): def test_scalar_conversion(sample_time_index): # Test scalar conversion - result = DataConverter.as_dataarray(42, sample_time_index) + result = DataConverter.to_dataarray(42, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (len(sample_time_index),) assert result.dims == ('time',) @@ -25,7 +25,7 @@ def test_series_conversion(sample_time_index): series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) # Test Series conversion - result = DataConverter.as_dataarray(series, sample_time_index) + result = DataConverter.to_dataarray(series, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (5,) assert result.dims == ('time',) @@ -37,7 +37,7 @@ def 
test_dataframe_conversion(sample_time_index): df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) # Test DataFrame conversion - result = DataConverter.as_dataarray(df, sample_time_index) + result = DataConverter.to_dataarray(df, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (5,) assert result.dims == ('time',) @@ -47,7 +47,7 @@ def test_dataframe_conversion(sample_time_index): def test_ndarray_conversion(sample_time_index): # Test 1D array conversion arr_1d = np.array([1, 2, 3, 4, 5]) - result = DataConverter.as_dataarray(arr_1d, sample_time_index) + result = DataConverter.to_dataarray(arr_1d, sample_time_index) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values, arr_1d) @@ -58,7 +58,7 @@ def test_dataarray_conversion(sample_time_index): original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) # Test DataArray conversion - result = DataConverter.as_dataarray(original, sample_time_index) + result = DataConverter.to_dataarray(original, sample_time_index) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values, original.values) @@ -71,42 +71,42 @@ def test_dataarray_conversion(sample_time_index): def test_invalid_inputs(sample_time_index): # Test invalid input type with pytest.raises(ConversionError): - DataConverter.as_dataarray('invalid_string', sample_time_index) + DataConverter.to_dataarray('invalid_string', sample_time_index) # Test mismatched Series index mismatched_series = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('2025-01-01', periods=6, freq='D')) with pytest.raises(ConversionError): - DataConverter.as_dataarray(mismatched_series, sample_time_index) + DataConverter.to_dataarray(mismatched_series, sample_time_index) # Test DataFrame with multiple columns df_multi_col = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]}, index=sample_time_index) with pytest.raises(ConversionError): - DataConverter.as_dataarray(df_multi_col, sample_time_index) + DataConverter.to_dataarray(df_multi_col, sample_time_index) # Test mismatched array shape with pytest.raises(ConversionError): - DataConverter.as_dataarray(np.array([1, 2, 3]), sample_time_index) # Wrong length + DataConverter.to_dataarray(np.array([1, 2, 3]), sample_time_index) # Wrong length # Test multi-dimensional array with pytest.raises(ConversionError): - DataConverter.as_dataarray(np.array([[1, 2], [3, 4]]), sample_time_index) # 2D array not allowed + DataConverter.to_dataarray(np.array([[1, 2], [3, 4]]), sample_time_index) # 2D array not allowed def test_time_index_validation(): # Test with unnamed index unnamed_index = pd.date_range('2024-01-01', periods=5, freq='D') with pytest.raises(ConversionError): - DataConverter.as_dataarray(42, unnamed_index) + DataConverter.to_dataarray(42, unnamed_index) # Test with empty index empty_index = pd.DatetimeIndex([], name='time') with pytest.raises(ValueError): - DataConverter.as_dataarray(42, empty_index) + DataConverter.to_dataarray(42, empty_index) # Test with non-DatetimeIndex wrong_type_index = pd.Index([1, 2, 3, 4, 5], name='time') with pytest.raises(ValueError): - DataConverter.as_dataarray(42, wrong_type_index) + DataConverter.to_dataarray(42, wrong_type_index) if __name__ == '__main__': diff --git a/tests/test_effect.py b/tests/test_effect.py index 5cbc04ac6..9b4e1012a 100644 --- a/tests/test_effect.py +++ b/tests/test_effect.py @@ -13,7 +13,7 @@ class TestBusModel: def test_minimal(self, 
basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps effect = fx.Effect('Effect1', '€', 'Testing Effect') flow_system.add_elements(effect) @@ -43,7 +43,7 @@ def test_minimal(self, basic_flow_system_linopy): def test_bounds(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps effect = fx.Effect('Effect1', '€', 'Testing Effect', minimum_operation=1.0, maximum_operation=1.1, diff --git a/tests/test_flow.py b/tests/test_flow.py index 2308dbd31..cce10b21a 100644 --- a/tests/test_flow.py +++ b/tests/test_flow.py @@ -14,7 +14,7 @@ class TestFlowModel: def test_flow_minimal(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow('Wärme', bus='Fernwärme', size=100) flow_system.add_elements(fx.Sink('Sink', sink=flow)) @@ -34,7 +34,7 @@ def test_flow_minimal(self, basic_flow_system_linopy): def test_flow(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -86,7 +86,7 @@ def test_flow(self, basic_flow_system_linopy): def test_effects_per_flow_hour(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps costs_per_flow_hour = xr.DataArray(np.linspace(1,2,timesteps.size), coords=(timesteps,)) co2_per_flow_hour = xr.DataArray(np.linspace(4, 5, timesteps.size), coords=(timesteps,)) @@ -120,7 +120,7 @@ class TestFlowInvestModel: def test_flow_invest(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -175,7 +175,7 @@ def test_flow_invest(self, basic_flow_system_linopy): def test_flow_invest_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -239,7 +239,7 @@ def test_flow_invest_optional(self, basic_flow_system_linopy): def test_flow_invest_optional_wo_min_size(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -303,7 +303,7 @@ def test_flow_invest_optional_wo_min_size(self, basic_flow_system_linopy): def test_flow_invest_wo_min_size_non_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -354,7 +354,7 @@ def test_flow_invest_wo_min_size_non_optional(self, basic_flow_system_linopy): def test_flow_invest_fixed_size(self, basic_flow_system_linopy): """Test flow with fixed size investment.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -446,7 +446,7 @@ class TestFlowOnModel: def test_flow_on(self, basic_flow_system_linopy): flow_system = 
basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -506,7 +506,7 @@ def test_flow_on(self, basic_flow_system_linopy): def test_effects_per_running_hour(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps costs_per_running_hour = xr.DataArray(np.linspace(1, 2, timesteps.size), coords=(timesteps,)) co2_per_running_hour = xr.DataArray(np.linspace(4, 5, timesteps.size), coords=(timesteps,)) @@ -553,7 +553,7 @@ def test_effects_per_running_hour(self, basic_flow_system_linopy): def test_consecutive_on_hours(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive on hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -619,7 +619,7 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy): def test_consecutive_on_hours_previous(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive on hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -686,7 +686,7 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy): def test_consecutive_off_hours(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive off hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -753,7 +753,7 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy): def test_consecutive_off_hours_previous(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive off hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -906,7 +906,7 @@ class TestFlowOnInvestModel: def test_flow_on_invest_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -991,7 +991,7 @@ def test_flow_on_invest_optional(self, basic_flow_system_linopy): def test_flow_on_invest_non_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -1078,7 +1078,7 @@ class TestFlowWithFixedProfile: def test_fixed_relative_profile(self, basic_flow_system_linopy): """Test flow with a fixed relative profile.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create a time-varying profile (e.g., for a load or renewable generation) profile = np.sin(np.linspace(0, 2 * np.pi, len(timesteps))) * 0.5 + 0.5 # Values between 0 and 1 @@ -1100,7 +1100,7 @@ def test_fixed_relative_profile(self, basic_flow_system_linopy): def test_fixed_profile_with_investment(self, basic_flow_system_linopy): """Test flow with fixed profile and investment.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = 
flow_system.timesteps # Create a fixed profile profile = np.sin(np.linspace(0, 2 * np.pi, len(timesteps))) * 0.5 + 0.5 diff --git a/tests/test_io.py b/tests/test_io.py index 2e6c61ccf..497b334c8 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -50,11 +50,12 @@ def test_flow_system_file_io(flow_system, highs_solver): def test_flow_system_io(flow_system): - di = flow_system.as_dict() - _ = fx.FlowSystem.from_dict(di) + flow_system.to_json('fs.json') - ds = flow_system.as_dataset() - _ = fx.FlowSystem.from_dataset(ds) + ds = flow_system.to_dataset() + new_fs = fx.FlowSystem.from_dataset(ds) + + assert flow_system == new_fs print(flow_system) flow_system.__repr__() diff --git a/tests/test_linear_converter.py b/tests/test_linear_converter.py index aaab60dcc..a01c17ef2 100644 --- a/tests/test_linear_converter.py +++ b/tests/test_linear_converter.py @@ -52,7 +52,7 @@ def test_basic_linear_converter(self, basic_flow_system_linopy): def test_linear_converter_time_varying(self, basic_flow_system_linopy): """Test a LinearConverter with time-varying conversion factors.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create time-varying efficiency (e.g., temperature-dependent) varying_efficiency = np.linspace(0.7, 0.9, len(timesteps)) @@ -268,7 +268,7 @@ def test_linear_converter_multidimensional(self, basic_flow_system_linopy): def test_edge_case_time_varying_conversion(self, basic_flow_system_linopy): """Test edge case with extreme time-varying conversion factors.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create fluctuating conversion efficiency (e.g., for a heat pump) # Values range from very low (0.1) to very high (5.0) @@ -317,7 +317,7 @@ def test_edge_case_time_varying_conversion(self, basic_flow_system_linopy): def test_piecewise_conversion(self, basic_flow_system_linopy): """Test a LinearConverter with PiecewiseConversion.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create input and output flows input_flow = fx.Flow('input', bus='input_bus', size=100) @@ -423,7 +423,7 @@ def test_piecewise_conversion(self, basic_flow_system_linopy): def test_piecewise_conversion_with_onoff(self, basic_flow_system_linopy): """Test a LinearConverter with PiecewiseConversion and OnOffParameters.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create input and output flows input_flow = fx.Flow('input', bus='input_bus', size=100) diff --git a/tests/test_storage.py b/tests/test_storage.py index a3b453c2b..472ba4add 100644 --- a/tests/test_storage.py +++ b/tests/test_storage.py @@ -14,8 +14,8 @@ class TestStorageModel: def test_basic_storage(self, basic_flow_system_linopy): """Test that basic storage model variables and constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps - timesteps_extra = flow_system.time_series_collection.timesteps_extra + timesteps = flow_system.timesteps + timesteps_extra = flow_system.timesteps_extra # Create a simple storage storage = fx.Storage( @@ -91,8 +91,8 @@ def test_basic_storage(self, basic_flow_system_linopy): def test_lossy_storage(self, basic_flow_system_linopy): """Test that basic storage model variables and constraints are 
correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps - timesteps_extra = flow_system.time_series_collection.timesteps_extra + timesteps = flow_system.timesteps + timesteps_extra = flow_system.timesteps_extra # Create a simple storage storage = fx.Storage( diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py deleted file mode 100644 index a8bc5fa85..000000000 --- a/tests/test_timeseries.py +++ /dev/null @@ -1,605 +0,0 @@ -import json -import tempfile -from pathlib import Path -from typing import Dict, List, Tuple - -import numpy as np -import pandas as pd -import pytest -import xarray as xr - -from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesCollection, TimeSeriesData - - -@pytest.fixture -def sample_timesteps(): - """Create a sample time index with the required 'time' name.""" - return pd.date_range('2023-01-01', periods=5, freq='D', name='time') - - -@pytest.fixture -def simple_dataarray(sample_timesteps): - """Create a simple DataArray with time dimension.""" - return xr.DataArray([10, 20, 30, 40, 50], coords={'time': sample_timesteps}, dims=['time']) - - -@pytest.fixture -def sample_timeseries(simple_dataarray): - """Create a sample TimeSeries object.""" - return TimeSeries(simple_dataarray, name='Test Series') - - -class TestTimeSeries: - """Test suite for TimeSeries class.""" - - def test_initialization(self, simple_dataarray): - """Test basic initialization of TimeSeries.""" - ts = TimeSeries(simple_dataarray, name='Test Series') - - # Check basic properties - assert ts.name == 'Test Series' - assert ts.aggregation_weight is None - assert ts.aggregation_group is None - - # Check data initialization - assert isinstance(ts.stored_data, xr.DataArray) - assert ts.stored_data.equals(simple_dataarray) - assert ts.active_data.equals(simple_dataarray) - - # Check backup was created - assert ts._backup.equals(simple_dataarray) - - # Check active timesteps - assert ts.active_timesteps.equals(simple_dataarray.indexes['time']) - - def test_initialization_with_aggregation_params(self, simple_dataarray): - """Test initialization with aggregation parameters.""" - ts = TimeSeries( - simple_dataarray, name='Weighted Series', aggregation_weight=0.5, aggregation_group='test_group' - ) - - assert ts.name == 'Weighted Series' - assert ts.aggregation_weight == 0.5 - assert ts.aggregation_group == 'test_group' - - def test_initialization_validation(self, sample_timesteps): - """Test validation during initialization.""" - # Test missing time dimension - invalid_data = xr.DataArray([1, 2, 3], dims=['invalid_dim']) - with pytest.raises(ValueError, match='must have a "time" index'): - TimeSeries(invalid_data, name='Invalid Series') - - # Test multi-dimensional data - multi_dim_data = xr.DataArray( - [[1, 2, 3], [4, 5, 6]], coords={'dim1': [0, 1], 'time': sample_timesteps[:3]}, dims=['dim1', 'time'] - ) - with pytest.raises(ValueError, match='dimensions of DataArray must be 1'): - TimeSeries(multi_dim_data, name='Multi-dim Series') - - def test_active_timesteps_getter_setter(self, sample_timeseries, sample_timesteps): - """Test active_timesteps getter and setter.""" - # Initial state should use all timesteps - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - - # Set to a subset - subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index - assert sample_timeseries.active_timesteps.equals(subset_index) - - # Active data should reflect the subset - assert 
sample_timeseries.active_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) - - # Reset to full index - sample_timeseries.active_timesteps = None - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - - # Test invalid type - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): - sample_timeseries.active_timesteps = 'invalid' - - def test_reset(self, sample_timeseries, sample_timesteps): - """Test reset method.""" - # Set to subset first - subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index - - # Reset - sample_timeseries.reset() - - # Should be back to full index - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data) - - def test_restore_data(self, sample_timeseries, simple_dataarray): - """Test restore_data method.""" - # Modify the stored data - new_data = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) - - # Store original data for comparison - original_data = sample_timeseries.stored_data - - # Set new data - sample_timeseries.stored_data = new_data - assert sample_timeseries.stored_data.equals(new_data) - - # Restore from backup - sample_timeseries.restore_data() - - # Should be back to original data - assert sample_timeseries.stored_data.equals(original_data) - assert sample_timeseries.active_data.equals(original_data) - - def test_stored_data_setter(self, sample_timeseries, sample_timesteps): - """Test stored_data setter with different data types.""" - # Test with a Series - series_data = pd.Series([5, 6, 7, 8, 9], index=sample_timesteps) - sample_timeseries.stored_data = series_data - assert np.array_equal(sample_timeseries.stored_data.values, series_data.values) - - # Test with a single-column DataFrame - df_data = pd.DataFrame({'col1': [15, 16, 17, 18, 19]}, index=sample_timesteps) - sample_timeseries.stored_data = df_data - assert np.array_equal(sample_timeseries.stored_data.values, df_data['col1'].values) - - # Test with a NumPy array - array_data = np.array([25, 26, 27, 28, 29]) - sample_timeseries.stored_data = array_data - assert np.array_equal(sample_timeseries.stored_data.values, array_data) - - # Test with a scalar - sample_timeseries.stored_data = 42 - assert np.all(sample_timeseries.stored_data.values == 42) - - # Test with another DataArray - another_dataarray = xr.DataArray([30, 31, 32, 33, 34], coords={'time': sample_timesteps}, dims=['time']) - sample_timeseries.stored_data = another_dataarray - assert sample_timeseries.stored_data.equals(another_dataarray) - - def test_stored_data_setter_no_change(self, sample_timeseries): - """Test stored_data setter when data doesn't change.""" - # Get current data - current_data = sample_timeseries.stored_data - current_backup = sample_timeseries._backup - - # Set the same data - sample_timeseries.stored_data = current_data - - # Backup shouldn't change - assert sample_timeseries._backup is current_backup # Should be the same object - - def test_from_datasource(self, sample_timesteps): - """Test from_datasource class method.""" - # Test with scalar - ts_scalar = TimeSeries.from_datasource(42, 'Scalar Series', sample_timesteps) - assert np.all(ts_scalar.stored_data.values == 42) - - # Test with Series - series_data = pd.Series([1, 2, 3, 4, 5], index=sample_timesteps) - ts_series = TimeSeries.from_datasource(series_data, 'Series Data', sample_timesteps) - assert np.array_equal(ts_series.stored_data.values, 
series_data.values) - - # Test with aggregation parameters - ts_with_agg = TimeSeries.from_datasource( - series_data, 'Aggregated Series', sample_timesteps, aggregation_weight=0.7, aggregation_group='group1' - ) - assert ts_with_agg.aggregation_weight == 0.7 - assert ts_with_agg.aggregation_group == 'group1' - - def test_to_json_from_json(self, sample_timeseries): - """Test to_json and from_json methods.""" - # Test to_json (dictionary only) - json_dict = sample_timeseries.to_json() - assert json_dict['name'] == sample_timeseries.name - assert 'data' in json_dict - assert 'coords' in json_dict['data'] - assert 'time' in json_dict['data']['coords'] - - # Test to_json with file saving - with tempfile.TemporaryDirectory() as tmpdirname: - filepath = Path(tmpdirname) / 'timeseries.json' - sample_timeseries.to_json(filepath) - assert filepath.exists() - - # Test from_json with file loading - loaded_ts = TimeSeries.from_json(path=filepath) - assert loaded_ts.name == sample_timeseries.name - assert np.array_equal(loaded_ts.stored_data.values, sample_timeseries.stored_data.values) - - # Test from_json with dictionary - loaded_ts_dict = TimeSeries.from_json(data=json_dict) - assert loaded_ts_dict.name == sample_timeseries.name - assert np.array_equal(loaded_ts_dict.stored_data.values, sample_timeseries.stored_data.values) - - # Test validation in from_json - with pytest.raises(ValueError, match="one of 'path' or 'data'"): - TimeSeries.from_json(data=json_dict, path='dummy.json') - - def test_all_equal(self, sample_timesteps): - """Test all_equal property.""" - # All equal values - equal_data = xr.DataArray([5, 5, 5, 5, 5], coords={'time': sample_timesteps}, dims=['time']) - ts_equal = TimeSeries(equal_data, 'Equal Series') - assert ts_equal.all_equal is True - - # Not all equal - unequal_data = xr.DataArray([5, 5, 6, 5, 5], coords={'time': sample_timesteps}, dims=['time']) - ts_unequal = TimeSeries(unequal_data, 'Unequal Series') - assert ts_unequal.all_equal is False - - def test_arithmetic_operations(self, sample_timeseries): - """Test arithmetic operations.""" - # Create a second TimeSeries for testing - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) - ts2 = TimeSeries(data2, 'Second Series') - - # Test operations between two TimeSeries objects - assert np.array_equal( - (sample_timeseries + ts2).values, sample_timeseries.active_data.values + ts2.active_data.values - ) - assert np.array_equal( - (sample_timeseries - ts2).values, sample_timeseries.active_data.values - ts2.active_data.values - ) - assert np.array_equal( - (sample_timeseries * ts2).values, sample_timeseries.active_data.values * ts2.active_data.values - ) - assert np.array_equal( - (sample_timeseries / ts2).values, sample_timeseries.active_data.values / ts2.active_data.values - ) - - # Test operations with DataArrays - assert np.array_equal((sample_timeseries + data2).values, sample_timeseries.active_data.values + data2.values) - assert np.array_equal((data2 + sample_timeseries).values, data2.values + sample_timeseries.active_data.values) - - # Test operations with scalars - assert np.array_equal((sample_timeseries + 5).values, sample_timeseries.active_data.values + 5) - assert np.array_equal((5 + sample_timeseries).values, 5 + sample_timeseries.active_data.values) - - # Test unary operations - assert np.array_equal((-sample_timeseries).values, -sample_timeseries.active_data.values) - assert np.array_equal((+sample_timeseries).values, +sample_timeseries.active_data.values) - 
-        assert np.array_equal((abs(sample_timeseries)).values, abs(sample_timeseries.active_data.values))
-
-    def test_comparison_operations(self, sample_timesteps):
-        """Test comparison operations."""
-        data1 = xr.DataArray([10, 20, 30, 40, 50], coords={'time': sample_timesteps}, dims=['time'])
-        data2 = xr.DataArray([5, 10, 15, 20, 25], coords={'time': sample_timesteps}, dims=['time'])
-
-        ts1 = TimeSeries(data1, 'Series 1')
-        ts2 = TimeSeries(data2, 'Series 2')
-
-        # Test __gt__ method
-        assert (ts1 > ts2).all().item()
-
-        # Test with mixed values
-        data3 = xr.DataArray([5, 25, 15, 45, 25], coords={'time': sample_timesteps}, dims=['time'])
-        ts3 = TimeSeries(data3, 'Series 3')
-
-        assert not (ts1 > ts3).all().item()  # Not all values in ts1 are greater than ts3
-
-    def test_numpy_ufunc(self, sample_timeseries):
-        """Test numpy ufunc compatibility."""
-        # Test basic numpy functions
-        assert np.array_equal(np.add(sample_timeseries, 5).values, np.add(sample_timeseries.active_data, 5).values)
-
-        assert np.array_equal(
-            np.multiply(sample_timeseries, 2).values, np.multiply(sample_timeseries.active_data, 2).values
-        )
-
-        # Test with two TimeSeries objects
-        data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time'])
-        ts2 = TimeSeries(data2, 'Second Series')
-
-        assert np.array_equal(
-            np.add(sample_timeseries, ts2).values, np.add(sample_timeseries.active_data, ts2.active_data).values
-        )
-
-    def test_sel_and_isel_properties(self, sample_timeseries):
-        """Test sel and isel properties."""
-        # Test that sel property works
-        selected = sample_timeseries.sel(time=sample_timeseries.active_timesteps[0])
-        assert selected.item() == sample_timeseries.active_data.values[0]
-
-        # Test that isel property works
-        indexed = sample_timeseries.isel(time=0)
-        assert indexed.item() == sample_timeseries.active_data.values[0]
-
-
-@pytest.fixture
-def sample_collection(sample_timesteps):
-    """Create a sample TimeSeriesCollection."""
-    return TimeSeriesCollection(sample_timesteps)
-
-
-@pytest.fixture
-def populated_collection(sample_collection):
-    """Create a TimeSeriesCollection with test data."""
-    # Add a constant time series
-    sample_collection.create_time_series(42, 'constant_series')
-
-    # Add a varying time series
-    varying_data = np.array([10, 20, 30, 40, 50])
-    sample_collection.create_time_series(varying_data, 'varying_series')
-
-    # Add a time series with extra timestep
-    sample_collection.create_time_series(
-        np.array([1, 2, 3, 4, 5, 6]), 'extra_timestep_series', needs_extra_timestep=True
-    )
-
-    # Add series with aggregation settings
-    sample_collection.create_time_series(
-        TimeSeriesData(np.array([5, 5, 5, 5, 5]), agg_group='group1'), 'group1_series1'
-    )
-    sample_collection.create_time_series(
-        TimeSeriesData(np.array([6, 6, 6, 6, 6]), agg_group='group1'), 'group1_series2'
-    )
-    sample_collection.create_time_series(
-        TimeSeriesData(np.array([10, 10, 10, 10, 10]), agg_weight=0.5), 'weighted_series'
-    )
-
-    return sample_collection
-
-
-class TestTimeSeriesCollection:
-    """Test suite for TimeSeriesCollection."""
-
-    def test_initialization(self, sample_timesteps):
-        """Test basic initialization."""
-        collection = TimeSeriesCollection(sample_timesteps)
-
-        assert collection.all_timesteps.equals(sample_timesteps)
-        assert len(collection.all_timesteps_extra) == len(sample_timesteps) + 1
-        assert isinstance(collection.all_hours_per_timestep, xr.DataArray)
-        assert len(collection) == 0
-
-    def test_initialization_with_custom_hours(self, sample_timesteps):
-        """Test initialization with custom hour settings."""
-        # Test with last timestep duration
-        last_timestep_hours = 12
-        collection = TimeSeriesCollection(sample_timesteps, hours_of_last_timestep=last_timestep_hours)
-
-        # Verify the last timestep duration
-        extra_step_delta = collection.all_timesteps_extra[-1] - collection.all_timesteps_extra[-2]
-        assert extra_step_delta == pd.Timedelta(hours=last_timestep_hours)
-
-        # Test with previous timestep duration
-        hours_per_step = 8
-        collection2 = TimeSeriesCollection(sample_timesteps, hours_of_previous_timesteps=hours_per_step)
-
-        assert collection2.hours_of_previous_timesteps == hours_per_step
-
-    def test_create_time_series(self, sample_collection):
-        """Test creating time series."""
-        # Test scalar
-        ts1 = sample_collection.create_time_series(42, 'scalar_series')
-        assert ts1.name == 'scalar_series'
-        assert np.all(ts1.active_data.values == 42)
-
-        # Test numpy array
-        data = np.array([1, 2, 3, 4, 5])
-        ts2 = sample_collection.create_time_series(data, 'array_series')
-        assert np.array_equal(ts2.active_data.values, data)
-
-        # Test with TimeSeriesData
-        ts3 = sample_collection.create_time_series(TimeSeriesData(10, agg_weight=0.7), 'weighted_series')
-        assert ts3.aggregation_weight == 0.7
-
-        # Test with extra timestep
-        ts4 = sample_collection.create_time_series(5, 'extra_series', needs_extra_timestep=True)
-        assert ts4.needs_extra_timestep
-        assert len(ts4.active_data) == len(sample_collection.timesteps_extra)
-
-        # Test duplicate name
-        with pytest.raises(ValueError, match='already exists'):
-            sample_collection.create_time_series(1, 'scalar_series')
-
-    def test_access_time_series(self, populated_collection):
-        """Test accessing time series."""
-        # Test __getitem__
-        ts = populated_collection['varying_series']
-        assert ts.name == 'varying_series'
-
-        # Test __contains__ with string
-        assert 'constant_series' in populated_collection
-        assert 'nonexistent_series' not in populated_collection
-
-        # Test __contains__ with TimeSeries object
-        assert populated_collection['varying_series'] in populated_collection
-
-        # Test __iter__
-        names = [ts.name for ts in populated_collection]
-        assert len(names) == 6
-        assert 'varying_series' in names
-
-        # Test access to non-existent series
-        with pytest.raises(KeyError):
-            populated_collection['nonexistent_series']
-
-    def test_constants_and_non_constants(self, populated_collection):
-        """Test constants and non_constants properties."""
-        # Test constants
-        constants = populated_collection.constants
-        assert len(constants) == 4  # constant_series, group1_series1, group1_series2, weighted_series
-        assert all(ts.all_equal for ts in constants)
-
-        # Test non_constants
-        non_constants = populated_collection.non_constants
-        assert len(non_constants) == 2  # varying_series, extra_timestep_series
-        assert all(not ts.all_equal for ts in non_constants)
-
-        # Test modifying a series changes the results
-        populated_collection['constant_series'].stored_data = np.array([1, 2, 3, 4, 5])
-        updated_constants = populated_collection.constants
-        assert len(updated_constants) == 3  # One less constant
-        assert 'constant_series' not in [ts.name for ts in updated_constants]
-
-    def test_timesteps_properties(self, populated_collection, sample_timesteps):
-        """Test timestep-related properties."""
-        # Test default (all) timesteps
-        assert populated_collection.timesteps.equals(sample_timesteps)
-        assert len(populated_collection.timesteps_extra) == len(sample_timesteps) + 1
-
-        # Test activating a subset
-        subset = sample_timesteps[1:3]
-        populated_collection.activate_timesteps(subset)
-
-        assert populated_collection.timesteps.equals(subset)
-        assert len(populated_collection.timesteps_extra) == len(subset) + 1
-
-        # Check that time series were updated
-        assert populated_collection['varying_series'].active_timesteps.equals(subset)
-        assert populated_collection['extra_timestep_series'].active_timesteps.equals(
-            populated_collection.timesteps_extra
-        )
-
-        # Test reset
-        populated_collection.reset()
-        assert populated_collection.timesteps.equals(sample_timesteps)
-
-    def test_to_dataframe_and_dataset(self, populated_collection):
-        """Test conversion to DataFrame and Dataset."""
-        # Test to_dataset
-        ds = populated_collection.to_dataset()
-        assert isinstance(ds, xr.Dataset)
-        assert len(ds.data_vars) == 6
-
-        # Test to_dataframe with different filters
-        df_all = populated_collection.to_dataframe(filtered='all')
-        assert len(df_all.columns) == 6
-
-        df_constant = populated_collection.to_dataframe(filtered='constant')
-        assert len(df_constant.columns) == 4
-
-        df_non_constant = populated_collection.to_dataframe(filtered='non_constant')
-        assert len(df_non_constant.columns) == 2
-
-        # Test invalid filter
-        with pytest.raises(ValueError):
-            populated_collection.to_dataframe(filtered='invalid')
-
-    def test_calculate_aggregation_weights(self, populated_collection):
-        """Test aggregation weight calculation."""
-        weights = populated_collection.calculate_aggregation_weights()
-
-        # Group weights should be 0.5 each (1/2)
-        assert populated_collection.group_weights['group1'] == 0.5
-
-        # Series in group1 should have weight 0.5
-        assert weights['group1_series1'] == 0.5
-        assert weights['group1_series2'] == 0.5
-
-        # Series with explicit weight should have that weight
-        assert weights['weighted_series'] == 0.5
-
-        # Series without group or weight should have weight 1
-        assert weights['constant_series'] == 1
-
-    def test_insert_new_data(self, populated_collection, sample_timesteps):
-        """Test inserting new data."""
-        # Create new data
-        new_data = pd.DataFrame(
-            {
-                'constant_series': [100, 100, 100, 100, 100],
-                'varying_series': [5, 10, 15, 20, 25],
-                # extra_timestep_series is omitted to test partial updates
-            },
-            index=sample_timesteps,
-        )
-
-        # Insert data
-        populated_collection.insert_new_data(new_data)
-
-        # Verify updates
-        assert np.all(populated_collection['constant_series'].active_data.values == 100)
-        assert np.array_equal(populated_collection['varying_series'].active_data.values, np.array([5, 10, 15, 20, 25]))
-
-        # Series not in the DataFrame should be unchanged
-        assert np.array_equal(
-            populated_collection['extra_timestep_series'].active_data.values[:-1], np.array([1, 2, 3, 4, 5])
-        )
-
-        # Test with mismatched index
-        bad_index = pd.date_range('2023-02-01', periods=5, freq='D', name='time')
-        bad_data = pd.DataFrame({'constant_series': [1, 1, 1, 1, 1]}, index=bad_index)
-
-        with pytest.raises(ValueError, match='must match collection timesteps'):
-            populated_collection.insert_new_data(bad_data)
-
-    def test_restore_data(self, populated_collection):
-        """Test restoring original data."""
-        # Capture original data
-        original_values = {name: ts.stored_data.copy() for name, ts in populated_collection.time_series_data.items()}
-
-        # Modify data
-        new_data = pd.DataFrame(
-            {
-                name: np.ones(len(populated_collection.timesteps)) * 999
-                for name in populated_collection.time_series_data
-                if not populated_collection[name].needs_extra_timestep
-            },
-            index=populated_collection.timesteps,
-        )
-
-        populated_collection.insert_new_data(new_data)
-
-        # Verify data was changed
-        assert np.all(populated_collection['constant_series'].active_data.values == 999)
-
-        # Restore data
-        populated_collection.restore_data()
-
-        # Verify data was restored
-        for name, original in original_values.items():
-            restored = populated_collection[name].stored_data
-            assert np.array_equal(restored.values, original.values)
-
-    def test_class_method_with_uniform_timesteps(self):
-        """Test the with_uniform_timesteps class method."""
-        collection = TimeSeriesCollection.with_uniform_timesteps(
-            start_time=pd.Timestamp('2023-01-01'), periods=24, freq='h', hours_per_step=1
-        )
-
-        assert len(collection.timesteps) == 24
-        assert collection.hours_of_previous_timesteps == 1
-        assert (collection.timesteps[1] - collection.timesteps[0]) == pd.Timedelta(hours=1)
-
-    def test_hours_per_timestep(self, populated_collection):
-        """Test hours_per_timestep calculation."""
-        # Standard case - uniform timesteps
-        hours = populated_collection.hours_per_timestep.values
-        assert np.allclose(hours, 24)  # Default is daily timesteps
-
-        # Create non-uniform timesteps
-        non_uniform_times = pd.DatetimeIndex(
-            [
-                pd.Timestamp('2023-01-01'),
-                pd.Timestamp('2023-01-02'),
-                pd.Timestamp('2023-01-03 12:00:00'),  # 1.5 days from previous
-                pd.Timestamp('2023-01-04'),  # 0.5 days from previous
-                pd.Timestamp('2023-01-06'),  # 2 days from previous
-            ],
-            name='time',
-        )
-
-        collection = TimeSeriesCollection(non_uniform_times)
-        hours = collection.hours_per_timestep.values
-
-        # Expected hours between timestamps
-        expected = np.array([24, 36, 12, 48, 48])
-        assert np.allclose(hours, expected)
-
-    def test_validation_and_errors(self, sample_timesteps):
-        """Test validation and error handling."""
-        # Test non-DatetimeIndex
-        with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'):
-            TimeSeriesCollection(pd.Index([1, 2, 3, 4, 5]))
-
-        # Test too few timesteps
-        with pytest.raises(ValueError, match='must contain at least 2 timestamps'):
-            TimeSeriesCollection(pd.DatetimeIndex([pd.Timestamp('2023-01-01')], name='time'))
-
-        # Test invalid active_timesteps
-        collection = TimeSeriesCollection(sample_timesteps)
-        invalid_timesteps = pd.date_range('2024-01-01', periods=3, freq='D', name='time')
-
-        with pytest.raises(ValueError, match='must be a subset'):
-            collection.activate_timesteps(invalid_timesteps)