diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1d692d5e5..45121534f 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,51 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 ## [Unreleased]

+### Changed
+* **BREAKING**: `relative_minimum_charge_state` and `relative_maximum_charge_state` no longer carry an extra timestep. The final charge state can now be constrained via the new parameters `relative_minimum_final_charge_state` and `relative_maximum_final_charge_state` instead
+* FlowSystems cannot be shared across multiple Calculations anymore. A copy of the FlowSystem is created instead, making every Calculation independent
+* Type system overhaul - added a clear separation between temporal and non-temporal data throughout the codebase for better clarity
+* FlowSystem data management simplified - removed the `time_series_collection` pattern in favor of direct timestep properties
+* Enhanced FlowSystem interface with improved `__repr__()` and `__str__()` methods
+
+### Added
+* **NEW**: Complete serialization infrastructure through the `Interface` base class
+  * IO for all Interfaces and the FlowSystem with round-trip serialization support
+  * Automatic DataArray extraction and restoration
+  * NetCDF export/import capabilities for all Interface objects and the FlowSystem
+  * JSON export for documentation purposes
+  * Recursive handling of nested Interface objects
+* **NEW**: FlowSystem data manipulation methods
+  * `sel()` and `isel()` methods for temporal data selection
+  * `resample()` method for temporal resampling
+  * `copy()` method to create a copy of a FlowSystem, including all underlying Elements and their data
+  * `__eq__()` method for FlowSystem comparison
+* **NEW**: Storage component enhancements
+  * `relative_minimum_final_charge_state` parameter for final state control
+  * `relative_maximum_final_charge_state` parameter for final state control
+* *Internal*: Enhanced data handling methods
+  * `fit_to_model_coords()` method for data alignment
+  * `fit_effects_to_model_coords()` method for effect data processing
+  * `connect_and_transform()` method replacing separate operations
+* **NEW**: Core data handling improvements
+  * `get_dataarray_stats()` function for statistical summaries
+  * Enhanced `DataConverter` class with better TimeSeriesData support
+
+### Fixed
+* Enhanced NetCDF I/O with proper attribute preservation for DataArrays
+* Improved error handling and validation in serialization processes
+* Better type consistency across all framework components
+
+### Known Issues
+* Plotly >= 6 may raise errors if "nbformat" is not installed. We pinned plotly to <6, but this may be fixed in the future.
+* IO for single Interfaces/Elements to Datasets might not work properly if the Interface/Element is not part of a fully transformed and connected FlowSystem. This arises from numeric data not being stored as xr.DataArray by the user. To avoid this, always call `to_dataset()` on Elements inside a FlowSystem that is connected and transformed.
+
+### Deprecated
+* The `agg_group` and `agg_weight` parameters of `TimeSeriesData` are deprecated and will be removed in a future version. Use `aggregation_group` and `aggregation_weight` instead.
+* The `active_timesteps` parameter of `Calculation` is deprecated and will be removed in a future version. Use the new `sel(time=...)` method on the FlowSystem instead.
+* The assignment of Bus objects to `Flow.bus` is deprecated and will be removed in a future version. Use the label of the Bus instead.
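+  * For example (matching the updated `examples/01_Simple/simple_example.py`): `fx.Flow('Q_th_unload', bus='Fernwärme', size=1000)` instead of passing the `Bus` object itself.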
+* The usage of Effects objects in Dicts to assign shares to Effects is deprecated and will be removed in a future version. Use the label of the Effect instead. + ## [2.1.2] - 2025-06-14 ### Fixed diff --git a/examples/01_Simple/simple_example.py b/examples/01_Simple/simple_example.py index 45550c9cc..963f2fbe1 100644 --- a/examples/01_Simple/simple_example.py +++ b/examples/01_Simple/simple_example.py @@ -67,7 +67,8 @@ discharging=fx.Flow('Q_th_unload', bus='Fernwärme', size=1000), capacity_in_flow_hours=fx.InvestParameters(fix_effects=20, fixed_size=30, optional=False), initial_charge_state=0, # Initial storage state: empty - relative_maximum_charge_state=1 / 100 * np.array([80, 70, 80, 80, 80, 80, 80, 80, 80, 80]), + relative_maximum_charge_state=1 / 100 * np.array([80, 70, 80, 80, 80, 80, 80, 80, 80]), + relative_maximum_final_charge_state=0.8, eta_charge=0.9, eta_discharge=1, # Efficiency factors for charging/discharging relative_loss_per_hour=0.08, # 8% loss per hour. Absolute loss depends on current charge state diff --git a/examples/03_Calculation_types/example_calculation_types.py b/examples/03_Calculation_types/example_calculation_types.py index 97b18e3c0..cac628042 100644 --- a/examples/03_Calculation_types/example_calculation_types.py +++ b/examples/03_Calculation_types/example_calculation_types.py @@ -48,9 +48,9 @@ # TimeSeriesData objects TS_heat_demand = fx.TimeSeriesData(heat_demand) - TS_electricity_demand = fx.TimeSeriesData(electricity_demand, agg_weight=0.7) - TS_electricity_price_sell = fx.TimeSeriesData(-(electricity_demand - 0.5), agg_group='p_el') - TS_electricity_price_buy = fx.TimeSeriesData(electricity_price + 0.5, agg_group='p_el') + TS_electricity_demand = fx.TimeSeriesData(electricity_demand, aggregation_weight=0.7) + TS_electricity_price_sell = fx.TimeSeriesData(-(electricity_demand - 0.5), aggregation_group='p_el') + TS_electricity_price_buy = fx.TimeSeriesData(electricity_price + 0.5, aggregation_group='p_el') flow_system = fx.FlowSystem(timesteps) flow_system.add_elements( @@ -164,12 +164,12 @@ if full: calculation = fx.FullCalculation('Full', flow_system) calculation.do_modeling() - calculation.solve(fx.solvers.HighsSolver(0, 60)) + calculation.solve(fx.solvers.HighsSolver(0.01/100, 60)) calculations.append(calculation) if segmented: calculation = fx.SegmentedCalculation('Segmented', flow_system, segment_length, overlap_length) - calculation.do_modeling_and_solve(fx.solvers.HighsSolver(0, 60)) + calculation.do_modeling_and_solve(fx.solvers.HighsSolver(0.01/100, 60)) calculations.append(calculation) if aggregated: @@ -178,7 +178,7 @@ aggregation_parameters.time_series_for_low_peaks = [TS_electricity_demand, TS_heat_demand] calculation = fx.AggregatedCalculation('Aggregated', flow_system, aggregation_parameters) calculation.do_modeling() - calculation.solve(fx.solvers.HighsSolver(0, 60)) + calculation.solve(fx.solvers.HighsSolver(0.01/100, 60)) calculations.append(calculation) # Get solutions for plotting for different calculations diff --git a/flixopt/aggregation.py b/flixopt/aggregation.py index f149d5f20..d47a42997 100644 --- a/flixopt/aggregation.py +++ b/flixopt/aggregation.py @@ -274,11 +274,11 @@ def use_extreme_periods(self): @property def labels_for_high_peaks(self) -> List[str]: - return [ts.label for ts in self.time_series_for_high_peaks] + return [ts.name for ts in self.time_series_for_high_peaks] @property def labels_for_low_peaks(self) -> List[str]: - return [ts.label for ts in self.time_series_for_low_peaks] + return [ts.name for ts in 
self.time_series_for_low_peaks]

     @property
     def use_low_peaks(self):
diff --git a/flixopt/calculation.py b/flixopt/calculation.py
index c7367cad2..66a33497b 100644
--- a/flixopt/calculation.py
+++ b/flixopt/calculation.py
@@ -12,10 +12,13 @@
 import math
 import pathlib
 import timeit
-from typing import Any, Dict, List, Optional, Union
+import warnings
+from collections import Counter
+from typing import Annotated, Any, Dict, List, Optional, Union

 import numpy as np
 import pandas as pd
+import xarray as xr
 import yaml

 from . import io as fx_io
@@ -23,13 +26,13 @@
 from .aggregation import AggregationModel, AggregationParameters
 from .components import Storage
 from .config import CONFIG
-from .core import Scalar
+from .core import DataConverter, Scalar, TimeSeriesData, drop_constant_arrays
 from .elements import Component
 from .features import InvestmentModel
 from .flow_system import FlowSystem
 from .results import CalculationResults, SegmentedCalculationResults
 from .solvers import _Solver
-from .structure import SystemModel, copy_and_convert_datatypes, get_compact_representation
+from .structure import SystemModel

 logger = logging.getLogger('flixopt')

@@ -43,20 +46,39 @@
     def __init__(
         self,
         name: str,
         flow_system: FlowSystem,
-        active_timesteps: Optional[pd.DatetimeIndex] = None,
+        active_timesteps: Annotated[
+            Optional[pd.DatetimeIndex],
+            "DEPRECATED: Use flow_system.sel(time=...) or flow_system.isel(time=...) instead"
+        ] = None,
         folder: Optional[pathlib.Path] = None,
     ):
         """
         Args:
             name: name of calculation
             flow_system: flow_system which should be calculated
-            active_timesteps: list with indices, which should be used for calculation. If None, then all timesteps are used.
             folder: folder where results should be saved. If None, then the current working directory is used.
         """
         self.name = name
+        if flow_system.used_in_calculation:
+            logger.warning(
+                f'FlowSystem {flow_system} is already used in a calculation. '
+                f'Creating a copy for Calculation "{self.name}".'
+            )
+            flow_system = flow_system.copy()
+
+        if active_timesteps is not None:
+            warnings.warn(
+                "The 'active_timesteps' parameter is deprecated and will be removed in a future version. "
" + 'Use flow_system.sel(time=timesteps) or flow_system.isel(time=indices) before passing ' + 'the FlowSystem to the Calculation instead.', + DeprecationWarning, + stacklevel=2, + ) + flow_system = flow_system.sel(time=active_timesteps) + + flow_system._used_in_calculation = True + self.flow_system = flow_system self.model: Optional[SystemModel] = None - self.active_timesteps = active_timesteps + self._active_timesteps = active_timesteps # deprecated self.durations = {'modeling': 0.0, 'solving': 0.0, 'saving': 0.0} self.folder = pathlib.Path.cwd() / 'results' if folder is None else pathlib.Path(folder) @@ -119,7 +141,7 @@ def main_results(self) -> Dict[str, Union[Scalar, Dict]]: def summary(self): return { 'Name': self.name, - 'Number of timesteps': len(self.flow_system.time_series_collection.timesteps), + 'Number of timesteps': len(self.flow_system.timesteps), 'Calculation Type': self.__class__.__name__, 'Constraints': self.model.constraints.ncons, 'Variables': self.model.variables.nvars, @@ -128,6 +150,15 @@ def summary(self): 'Config': CONFIG.to_dict(), } + @property + def active_timesteps(self) -> pd.DatetimeIndex: + warnings.warn( + "The 'active_timesteps' is deprecated and will be removed in a future version.", + DeprecationWarning, + stacklevel=2, + ) + return self.flow_system.timesteps + class FullCalculation(Calculation): """ @@ -136,7 +167,7 @@ class for defined way of solving a flow_system optimization def do_modeling(self) -> SystemModel: t_start = timeit.default_timer() - self._activate_time_series() + self.flow_system.connect_and_transform() self.model = self.flow_system.create_model() self.model.do_modeling() @@ -181,12 +212,6 @@ def solve(self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_ma self.results = CalculationResults.from_calculation(self) - def _activate_time_series(self): - self.flow_system.transform_data() - self.flow_system.time_series_collection.activate_timesteps( - active_timesteps=self.active_timesteps, - ) - class AggregatedCalculation(FullCalculation): """ @@ -199,7 +224,10 @@ def __init__( flow_system: FlowSystem, aggregation_parameters: AggregationParameters, components_to_clusterize: Optional[List[Component]] = None, - active_timesteps: Optional[pd.DatetimeIndex] = None, + active_timesteps: Annotated[ + Optional[pd.DatetimeIndex], + 'DEPRECATED: Use flow_system.sel(time=...) or flow_system.isel(time=...) instead', + ] = None, folder: Optional[pathlib.Path] = None, ): """ @@ -213,8 +241,6 @@ def __init__( components_to_clusterize: List of Components to perform aggregation on. If None, then all components are aggregated. This means, teh variables in the components are equalized to each other, according to the typical periods computed in the DataAggregation - active_timesteps: pd.DatetimeIndex or None - list with indices, which should be used for calculation. If None, then all timesteps are used. folder: folder where results should be saved. If None, then the current working directory is used. 
""" super().__init__(name, flow_system, active_timesteps, folder=folder) @@ -224,7 +250,7 @@ def __init__( def do_modeling(self) -> SystemModel: t_start = timeit.default_timer() - self._activate_time_series() + self.flow_system.connect_and_transform() self._perform_aggregation() # Model the System @@ -245,8 +271,8 @@ def _perform_aggregation(self): # Validation dt_min, dt_max = ( - np.min(self.flow_system.time_series_collection.hours_per_timestep), - np.max(self.flow_system.time_series_collection.hours_per_timestep), + np.min(self.flow_system.hours_per_timestep), + np.max(self.flow_system.hours_per_timestep), ) if not dt_min == dt_max: raise ValueError( @@ -255,11 +281,11 @@ def _perform_aggregation(self): ) steps_per_period = ( self.aggregation_parameters.hours_per_period - / self.flow_system.time_series_collection.hours_per_timestep.max() + / self.flow_system.hours_per_timestep.max() ) is_integer = ( self.aggregation_parameters.hours_per_period - % self.flow_system.time_series_collection.hours_per_timestep.max() + % self.flow_system.hours_per_timestep.max() ).item() == 0 if not (steps_per_period.size == 1 and is_integer): raise ValueError( @@ -270,15 +296,17 @@ def _perform_aggregation(self): logger.info(f'{"":#^80}') logger.info(f'{" Aggregating TimeSeries Data ":#^80}') + ds = self.flow_system.to_dataset() + + temporaly_changing_ds = drop_constant_arrays(ds, dim='time') + # Aggregation - creation of aggregated timeseries: self.aggregation = Aggregation( - original_data=self.flow_system.time_series_collection.to_dataframe( - include_extra_timestep=False - ), # Exclude last row (NaN) + original_data=temporaly_changing_ds.to_dataframe(), hours_per_time_step=float(dt_min), hours_per_period=self.aggregation_parameters.hours_per_period, nr_of_periods=self.aggregation_parameters.nr_of_periods, - weights=self.flow_system.time_series_collection.calculate_aggregation_weights(), + weights=self.calculate_aggregation_weights(temporaly_changing_ds), time_series_for_high_peaks=self.aggregation_parameters.labels_for_high_peaks, time_series_for_low_peaks=self.aggregation_parameters.labels_for_low_peaks, ) @@ -286,11 +314,41 @@ def _perform_aggregation(self): self.aggregation.cluster() self.aggregation.plot(show=True, save=self.folder / 'aggregation.html') if self.aggregation_parameters.aggregate_data_and_fix_non_binary_vars: - self.flow_system.time_series_collection.insert_new_data( - self.aggregation.aggregated_data, include_extra_timestep=False - ) + ds = self.flow_system.to_dataset() + for name, series in self.aggregation.aggregated_data.items(): + da = DataConverter.to_dataarray(series, timesteps=self.flow_system.timesteps).rename(name).assign_attrs(ds[name].attrs) + if TimeSeriesData.is_timeseries_data(da): + da = TimeSeriesData.from_dataarray(da) + + ds[name] = da + + self.flow_system = FlowSystem.from_dataset(ds) + self.flow_system.connect_and_transform() self.durations['aggregation'] = round(timeit.default_timer() - t_start_agg, 2) + @classmethod + def calculate_aggregation_weights(cls, ds: xr.Dataset) -> Dict[str, float]: + """Calculate weights for all datavars in the dataset. 
+        Weights are pulled from the attrs of the data variables."""
+
+        groups = [da.attrs['aggregation_group'] for da in ds.values() if 'aggregation_group' in da.attrs]
+        group_counts = Counter(groups)
+
+        # Calculate weight for each group (1/count)
+        group_weights = {group: 1 / count for group, count in group_counts.items()}
+
+        weights = {}
+        for name, da in ds.data_vars.items():
+            group_weight = group_weights.get(da.attrs.get('aggregation_group'))
+            if group_weight is not None:
+                weights[name] = group_weight
+            else:
+                weights[name] = da.attrs.get('aggregation_weight', 1)
+
+        if np.all(np.isclose(list(weights.values()), 1, atol=1e-6)):
+            logger.info('All Aggregation weights were set to 1')
+
+        return weights
+

class SegmentedCalculation(Calculation):
    def __init__(
@@ -327,20 +385,18 @@ def __init__(
         self.nr_of_previous_values = nr_of_previous_values
         self.sub_calculations: List[FullCalculation] = []

-        self.all_timesteps = self.flow_system.time_series_collection.all_timesteps
-        self.all_timesteps_extra = self.flow_system.time_series_collection.all_timesteps_extra
         self.segment_names = [
             f'Segment_{i + 1}' for i in range(math.ceil(len(self.all_timesteps) / self.timesteps_per_segment))
         ]
-        self.active_timesteps_per_segment = self._calculate_timesteps_of_segment()
+        self._timesteps_per_segment = self._calculate_timesteps_per_segment()

         assert timesteps_per_segment > 2, 'The Segment length must be greater 2, due to unwanted internal side effects'
         assert self.timesteps_per_segment_with_overlap <= len(self.all_timesteps), (
             f'{self.timesteps_per_segment_with_overlap=} cant be greater than the total length {len(self.all_timesteps)}'
         )
-        self.flow_system._connect_network()  # Connect network to ensure that all FLows know their Component
+        self.flow_system._connect_network()  # Connect network to ensure that all Flows know their Component
         # Storing all original start values
         self._original_start_values = {
             **{flow.label_full: flow.previous_flow_rate for flow in self.flow_system.flows.values()},
             **{
                 comp.label_full: comp.initial_charge_state
                 for comp in self.flow_system.components.values()
                 if isinstance(comp, Storage)
             },
         }
         self._transfered_start_values: List[Dict[str, Any]] = []

-    def do_modeling_and_solve(
-        self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_main_results: bool = False
-    ):
-        logger.info(f'{"":#^80}')
-        logger.info(f'{" Segmented Solving ":#^80}')
-
+    def _create_sub_calculations(self):
         for i, (segment_name, timesteps_of_segment) in enumerate(
-            zip(self.segment_names, self.active_timesteps_per_segment, strict=False)
+            zip(self.segment_names, self._timesteps_per_segment, strict=True)
         ):
-            if self.sub_calculations:
-                self._transfer_start_values(i)
+            calc = FullCalculation(f'{self.name}-{segment_name}', self.flow_system.sel(time=timesteps_of_segment))
+            calc.flow_system._connect_network()  # Connect the network so that Flows know their full labels
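+            # Each sub-calculation owns an independent FlowSystem slice, so segments cannot
+            # interfere with each other; start values are handed over between segments later
+            # via _transfer_start_values().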
+            self.sub_calculations.append(calc)

             logger.info(
                 f'{segment_name} [{i + 1:>2}/{len(self.segment_names):<2}] '
                 f'({timesteps_of_segment[0]} -> {timesteps_of_segment[-1]}):'
             )
-            calculation = FullCalculation(
-                f'{self.name}-{segment_name}', self.flow_system, active_timesteps=timesteps_of_segment

+    def do_modeling_and_solve(
+        self, solver: _Solver, log_file: Optional[pathlib.Path] = None, log_main_results: bool = False
+    ):
+        logger.info(f'{"":#^80}')
+        logger.info(f'{" Segmented Solving ":#^80}')
+        self._create_sub_calculations()
+
+        for i, calculation in enumerate(self.sub_calculations):
+            logger.info(
+                f'{self.segment_names[i]} [{i + 1:>2}/{len(self.segment_names):<2}] '
+                f'({calculation.flow_system.timesteps[0]} -> {calculation.flow_system.timesteps[-1]}):'
             )
-            self.sub_calculations.append(calculation)
+
+            if i > 0 and self.nr_of_previous_values > 0:
+                self._transfer_start_values(i)
+
             calculation.do_modeling()
-            invest_elements = [
-                model.label_full
-                for component in self.flow_system.components.values()
-                for model in component.model.all_sub_models
-                if isinstance(model, InvestmentModel)
-            ]
-            if invest_elements:
-                logger.critical(
-                    f'Investments are not supported in Segmented Calculation! '
-                    f'Following InvestmentModels were found: {invest_elements}'
-                )
+
+            # Warn about Investments, but only in the first run
+            if i == 0:
+                invest_elements = [
+                    model.label_full
+                    for component in calculation.flow_system.components.values()
+                    for model in component.model.all_sub_models
+                    if isinstance(model, InvestmentModel)
+                ]
+                if invest_elements:
+                    logger.critical(
+                        f'Investments are not supported in Segmented Calculation! '
+                        f'Following InvestmentModels were found: {invest_elements}'
+                    )
+
             calculation.solve(
                 solver,
                 log_file=pathlib.Path(log_file) if log_file is not None else self.folder / f'{self.name}.log',
                 log_main_results=log_main_results,
             )
-        self._reset_start_values()
-
         for calc in self.sub_calculations:
             for key, value in calc.durations.items():
                 self.durations[key] += value

         self.results = SegmentedCalculationResults.from_calculation(self)

-    def _transfer_start_values(self, segment_index: int):
+    def _transfer_start_values(self, i: int):
         """
         This function gets the last values of the previous solved segment and
         inserts them as start values for the next segment
         """
-        timesteps_of_prior_segment = self.active_timesteps_per_segment[segment_index - 1]
+        timesteps_of_prior_segment = self.sub_calculations[i - 1].flow_system.timesteps_extra

-        start = self.active_timesteps_per_segment[segment_index][0]
+        start = self.sub_calculations[i].flow_system.timesteps[0]
         start_previous_values = timesteps_of_prior_segment[self.timesteps_per_segment - self.nr_of_previous_values]
         end_previous_values = timesteps_of_prior_segment[self.timesteps_per_segment - 1]
         logger.debug(
-            f'start of next segment: {start}. indices of previous values: {start_previous_values}:{end_previous_values}'
+            f'Start of next segment: {start}. 
Indices of previous values: {start_previous_values} -> {end_previous_values}' ) + current_flow_system = self.sub_calculations[i -1].flow_system + next_flow_system = self.sub_calculations[i].flow_system + start_values_of_this_segment = {} - for flow in self.flow_system.flows.values(): - flow.previous_flow_rate = flow.model.flow_rate.solution.sel( + + for current_flow in current_flow_system.flows.values(): + next_flow = next_flow_system.flows[current_flow.label_full] + next_flow.previous_flow_rate = current_flow.model.flow_rate.solution.sel( time=slice(start_previous_values, end_previous_values) ).values - start_values_of_this_segment[flow.label_full] = flow.previous_flow_rate - for comp in self.flow_system.components.values(): - if isinstance(comp, Storage): - comp.initial_charge_state = comp.model.charge_state.solution.sel(time=start).item() - start_values_of_this_segment[comp.label_full] = comp.initial_charge_state + start_values_of_this_segment[current_flow.label_full] = next_flow.previous_flow_rate - self._transfered_start_values.append(start_values_of_this_segment) + for current_comp in current_flow_system.components.values(): + next_comp = next_flow_system.components[current_comp.label_full] + if isinstance(next_comp, Storage): + next_comp.initial_charge_state = current_comp.model.charge_state.solution.sel(time=start).item() + start_values_of_this_segment[current_comp.label_full] = next_comp.initial_charge_state - def _reset_start_values(self): - """This resets the start values of all Elements to its original state""" - for flow in self.flow_system.flows.values(): - flow.previous_flow_rate = self._original_start_values[flow.label_full] - for comp in self.flow_system.components.values(): - if isinstance(comp, Storage): - comp.initial_charge_state = self._original_start_values[comp.label_full] + self._transfered_start_values.append(start_values_of_this_segment) - def _calculate_timesteps_of_segment(self) -> List[pd.DatetimeIndex]: - active_timesteps_per_segment = [] + def _calculate_timesteps_per_segment(self) -> List[pd.DatetimeIndex]: + timesteps_per_segment = [] for i, _ in enumerate(self.segment_names): start = self.timesteps_per_segment * i end = min(start + self.timesteps_per_segment_with_overlap, len(self.all_timesteps)) - active_timesteps_per_segment.append(self.all_timesteps[start:end]) - return active_timesteps_per_segment + timesteps_per_segment.append(self.all_timesteps[start:end]) + return timesteps_per_segment @property def timesteps_per_segment_with_overlap(self): return self.timesteps_per_segment + self.overlap_timesteps @property - def start_values_of_segments(self) -> Dict[int, Dict[str, Any]]: + def start_values_of_segments(self) -> List[Dict[str, Any]]: """Gives an overview of the start values of all Segments""" - return { - 0: {element.label_full: value for element, value in self._original_start_values.items()}, - **{i: start_values for i, start_values in enumerate(self._transfered_start_values, 1)}, - } + return [ + {name: value for name, value in self._original_start_values.items()} + ] + [start_values for start_values in self._transfered_start_values] + + @property + def all_timesteps(self) -> pd.DatetimeIndex: + return self.flow_system.timesteps diff --git a/flixopt/components.py b/flixopt/components.py index 1f5fe5ece..639046cfc 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -7,9 +7,10 @@ import linopy import numpy as np +import xarray as xr from . 
import utils
-from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeries
+from .core import PlausibilityError, Scalar, TemporalData, TemporalDataUser
 from .elements import Component, ComponentModel, Flow
 from .features import InvestmentModel, OnOffModel, PiecewiseModel
 from .interface import InvestParameters, OnOffParameters, PiecewiseConversion
@@ -34,7 +35,7 @@ def __init__(
         self,
         inputs: List[Flow],
         outputs: List[Flow],
         on_off_parameters: OnOffParameters = None,
-        conversion_factors: List[Dict[str, NumericDataTS]] = None,
+        conversion_factors: List[Dict[str, TemporalDataUser]] = None,
         piecewise_conversion: Optional[PiecewiseConversion] = None,
         meta_data: Optional[Dict] = None,
     ):
@@ -98,14 +99,14 @@ def transform_data(self, flow_system: 'FlowSystem'):
         if self.piecewise_conversion:
             self.piecewise_conversion.transform_data(flow_system, f'{self.label_full}|PiecewiseConversion')

-    def _transform_conversion_factors(self, flow_system: 'FlowSystem') -> List[Dict[str, TimeSeries]]:
-        """macht alle Faktoren, die nicht TimeSeries sind, zu TimeSeries"""
+    def _transform_conversion_factors(self, flow_system: 'FlowSystem') -> List[Dict[str, xr.DataArray]]:
+        """Converts all conversion factors to internal datatypes"""
         list_of_conversion_factors = []
         for idx, conversion_factor in enumerate(self.conversion_factors):
             transformed_dict = {}
             for flow, values in conversion_factor.items():
                 # TODO: Might be better to use the label of the component instead of the flow
-                transformed_dict[flow] = flow_system.create_time_series(
+                transformed_dict[flow] = flow_system.fit_to_model_coords(
                     f'{self.flows[flow].label_full}|conversion_factor{idx}', values
                 )
             list_of_conversion_factors.append(transformed_dict)
@@ -128,14 +129,16 @@ def __init__(
         self,
         charging: Flow,
         discharging: Flow,
         capacity_in_flow_hours: Union[Scalar, InvestParameters],
-        relative_minimum_charge_state: NumericData = 0,
-        relative_maximum_charge_state: NumericData = 1,
+        relative_minimum_charge_state: TemporalDataUser = 0,
+        relative_maximum_charge_state: TemporalDataUser = 1,
         initial_charge_state: Union[Scalar, Literal['lastValueOfSim']] = 0,
         minimal_final_charge_state: Optional[Scalar] = None,
         maximal_final_charge_state: Optional[Scalar] = None,
-        eta_charge: NumericData = 1,
-        eta_discharge: NumericData = 1,
-        relative_loss_per_hour: NumericData = 0,
+        relative_minimum_final_charge_state: Optional[Scalar] = None,
+        relative_maximum_final_charge_state: Optional[Scalar] = None,
+        eta_charge: TemporalDataUser = 1,
+        eta_discharge: TemporalDataUser = 1,
+        relative_loss_per_hour: TemporalDataUser = 0,
         prevent_simultaneous_charge_and_discharge: bool = True,
         meta_data: Optional[Dict] = None,
     ):
@@ -157,6 +160,8 @@ def __init__(
             initial_charge_state: storage charge_state at the beginning. The default is 0.
             minimal_final_charge_state: minimal value of chargeState at the end of timeseries.
             maximal_final_charge_state: maximal value of chargeState at the end of timeseries.
+            relative_minimum_final_charge_state: relative minimal value of chargeState at the end of timeseries.
+            relative_maximum_final_charge_state: relative maximal value of chargeState at the end of timeseries.
             eta_charge: efficiency factor of charging/loading. The default is 1.
             eta_discharge: efficiency factor of uncharging/unloading. The default is 1.
             relative_loss_per_hour: loss per chargeState-Unit per hour. The default is 0.
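A minimal usage sketch of the new final-state bounds, based on the updated `examples/01_Simple/simple_example.py`; the storage label, the `Q_th_load` flow name, and the scalar capacity are illustrative assumptions, not taken verbatim from this diff:

```python
import numpy as np
import flixopt as fx

storage = fx.Storage(
    'Speicher',  # illustrative label
    charging=fx.Flow('Q_th_load', bus='Fernwärme', size=1000),  # assumed flow name
    discharging=fx.Flow('Q_th_unload', bus='Fernwärme', size=1000),
    capacity_in_flow_hours=30,  # scalar capacity for brevity; InvestParameters also works
    initial_charge_state=0,
    # One value per timestep -- the extra trailing timestep is gone:
    relative_maximum_charge_state=1 / 100 * np.array([80, 70, 80, 80, 80, 80, 80, 80, 80]),
    # The final charge state is now bounded by separate scalar parameters:
    relative_minimum_final_charge_state=0.2,  # assumed value
    relative_maximum_final_charge_state=0.8,  # as in the updated simple_example.py
    eta_charge=0.9,
    eta_discharge=1,
    relative_loss_per_hour=0.08,
)
```

If the final-state parameters are left at `None`, the model reuses the last per-timestep bound for the final timestep (see `relative_charge_state_bounds` further down in this diff).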
@@ -176,16 +181,19 @@ def __init__( self.charging = charging self.discharging = discharging self.capacity_in_flow_hours = capacity_in_flow_hours - self.relative_minimum_charge_state: NumericDataTS = relative_minimum_charge_state - self.relative_maximum_charge_state: NumericDataTS = relative_maximum_charge_state + self.relative_minimum_charge_state: TemporalDataUser = relative_minimum_charge_state + self.relative_maximum_charge_state: TemporalDataUser = relative_maximum_charge_state + + self.relative_minimum_final_charge_state: Scalar = relative_minimum_final_charge_state + self.relative_maximum_final_charge_state: Scalar = relative_maximum_final_charge_state self.initial_charge_state = initial_charge_state self.minimal_final_charge_state = minimal_final_charge_state self.maximal_final_charge_state = maximal_final_charge_state - self.eta_charge: NumericDataTS = eta_charge - self.eta_discharge: NumericDataTS = eta_discharge - self.relative_loss_per_hour: NumericDataTS = relative_loss_per_hour + self.eta_charge: TemporalDataUser = eta_charge + self.eta_discharge: TemporalDataUser = eta_discharge + self.relative_loss_per_hour: TemporalDataUser = relative_loss_per_hour self.prevent_simultaneous_charge_and_discharge = prevent_simultaneous_charge_and_discharge def create_model(self, model: SystemModel) -> 'StorageModel': @@ -195,19 +203,17 @@ def create_model(self, model: SystemModel) -> 'StorageModel': def transform_data(self, flow_system: 'FlowSystem') -> None: super().transform_data(flow_system) - self.relative_minimum_charge_state = flow_system.create_time_series( + self.relative_minimum_charge_state = flow_system.fit_to_model_coords( f'{self.label_full}|relative_minimum_charge_state', self.relative_minimum_charge_state, - needs_extra_timestep=True, ) - self.relative_maximum_charge_state = flow_system.create_time_series( + self.relative_maximum_charge_state = flow_system.fit_to_model_coords( f'{self.label_full}|relative_maximum_charge_state', self.relative_maximum_charge_state, - needs_extra_timestep=True, ) - self.eta_charge = flow_system.create_time_series(f'{self.label_full}|eta_charge', self.eta_charge) - self.eta_discharge = flow_system.create_time_series(f'{self.label_full}|eta_discharge', self.eta_discharge) - self.relative_loss_per_hour = flow_system.create_time_series( + self.eta_charge = flow_system.fit_to_model_coords(f'{self.label_full}|eta_charge', self.eta_charge) + self.eta_discharge = flow_system.fit_to_model_coords(f'{self.label_full}|eta_discharge', self.eta_discharge) + self.relative_loss_per_hour = flow_system.fit_to_model_coords( f'{self.label_full}|relative_loss_per_hour', self.relative_loss_per_hour ) if isinstance(self.capacity_in_flow_hours, InvestParameters): @@ -231,9 +237,9 @@ def _plausibility_checks(self) -> None: minimum_capacity = self.capacity_in_flow_hours # initial capacity >= allowed min for maximum_size: - minimum_inital_capacity = maximum_capacity * self.relative_minimum_charge_state.isel(time=1) + minimum_inital_capacity = maximum_capacity * self.relative_minimum_charge_state.isel(time=0) # initial capacity <= allowed max for minimum_size: - maximum_inital_capacity = minimum_capacity * self.relative_maximum_charge_state.isel(time=1) + maximum_inital_capacity = minimum_capacity * self.relative_maximum_charge_state.isel(time=0) if self.initial_charge_state > maximum_inital_capacity: raise ValueError( @@ -264,8 +270,8 @@ def __init__( out1: Flow, in2: Optional[Flow] = None, out2: Optional[Flow] = None, - relative_losses: Optional[NumericDataTS] = None, - 
absolute_losses: Optional[NumericDataTS] = None, + relative_losses: Optional[TemporalDataUser] = None, + absolute_losses: Optional[TemporalDataUser] = None, on_off_parameters: OnOffParameters = None, prevent_simultaneous_flows_in_both_directions: bool = True, meta_data: Optional[Dict] = None, @@ -331,10 +337,10 @@ def create_model(self, model) -> 'TransmissionModel': def transform_data(self, flow_system: 'FlowSystem') -> None: super().transform_data(flow_system) - self.relative_losses = flow_system.create_time_series( + self.relative_losses = flow_system.fit_to_model_coords( f'{self.label_full}|relative_losses', self.relative_losses ) - self.absolute_losses = flow_system.create_time_series( + self.absolute_losses = flow_system.fit_to_model_coords( f'{self.label_full}|absolute_losses', self.absolute_losses ) @@ -348,7 +354,7 @@ def __init__(self, model: SystemModel, element: Transmission): def do_modeling(self): """Initiates all FlowModels""" # Force On Variable if absolute losses are present - if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses.active_data != 0): + if (self.element.absolute_losses is not None) and np.any(self.element.absolute_losses != 0): for flow in self.element.inputs + self.element.outputs: if flow.on_off_parameters is None: flow.on_off_parameters = OnOffParameters() @@ -385,14 +391,14 @@ def create_transmission_equation(self, name: str, in_flow: Flow, out_flow: Flow) # eq: out(t) + on(t)*loss_abs(t) = in(t)*(1 - loss_rel(t)) con_transmission = self.add( self._model.add_constraints( - out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses.active_data - 1), + out_flow.model.flow_rate == -in_flow.model.flow_rate * (self.element.relative_losses - 1), name=f'{self.label_full}|{name}', ), name, ) if self.element.absolute_losses is not None: - con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses.active_data + con_transmission.lhs += in_flow.model.on_off.on * self.element.absolute_losses return con_transmission @@ -420,8 +426,8 @@ def do_modeling(self): self.add( self._model.add_constraints( - sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_inputs]) - == sum([flow.model.flow_rate * conv_factors[flow.label].active_data for flow in used_outputs]), + sum([flow.model.flow_rate * conv_factors[flow.label] for flow in used_inputs]) + == sum([flow.model.flow_rate * conv_factors[flow.label] for flow in used_outputs]), name=f'{self.label_full}|conversion_{i}', ) ) @@ -481,12 +487,12 @@ def do_modeling(self): ) charge_state = self.charge_state - rel_loss = self.element.relative_loss_per_hour.active_data + rel_loss = self.element.relative_loss_per_hour hours_per_step = self._model.hours_per_step charge_rate = self.element.charging.model.flow_rate discharge_rate = self.element.discharging.model.flow_rate - eff_charge = self.element.eta_charge.active_data - eff_discharge = self.element.eta_discharge.active_data + eff_charge = self.element.eta_charge + eff_discharge = self.element.eta_discharge self.add( self._model.add_constraints( @@ -556,7 +562,7 @@ def _initial_and_final_charge_state(self): ) @property - def absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: + def absolute_charge_state_bounds(self) -> Tuple[TemporalData, TemporalData]: relative_lower_bound, relative_upper_bound = self.relative_charge_state_bounds if not isinstance(self.element.capacity_in_flow_hours, InvestParameters): return ( @@ -570,11 +576,41 @@ def 
absolute_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: ) @property - def relative_charge_state_bounds(self) -> Tuple[NumericData, NumericData]: - return ( - self.element.relative_minimum_charge_state.active_data, - self.element.relative_maximum_charge_state.active_data, - ) + def relative_charge_state_bounds(self) -> Tuple[xr.DataArray, xr.DataArray]: + """ + Get relative charge state bounds with final timestep values. + + Returns: + Tuple of (minimum_bounds, maximum_bounds) DataArrays extending to final timestep + """ + final_timestep = self._model.flow_system.timesteps_extra[-1] + final_coords = {'time': [final_timestep]} + + # Get final minimum charge state + if self.element.relative_minimum_final_charge_state is None: + min_final = self.element.relative_minimum_charge_state.isel( + time=-1, drop=True + ).assign_coords(time=final_timestep) + else: + min_final = xr.DataArray( + [self.element.relative_minimum_final_charge_state], coords=final_coords, dims=['time'] + ) + + # Get final maximum charge state + if self.element.relative_maximum_final_charge_state is None: + max_final = self.element.relative_maximum_charge_state.isel( + time=-1, drop=True + ).assign_coords(time=final_timestep) + else: + max_final = xr.DataArray( + [self.element.relative_maximum_final_charge_state], coords=final_coords, dims=['time'] + ) + + # Concatenate with original bounds + min_bounds = xr.concat([self.element.relative_minimum_charge_state, min_final], dim='time') + max_bounds = xr.concat([self.element.relative_maximum_charge_state, max_final], dim='time') + + return min_bounds, max_bounds @register_class_for_io diff --git a/flixopt/core.py b/flixopt/core.py index 08be18f1d..121c7fb12 100644 --- a/flixopt/core.py +++ b/flixopt/core.py @@ -3,12 +3,9 @@ It provides Datatypes, logging functionality, and some functions to transform data structures. """ -import inspect -import json import logging -import pathlib -from collections import Counter -from typing import Any, Dict, Iterator, List, Literal, Optional, Tuple, Union +import warnings +from typing import Dict, Optional, Union import numpy as np import pandas as pd @@ -17,13 +14,15 @@ logger = logging.getLogger('flixopt') Scalar = Union[int, float] -"""A type representing a single number, either integer or float.""" +"""A single number, either integer or float.""" -NumericData = Union[int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray] -"""Represents any form of numeric data, from simple scalars to complex data structures.""" +TemporalDataUser = Union[ + int, float, np.integer, np.floating, np.ndarray, pd.Series, pd.DataFrame, xr.DataArray, 'TimeSeriesData' +] +"""User data which might have a time dimension. Internally converted to an xr.DataArray with time dimension.""" -NumericDataTS = Union[NumericData, 'TimeSeriesData'] -"""Represents either standard numeric data or TimeSeriesData.""" +TemporalData = Union[xr.DataArray, 'TimeSeriesData'] +"""Internally used datatypes for temporal data.""" class PlausibilityError(Exception): @@ -38,933 +37,261 @@ class ConversionError(Exception): pass -class DataConverter: - """ - Converts various data types into xarray.DataArray with a timesteps index. - - Supports: scalars, arrays, Series, DataFrames, and DataArrays. 
- """ - - @staticmethod - def as_dataarray(data: NumericData, timesteps: pd.DatetimeIndex) -> xr.DataArray: - """Convert data to xarray.DataArray with specified timesteps index.""" - if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: - raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') - if not timesteps.name == 'time': - raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}') - - coords = [timesteps] - dims = ['time'] - expected_shape = (len(timesteps),) - - try: - if isinstance(data, (int, float, np.integer, np.floating)): - return xr.DataArray(data, coords=coords, dims=dims) - elif isinstance(data, pd.DataFrame): - if not data.index.equals(timesteps): - raise ConversionError("DataFrame index doesn't match timesteps index") - if not len(data.columns) == 1: - raise ConversionError('DataFrame must have exactly one column') - return xr.DataArray(data.values.flatten(), coords=coords, dims=dims) - elif isinstance(data, pd.Series): - if not data.index.equals(timesteps): - raise ConversionError("Series index doesn't match timesteps index") - return xr.DataArray(data.values, coords=coords, dims=dims) - elif isinstance(data, np.ndarray): - if data.ndim != 1: - raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}') - elif data.shape[0] != expected_shape[0]: - raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}") - return xr.DataArray(data, coords=coords, dims=dims) - elif isinstance(data, xr.DataArray): - if data.dims != tuple(dims): - raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}") - if data.sizes[dims[0]] != len(coords[0]): - raise ConversionError( - f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}" - ) - return data.copy(deep=True) - else: - raise ConversionError(f'Unsupported type: {type(data).__name__}') - except Exception as e: - if isinstance(e, ConversionError): - raise - raise ConversionError(f'Converting data {type(data)} to xarray.Dataset raised an error: {str(e)}') from e - - -class TimeSeriesData: - # TODO: Move to Interface.py - def __init__(self, data: NumericData, agg_group: Optional[str] = None, agg_weight: Optional[float] = None): - """ - timeseries class for transmit timeseries AND special characteristics of timeseries, - i.g. to define weights needed in calculation_type 'aggregated' - EXAMPLE solar: - you have several solar timeseries. These should not be overweighted - compared to the remaining timeseries (i.g. heat load, price)! - fixed_relative_profile_solar1 = TimeSeriesData(sol_array_1, type = 'solar') - fixed_relative_profile_solar2 = TimeSeriesData(sol_array_2, type = 'solar') - fixed_relative_profile_solar3 = TimeSeriesData(sol_array_3, type = 'solar') - --> this 3 series of same type share one weight, i.e. internally assigned each weight = 1/3 - (instead of standard weight = 1) - - Args: - data: The timeseries data, which can be a scalar, array, or numpy array. - agg_group: The group this TimeSeriesData is a part of. agg_weight is split between members of a group. Default is None. - agg_weight: The weight for calculation_type 'aggregated', should be between 0 and 1. Default is None. - - Raises: - Exception: If both agg_group and agg_weight are set, an exception is raised. 
- """ - self.data = data - self.agg_group = agg_group - self.agg_weight = agg_weight - if (agg_group is not None) and (agg_weight is not None): - raise ValueError('Either or explicit can be used. Not both!') - self.label: Optional[str] = None - - def __repr__(self): - # Get the constructor arguments and their current values - init_signature = inspect.signature(self.__init__) - init_args = init_signature.parameters +class TimeSeriesData(xr.DataArray): + """Minimal TimeSeriesData that inherits from xr.DataArray with aggregation metadata.""" - # Create a dictionary with argument names and their values - args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self') - return f'{self.__class__.__name__}({args_str})' + __slots__ = () # No additional instance attributes - everything goes in attrs - def __str__(self): - return str(self.data) - - -class TimeSeries: - """ - A class representing time series data with active and stored states. - - TimeSeries provides a way to store time-indexed data and work with temporal subsets. - It supports arithmetic operations, aggregation, and JSON serialization. - - Attributes: - name (str): The name of the time series - aggregation_weight (Optional[float]): Weight used for aggregation - aggregation_group (Optional[str]): Group name for shared aggregation weighting - needs_extra_timestep (bool): Whether this series needs an extra timestep - """ - - @classmethod - def from_datasource( - cls, - data: NumericData, - name: str, - timesteps: pd.DatetimeIndex, - aggregation_weight: Optional[float] = None, - aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, - ) -> 'TimeSeries': + def __init__(self, *args, aggregation_group: Optional[str] = None, aggregation_weight: Optional[float] = None, + agg_group: Optional[str] = None, agg_weight: Optional[float] = None, **kwargs): """ - Initialize the TimeSeries from multiple data sources. - Args: - data: The time series data - name: The name of the TimeSeries - timesteps: The timesteps of the TimeSeries - aggregation_weight: The weight in aggregation calculations - aggregation_group: Group this TimeSeries belongs to for aggregation weight sharing - needs_extra_timestep: Whether this series requires an extra timestep - - Returns: - A new TimeSeries instance - """ - return cls( - DataConverter.as_dataarray(data, timesteps), - name, - aggregation_weight, - aggregation_group, - needs_extra_timestep, - ) - - @classmethod - def from_json(cls, data: Optional[Dict[str, Any]] = None, path: Optional[str] = None) -> 'TimeSeries': - """ - Load a TimeSeries from a dictionary or json file. 
- - Args: - data: Dictionary containing TimeSeries data - path: Path to a JSON file containing TimeSeries data - - Returns: - A new TimeSeries instance - - Raises: - ValueError: If both path and data are provided or neither is provided - """ - if (path is None and data is None) or (path is not None and data is not None): - raise ValueError("Exactly one of 'path' or 'data' must be provided") - - if path is not None: - with open(path, 'r') as f: - data = json.load(f) - - # Convert ISO date strings to datetime objects - data['data']['coords']['time']['data'] = pd.to_datetime(data['data']['coords']['time']['data']) - - # Create the TimeSeries instance - return cls( - data=xr.DataArray.from_dict(data['data']), - name=data['name'], - aggregation_weight=data['aggregation_weight'], - aggregation_group=data['aggregation_group'], - needs_extra_timestep=data['needs_extra_timestep'], - ) - - def __init__( - self, - data: xr.DataArray, - name: str, - aggregation_weight: Optional[float] = None, - aggregation_group: Optional[str] = None, - needs_extra_timestep: bool = False, - ): - """ - Initialize a TimeSeries with a DataArray. - - Args: - data: The DataArray containing time series data - name: The name of the TimeSeries - aggregation_weight: The weight in aggregation calculations - aggregation_group: Group this TimeSeries belongs to for weight sharing - needs_extra_timestep: Whether this series requires an extra timestep - - Raises: - ValueError: If data doesn't have a 'time' index or has more than 1 dimension - """ - if 'time' not in data.indexes: - raise ValueError(f'DataArray must have a "time" index. Got {data.indexes}') - if data.ndim > 1: - raise ValueError(f'Number of dimensions of DataArray must be 1. Got {data.ndim}') - - self.name = name - self.aggregation_weight = aggregation_weight - self.aggregation_group = aggregation_group - self.needs_extra_timestep = needs_extra_timestep - - # Data management - self._stored_data = data.copy(deep=True) - self._backup = self._stored_data.copy(deep=True) - self._active_timesteps = self._stored_data.indexes['time'] - self._active_data = None - self._update_active_data() - - def reset(self): - """ - Reset active timesteps to the full set of stored timesteps. - """ - self.active_timesteps = None - - def restore_data(self): - """ - Restore stored_data from the backup and reset active timesteps. - """ - self._stored_data = self._backup.copy(deep=True) - self.reset() - - def to_json(self, path: Optional[pathlib.Path] = None) -> Dict[str, Any]: - """ - Save the TimeSeries to a dictionary or JSON file. 
- - Args: - path: Optional path to save JSON file - - Returns: - Dictionary representation of the TimeSeries - """ - data = { - 'name': self.name, - 'aggregation_weight': self.aggregation_weight, - 'aggregation_group': self.aggregation_group, - 'needs_extra_timestep': self.needs_extra_timestep, - 'data': self.active_data.to_dict(), - } - - # Convert datetime objects to ISO strings - data['data']['coords']['time']['data'] = [date.isoformat() for date in data['data']['coords']['time']['data']] - - # Save to file if path is provided - if path is not None: - indent = 4 if len(self.active_timesteps) <= 480 else None - with open(path, 'w', encoding='utf-8') as f: - json.dump(data, f, indent=indent, ensure_ascii=False) - - return data + *args: Arguments passed to DataArray + aggregation_group: Aggregation group name + aggregation_weight: Aggregation weight (0-1) + agg_group: Deprecated, use aggregation_group instead + agg_weight: Deprecated, use aggregation_weight instead + **kwargs: Additional arguments passed to DataArray + """ + if agg_group is not None: + warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2) + aggregation_group = agg_group + if agg_weight is not None: + warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2) + aggregation_weight = agg_weight + + if (aggregation_group is not None) and (aggregation_weight is not None): + raise ValueError('Use either aggregation_group or aggregation_weight, not both') + + # Let xarray handle all the initialization complexity + super().__init__(*args, **kwargs) + + # Add our metadata to attrs after initialization + if aggregation_group is not None: + self.attrs['aggregation_group'] = aggregation_group + if aggregation_weight is not None: + self.attrs['aggregation_weight'] = aggregation_weight + + # Always mark as TimeSeriesData + self.attrs['__timeseries_data__'] = True @property - def stats(self) -> str: - """ - Return a statistical summary of the active data. - - Returns: - String representation of data statistics - """ - return get_numeric_stats(self.active_data, padd=0) - - def _update_active_data(self): - """ - Update the active data based on active_timesteps. - """ - self._active_data = self._stored_data.sel(time=self.active_timesteps) + def aggregation_group(self) -> Optional[str]: + return self.attrs.get('aggregation_group') @property - def all_equal(self) -> bool: - """Check if all values in the series are equal.""" - return np.unique(self.active_data.values).size == 1 + def aggregation_weight(self) -> Optional[float]: + return self.attrs.get('aggregation_weight') - @property - def active_timesteps(self) -> pd.DatetimeIndex: - """Get the current active timesteps.""" - return self._active_timesteps - - @active_timesteps.setter - def active_timesteps(self, timesteps: Optional[pd.DatetimeIndex]): - """ - Set active_timesteps and refresh active_data. 
-
-        Args:
-            timesteps: New timesteps to activate, or None to use all stored timesteps
-
-        Raises:
-            TypeError: If timesteps is not a pandas DatetimeIndex or None
-        """
-        if timesteps is None:
-            self._active_timesteps = self.stored_data.indexes['time']
-        elif isinstance(timesteps, pd.DatetimeIndex):
-            self._active_timesteps = timesteps
-        else:
-            raise TypeError('active_timesteps must be a pandas DatetimeIndex or None')
-
-        self._update_active_data()
-
-    @property
-    def active_data(self) -> xr.DataArray:
-        """Get a view of stored_data based on active_timesteps."""
-        return self._active_data
+    @classmethod
+    def from_dataarray(cls, da: xr.DataArray, aggregation_group: Optional[str] = None, aggregation_weight: Optional[float] = None):
+        """Create TimeSeriesData from DataArray, extracting metadata from attrs."""
+        # Get aggregation metadata from attrs or parameters
+        final_aggregation_group = aggregation_group if aggregation_group is not None else da.attrs.get('aggregation_group')
+        final_aggregation_weight = aggregation_weight if aggregation_weight is not None else da.attrs.get('aggregation_weight')

-    @property
-    def stored_data(self) -> xr.DataArray:
-        """Get a copy of the full stored data."""
-        return self._stored_data.copy()
+        return cls(da, aggregation_group=final_aggregation_group, aggregation_weight=final_aggregation_weight)

-    @stored_data.setter
-    def stored_data(self, value: NumericData):
-        """
-        Update stored_data and refresh active_data.
-
-        Args:
-            value: New data to store
-        """
-        new_data = DataConverter.as_dataarray(value, timesteps=self.active_timesteps)
+    @classmethod
+    def is_timeseries_data(cls, obj) -> bool:
+        """Check if an object is TimeSeriesData."""
+        return isinstance(obj, xr.DataArray) and obj.attrs.get('__timeseries_data__', False)

-        # Skip if data is unchanged to avoid overwriting backup
-        if new_data.equals(self._stored_data):
-            return
+    def __repr__(self):
+        agg_info = []
+        if self.aggregation_group:
+            agg_info.append(f"aggregation_group='{self.aggregation_group}'")
+        if self.aggregation_weight is not None:
+            agg_info.append(f'aggregation_weight={self.aggregation_weight}')

-        self._stored_data = new_data
-        self.active_timesteps = None  # Reset to full timeline
+        info_str = f'TimeSeriesData({", ".join(agg_info)})' if agg_info else 'TimeSeriesData'
+        return f'{info_str}\n{super().__repr__()}'

     @property
-    def sel(self):
-        return self.active_data.sel
+    def agg_group(self):
+        warnings.warn('agg_group is deprecated, use aggregation_group instead', DeprecationWarning, stacklevel=2)
+        return self.aggregation_group

     @property
-    def isel(self):
-        return self.active_data.isel
-
-    def _apply_operation(self, other, op):
-        """Apply an operation between this TimeSeries and another object."""
-        if isinstance(other, TimeSeries):
-            other = other.active_data
-        return op(self.active_data, other)
-
-    def __add__(self, other):
-        return self._apply_operation(other, lambda x, y: x + y)
-
-    def __sub__(self, other):
-        return self._apply_operation(other, lambda x, y: x - y)
-
-    def __mul__(self, other):
-        return self._apply_operation(other, lambda x, y: x * y)
-
-    def __truediv__(self, other):
-        return self._apply_operation(other, lambda x, y: x / y)
-
-    def __radd__(self, other):
-        return other + self.active_data
-
-    def __rsub__(self, other):
-        return other - self.active_data
-
-    def __rmul__(self, other):
-        return other * self.active_data
-
-    def __rtruediv__(self, other):
-        return other / self.active_data
-
-    def __neg__(self) -> xr.DataArray:
-        return -self.active_data
-
-    def 
__pos__(self) -> xr.DataArray:
-        return +self.active_data
-
-    def __abs__(self) -> xr.DataArray:
-        return abs(self.active_data)
-
-    def __gt__(self, other):
-        """
-        Compare if this TimeSeries is greater than another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are greater than other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data > other.active_data
-        return self.active_data > other
-
-    def __ge__(self, other):
-        """
-        Compare if this TimeSeries is greater than or equal to another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are greater than or equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data >= other.active_data
-        return self.active_data >= other
-
-    def __lt__(self, other):
-        """
-        Compare if this TimeSeries is less than another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are less than other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data < other.active_data
-        return self.active_data < other
-
-    def __le__(self, other):
-        """
-        Compare if this TimeSeries is less than or equal to another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are less than or equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data <= other.active_data
-        return self.active_data <= other
-
-    def __eq__(self, other):
-        """
-        Compare if this TimeSeries is equal to another.
-
-        Args:
-            other: Another TimeSeries to compare with
-
-        Returns:
-            True if all values in this TimeSeries are equal to other
-        """
-        if isinstance(other, TimeSeries):
-            return self.active_data == other.active_data
-        return self.active_data == other
-
-    def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
-        """
-        Handle NumPy universal functions.
-
-        This allows NumPy functions to work with TimeSeries objects.
-        """
-        # Convert any TimeSeries inputs to their active_data
-        inputs = [x.active_data if isinstance(x, TimeSeries) else x for x in inputs]
-        return getattr(ufunc, method)(*inputs, **kwargs)
+    def agg_weight(self):
+        warnings.warn('agg_weight is deprecated, use aggregation_weight instead', DeprecationWarning, stacklevel=2)
+        return self.aggregation_weight

-    def __repr__(self):
-        """
-        Get a string representation of the TimeSeries.
-        Returns:
-            String showing TimeSeries details
-        """
-        attrs = {
-            'name': self.name,
-            'aggregation_weight': self.aggregation_weight,
-            'aggregation_group': self.aggregation_group,
-            'needs_extra_timestep': self.needs_extra_timestep,
-            'shape': self.active_data.shape,
-            'time_range': f'{self.active_timesteps[0]} to {self.active_timesteps[-1]}',
-        }
-        attr_str = ', '.join(f'{k}={repr(v)}' for k, v in attrs.items())
-        return f'TimeSeries({attr_str})'
-
-    def __str__(self):
-        """
-        Get a human-readable string representation.
-
-        Returns:
-            Descriptive string with statistics
-        """
-        return f"TimeSeries '{self.name}': {self.stats}"
-
-
-class TimeSeriesCollection:
+class DataConverter:
     """
-    Collection of TimeSeries objects with shared timestep management.
+    Converts various data types into xarray.DataArray with a timesteps index.

-    TimeSeriesCollection handles multiple TimeSeries objects with synchronized
-    timesteps, provides operations on collections, and manages extra timesteps.
+    Supports: scalars, arrays, Series, DataFrames, DataArrays, and TimeSeriesData. 
""" - def __init__( - self, - timesteps: pd.DatetimeIndex, - hours_of_last_timestep: Optional[float] = None, - hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] = None, - ): - """ - Args: - timesteps: The timesteps of the Collection. - hours_of_last_timestep: The duration of the last time step. Uses the last time interval if not specified - hours_of_previous_timesteps: The duration of previous timesteps. - If None, the first time increment of time_series is used. - This is needed to calculate previous durations (for example consecutive_on_hours). - If you use an array, take care that its long enough to cover all previous values! - """ - # Prepare and validate timesteps - self._validate_timesteps(timesteps) - self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( - timesteps, hours_of_previous_timesteps - ) - - # Set up timesteps and hours - self.all_timesteps = timesteps - self.all_timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - self.all_hours_per_timestep = self.calculate_hours_per_timestep(self.all_timesteps_extra) - - # Active timestep tracking - self._active_timesteps = None - self._active_timesteps_extra = None - self._active_hours_per_timestep = None - - # Dictionary of time series by name - self.time_series_data: Dict[str, TimeSeries] = {} - - # Aggregation - self.group_weights: Dict[str, float] = {} - self.weights: Dict[str, float] = {} - - @classmethod - def with_uniform_timesteps( - cls, start_time: pd.Timestamp, periods: int, freq: str, hours_per_step: Optional[float] = None - ) -> 'TimeSeriesCollection': - """Create a collection with uniform timesteps.""" - timesteps = pd.date_range(start_time, periods=periods, freq=freq, name='time') - return cls(timesteps, hours_of_previous_timesteps=hours_per_step) - - def create_time_series( - self, data: Union[NumericData, TimeSeriesData], name: str, needs_extra_timestep: bool = False - ) -> TimeSeries: + @staticmethod + def _fix_timeseries_data_indexing( + data: TimeSeriesData, timesteps: pd.DatetimeIndex, dims: list, coords: list + ) -> TimeSeriesData: """ - Creates a TimeSeries from the given data and adds it to the collection. + Fix TimeSeriesData indexing issues and return properly indexed TimeSeriesData. Args: - data: The data to create the TimeSeries from. - name: The name of the TimeSeries. - needs_extra_timestep: Whether to create an additional timestep at the end of the timesteps. - The data to create the TimeSeries from. + data: TimeSeriesData that might have indexing issues + timesteps: Target timesteps + dims: Expected dimensions + coords: Expected coordinates Returns: - The created TimeSeries. + TimeSeriesData with correct indexing + Raises: + ConversionError: If data cannot be fixed to match expected indexing """ - # Check for duplicate name - if name in self.time_series_data: - raise ValueError(f"TimeSeries '{name}' already exists in this collection") - - # Determine which timesteps to use - timesteps_to_use = self.timesteps_extra if needs_extra_timestep else self.timesteps - - # Create the time series - if isinstance(data, TimeSeriesData): - time_series = TimeSeries.from_datasource( - name=name, - data=data.data, - timesteps=timesteps_to_use, - aggregation_weight=data.agg_weight, - aggregation_group=data.agg_group, - needs_extra_timestep=needs_extra_timestep, + expected_shape = (len(timesteps),) + + # Check if dimensions match + if data.dims != tuple(dims): + logger.warning( + f'TimeSeriesData has dimensions {data.dims}, expected {dims}. 
Reshaping to match timesteps. To avoid ' + f'this warning, create a correctly shaped DataArray with the correct dimensions in the first place.' ) - # Connect the user time series to the created TimeSeries - data.label = name - else: - time_series = TimeSeries.from_datasource( - name=name, data=data, timesteps=timesteps_to_use, needs_extra_timestep=needs_extra_timestep + # Try to reshape the data to match expected dimensions + if data.size != len(timesteps): + raise ConversionError( + f'TimeSeriesData has {data.size} elements, cannot reshape to match {len(timesteps)} timesteps' + ) + # Create new DataArray with correct coordinates, preserving metadata + reshaped_data = xr.DataArray( + data.values.reshape(expected_shape), coords=coords, dims=dims, name=data.name, attrs=data.attrs.copy() ) + return TimeSeriesData(reshaped_data) - # Add to the collection - self.add_time_series(time_series) - - return time_series - - def calculate_aggregation_weights(self) -> Dict[str, float]: - """Calculate and return aggregation weights for all time series.""" - self.group_weights = self._calculate_group_weights() - self.weights = self._calculate_weights() - - if np.all(np.isclose(list(self.weights.values()), 1, atol=1e-6)): - logger.info('All Aggregation weights were set to 1') - - return self.weights - - def activate_timesteps(self, active_timesteps: Optional[pd.DatetimeIndex] = None): - """ - Update active timesteps for the collection and all time series. - If no arguments are provided, the active timesteps are reset. - - Args: - active_timesteps: The active timesteps of the model. - If None, the all timesteps of the TimeSeriesCollection are taken. - """ - if active_timesteps is None: - return self.reset() - - if not np.all(np.isin(active_timesteps, self.all_timesteps)): - raise ValueError('active_timesteps must be a subset of the timesteps of the TimeSeriesCollection') - - # Calculate derived timesteps - self._active_timesteps = active_timesteps - first_ts_index = np.where(self.all_timesteps == active_timesteps[0])[0][0] - last_ts_idx = np.where(self.all_timesteps == active_timesteps[-1])[0][0] - self._active_timesteps_extra = self.all_timesteps_extra[first_ts_index : last_ts_idx + 2] - self._active_hours_per_timestep = self.all_hours_per_timestep.isel(time=slice(first_ts_index, last_ts_idx + 1)) - - # Update all time series - self._update_time_series_timesteps() - - def reset(self): - """Reset active timesteps to defaults for all time series.""" - self._active_timesteps = None - self._active_timesteps_extra = None - self._active_hours_per_timestep = None - - for time_series in self.time_series_data.values(): - time_series.reset() - - def restore_data(self): - """Restore original data for all time series.""" - for time_series in self.time_series_data.values(): - time_series.restore_data() - - def add_time_series(self, time_series: TimeSeries): - """Add an existing TimeSeries to the collection.""" - if time_series.name in self.time_series_data: - raise ValueError(f"TimeSeries '{time_series.name}' already exists in this collection") - - self.time_series_data[time_series.name] = time_series - - def insert_new_data(self, data: pd.DataFrame, include_extra_timestep: bool = False): - """ - Update time series with new data from a DataFrame. 
- - Args: - data: DataFrame containing new data with timestamps as index - include_extra_timestep: Whether the provided data already includes the extra timestep, by default False - """ - if not isinstance(data, pd.DataFrame): - raise TypeError(f'data must be a pandas DataFrame, got {type(data).__name__}') - - # Check if the DataFrame index matches the expected timesteps - expected_timesteps = self.timesteps_extra if include_extra_timestep else self.timesteps - if not data.index.equals(expected_timesteps): - raise ValueError( - f'DataFrame index must match {"collection timesteps with extra timestep" if include_extra_timestep else "collection timesteps"}' + # Check if time coordinate length matches + elif data.sizes[dims[0]] != len(coords[0]): + logger.warning( + f'TimeSeriesData has {data.sizes[dims[0]]} time points, ' + f"expected {len(coords[0])}. Cannot reindex - lengths don't match." + ) + raise ConversionError( + f"TimeSeriesData length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}" ) - for name, ts in self.time_series_data.items(): - if name in data.columns: - if not ts.needs_extra_timestep: - # For time series without extra timestep - if include_extra_timestep: - # If data includes extra timestep but series doesn't need it, exclude the last point - ts.stored_data = data[name].iloc[:-1] - else: - # Use data as is - ts.stored_data = data[name] - else: - # For time series with extra timestep - if include_extra_timestep: - # Data already includes extra timestep - ts.stored_data = data[name] - else: - # Need to add extra timestep - extrapolate from the last value - extra_step_value = data[name].iloc[-1] - extra_step_index = pd.DatetimeIndex([self.timesteps_extra[-1]], name='time') - extra_step_series = pd.Series([extra_step_value], index=extra_step_index) - - # Combine the regular data with the extra timestep - ts.stored_data = pd.concat([data[name], extra_step_series]) - - logger.debug(f'Updated data for {name}') - - def to_dataframe( - self, filtered: Literal['all', 'constant', 'non_constant'] = 'non_constant', include_extra_timestep: bool = True - ) -> pd.DataFrame: - """ - Convert collection to DataFrame with optional filtering and timestep control. - - Args: - filtered: Filter time series by variability, by default 'non_constant' - include_extra_timestep: Whether to include the extra timestep in the result, by default True - - Returns: - DataFrame representation of the collection - """ - include_constants = filtered != 'non_constant' - ds = self.to_dataset(include_constants=include_constants) - - if not include_extra_timestep: - ds = ds.isel(time=slice(None, -1)) - - df = ds.to_dataframe() - - # Apply filtering - if filtered == 'all': - return df - elif filtered == 'constant': - return df.loc[:, df.nunique() == 1] - elif filtered == 'non_constant': - return df.loc[:, df.nunique() > 1] - else: - raise ValueError("filtered must be one of: 'all', 'constant', 'non_constant'") - - def to_dataset(self, include_constants: bool = True) -> xr.Dataset: - """ - Combine all time series into a single Dataset with all timesteps. - - Args: - include_constants: Whether to include time series with constant values, by default True + # Check if time coordinates are identical + elif not data.coords['time'].equals(timesteps): + logger.warning( + 'TimeSeriesData has different time coordinates than expected. Replacing with provided timesteps.' 
+ ) + # Replace time coordinates while preserving data and metadata + recoordinated_data = xr.DataArray( + data.values, coords=coords, dims=dims, name=data.name, attrs=data.attrs.copy() + ) + return TimeSeriesData(recoordinated_data) - Returns: - Dataset containing all selected time series with all timesteps - """ - # Determine which series to include - if include_constants: - series_to_include = self.time_series_data.values() else: - series_to_include = self.non_constants - - # Create individual datasets and merge them - ds = xr.merge([ts.active_data.to_dataset(name=ts.name) for ts in series_to_include]) + # Everything matches - return copy to avoid modifying original + return data.copy(deep=True) - # Ensure the correct time coordinates - ds = ds.reindex(time=self.timesteps_extra) - - ds.attrs.update( - { - 'timesteps_extra': f'{self.timesteps_extra[0]} ... {self.timesteps_extra[-1]} | len={len(self.timesteps_extra)}', - 'hours_per_timestep': self._format_stats(self.hours_per_timestep), - } - ) - - return ds + @staticmethod + def to_dataarray(data: TemporalData, timesteps: pd.DatetimeIndex) -> xr.DataArray: + """Convert data to xarray.DataArray with specified timesteps index.""" + if not isinstance(timesteps, pd.DatetimeIndex) or len(timesteps) == 0: + raise ValueError(f'Timesteps must be a non-empty DatetimeIndex, got {type(timesteps).__name__}') + if not timesteps.name == 'time': + raise ConversionError(f'DatetimeIndex is not named correctly. Must be named "time", got {timesteps.name=}') - def _update_time_series_timesteps(self): - """Update active timesteps for all time series.""" - for ts in self.time_series_data.values(): - if ts.needs_extra_timestep: - ts.active_timesteps = self.timesteps_extra - else: - ts.active_timesteps = self.timesteps + coords = [timesteps] + dims = ['time'] + expected_shape = (len(timesteps),) - @staticmethod - def _validate_timesteps(timesteps: pd.DatetimeIndex): - """Validate timesteps format and rename if needed.""" - if not isinstance(timesteps, pd.DatetimeIndex): - raise TypeError('timesteps must be a pandas DatetimeIndex') + try: + # Handle TimeSeriesData first (before generic DataArray check) + if isinstance(data, TimeSeriesData): + return DataConverter._fix_timeseries_data_indexing(data, timesteps, dims, coords) - if len(timesteps) < 2: - raise ValueError('timesteps must contain at least 2 timestamps') + elif isinstance(data, (int, float, np.integer, np.floating)): + # Scalar: broadcast to all timesteps + scalar_data = np.full(expected_shape, data) + return xr.DataArray(scalar_data, coords=coords, dims=dims) - # Ensure timesteps has the required name - if timesteps.name != 'time': - logger.warning('Renamed timesteps to "time" (was "%s")', timesteps.name) - timesteps.name = 'time' + elif isinstance(data, pd.DataFrame): + if not data.index.equals(timesteps): + raise ConversionError("DataFrame index doesn't match timesteps index") + if not len(data.columns) == 1: + raise ConversionError('DataFrame must have exactly one column') + return xr.DataArray(data.values.flatten(), coords=coords, dims=dims) - @staticmethod - def _create_timesteps_with_extra( - timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float] - ) -> pd.DatetimeIndex: - """Create timesteps with an extra step at the end.""" - if hours_of_last_timestep is not None: - # Create the extra timestep using the specified duration - last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time') - else: - # Use the last interval as the extra timestep 
duration - last_date = pd.DatetimeIndex([timesteps[-1] + (timesteps[-1] - timesteps[-2])], name='time') + elif isinstance(data, pd.Series): + if not data.index.equals(timesteps): + raise ConversionError("Series index doesn't match timesteps index") + return xr.DataArray(data.values, coords=coords, dims=dims) - # Combine with original timesteps - return pd.DatetimeIndex(timesteps.append(last_date), name='time') + elif isinstance(data, np.ndarray): + if data.ndim != 1: + raise ConversionError(f'Array must be 1-dimensional, got {data.ndim}') + elif data.shape[0] != expected_shape[0]: + raise ConversionError(f"Array shape {data.shape} doesn't match expected {expected_shape}") + return xr.DataArray(data, coords=coords, dims=dims) - @staticmethod - def _calculate_hours_of_previous_timesteps( - timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] - ) -> Union[float, np.ndarray]: - """Calculate duration of regular timesteps.""" - if hours_of_previous_timesteps is not None: - return hours_of_previous_timesteps + elif isinstance(data, xr.DataArray): + if data.dims != tuple(dims): + raise ConversionError(f"DataArray dimensions {data.dims} don't match expected {dims}") + if data.sizes[dims[0]] != len(coords[0]): + raise ConversionError( + f"DataArray length {data.sizes[dims[0]]} doesn't match expected {len(coords[0])}: {data}" + ) + return data.copy(deep=True) - # Calculate from the first interval - first_interval = timesteps[1] - timesteps[0] - return first_interval.total_seconds() / 3600 # Convert to hours + elif isinstance(data, list): + logger.warning('Converting list to DataArray. This is not recommended.') + if len(data) != expected_shape[0]: + raise ConversionError(f"List length {len(data)} doesn't match expected {expected_shape[0]}") + return xr.DataArray(data, coords=coords, dims=dims) - @staticmethod - def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray: - """Calculate duration of each timestep.""" - # Calculate differences between consecutive timestamps - hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1) - - return xr.DataArray( - data=hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=('time',), name='hours_per_step' - ) - - def _calculate_group_weights(self) -> Dict[str, float]: - """Calculate weights for aggregation groups.""" - # Count series in each group - groups = [ts.aggregation_group for ts in self.time_series_data.values() if ts.aggregation_group is not None] - group_counts = Counter(groups) - - # Calculate weight for each group (1/count) - return {group: 1 / count for group, count in group_counts.items()} - - def _calculate_weights(self) -> Dict[str, float]: - """Calculate weights for all time series.""" - # Calculate weight for each time series - weights = {} - for name, ts in self.time_series_data.items(): - if ts.aggregation_group is not None: - # Use group weight - weights[name] = self.group_weights.get(ts.aggregation_group, 1) else: - # Use individual weight or default to 1 - weights[name] = ts.aggregation_weight or 1 - - return weights + raise ConversionError(f'Unsupported type: {type(data).__name__}') - def _format_stats(self, data) -> str: - """Format statistics for a data array.""" - if hasattr(data, 'values'): - values = data.values - else: - values = np.asarray(data) + except Exception as e: + if isinstance(e, ConversionError): + raise + raise ConversionError(f'Converting data {type(data)} to xarray.DataArray raised an error: {str(e)}') from e - mean_val = np.mean(values) - 
min_val = np.min(values)
-        max_val = np.max(values)
-        return f'mean: {mean_val:.2f}, min: {min_val:.2f}, max: {max_val:.2f}'
 
+def get_dataarray_stats(arr: xr.DataArray) -> Dict:
+    """Generate statistical summary of a DataArray."""
+    stats = {}
 
-    def __getitem__(self, name: str) -> TimeSeries:
-        """Get a TimeSeries by name."""
+    if arr.dtype.kind in 'biufc':  # bool, int, uint, float, complex
         try:
-            return self.time_series_data[name]
-        except KeyError as e:
-            raise KeyError(f'TimeSeries "{name}" not found in the TimeSeriesCollection') from e
-
-    def __iter__(self) -> Iterator[TimeSeries]:
-        """Iterate through all TimeSeries in the collection."""
-        return iter(self.time_series_data.values())
-
-    def __len__(self) -> int:
-        """Get the number of TimeSeries in the collection."""
-        return len(self.time_series_data)
-
-    def __contains__(self, item: Union[str, TimeSeries]) -> bool:
-        """Check if a TimeSeries exists in the collection."""
-        if isinstance(item, str):
-            return item in self.time_series_data
-        elif isinstance(item, TimeSeries):
-            return any([item is ts for ts in self.time_series_data.values()])
-        return False
+            stats.update(
+                {
+                    'min': float(arr.min().values),
+                    'max': float(arr.max().values),
+                    'mean': float(arr.mean().values),
+                    'median': float(arr.median().values),
+                    'std': float(arr.std().values),
+                    'count': int(arr.count().values),  # non-null count
+                }
+            )
 
-    @property
-    def non_constants(self) -> List[TimeSeries]:
-        """Get time series with varying values."""
-        return [ts for ts in self.time_series_data.values() if not ts.all_equal]
+            # Add null count only if there are nulls
+            null_count = int(arr.isnull().sum().values)
+            if null_count > 0:
+                stats['nulls'] = null_count
 
-    @property
-    def constants(self) -> List[TimeSeries]:
-        """Get time series with constant values."""
-        return [ts for ts in self.time_series_data.values() if ts.all_equal]
+        except Exception:
+            # Statistics are best-effort: if a reduction fails, return what was gathered so far
+            pass
 
-    @property
-    def timesteps(self) -> pd.DatetimeIndex:
-        """Get the active timesteps."""
-        return self.all_timesteps if self._active_timesteps is None else self._active_timesteps
+    return stats
 
-    @property
-    def timesteps_extra(self) -> pd.DatetimeIndex:
-        """Get the active timesteps with extra step."""
-        return self.all_timesteps_extra if self._active_timesteps_extra is None else self._active_timesteps_extra
 
-    @property
-    def hours_per_timestep(self) -> xr.DataArray:
-        """Get the duration of each active timestep."""
-        return (
-            self.all_hours_per_timestep if self._active_hours_per_timestep is None else self._active_hours_per_timestep
-        )
+def drop_constant_arrays(ds: xr.Dataset, dim='time', drop_arrays_without_dim: bool = True):
+    """Drop variables that are constant along `dim`, and optionally those lacking that dimension."""
+    drop_vars = []
 
-    @property
-    def hours_of_last_timestep(self) -> float:
-        """Get the duration of the last timestep."""
-        return float(self.hours_per_timestep[-1].item())
+    for name, da in ds.data_vars.items():
+        if dim in da.dims:
+            # .all() collapses any remaining dimensions, so the check also works for multi-dimensional variables
+            if (da.max(dim) == da.min(dim)).all():
+                drop_vars.append(name)
+                continue
+        elif drop_arrays_without_dim:
+            drop_vars.append(name)
 
-    def __repr__(self):
-        return f'TimeSeriesCollection:\n{self.to_dataset()}'
-
-    def __str__(self):
-        longest_name = max([time_series.name for time_series in self.time_series_data], key=len)
-
-        stats_summary = '\n'.join(
-            [
-                f'  - {time_series.name:<{len(longest_name)}}: {get_numeric_stats(time_series.active_data)}'
-                for time_series in self.time_series_data
-            ]
-        )
-
-        return (
-            f'TimeSeriesCollection with {len(self.time_series_data)} series\n'
-            f'  Time Range: 
{self.timesteps[0]} → {self.timesteps[-1]}\n' - f' No. of timesteps: {len(self.timesteps)} + 1 extra\n' - f' Hours per timestep: {get_numeric_stats(self.hours_per_timestep)}\n' - f' Time Series Data:\n' - f'{stats_summary}' - ) - - -def get_numeric_stats(data: xr.DataArray, decimals: int = 2, padd: int = 10) -> str: - """Calculates the mean, median, min, max, and standard deviation of a numeric DataArray.""" - format_spec = f'>{padd}.{decimals}f' if padd else f'.{decimals}f' - if np.unique(data).size == 1: - return f'{data.max().item():{format_spec}} (constant)' - mean = data.mean().item() - median = data.median().item() - min_val = data.min().item() - max_val = data.max().item() - std = data.std().item() - return f'{mean:{format_spec}} (mean), {median:{format_spec}} (median), {min_val:{format_spec}} (min), {max_val:{format_spec}} (max), {std:{format_spec}} (std)' + logger.debug(f'Dropping {len(drop_vars)} arrays with constant values') + return ds.drop_vars(drop_vars) diff --git a/flixopt/effects.py b/flixopt/effects.py index 82aa63a43..1d1a5216c 100644 --- a/flixopt/effects.py +++ b/flixopt/effects.py @@ -13,7 +13,7 @@ import numpy as np import pandas as pd -from .core import NumericData, NumericDataTS, Scalar, TimeSeries, TimeSeriesCollection +from .core import Scalar, TemporalData, TemporalDataUser from .features import ShareAllocationModel from .structure import Element, ElementModel, Interface, Model, SystemModel, register_class_for_io @@ -38,14 +38,14 @@ def __init__( meta_data: Optional[Dict] = None, is_standard: bool = False, is_objective: bool = False, - specific_share_to_other_effects_operation: Optional['EffectValuesUser'] = None, - specific_share_to_other_effects_invest: Optional['EffectValuesUser'] = None, + specific_share_to_other_effects_operation: Optional['TemporalEffectsUser'] = None, + specific_share_to_other_effects_invest: Optional['ScalarEffectsUser'] = None, minimum_operation: Optional[Scalar] = None, maximum_operation: Optional[Scalar] = None, minimum_invest: Optional[Scalar] = None, maximum_invest: Optional[Scalar] = None, - minimum_operation_per_hour: Optional[NumericDataTS] = None, - maximum_operation_per_hour: Optional[NumericDataTS] = None, + minimum_operation_per_hour: Optional[TemporalDataUser] = None, + maximum_operation_per_hour: Optional[TemporalDataUser] = None, minimum_total: Optional[Scalar] = None, maximum_total: Optional[Scalar] = None, ): @@ -76,28 +76,28 @@ def __init__( self.description = description self.is_standard = is_standard self.is_objective = is_objective - self.specific_share_to_other_effects_operation: EffectValuesUser = ( + self.specific_share_to_other_effects_operation: TemporalEffectsUser = ( specific_share_to_other_effects_operation or {} ) - self.specific_share_to_other_effects_invest: EffectValuesUser = specific_share_to_other_effects_invest or {} + self.specific_share_to_other_effects_invest: ScalarEffectsUser = specific_share_to_other_effects_invest or {} self.minimum_operation = minimum_operation self.maximum_operation = maximum_operation - self.minimum_operation_per_hour: NumericDataTS = minimum_operation_per_hour - self.maximum_operation_per_hour: NumericDataTS = maximum_operation_per_hour + self.minimum_operation_per_hour: TemporalDataUser = minimum_operation_per_hour + self.maximum_operation_per_hour: TemporalDataUser = maximum_operation_per_hour self.minimum_invest = minimum_invest self.maximum_invest = maximum_invest self.minimum_total = minimum_total self.maximum_total = maximum_total def transform_data(self, flow_system: 
'FlowSystem'):
-        self.minimum_operation_per_hour = flow_system.create_time_series(
+        self.minimum_operation_per_hour = flow_system.fit_to_model_coords(
             f'{self.label_full}|minimum_operation_per_hour', self.minimum_operation_per_hour
         )
-        self.maximum_operation_per_hour = flow_system.create_time_series(
-            f'{self.label_full}|maximum_operation_per_hour', self.maximum_operation_per_hour, flow_system
+        self.maximum_operation_per_hour = flow_system.fit_to_model_coords(
+            f'{self.label_full}|maximum_operation_per_hour', self.maximum_operation_per_hour
         )
-        self.specific_share_to_other_effects_operation = flow_system.create_effect_time_series(
+        self.specific_share_to_other_effects_operation = flow_system.fit_effects_to_model_coords(
             f'{self.label_full}|operation->', self.specific_share_to_other_effects_operation, 'operation'
         )
 
@@ -137,10 +137,10 @@ def __init__(self, model: SystemModel, element: Effect):
             label_full=f'{self.label_full}(operation)',
             total_max=self.element.maximum_operation,
             total_min=self.element.minimum_operation,
-            min_per_hour=self.element.minimum_operation_per_hour.active_data
+            min_per_hour=self.element.minimum_operation_per_hour
             if self.element.minimum_operation_per_hour is not None
             else None,
-            max_per_hour=self.element.maximum_operation_per_hour.active_data
+            max_per_hour=self.element.maximum_operation_per_hour
             if self.element.maximum_operation_per_hour is not None
             else None,
         )
@@ -168,14 +168,19 @@ def do_modeling(self):
         )
 
 
-EffectValuesExpr = Dict[str, linopy.LinearExpression]  # Used to create Shares
-EffectTimeSeries = Dict[str, TimeSeries]  # Used internally to index values
-EffectValuesDict = Dict[str, NumericDataTS]  # How effect values are stored
-EffectValuesUser = Union[NumericDataTS, Dict[str, NumericDataTS]]  # User-specified Shares to Effects
-""" This datatype is used to define the share to an effect by a certain attribute. """
+TemporalEffectsUser = Union[TemporalDataUser, Dict[str, TemporalDataUser]]  # User-specified Shares to Effects
+""" This datatype is used to define a temporal share to an effect by a certain attribute. """
 
-EffectValuesUserScalar = Union[Scalar, Dict[str, Scalar]]  # User-specified Shares to Effects
-""" This datatype is used to define the share to an effect by a certain attribute. Only scalars are allowed. """
+ScalarEffectsUser = Union[Scalar, Dict[str, Scalar]]  # User-specified Shares to Effects
+""" This datatype is used to define a scalar share to an effect by a certain attribute. """
+
+TemporalEffects = Dict[str, TemporalData]  # Internal representation of temporal Shares to Effects
+""" This datatype is used internally to handle temporal shares to an effect. """
+
+ScalarEffects = Dict[str, Scalar]  # Internal representation of scalar Shares to Effects
+""" This datatype is used internally to handle scalar shares to an effect. """
+
+EffectExpr = Dict[str, linopy.LinearExpression]  # Used to create Shares
 
 
 class EffectCollection:
@@ -207,7 +212,10 @@ def add_effects(self, *effects: Effect) -> None:
             self._effects[effect.label] = effect
             logger.info(f'Registered new Effect: {effect.label}')
 
-    def create_effect_values_dict(self, effect_values_user: EffectValuesUser) -> Optional[EffectValuesDict]:
+    def create_effect_values_dict(
+        self,
+        effect_values_user: Union[ScalarEffectsUser, TemporalEffectsUser]
+    ) -> Optional[Dict[str, Union[Scalar, TemporalDataUser]]]:
         """
         Converts effect values into a dictionary.
         If a scalar is provided, it is associated with a default effect type. 
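+
+        For example (a sketch; assumes an Effect labeled 'costs' is the standard effect):
+        a scalar input `42` becomes `{'costs': 42}`, while a dict such as
+        `{'costs': 42, 'CO2': 0.3}` is returned with its keys resolved to effect labels.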
@@ -233,6 +241,8 @@ def get_effect_label(eff: Union[Effect, str]) -> str: stacklevel=2, ) return eff.label_full + elif eff is None: + return self.standard_effect.label_full else: return eff @@ -341,7 +351,7 @@ def __init__(self, model: SystemModel, effects: EffectCollection): def add_share_to_effects( self, name: str, - expressions: EffectValuesExpr, + expressions: EffectExpr, target: Literal['operation', 'invest'], ) -> None: for effect, expression in expressions.items(): @@ -376,7 +386,7 @@ def _add_share_between_effects(self): for target_effect, time_series in origin_effect.specific_share_to_other_effects_operation.items(): self.effects[target_effect].model.operation.add_share( origin_effect.model.operation.label_full, - origin_effect.model.operation.total_per_timestep * time_series.active_data, + origin_effect.model.operation.total_per_timestep * time_series, ) # 2. invest: -> hier ist es Scalar (share) for target_effect, factor in origin_effect.specific_share_to_other_effects_invest.items(): diff --git a/flixopt/elements.py b/flixopt/elements.py index a0bd8c91f..a49a12f0d 100644 --- a/flixopt/elements.py +++ b/flixopt/elements.py @@ -10,8 +10,8 @@ import numpy as np from .config import CONFIG -from .core import NumericData, NumericDataTS, PlausibilityError, Scalar, TimeSeriesCollection -from .effects import EffectValuesUser +from .core import PlausibilityError, Scalar, TemporalData, TemporalDataUser +from .effects import TemporalEffectsUser from .features import InvestmentModel, OnOffModel, PreventSimultaneousUsageModel from .interface import InvestParameters, OnOffParameters from .structure import Element, ElementModel, SystemModel, register_class_for_io @@ -72,12 +72,6 @@ def transform_data(self, flow_system: 'FlowSystem') -> None: if self.on_off_parameters is not None: self.on_off_parameters.transform_data(flow_system, self.label_full) - def infos(self, use_numpy=True, use_element_label: bool = False) -> Dict: - infos = super().infos(use_numpy, use_element_label) - infos['inputs'] = [flow.infos(use_numpy, use_element_label) for flow in self.inputs] - infos['outputs'] = [flow.infos(use_numpy, use_element_label) for flow in self.outputs] - return infos - def _check_unique_flow_labels(self): all_flow_labels = [flow.label for flow in self.inputs + self.outputs] @@ -96,7 +90,7 @@ class Bus(Element): """ def __init__( - self, label: str, excess_penalty_per_flow_hour: Optional[NumericDataTS] = 1e5, meta_data: Optional[Dict] = None + self, label: str, excess_penalty_per_flow_hour: Optional[TemporalDataUser] = 1e5, meta_data: Optional[Dict] = None ): """ Args: @@ -117,13 +111,13 @@ def create_model(self, model: SystemModel) -> 'BusModel': return self.model def transform_data(self, flow_system: 'FlowSystem'): - self.excess_penalty_per_flow_hour = flow_system.create_time_series( + self.excess_penalty_per_flow_hour = flow_system.fit_to_model_coords( f'{self.label_full}|excess_penalty_per_flow_hour', self.excess_penalty_per_flow_hour ) def _plausibility_checks(self) -> None: if self.excess_penalty_per_flow_hour is not None and (self.excess_penalty_per_flow_hour == 0).all(): - logger.warning(f'In Bus {self.label}, the excess_penalty_per_flow_hour is 0. Use "None" or a value > 0.') + logger.warning(f'In Bus {self.label_full}, the excess_penalty_per_flow_hour is 0. 
Use "None" or a value > 0.') @property def with_excess(self) -> bool: @@ -155,16 +149,16 @@ def __init__( label: str, bus: str, size: Union[Scalar, InvestParameters] = None, - fixed_relative_profile: Optional[NumericDataTS] = None, - relative_minimum: NumericDataTS = 0, - relative_maximum: NumericDataTS = 1, - effects_per_flow_hour: Optional[EffectValuesUser] = None, + fixed_relative_profile: Optional[TemporalDataUser] = None, + relative_minimum: TemporalDataUser = 0, + relative_maximum: TemporalDataUser = 1, + effects_per_flow_hour: Optional[TemporalEffectsUser] = None, on_off_parameters: Optional[OnOffParameters] = None, flow_hours_total_max: Optional[Scalar] = None, flow_hours_total_min: Optional[Scalar] = None, load_factor_min: Optional[Scalar] = None, load_factor_max: Optional[Scalar] = None, - previous_flow_rate: Optional[NumericData] = None, + previous_flow_rate: Optional[TemporalDataUser] = None, meta_data: Optional[Dict] = None, ): r""" @@ -236,16 +230,16 @@ def create_model(self, model: SystemModel) -> 'FlowModel': return self.model def transform_data(self, flow_system: 'FlowSystem'): - self.relative_minimum = flow_system.create_time_series( + self.relative_minimum = flow_system.fit_to_model_coords( f'{self.label_full}|relative_minimum', self.relative_minimum ) - self.relative_maximum = flow_system.create_time_series( + self.relative_maximum = flow_system.fit_to_model_coords( f'{self.label_full}|relative_maximum', self.relative_maximum ) - self.fixed_relative_profile = flow_system.create_time_series( + self.fixed_relative_profile = flow_system.fit_to_model_coords( f'{self.label_full}|fixed_relative_profile', self.fixed_relative_profile ) - self.effects_per_flow_hour = flow_system.create_effect_time_series( + self.effects_per_flow_hour = flow_system.fit_effects_to_model_coords( self.label_full, self.effects_per_flow_hour, 'per_flow_hour' ) if self.on_off_parameters is not None: @@ -253,17 +247,6 @@ def transform_data(self, flow_system: 'FlowSystem'): if isinstance(self.size, InvestParameters): self.size.transform_data(flow_system) - def infos(self, use_numpy: bool = True, use_element_label: bool = False) -> Dict: - infos = super().infos(use_numpy, use_element_label) - infos['is_input_in_component'] = self.is_input_in_component - return infos - - def to_dict(self) -> Dict: - data = super().to_dict() - if isinstance(data.get('previous_flow_rate'), np.ndarray): - data['previous_flow_rate'] = data['previous_flow_rate'].tolist() - return data - def _plausibility_checks(self) -> None: # TODO: Incorporate into Variable? (Lower_bound can not be greater than upper bound if np.any(self.relative_minimum > self.relative_maximum): @@ -273,21 +256,21 @@ def _plausibility_checks(self) -> None: self.size == CONFIG.modeling.BIG and self.fixed_relative_profile is not None ): # Default Size --> Most likely by accident logger.warning( - f'Flow "{self.label}" has no size assigned, but a "fixed_relative_profile". ' + f'Flow "{self.label_full}" has no size assigned, but a "fixed_relative_profile". ' f'The default size is {CONFIG.modeling.BIG}. As "flow_rate = size * fixed_relative_profile", ' f'the resulting flow_rate will be very high. To fix this, assign a size to the Flow {self}.' ) if self.fixed_relative_profile is not None and self.on_off_parameters is not None: raise ValueError( - f'Flow {self.label} has both a fixed_relative_profile and an on_off_parameters. This is not supported. ' + f'Flow {self.label_full} has both a fixed_relative_profile and an on_off_parameters. This is not supported. 
' f'Use relative_minimum and relative_maximum instead, ' f'if you want to allow flows to be switched on and off.' ) if (self.relative_minimum > 0).any() and self.on_off_parameters is None: logger.warning( - f'Flow {self.label} has a relative_minimum of {self.relative_minimum.active_data} and no on_off_parameters. ' + f'Flow {self.label_full} has a relative_minimum of {self.relative_minimum} and no on_off_parameters. ' f'This prevents the flow_rate from switching off (flow_rate = 0). ' f'Consider using on_off_parameters to allow the flow to be switched on and off.' ) @@ -390,7 +373,7 @@ def _create_shares(self): self._model.effects.add_share_to_effects( name=self.label_full, # Use the full label of the element expressions={ - effect: self.flow_rate * self._model.hours_per_step * factor.active_data + effect: self.flow_rate * self._model.hours_per_step * factor for effect, factor in self.element.effects_per_flow_hour.items() }, target='operation', @@ -428,7 +411,7 @@ def _create_bounds_for_load_factor(self): ) @property - def flow_rate_bounds_on(self) -> Tuple[NumericData, NumericData]: + def flow_rate_bounds_on(self) -> Tuple[TemporalData, TemporalData]: """Returns absolute flow rate bounds. Important for OnOffModel""" relative_minimum, relative_maximum = self.flow_rate_lower_bound_relative, self.flow_rate_upper_bound_relative size = self.element.size @@ -439,23 +422,23 @@ def flow_rate_bounds_on(self) -> Tuple[NumericData, NumericData]: return relative_minimum * size.minimum_size, relative_maximum * size.maximum_size @property - def flow_rate_lower_bound_relative(self) -> NumericData: + def flow_rate_lower_bound_relative(self) -> TemporalData: """Returns the lower bound of the flow_rate relative to its size""" fixed_profile = self.element.fixed_relative_profile if fixed_profile is None: - return self.element.relative_minimum.active_data - return fixed_profile.active_data + return self.element.relative_minimum + return fixed_profile @property - def flow_rate_upper_bound_relative(self) -> NumericData: + def flow_rate_upper_bound_relative(self) -> TemporalData: """ Returns the upper bound of the flow_rate relative to its size""" fixed_profile = self.element.fixed_relative_profile if fixed_profile is None: - return self.element.relative_maximum.active_data - return fixed_profile.active_data + return self.element.relative_maximum + return fixed_profile @property - def flow_rate_lower_bound(self) -> NumericData: + def flow_rate_lower_bound(self) -> TemporalData: """ Returns the minimum bound the flow_rate can reach. Further constraining might be done in OnOffModel and InvestmentModel @@ -469,7 +452,7 @@ def flow_rate_lower_bound(self) -> NumericData: return self.flow_rate_lower_bound_relative * self.element.size @property - def flow_rate_upper_bound(self) -> NumericData: + def flow_rate_upper_bound(self) -> TemporalData: """ Returns the maximum bound the flow_rate can reach. 
Further constraining might be done in OnOffModel and InvestmentModel @@ -497,7 +480,7 @@ def do_modeling(self) -> None: # Fehlerplus/-minus: if self.element.with_excess: excess_penalty = np.multiply( - self._model.hours_per_step, self.element.excess_penalty_per_flow_hour.active_data + self._model.hours_per_step, self.element.excess_penalty_per_flow_hour ) self.excess_input = self.add( self._model.add_variables(lower=0, coords=self._model.coords, name=f'{self.label_full}|excess_input'), diff --git a/flixopt/features.py b/flixopt/features.py index c2a62adb1..287f4e933 100644 --- a/flixopt/features.py +++ b/flixopt/features.py @@ -11,7 +11,7 @@ from . import utils from .config import CONFIG -from .core import NumericData, Scalar, TimeSeries +from .core import Scalar, TemporalData from .interface import InvestParameters, OnOffParameters, Piecewise from .structure import Model, SystemModel @@ -27,7 +27,7 @@ def __init__( label_of_element: str, parameters: InvestParameters, defining_variable: [linopy.Variable], - relative_bounds_of_defining_variable: Tuple[NumericData, NumericData], + relative_bounds_of_defining_variable: Tuple[TemporalData, TemporalData], label: Optional[str] = None, on_variable: Optional[linopy.Variable] = None, ): @@ -203,12 +203,12 @@ def __init__( model: SystemModel, label_of_element: str, defining_variables: List[linopy.Variable], - defining_bounds: List[Tuple[NumericData, NumericData]], - previous_values: List[Optional[NumericData]] = None, + defining_bounds: List[Tuple[TemporalData, TemporalData]], + previous_values: List[Optional[TemporalData]] = None, use_off: bool = True, - on_hours_total_min: Optional[NumericData] = 0, - on_hours_total_max: Optional[NumericData] = None, - effects_per_running_hour: Dict[str, NumericData] = None, + on_hours_total_min: Optional[TemporalData] = 0, + on_hours_total_max: Optional[TemporalData] = None, + effects_per_running_hour: Dict[str, TemporalData] = None, label: Optional[str] = None, ): """ @@ -344,7 +344,7 @@ def previous_off_states(self): return 1 - self.previous_states @staticmethod - def compute_previous_states(previous_values: List[NumericData], epsilon: float = 1e-5) -> np.ndarray: + def compute_previous_states(previous_values: List[TemporalData], epsilon: float = 1e-5) -> np.ndarray: """Computes the previous states {0, 1} of defining variables as a binary array from their previous values.""" if not previous_values or all([val is None for val in previous_values]): return np.array([0]) @@ -451,9 +451,9 @@ def __init__( model: SystemModel, label_of_element: str, state_variable: linopy.Variable, - minimum_duration: Optional[NumericData] = None, - maximum_duration: Optional[NumericData] = None, - previous_states: Optional[NumericData] = None, + minimum_duration: Optional[TemporalData] = None, + maximum_duration: Optional[TemporalData] = None, + previous_states: Optional[TemporalData] = None, label: Optional[str] = None, ): """ @@ -474,11 +474,6 @@ def __init__( self._minimum_duration = minimum_duration self._maximum_duration = maximum_duration - if isinstance(self._minimum_duration, TimeSeries): - self._minimum_duration = self._minimum_duration.active_data - if isinstance(self._maximum_duration, TimeSeries): - self._maximum_duration = self._maximum_duration.active_data - self.duration = None def do_modeling(self): @@ -575,7 +570,7 @@ def previous_duration(self) -> Scalar: @staticmethod def compute_consecutive_hours_in_state( - binary_values: NumericData, hours_per_timestep: Union[int, float, np.ndarray] + binary_values: 
TemporalData, hours_per_timestep: Union[int, float, np.ndarray]
     ) -> Scalar:
         """
         Computes the final consecutive duration in state 'on' (=1) in hours, from a binary array.
@@ -634,8 +629,8 @@ def __init__(
         on_off_parameters: OnOffParameters,
         label_of_element: str,
         defining_variables: List[linopy.Variable],
-        defining_bounds: List[Tuple[NumericData, NumericData]],
-        previous_values: List[Optional[NumericData]],
+        defining_bounds: List[Tuple[TemporalData, TemporalData]],
+        previous_values: List[Optional[TemporalData]],
         label: Optional[str] = None,
     ):
         """
@@ -923,8 +918,8 @@ def __init__(
         label_full: Optional[str] = None,
         total_max: Optional[Scalar] = None,
         total_min: Optional[Scalar] = None,
-        max_per_hour: Optional[NumericData] = None,
-        min_per_hour: Optional[NumericData] = None,
+        max_per_hour: Optional[TemporalData] = None,
+        min_per_hour: Optional[TemporalData] = None,
     ):
         super().__init__(model, label_of_element=label_of_element, label=label, label_full=label_full)
         if not shares_are_time_series:  # If the condition is True
diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py
index 93720de60..306872674 100644
--- a/flixopt/flow_system.py
+++ b/flixopt/flow_system.py
@@ -7,7 +7,7 @@
 import pathlib
 import warnings
 from io import StringIO
-from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -16,10 +16,10 @@
 from rich.pretty import Pretty
 
 from . import io as fx_io
-from .core import NumericData, NumericDataTS, TimeSeries, TimeSeriesCollection, TimeSeriesData
-from .effects import Effect, EffectCollection, EffectTimeSeries, EffectValuesDict, EffectValuesUser
+from .core import ConversionError, DataConverter, TemporalData, TemporalDataUser, TimeSeriesData
+from .effects import Effect, EffectCollection, ScalarEffects, ScalarEffectsUser, TemporalEffects, TemporalEffectsUser
 from .elements import Bus, Component, Flow
-from .structure import CLASS_REGISTRY, Element, SystemModel, get_compact_representation, get_str_representation
+from .structure import Element, Interface, SystemModel
 
 if TYPE_CHECKING:
     import pyvis
@@ -27,9 +27,20 @@
 logger = logging.getLogger('flixopt')
 
 
-class FlowSystem:
+class FlowSystem(Interface):
     """
-    A FlowSystem organizes the high level Elements (Components & Effects).
+    FlowSystem serves as the main container for energy system modeling, organizing
+    high-level elements including Components (like boilers, heat pumps, storages),
+    Buses (connection points), and Effects (system-wide influences). It handles
+    time series data management and network connectivity, and provides serialization
+    capabilities for saving and loading complete system configurations.
+
+    The system uses xarray.Dataset for efficient time series data handling and can be exported to and restored from netCDF.
+
+    See Also:
+        Component: Base class for system components like boilers, heat pumps.
+        Bus: Connection points for flows between components.
+        Effect: System-wide effects, like the optimization objective.
     """
 
     def __init__(
@@ -47,74 +58,273 @@ def __init__(
             This is needed to calculate previous durations (for example consecutive_on_hours).
             If you use an array, take care that its long enough to cover all previous values! 
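+
+        Example:
+            A minimal sketch (hourly resolution; names are illustrative):
+
+            >>> timesteps = pd.date_range('2024-01-01', periods=24, freq='h', name='time')
+            >>> flow_system = FlowSystem(timesteps)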
""" - self.time_series_collection = TimeSeriesCollection( - timesteps=timesteps, - hours_of_last_timestep=hours_of_last_timestep, - hours_of_previous_timesteps=hours_of_previous_timesteps, + # Store timing information directly + self.timesteps = self._validate_timesteps(timesteps) + self.timesteps_extra = self._create_timesteps_with_extra(timesteps, hours_of_last_timestep) + self.hours_per_timestep = self.calculate_hours_per_timestep(self.timesteps_extra) + self.hours_of_previous_timesteps = self._calculate_hours_of_previous_timesteps( + timesteps, hours_of_previous_timesteps ) - # defaults: + # Element collections self.components: Dict[str, Component] = {} self.buses: Dict[str, Bus] = {} self.effects: EffectCollection = EffectCollection() self.model: Optional[SystemModel] = None - self._connected = False + self._connected_and_transformed = False + self._used_in_calculation = False + + @staticmethod + def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex: + """Validate timesteps format and rename if needed.""" + if not isinstance(timesteps, pd.DatetimeIndex): + raise TypeError('timesteps must be a pandas DatetimeIndex') + if len(timesteps) < 2: + raise ValueError('timesteps must contain at least 2 timestamps') + if timesteps.name != 'time': + timesteps.name = 'time' + if not timesteps.is_monotonic_increasing: + raise ValueError('timesteps must be sorted') + return timesteps + + @staticmethod + def _create_timesteps_with_extra( + timesteps: pd.DatetimeIndex, hours_of_last_timestep: Optional[float] + ) -> pd.DatetimeIndex: + """Create timesteps with an extra step at the end.""" + if hours_of_last_timestep is None: + hours_of_last_timestep = (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1) + + last_date = pd.DatetimeIndex([timesteps[-1] + pd.Timedelta(hours=hours_of_last_timestep)], name='time') + return pd.DatetimeIndex(timesteps.append(last_date), name='time') + + @staticmethod + def calculate_hours_per_timestep(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray: + """Calculate duration of each timestep.""" + hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1) + return xr.DataArray( + hours_per_step, coords={'time': timesteps_extra[:-1]}, dims=['time'], name='hours_per_timestep' + ) + + @staticmethod + def _calculate_hours_of_previous_timesteps( + timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: Optional[Union[float, np.ndarray]] + ) -> Union[float, np.ndarray]: + """Calculate duration of regular timesteps.""" + if hours_of_previous_timesteps is not None: + return hours_of_previous_timesteps + # Calculate from the first interval + first_interval = timesteps[1] - timesteps[0] + return first_interval.total_seconds() / 3600 # Convert to hours + + def _create_reference_structure(self) -> Tuple[Dict, Dict[str, xr.DataArray]]: + """ + Override Interface method to handle FlowSystem-specific serialization. + Combines custom FlowSystem logic with Interface pattern for nested objects. 
+ + Returns: + Tuple of (reference_structure, extracted_arrays_dict) + """ + # Start with Interface base functionality for constructor parameters + reference_structure, all_extracted_arrays = super()._create_reference_structure() + + # Remove timesteps, as it's directly stored in dataset index + reference_structure.pop('timesteps', None) + + # Extract from components + components_structure = {} + for comp_label, component in self.components.items(): + comp_structure, comp_arrays = component._create_reference_structure() + all_extracted_arrays.update(comp_arrays) + components_structure[comp_label] = comp_structure + reference_structure['components'] = components_structure + + # Extract from buses + buses_structure = {} + for bus_label, bus in self.buses.items(): + bus_structure, bus_arrays = bus._create_reference_structure() + all_extracted_arrays.update(bus_arrays) + buses_structure[bus_label] = bus_structure + reference_structure['buses'] = buses_structure + + # Extract from effects + effects_structure = {} + for effect in self.effects: + effect_structure, effect_arrays = effect._create_reference_structure() + all_extracted_arrays.update(effect_arrays) + effects_structure[effect.label] = effect_structure + reference_structure['effects'] = effects_structure + + return reference_structure, all_extracted_arrays + + def to_dataset(self) -> xr.Dataset: + """ + Convert the FlowSystem to an xarray Dataset. + Ensures FlowSystem is connected before serialization. + + Returns: + xr.Dataset: Dataset containing all DataArrays with structure in attributes + """ + if not self._connected_and_transformed: + logger.warning('FlowSystem is not connected_and_transformed. Connecting and transforming data now.') + self.connect_and_transform() + + return super().to_dataset() @classmethod - def from_dataset(cls, ds: xr.Dataset): - timesteps_extra = pd.DatetimeIndex(ds.attrs['timesteps_extra'], name='time') - hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() - - flow_system = FlowSystem( - timesteps=timesteps_extra[:-1], - hours_of_last_timestep=hours_of_last_timestep, - hours_of_previous_timesteps=ds.attrs['hours_of_previous_timesteps'], - ) + def from_dataset(cls, ds: xr.Dataset) -> 'FlowSystem': + """ + Create a FlowSystem from an xarray Dataset. + Handles FlowSystem-specific reconstruction logic. 
- structure = fx_io.insert_dataarray({key: ds.attrs[key] for key in ['components', 'buses', 'effects']}, ds) - flow_system.add_elements( - *[Bus.from_dict(bus) for bus in structure['buses'].values()] - + [Effect.from_dict(effect) for effect in structure['effects'].values()] - + [CLASS_REGISTRY[comp['__class__']].from_dict(comp) for comp in structure['components'].values()] + Args: + ds: Dataset containing the FlowSystem data + + Returns: + FlowSystem instance + """ + # Get the reference structure from attrs + reference_structure = dict(ds.attrs) + + # Create FlowSystem instance with constructor parameters + flow_system = cls( + timesteps=ds.indexes['time'], + hours_of_last_timestep=reference_structure.get('hours_of_last_timestep'), + hours_of_previous_timesteps=reference_structure.get('hours_of_previous_timesteps'), ) + + # Create arrays dictionary from dataset variables + arrays_dict = {name: array for name, array in ds.data_vars.items()} + + # Restore components + components_structure = reference_structure.get('components', {}) + for comp_label, comp_data in components_structure.items(): + component = cls._resolve_reference_structure(comp_data, arrays_dict) + if not isinstance(component, Component): + logger.critical(f'Restoring component {comp_label} failed.') + flow_system._add_components(component) + + # Restore buses + buses_structure = reference_structure.get('buses', {}) + for bus_label, bus_data in buses_structure.items(): + bus = cls._resolve_reference_structure(bus_data, arrays_dict) + if not isinstance(bus, Bus): + logger.critical(f'Restoring bus {bus_label} failed.') + flow_system._add_buses(bus) + + # Restore effects + effects_structure = reference_structure.get('effects', {}) + for effect_label, effect_data in effects_structure.items(): + effect = cls._resolve_reference_structure(effect_data, arrays_dict) + if not isinstance(effect, Effect): + logger.critical(f'Restoring effect {effect_label} failed.') + flow_system._add_effects(effect) + return flow_system - @classmethod - def from_dict(cls, data: Dict) -> 'FlowSystem': + def to_netcdf(self, path: Union[str, pathlib.Path], compression: int = 0): """ - Load a FlowSystem from a dictionary. + Save the FlowSystem to a NetCDF file. + Ensures FlowSystem is connected before saving. Args: - data: Dictionary containing the FlowSystem data. + path: The path to the netCDF file. + compression: The compression level to use when saving the file. """ - timesteps_extra = pd.DatetimeIndex(data['timesteps_extra'], name='time') - hours_of_last_timestep = TimeSeriesCollection.calculate_hours_per_timestep(timesteps_extra).isel(time=-1).item() + if not self._connected_and_transformed: + logger.warning('FlowSystem is not connected. Calling connect_and_transform() now.') + self.connect_and_transform() - flow_system = FlowSystem( - timesteps=timesteps_extra[:-1], - hours_of_last_timestep=hours_of_last_timestep, - hours_of_previous_timesteps=data['hours_of_previous_timesteps'], - ) + super().to_netcdf(path, compression) + logger.info(f'Saved FlowSystem to {path}') - flow_system.add_elements(*[Bus.from_dict(bus) for bus in data['buses'].values()]) + def get_structure(self, clean: bool = False, stats: bool = False) -> Dict: + """ + Get FlowSystem structure. + Ensures FlowSystem is connected before getting structure. - flow_system.add_elements(*[Effect.from_dict(effect) for effect in data['effects'].values()]) + Args: + clean: If True, remove None and empty dicts and lists. 
+            stats: If True, replace DataArray references with statistics
+        """
+        if not self._connected_and_transformed:
+            logger.warning('FlowSystem is not connected. Calling connect_and_transform() now.')
+            self.connect_and_transform()
 
-        flow_system.add_elements(*[Effect.from_dict(effect) for effect in data['effects'].values()])
+        return super().get_structure(clean, stats)
 
-        flow_system.add_elements(
-            *[CLASS_REGISTRY[comp['__class__']].from_dict(comp) for comp in data['components'].values()]
-        )
+    def to_json(self, path: Union[str, pathlib.Path]):
+        """
+        Save the flow system to a JSON file.
+        Ensures FlowSystem is connected before saving.
 
-        flow_system.transform_data()
+        Args:
+            path: The path to the JSON file.
+        """
+        if not self._connected_and_transformed:
+            logger.warning('FlowSystem needs to be connected and transformed before saving to JSON. Calling connect_and_transform() now.')
+            self.connect_and_transform()
 
-        return flow_system
+        super().to_json(path)
 
-    @classmethod
-    def from_netcdf(cls, path: Union[str, pathlib.Path]):
+    def fit_to_model_coords(
+        self,
+        name: str,
+        data: Optional[TemporalDataUser],
+    ) -> Optional[TemporalData]:
+        """
+        Fit data to model coordinate system (currently time, but extensible).
+
+        Args:
+            name: Name of the data
+            data: Data to fit to model coordinates
+
+        Returns:
+            xr.DataArray aligned to model coordinate system
         """
-        Load a FlowSystem from a netcdf file
+        if data is None:
+            return None
+
+        if isinstance(data, TimeSeriesData):
+            try:
+                data.name = name  # Rename the user-provided object in place as well
+                return TimeSeriesData(
+                    DataConverter.to_dataarray(data, timesteps=self.timesteps),
+                    aggregation_group=data.aggregation_group, aggregation_weight=data.aggregation_weight
+                ).rename(name)
+            except ConversionError as e:
+                logger.critical(f'Could not convert time series data "{name}" to DataArray: {e}. \n'
+                                f'Take care to use the correct (time) index.')
+                raise  # Re-raise instead of silently returning None
+        else:
+            return DataConverter.to_dataarray(data, timesteps=self.timesteps).rename(name)
+
+    def fit_effects_to_model_coords(
+        self,
+        label_prefix: Optional[str],
+        effect_values: Optional[TemporalEffectsUser],
+        label_suffix: Optional[str] = None,
+    ) -> Optional[TemporalEffects]:
+        """
+        Transform user-specified EffectValues into internal datatypes aligned with model coordinates.
         """
-        return cls.from_dataset(fx_io.load_dataset_from_netcdf(path))
+        if effect_values is None:
+            return None
+
+        effect_values_dict = self.effects.create_effect_values_dict(effect_values)
+
+        return {
+            effect: self.fit_to_model_coords('|'.join(filter(None, [label_prefix, effect, label_suffix])), value)
+            for effect, value in effect_values_dict.items()
+        }
+
+    def connect_and_transform(self):
+        """Transform data for all elements using the new simplified approach."""
+        if not self._connected_and_transformed:
+            self._connect_network()
+            for element in self.all_elements.values():
+                element.transform_data(self)
+            self._connected_and_transformed = True
 
     def add_elements(self, *elements: Element) -> None:
         """
@@ -124,12 +334,12 @@ def add_elements(self, *elements: Element) -> None:
            *elements: childs of Element like Boiler, HeatPump, Bus,...
                modeling Elements
 
         """
-        if self._connected:
+        if self._connected_and_transformed:
             warnings.warn(
                 'You are adding elements to an already connected FlowSystem. 
This is not recommended (But it works).', stacklevel=2, ) - self._connected = False + self._connected_and_transformed = False for new_element in list(elements): if isinstance(new_element, Component): self._add_components(new_element) @@ -142,63 +352,11 @@ def add_elements(self, *elements: Element) -> None: f'Tried to add incompatible object to FlowSystem: {type(new_element)=}: {new_element=} ' ) - def to_json(self, path: Union[str, pathlib.Path]): - """ - Saves the flow system to a json file. - This not meant to be reloaded and recreate the object, - but rather used to document or compare the flow_system to others. - - Args: - path: The path to the json file. - """ - with open(path, 'w', encoding='utf-8') as f: - json.dump(self.as_dict('stats'), f, indent=4, ensure_ascii=False) - - def as_dict(self, data_mode: Literal['data', 'name', 'stats'] = 'data') -> Dict: - """Convert the object to a dictionary representation.""" - data = { - 'components': { - comp.label: comp.to_dict() - for comp in sorted(self.components.values(), key=lambda component: component.label.upper()) - }, - 'buses': { - bus.label: bus.to_dict() for bus in sorted(self.buses.values(), key=lambda bus: bus.label.upper()) - }, - 'effects': { - effect.label: effect.to_dict() - for effect in sorted(self.effects, key=lambda effect: effect.label.upper()) - }, - 'timesteps_extra': [date.isoformat() for date in self.time_series_collection.timesteps_extra], - 'hours_of_previous_timesteps': self.time_series_collection.hours_of_previous_timesteps, - } - if data_mode == 'data': - return fx_io.replace_timeseries(data, 'data') - elif data_mode == 'stats': - return fx_io.remove_none_and_empty(fx_io.replace_timeseries(data, data_mode)) - return fx_io.replace_timeseries(data, data_mode) - - def as_dataset(self, constants_in_dataset: bool = False) -> xr.Dataset: - """ - Convert the FlowSystem to a xarray Dataset. - - Args: - constants_in_dataset: If True, constants are included as Dataset variables. - """ - ds = self.time_series_collection.to_dataset(include_constants=constants_in_dataset) - ds.attrs = self.as_dict(data_mode='name') - return ds - - def to_netcdf(self, path: Union[str, pathlib.Path], compression: int = 0, constants_in_dataset: bool = True): - """ - Saves the FlowSystem to a netCDF file. - Args: - path: The path to the netCDF file. - compression: The compression level to use when saving the file. - constants_in_dataset: If True, constants are included as Dataset variables. - """ - ds = self.as_dataset(constants_in_dataset=constants_in_dataset) - fx_io.save_dataset_to_netcdf(ds, path, compression=compression) - logger.info(f'Saved FlowSystem to {path}') + def create_model(self) -> SystemModel: + if not self._connected_and_transformed: + raise RuntimeError('FlowSystem is not connected_and_transformed. Call FlowSystem.connect_and_transform() first.') + self.model = SystemModel(self) + return self.model def plot_network( self, @@ -213,28 +371,6 @@ def plot_network( ) -> Optional['pyvis.network.Network']: """ Visualizes the network structure of a FlowSystem using PyVis, saving it as an interactive HTML file. - - Args: - path: Path to save the HTML visualization. - - `False`: Visualization is created but not saved. - - `str` or `Path`: Specifies file path (default: 'flow_system.html'). - controls: UI controls to add to the visualization. - - `True`: Enables all available controls. - - `List`: Specify controls, e.g., ['nodes', 'layout']. 
- - Options: 'nodes', 'edges', 'layout', 'interaction', 'manipulation', 'physics', 'selection', 'renderer'. - show: Whether to open the visualization in the web browser. - - Returns: - - Optional[pyvis.network.Network]: The `Network` instance representing the visualization, or `None` if `pyvis` is not installed. - - Examples: - >>> flow_system.plot_network() - >>> flow_system.plot_network(show=False) - >>> flow_system.plot_network(path='output/custom_network.html', controls=['nodes', 'layout']) - - Notes: - - This function requires `pyvis`. If not installed, the function prints a warning and returns `None`. - - Nodes are styled based on type (e.g., circles for buses, boxes for components) and annotated with node information. """ from . import plotting @@ -242,8 +378,8 @@ def plot_network( return plotting.plot_network(node_infos, edge_infos, path, controls, show) def network_infos(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, str]]]: - if not self._connected: - self._connect_network() + if not self._connected_and_transformed: + self.connect_and_transform() nodes = { node.label_full: { 'label': node.label, @@ -265,67 +401,6 @@ def network_infos(self) -> Tuple[Dict[str, Dict[str, str]], Dict[str, Dict[str, return nodes, edges - def transform_data(self): - if not self._connected: - self._connect_network() - for element in self.all_elements.values(): - element.transform_data(self) - - def create_time_series( - self, - name: str, - data: Optional[Union[NumericData, TimeSeriesData, TimeSeries]], - needs_extra_timestep: bool = False, - ) -> Optional[TimeSeries]: - """ - Tries to create a TimeSeries from NumericData Data and adds it to the time_series_collection - If the data already is a TimeSeries, nothing happens and the TimeSeries gets reset and returned - If the data is a TimeSeriesData, it is converted to a TimeSeries, and the aggregation weights are applied. - If the data is None, nothing happens. - """ - - if data is None: - return None - elif isinstance(data, TimeSeries): - data.restore_data() - if data in self.time_series_collection: - return data - return self.time_series_collection.create_time_series( - data=data.active_data, name=name, needs_extra_timestep=needs_extra_timestep - ) - return self.time_series_collection.create_time_series( - data=data, name=name, needs_extra_timestep=needs_extra_timestep - ) - - def create_effect_time_series( - self, - label_prefix: Optional[str], - effect_values: EffectValuesUser, - label_suffix: Optional[str] = None, - ) -> Optional[EffectTimeSeries]: - """ - Transform EffectValues to EffectTimeSeries. - Creates a TimeSeries for each key in the nested_values dictionary, using the value as the data. - - The resulting label of the TimeSeries is the label of the parent_element, - followed by the label of the Effect in the nested_values and the label_suffix. - If the key in the EffectValues is None, the alias 'Standard_Effect' is used - """ - effect_values: Optional[EffectValuesDict] = self.effects.create_effect_values_dict(effect_values) - if effect_values is None: - return None - - return { - effect: self.create_time_series('|'.join(filter(None, [label_prefix, effect, label_suffix])), value) - for effect, value in effect_values.items() - } - - def create_model(self) -> SystemModel: - if not self._connected: - raise RuntimeError('FlowSystem is not connected. 
Call FlowSystem.connect() first.')
-        self.model = SystemModel(self)
-        return self.model
-
     def _check_if_element_is_unique(self, element: Element) -> None:
         """
         checks if element or label of element already exists in list
@@ -334,25 +409,25 @@ def _check_if_element_is_unique(self, element: Element) -> None:
             element: new element to check
         """
         if element in self.all_elements.values():
-            raise ValueError(f'Element {element.label} already added to FlowSystem!')
+            raise ValueError(f'Element {element.label_full} already added to FlowSystem!')
         # check if name is already used:
         if element.label_full in self.all_elements:
-            raise ValueError(f'Label of Element {element.label} already used in another element!')
+            raise ValueError(f'Label of Element {element.label_full} already used in another element!')

     def _add_effects(self, *args: Effect) -> None:
         self.effects.add_effects(*args)

     def _add_components(self, *components: Component) -> None:
         for new_component in list(components):
-            logger.info(f'Registered new Component: {new_component.label}')
+            logger.info(f'Registered new Component: {new_component.label_full}')
             self._check_if_element_is_unique(new_component)  # check if already exists:
-            self.components[new_component.label] = new_component  # Add to existing components
+            self.components[new_component.label_full] = new_component  # Add to existing components

     def _add_buses(self, *buses: Bus):
         for new_bus in list(buses):
-            logger.info(f'Registered new Bus: {new_bus.label}')
+            logger.info(f'Registered new Bus: {new_bus.label_full}')
             self._check_if_element_is_unique(new_bus)  # check if already exists:
-            self.buses[new_bus.label] = new_bus  # Add to existing components
+            self.buses[new_bus.label_full] = new_bus  # Add to existing components

     def _connect_network(self):
         """Connects the network of components and buses. Can be rerun without changes if no elements were added"""
@@ -365,7 +440,7 @@ def _connect_network(self):
             if flow._bus_object is not None and flow._bus_object not in self.buses.values():
                 self._add_buses(flow._bus_object)
                 warnings.warn(
-                    f'The Bus {flow._bus_object.label} was added to the FlowSystem from {flow.label_full}.'
+                    f'The Bus {flow._bus_object.label_full} was added to the FlowSystem from {flow.label_full}. '
                     f'This is deprecated and will be removed in the future. '
                     f'Please pass the Bus.label to the Flow and the Bus to the FlowSystem instead.',
                     UserWarning,
@@ -387,17 +462,85 @@ def _connect_network(self):
-            f'Connected {len(self.buses)} Buses and {len(self.components)} '
+            f'Connected {len(self.buses)} Buses and {len(self.components)} Components '
             f'via {len(self.flows)} Flows inside the FlowSystem.'
         )
-        self._connected = True

-    def __repr__(self):
-        return f'<{self.__class__.__name__} with {len(self.components)} components and {len(self.effects)} effects>'
+    def __repr__(self) -> str:
+        """Compact representation for debugging."""
+        status = '✓' if self._connected_and_transformed else '⚠'
+        return (
+            f'FlowSystem({len(self.timesteps)} timesteps '
+            f'[{self.timesteps[0].strftime("%Y-%m-%d")} to {self.timesteps[-1].strftime("%Y-%m-%d")}], '
+            f'{len(self.components)} Components, {len(self.buses)} Buses, {len(self.effects)} Effects, {status})'
+        )
+
+    def __str__(self) -> str:
+        """Structured summary for users."""
+
+        def format_elements(element_names: list, label: str, alignment: int = 12):
+            name_list = ', '.join(element_names[:3])
+            if len(element_names) > 3:
+                name_list += f' ... (+{len(element_names) - 3} more)'
+
+            suffix = f' ({name_list})' if element_names else ''
+            padding = alignment - len(label) - 1  # -1 for the colon
+            return f'{label}:{"":<{padding}} {len(element_names)}{suffix}'
+
+        time_period = f'Time period: {self.timesteps[0].date()} to {self.timesteps[-1].date()}'
+        freq_str = str(self.timesteps.freq).replace('<', '').replace('>', '') if self.timesteps.freq else 'irregular'
+
+        lines = [
+            'FlowSystem Overview:',
+            f'{"─" * 50}',
+            time_period,
+            f'Timesteps: {len(self.timesteps)} ({freq_str})',
+            format_elements(list(self.components.keys()), 'Components'),
+            format_elements(list(self.buses.keys()), 'Buses'),
+            format_elements(list(self.effects.effects.keys()), 'Effects'),
+            f'Status: {"Connected & Transformed" if self._connected_and_transformed else "Not connected"}',
+        ]
+
+        return '\n'.join(lines)
+
+    def __eq__(self, other: 'FlowSystem'):
+        """Check if two FlowSystems are equal by comparing their dataset representations."""
+        if not isinstance(other, FlowSystem):
+            return NotImplemented  # let Python handle comparison with other types instead of raising
+
+        ds_me = self.to_dataset()
+        ds_other = other.to_dataset()
+
+        try:
+            xr.testing.assert_equal(ds_me, ds_other)
+        except AssertionError:
+            return False

-    def __str__(self):
-        with StringIO() as output_buffer:
-            console = Console(file=output_buffer, width=1000)  # Adjust width as needed
-            console.print(Pretty(self.as_dict('stats'), expand_all=True, indent_guides=True))
-            value = output_buffer.getvalue()
-        return value
+        if ds_me.attrs != ds_other.attrs:
+            return False
+
+        return True
+
+    def __getitem__(self, item) -> Element:
+        """Get element by exact label with helpful error messages."""
+        if item in self.all_elements:
+            return self.all_elements[item]
+
+        # Provide helpful error with suggestions
+        from difflib import get_close_matches
+
+        suggestions = get_close_matches(item, self.all_elements.keys(), n=3, cutoff=0.6)
+
+        if suggestions:
+            suggestion_str = ', '.join(f"'{s}'" for s in suggestions)
+            raise KeyError(f"Element '{item}' not found. Did you mean: {suggestion_str}?")
+        else:
+            raise KeyError(f"Element '{item}' not found in FlowSystem")
+
+    def __contains__(self, item: str) -> bool:
+        """Check if element exists in the FlowSystem."""
+        return item in self.all_elements
+
+    def __iter__(self):
+        """Iterate over element labels."""
+        return iter(self.all_elements.keys())

     @property
     def flows(self) -> Dict[str, Flow]:
@@ -407,3 +550,86 @@ def flows(self) -> Dict[str, Flow]:
     @property
     def all_elements(self) -> Dict[str, Element]:
         return {**self.components, **self.effects.effects, **self.flows, **self.buses}
+
+    @property
+    def used_in_calculation(self) -> bool:
+        return self._used_in_calculation
+
+    def sel(self, time: Optional[Union[str, slice, List[str], pd.Timestamp, pd.DatetimeIndex]] = None) -> 'FlowSystem':
+        """
+        Select a subset of the FlowSystem by the time coordinate.
+ + Args: + time: Time selection (e.g., slice('2023-01-01', '2023-12-31'), '2023-06-15', or list of times) + + Returns: + FlowSystem: New FlowSystem with selected data + """ + if not self._connected_and_transformed: + self.connect_and_transform() + + # Build indexers dict from non-None parameters + indexers = {} + if time is not None: + indexers['time'] = time + + if not indexers: + return self.copy() # Return a copy when no selection + + selected_dataset = self.to_dataset().sel(**indexers) + return self.__class__.from_dataset(selected_dataset) + + def isel(self, time: Optional[Union[int, slice, List[int]]] = None) -> 'FlowSystem': + """ + Select a subset of the flowsystem by integer indices. + + Args: + time: Time selection by integer index (e.g., slice(0, 100), 50, or [0, 5, 10]) + + Returns: + FlowSystem: New FlowSystem with selected data + """ + if not self._connected_and_transformed: + self.connect_and_transform() + + # Build indexers dict from non-None parameters + indexers = {} + if time is not None: + indexers['time'] = time + + if not indexers: + return self.copy() # Return a copy when no selection + + selected_dataset = self.to_dataset().isel(**indexers) + return self.__class__.from_dataset(selected_dataset) + + def resample( + self, + time: str, + method: Literal['mean', 'sum', 'max', 'min', 'first', 'last', 'std', 'var', 'median', 'count'] = 'mean', + **kwargs: Any + ) -> 'FlowSystem': + """ + Create a resampled FlowSystem by resampling data along the time dimension (like xr.Dataset.resample()). + + Args: + time: Resampling frequency (e.g., '3h', '2D', '1M') + method: Resampling method. Recommended: 'mean', 'first', 'last', 'max', 'min' + **kwargs: Additional arguments passed to xarray.resample() + + Returns: + FlowSystem: New FlowSystem with resampled data + """ + if not self._connected_and_transformed: + self.connect_and_transform() + + dataset = self.to_dataset() + resampler = dataset.resample(time=time, **kwargs) + + if hasattr(resampler, method): + resampled_dataset = getattr(resampler, method)() + else: + available_methods = ['mean', 'sum', 'max', 'min', 'first', 'last', 'std', 'var', 'median', 'count'] + raise ValueError(f'Unsupported resampling method: {method}. Available: {available_methods}') + + return self.__class__.from_dataset(resampled_dataset) diff --git a/flixopt/interface.py b/flixopt/interface.py index c38d6c619..ad331b904 100644 --- a/flixopt/interface.py +++ b/flixopt/interface.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING, Dict, Iterator, List, Optional, Union from .config import CONFIG -from .core import NumericData, NumericDataTS, Scalar +from .core import Scalar, TemporalDataUser from .structure import Interface, register_class_for_io if TYPE_CHECKING: # for type checking and preventing circular imports @@ -20,7 +20,7 @@ @register_class_for_io class Piece(Interface): - def __init__(self, start: NumericData, end: NumericData): + def __init__(self, start: TemporalDataUser, end: TemporalDataUser): """ Define a Piece, which is part of a Piecewise object. 
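
Taken together, the flow_system.py changes above replace the old `transform_data()`/`as_dataset()` workflow with an explicit lifecycle plus dict-style access and xarray-like selection. A hedged usage sketch (API names are from this diff; the component label, horizon, and frequencies are illustrative, not taken from the codebase):

```python
import pandas as pd
import flixopt as fx

timesteps = pd.date_range('2023-01-01', periods=8760, freq='h', name='time')
flow_system = fx.FlowSystem(timesteps)
# ... flow_system.add_elements(...) ...

flow_system.connect_and_transform()   # replaces the removed transform_data()
model = flow_system.create_model()    # raises RuntimeError if called before the line above

boiler = flow_system['Boiler']        # exact-label lookup; KeyError suggests close matches
assert 'Boiler' in flow_system        # __contains__ checks all element labels

january = flow_system.sel(time=slice('2023-01-01', '2023-01-31'))
first_week = flow_system.isel(time=slice(0, 7 * 24))
daily = flow_system.resample(time='1D', method='mean')
```

Each of `sel()`, `isel()`, and `resample()` round-trips through `to_dataset()`/`from_dataset()`, so the result is an independent FlowSystem rather than a view.
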
@@ -32,8 +32,8 @@ def __init__(self, start: NumericData, end: NumericData): self.end = end def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): - self.start = flow_system.create_time_series(f'{name_prefix}|start', self.start) - self.end = flow_system.create_time_series(f'{name_prefix}|end', self.end) + self.start = flow_system.fit_to_model_coords(f'{name_prefix}|start', self.start) + self.end = flow_system.fit_to_model_coords(f'{name_prefix}|end', self.end) @register_class_for_io @@ -175,10 +175,10 @@ def __init__( effects_per_running_hour: Optional['EffectValuesUser'] = None, on_hours_total_min: Optional[int] = None, on_hours_total_max: Optional[int] = None, - consecutive_on_hours_min: Optional[NumericData] = None, - consecutive_on_hours_max: Optional[NumericData] = None, - consecutive_off_hours_min: Optional[NumericData] = None, - consecutive_off_hours_max: Optional[NumericData] = None, + consecutive_on_hours_min: Optional[TemporalDataUser] = None, + consecutive_on_hours_max: Optional[TemporalDataUser] = None, + consecutive_off_hours_min: Optional[TemporalDataUser] = None, + consecutive_off_hours_max: Optional[TemporalDataUser] = None, switch_on_total_max: Optional[int] = None, force_switch_on: bool = False, ): @@ -206,30 +206,30 @@ def __init__( self.effects_per_running_hour: EffectValuesUser = effects_per_running_hour or {} self.on_hours_total_min: Scalar = on_hours_total_min self.on_hours_total_max: Scalar = on_hours_total_max - self.consecutive_on_hours_min: NumericDataTS = consecutive_on_hours_min - self.consecutive_on_hours_max: NumericDataTS = consecutive_on_hours_max - self.consecutive_off_hours_min: NumericDataTS = consecutive_off_hours_min - self.consecutive_off_hours_max: NumericDataTS = consecutive_off_hours_max + self.consecutive_on_hours_min: TemporalDataUser = consecutive_on_hours_min + self.consecutive_on_hours_max: TemporalDataUser = consecutive_on_hours_max + self.consecutive_off_hours_min: TemporalDataUser = consecutive_off_hours_min + self.consecutive_off_hours_max: TemporalDataUser = consecutive_off_hours_max self.switch_on_total_max: Scalar = switch_on_total_max self.force_switch_on: bool = force_switch_on def transform_data(self, flow_system: 'FlowSystem', name_prefix: str): - self.effects_per_switch_on = flow_system.create_effect_time_series( + self.effects_per_switch_on = flow_system.fit_effects_to_model_coords( name_prefix, self.effects_per_switch_on, 'per_switch_on' ) - self.effects_per_running_hour = flow_system.create_effect_time_series( + self.effects_per_running_hour = flow_system.fit_effects_to_model_coords( name_prefix, self.effects_per_running_hour, 'per_running_hour' ) - self.consecutive_on_hours_min = flow_system.create_time_series( + self.consecutive_on_hours_min = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_on_hours_min', self.consecutive_on_hours_min ) - self.consecutive_on_hours_max = flow_system.create_time_series( + self.consecutive_on_hours_max = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_on_hours_max', self.consecutive_on_hours_max ) - self.consecutive_off_hours_min = flow_system.create_time_series( + self.consecutive_off_hours_min = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_off_hours_min', self.consecutive_off_hours_min ) - self.consecutive_off_hours_max = flow_system.create_time_series( + self.consecutive_off_hours_max = flow_system.fit_to_model_coords( f'{name_prefix}|consecutive_off_hours_max', self.consecutive_off_hours_max ) diff --git a/flixopt/io.py b/flixopt/io.py 
index 35d927136..9527eb66a 100644 --- a/flixopt/io.py +++ b/flixopt/io.py @@ -10,47 +10,9 @@ import xarray as xr import yaml -from .core import TimeSeries - logger = logging.getLogger('flixopt') -def replace_timeseries(obj, mode: Literal['name', 'stats', 'data'] = 'name'): - """Recursively replaces TimeSeries objects with their names prefixed by '::::'.""" - if isinstance(obj, dict): - return {k: replace_timeseries(v, mode) for k, v in obj.items()} - elif isinstance(obj, list): - return [replace_timeseries(v, mode) for v in obj] - elif isinstance(obj, TimeSeries): # Adjust this based on the actual class - if obj.all_equal: - return obj.active_data.values[0].item() - elif mode == 'name': - return f'::::{obj.name}' - elif mode == 'stats': - return obj.stats - elif mode == 'data': - return obj - else: - raise ValueError(f'Invalid mode {mode}') - else: - return obj - - -def insert_dataarray(obj, ds: xr.Dataset): - """Recursively inserts TimeSeries objects into a dataset.""" - if isinstance(obj, dict): - return {k: insert_dataarray(v, ds) for k, v in obj.items()} - elif isinstance(obj, list): - return [insert_dataarray(v, ds) for v in obj] - elif isinstance(obj, str) and obj.startswith('::::'): - da = ds[obj[4:]] - if da.isel(time=-1).isnull(): - return da.isel(time=slice(0, -1)) - return da - else: - return obj - - def remove_none_and_empty(obj): """Recursively removes None and empty dicts and lists values from a dictionary or list.""" @@ -206,7 +168,7 @@ def save_dataset_to_netcdf( compression: int = 0, ) -> None: """ - Save a dataset to a netcdf file. Store the attrs as a json string in the 'attrs' attribute. + Save a dataset to a netcdf file. Store all attrs as JSON strings in 'attrs' attributes. Args: ds: Dataset to save. @@ -216,6 +178,7 @@ def save_dataset_to_netcdf( Raises: ValueError: If the path has an invalid file extension. """ + path = pathlib.Path(path) if path.suffix not in ['.nc', '.nc4']: raise ValueError(f'Invalid file extension for path {path}. Only .nc and .nc4 are supported') @@ -228,8 +191,20 @@ def save_dataset_to_netcdf( 'Dataset was exported without compression due to missing dependency "netcdf4".' 'Install netcdf4 via `pip install netcdf4`.' ) + ds = ds.copy(deep=True) ds.attrs = {'attrs': json.dumps(ds.attrs)} + + # Convert all DataArray attrs to JSON strings + for var_name, data_var in ds.data_vars.items(): + if data_var.attrs: # Only if there are attrs + ds[var_name].attrs = {'attrs': json.dumps(data_var.attrs)} + + # Also handle coordinate attrs if they exist + for coord_name, coord_var in ds.coords.items(): + if hasattr(coord_var, 'attrs') and coord_var.attrs: + ds[coord_name].attrs = {'attrs': json.dumps(coord_var.attrs)} + ds.to_netcdf( path, encoding=None @@ -240,16 +215,30 @@ def save_dataset_to_netcdf( def load_dataset_from_netcdf(path: Union[str, pathlib.Path]) -> xr.Dataset: """ - Load a dataset from a netcdf file. Load the attrs from the 'attrs' attribute. + Load a dataset from a netcdf file. Load all attrs from 'attrs' attributes. Args: path: Path to load the dataset from. Returns: - Dataset: Loaded dataset. + Dataset: Loaded dataset with restored attrs. 
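+
+        Example (illustrative round trip with ``save_dataset_to_netcdf``; the
+        file name is arbitrary)::
+
+            save_dataset_to_netcdf(ds, 'system.nc')
+            restored = load_dataset_from_netcdf('system.nc')
+            # dataset, variable, and coordinate attrs come back as dicts,
+            # JSON-decoded from the single 'attrs' key written on save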
""" ds = xr.load_dataset(path) - ds.attrs = json.loads(ds.attrs['attrs']) + + # Restore Dataset attrs + if 'attrs' in ds.attrs: + ds.attrs = json.loads(ds.attrs['attrs']) + + # Restore DataArray attrs + for var_name, data_var in ds.data_vars.items(): + if 'attrs' in data_var.attrs: + ds[var_name].attrs = json.loads(data_var.attrs['attrs']) + + # Restore coordinate attrs + for coord_name, coord_var in ds.coords.items(): + if hasattr(coord_var, 'attrs') and 'attrs' in coord_var.attrs: + ds[coord_name].attrs = json.loads(coord_var.attrs['attrs']) + return ds diff --git a/flixopt/linear_converters.py b/flixopt/linear_converters.py index 3fd032632..b137ad89a 100644 --- a/flixopt/linear_converters.py +++ b/flixopt/linear_converters.py @@ -8,7 +8,7 @@ import numpy as np from .components import LinearConverter -from .core import NumericDataTS, TimeSeriesData +from .core import TemporalDataUser, TimeSeriesData from .elements import Flow from .interface import OnOffParameters from .structure import register_class_for_io @@ -21,7 +21,7 @@ class Boiler(LinearConverter): def __init__( self, label: str, - eta: NumericDataTS, + eta: TemporalDataUser, Q_fu: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -62,7 +62,7 @@ class Power2Heat(LinearConverter): def __init__( self, label: str, - eta: NumericDataTS, + eta: TemporalDataUser, P_el: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -104,7 +104,7 @@ class HeatPump(LinearConverter): def __init__( self, label: str, - COP: NumericDataTS, + COP: TemporalDataUser, P_el: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -146,7 +146,7 @@ class CoolingTower(LinearConverter): def __init__( self, label: str, - specific_electricity_demand: NumericDataTS, + specific_electricity_demand: TemporalDataUser, P_el: Flow, Q_th: Flow, on_off_parameters: OnOffParameters = None, @@ -190,8 +190,8 @@ class CHP(LinearConverter): def __init__( self, label: str, - eta_th: NumericDataTS, - eta_el: NumericDataTS, + eta_th: TemporalDataUser, + eta_el: TemporalDataUser, Q_fu: Flow, P_el: Flow, Q_th: Flow, @@ -251,7 +251,7 @@ class HeatPumpWithSource(LinearConverter): def __init__( self, label: str, - COP: NumericDataTS, + COP: TemporalDataUser, P_el: Flow, Q_ab: Flow, Q_th: Flow, @@ -297,11 +297,11 @@ def COP(self, value): # noqa: N802 def check_bounds( - value: NumericDataTS, + value: TemporalDataUser, parameter_label: str, element_label: str, - lower_bound: NumericDataTS, - upper_bound: NumericDataTS, + lower_bound: TemporalDataUser, + upper_bound: TemporalDataUser, ) -> None: """ Check if the value is within the bounds. The bounds are exclusive. diff --git a/flixopt/results.py b/flixopt/results.py index 223e3708e..1dee9ac02 100644 --- a/flixopt/results.py +++ b/flixopt/results.py @@ -14,7 +14,7 @@ from . import io as fx_io from . 
import plotting -from .core import TimeSeriesCollection +from .flow_system import FlowSystem if TYPE_CHECKING: import pyvis @@ -118,7 +118,7 @@ def from_calculation(cls, calculation: 'Calculation'): """ return cls( solution=calculation.model.solution, - flow_system=calculation.flow_system.as_dataset(constants_in_dataset=True), + flow_system=calculation.flow_system.to_dataset(), summary=calculation.summary, model=calculation.model, name=calculation.name, @@ -160,7 +160,7 @@ def __init__( } self.timesteps_extra = self.solution.indexes['time'] - self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.timesteps_extra) + self.hours_per_timestep = FlowSystem.calculate_hours_per_timestep(self.timesteps_extra) def __getitem__(self, key: str) -> Union['ComponentResults', 'BusResults', 'EffectResults']: if key in self.components: @@ -659,7 +659,7 @@ def from_file(cls, folder: Union[str, pathlib.Path], name: str): with open(path.with_suffix('.json'), 'r', encoding='utf-8') as f: meta_data = json.load(f) return cls( - [CalculationResults.from_file(folder, name) for name in meta_data['sub_calculations']], + [CalculationResults.from_file(folder, sub_name) for sub_name in meta_data['sub_calculations']], all_timesteps=pd.DatetimeIndex( [datetime.datetime.fromisoformat(date) for date in meta_data['all_timesteps']], name='time' ), @@ -684,7 +684,7 @@ def __init__( self.overlap_timesteps = overlap_timesteps self.name = name self.folder = pathlib.Path(folder) if folder is not None else pathlib.Path.cwd() / 'results' - self.hours_per_timestep = TimeSeriesCollection.calculate_hours_per_timestep(self.all_timesteps) + self.hours_per_timestep = FlowSystem.calculate_hours_per_timestep(self.all_timesteps) @property def meta_data(self) -> Dict[str, Union[int, List[str]]]: @@ -756,7 +756,7 @@ def to_file( f'Folder {folder} and its parent do not exist. Please create them first.' ) from e for segment in self.segment_results: - segment.to_file(folder=folder, name=f'{name}-{segment.name}', compression=compression) + segment.to_file(folder=folder, name=segment.name, compression=compression) with open(path.with_suffix('.json'), 'w', encoding='utf-8') as f: json.dump(self.meta_data, f, indent=4, ensure_ascii=False) diff --git a/flixopt/structure.py b/flixopt/structure.py index 1d0f2324f..cc307a1e8 100644 --- a/flixopt/structure.py +++ b/flixopt/structure.py @@ -18,8 +18,9 @@ from rich.console import Console from rich.pretty import Pretty +from . 
import io as fx_io from .config import CONFIG -from .core import NumericData, Scalar, TimeSeries, TimeSeriesCollection, TimeSeriesData +from .core import Scalar, TemporalDataUser, TimeSeriesData, get_dataarray_stats if TYPE_CHECKING: # for type checking and preventing circular imports from .effects import EffectCollectionModel @@ -56,7 +57,6 @@ def __init__(self, flow_system: 'FlowSystem'): """ super().__init__(force_dim_names=True) self.flow_system = flow_system - self.time_series_collection = flow_system.time_series_collection self.effects: Optional[EffectCollectionModel] = None def do_modeling(self): @@ -88,170 +88,506 @@ def solution(self): for effect in sorted(self.flow_system.effects, key=lambda effect: effect.label_full.upper()) }, } - return solution.reindex(time=self.time_series_collection.timesteps_extra) + return solution.reindex(time=self.flow_system.timesteps_extra) @property def hours_per_step(self): - return self.time_series_collection.hours_per_timestep + return self.flow_system.hours_per_timestep @property def hours_of_previous_timesteps(self): - return self.time_series_collection.hours_of_previous_timesteps + return self.flow_system.hours_of_previous_timesteps @property def coords(self) -> Tuple[pd.DatetimeIndex]: - return (self.time_series_collection.timesteps,) + return (self.flow_system.timesteps,) @property def coords_extra(self) -> Tuple[pd.DatetimeIndex]: - return (self.time_series_collection.timesteps_extra,) + return (self.flow_system.timesteps_extra,) class Interface: """ - This class is used to collect arguments about a Model. Its the base class for all Elements and Models in flixopt. + Base class for all Elements and Models in flixopt that provides serialization capabilities. + + This class enables automatic serialization/deserialization of objects containing xarray DataArrays + and nested Interface objects to/from xarray Datasets and NetCDF files. It uses introspection + of constructor parameters to automatically handle most serialization scenarios. + + Key Features: + - Automatic extraction and restoration of xarray DataArrays + - Support for nested Interface objects + - NetCDF and JSON export/import + - Recursive handling of complex nested structures + + Subclasses must implement: + transform_data(flow_system): Transform data to match FlowSystem dimensions """ def transform_data(self, flow_system: 'FlowSystem'): - """Transforms the data of the interface to match the FlowSystem's dimensions""" - raise NotImplementedError('Every Interface needs a transform_data() method') + """Transform the data of the interface to match the FlowSystem's dimensions. + + Args: + flow_system: The FlowSystem containing timing and dimensional information - def infos(self, use_numpy: bool = True, use_element_label: bool = False) -> Dict: + Raises: + NotImplementedError: Must be implemented by subclasses """ - Generate a dictionary representation of the object's constructor arguments. - Excludes default values and empty dictionaries and lists. - Converts data to be compatible with JSON. + raise NotImplementedError('Every Interface subclass needs a transform_data() method') - Args: - use_numpy: Whether to convert NumPy arrays to lists. Defaults to True. - If True, numeric numpy arrays (`np.ndarray`) are preserved as-is. - If False, they are converted to lists. - use_element_label: Whether to use the element label instead of the infos of the element. Defaults to False. - Note that Elements used as keys in dictionaries are always converted to their labels. 
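
A minimal sketch of the round trip these features enable, assuming a FlowSystem that is already populated (the file name is arbitrary; the attrs-as-JSON storage is handled by the io.py hunks above):

```python
import flixopt as fx

flow_system.connect_and_transform()        # ensure all numeric data are xr.DataArrays
ds = flow_system.to_dataset()              # DataArrays -> variables; structure -> attrs
print(ds.attrs['__class__'])               # 'FlowSystem'; nested fields may hold ':::name' references

flow_system.to_netcdf('flow_system.nc')    # attrs are stored as JSON strings on disk
restored = fx.FlowSystem.from_netcdf('flow_system.nc')
assert restored == flow_system             # dataset-based __eq__ from this diff
```
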
+ def _create_reference_structure(self) -> Tuple[Dict, Dict[str, xr.DataArray]]: + """ + Convert all DataArrays to references and extract them. + This is the core method that both to_dict() and to_dataset() build upon. Returns: - A dictionary representation of the object's constructor arguments. + Tuple of (reference_structure, extracted_arrays_dict) + Raises: + ValueError: If DataArrays don't have unique names or are duplicated """ - # Get the constructor arguments and their default values - init_params = sorted( - inspect.signature(self.__init__).parameters.items(), - key=lambda x: (x[0].lower() != 'label', x[0].lower()), # Prioritize 'label' - ) - # Build a dict of attribute=value pairs, excluding defaults - details = {'class': ':'.join([cls.__name__ for cls in self.__class__.__mro__])} - for name, param in init_params: - if name == 'self': + # Get constructor parameters using caching for performance + if not hasattr(self, '_cached_init_params'): + self._cached_init_params = list(inspect.signature(self.__init__).parameters.keys()) + + # Process all constructor parameters + reference_structure = {'__class__': self.__class__.__name__} + all_extracted_arrays = {} + + for name in self._cached_init_params: + if name == 'self' or name == 'timesteps': # Skip self and timesteps. Timesteps are directly stored in Datasets continue - value, default = getattr(self, name, None), param.default - # Ignore default values and empty dicts and list - if np.all(value == default) or (isinstance(value, (dict, list)) and not value): + + value = getattr(self, name, None) + if value is None: continue - details[name] = copy_and_convert_datatypes(value, use_numpy, use_element_label) - return details - def to_json(self, path: Union[str, pathlib.Path]): + # Extract arrays and get reference structure + processed_value, extracted_arrays = self._extract_dataarrays_recursive(value, name) + + # Check for array name conflicts + conflicts = set(all_extracted_arrays.keys()) & set(extracted_arrays.keys()) + if conflicts: + raise ValueError( + f'DataArray name conflicts detected: {conflicts}. ' + f'Each DataArray must have a unique name for serialization.' + ) + + # Add extracted arrays to the collection + all_extracted_arrays.update(extracted_arrays) + + # Only store in structure if it's not None/empty after processing + if processed_value is not None and not self._is_empty_container(processed_value): + reference_structure[name] = processed_value + + return reference_structure, all_extracted_arrays + + @staticmethod + def _is_empty_container(obj) -> bool: + """Check if object is an empty container (dict, list, tuple, set).""" + return isinstance(obj, (dict, list, tuple, set)) and len(obj) == 0 + + def _extract_dataarrays_recursive(self, obj, context_name: str = '') -> Tuple[Any, Dict[str, xr.DataArray]]: """ - Saves the element to a json file. - This not meant to be reloaded and recreate the object, but rather used to document or compare the object. + Recursively extract DataArrays from nested structures. Args: - path: The path to the json file. 
+ obj: Object to process + context_name: Name context for better error messages + + Returns: + Tuple of (processed_object_with_references, extracted_arrays_dict) + + Raises: + ValueError: If DataArrays don't have unique names """ - data = get_compact_representation(self.infos(use_numpy=True, use_element_label=True)) - with open(path, 'w', encoding='utf-8') as f: - json.dump(data, f, indent=4, ensure_ascii=False) + extracted_arrays = {} + + # Handle DataArrays directly - use their unique name + if isinstance(obj, xr.DataArray): + if not obj.name: + raise ValueError( + f'DataArrays must have a unique name for serialization. ' + f'Unnamed DataArray found in {context_name}. Please set array.name = "unique_name"' + ) - def to_dict(self) -> Dict: - """Convert the object to a dictionary representation.""" - data = {'__class__': self.__class__.__name__} + array_name = str(obj.name) # Ensure string type + if array_name in extracted_arrays: + raise ValueError( + f'DataArray name "{array_name}" is duplicated in {context_name}. ' + f'Each DataArray must have a unique name for serialization.' + ) - # Get the constructor parameters - init_params = inspect.signature(self.__init__).parameters + extracted_arrays[array_name] = obj + return f':::{array_name}', extracted_arrays - for name in init_params: - if name == 'self': - continue + # Handle Interface objects - extract their DataArrays too + elif isinstance(obj, Interface): + try: + interface_structure, interface_arrays = obj._create_reference_structure() + extracted_arrays.update(interface_arrays) + return interface_structure, extracted_arrays + except Exception as e: + raise ValueError(f'Failed to process nested Interface object in {context_name}: {e}') from e + + # Handle sequences (lists, tuples) + elif isinstance(obj, (list, tuple)): + processed_items = [] + for i, item in enumerate(obj): + item_context = f'{context_name}[{i}]' if context_name else f'item[{i}]' + processed_item, nested_arrays = self._extract_dataarrays_recursive(item, item_context) + extracted_arrays.update(nested_arrays) + processed_items.append(processed_item) + return processed_items, extracted_arrays + + # Handle dictionaries + elif isinstance(obj, dict): + processed_dict = {} + for key, value in obj.items(): + key_context = f'{context_name}.{key}' if context_name else str(key) + processed_value, nested_arrays = self._extract_dataarrays_recursive(value, key_context) + extracted_arrays.update(nested_arrays) + processed_dict[key] = processed_value + return processed_dict, extracted_arrays + + # Handle sets (convert to list for JSON compatibility) + elif isinstance(obj, set): + processed_items = [] + for i, item in enumerate(obj): + item_context = f'{context_name}.set_item[{i}]' if context_name else f'set_item[{i}]' + processed_item, nested_arrays = self._extract_dataarrays_recursive(item, item_context) + extracted_arrays.update(nested_arrays) + processed_items.append(processed_item) + return processed_items, extracted_arrays + + # For all other types, serialize to basic types + else: + return self._serialize_to_basic_types(obj), extracted_arrays + + @classmethod + def _resolve_reference_structure(cls, structure, arrays_dict: Dict[str, xr.DataArray]): + """ + Convert reference structure back to actual objects using provided arrays. 
+ + Args: + structure: Structure containing references (:::name) or special type markers + arrays_dict: Dictionary of available DataArrays + + Returns: + Structure with references resolved to actual DataArrays or objects + + Raises: + ValueError: If referenced arrays are not found or class is not registered + """ + # Handle DataArray references + if isinstance(structure, str) and structure.startswith(':::'): + array_name = structure[3:] # Remove ":::" prefix + if array_name not in arrays_dict: + raise ValueError(f"Referenced DataArray '{array_name}' not found in dataset") + + array = arrays_dict[array_name] + + # Handle null values with warning + if array.isnull().any(): + logger.warning(f"DataArray '{array_name}' contains null values. Dropping them.") + array = array.dropna(dim='time', how='all') + + # Check if this should be restored as TimeSeriesData + if TimeSeriesData.is_timeseries_data(array): + return TimeSeriesData.from_dataarray(array) + + return array + + elif isinstance(structure, list): + resolved_list = [] + for item in structure: + resolved_item = cls._resolve_reference_structure(item, arrays_dict) + if resolved_item is not None: # Filter out None values from missing references + resolved_list.append(resolved_item) + return resolved_list + + elif isinstance(structure, dict): + if structure.get('__class__'): + class_name = structure['__class__'] + if class_name not in CLASS_REGISTRY: + raise ValueError( + f"Class '{class_name}' not found in CLASS_REGISTRY. " + f'Available classes: {list(CLASS_REGISTRY.keys())}' + ) + + # This is a nested Interface object - restore it recursively + nested_class = CLASS_REGISTRY[class_name] + # Remove the __class__ key and process the rest + nested_data = {k: v for k, v in structure.items() if k != '__class__'} + # Resolve references in the nested data + resolved_nested_data = cls._resolve_reference_structure(nested_data, arrays_dict) + + try: + return nested_class(**resolved_nested_data) + except Exception as e: + raise ValueError(f'Failed to create instance of {class_name}: {e}') from e + else: + # Regular dictionary - resolve references in values + resolved_dict = {} + for key, value in structure.items(): + resolved_value = cls._resolve_reference_structure(value, arrays_dict) + if resolved_value is not None or value is None: # Keep None values if they were originally None + resolved_dict[key] = resolved_value + return resolved_dict - value = getattr(self, name, None) - data[name] = self._serialize_value(value) - - return data - - def _serialize_value(self, value: Any): - """Helper method to serialize a value based on its type.""" - if value is None: - return None - elif isinstance(value, Interface): - return value.to_dict() - elif isinstance(value, (list, tuple)): - return self._serialize_list(value) - elif isinstance(value, dict): - return self._serialize_dict(value) else: - return value + return structure - def _serialize_list(self, items): - """Serialize a list of items.""" - return [self._serialize_value(item) for item in items] + def _serialize_to_basic_types(self, obj): + """ + Convert object to basic Python types only (no DataArrays, no custom objects). 
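+
+        Example (illustrative)::
+
+            self._serialize_to_basic_types({'n': np.int64(3), 'when': datetime(2024, 1, 1), 's': {1, 2}})
+            # -> {'n': 3, 'when': '2024-01-01T00:00:00', 's': [1, 2]}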
-    def _serialize_dict(self, d):
+        Args:
-        """Serialize a dictionary of items."""
+            obj: Object to serialize
-        return {k: self._serialize_value(v) for k, v in d.items()}
+
+        Returns:
-    @classmethod
-    def _deserialize_dict(cls, data: Dict) -> Union[Dict, 'Interface']:
+            Object converted to basic Python types (str, int, float, bool, list, dict)
-        if '__class__' in data:
+        """
-            class_name = data.pop('__class__')
+        if obj is None or isinstance(obj, (str, int, float, bool)):
-            try:
+            return obj
-                class_type = CLASS_REGISTRY[class_name]
+        elif isinstance(obj, np.integer):
-                if issubclass(class_type, Interface):
+            return int(obj)
-                    # Use _deserialize_dict to process the arguments
+        elif isinstance(obj, np.floating):
-                    processed_data = {k: cls._deserialize_value(v) for k, v in data.items()}
+            return float(obj)
-                    return class_type(**processed_data)
+        elif isinstance(obj, np.bool_):
-                else:
+            return bool(obj)
-                    raise ValueError(f'Class "{class_name}" is not an Interface.')
+        elif isinstance(obj, (np.ndarray, pd.Series, pd.DataFrame)):
-            except (AttributeError, KeyError) as e:
+            return obj.tolist() if hasattr(obj, 'tolist') else list(obj)
-                raise ValueError(f'Class "{class_name}" could not get reconstructed.') from e
+        elif isinstance(obj, dict):
+            return {k: self._serialize_to_basic_types(v) for k, v in obj.items()}
+        elif isinstance(obj, (list, tuple)):
+            return [self._serialize_to_basic_types(item) for item in obj]
+        elif isinstance(obj, set):
+            return [self._serialize_to_basic_types(item) for item in obj]
+        elif hasattr(obj, 'isoformat'):  # datetime objects
+            return obj.isoformat()
+        elif hasattr(obj, '__dict__'):  # Custom objects with attributes
+            logger.warning(f'Converting custom object {type(obj)} to dict representation: {obj}')
+            return {str(k): self._serialize_to_basic_types(v) for k, v in obj.__dict__.items()}
         else:
-            return {k: cls._deserialize_value(v) for k, v in data.items()}
+            # For any other object, try to convert to string as fallback
+            logger.warning(f'Converting unknown type {type(obj)} to string: {obj}')
+            return str(obj)

-    @classmethod
-    def _deserialize_list(cls, data: List) -> List:
-        return [cls._deserialize_value(value) for value in data]
+    def to_dataset(self) -> xr.Dataset:
+        """
+        Convert the object to an xarray Dataset representation.
+        All DataArrays become dataset variables; everything else goes to attrs.
+
+        It's recommended to only call this method on Interfaces with all numeric data stored as xr.DataArrays.
+        Interfaces inside a FlowSystem are automatically converted to this form after connecting and transforming the FlowSystem.
+
+        Returns:
+            xr.Dataset: Dataset containing all DataArrays, with basic objects only in attributes
+
+        Raises:
+            ValueError: If serialization fails due to naming conflicts or invalid data
+        """
+        try:
+            reference_structure, extracted_arrays = self._create_reference_structure()
+            # Create the dataset with extracted arrays as variables and structure as attrs
+            return xr.Dataset(extracted_arrays, attrs=reference_structure)
+        except Exception as e:
+            raise ValueError(
+                f'Failed to convert {self.__class__.__name__} to dataset. It is recommended to only call this method on '
+                f'a fully connected and transformed FlowSystem, or Interfaces inside such a FlowSystem. '
+                f'Original Error: {e}') from e
+
+    def to_netcdf(self, path: Union[str, pathlib.Path], compression: int = 0):
+        """
+        Save the object to a NetCDF file.
+ + Args: + path: Path to save the NetCDF file + compression: Compression level (0-9) + + Raises: + ValueError: If serialization fails + IOError: If file cannot be written + """ + try: + ds = self.to_dataset() + fx_io.save_dataset_to_netcdf(ds, path, compression=compression) + except Exception as e: + raise IOError(f'Failed to save {self.__class__.__name__} to NetCDF file {path}: {e}') from e @classmethod - def _deserialize_value(cls, value: Any): - """Helper method to deserialize a value based on its type.""" - if value is None: - return None - elif isinstance(value, dict): - return cls._deserialize_dict(value) - elif isinstance(value, list): - return cls._deserialize_list(value) - return value + def from_dataset(cls, ds: xr.Dataset) -> 'Interface': + """ + Create an instance from an xarray Dataset. + + Args: + ds: Dataset containing the object data + + Returns: + Interface instance + + Raises: + ValueError: If dataset format is invalid or class mismatch + """ + try: + # Get class name and verify it matches + class_name = ds.attrs.get('__class__') + if class_name and class_name != cls.__name__: + logger.warning(f"Dataset class '{class_name}' doesn't match target class '{cls.__name__}'") + + # Get the reference structure from attrs + reference_structure = dict(ds.attrs) + + # Remove the class name since it's not a constructor parameter + reference_structure.pop('__class__', None) + + # Create arrays dictionary from dataset variables + arrays_dict = {name: array for name, array in ds.data_vars.items()} + + # Resolve all references using the centralized method + resolved_params = cls._resolve_reference_structure(reference_structure, arrays_dict) + + return cls(**resolved_params) + except Exception as e: + raise ValueError(f'Failed to create {cls.__name__} from dataset: {e}') from e @classmethod - def from_dict(cls, data: Dict) -> 'Interface': + def from_netcdf(cls, path: Union[str, pathlib.Path]) -> 'Interface': """ - Create an instance from a dictionary representation. + Load an instance from a NetCDF file. Args: - data: Dictionary containing the data for the object. + path: Path to the NetCDF file + + Returns: + Interface instance + + Raises: + IOError: If file cannot be read + ValueError: If file format is invalid """ - return cls._deserialize_dict(data) + try: + ds = fx_io.load_dataset_from_netcdf(path) + return cls.from_dataset(ds) + except Exception as e: + raise IOError(f'Failed to load {cls.__name__} from NetCDF file {path}: {e}') from e - def __repr__(self): - # Get the constructor arguments and their current values - init_signature = inspect.signature(self.__init__) - init_args = init_signature.parameters + def get_structure(self, clean: bool = False, stats: bool = False) -> Dict: + """ + Get object structure as a dictionary. + + Args: + clean: If True, remove None and empty dicts and lists. 
+ stats: If True, replace DataArray references with statistics + + Returns: + Dictionary representation of the object structure + """ + reference_structure, extracted_arrays = self._create_reference_structure() + + if stats: + # Replace references with statistics + reference_structure = self._replace_references_with_stats(reference_structure, extracted_arrays) + + if clean: + return fx_io.remove_none_and_empty(reference_structure) + return reference_structure + + def _replace_references_with_stats(self, structure, arrays_dict: Dict[str, xr.DataArray]): + """Replace DataArray references with statistical summaries.""" + if isinstance(structure, str) and structure.startswith(':::'): + array_name = structure[3:] + if array_name in arrays_dict: + return get_dataarray_stats(arrays_dict[array_name]) + return structure + + elif isinstance(structure, dict): + return {k: self._replace_references_with_stats(v, arrays_dict) for k, v in structure.items()} + + elif isinstance(structure, list): + return [self._replace_references_with_stats(item, arrays_dict) for item in structure] - # Create a dictionary with argument names and their values - args_str = ', '.join(f'{name}={repr(getattr(self, name, None))}' for name in init_args if name != 'self') - return f'{self.__class__.__name__}({args_str})' + return structure + + def to_json(self, path: Union[str, pathlib.Path]): + """ + Save the object to a JSON file. + This is meant for documentation and comparison, not for reloading. + + Args: + path: The path to the JSON file. + + Raises: + IOError: If file cannot be written + """ + try: + # Use the stats mode for JSON export (cleaner output) + data = self.get_structure(clean=True, stats=True) + with open(path, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=4, ensure_ascii=False) + except Exception as e: + raise IOError(f'Failed to save {self.__class__.__name__} to JSON file {path}: {e}') from e + + def __repr__(self): + """Return a detailed string representation for debugging.""" + try: + # Get the constructor arguments and their current values + init_signature = inspect.signature(self.__init__) + init_args = init_signature.parameters + + # Create a dictionary with argument names and their values, with better formatting + args_parts = [] + for name in init_args: + if name == 'self': + continue + value = getattr(self, name, None) + # Truncate long representations + value_repr = repr(value) + if len(value_repr) > 50: + value_repr = value_repr[:47] + '...' + args_parts.append(f'{name}={value_repr}') + + args_str = ', '.join(args_parts) + return f'{self.__class__.__name__}({args_str})' + except Exception: + # Fallback if introspection fails + return f'{self.__class__.__name__}()' def __str__(self): - return get_str_representation(self.infos(use_numpy=True, use_element_label=True)) + """Return a user-friendly string representation.""" + try: + data = self.get_structure(clean=True, stats=True) + with StringIO() as output_buffer: + console = Console(file=output_buffer, width=1000) # Adjust width as needed + console.print(Pretty(data, expand_all=True, indent_guides=True)) + return output_buffer.getvalue() + except Exception: + # Fallback if structure generation fails + return f'{self.__class__.__name__} instance' + + def copy(self) -> 'Interface': + """ + Create a copy of the Interface object. + + Uses the existing serialization infrastructure to ensure proper copying + of all DataArrays and nested objects. + + Returns: + A new instance of the same class with copied data. 
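+
+        Example (illustrative)::
+
+            clone = flow_system.copy()
+            assert clone == flow_system and clone is not flow_system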
+ """ + # Convert to dataset, copy it, and convert back + dataset = self.to_dataset().copy(deep=True) + return self.__class__.from_dataset(dataset) + + def __copy__(self): + """Support for copy.copy().""" + return self.copy() + + def __deepcopy__(self, memo): + """Support for copy.deepcopy().""" + return self.copy() class Element(Interface): @@ -454,177 +790,3 @@ def results_structure(self): 'variables': list(self.variables), 'constraints': list(self.constraints), } - - -def copy_and_convert_datatypes(data: Any, use_numpy: bool = True, use_element_label: bool = False) -> Any: - """ - Converts values in a nested data structure into JSON-compatible types while preserving or transforming numpy arrays - and custom `Element` objects based on the specified options. - - The function handles various data types and transforms them into a consistent, readable format: - - Primitive types (`int`, `float`, `str`, `bool`, `None`) are returned as-is. - - Numpy scalars are converted to their corresponding Python scalar types. - - Collections (`list`, `tuple`, `set`, `dict`) are recursively processed to ensure all elements are compatible. - - Numpy arrays are preserved or converted to lists, depending on `use_numpy`. - - Custom `Element` objects can be represented either by their `label` or their initialization parameters as a dictionary. - - Timestamps (`datetime`) are converted to ISO 8601 strings. - - Args: - data: The input data to process, which may be deeply nested and contain a mix of types. - use_numpy: If `True`, numeric numpy arrays (`np.ndarray`) are preserved as-is. If `False`, they are converted to lists. - Default is `True`. - use_element_label: If `True`, `Element` objects are represented by their `label`. If `False`, they are converted into a dictionary - based on their initialization parameters. Default is `False`. - - Returns: - A transformed version of the input data, containing only JSON-compatible types: - - `int`, `float`, `str`, `bool`, `None` - - `list`, `dict` - - `np.ndarray` (if `use_numpy=True`. This is NOT JSON-compatible) - - Raises: - TypeError: If the data cannot be converted to the specified types. - - Examples: - >>> copy_and_convert_datatypes({'a': np.array([1, 2, 3]), 'b': Element(label='example')}) - {'a': array([1, 2, 3]), 'b': {'class': 'Element', 'label': 'example'}} - - >>> copy_and_convert_datatypes({'a': np.array([1, 2, 3]), 'b': Element(label='example')}, use_numpy=False) - {'a': [1, 2, 3], 'b': {'class': 'Element', 'label': 'example'}} - - Notes: - - The function gracefully handles unexpected types by issuing a warning and returning a deep copy of the data. - - Empty collections (lists, dictionaries) and default parameter values in `Element` objects are omitted from the output. - - Numpy arrays with non-numeric data types are automatically converted to lists. - """ - if isinstance(data, np.integer): # This must be checked before checking for regular int and float! 
- return int(data) - elif isinstance(data, np.floating): - return float(data) - - elif isinstance(data, (int, float, str, bool, type(None))): - return data - elif isinstance(data, datetime): - return data.isoformat() - - elif isinstance(data, (tuple, set)): - return copy_and_convert_datatypes([item for item in data], use_numpy, use_element_label) - elif isinstance(data, dict): - return { - copy_and_convert_datatypes(key, use_numpy, use_element_label=True): copy_and_convert_datatypes( - value, use_numpy, use_element_label - ) - for key, value in data.items() - } - elif isinstance(data, list): # Shorten arrays/lists to be readable - if use_numpy and all([isinstance(value, (int, float)) for value in data]): - return np.array([item for item in data]) - else: - return [copy_and_convert_datatypes(item, use_numpy, use_element_label) for item in data] - - elif isinstance(data, np.ndarray): - if not use_numpy: - return copy_and_convert_datatypes(data.tolist(), use_numpy, use_element_label) - elif use_numpy and np.issubdtype(data.dtype, np.number): - return data - else: - logger.critical( - f'An np.array with non-numeric content was found: {data=}.It will be converted to a list instead' - ) - return copy_and_convert_datatypes(data.tolist(), use_numpy, use_element_label) - - elif isinstance(data, TimeSeries): - return copy_and_convert_datatypes(data.active_data, use_numpy, use_element_label) - elif isinstance(data, TimeSeriesData): - return copy_and_convert_datatypes(data.data, use_numpy, use_element_label) - - elif isinstance(data, Interface): - if use_element_label and isinstance(data, Element): - return data.label - return data.infos(use_numpy, use_element_label) - elif isinstance(data, xr.DataArray): - # TODO: This is a temporary basic work around - return copy_and_convert_datatypes(data.values, use_numpy, use_element_label) - else: - raise TypeError(f'copy_and_convert_datatypes() did get unexpected data of type "{type(data)}": {data=}') - - -def get_compact_representation(data: Any, array_threshold: int = 50, decimals: int = 2) -> Dict: - """ - Generate a compact json serializable representation of deeply nested data. - Numpy arrays are statistically described if they exceed a threshold and converted to lists. - - Args: - data (Any): The data to format and represent. - array_threshold (int): Maximum length of NumPy arrays to display. Longer arrays are statistically described. - decimals (int): Number of decimal places in which to describe the arrays. 
- - Returns: - Dict: A dictionary representation of the data - """ - - def format_np_array_if_found(value: Any) -> Any: - """Recursively processes the data, formatting NumPy arrays.""" - if isinstance(value, (int, float, str, bool, type(None))): - return value - elif isinstance(value, np.ndarray): - return describe_numpy_arrays(value) - elif isinstance(value, dict): - return {format_np_array_if_found(k): format_np_array_if_found(v) for k, v in value.items()} - elif isinstance(value, (list, tuple, set)): - return [format_np_array_if_found(v) for v in value] - else: - logger.warning( - f'Unexpected value found when trying to format numpy array numpy array: {type(value)=}; {value=}' - ) - return value - - def describe_numpy_arrays(arr: np.ndarray) -> Union[str, List]: - """Shortens NumPy arrays if they exceed the specified length.""" - - def normalized_center_of_mass(array: Any) -> float: - # position in array (0 bis 1 normiert) - positions = np.linspace(0, 1, len(array)) # weights w_i - # mass center - if np.sum(array) == 0: - return np.nan - else: - return np.sum(positions * array) / np.sum(array) - - if arr.size > array_threshold: # Calculate basic statistics - fmt = f'.{decimals}f' - return ( - f'Array (min={np.min(arr):{fmt}}, max={np.max(arr):{fmt}}, mean={np.mean(arr):{fmt}}, ' - f'median={np.median(arr):{fmt}}, std={np.std(arr):{fmt}}, len={len(arr)}, ' - f'center={normalized_center_of_mass(arr):{fmt}})' - ) - else: - return np.around(arr, decimals=decimals).tolist() - - # Process the data to handle NumPy arrays - formatted_data = format_np_array_if_found(copy_and_convert_datatypes(data, use_numpy=True)) - - return formatted_data - - -def get_str_representation(data: Any, array_threshold: int = 50, decimals: int = 2) -> str: - """ - Generate a string representation of deeply nested data using `rich.print`. - NumPy arrays are shortened to the specified length and converted to strings. - - Args: - data (Any): The data to format and represent. - array_threshold (int): Maximum length of NumPy arrays to display. Longer arrays are statistically described. - decimals (int): Number of decimal places in which to describe the arrays. - - Returns: - str: The formatted string representation of the data. 
- """ - - formatted_data = get_compact_representation(data, array_threshold, decimals) - - # Use Rich to format and print the data - with StringIO() as output_buffer: - console = Console(file=output_buffer, width=1000) # Adjust width as needed - console.print(Pretty(formatted_data, expand_all=True, indent_guides=True)) - return output_buffer.getvalue() diff --git a/tests/conftest.py b/tests/conftest.py index 5399be72a..074c56efe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,8 @@ def simple_flow_system() -> fx.FlowSystem: discharging=fx.Flow('Q_th_unload', bus='Fernwärme', size=1e4), capacity_in_flow_hours=fx.InvestParameters(fix_effects=20, fixed_size=30, optional=False), initial_charge_state=0, - relative_maximum_charge_state=1 / 100 * np.array([80.0, 70.0, 80.0, 80, 80, 80, 80, 80, 80, 80]), + relative_maximum_charge_state=1 / 100 * np.array([80.0, 70.0, 80.0, 80, 80, 80, 80, 80, 80]), + relative_maximum_final_charge_state=0.8, eta_charge=0.9, eta_discharge=1, relative_loss_per_hour=0.08, @@ -293,8 +294,8 @@ def flow_system_segments_of_flows_2(flow_system_complex) -> fx.FlowSystem: { 'P_el': fx.Piecewise( [ - fx.Piece(np.linspace(5, 6, len(flow_system.time_series_collection.timesteps)), 30), - fx.Piece(40, np.linspace(60, 70, len(flow_system.time_series_collection.timesteps))), + fx.Piece(np.linspace(5, 6, len(flow_system.timesteps)), 30), + fx.Piece(40, np.linspace(60, 70, len(flow_system.timesteps))), ] ), 'Q_th': fx.Piecewise([fx.Piece(6, 35), fx.Piece(45, 100)]), @@ -326,11 +327,11 @@ def flow_system_long(): thermal_load_ts, electrical_load_ts = ( fx.TimeSeriesData(thermal_load), - fx.TimeSeriesData(electrical_load, agg_weight=0.7), + fx.TimeSeriesData(electrical_load, aggregation_weight=0.7), ) p_feed_in, p_sell = ( - fx.TimeSeriesData(-(p_el - 0.5), agg_group='p_el'), - fx.TimeSeriesData(p_el + 0.5, agg_group='p_el'), + fx.TimeSeriesData(-(p_el - 0.5), aggregation_group='p_el'), + fx.TimeSeriesData(p_el + 0.5, aggregation_group='p_el'), ) flow_system = fx.FlowSystem(pd.DatetimeIndex(data.index)) diff --git a/tests/test_bus.py b/tests/test_bus.py index 4a41a9f9e..136f9d2cc 100644 --- a/tests/test_bus.py +++ b/tests/test_bus.py @@ -31,7 +31,7 @@ def test_bus(self, basic_flow_system_linopy): def test_bus_penalty(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps bus = fx.Bus('TestBus') flow_system.add_elements(bus, fx.Sink('WärmelastTest', sink=fx.Flow('Q_th_Last', 'TestBus')), diff --git a/tests/test_component.py b/tests/test_component.py index d87a28c29..18ceb717a 100644 --- a/tests/test_component.py +++ b/tests/test_component.py @@ -57,7 +57,7 @@ def test_component(self, basic_flow_system_linopy): def test_on_with_multiple_flows(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps ub_out2 = np.linspace(1, 1.5, 10).round(2) inputs = [ fx.Flow('In1', 'Fernwärme', relative_minimum=np.ones(10) * 0.1, size=100), @@ -128,7 +128,7 @@ def test_on_with_multiple_flows(self, basic_flow_system_linopy): def test_on_with_single_flow(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = 
flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps inputs = [ fx.Flow('In1', 'Fernwärme', relative_minimum=np.ones(10) * 0.1, size=100), ] diff --git a/tests/test_dataconverter.py b/tests/test_dataconverter.py index 49f1438e7..329da7f92 100644 --- a/tests/test_dataconverter.py +++ b/tests/test_dataconverter.py @@ -14,7 +14,7 @@ def sample_time_index(request): def test_scalar_conversion(sample_time_index): # Test scalar conversion - result = DataConverter.as_dataarray(42, sample_time_index) + result = DataConverter.to_dataarray(42, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (len(sample_time_index),) assert result.dims == ('time',) @@ -25,7 +25,7 @@ def test_series_conversion(sample_time_index): series = pd.Series([1, 2, 3, 4, 5], index=sample_time_index) # Test Series conversion - result = DataConverter.as_dataarray(series, sample_time_index) + result = DataConverter.to_dataarray(series, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (5,) assert result.dims == ('time',) @@ -37,7 +37,7 @@ def test_dataframe_conversion(sample_time_index): df = pd.DataFrame({'A': [1, 2, 3, 4, 5]}, index=sample_time_index) # Test DataFrame conversion - result = DataConverter.as_dataarray(df, sample_time_index) + result = DataConverter.to_dataarray(df, sample_time_index) assert isinstance(result, xr.DataArray) assert result.shape == (5,) assert result.dims == ('time',) @@ -47,7 +47,7 @@ def test_dataframe_conversion(sample_time_index): def test_ndarray_conversion(sample_time_index): # Test 1D array conversion arr_1d = np.array([1, 2, 3, 4, 5]) - result = DataConverter.as_dataarray(arr_1d, sample_time_index) + result = DataConverter.to_dataarray(arr_1d, sample_time_index) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values, arr_1d) @@ -58,7 +58,7 @@ def test_dataarray_conversion(sample_time_index): original = xr.DataArray(data=np.array([1, 2, 3, 4, 5]), coords={'time': sample_time_index}, dims=['time']) # Test DataArray conversion - result = DataConverter.as_dataarray(original, sample_time_index) + result = DataConverter.to_dataarray(original, sample_time_index) assert result.shape == (5,) assert result.dims == ('time',) assert np.array_equal(result.values, original.values) @@ -71,42 +71,42 @@ def test_dataarray_conversion(sample_time_index): def test_invalid_inputs(sample_time_index): # Test invalid input type with pytest.raises(ConversionError): - DataConverter.as_dataarray('invalid_string', sample_time_index) + DataConverter.to_dataarray('invalid_string', sample_time_index) # Test mismatched Series index mismatched_series = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('2025-01-01', periods=6, freq='D')) with pytest.raises(ConversionError): - DataConverter.as_dataarray(mismatched_series, sample_time_index) + DataConverter.to_dataarray(mismatched_series, sample_time_index) # Test DataFrame with multiple columns df_multi_col = pd.DataFrame({'A': [1, 2, 3, 4, 5], 'B': [6, 7, 8, 9, 10]}, index=sample_time_index) with pytest.raises(ConversionError): - DataConverter.as_dataarray(df_multi_col, sample_time_index) + DataConverter.to_dataarray(df_multi_col, sample_time_index) # Test mismatched array shape with pytest.raises(ConversionError): - DataConverter.as_dataarray(np.array([1, 2, 3]), sample_time_index) # Wrong length + DataConverter.to_dataarray(np.array([1, 2, 3]), sample_time_index) # Wrong length # Test multi-dimensional array with 
pytest.raises(ConversionError): - DataConverter.as_dataarray(np.array([[1, 2], [3, 4]]), sample_time_index) # 2D array not allowed + DataConverter.to_dataarray(np.array([[1, 2], [3, 4]]), sample_time_index) # 2D array not allowed def test_time_index_validation(): # Test with unnamed index unnamed_index = pd.date_range('2024-01-01', periods=5, freq='D') with pytest.raises(ConversionError): - DataConverter.as_dataarray(42, unnamed_index) + DataConverter.to_dataarray(42, unnamed_index) # Test with empty index empty_index = pd.DatetimeIndex([], name='time') with pytest.raises(ValueError): - DataConverter.as_dataarray(42, empty_index) + DataConverter.to_dataarray(42, empty_index) # Test with non-DatetimeIndex wrong_type_index = pd.Index([1, 2, 3, 4, 5], name='time') with pytest.raises(ValueError): - DataConverter.as_dataarray(42, wrong_type_index) + DataConverter.to_dataarray(42, wrong_type_index) if __name__ == '__main__': diff --git a/tests/test_effect.py b/tests/test_effect.py index 5cbc04ac6..9b4e1012a 100644 --- a/tests/test_effect.py +++ b/tests/test_effect.py @@ -13,7 +13,7 @@ class TestBusModel: def test_minimal(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps effect = fx.Effect('Effect1', '€', 'Testing Effect') flow_system.add_elements(effect) @@ -43,7 +43,7 @@ def test_minimal(self, basic_flow_system_linopy): def test_bounds(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps effect = fx.Effect('Effect1', '€', 'Testing Effect', minimum_operation=1.0, maximum_operation=1.1, diff --git a/tests/test_flow.py b/tests/test_flow.py index 2308dbd31..cce10b21a 100644 --- a/tests/test_flow.py +++ b/tests/test_flow.py @@ -14,7 +14,7 @@ class TestFlowModel: def test_flow_minimal(self, basic_flow_system_linopy): """Test that flow model constraints are correctly generated.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow('Wärme', bus='Fernwärme', size=100) flow_system.add_elements(fx.Sink('Sink', sink=flow)) @@ -34,7 +34,7 @@ def test_flow_minimal(self, basic_flow_system_linopy): def test_flow(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -86,7 +86,7 @@ def test_flow(self, basic_flow_system_linopy): def test_effects_per_flow_hour(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps costs_per_flow_hour = xr.DataArray(np.linspace(1,2,timesteps.size), coords=(timesteps,)) co2_per_flow_hour = xr.DataArray(np.linspace(4, 5, timesteps.size), coords=(timesteps,)) @@ -120,7 +120,7 @@ class TestFlowInvestModel: def test_flow_invest(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -175,7 +175,7 @@ def test_flow_invest(self, basic_flow_system_linopy): def test_flow_invest_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 
'Wärme', @@ -239,7 +239,7 @@ def test_flow_invest_optional(self, basic_flow_system_linopy): def test_flow_invest_optional_wo_min_size(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -303,7 +303,7 @@ def test_flow_invest_optional_wo_min_size(self, basic_flow_system_linopy): def test_flow_invest_wo_min_size_non_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -354,7 +354,7 @@ def test_flow_invest_wo_min_size_non_optional(self, basic_flow_system_linopy): def test_flow_invest_fixed_size(self, basic_flow_system_linopy): """Test flow with fixed size investment.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -446,7 +446,7 @@ class TestFlowOnModel: def test_flow_on(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -506,7 +506,7 @@ def test_flow_on(self, basic_flow_system_linopy): def test_effects_per_running_hour(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps costs_per_running_hour = xr.DataArray(np.linspace(1, 2, timesteps.size), coords=(timesteps,)) co2_per_running_hour = xr.DataArray(np.linspace(4, 5, timesteps.size), coords=(timesteps,)) @@ -553,7 +553,7 @@ def test_effects_per_running_hour(self, basic_flow_system_linopy): def test_consecutive_on_hours(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive on hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -619,7 +619,7 @@ def test_consecutive_on_hours(self, basic_flow_system_linopy): def test_consecutive_on_hours_previous(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive on hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -686,7 +686,7 @@ def test_consecutive_on_hours_previous(self, basic_flow_system_linopy): def test_consecutive_off_hours(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive off hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -753,7 +753,7 @@ def test_consecutive_off_hours(self, basic_flow_system_linopy): def test_consecutive_off_hours_previous(self, basic_flow_system_linopy): """Test flow with minimum and maximum consecutive off hours.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', @@ -906,7 +906,7 @@ class TestFlowOnInvestModel: def test_flow_on_invest_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ 
-991,7 +991,7 @@ def test_flow_on_invest_optional(self, basic_flow_system_linopy): def test_flow_on_invest_non_optional(self, basic_flow_system_linopy): flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps flow = fx.Flow( 'Wärme', bus='Fernwärme', @@ -1078,7 +1078,7 @@ class TestFlowWithFixedProfile: def test_fixed_relative_profile(self, basic_flow_system_linopy): """Test flow with a fixed relative profile.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create a time-varying profile (e.g., for a load or renewable generation) profile = np.sin(np.linspace(0, 2 * np.pi, len(timesteps))) * 0.5 + 0.5 # Values between 0 and 1 @@ -1100,7 +1100,7 @@ def test_fixed_relative_profile(self, basic_flow_system_linopy): def test_fixed_profile_with_investment(self, basic_flow_system_linopy): """Test flow with fixed profile and investment.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create a fixed profile profile = np.sin(np.linspace(0, 2 * np.pi, len(timesteps))) * 0.5 + 0.5 diff --git a/tests/test_integration.py b/tests/test_integration.py index dc203c33e..da473b4e6 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -420,6 +420,12 @@ def test_modeling_types_costs(self, modeling_calculation): f'Costs do not match for {modeling_type} modeling type', ) + def test_segmented_io(self, modeling_calculation): + calc, modeling_type = modeling_calculation + if modeling_type == 'segmented': + calc.results.to_file() + _ = fx.results.SegmentedCalculationResults.from_file(calc.folder, calc.name) + if __name__ == '__main__': pytest.main(['-v']) diff --git a/tests/test_io.py b/tests/test_io.py index 2e6c61ccf..497b334c8 100644 --- a/tests/test_io.py +++ b/tests/test_io.py @@ -50,11 +50,12 @@ def test_flow_system_file_io(flow_system, highs_solver): def test_flow_system_io(flow_system): - di = flow_system.as_dict() - _ = fx.FlowSystem.from_dict(di) + flow_system.to_json('fs.json') - ds = flow_system.as_dataset() - _ = fx.FlowSystem.from_dataset(ds) + ds = flow_system.to_dataset() + new_fs = fx.FlowSystem.from_dataset(ds) + + assert flow_system == new_fs print(flow_system) flow_system.__repr__() diff --git a/tests/test_linear_converter.py b/tests/test_linear_converter.py index aaab60dcc..a01c17ef2 100644 --- a/tests/test_linear_converter.py +++ b/tests/test_linear_converter.py @@ -52,7 +52,7 @@ def test_basic_linear_converter(self, basic_flow_system_linopy): def test_linear_converter_time_varying(self, basic_flow_system_linopy): """Test a LinearConverter with time-varying conversion factors.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create time-varying efficiency (e.g., temperature-dependent) varying_efficiency = np.linspace(0.7, 0.9, len(timesteps)) @@ -268,7 +268,7 @@ def test_linear_converter_multidimensional(self, basic_flow_system_linopy): def test_edge_case_time_varying_conversion(self, basic_flow_system_linopy): """Test edge case with extreme time-varying conversion factors.""" flow_system = basic_flow_system_linopy - timesteps = flow_system.time_series_collection.timesteps + timesteps = flow_system.timesteps # Create fluctuating conversion efficiency (e.g., for a heat pump) # Values range from very low (0.1) to very high (5.0) @@ 
-317,7 +317,7 @@ def test_edge_case_time_varying_conversion(self, basic_flow_system_linopy):
     def test_piecewise_conversion(self, basic_flow_system_linopy):
         """Test a LinearConverter with PiecewiseConversion."""
         flow_system = basic_flow_system_linopy
-        timesteps = flow_system.time_series_collection.timesteps
+        timesteps = flow_system.timesteps
 
         # Create input and output flows
         input_flow = fx.Flow('input', bus='input_bus', size=100)
@@ -423,7 +423,7 @@ def test_piecewise_conversion(self, basic_flow_system_linopy):
     def test_piecewise_conversion_with_onoff(self, basic_flow_system_linopy):
         """Test a LinearConverter with PiecewiseConversion and OnOffParameters."""
         flow_system = basic_flow_system_linopy
-        timesteps = flow_system.time_series_collection.timesteps
+        timesteps = flow_system.timesteps
 
         # Create input and output flows
         input_flow = fx.Flow('input', bus='input_bus', size=100)
diff --git a/tests/test_storage.py b/tests/test_storage.py
index a3b453c2b..1b9b3b875 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -14,8 +14,8 @@ class TestStorageModel:
     def test_basic_storage(self, basic_flow_system_linopy):
         """Test that basic storage model variables and constraints are correctly generated."""
         flow_system = basic_flow_system_linopy
-        timesteps = flow_system.time_series_collection.timesteps
-        timesteps_extra = flow_system.time_series_collection.timesteps_extra
+        timesteps = flow_system.timesteps
+        timesteps_extra = flow_system.timesteps_extra
 
         # Create a simple storage
         storage = fx.Storage(
@@ -91,8 +91,8 @@ def test_basic_storage(self, basic_flow_system_linopy):
     def test_lossy_storage(self, basic_flow_system_linopy):
         """Test that basic storage model variables and constraints are correctly generated."""
         flow_system = basic_flow_system_linopy
-        timesteps = flow_system.time_series_collection.timesteps
-        timesteps_extra = flow_system.time_series_collection.timesteps_extra
+        timesteps = flow_system.timesteps
+        timesteps_extra = flow_system.timesteps_extra
 
         # Create a simple storage
         storage = fx.Storage(
@@ -175,6 +175,87 @@ def test_lossy_storage(self, basic_flow_system_linopy):
             model.variables['TestStorage|charge_state'].isel(time=0) == 0
         )
 
+    def test_charge_state_bounds(self, basic_flow_system_linopy):
+        """Test that time-varying relative charge state bounds are correctly applied."""
+        flow_system = basic_flow_system_linopy
+        timesteps = flow_system.timesteps
+        timesteps_extra = flow_system.timesteps_extra
+
+        # Create a simple storage
+        storage = fx.Storage(
+            'TestStorage',
+            charging=fx.Flow('Q_th_in', bus='Fernwärme', size=20),
+            discharging=fx.Flow('Q_th_out', bus='Fernwärme', size=20),
+            capacity_in_flow_hours=30,  # 30 kWh storage capacity
+            initial_charge_state=3,
+            prevent_simultaneous_charge_and_discharge=True,
+            relative_maximum_charge_state=np.array([0.14, 0.22, 0.3, 0.38, 0.46, 0.54, 0.62, 0.7, 0.78, 0.86]),
+            relative_minimum_charge_state=np.array([0.07, 0.11, 0.15, 0.19, 0.23, 0.27, 0.31, 0.35, 0.39, 0.43]),
+        )
+
+        flow_system.add_elements(storage)
+        model = create_linopy_model(flow_system)
+
+        # Check that all expected variables exist - linopy model variables are accessed by indexing
+        expected_variables = {
+            'TestStorage(Q_th_in)|flow_rate',
+            'TestStorage(Q_th_in)|total_flow_hours',
+            'TestStorage(Q_th_out)|flow_rate',
+            'TestStorage(Q_th_out)|total_flow_hours',
+            'TestStorage|charge_state',
+            'TestStorage|netto_discharge',
+        }
+        for var_name in expected_variables:
+            assert var_name in model.variables, f"Missing variable: {var_name}"
+
+        # Check that all 
expected constraints exist - linopy model constraints are accessed by indexing + expected_constraints = { + 'TestStorage(Q_th_in)|total_flow_hours', + 'TestStorage(Q_th_out)|total_flow_hours', + 'TestStorage|netto_discharge', + 'TestStorage|charge_state', + 'TestStorage|initial_charge_state', + } + for con_name in expected_constraints: + assert con_name in model.constraints, f"Missing constraint: {con_name}" + + # Check variable properties + assert_var_equal( + model['TestStorage(Q_th_in)|flow_rate'], + model.add_variables(lower=0, upper=20, coords=(timesteps,)) + ) + assert_var_equal( + model['TestStorage(Q_th_out)|flow_rate'], + model.add_variables(lower=0, upper=20, coords=(timesteps,)) + ) + assert_var_equal( + model['TestStorage|charge_state'], + model.add_variables(lower=np.array([0.07, 0.11, 0.15, 0.19, 0.23, 0.27, 0.31, 0.35, 0.39, 0.43, 0.43]) * 30, + upper=np.array([0.14, 0.22, 0.3 , 0.38, 0.46, 0.54, 0.62, 0.7 , 0.78, 0.86, 0.86]) * 30, + coords=(timesteps_extra,)) + ) + + # Check constraint formulations + assert_conequal( + model.constraints['TestStorage|netto_discharge'], + model.variables['TestStorage|netto_discharge'] == + model.variables['TestStorage(Q_th_out)|flow_rate'] - model.variables['TestStorage(Q_th_in)|flow_rate'] + ) + + charge_state = model.variables['TestStorage|charge_state'] + assert_conequal( + model.constraints['TestStorage|charge_state'], + charge_state.isel(time=slice(1, None)) + == charge_state.isel(time=slice(None, -1)) + + model.variables['TestStorage(Q_th_in)|flow_rate'] * model.hours_per_step + - model.variables['TestStorage(Q_th_out)|flow_rate'] * model.hours_per_step, + ) + # Check initial charge state constraint + assert_conequal( + model.constraints['TestStorage|initial_charge_state'], + model.variables['TestStorage|charge_state'].isel(time=0) == 3 + ) + def test_storage_with_investment(self, basic_flow_system_linopy): """Test storage with investment parameters.""" flow_system = basic_flow_system_linopy diff --git a/tests/test_timeseries.py b/tests/test_timeseries.py deleted file mode 100644 index a8bc5fa85..000000000 --- a/tests/test_timeseries.py +++ /dev/null @@ -1,605 +0,0 @@ -import json -import tempfile -from pathlib import Path -from typing import Dict, List, Tuple - -import numpy as np -import pandas as pd -import pytest -import xarray as xr - -from flixopt.core import ConversionError, DataConverter, TimeSeries, TimeSeriesCollection, TimeSeriesData - - -@pytest.fixture -def sample_timesteps(): - """Create a sample time index with the required 'time' name.""" - return pd.date_range('2023-01-01', periods=5, freq='D', name='time') - - -@pytest.fixture -def simple_dataarray(sample_timesteps): - """Create a simple DataArray with time dimension.""" - return xr.DataArray([10, 20, 30, 40, 50], coords={'time': sample_timesteps}, dims=['time']) - - -@pytest.fixture -def sample_timeseries(simple_dataarray): - """Create a sample TimeSeries object.""" - return TimeSeries(simple_dataarray, name='Test Series') - - -class TestTimeSeries: - """Test suite for TimeSeries class.""" - - def test_initialization(self, simple_dataarray): - """Test basic initialization of TimeSeries.""" - ts = TimeSeries(simple_dataarray, name='Test Series') - - # Check basic properties - assert ts.name == 'Test Series' - assert ts.aggregation_weight is None - assert ts.aggregation_group is None - - # Check data initialization - assert isinstance(ts.stored_data, xr.DataArray) - assert ts.stored_data.equals(simple_dataarray) - assert ts.active_data.equals(simple_dataarray) - - # 
Check backup was created - assert ts._backup.equals(simple_dataarray) - - # Check active timesteps - assert ts.active_timesteps.equals(simple_dataarray.indexes['time']) - - def test_initialization_with_aggregation_params(self, simple_dataarray): - """Test initialization with aggregation parameters.""" - ts = TimeSeries( - simple_dataarray, name='Weighted Series', aggregation_weight=0.5, aggregation_group='test_group' - ) - - assert ts.name == 'Weighted Series' - assert ts.aggregation_weight == 0.5 - assert ts.aggregation_group == 'test_group' - - def test_initialization_validation(self, sample_timesteps): - """Test validation during initialization.""" - # Test missing time dimension - invalid_data = xr.DataArray([1, 2, 3], dims=['invalid_dim']) - with pytest.raises(ValueError, match='must have a "time" index'): - TimeSeries(invalid_data, name='Invalid Series') - - # Test multi-dimensional data - multi_dim_data = xr.DataArray( - [[1, 2, 3], [4, 5, 6]], coords={'dim1': [0, 1], 'time': sample_timesteps[:3]}, dims=['dim1', 'time'] - ) - with pytest.raises(ValueError, match='dimensions of DataArray must be 1'): - TimeSeries(multi_dim_data, name='Multi-dim Series') - - def test_active_timesteps_getter_setter(self, sample_timeseries, sample_timesteps): - """Test active_timesteps getter and setter.""" - # Initial state should use all timesteps - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - - # Set to a subset - subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index - assert sample_timeseries.active_timesteps.equals(subset_index) - - # Active data should reflect the subset - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data.sel(time=subset_index)) - - # Reset to full index - sample_timeseries.active_timesteps = None - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - - # Test invalid type - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): - sample_timeseries.active_timesteps = 'invalid' - - def test_reset(self, sample_timeseries, sample_timesteps): - """Test reset method.""" - # Set to subset first - subset_index = sample_timesteps[1:3] - sample_timeseries.active_timesteps = subset_index - - # Reset - sample_timeseries.reset() - - # Should be back to full index - assert sample_timeseries.active_timesteps.equals(sample_timesteps) - assert sample_timeseries.active_data.equals(sample_timeseries.stored_data) - - def test_restore_data(self, sample_timeseries, simple_dataarray): - """Test restore_data method.""" - # Modify the stored data - new_data = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) - - # Store original data for comparison - original_data = sample_timeseries.stored_data - - # Set new data - sample_timeseries.stored_data = new_data - assert sample_timeseries.stored_data.equals(new_data) - - # Restore from backup - sample_timeseries.restore_data() - - # Should be back to original data - assert sample_timeseries.stored_data.equals(original_data) - assert sample_timeseries.active_data.equals(original_data) - - def test_stored_data_setter(self, sample_timeseries, sample_timesteps): - """Test stored_data setter with different data types.""" - # Test with a Series - series_data = pd.Series([5, 6, 7, 8, 9], index=sample_timesteps) - sample_timeseries.stored_data = series_data - assert np.array_equal(sample_timeseries.stored_data.values, series_data.values) - - # Test with a single-column DataFrame - df_data = 
pd.DataFrame({'col1': [15, 16, 17, 18, 19]}, index=sample_timesteps) - sample_timeseries.stored_data = df_data - assert np.array_equal(sample_timeseries.stored_data.values, df_data['col1'].values) - - # Test with a NumPy array - array_data = np.array([25, 26, 27, 28, 29]) - sample_timeseries.stored_data = array_data - assert np.array_equal(sample_timeseries.stored_data.values, array_data) - - # Test with a scalar - sample_timeseries.stored_data = 42 - assert np.all(sample_timeseries.stored_data.values == 42) - - # Test with another DataArray - another_dataarray = xr.DataArray([30, 31, 32, 33, 34], coords={'time': sample_timesteps}, dims=['time']) - sample_timeseries.stored_data = another_dataarray - assert sample_timeseries.stored_data.equals(another_dataarray) - - def test_stored_data_setter_no_change(self, sample_timeseries): - """Test stored_data setter when data doesn't change.""" - # Get current data - current_data = sample_timeseries.stored_data - current_backup = sample_timeseries._backup - - # Set the same data - sample_timeseries.stored_data = current_data - - # Backup shouldn't change - assert sample_timeseries._backup is current_backup # Should be the same object - - def test_from_datasource(self, sample_timesteps): - """Test from_datasource class method.""" - # Test with scalar - ts_scalar = TimeSeries.from_datasource(42, 'Scalar Series', sample_timesteps) - assert np.all(ts_scalar.stored_data.values == 42) - - # Test with Series - series_data = pd.Series([1, 2, 3, 4, 5], index=sample_timesteps) - ts_series = TimeSeries.from_datasource(series_data, 'Series Data', sample_timesteps) - assert np.array_equal(ts_series.stored_data.values, series_data.values) - - # Test with aggregation parameters - ts_with_agg = TimeSeries.from_datasource( - series_data, 'Aggregated Series', sample_timesteps, aggregation_weight=0.7, aggregation_group='group1' - ) - assert ts_with_agg.aggregation_weight == 0.7 - assert ts_with_agg.aggregation_group == 'group1' - - def test_to_json_from_json(self, sample_timeseries): - """Test to_json and from_json methods.""" - # Test to_json (dictionary only) - json_dict = sample_timeseries.to_json() - assert json_dict['name'] == sample_timeseries.name - assert 'data' in json_dict - assert 'coords' in json_dict['data'] - assert 'time' in json_dict['data']['coords'] - - # Test to_json with file saving - with tempfile.TemporaryDirectory() as tmpdirname: - filepath = Path(tmpdirname) / 'timeseries.json' - sample_timeseries.to_json(filepath) - assert filepath.exists() - - # Test from_json with file loading - loaded_ts = TimeSeries.from_json(path=filepath) - assert loaded_ts.name == sample_timeseries.name - assert np.array_equal(loaded_ts.stored_data.values, sample_timeseries.stored_data.values) - - # Test from_json with dictionary - loaded_ts_dict = TimeSeries.from_json(data=json_dict) - assert loaded_ts_dict.name == sample_timeseries.name - assert np.array_equal(loaded_ts_dict.stored_data.values, sample_timeseries.stored_data.values) - - # Test validation in from_json - with pytest.raises(ValueError, match="one of 'path' or 'data'"): - TimeSeries.from_json(data=json_dict, path='dummy.json') - - def test_all_equal(self, sample_timesteps): - """Test all_equal property.""" - # All equal values - equal_data = xr.DataArray([5, 5, 5, 5, 5], coords={'time': sample_timesteps}, dims=['time']) - ts_equal = TimeSeries(equal_data, 'Equal Series') - assert ts_equal.all_equal is True - - # Not all equal - unequal_data = xr.DataArray([5, 5, 6, 5, 5], coords={'time': 
sample_timesteps}, dims=['time']) - ts_unequal = TimeSeries(unequal_data, 'Unequal Series') - assert ts_unequal.all_equal is False - - def test_arithmetic_operations(self, sample_timeseries): - """Test arithmetic operations.""" - # Create a second TimeSeries for testing - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) - ts2 = TimeSeries(data2, 'Second Series') - - # Test operations between two TimeSeries objects - assert np.array_equal( - (sample_timeseries + ts2).values, sample_timeseries.active_data.values + ts2.active_data.values - ) - assert np.array_equal( - (sample_timeseries - ts2).values, sample_timeseries.active_data.values - ts2.active_data.values - ) - assert np.array_equal( - (sample_timeseries * ts2).values, sample_timeseries.active_data.values * ts2.active_data.values - ) - assert np.array_equal( - (sample_timeseries / ts2).values, sample_timeseries.active_data.values / ts2.active_data.values - ) - - # Test operations with DataArrays - assert np.array_equal((sample_timeseries + data2).values, sample_timeseries.active_data.values + data2.values) - assert np.array_equal((data2 + sample_timeseries).values, data2.values + sample_timeseries.active_data.values) - - # Test operations with scalars - assert np.array_equal((sample_timeseries + 5).values, sample_timeseries.active_data.values + 5) - assert np.array_equal((5 + sample_timeseries).values, 5 + sample_timeseries.active_data.values) - - # Test unary operations - assert np.array_equal((-sample_timeseries).values, -sample_timeseries.active_data.values) - assert np.array_equal((+sample_timeseries).values, +sample_timeseries.active_data.values) - assert np.array_equal((abs(sample_timeseries)).values, abs(sample_timeseries.active_data.values)) - - def test_comparison_operations(self, sample_timesteps): - """Test comparison operations.""" - data1 = xr.DataArray([10, 20, 30, 40, 50], coords={'time': sample_timesteps}, dims=['time']) - data2 = xr.DataArray([5, 10, 15, 20, 25], coords={'time': sample_timesteps}, dims=['time']) - - ts1 = TimeSeries(data1, 'Series 1') - ts2 = TimeSeries(data2, 'Series 2') - - # Test __gt__ method - assert (ts1 > ts2).all().item() - - # Test with mixed values - data3 = xr.DataArray([5, 25, 15, 45, 25], coords={'time': sample_timesteps}, dims=['time']) - ts3 = TimeSeries(data3, 'Series 3') - - assert not (ts1 > ts3).all().item() # Not all values in ts1 are greater than ts3 - - def test_numpy_ufunc(self, sample_timeseries): - """Test numpy ufunc compatibility.""" - # Test basic numpy functions - assert np.array_equal(np.add(sample_timeseries, 5).values, np.add(sample_timeseries.active_data, 5).values) - - assert np.array_equal( - np.multiply(sample_timeseries, 2).values, np.multiply(sample_timeseries.active_data, 2).values - ) - - # Test with two TimeSeries objects - data2 = xr.DataArray([1, 2, 3, 4, 5], coords={'time': sample_timeseries.active_timesteps}, dims=['time']) - ts2 = TimeSeries(data2, 'Second Series') - - assert np.array_equal( - np.add(sample_timeseries, ts2).values, np.add(sample_timeseries.active_data, ts2.active_data).values - ) - - def test_sel_and_isel_properties(self, sample_timeseries): - """Test sel and isel properties.""" - # Test that sel property works - selected = sample_timeseries.sel(time=sample_timeseries.active_timesteps[0]) - assert selected.item() == sample_timeseries.active_data.values[0] - - # Test that isel property works - indexed = sample_timeseries.isel(time=0) - assert indexed.item() == 
sample_timeseries.active_data.values[0] - - -@pytest.fixture -def sample_collection(sample_timesteps): - """Create a sample TimeSeriesCollection.""" - return TimeSeriesCollection(sample_timesteps) - - -@pytest.fixture -def populated_collection(sample_collection): - """Create a TimeSeriesCollection with test data.""" - # Add a constant time series - sample_collection.create_time_series(42, 'constant_series') - - # Add a varying time series - varying_data = np.array([10, 20, 30, 40, 50]) - sample_collection.create_time_series(varying_data, 'varying_series') - - # Add a time series with extra timestep - sample_collection.create_time_series( - np.array([1, 2, 3, 4, 5, 6]), 'extra_timestep_series', needs_extra_timestep=True - ) - - # Add series with aggregation settings - sample_collection.create_time_series( - TimeSeriesData(np.array([5, 5, 5, 5, 5]), agg_group='group1'), 'group1_series1' - ) - sample_collection.create_time_series( - TimeSeriesData(np.array([6, 6, 6, 6, 6]), agg_group='group1'), 'group1_series2' - ) - sample_collection.create_time_series( - TimeSeriesData(np.array([10, 10, 10, 10, 10]), agg_weight=0.5), 'weighted_series' - ) - - return sample_collection - - -class TestTimeSeriesCollection: - """Test suite for TimeSeriesCollection.""" - - def test_initialization(self, sample_timesteps): - """Test basic initialization.""" - collection = TimeSeriesCollection(sample_timesteps) - - assert collection.all_timesteps.equals(sample_timesteps) - assert len(collection.all_timesteps_extra) == len(sample_timesteps) + 1 - assert isinstance(collection.all_hours_per_timestep, xr.DataArray) - assert len(collection) == 0 - - def test_initialization_with_custom_hours(self, sample_timesteps): - """Test initialization with custom hour settings.""" - # Test with last timestep duration - last_timestep_hours = 12 - collection = TimeSeriesCollection(sample_timesteps, hours_of_last_timestep=last_timestep_hours) - - # Verify the last timestep duration - extra_step_delta = collection.all_timesteps_extra[-1] - collection.all_timesteps_extra[-2] - assert extra_step_delta == pd.Timedelta(hours=last_timestep_hours) - - # Test with previous timestep duration - hours_per_step = 8 - collection2 = TimeSeriesCollection(sample_timesteps, hours_of_previous_timesteps=hours_per_step) - - assert collection2.hours_of_previous_timesteps == hours_per_step - - def test_create_time_series(self, sample_collection): - """Test creating time series.""" - # Test scalar - ts1 = sample_collection.create_time_series(42, 'scalar_series') - assert ts1.name == 'scalar_series' - assert np.all(ts1.active_data.values == 42) - - # Test numpy array - data = np.array([1, 2, 3, 4, 5]) - ts2 = sample_collection.create_time_series(data, 'array_series') - assert np.array_equal(ts2.active_data.values, data) - - # Test with TimeSeriesData - ts3 = sample_collection.create_time_series(TimeSeriesData(10, agg_weight=0.7), 'weighted_series') - assert ts3.aggregation_weight == 0.7 - - # Test with extra timestep - ts4 = sample_collection.create_time_series(5, 'extra_series', needs_extra_timestep=True) - assert ts4.needs_extra_timestep - assert len(ts4.active_data) == len(sample_collection.timesteps_extra) - - # Test duplicate name - with pytest.raises(ValueError, match='already exists'): - sample_collection.create_time_series(1, 'scalar_series') - - def test_access_time_series(self, populated_collection): - """Test accessing time series.""" - # Test __getitem__ - ts = populated_collection['varying_series'] - assert ts.name == 'varying_series' - - # Test 
__contains__ with string - assert 'constant_series' in populated_collection - assert 'nonexistent_series' not in populated_collection - - # Test __contains__ with TimeSeries object - assert populated_collection['varying_series'] in populated_collection - - # Test __iter__ - names = [ts.name for ts in populated_collection] - assert len(names) == 6 - assert 'varying_series' in names - - # Test access to non-existent series - with pytest.raises(KeyError): - populated_collection['nonexistent_series'] - - def test_constants_and_non_constants(self, populated_collection): - """Test constants and non_constants properties.""" - # Test constants - constants = populated_collection.constants - assert len(constants) == 4 # constant_series, group1_series1, group1_series2, weighted_series - assert all(ts.all_equal for ts in constants) - - # Test non_constants - non_constants = populated_collection.non_constants - assert len(non_constants) == 2 # varying_series, extra_timestep_series - assert all(not ts.all_equal for ts in non_constants) - - # Test modifying a series changes the results - populated_collection['constant_series'].stored_data = np.array([1, 2, 3, 4, 5]) - updated_constants = populated_collection.constants - assert len(updated_constants) == 3 # One less constant - assert 'constant_series' not in [ts.name for ts in updated_constants] - - def test_timesteps_properties(self, populated_collection, sample_timesteps): - """Test timestep-related properties.""" - # Test default (all) timesteps - assert populated_collection.timesteps.equals(sample_timesteps) - assert len(populated_collection.timesteps_extra) == len(sample_timesteps) + 1 - - # Test activating a subset - subset = sample_timesteps[1:3] - populated_collection.activate_timesteps(subset) - - assert populated_collection.timesteps.equals(subset) - assert len(populated_collection.timesteps_extra) == len(subset) + 1 - - # Check that time series were updated - assert populated_collection['varying_series'].active_timesteps.equals(subset) - assert populated_collection['extra_timestep_series'].active_timesteps.equals( - populated_collection.timesteps_extra - ) - - # Test reset - populated_collection.reset() - assert populated_collection.timesteps.equals(sample_timesteps) - - def test_to_dataframe_and_dataset(self, populated_collection): - """Test conversion to DataFrame and Dataset.""" - # Test to_dataset - ds = populated_collection.to_dataset() - assert isinstance(ds, xr.Dataset) - assert len(ds.data_vars) == 6 - - # Test to_dataframe with different filters - df_all = populated_collection.to_dataframe(filtered='all') - assert len(df_all.columns) == 6 - - df_constant = populated_collection.to_dataframe(filtered='constant') - assert len(df_constant.columns) == 4 - - df_non_constant = populated_collection.to_dataframe(filtered='non_constant') - assert len(df_non_constant.columns) == 2 - - # Test invalid filter - with pytest.raises(ValueError): - populated_collection.to_dataframe(filtered='invalid') - - def test_calculate_aggregation_weights(self, populated_collection): - """Test aggregation weight calculation.""" - weights = populated_collection.calculate_aggregation_weights() - - # Group weights should be 0.5 each (1/2) - assert populated_collection.group_weights['group1'] == 0.5 - - # Series in group1 should have weight 0.5 - assert weights['group1_series1'] == 0.5 - assert weights['group1_series2'] == 0.5 - - # Series with explicit weight should have that weight - assert weights['weighted_series'] == 0.5 - - # Series without group or weight should 
have weight 1 - assert weights['constant_series'] == 1 - - def test_insert_new_data(self, populated_collection, sample_timesteps): - """Test inserting new data.""" - # Create new data - new_data = pd.DataFrame( - { - 'constant_series': [100, 100, 100, 100, 100], - 'varying_series': [5, 10, 15, 20, 25], - # extra_timestep_series is omitted to test partial updates - }, - index=sample_timesteps, - ) - - # Insert data - populated_collection.insert_new_data(new_data) - - # Verify updates - assert np.all(populated_collection['constant_series'].active_data.values == 100) - assert np.array_equal(populated_collection['varying_series'].active_data.values, np.array([5, 10, 15, 20, 25])) - - # Series not in the DataFrame should be unchanged - assert np.array_equal( - populated_collection['extra_timestep_series'].active_data.values[:-1], np.array([1, 2, 3, 4, 5]) - ) - - # Test with mismatched index - bad_index = pd.date_range('2023-02-01', periods=5, freq='D', name='time') - bad_data = pd.DataFrame({'constant_series': [1, 1, 1, 1, 1]}, index=bad_index) - - with pytest.raises(ValueError, match='must match collection timesteps'): - populated_collection.insert_new_data(bad_data) - - def test_restore_data(self, populated_collection): - """Test restoring original data.""" - # Capture original data - original_values = {name: ts.stored_data.copy() for name, ts in populated_collection.time_series_data.items()} - - # Modify data - new_data = pd.DataFrame( - { - name: np.ones(len(populated_collection.timesteps)) * 999 - for name in populated_collection.time_series_data - if not populated_collection[name].needs_extra_timestep - }, - index=populated_collection.timesteps, - ) - - populated_collection.insert_new_data(new_data) - - # Verify data was changed - assert np.all(populated_collection['constant_series'].active_data.values == 999) - - # Restore data - populated_collection.restore_data() - - # Verify data was restored - for name, original in original_values.items(): - restored = populated_collection[name].stored_data - assert np.array_equal(restored.values, original.values) - - def test_class_method_with_uniform_timesteps(self): - """Test the with_uniform_timesteps class method.""" - collection = TimeSeriesCollection.with_uniform_timesteps( - start_time=pd.Timestamp('2023-01-01'), periods=24, freq='h', hours_per_step=1 - ) - - assert len(collection.timesteps) == 24 - assert collection.hours_of_previous_timesteps == 1 - assert (collection.timesteps[1] - collection.timesteps[0]) == pd.Timedelta(hours=1) - - def test_hours_per_timestep(self, populated_collection): - """Test hours_per_timestep calculation.""" - # Standard case - uniform timesteps - hours = populated_collection.hours_per_timestep.values - assert np.allclose(hours, 24) # Default is daily timesteps - - # Create non-uniform timesteps - non_uniform_times = pd.DatetimeIndex( - [ - pd.Timestamp('2023-01-01'), - pd.Timestamp('2023-01-02'), - pd.Timestamp('2023-01-03 12:00:00'), # 1.5 days from previous - pd.Timestamp('2023-01-04'), # 0.5 days from previous - pd.Timestamp('2023-01-06'), # 2 days from previous - ], - name='time', - ) - - collection = TimeSeriesCollection(non_uniform_times) - hours = collection.hours_per_timestep.values - - # Expected hours between timestamps - expected = np.array([24, 36, 12, 48, 48]) - assert np.allclose(hours, expected) - - def test_validation_and_errors(self, sample_timesteps): - """Test validation and error handling.""" - # Test non-DatetimeIndex - with pytest.raises(TypeError, match='must be a pandas DatetimeIndex'): 
- TimeSeriesCollection(pd.Index([1, 2, 3, 4, 5])) - - # Test too few timesteps - with pytest.raises(ValueError, match='must contain at least 2 timestamps'): - TimeSeriesCollection(pd.DatetimeIndex([pd.Timestamp('2023-01-01')], name='time')) - - # Test invalid active_timesteps - collection = TimeSeriesCollection(sample_timesteps) - invalid_timesteps = pd.date_range('2024-01-01', periods=3, freq='D', name='time') - - with pytest.raises(ValueError, match='must be a subset'): - collection.activate_timesteps(invalid_timesteps)
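Reviewer note: the renames exercised throughout the tests above follow one consistent pattern. Below is a minimal migration sketch, not a definitive reference: it assumes a toy hourly FlowSystem, and only the identifiers touched in this diff (`timesteps`, `aggregation_weight`/`aggregation_group`, `DataConverter.to_dataarray`, `to_dataset`/`from_dataset`/`__eq__`) are taken from the changes themselves; all values are illustrative.

import numpy as np
import pandas as pd

import flixopt as fx
from flixopt.core import DataConverter

# The time index must be named 'time' (an unnamed index raises a ConversionError).
timesteps = pd.date_range('2024-01-01', periods=10, freq='h', name='time')
flow_system = fx.FlowSystem(timesteps)

# Old: flow_system.time_series_collection.timesteps -> New: direct property
assert len(flow_system.timesteps) == 10

# Old: agg_weight / agg_group (deprecated) -> New: aggregation_weight / aggregation_group
demand = fx.TimeSeriesData(np.linspace(1.0, 2.0, 10), aggregation_weight=0.7)
price = fx.TimeSeriesData(np.linspace(0.1, 0.2, 10), aggregation_group='p_el')

# Old: DataConverter.as_dataarray(...) -> New: DataConverter.to_dataarray(...)
da = DataConverter.to_dataarray(42, timesteps)

# Round-trip IO with the new __eq__, as in tests/test_io.py; run this on a
# connected and transformed FlowSystem (see the Known Issues note above):
# ds = flow_system.to_dataset()
# assert flow_system == fx.FlowSystem.from_dataset(ds)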