From 70870ffbbb83c26ee2276851d7840c070930bdba Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:01:44 +0100 Subject: [PATCH 01/15] Implementation Summary MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Phase 1: RangeIndex Support in FlowSystem - Modified _validate_timesteps() to accept both DatetimeIndex and RangeIndex - Updated _create_timesteps_with_extra() and calculate_timestep_duration() for RangeIndex - Added is_segmented property to detect segmented FlowSystems Phase 2: Segmentation Metadata in ClusterStructure - Added fields: is_segmented, n_segments, segment_timestep_counts - Updated serialization for persistence Phase 3: TSAM Integration - Added parameters: segmentation, n_segments, segment_representation_method - Integrated with TSAM's segmentation, noSegments parameters - Extract segment durations from TSAM's segmentDurationDict - Build variable timestep_duration DataArray for segments Phase 4: Storage Model Updates - Updated InterclusterStorageModel for segmented systems - Fixed sample offsets to use actual time dimension size Phase 5: Solution Expansion - Updated expand_data() to use n_segments for indexing in segmented systems - Updated log messages to show segment info Phase 7: Tests (14 new tests) - TestSegmentation: 10 tests covering basic segmentation, structure, optimization, expansion - TestSegmentationWithStorage: 4 tests for intercluster storage with segmentation API Usage # Cluster with inner-period segmentation fs_reduced = flow_system.transform.cluster( n_clusters=8, cluster_duration='1D', segmentation=True, n_segments=6, ) # Result: 8 clusters × 6 segments = 48 representative points # vs. 8 clusters × 24 hours = 192 points without segmentation fs_reduced.optimize(solver) fs_expanded = fs_reduced.transform.expand_solution() --- flixopt/clustering/base.py | 36 ++++- flixopt/components.py | 38 ++++- flixopt/flow_system.py | 89 ++++++++--- flixopt/transform_accessor.py | 225 ++++++++++++++++++++++++---- tests/test_cluster_reduce_expand.py | 205 +++++++++++++++++++++++++ 5 files changed, 532 insertions(+), 61 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index c48e634f5..f8fe3b166 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -61,6 +61,11 @@ class ClusterStructure: dims: [cluster] for simple case, or [cluster, period, scenario] for multi-dim. n_clusters: Number of distinct clusters (typical periods). timesteps_per_cluster: Number of timesteps in each cluster (e.g., 24 for daily). + is_segmented: Whether inner-period segmentation was applied. + n_segments: Number of segments per cluster (if segmented). + segment_timestep_counts: Maps (cluster, segment) to number of original timesteps. + dims: [cluster, segment] for simple case, or [cluster, segment, period, scenario]. + Values are counts of original timesteps each segment represents. Example: For 365 days clustered into 8 typical days: @@ -72,12 +77,22 @@ class ClusterStructure: For multi-scenario (e.g., 2 scenarios): - cluster_order: shape (365, 2) with dims [original_cluster, scenario] - cluster_occurrences: shape (8, 2) with dims [cluster, scenario] + + For segmented clustering (8 clusters, 6 segments each): + - is_segmented: True + - n_segments: 6 + - segment_timestep_counts: shape (8, 6), values like [[4, 3, 5, 4, 4, 4], ...] + indicating how many original timesteps each segment represents """ cluster_order: xr.DataArray cluster_occurrences: xr.DataArray n_clusters: int | xr.DataArray timesteps_per_cluster: int + # Segmentation fields (optional) + is_segmented: bool = False + n_segments: int | None = None + segment_timestep_counts: xr.DataArray | None = None def __post_init__(self): """Validate and ensure proper DataArray formatting.""" @@ -139,6 +154,16 @@ def _create_reference_structure(self) -> tuple[dict, dict[str, xr.DataArray]]: ref['timesteps_per_cluster'] = self.timesteps_per_cluster + # Segmentation fields + ref['is_segmented'] = self.is_segmented + if self.n_segments is not None: + ref['n_segments'] = self.n_segments + if self.segment_timestep_counts is not None: + name = self.segment_timestep_counts.name or 'segment_timestep_counts' + segment_counts_da = self.segment_timestep_counts.rename(name) + arrays[name] = segment_counts_da + ref['segment_timestep_counts'] = f':::{name}' + return ref, arrays @property @@ -411,7 +436,12 @@ def expand_data(self, aggregated: xr.DataArray, original_time: xr.DataArray | No timestep_mapping = self.timestep_mapping has_cluster_dim = 'cluster' in aggregated.dims - timesteps_per_cluster = self.cluster_structure.timesteps_per_cluster if has_cluster_dim else None + cluster_structure = self.cluster_structure + timesteps_per_cluster = cluster_structure.timesteps_per_cluster if has_cluster_dim else None + + # For segmented systems, use n_segments instead of timesteps_per_cluster for indexing + is_segmented = cluster_structure.is_segmented if cluster_structure else False + time_dim_size = cluster_structure.n_segments if is_segmented else timesteps_per_cluster def _expand_slice(mapping: np.ndarray, data: xr.DataArray) -> np.ndarray: """Expand a single slice using the mapping.""" @@ -425,8 +455,8 @@ def _expand_slice(mapping: np.ndarray, data: xr.DataArray) -> np.ndarray: f'Expected only {expected_dims}. Make sure period/scenario selections are applied.' ) if has_cluster_dim: - cluster_ids = mapping // timesteps_per_cluster - time_within = mapping % timesteps_per_cluster + cluster_ids = mapping // time_dim_size + time_within = mapping % time_dim_size return data.values[cluster_ids, time_within] return data.values[mapping] diff --git a/flixopt/components.py b/flixopt/components.py index b720dd0ba..28ce11abd 100644 --- a/flixopt/components.py +++ b/flixopt/components.py @@ -1478,7 +1478,13 @@ def _add_linking_constraints( # Use mean over time (linking operates at period level, not timestep) # Keep as DataArray to respect per-period/scenario values rel_loss = self.element.relative_loss_per_hour.mean('time') - hours_per_cluster = timesteps_per_cluster * self._model.timestep_duration.mean('time') + + flow_system = self._model.flow_system + if flow_system.is_segmented: + # For segmented systems, sum all segment durations to get total hours per cluster + hours_per_cluster = self._model.timestep_duration.sum('time').mean('cluster') + else: + hours_per_cluster = timesteps_per_cluster * self._model.timestep_duration.mean('time') decay_n = (1 - rel_loss) ** hours_per_cluster lhs = soc_after - soc_before * decay_n - delta_soc_ordered @@ -1523,9 +1529,22 @@ def _add_combined_bound_constraints( # relative_loss_per_hour is per-hour, so we need to convert offsets to hours # Keep as DataArray to respect per-period/scenario values rel_loss = self.element.relative_loss_per_hour.mean('time') - mean_timestep_duration = self._model.timestep_duration.mean('time') - sample_offsets = [0, timesteps_per_cluster // 2, timesteps_per_cluster - 1] + # For segmented systems, the time dimension size is n_segments, not timesteps_per_cluster + flow_system = self._model.flow_system + actual_time_points = len(flow_system.timesteps) + + if flow_system.is_segmented: + # For segmented systems, sample at start, mid, and end segments + # Use cumulative segment durations to calculate hours offset + sample_offsets = [0, actual_time_points // 2, actual_time_points - 1] + timestep_duration = self._model.timestep_duration + # Cumulative hours for each segment (sum of segment durations up to that point) + cumulative_hours = timestep_duration.cumsum(dim='time') + else: + # Non-segmented: use standard offsets based on timesteps_per_cluster + sample_offsets = [0, timesteps_per_cluster // 2, timesteps_per_cluster - 1] + mean_timestep_duration = self._model.timestep_duration.mean('time') for sample_name, offset in zip(['start', 'mid', 'end'], sample_offsets, strict=False): # With 2D structure: select time offset, then reorder by cluster_order @@ -1539,8 +1558,17 @@ def _add_combined_bound_constraints( cs_t = cs_t.assign_coords(original_cluster=np.arange(n_original_clusters)) # Apply decay factor (1-loss)^hours to SOC_boundary per Eq. 9 - # Convert timestep offset to hours - hours_offset = offset * mean_timestep_duration + if flow_system.is_segmented: + # For segmented systems, use cumulative hours at this offset + # At offset 0, hours = 0 (start of cluster) + if offset == 0: + hours_offset = 0 + else: + # Sum of segment durations up to and including this offset + hours_offset = cumulative_hours.isel(time=offset).mean('cluster') + else: + # Non-segmented: offset * mean timestep duration + hours_offset = offset * mean_timestep_duration decay_t = (1 - rel_loss) ** hours_offset combined = soc_d * decay_t + cs_t diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index 7c7f66339..d54a1b38c 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -173,7 +173,7 @@ class FlowSystem(Interface, CompositeContainerMixin[Element]): def __init__( self, - timesteps: pd.DatetimeIndex, + timesteps: pd.DatetimeIndex | pd.RangeIndex, periods: pd.Index | None = None, scenarios: pd.Index | None = None, clusters: pd.Index | None = None, @@ -200,7 +200,10 @@ def __init__( self.scenarios = None if scenarios is None else self._validate_scenarios(scenarios) self.clusters = clusters # Cluster dimension for clustered FlowSystems - self.timestep_duration = self.fit_to_model_coords('timestep_duration', timestep_duration) + # For RangeIndex (segmented systems), timestep_duration is None and must be set externally + self.timestep_duration = ( + self.fit_to_model_coords('timestep_duration', timestep_duration) if timestep_duration is not None else None + ) # Cluster weight for cluster() optimization (default 1.0) # Represents how many original timesteps each cluster represents @@ -264,14 +267,19 @@ def __init__( self.name = name @staticmethod - def _validate_timesteps(timesteps: pd.DatetimeIndex) -> pd.DatetimeIndex: - """Validate timesteps format and rename if needed.""" - if not isinstance(timesteps, pd.DatetimeIndex): - raise TypeError('timesteps must be a pandas DatetimeIndex') + def _validate_timesteps( + timesteps: pd.DatetimeIndex | pd.RangeIndex, + ) -> pd.DatetimeIndex | pd.RangeIndex: + """Validate timesteps format and rename if needed. + + Accepts either DatetimeIndex (standard) or RangeIndex (for segmented systems). + """ + if not isinstance(timesteps, (pd.DatetimeIndex, pd.RangeIndex)): + raise TypeError('timesteps must be a pandas DatetimeIndex or RangeIndex') if len(timesteps) < 2: raise ValueError('timesteps must contain at least 2 timestamps') if timesteps.name != 'time': - timesteps.name = 'time' + timesteps = timesteps.rename('time') if not timesteps.is_monotonic_increasing: raise ValueError('timesteps must be sorted') return timesteps @@ -317,9 +325,18 @@ def _validate_periods(periods: pd.Index) -> pd.Index: @staticmethod def _create_timesteps_with_extra( - timesteps: pd.DatetimeIndex, hours_of_last_timestep: float | None - ) -> pd.DatetimeIndex: - """Create timesteps with an extra step at the end.""" + timesteps: pd.DatetimeIndex | pd.RangeIndex, hours_of_last_timestep: float | None + ) -> pd.DatetimeIndex | pd.RangeIndex: + """Create timesteps with an extra step at the end. + + For DatetimeIndex, adds a timestamp based on hours_of_last_timestep. + For RangeIndex (segmented systems), simply extends the range by 1. + """ + if isinstance(timesteps, pd.RangeIndex): + # For RangeIndex, just extend by 1 + return pd.RangeIndex(len(timesteps) + 1, name='time') + + # DatetimeIndex case if hours_of_last_timestep is None: hours_of_last_timestep = (timesteps[-1] - timesteps[-2]) / pd.Timedelta(hours=1) @@ -327,8 +344,18 @@ def _create_timesteps_with_extra( return pd.DatetimeIndex(timesteps.append(last_date), name='time') @staticmethod - def calculate_timestep_duration(timesteps_extra: pd.DatetimeIndex) -> xr.DataArray: - """Calculate duration of each timestep in hours as a 1D DataArray.""" + def calculate_timestep_duration( + timesteps_extra: pd.DatetimeIndex | pd.RangeIndex, + ) -> xr.DataArray | None: + """Calculate duration of each timestep in hours as a 1D DataArray. + + For DatetimeIndex, calculates from time differences. + For RangeIndex (segmented systems), returns None - duration must be provided externally. + """ + if isinstance(timesteps_extra, pd.RangeIndex): + # For RangeIndex, duration cannot be calculated - must be provided externally + return None + hours_per_step = np.diff(timesteps_extra) / pd.Timedelta(hours=1) return xr.DataArray( hours_per_step, coords={'time': timesteps_extra[:-1]}, dims='time', name='timestep_duration' @@ -336,11 +363,18 @@ def calculate_timestep_duration(timesteps_extra: pd.DatetimeIndex) -> xr.DataArr @staticmethod def _calculate_hours_of_previous_timesteps( - timesteps: pd.DatetimeIndex, hours_of_previous_timesteps: float | np.ndarray | None - ) -> float | np.ndarray: - """Calculate duration of regular timesteps.""" + timesteps: pd.DatetimeIndex | pd.RangeIndex, + hours_of_previous_timesteps: float | np.ndarray | None, + ) -> float | np.ndarray | None: + """Calculate duration of regular timesteps. + + For RangeIndex, returns None if not provided (must be set externally). + """ if hours_of_previous_timesteps is not None: return hours_of_previous_timesteps + if isinstance(timesteps, pd.RangeIndex): + # For RangeIndex, cannot calculate from time diffs + return None # Calculate from the first interval first_interval = timesteps[1] - timesteps[0] return first_interval.total_seconds() / 3600 # Convert to hours @@ -385,33 +419,37 @@ def calculate_weight_per_period(periods_extra: pd.Index) -> xr.DataArray: @classmethod def _compute_time_metadata( cls, - timesteps: pd.DatetimeIndex, + timesteps: pd.DatetimeIndex | pd.RangeIndex, hours_of_last_timestep: int | float | None = None, hours_of_previous_timesteps: int | float | np.ndarray | None = None, - ) -> tuple[pd.DatetimeIndex, float, float | np.ndarray, xr.DataArray]: + ) -> tuple[pd.DatetimeIndex | pd.RangeIndex, float | None, float | np.ndarray | None, xr.DataArray | None]: """ Compute all time-related metadata from timesteps. This is the single source of truth for time metadata computation, used by both __init__ and dataset operations (sel/isel/resample) to ensure consistency. + For RangeIndex (segmented systems), timestep_duration cannot be calculated from + the index and must be provided externally after FlowSystem creation. + Args: - timesteps: The time index to compute metadata from + timesteps: The time index to compute metadata from (DatetimeIndex or RangeIndex) hours_of_last_timestep: Duration of the last timestep. If None, computed from the time index. hours_of_previous_timesteps: Duration of previous timesteps. If None, computed from the time index. Can be a scalar or array. Returns: Tuple of (timesteps_extra, hours_of_last_timestep, hours_of_previous_timesteps, timestep_duration) + For RangeIndex, hours_of_last_timestep and timestep_duration may be None. """ # Create timesteps with extra step at the end timesteps_extra = cls._create_timesteps_with_extra(timesteps, hours_of_last_timestep) - # Calculate timestep duration + # Calculate timestep duration (returns None for RangeIndex) timestep_duration = cls.calculate_timestep_duration(timesteps_extra) # Extract hours_of_last_timestep if not provided - if hours_of_last_timestep is None: + if hours_of_last_timestep is None and timestep_duration is not None: hours_of_last_timestep = timestep_duration.isel(time=-1).item() # Compute hours_of_previous_timesteps (handles both None and provided cases) @@ -2043,10 +2081,19 @@ def _cluster_timesteps_per_cluster(self) -> int | None: return len(self.timesteps) if self.clusters is not None else None @property - def _cluster_time_coords(self) -> pd.DatetimeIndex | None: + def _cluster_time_coords(self) -> pd.DatetimeIndex | pd.RangeIndex | None: """Get time coordinates for clustered system (same as timesteps).""" return self.timesteps if self.clusters is not None else None + @property + def is_segmented(self) -> bool: + """Check if this FlowSystem uses segmented time (RangeIndex instead of DatetimeIndex). + + Segmented systems have variable timestep durations stored in timestep_duration, + and the time index is a RangeIndex (0, 1, ..., n_segments-1) instead of timestamps. + """ + return isinstance(self.timesteps, pd.RangeIndex) + @property def n_timesteps(self) -> int: """Number of timesteps (within each cluster if clustered).""" diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 7daaa406d..6c3fe0966 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -589,6 +589,12 @@ def cluster( extreme_period_method: Literal['append', 'new_cluster_center', 'replace_cluster_center'] | None = None, rescale_cluster_periods: bool = True, predef_cluster_order: xr.DataArray | np.ndarray | list[int] | None = None, + segmentation: bool = False, + n_segments: int | None = None, + segment_representation_method: Literal[ + 'meanRepresentation', 'medoidRepresentation', 'distributionAndMinMaxRepresentation' + ] + | None = None, **tsam_kwargs: Any, ) -> FlowSystem: """ @@ -632,6 +638,14 @@ def cluster( For multi-dimensional FlowSystems, use an xr.DataArray with dims ``[original_cluster, period?, scenario?]`` to specify different assignments per period/scenario combination. + segmentation: If True, apply inner-period segmentation after clustering. + This further reduces timesteps by grouping adjacent timesteps within + each typical period into variable-length segments. Default: False. + n_segments: Number of segments per cluster when segmentation is enabled. + If None, defaults to timesteps_per_cluster (no reduction within periods). + Must be <= timesteps_per_cluster. + segment_representation_method: How segment representatives are computed. + Options same as representation_method. If None, uses representation_method. **tsam_kwargs: Additional keyword arguments passed to ``tsam.TimeSeriesAggregation``. See tsam documentation for all options. @@ -716,6 +730,9 @@ def cluster( 'weightDict', 'addPeakMax', 'addPeakMin', + 'segmentation', + 'noSegments', + 'segmentRepresentationMethod', } conflicts = reserved_tsam_keys & set(tsam_kwargs.keys()) if conflicts: @@ -770,6 +787,16 @@ def cluster( clustering_weights = weights or self._calculate_clustering_weights(temporaly_changing_ds) # tsam expects 'None' as a string, not Python None tsam_extreme_method = 'None' if extreme_period_method is None else extreme_period_method + + # Build segmentation parameters + tsam_segmentation_kwargs = {} + if segmentation: + tsam_segmentation_kwargs['segmentation'] = True + if n_segments is not None: + tsam_segmentation_kwargs['noSegments'] = n_segments + if segment_representation_method is not None: + tsam_segmentation_kwargs['segmentRepresentationMethod'] = segment_representation_method + tsam_agg = tsam.TimeSeriesAggregation( df, noTypicalPeriods=n_clusters, @@ -783,6 +810,7 @@ def cluster( weightDict={name: w for name, w in clustering_weights.items() if name in df.columns}, addPeakMax=time_series_for_high_peaks or [], addPeakMin=time_series_for_low_peaks or [], + **tsam_segmentation_kwargs, **tsam_kwargs, ) # Suppress tsam warning about minimal value constraints (informational, not actionable) @@ -800,6 +828,26 @@ def cluster( logger.warning(f'Failed to compute clustering metrics for {key}: {e}') clustering_metrics_all[key] = pd.DataFrame() + # Collect segment information if segmentation is enabled + # Convert TSAM's segmentDurationDict format: {'Segment Duration': {(cluster, segment): duration}} + # to our format: {cluster_id: [dur1, dur2, ...]} + segment_durations_all: dict[tuple, dict[int, list[int]]] = {} + if segmentation: + for key, tsam_agg in tsam_results.items(): + raw_dict = tsam_agg.segmentDurationDict + # Extract the nested dict with (cluster, segment) -> duration mapping + segment_dur_dict = raw_dict.get('Segment Duration', {}) + # Convert to {cluster_id: [dur1, dur2, ...]} format + converted: dict[int, list[int]] = {} + for (cluster_id, segment_id), duration in segment_dur_dict.items(): + if cluster_id not in converted: + converted[cluster_id] = [] + # Ensure segments are in order + while len(converted[cluster_id]) <= segment_id: + converted[cluster_id].append(0) + converted[cluster_id][segment_id] = duration + segment_durations_all[key] = converted + # Use first result for structure first_key = (periods[0], scenarios[0]) first_tsam = tsam_results[first_key] @@ -862,13 +910,23 @@ def cluster( # ═══════════════════════════════════════════════════════════════════════ # Create coordinates for the 2D cluster structure cluster_coords = np.arange(actual_n_clusters) - # Use DatetimeIndex for time within cluster (e.g., 00:00-23:00 for daily clustering) - time_coords = pd.date_range( - start='2000-01-01', - periods=timesteps_per_cluster, - freq=pd.Timedelta(hours=dt), - name='time', - ) + + # Determine time coordinates based on segmentation + if segmentation: + # For segmented systems: use RangeIndex, extract segment info from TSAM + first_segment_durations = segment_durations_all[first_key] + n_segments_actual = len(first_segment_durations[0]) # Segments per cluster + time_coords = pd.RangeIndex(n_segments_actual, name='time') + n_time_points = n_segments_actual + else: + # Non-segmented: use DatetimeIndex for time within cluster (e.g., 00:00-23:00 for daily clustering) + time_coords = pd.date_range( + start='2000-01-01', + periods=timesteps_per_cluster, + freq=pd.Timedelta(hours=dt), + name='time', + ) + n_time_points = timesteps_per_cluster # Create cluster_weight: shape (cluster,) - one weight per cluster # This is the number of original periods each cluster represents @@ -883,24 +941,46 @@ def _build_cluster_weight_for_key(key: tuple) -> xr.DataArray: weight_slices, ['cluster'], periods, scenarios, 'cluster_weight' ) - logger.info( - f'Reduced from {len(self._fs.timesteps)} to {actual_n_clusters} clusters × {timesteps_per_cluster} timesteps' - ) + if segmentation: + logger.info( + f'Reduced from {len(self._fs.timesteps)} to {actual_n_clusters} clusters × {n_segments_actual} segments' + ) + else: + logger.info( + f'Reduced from {len(self._fs.timesteps)} to {actual_n_clusters} clusters × {timesteps_per_cluster} timesteps' + ) logger.info(f'Clusters: {actual_n_clusters} (requested: {n_clusters})') # Build typical periods DataArrays with (cluster, time) shape typical_das: dict[str, dict[tuple, xr.DataArray]] = {} for key, tsam_agg in tsam_results.items(): - typical_df = tsam_agg.typicalPeriods - for col in typical_df.columns: - # Reshape flat data to (cluster, time) - flat_data = typical_df[col].values - reshaped = flat_data.reshape(actual_n_clusters, timesteps_per_cluster) - typical_das.setdefault(col, {})[key] = xr.DataArray( - reshaped, - dims=['cluster', 'time'], - coords={'cluster': cluster_coords, 'time': time_coords}, - ) + if segmentation: + # For segmented data, extract from segmentedNormalizedTypicalPeriods + # This has a MultiIndex: (period, segment_step, segment_duration, original_start_step) + segmented_df = tsam_agg.segmentedNormalizedTypicalPeriods + for col in segmented_df.columns: + # Group by period (cluster) and extract segment values + data = np.zeros((actual_n_clusters, n_segments_actual)) + for cluster_id in range(actual_n_clusters): + cluster_data = segmented_df.loc[cluster_id, col] + data[cluster_id, :] = cluster_data.values[:n_segments_actual] + typical_das.setdefault(col, {})[key] = xr.DataArray( + data, + dims=['cluster', 'time'], + coords={'cluster': cluster_coords, 'time': time_coords}, + ) + else: + # Non-segmented: use typicalPeriods + typical_df = tsam_agg.typicalPeriods + for col in typical_df.columns: + # Reshape flat data to (cluster, time) + flat_data = typical_df[col].values + reshaped = flat_data.reshape(actual_n_clusters, timesteps_per_cluster) + typical_das.setdefault(col, {})[key] = xr.DataArray( + reshaped, + dims=['cluster', 'time'], + coords={'cluster': cluster_coords, 'time': time_coords}, + ) # Build reduced dataset with (cluster, time) dimensions all_keys = {(p, s) for p in periods for s in scenarios} @@ -910,12 +990,13 @@ def _build_cluster_weight_for_key(key: tuple) -> xr.DataArray: ds_new_vars[name] = original_da.copy() elif name not in typical_das or set(typical_das[name].keys()) != all_keys: # Time-dependent but constant: reshape to (cluster, time, ...) - sliced = original_da.isel(time=slice(0, n_reduced_timesteps)) + n_total_reduced = actual_n_clusters * n_time_points + sliced = original_da.isel(time=slice(0, n_total_reduced)) # Get the shape - time is first, other dims follow other_dims = [d for d in sliced.dims if d != 'time'] other_shape = [sliced.sizes[d] for d in other_dims] - # Reshape: (n_reduced_timesteps, ...) -> (n_clusters, timesteps_per_cluster, ...) - new_shape = [actual_n_clusters, timesteps_per_cluster] + other_shape + # Reshape: (n_reduced_timesteps, ...) -> (n_clusters, n_time_points, ...) + new_shape = [actual_n_clusters, n_time_points] + other_shape reshaped = sliced.values.reshape(new_shape) # Build coords new_coords = {'cluster': cluster_coords, 'time': time_coords} @@ -949,6 +1030,32 @@ def _build_cluster_weight_for_key(key: tuple) -> xr.DataArray: # Set cluster_weight - shape (cluster,) possibly with period/scenario dimensions reduced_fs.cluster_weight = cluster_weight + # For segmented systems, set timestep_duration with variable segment durations + if segmentation: + # Build timestep_duration DataArray with shape (cluster, time) + # Each segment has a different duration (in hours) + def _build_segment_duration_for_key(key: tuple) -> xr.DataArray: + seg_durations = segment_durations_all[key] + # seg_durations is {cluster_id: [dur1, dur2, ...]} in original timesteps + data = np.array( + [ + [dur * dt for dur in seg_durations[c]] # Convert timestep counts to hours + for c in range(actual_n_clusters) + ] + ) + return xr.DataArray( + data, + dims=['cluster', 'time'], + coords={'cluster': cluster_coords, 'time': time_coords}, + name='timestep_duration', + ) + + duration_slices = {key: _build_segment_duration_for_key(key) for key in segment_durations_all} + timestep_duration = self._combine_slices_to_dataarray_generic( + duration_slices, ['cluster', 'time'], periods, scenarios, 'timestep_duration' + ) + reduced_fs.timestep_duration = timestep_duration + # Remove 'equals_final' from storages - doesn't make sense on reduced timesteps # Set to None so initial SOC is free (handled by storage_mode constraints) for storage in reduced_fs.storages.values(): @@ -965,12 +1072,33 @@ def _build_cluster_weight_for_key(key: tuple) -> xr.DataArray: def _build_timestep_mapping_for_key(key: tuple) -> np.ndarray: """Build timestep_mapping for a single (period, scenario) slice.""" mapping = np.zeros(n_original_timesteps, dtype=np.int32) - for period_idx, cluster_id in enumerate(cluster_orders[key]): - for pos in range(timesteps_per_cluster): - original_idx = period_idx * timesteps_per_cluster + pos - if original_idx < n_original_timesteps: - representative_idx = cluster_id * timesteps_per_cluster + pos - mapping[original_idx] = representative_idx + + if segmentation: + # For segmented systems, map original timesteps to (cluster, segment) pairs + seg_durations = segment_durations_all[key] + for period_idx, cluster_id in enumerate(cluster_orders[key]): + # Get segment boundaries for this cluster + cluster_seg_durations = seg_durations[cluster_id] + segment_boundaries = np.cumsum([0] + list(cluster_seg_durations)) + + for pos in range(timesteps_per_cluster): + original_idx = period_idx * timesteps_per_cluster + pos + if original_idx < n_original_timesteps: + # Find which segment this timestep belongs to + segment_id = np.searchsorted(segment_boundaries[1:], pos, side='right') + segment_id = min(segment_id, len(cluster_seg_durations) - 1) + # Map to (cluster * n_segments + segment) + representative_idx = cluster_id * n_segments_actual + segment_id + mapping[original_idx] = representative_idx + else: + # Non-segmented: map to (cluster * timesteps_per_cluster + pos) + for period_idx, cluster_id in enumerate(cluster_orders[key]): + for pos in range(timesteps_per_cluster): + original_idx = period_idx * timesteps_per_cluster + pos + if original_idx < n_original_timesteps: + representative_idx = cluster_id * timesteps_per_cluster + pos + mapping[original_idx] = representative_idx + return mapping def _build_cluster_occurrences_for_key(key: tuple) -> np.ndarray: @@ -1030,11 +1158,34 @@ def _build_cluster_occurrences_for_key(key: tuple) -> np.ndarray: _build_cluster_occurrences_for_key(first_key), dims=['cluster'], name='cluster_occurrences' ) + # Build segment_timestep_counts if segmentation is enabled + segment_timestep_counts_da = None + if segmentation: + + def _build_segment_timestep_counts_for_key(key: tuple) -> xr.DataArray: + seg_durations = segment_durations_all[key] + # seg_durations is {cluster_id: [dur1, dur2, ...]} in original timesteps + data = np.array([seg_durations[c] for c in range(actual_n_clusters)]) + return xr.DataArray( + data, + dims=['cluster', 'segment'], + coords={'cluster': cluster_coords, 'segment': np.arange(n_segments_actual)}, + name='segment_timestep_counts', + ) + + counts_slices = {key: _build_segment_timestep_counts_for_key(key) for key in segment_durations_all} + segment_timestep_counts_da = self._combine_slices_to_dataarray_generic( + counts_slices, ['cluster', 'segment'], periods, scenarios, 'segment_timestep_counts' + ) + cluster_structure = ClusterStructure( cluster_order=cluster_order_da, cluster_occurrences=cluster_occurrences_da, n_clusters=actual_n_clusters, timesteps_per_cluster=timesteps_per_cluster, + is_segmented=segmentation, + n_segments=n_segments_actual if segmentation else None, + segment_timestep_counts=segment_timestep_counts_da, ) # Create representative_weights with (cluster,) dimension only @@ -1050,9 +1201,15 @@ def _build_cluster_weights_for_key(key: tuple) -> xr.DataArray: weights_slices, ['cluster'], periods, scenarios, 'representative_weights' ) + # Calculate n_representatives based on segmentation + if segmentation: + n_representatives = actual_n_clusters * n_segments_actual + else: + n_representatives = n_reduced_timesteps + aggregation_result = ClusterResult( timestep_mapping=timestep_mapping_da, - n_representatives=n_reduced_timesteps, + n_representatives=n_representatives, representative_weights=representative_weights, cluster_structure=cluster_structure, original_data=ds, @@ -1508,10 +1665,14 @@ def expand_da(da: xr.DataArray, var_name: str = '') -> xr.DataArray: n_combinations = (len(self._fs.periods) if has_periods else 1) * ( len(self._fs.scenarios) if has_scenarios else 1 ) - n_reduced_timesteps = n_clusters * timesteps_per_cluster + # For segmented systems, reduced timesteps = n_clusters * n_segments + is_segmented = cluster_structure.is_segmented + time_dim_size = cluster_structure.n_segments if is_segmented else timesteps_per_cluster + n_reduced_timesteps = n_clusters * time_dim_size + segmentation_info = f', {cluster_structure.n_segments} segments' if is_segmented else '' logger.info( f'Expanded FlowSystem from {n_reduced_timesteps} to {n_original_timesteps} timesteps ' - f'({n_clusters} clusters' + f'({n_clusters} clusters{segmentation_info}' + ( f', {n_combinations} period/scenario combinations)' if n_combinations > 1 diff --git a/tests/test_cluster_reduce_expand.py b/tests/test_cluster_reduce_expand.py index b64c71a92..4daae8269 100644 --- a/tests/test_cluster_reduce_expand.py +++ b/tests/test_cluster_reduce_expand.py @@ -833,3 +833,208 @@ def test_clustering_without_peaks_may_miss_extremes(self, solver_fixture, timest # This test just verifies the clustering works # The peak may or may not be captured depending on clustering algorithm assert fs_no_peaks.solution is not None + + +# ==================== Segmentation Tests ==================== + + +class TestSegmentation: + """Tests for inner-period segmentation within clustering.""" + + def test_segmentation_creates_range_index_timesteps(self, timesteps_8_days): + """Test that segmentation creates RangeIndex timesteps.""" + fs = create_simple_system(timesteps_8_days) + + # Cluster with segmentation + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + + # Segmented FlowSystem should have RangeIndex timesteps + assert isinstance(fs_segmented.timesteps, pd.RangeIndex) + assert len(fs_segmented.timesteps) == 6 # n_segments + assert len(fs_segmented.clusters) == 2 # n_clusters + + def test_segmented_system_has_correct_structure(self, timesteps_8_days): + """Test that segmented FlowSystem has correct ClusterStructure fields.""" + fs = create_simple_system(timesteps_8_days) + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=4, + ) + + # Check clustering info + info = fs_segmented.clustering + assert info is not None + cluster_structure = info.result.cluster_structure + assert cluster_structure is not None + + # Segmentation fields + assert cluster_structure.is_segmented is True + assert cluster_structure.n_segments == 4 + assert cluster_structure.segment_timestep_counts is not None + + # segment_timestep_counts should map [cluster, segment] -> original timesteps per segment + counts = cluster_structure.segment_timestep_counts + assert 'cluster' in counts.dims + assert 'segment' in counts.dims # Note: uses 'segment' dim, not 'time' + # Total of counts per cluster should equal timesteps_per_cluster (24) + for c in range(2): + cluster_sum = int(counts.sel(cluster=c).sum().values) + assert cluster_sum == 24, f'Cluster {c} segment counts sum to {cluster_sum}, expected 24' + + def test_segmented_system_has_variable_timestep_duration(self, timesteps_8_days): + """Test that segmented FlowSystem has variable timestep_duration.""" + fs = create_simple_system(timesteps_8_days) + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + + # timestep_duration should be 2D: [cluster, time] + duration = fs_segmented.timestep_duration + assert 'cluster' in duration.dims + assert 'time' in duration.dims + + # Each cluster's durations should sum to 24 hours + for c in range(2): + cluster_duration_sum = float(duration.sel(cluster=c).sum().values) + assert_allclose(cluster_duration_sum, 24.0, rtol=1e-6) + + def test_segmented_system_is_segmented_property(self, timesteps_8_days): + """Test the is_segmented property on FlowSystem.""" + fs = create_simple_system(timesteps_8_days) + + # Regular clustering + fs_clustered = fs.transform.cluster(n_clusters=2, cluster_duration='1D') + assert fs_clustered.is_segmented is False + + # With segmentation + fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', segmentation=True, n_segments=6) + assert fs_segmented.is_segmented is True + + def test_segmented_system_optimize(self, solver_fixture, timesteps_8_days): + """Test that segmented FlowSystem can be optimized.""" + fs = create_simple_system(timesteps_8_days) + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + + # Should optimize without errors + fs_segmented.optimize(solver_fixture) + assert fs_segmented.solution is not None + + # Solution should have correct dimensions + flow_var = 'Boiler(Q_th)|flow_rate' + assert flow_var in fs_segmented.solution + flow = fs_segmented.solution[flow_var] + assert 'cluster' in flow.dims + assert 'time' in flow.dims + # time dimension = n_segments + 1 (extra timestep) + assert flow.sizes['time'] == 7 # 6 segments + 1 extra + + def test_segmented_expand_solution_restores_full_timesteps(self, solver_fixture, timesteps_8_days): + """Test that expand_solution works for segmented systems.""" + fs = create_simple_system(timesteps_8_days) + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + fs_segmented.optimize(solver_fixture) + + # Expand back to full + fs_expanded = fs_segmented.transform.expand_solution() + + # Should have original timesteps (DatetimeIndex) + assert isinstance(fs_expanded.timesteps, pd.DatetimeIndex) + assert len(fs_expanded.timesteps) == 192 # Original 8 days * 24h + assert fs_expanded.clusters is None # Expanded FlowSystem has no cluster dimension + assert fs_expanded.solution is not None + + def test_segmented_expanded_statistics_match(self, solver_fixture, timesteps_8_days): + """Test that expanded statistics match clustered statistics.""" + fs = create_simple_system(timesteps_8_days) + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + fs_segmented.optimize(solver_fixture) + + # Get weighted statistics from clustered system + # Note: statistics.flow_hours doesn't include cluster_weight, so multiply manually + reduced_fh = fs_segmented.statistics.flow_hours['Boiler(Q_th)'] * fs_segmented.cluster_weight + reduced_flow_hours = reduced_fh.sum().item() + + # Expand and get statistics (no cluster_weight needed for expanded FlowSystem) + fs_expanded = fs_segmented.transform.expand_solution() + expanded_flow_hours = fs_expanded.statistics.flow_hours['Boiler(Q_th)'].sum().item() + + # Flow hours should match + assert_allclose(reduced_flow_hours, expanded_flow_hours, rtol=1e-6) + + +class TestSegmentationWithStorage: + """Tests for segmentation combined with intercluster storage.""" + + def test_segmented_storage_intercluster_cyclic(self, solver_fixture, timesteps_8_days): + """Test segmentation with intercluster_cyclic storage mode.""" + fs = create_system_with_storage(timesteps_8_days, cluster_mode='intercluster_cyclic') + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + fs_segmented.optimize(solver_fixture) + + # Should have charge_state and SOC_boundary in solution + assert 'Battery|charge_state' in fs_segmented.solution + assert 'Battery|SOC_boundary' in fs_segmented.solution + + # Verify solution is valid + assert fs_segmented.solution is not None + + def test_segmented_storage_expand_solution(self, solver_fixture, timesteps_8_days): + """Test that expand_solution works for segmented storage systems.""" + fs = create_system_with_storage(timesteps_8_days, cluster_mode='intercluster_cyclic') + + fs_segmented = fs.transform.cluster( + n_clusters=2, + cluster_duration='1D', + segmentation=True, + n_segments=6, + ) + fs_segmented.optimize(solver_fixture) + + # Expand + fs_expanded = fs_segmented.transform.expand_solution() + + # Should have original timesteps + assert len(fs_expanded.timesteps) == 192 + + # Expanded charge_state should be non-negative (absolute SOC) + cs = fs_expanded.solution['Battery|charge_state'] + assert (cs >= -0.01).all(), f'Negative charge_state found: min={float(cs.min())}' + + # SOC_boundary should be removed after expansion + assert 'Battery|SOC_boundary' not in fs_expanded.solution From f6c31bfca9e1f873ecc134e6fbc49af83b7ad856 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:10:40 +0100 Subject: [PATCH 02/15] Added to CHANGELOG.md --- CHANGELOG.md | 68 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c774bfbc..c3cba599c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -51,6 +51,74 @@ If upgrading from v2.x, see the [v3.0.0 release notes](https://github.com/flixOp Until here --> +## [6.1.0] - Upcoming + +**Summary**: Adds inner-period segmentation support to time-series clustering, enabling further reduction of problem size by grouping adjacent timesteps within each typical period into variable-length segments. + +### ✨ Added + +#### Inner-Period Segmentation for Clustering + +Segmentation divides each typical period (cluster) into variable-length segments, dramatically reducing problem size while preserving key features of the time series. + +```python +# Without segmentation: 8760h → 8 clusters × 24h = 192 timesteps +# With segmentation: 8760h → 8 clusters × 6 segments = 48 timesteps + +fs_segmented = flow_system.transform.cluster( + n_clusters=8, + cluster_duration='1D', + segmentation=True, # Enable inner-period segmentation + n_segments=6, # Segments per cluster +) +fs_segmented.optimize(solver) +fs_expanded = fs_segmented.transform.expand_solution() +``` + +**New Parameters**: + +| Parameter | Description | +|-----------|-------------| +| `segmentation` | Enable inner-period segmentation (default: `False`) | +| `n_segments` | Number of segments per cluster (required when `segmentation=True`) | +| `segment_representation_method` | How to represent segment values: `'meanRepresentation'` (default), `'medoidRepresentation'`, etc. | + +**Key Features**: + +- **Variable segment durations**: Each segment can have different duration (in hours), automatically determined by tsam based on time series characteristics +- **Full storage integration**: Works with all storage `cluster_mode` options including `'intercluster_cyclic'` +- **Solution expansion**: `expand_solution()` correctly maps segmented results back to original timesteps +- **RangeIndex timesteps**: Segmented FlowSystems use `RangeIndex` instead of `DatetimeIndex` for the time dimension +- **`is_segmented` property**: Check if a FlowSystem uses segmentation via `flow_system.is_segmented` + +**Example with Storage**: + +```python +storage = fx.Storage( + 'Battery', + capacity_in_flow_hours=100, + cluster_mode='intercluster_cyclic', + ... +) + +# Cluster with segmentation - extreme reduction +fs_segmented = flow_system.transform.cluster( + n_clusters=12, + cluster_duration='1D', + segmentation=True, + n_segments=4, # 12 clusters × 4 segments = 48 timesteps (vs 12 × 24 = 288) +) +fs_segmented.optimize(solver) + +# Expand back to full resolution +fs_expanded = fs_segmented.transform.expand_solution() +``` + +!!! tip "When to Use Segmentation" + Segmentation is most beneficial for large-scale optimization problems where the additional reduction from 24 timesteps per cluster to ~4-8 segments significantly improves solve time. For problems that already solve quickly, standard clustering without segmentation may be sufficient. + +--- + ## [6.0.0] - Upcoming **Summary**: Major release featuring a complete rewrite of the clustering/aggregation system with tsam integration, new `fxplot` plotting accessor, FlowSystem comparison tools, and removal of deprecated v5.0 classes. From 3e32ec4911b265fa2a6a8cfda5305d6e21943ddb Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:17:14 +0100 Subject: [PATCH 03/15] Remove segmentation=True flag --- CHANGELOG.md | 6 ++---- flixopt/transform_accessor.py | 17 ++++++++++------- tests/test_cluster_reduce_expand.py | 10 +--------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3cba599c..ad038c891 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -68,8 +68,7 @@ Segmentation divides each typical period (cluster) into variable-length segments fs_segmented = flow_system.transform.cluster( n_clusters=8, cluster_duration='1D', - segmentation=True, # Enable inner-period segmentation - n_segments=6, # Segments per cluster + n_segments=6, # Enables segmentation with 6 segments per cluster ) fs_segmented.optimize(solver) fs_expanded = fs_segmented.transform.expand_solution() @@ -79,8 +78,7 @@ fs_expanded = fs_segmented.transform.expand_solution() | Parameter | Description | |-----------|-------------| -| `segmentation` | Enable inner-period segmentation (default: `False`) | -| `n_segments` | Number of segments per cluster (required when `segmentation=True`) | +| `n_segments` | Number of segments per cluster. If provided, enables inner-period segmentation. | | `segment_representation_method` | How to represent segment values: `'meanRepresentation'` (default), `'medoidRepresentation'`, etc. | **Key Features**: diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 6c3fe0966..2bbd79537 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -589,7 +589,6 @@ def cluster( extreme_period_method: Literal['append', 'new_cluster_center', 'replace_cluster_center'] | None = None, rescale_cluster_periods: bool = True, predef_cluster_order: xr.DataArray | np.ndarray | list[int] | None = None, - segmentation: bool = False, n_segments: int | None = None, segment_representation_method: Literal[ 'meanRepresentation', 'medoidRepresentation', 'distributionAndMinMaxRepresentation' @@ -638,14 +637,15 @@ def cluster( For multi-dimensional FlowSystems, use an xr.DataArray with dims ``[original_cluster, period?, scenario?]`` to specify different assignments per period/scenario combination. - segmentation: If True, apply inner-period segmentation after clustering. - This further reduces timesteps by grouping adjacent timesteps within - each typical period into variable-length segments. Default: False. - n_segments: Number of segments per cluster when segmentation is enabled. - If None, defaults to timesteps_per_cluster (no reduction within periods). - Must be <= timesteps_per_cluster. + n_segments: Number of segments per cluster for inner-period segmentation. + If provided, adjacent timesteps within each typical period are grouped + into variable-length segments, further reducing problem size. + E.g., with ``n_clusters=8, cluster_duration='1D', n_segments=6``: + 8 clusters × 6 segments = 48 timesteps (vs 8 × 24 = 192 without). + If None (default), no segmentation is applied. segment_representation_method: How segment representatives are computed. Options same as representation_method. If None, uses representation_method. + Only used when n_segments is provided. **tsam_kwargs: Additional keyword arguments passed to ``tsam.TimeSeriesAggregation``. See tsam documentation for all options. @@ -690,6 +690,9 @@ def cluster( from .core import TimeSeriesData, drop_constant_arrays from .flow_system import FlowSystem + # Enable segmentation if n_segments is provided + segmentation = n_segments is not None + # Parse cluster_duration to hours hours_per_cluster = ( pd.Timedelta(cluster_duration).total_seconds() / 3600 diff --git a/tests/test_cluster_reduce_expand.py b/tests/test_cluster_reduce_expand.py index 4daae8269..a25b5a3da 100644 --- a/tests/test_cluster_reduce_expand.py +++ b/tests/test_cluster_reduce_expand.py @@ -849,7 +849,6 @@ def test_segmentation_creates_range_index_timesteps(self, timesteps_8_days): fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) @@ -865,7 +864,6 @@ def test_segmented_system_has_correct_structure(self, timesteps_8_days): fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=4, ) @@ -896,7 +894,6 @@ def test_segmented_system_has_variable_timestep_duration(self, timesteps_8_days) fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) @@ -919,7 +916,7 @@ def test_segmented_system_is_segmented_property(self, timesteps_8_days): assert fs_clustered.is_segmented is False # With segmentation - fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', segmentation=True, n_segments=6) + fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', n_segments=6) assert fs_segmented.is_segmented is True def test_segmented_system_optimize(self, solver_fixture, timesteps_8_days): @@ -929,7 +926,6 @@ def test_segmented_system_optimize(self, solver_fixture, timesteps_8_days): fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) @@ -953,7 +949,6 @@ def test_segmented_expand_solution_restores_full_timesteps(self, solver_fixture, fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) fs_segmented.optimize(solver_fixture) @@ -974,7 +969,6 @@ def test_segmented_expanded_statistics_match(self, solver_fixture, timesteps_8_d fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) fs_segmented.optimize(solver_fixture) @@ -1002,7 +996,6 @@ def test_segmented_storage_intercluster_cyclic(self, solver_fixture, timesteps_8 fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) fs_segmented.optimize(solver_fixture) @@ -1021,7 +1014,6 @@ def test_segmented_storage_expand_solution(self, solver_fixture, timesteps_8_day fs_segmented = fs.transform.cluster( n_clusters=2, cluster_duration='1D', - segmentation=True, n_segments=6, ) fs_segmented.optimize(solver_fixture) From baa5123735124065c7adcb7d8c641835ca3d1027 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:26:02 +0100 Subject: [PATCH 04/15] Fix IO with Segments --- flixopt/flow_system.py | 8 ++++- tests/test_clustering_io.py | 64 +++++++++++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 1 deletion(-) diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index d54a1b38c..c93bd23fa 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -783,9 +783,15 @@ def from_dataset(cls, ds: xr.Dataset) -> FlowSystem: if ds.indexes.get('scenario') is not None and 'scenario_weights' in reference_structure: scenario_weights = cls._resolve_dataarray_reference(reference_structure['scenario_weights'], arrays_dict) + # Get timesteps - convert integer index to RangeIndex for segmented systems + time_index = ds.indexes['time'] + if not isinstance(time_index, pd.DatetimeIndex): + # Segmented systems use RangeIndex (stored as integer array) + time_index = pd.RangeIndex(len(time_index), name='time') + # Create FlowSystem instance with constructor parameters flow_system = cls( - timesteps=ds.indexes['time'], + timesteps=time_index, periods=ds.indexes.get('period'), scenarios=ds.indexes.get('scenario'), clusters=clusters, diff --git a/tests/test_clustering_io.py b/tests/test_clustering_io.py index ae0fff2bb..b9f4fb956 100644 --- a/tests/test_clustering_io.py +++ b/tests/test_clustering_io.py @@ -534,3 +534,67 @@ def test_clustering_preserves_component_labels(self, simple_system_8_days, solve # Component labels should be preserved assert 'demand' in fs_expanded.components assert 'source' in fs_expanded.components + + +class TestSegmentationIO: + """Tests for segmentation serialization and deserialization.""" + + def test_segmentation_netcdf_roundtrip(self, simple_system_8_days, solver_fixture, tmp_path): + """Test that segmented FlowSystem can be saved and loaded from netCDF.""" + fs = simple_system_8_days + fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', n_segments=6) + fs_segmented.optimize(solver_fixture) + + # Save to netCDF + path = tmp_path / 'segmented.nc' + fs_segmented.to_netcdf(path) + + # Load back + fs_loaded = fx.FlowSystem.from_netcdf(path) + + # Verify segmentation is preserved + assert fs_loaded.is_segmented is True + assert isinstance(fs_loaded.timesteps, pd.RangeIndex) + assert len(fs_loaded.timesteps) == 6 # n_segments + assert fs_loaded.clustering is not None + assert fs_loaded.clustering.result.cluster_structure.is_segmented is True + assert fs_loaded.clustering.result.cluster_structure.n_segments == 6 + assert fs_loaded.clustering.result.cluster_structure.segment_timestep_counts is not None + + def test_segmentation_expand_after_roundtrip(self, simple_system_8_days, solver_fixture, tmp_path): + """Test that expand_solution works after netCDF roundtrip for segmented systems.""" + fs = simple_system_8_days + fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', n_segments=6) + fs_segmented.optimize(solver_fixture) + + # Save and load + path = tmp_path / 'segmented.nc' + fs_segmented.to_netcdf(path) + fs_loaded = fx.FlowSystem.from_netcdf(path) + + # Expand solution + fs_expanded = fs_loaded.transform.expand_solution() + + # Verify expansion + assert isinstance(fs_expanded.timesteps, pd.DatetimeIndex) + assert len(fs_expanded.timesteps) == 8 * 24 # Original timesteps + assert fs_expanded.solution is not None + + def test_segmentation_dataset_roundtrip(self, simple_system_8_days, solver_fixture): + """Test that segmented FlowSystem can roundtrip through Dataset.""" + fs = simple_system_8_days + fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', n_segments=4) + fs_segmented.optimize(solver_fixture) + + # To dataset and back + ds = fs_segmented.to_dataset(include_solution=True) + fs_restored = fx.FlowSystem.from_dataset(ds) + + # Verify + assert fs_restored.is_segmented is True + assert fs_restored.clustering.result.cluster_structure.n_segments == 4 + segment_counts = fs_restored.clustering.result.cluster_structure.segment_timestep_counts + assert segment_counts is not None + # Sum of segment counts per cluster should equal 24 (timesteps per cluster) + for c in range(2): + assert int(segment_counts.sel(cluster=c).sum().values) == 24 From 7c5ae40065655da92738df29a66dc2f5dcf08078 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:48:31 +0100 Subject: [PATCH 05/15] 1. Added timestep_duration resolution in from_dataset() (flow_system.py:791-798): - Check if timestep_duration is in the reference structure - Only resolve as DataArray reference if it's a string starting with ":::" - For non-segmented systems (where it's stored as a simple list), skip resolution and let the constructor calculate it 2. Pass timestep_duration to the constructor (flow_system.py:820): - Added timestep_duration=timestep_duration parameter to the cls() constructor call --- flixopt/flow_system.py | 25 +++++++++++++++++----- tests/test_clustering_io.py | 42 +++++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+), 5 deletions(-) diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index c93bd23fa..c35cf05ef 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -185,6 +185,7 @@ def __init__( scenario_independent_sizes: bool | list[str] = True, scenario_independent_flow_rates: bool | list[str] = False, name: str | None = None, + timestep_duration: xr.DataArray | None = None, ): self.timesteps = self._validate_timesteps(timesteps) @@ -193,17 +194,21 @@ def __init__( self.timesteps_extra, self.hours_of_last_timestep, self.hours_of_previous_timesteps, - timestep_duration, + computed_timestep_duration, ) = self._compute_time_metadata(self.timesteps, hours_of_last_timestep, hours_of_previous_timesteps) self.periods = None if periods is None else self._validate_periods(periods) self.scenarios = None if scenarios is None else self._validate_scenarios(scenarios) self.clusters = clusters # Cluster dimension for clustered FlowSystems - # For RangeIndex (segmented systems), timestep_duration is None and must be set externally - self.timestep_duration = ( - self.fit_to_model_coords('timestep_duration', timestep_duration) if timestep_duration is not None else None - ) + # Use provided timestep_duration if given (for segmented systems), otherwise use computed value + # For RangeIndex (segmented systems), computed_timestep_duration is None + if timestep_duration is not None: + self.timestep_duration = timestep_duration + elif computed_timestep_duration is not None: + self.timestep_duration = self.fit_to_model_coords('timestep_duration', computed_timestep_duration) + else: + self.timestep_duration = None # Cluster weight for cluster() optimization (default 1.0) # Represents how many original timesteps each cluster represents @@ -783,6 +788,15 @@ def from_dataset(cls, ds: xr.Dataset) -> FlowSystem: if ds.indexes.get('scenario') is not None and 'scenario_weights' in reference_structure: scenario_weights = cls._resolve_dataarray_reference(reference_structure['scenario_weights'], arrays_dict) + # Resolve timestep_duration if present as DataArray reference (for segmented systems with variable durations) + timestep_duration = None + if 'timestep_duration' in reference_structure: + ref_value = reference_structure['timestep_duration'] + # Only resolve if it's a DataArray reference (starts with ":::") + # For non-segmented systems, it may be stored as a simple list/scalar + if isinstance(ref_value, str) and ref_value.startswith(':::'): + timestep_duration = cls._resolve_dataarray_reference(ref_value, arrays_dict) + # Get timesteps - convert integer index to RangeIndex for segmented systems time_index = ds.indexes['time'] if not isinstance(time_index, pd.DatetimeIndex): @@ -803,6 +817,7 @@ def from_dataset(cls, ds: xr.Dataset) -> FlowSystem: scenario_independent_sizes=reference_structure.get('scenario_independent_sizes', True), scenario_independent_flow_rates=reference_structure.get('scenario_independent_flow_rates', False), name=reference_structure.get('name'), + timestep_duration=timestep_duration, ) # Restore components diff --git a/tests/test_clustering_io.py b/tests/test_clustering_io.py index b9f4fb956..b83aead25 100644 --- a/tests/test_clustering_io.py +++ b/tests/test_clustering_io.py @@ -598,3 +598,45 @@ def test_segmentation_dataset_roundtrip(self, simple_system_8_days, solver_fixtu # Sum of segment counts per cluster should equal 24 (timesteps per cluster) for c in range(2): assert int(segment_counts.sel(cluster=c).sum().values) == 24 + + def test_segmentation_with_periods_scenarios_roundtrip(self, solver_fixture, tmp_path): + """Test segmentation with periods and scenarios survives IO roundtrip.""" + # Create system with periods and scenarios + timesteps = pd.date_range('2023-01-01', periods=8 * 24, freq='h') + periods = pd.Index([2020, 2021], name='period') + scenarios = pd.Index(['low', 'high'], name='scenario') + demand = np.sin(np.linspace(0, 4 * np.pi, 8 * 24)) * 10 + 15 + + fs = fx.FlowSystem(timesteps, periods=periods, scenarios=scenarios) + fs.add_elements( + fx.Bus('heat'), + fx.Effect('costs', unit='EUR', is_objective=True, is_standard=True), + fx.Sink('demand', inputs=[fx.Flow('in', bus='heat', fixed_relative_profile=demand, size=10)]), + fx.Source('source', outputs=[fx.Flow('out', bus='heat', size=50, effects_per_flow_hour={'costs': 0.05})]), + ) + + # Cluster with segmentation + fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', n_segments=6) + fs_segmented.optimize(solver_fixture) + + # Verify multi-dimensional timestep_duration + assert fs_segmented.timestep_duration is not None + assert 'period' in fs_segmented.timestep_duration.dims + assert 'scenario' in fs_segmented.timestep_duration.dims + + # Save and load + path = tmp_path / 'segmented_multi.nc' + fs_segmented.to_netcdf(path) + fs_loaded = fx.FlowSystem.from_netcdf(path) + + # Verify everything is preserved + assert fs_loaded.is_segmented is True + assert fs_loaded.timestep_duration is not None + assert fs_loaded.timestep_duration.shape == fs_segmented.timestep_duration.shape + assert list(fs_loaded.periods) == list(fs_segmented.periods) + assert list(fs_loaded.scenarios) == list(fs_segmented.scenarios) + + # Expand should work + fs_expanded = fs_loaded.transform.expand_solution() + assert len(fs_expanded.timesteps) == 8 * 24 + assert fs_expanded.solution is not None From 1c02cb99084e21a9dc163332884ddb8ce3667b7d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:56:53 +0100 Subject: [PATCH 06/15] Update __repr__() --- flixopt/clustering/base.py | 20 +++++++++++++------- flixopt/flow_system.py | 17 +++++++++++++---- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index f8fe3b166..c338d2cb7 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -123,13 +123,17 @@ def __repr__(self) -> str: else: # Simple case: list of occurrences per cluster occ_info = [int(occ_data.sel(cluster=c).values) for c in range(n_clusters)] - return ( - f'ClusterStructure(\n' - f' {self.n_original_clusters} original periods → {n_clusters} clusters\n' - f' timesteps_per_cluster={self.timesteps_per_cluster}\n' - f' occurrences={occ_info}\n' - f')' - ) + + lines = [ + 'ClusterStructure(', + f' {self.n_original_clusters} original periods → {n_clusters} clusters', + f' timesteps_per_cluster={self.timesteps_per_cluster}', + ] + if self.is_segmented: + lines.append(f' segmented={self.n_segments} segments per cluster') + lines.append(f' occurrences={occ_info}') + lines.append(')') + return '\n'.join(lines) def _create_reference_structure(self) -> tuple[dict, dict[str, xr.DataArray]]: """Create reference structure for serialization.""" @@ -1003,6 +1007,8 @@ def __repr__(self) -> str: int(cs.n_clusters) if isinstance(cs.n_clusters, (int, np.integer)) else int(cs.n_clusters.values) ) structure_info = f'{cs.n_original_clusters} periods → {n_clusters} clusters' + if cs.is_segmented: + structure_info += f' × {cs.n_segments} segments' else: structure_info = 'no structure' return f'Clustering(\n backend={self.backend_name!r}\n {structure_info}\n)' diff --git a/flixopt/flow_system.py b/flixopt/flow_system.py index c35cf05ef..53100ccb1 100644 --- a/flixopt/flow_system.py +++ b/flixopt/flow_system.py @@ -1918,10 +1918,19 @@ def __repr__(self) -> str: """Return a detailed string representation showing all containers.""" r = fx_io.format_title_with_underline('FlowSystem', '=') - # Timestep info - time_period = f'{self.timesteps[0].date()} to {self.timesteps[-1].date()}' - freq_str = str(self.timesteps.freq).replace('<', '').replace('>', '') if self.timesteps.freq else 'irregular' - r += f'Timesteps: {len(self.timesteps)} ({freq_str}) [{time_period}]\n' + # Timestep info - handle both DatetimeIndex and RangeIndex (segmented) + if self.is_segmented: + r += f'Timesteps: {len(self.timesteps)} segments (segmented)\n' + else: + time_period = f'{self.timesteps[0].date()} to {self.timesteps[-1].date()}' + freq_str = ( + str(self.timesteps.freq).replace('<', '').replace('>', '') if self.timesteps.freq else 'irregular' + ) + r += f'Timesteps: {len(self.timesteps)} ({freq_str}) [{time_period}]\n' + + # Add clusters if present + if self.clusters is not None: + r += f'Clusters: {len(self.clusters)}\n' # Add periods if present if self.periods is not None: From cfc413ba9dfbe6e834ac48344f99f9f11ca7c5c4 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 12:57:03 +0100 Subject: [PATCH 07/15] Update notebook --- docs/notebooks/08c-clustering.ipynb | 158 +++++++++++++++++++++++++--- 1 file changed, 141 insertions(+), 17 deletions(-) diff --git a/docs/notebooks/08c-clustering.ipynb b/docs/notebooks/08c-clustering.ipynb index 6d85e60ba..af429a179 100644 --- a/docs/notebooks/08c-clustering.ipynb +++ b/docs/notebooks/08c-clustering.ipynb @@ -77,7 +77,7 @@ " 'Electricity Price': flow_system.components['GridBuy'].outputs[0].effects_per_flow_hour['costs'],\n", " }\n", ")\n", - "input_ds.fxplot.line(facet_row='variable', title='One Month of Input Data')" + "input_ds.fxplot.line(color='variable', title='One Month of Input Data')" ] }, { @@ -264,7 +264,11 @@ "cell_type": "code", "execution_count": null, "id": "18", - "metadata": {}, + "metadata": { + "jupyter": { + "is_executing": true + } + }, "outputs": [], "source": [ "# Visualize cluster structure with heatmap\n", @@ -275,6 +279,104 @@ "cell_type": "markdown", "id": "19", "metadata": {}, + "source": [ + "### Inner-Period Segmentation\n", + "\n", + "Segmentation provides additional problem reduction by dividing each typical period into \n", + "variable-length segments. Instead of solving all 96 timesteps per day, you solve only \n", + "6-12 representative segments:\n", + "\n", + "**Reduction example:**\n", + "- Standard clustering: 31 days → 8 typical days × 96 timesteps = 768 timesteps\n", + "- With segmentation: 31 days → 8 typical days × 6 segments = 48 representative points\n", + "\n", + "Use `n_segments` to enable segmentation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "20", + "metadata": {}, + "outputs": [], + "source": [ + "# Cluster with inner-period segmentation\n", + "fs_segmented = flow_system.transform.cluster(\n", + " n_clusters=8,\n", + " cluster_duration='1D',\n", + " n_segments=6, # Divide each typical day into 6 segments\n", + " time_series_for_high_peaks=peak_series,\n", + ")\n", + "fs_segmented.name = 'Segmented'\n", + "\n", + "fs_segmented.clustering" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21", + "metadata": {}, + "outputs": [], + "source": [ + "# Segments have variable durations (in hours)\n", + "# This captures both slow-changing and rapid-transition periods efficiently\n", + "fs_segmented.timestep_duration" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22", + "metadata": {}, + "outputs": [], + "source": [ + "# Optimize and expand to full resolution\n", + "start = timeit.default_timer()\n", + "fs_segmented.optimize(solver)\n", + "time_segmented = timeit.default_timer() - start\n", + "\n", + "# Expand solution back to original timesteps\n", + "fs_segmented_expanded = fs_segmented.transform.expand_solution()\n", + "\n", + "print(f'Segmentation speedup vs standard clustering: {(time_clustered / time_segmented):.1f}x')\n", + "print(f'Expanded timesteps match original: {len(fs_segmented_expanded.timesteps)} == {len(timesteps)}')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23", + "metadata": {}, + "outputs": [], + "source": [ + "# Compare sizing results\n", + "segmented_comparison = pd.DataFrame(\n", + " {\n", + " 'Standard Clustering': {\n", + " 'Time [s]': time_clustered,\n", + " 'Cost': fs_clustered.solution['costs'].item(),\n", + " 'CHP': fs_clustered.statistics.sizes['CHP(Q_th)'].item(),\n", + " 'Boiler': fs_clustered.statistics.sizes['Boiler(Q_th)'].item(),\n", + " },\n", + " 'Segmented (6 segments)': {\n", + " 'Time [s]': time_segmented,\n", + " 'Cost': fs_segmented.solution['costs'].item(),\n", + " 'CHP': fs_segmented.statistics.sizes['CHP(Q_th)'].item(),\n", + " 'Boiler': fs_segmented.statistics.sizes['Boiler(Q_th)'].item(),\n", + " },\n", + " }\n", + ").T\n", + "segmented_comparison['Speedup'] = time_clustered / segmented_comparison['Time [s]']\n", + "segmented_comparison.style.format(\n", + " {'Time [s]': '{:.2f}', 'Cost': '{:,.0f}', 'CHP': '{:.1f}', 'Boiler': '{:.1f}', 'Speedup': '{:.1f}x'}\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "24", + "metadata": {}, "source": [ "### Manual Cluster Assignment\n", "\n", @@ -286,7 +388,7 @@ { "cell_type": "code", "execution_count": null, - "id": "20", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -314,7 +416,7 @@ }, { "cell_type": "markdown", - "id": "21", + "id": "26", "metadata": {}, "source": [ "## Method 3: Two-Stage Workflow (Recommended)\n", @@ -332,7 +434,7 @@ { "cell_type": "code", "execution_count": null, - "id": "22", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -344,7 +446,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -363,7 +465,7 @@ }, { "cell_type": "markdown", - "id": "24", + "id": "29", "metadata": {}, "source": [ "## Compare Results" @@ -372,7 +474,7 @@ { "cell_type": "code", "execution_count": null, - "id": "25", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -421,7 +523,7 @@ }, { "cell_type": "markdown", - "id": "26", + "id": "31", "metadata": {}, "source": [ "## Expand Solution to Full Resolution\n", @@ -433,7 +535,7 @@ { "cell_type": "code", "execution_count": null, - "id": "27", + "id": "32", "metadata": {}, "outputs": [], "source": [ @@ -444,7 +546,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28", + "id": "33", "metadata": {}, "outputs": [], "source": [ @@ -466,7 +568,7 @@ }, { "cell_type": "markdown", - "id": "29", + "id": "34", "metadata": {}, "source": [ "## Visualize Clustered Heat Balance" @@ -475,7 +577,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30", + "id": "35", "metadata": {}, "outputs": [], "source": [ @@ -485,7 +587,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31", + "id": "36", "metadata": {}, "outputs": [], "source": [ @@ -494,7 +596,7 @@ }, { "cell_type": "markdown", - "id": "32", + "id": "37", "metadata": {}, "source": [ "## API Reference\n", @@ -513,6 +615,8 @@ "| `extreme_period_method` | `str \\| None` | None | How peaks are integrated: None, 'append', 'new_cluster_center', 'replace_cluster_center' |\n", "| `rescale_cluster_periods` | `bool` | True | Rescale clusters to match original means |\n", "| `predef_cluster_order` | `array` | None | Manual cluster assignments |\n", + "| `n_segments` | `int` | None | Enable inner-period segmentation with N segments per cluster |\n", + "| `segment_representation_method` | `str` | None | Segment representation: 'meanRepresentation', 'distributionRepresentation' |\n", "| `**tsam_kwargs` | - | - | Additional tsam parameters |\n", "\n", "### Clustering Object Properties\n", @@ -571,7 +675,7 @@ }, { "cell_type": "markdown", - "id": "33", + "id": "38", "metadata": {}, "source": [ "## Summary\n", @@ -580,6 +684,7 @@ "\n", "- Use **`cluster()`** to reduce time series into typical periods\n", "- Apply **peak forcing** to capture extreme demand days\n", + "- Use **inner-period segmentation** for additional reduction with `n_segments`\n", "- Use **two-stage optimization** for fast yet accurate investment decisions\n", "- **Expand solutions** back to full resolution with `expand_solution()`\n", "- Access **clustering metadata** via `fs.clustering` (metrics, cluster_order, occurrences)\n", @@ -594,6 +699,7 @@ "4. **Storage handling** is configurable via `cluster_mode`\n", "5. **Check metrics** to evaluate clustering quality\n", "6. **Use `predef_cluster_order`** to reproduce or define custom cluster assignments\n", + "7. **Use `n_segments`** for extreme problem reduction when speed is critical\n", "\n", "### Next Steps\n", "\n", @@ -602,7 +708,25 @@ ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, "nbformat": 4, "nbformat_minor": 5 } From 7ecc471a119fe5c35cf6520fe7838b2dff7a617d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 13:02:17 +0100 Subject: [PATCH 08/15] Fix segmentation --- flixopt/transform_accessor.py | 7 ++++--- tests/test_clustering_io.py | 3 ++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/flixopt/transform_accessor.py b/flixopt/transform_accessor.py index 2bbd79537..7c7911bab 100644 --- a/flixopt/transform_accessor.py +++ b/flixopt/transform_accessor.py @@ -958,9 +958,10 @@ def _build_cluster_weight_for_key(key: tuple) -> xr.DataArray: typical_das: dict[str, dict[tuple, xr.DataArray]] = {} for key, tsam_agg in tsam_results.items(): if segmentation: - # For segmented data, extract from segmentedNormalizedTypicalPeriods - # This has a MultiIndex: (period, segment_step, segment_duration, original_start_step) - segmented_df = tsam_agg.segmentedNormalizedTypicalPeriods + # For segmented data, use typicalPeriods (NOT segmentedNormalizedTypicalPeriods!) + # typicalPeriods contains un-normalized values with MultiIndex: (period, segment_step, ...) + # segmentedNormalizedTypicalPeriods contains MinMax-normalized values (wrong scale) + segmented_df = tsam_agg.typicalPeriods for col in segmented_df.columns: # Group by period (cluster) and extract segment values data = np.zeros((actual_n_clusters, n_segments_actual)) diff --git a/tests/test_clustering_io.py b/tests/test_clustering_io.py index b83aead25..47c320040 100644 --- a/tests/test_clustering_io.py +++ b/tests/test_clustering_io.py @@ -605,7 +605,8 @@ def test_segmentation_with_periods_scenarios_roundtrip(self, solver_fixture, tmp timesteps = pd.date_range('2023-01-01', periods=8 * 24, freq='h') periods = pd.Index([2020, 2021], name='period') scenarios = pd.Index(['low', 'high'], name='scenario') - demand = np.sin(np.linspace(0, 4 * np.pi, 8 * 24)) * 10 + 15 + # Scale demand profile to 0.5-1.5 range so flow (profile * size) stays within source capacity + demand = np.sin(np.linspace(0, 4 * np.pi, 8 * 24)) * 0.5 + 1.0 fs = fx.FlowSystem(timesteps, periods=periods, scenarios=scenarios) fs.add_elements( From 9e157622fbc909a5899809777becb6bf23ee765d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 13:20:11 +0100 Subject: [PATCH 09/15] Update notebook --- docs/notebooks/08c-clustering.ipynb | 137 +----- .../08c3-clustering-comparison.ipynb | 400 ++++++++++++++++++ 2 files changed, 416 insertions(+), 121 deletions(-) create mode 100644 docs/notebooks/08c3-clustering-comparison.ipynb diff --git a/docs/notebooks/08c-clustering.ipynb b/docs/notebooks/08c-clustering.ipynb index af429a179..3c858a229 100644 --- a/docs/notebooks/08c-clustering.ipynb +++ b/docs/notebooks/08c-clustering.ipynb @@ -264,11 +264,7 @@ "cell_type": "code", "execution_count": null, "id": "18", - "metadata": { - "jupyter": { - "is_executing": true - } - }, + "metadata": {}, "outputs": [], "source": [ "# Visualize cluster structure with heatmap\n", @@ -279,104 +275,6 @@ "cell_type": "markdown", "id": "19", "metadata": {}, - "source": [ - "### Inner-Period Segmentation\n", - "\n", - "Segmentation provides additional problem reduction by dividing each typical period into \n", - "variable-length segments. Instead of solving all 96 timesteps per day, you solve only \n", - "6-12 representative segments:\n", - "\n", - "**Reduction example:**\n", - "- Standard clustering: 31 days → 8 typical days × 96 timesteps = 768 timesteps\n", - "- With segmentation: 31 days → 8 typical days × 6 segments = 48 representative points\n", - "\n", - "Use `n_segments` to enable segmentation:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20", - "metadata": {}, - "outputs": [], - "source": [ - "# Cluster with inner-period segmentation\n", - "fs_segmented = flow_system.transform.cluster(\n", - " n_clusters=8,\n", - " cluster_duration='1D',\n", - " n_segments=6, # Divide each typical day into 6 segments\n", - " time_series_for_high_peaks=peak_series,\n", - ")\n", - "fs_segmented.name = 'Segmented'\n", - "\n", - "fs_segmented.clustering" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "21", - "metadata": {}, - "outputs": [], - "source": [ - "# Segments have variable durations (in hours)\n", - "# This captures both slow-changing and rapid-transition periods efficiently\n", - "fs_segmented.timestep_duration" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22", - "metadata": {}, - "outputs": [], - "source": [ - "# Optimize and expand to full resolution\n", - "start = timeit.default_timer()\n", - "fs_segmented.optimize(solver)\n", - "time_segmented = timeit.default_timer() - start\n", - "\n", - "# Expand solution back to original timesteps\n", - "fs_segmented_expanded = fs_segmented.transform.expand_solution()\n", - "\n", - "print(f'Segmentation speedup vs standard clustering: {(time_clustered / time_segmented):.1f}x')\n", - "print(f'Expanded timesteps match original: {len(fs_segmented_expanded.timesteps)} == {len(timesteps)}')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "23", - "metadata": {}, - "outputs": [], - "source": [ - "# Compare sizing results\n", - "segmented_comparison = pd.DataFrame(\n", - " {\n", - " 'Standard Clustering': {\n", - " 'Time [s]': time_clustered,\n", - " 'Cost': fs_clustered.solution['costs'].item(),\n", - " 'CHP': fs_clustered.statistics.sizes['CHP(Q_th)'].item(),\n", - " 'Boiler': fs_clustered.statistics.sizes['Boiler(Q_th)'].item(),\n", - " },\n", - " 'Segmented (6 segments)': {\n", - " 'Time [s]': time_segmented,\n", - " 'Cost': fs_segmented.solution['costs'].item(),\n", - " 'CHP': fs_segmented.statistics.sizes['CHP(Q_th)'].item(),\n", - " 'Boiler': fs_segmented.statistics.sizes['Boiler(Q_th)'].item(),\n", - " },\n", - " }\n", - ").T\n", - "segmented_comparison['Speedup'] = time_clustered / segmented_comparison['Time [s]']\n", - "segmented_comparison.style.format(\n", - " {'Time [s]': '{:.2f}', 'Cost': '{:,.0f}', 'CHP': '{:.1f}', 'Boiler': '{:.1f}', 'Speedup': '{:.1f}x'}\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "24", - "metadata": {}, "source": [ "### Manual Cluster Assignment\n", "\n", @@ -388,7 +286,7 @@ { "cell_type": "code", "execution_count": null, - "id": "25", + "id": "20", "metadata": {}, "outputs": [], "source": [ @@ -416,7 +314,7 @@ }, { "cell_type": "markdown", - "id": "26", + "id": "21", "metadata": {}, "source": [ "## Method 3: Two-Stage Workflow (Recommended)\n", @@ -434,7 +332,7 @@ { "cell_type": "code", "execution_count": null, - "id": "27", + "id": "22", "metadata": {}, "outputs": [], "source": [ @@ -446,7 +344,7 @@ { "cell_type": "code", "execution_count": null, - "id": "28", + "id": "23", "metadata": {}, "outputs": [], "source": [ @@ -465,7 +363,7 @@ }, { "cell_type": "markdown", - "id": "29", + "id": "24", "metadata": {}, "source": [ "## Compare Results" @@ -474,7 +372,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30", + "id": "25", "metadata": {}, "outputs": [], "source": [ @@ -523,7 +421,7 @@ }, { "cell_type": "markdown", - "id": "31", + "id": "26", "metadata": {}, "source": [ "## Expand Solution to Full Resolution\n", @@ -535,7 +433,7 @@ { "cell_type": "code", "execution_count": null, - "id": "32", + "id": "27", "metadata": {}, "outputs": [], "source": [ @@ -546,7 +444,7 @@ { "cell_type": "code", "execution_count": null, - "id": "33", + "id": "28", "metadata": {}, "outputs": [], "source": [ @@ -568,7 +466,7 @@ }, { "cell_type": "markdown", - "id": "34", + "id": "29", "metadata": {}, "source": [ "## Visualize Clustered Heat Balance" @@ -577,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "35", + "id": "30", "metadata": {}, "outputs": [], "source": [ @@ -587,7 +485,7 @@ { "cell_type": "code", "execution_count": null, - "id": "36", + "id": "31", "metadata": {}, "outputs": [], "source": [ @@ -596,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "37", + "id": "32", "metadata": {}, "source": [ "## API Reference\n", @@ -615,8 +513,6 @@ "| `extreme_period_method` | `str \\| None` | None | How peaks are integrated: None, 'append', 'new_cluster_center', 'replace_cluster_center' |\n", "| `rescale_cluster_periods` | `bool` | True | Rescale clusters to match original means |\n", "| `predef_cluster_order` | `array` | None | Manual cluster assignments |\n", - "| `n_segments` | `int` | None | Enable inner-period segmentation with N segments per cluster |\n", - "| `segment_representation_method` | `str` | None | Segment representation: 'meanRepresentation', 'distributionRepresentation' |\n", "| `**tsam_kwargs` | - | - | Additional tsam parameters |\n", "\n", "### Clustering Object Properties\n", @@ -675,7 +571,7 @@ }, { "cell_type": "markdown", - "id": "38", + "id": "33", "metadata": {}, "source": [ "## Summary\n", @@ -684,7 +580,6 @@ "\n", "- Use **`cluster()`** to reduce time series into typical periods\n", "- Apply **peak forcing** to capture extreme demand days\n", - "- Use **inner-period segmentation** for additional reduction with `n_segments`\n", "- Use **two-stage optimization** for fast yet accurate investment decisions\n", "- **Expand solutions** back to full resolution with `expand_solution()`\n", "- Access **clustering metadata** via `fs.clustering` (metrics, cluster_order, occurrences)\n", @@ -699,11 +594,11 @@ "4. **Storage handling** is configurable via `cluster_mode`\n", "5. **Check metrics** to evaluate clustering quality\n", "6. **Use `predef_cluster_order`** to reproduce or define custom cluster assignments\n", - "7. **Use `n_segments`** for extreme problem reduction when speed is critical\n", "\n", "### Next Steps\n", "\n", "- **[08c2-clustering-storage-modes](08c2-clustering-storage-modes.ipynb)**: Compare storage modes using a seasonal storage system\n", + "- **[08c3-clustering-comparison](08c3-clustering-comparison.ipynb)**: Compare different clustering configurations\n", "- **[08d-clustering-multiperiod](08d-clustering-multiperiod.ipynb)**: Clustering with multiple periods and scenarios" ] } diff --git a/docs/notebooks/08c3-clustering-comparison.ipynb b/docs/notebooks/08c3-clustering-comparison.ipynb new file mode 100644 index 000000000..b2fe6512d --- /dev/null +++ b/docs/notebooks/08c3-clustering-comparison.ipynb @@ -0,0 +1,400 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Comparing Clustering Configurations\n", + "\n", + "This notebook compares different clustering configurations to find the optimal trade-off\n", + "between accuracy and computational speed.\n", + "\n", + "We compare:\n", + "\n", + "- **Number of clusters**: How many typical periods are needed?\n", + "- **Inner-period segmentation**: Can we reduce timesteps within each cluster?\n", + "\n", + "!!! note \"Requirements\"\n", + " This notebook requires the `tsam` package: `pip install tsam`" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import timeit\n", + "\n", + "import pandas as pd\n", + "import xarray as xr\n", + "\n", + "import flixopt as fx\n", + "\n", + "fx.CONFIG.notebook()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup\n", + "\n", + "District heating system with one month of hourly data:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from data.generate_example_systems import create_district_heating_system\n", + "\n", + "flow_system = create_district_heating_system()\n", + "flow_system.connect_and_transform()\n", + "\n", + "solver = fx.solvers.HighsSolver(mip_gap=0.01)\n", + "peak_series = ['HeatDemand(Q_th)|fixed_relative_profile']\n", + "\n", + "flow_system" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Run Optimizations\n", + "\n", + "Compare full resolution, different cluster counts, and segmentation:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results = {}\n", + "\n", + "# Full resolution baseline\n", + "start = timeit.default_timer()\n", + "fs_full = flow_system.copy()\n", + "fs_full.name = 'Full'\n", + "fs_full.optimize(solver)\n", + "results['Full'] = {'fs': fs_full, 'time': timeit.default_timer() - start, 'timesteps': len(flow_system.timesteps)}\n", + "\n", + "# Different cluster counts\n", + "for n_clusters in [4, 8, 12]:\n", + " start = timeit.default_timer()\n", + " fs = flow_system.transform.cluster(\n", + " n_clusters=n_clusters,\n", + " cluster_duration='1D',\n", + " time_series_for_high_peaks=peak_series,\n", + " )\n", + " fs.name = f'{n_clusters} clusters'\n", + " fs.optimize(solver)\n", + " results[f'{n_clusters} clusters'] = {'fs': fs, 'time': timeit.default_timer() - start, 'timesteps': n_clusters * 24}\n", + "\n", + "# Segmentation (8 clusters with 6 segments each)\n", + "start = timeit.default_timer()\n", + "fs_seg = flow_system.transform.cluster(\n", + " n_clusters=8,\n", + " cluster_duration='1D',\n", + " n_segments=6,\n", + " time_series_for_high_peaks=peak_series,\n", + ")\n", + "fs_seg.name = '8x6 segmented'\n", + "fs_seg.optimize(solver)\n", + "results['8x6 segmented'] = {'fs': fs_seg, 'time': timeit.default_timer() - start, 'timesteps': 8 * 6}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Summary Table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "baseline_cost = results['Full']['fs'].solution['costs'].item()\n", + "baseline_time = results['Full']['time']\n", + "\n", + "summary = pd.DataFrame(\n", + " {\n", + " name: {\n", + " 'Timesteps': r['timesteps'],\n", + " 'Time [s]': r['time'],\n", + " 'Cost [EUR]': r['fs'].solution['costs'].item(),\n", + " 'Cost Gap [%]': (r['fs'].solution['costs'].item() - baseline_cost) / abs(baseline_cost) * 100,\n", + " 'CHP [kW]': r['fs'].statistics.sizes['CHP(Q_th)'].item(),\n", + " 'Storage [kWh]': r['fs'].statistics.sizes['Storage'].item(),\n", + " 'Speedup': baseline_time / r['time'],\n", + " }\n", + " for name, r in results.items()\n", + " }\n", + ").T\n", + "\n", + "summary.style.format(\n", + " {\n", + " 'Timesteps': '{:.0f}',\n", + " 'Time [s]': '{:.2f}',\n", + " 'Cost [EUR]': '{:,.0f}',\n", + " 'Cost Gap [%]': '{:+.1f}',\n", + " 'CHP [kW]': '{:.1f}',\n", + " 'Storage [kWh]': '{:.0f}',\n", + " 'Speedup': '{:.1f}x',\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Expand Solutions to Full Resolution\n", + "\n", + "Before comparing time series, expand all clustered solutions back to the original timesteps:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Expand all clustered/segmented solutions\n", + "expanded = {\n", + " 'Full': results['Full']['fs'],\n", + " '4 clusters': results['4 clusters']['fs'].transform.expand_solution(),\n", + " '8 clusters': results['8 clusters']['fs'].transform.expand_solution(),\n", + " '12 clusters': results['12 clusters']['fs'].transform.expand_solution(),\n", + " '8x6 segmented': results['8x6 segmented']['fs'].transform.expand_solution(),\n", + "}\n", + "\n", + "# Rename for clarity\n", + "for name, fs in expanded.items():\n", + " fs.name = name" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare Component Sizes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comparison = fx.Comparison(list(expanded.values()))\n", + "comparison.statistics.sizes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comparison.statistics.plot.sizes()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare Heat Production\n", + "\n", + "Visualize CHP and Boiler flow rates across all configurations:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Build combined dataset of heat flows\n", + "heat_flows = xr.Dataset(\n", + " {\n", + " 'CHP': xr.concat(\n", + " [fs.solution['CHP(Q_th)|flow_rate'] for fs in expanded.values()], dim=pd.Index(expanded.keys(), name='case')\n", + " ),\n", + " 'Boiler': xr.concat(\n", + " [fs.solution['Boiler(Q_th)|flow_rate'] for fs in expanded.values()],\n", + " dim=pd.Index(expanded.keys(), name='case'),\n", + " ),\n", + " }\n", + ")\n", + "\n", + "# Line plot with case as color, facet by component\n", + "heat_flows.fxplot.line(color='case', facet_row='variable', title='Heat Production by Configuration')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Compare Storage Operation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Build storage charge state dataset\n", + "storage_soc = xr.concat(\n", + " [fs.solution['Storage|charge_state'] for fs in expanded.values()], dim=pd.Index(expanded.keys(), name='case')\n", + ")\n", + "storage_soc.name = 'Storage Charge State'\n", + "\n", + "storage_soc.fxplot.line(color='case', title='Storage State of Charge')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clustering Quality Metrics\n", + "\n", + "RMSE and MAE show how well clustering preserves time series patterns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Collect metrics from clustered systems\n", + "metrics_list = []\n", + "for name in ['4 clusters', '8 clusters', '12 clusters']:\n", + " fs = results[name]['fs']\n", + " df = fs.clustering.metrics.to_dataframe()\n", + " df['Config'] = name\n", + " metrics_list.append(df)\n", + "\n", + "metrics_df = pd.concat(metrics_list)\n", + "metrics_df.index.name = 'Time Series'\n", + "metrics_df = metrics_df.reset_index()\n", + "\n", + "# Pivot for display\n", + "metrics_df.pivot(index='Time Series', columns='Config', values='RMSE').style.format('{:.4f}').background_gradient(\n", + " cmap='RdYlGn_r', axis=1\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Visualize Clustering Structure" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results['8 clusters']['fs'].clustering.plot.compare(kind='duration_curve')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "results['8 clusters']['fs'].clustering.plot.heatmap()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Segmentation: Variable Segment Durations\n", + "\n", + "Segmentation creates variable-length segments that adapt to time series patterns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "fs_seg = results['8x6 segmented']['fs']\n", + "\n", + "# Show segment durations (hours per segment per cluster)\n", + "fs_seg.timestep_duration.to_pandas().style.format('{:.0f}').background_gradient(cmap='Blues', axis=None)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize segment durations\n", + "fs_seg.timestep_duration.fxplot.bar(facet_col='cluster', facet_col_wrap=4, title='Segment Durations per Cluster')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Recommendations\n", + "\n", + "Based on this comparison:\n", + "\n", + "1. **8 clusters** provides good accuracy (~7% cost gap) with 5x speedup\n", + "2. **Segmentation** provides additional reduction with acceptable accuracy loss\n", + "3. **4 clusters** may miss demand patterns, leading to undersized or oversized components\n", + "\n", + "### When to use segmentation:\n", + "\n", + "- Large problems where even clustered optimization is slow\n", + "- Preliminary design studies where speed matters more than precision\n", + "- Sensitivity analyses requiring many optimization runs\n", + "\n", + "### Best practice:\n", + "\n", + "- Always use `time_series_for_high_peaks` to capture extreme demand days\n", + "- Use `expand_solution()` to validate results at full resolution" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 051bcb68ebb8a066525561f4cdfc1c538f686133 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 13:48:32 +0100 Subject: [PATCH 10/15] Update notebooks --- docs/notebooks/02-heat-system.ipynb | 8 +++++++- ...clustering.ipynb => 08c1-clustering.ipynb} | 0 .../08c3-clustering-comparison.ipynb | 20 +++++++++---------- .../data/generate_example_systems.py | 16 +++++++++++---- docs/user-guide/optimization/clustering.md | 2 +- mkdocs.yml | 2 +- 6 files changed, 31 insertions(+), 17 deletions(-) rename docs/notebooks/{08c-clustering.ipynb => 08c1-clustering.ipynb} (100%) diff --git a/docs/notebooks/02-heat-system.ipynb b/docs/notebooks/02-heat-system.ipynb index d3514de15..3115fa3b3 100644 --- a/docs/notebooks/02-heat-system.ipynb +++ b/docs/notebooks/02-heat-system.ipynb @@ -375,7 +375,13 @@ ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + } + }, "nbformat": 4, "nbformat_minor": 5 } diff --git a/docs/notebooks/08c-clustering.ipynb b/docs/notebooks/08c1-clustering.ipynb similarity index 100% rename from docs/notebooks/08c-clustering.ipynb rename to docs/notebooks/08c1-clustering.ipynb diff --git a/docs/notebooks/08c3-clustering-comparison.ipynb b/docs/notebooks/08c3-clustering-comparison.ipynb index b2fe6512d..f8107f7f2 100644 --- a/docs/notebooks/08c3-clustering-comparison.ipynb +++ b/docs/notebooks/08c3-clustering-comparison.ipynb @@ -31,7 +31,7 @@ "\n", "import flixopt as fx\n", "\n", - "fx.CONFIG.notebook()" + "fx.CONFIG.exploring()" ] }, { @@ -40,7 +40,7 @@ "source": [ "## Setup\n", "\n", - "District heating system with one month of hourly data:" + "District heating system with a full year of hourly data (8760 timesteps):" ] }, { @@ -51,7 +51,7 @@ "source": [ "from data.generate_example_systems import create_district_heating_system\n", "\n", - "flow_system = create_district_heating_system()\n", + "flow_system = create_district_heating_system(duration='quarter')\n", "flow_system.connect_and_transform()\n", "\n", "solver = fx.solvers.HighsSolver(mip_gap=0.01)\n", @@ -99,14 +99,14 @@ "# Segmentation (8 clusters with 6 segments each)\n", "start = timeit.default_timer()\n", "fs_seg = flow_system.transform.cluster(\n", - " n_clusters=8,\n", + " n_clusters=16,\n", " cluster_duration='1D',\n", " n_segments=6,\n", " time_series_for_high_peaks=peak_series,\n", ")\n", - "fs_seg.name = '8x6 segmented'\n", + "fs_seg.name = '16x6 segmented'\n", "fs_seg.optimize(solver)\n", - "results['8x6 segmented'] = {'fs': fs_seg, 'time': timeit.default_timer() - start, 'timesteps': 8 * 6}" + "results['16x6 segmented'] = {'fs': fs_seg, 'time': timeit.default_timer() - start, 'timesteps': 8 * 6}" ] }, { @@ -131,7 +131,7 @@ " 'Timesteps': r['timesteps'],\n", " 'Time [s]': r['time'],\n", " 'Cost [EUR]': r['fs'].solution['costs'].item(),\n", - " 'Cost Gap [%]': (r['fs'].solution['costs'].item() - baseline_cost) / abs(baseline_cost) * 100,\n", + " 'Cost Gap [%]': (r['fs'].solution['costs'].item() - baseline_cost) / max(abs(baseline_cost), 1) * 100,\n", " 'CHP [kW]': r['fs'].statistics.sizes['CHP(Q_th)'].item(),\n", " 'Storage [kWh]': r['fs'].statistics.sizes['Storage'].item(),\n", " 'Speedup': baseline_time / r['time'],\n", @@ -144,7 +144,7 @@ " {\n", " 'Timesteps': '{:.0f}',\n", " 'Time [s]': '{:.2f}',\n", - " 'Cost [EUR]': '{:,.0f}',\n", + " 'Cost [EUR]': '{:.0f}',\n", " 'Cost Gap [%]': '{:+.1f}',\n", " 'CHP [kW]': '{:.1f}',\n", " 'Storage [kWh]': '{:.0f}',\n", @@ -174,7 +174,7 @@ " '4 clusters': results['4 clusters']['fs'].transform.expand_solution(),\n", " '8 clusters': results['8 clusters']['fs'].transform.expand_solution(),\n", " '12 clusters': results['12 clusters']['fs'].transform.expand_solution(),\n", - " '8x6 segmented': results['8x6 segmented']['fs'].transform.expand_solution(),\n", + " '16x6 segmented': results['16x6 segmented']['fs'].transform.expand_solution(),\n", "}\n", "\n", "# Rename for clarity\n", @@ -335,7 +335,7 @@ "metadata": {}, "outputs": [], "source": [ - "fs_seg = results['8x6 segmented']['fs']\n", + "fs_seg = results['16x6 segmented']['fs']\n", "\n", "# Show segment durations (hours per segment per cluster)\n", "fs_seg.timestep_duration.to_pandas().style.format('{:.0f}').background_gradient(cmap='Blues', axis=None)" diff --git a/docs/notebooks/data/generate_example_systems.py b/docs/notebooks/data/generate_example_systems.py index 985628e1f..a4acc0a79 100644 --- a/docs/notebooks/data/generate_example_systems.py +++ b/docs/notebooks/data/generate_example_systems.py @@ -290,11 +290,10 @@ def create_complex_system() -> fx.FlowSystem: return fs -def create_district_heating_system() -> fx.FlowSystem: +def create_district_heating_system(duration: str = 'month') -> fx.FlowSystem: """Create a district heating system with BDEW profiles. Uses realistic German data: - - One month (January 2020), hourly resolution - BDEW industrial heat profile - BDEW commercial electricity profile - OPSD electricity prices @@ -302,10 +301,19 @@ def create_district_heating_system() -> fx.FlowSystem: - CHP, boiler, storage, and grid connections - Investment optimization for sizing + Args: + duration: Time period - 'month' (744h), 'quarter' (2160h), or 'year' (8760h) + Used by: 08a-aggregation, 08c-clustering, 08e-clustering-internals notebooks """ - # One month, hourly - timesteps = pd.date_range('2020-01-01', '2020-01-31 23:00:00', freq='h') + end_dates = { + 'month': '2020-01-31 23:00:00', + 'quarter': '2020-03-31 23:00:00', + 'year': '2020-12-31 23:00:00', + } + if duration not in end_dates: + raise ValueError(f"duration must be one of {list(end_dates.keys())}, got '{duration}'") + timesteps = pd.date_range('2020-01-01', end_dates[duration], freq='h') temp = _get_weather()['temperature_C'].reindex(timesteps, method='ffill').values # BDEW profiles (MW scale for district heating) diff --git a/docs/user-guide/optimization/clustering.md b/docs/user-guide/optimization/clustering.md index 3ce0b53e9..e2e07439b 100644 --- a/docs/user-guide/optimization/clustering.md +++ b/docs/user-guide/optimization/clustering.md @@ -260,5 +260,5 @@ fs_accurate = flow_system.transform.cluster(n_clusters=24, cluster_duration='1D' ## See Also - [Storage Component](../mathematical-notation/elements/Storage.md) - Storage mathematical formulation -- [Notebooks: Clustering](../../notebooks/08c-clustering.ipynb) - Interactive examples +- [Notebooks: Clustering](../../notebooks/08c1-clustering.ipynb) - Interactive examples - [Notebooks: Storage Modes](../../notebooks/08c2-clustering-storage-modes.ipynb) - Storage mode comparison diff --git a/mkdocs.yml b/mkdocs.yml index 9eed96ad6..65cddd742 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -72,7 +72,7 @@ nav: - Aggregation: notebooks/08a-aggregation.ipynb - Rolling Horizon: notebooks/08b-rolling-horizon.ipynb - Clustering: - - Introduction: notebooks/08c-clustering.ipynb + - Introduction: notebooks/08c1-clustering.ipynb - Storage Modes: notebooks/08c2-clustering-storage-modes.ipynb - Multi-Period: notebooks/08d-clustering-multiperiod.ipynb - Internals: notebooks/08e-clustering-internals.ipynb From 3bc3e73eb197ca11703ff371b15afa46e7a97737 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 13:53:57 +0100 Subject: [PATCH 11/15] Update notebooks --- docs/notebooks/08c3-clustering-comparison.ipynb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/notebooks/08c3-clustering-comparison.ipynb b/docs/notebooks/08c3-clustering-comparison.ipynb index f8107f7f2..9cde12d2a 100644 --- a/docs/notebooks/08c3-clustering-comparison.ipynb +++ b/docs/notebooks/08c3-clustering-comparison.ipynb @@ -171,10 +171,10 @@ "# Expand all clustered/segmented solutions\n", "expanded = {\n", " 'Full': results['Full']['fs'],\n", - " '4 clusters': results['4 clusters']['fs'].transform.expand_solution(),\n", - " '8 clusters': results['8 clusters']['fs'].transform.expand_solution(),\n", - " '12 clusters': results['12 clusters']['fs'].transform.expand_solution(),\n", - " '16x6 segmented': results['16x6 segmented']['fs'].transform.expand_solution(),\n", + " '4 clusters': results['4 clusters']['fs'].transform.expand(),\n", + " '8 clusters': results['8 clusters']['fs'].transform.expand(),\n", + " '12 clusters': results['12 clusters']['fs'].transform.expand(),\n", + " '16x6 segmented': results['16x6 segmented']['fs'].transform.expand(),\n", "}\n", "\n", "# Rename for clarity\n", From 8bb3da2376a342e483853c95d96fad29dd60b46d Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 14:05:31 +0100 Subject: [PATCH 12/15] Update notebooks --- .../08c3-clustering-comparison.ipynb | 55 ++++++++++--------- 1 file changed, 29 insertions(+), 26 deletions(-) diff --git a/docs/notebooks/08c3-clustering-comparison.ipynb b/docs/notebooks/08c3-clustering-comparison.ipynb index 9cde12d2a..0f000ac48 100644 --- a/docs/notebooks/08c3-clustering-comparison.ipynb +++ b/docs/notebooks/08c3-clustering-comparison.ipynb @@ -27,11 +27,10 @@ "import timeit\n", "\n", "import pandas as pd\n", - "import xarray as xr\n", "\n", "import flixopt as fx\n", "\n", - "fx.CONFIG.exploring()" + "fx.CONFIG.notebook()" ] }, { @@ -205,7 +204,7 @@ "metadata": {}, "outputs": [], "source": [ - "comparison.statistics.plot.sizes()" + "comparison.statistics.plot.sizes(color='case')" ] }, { @@ -223,21 +222,18 @@ "metadata": {}, "outputs": [], "source": [ - "# Build combined dataset of heat flows\n", - "heat_flows = xr.Dataset(\n", - " {\n", - " 'CHP': xr.concat(\n", - " [fs.solution['CHP(Q_th)|flow_rate'] for fs in expanded.values()], dim=pd.Index(expanded.keys(), name='case')\n", - " ),\n", - " 'Boiler': xr.concat(\n", - " [fs.solution['Boiler(Q_th)|flow_rate'] for fs in expanded.values()],\n", - " dim=pd.Index(expanded.keys(), name='case'),\n", - " ),\n", - " }\n", - ")\n", - "\n", - "# Line plot with case as color, facet by component\n", - "heat_flows.fxplot.line(color='case', facet_row='variable', title='Heat Production by Configuration')" + "comparison.solution['CHP(Q_th)|flow_rate'].fxplot.heatmap(title='Heat Production by Configuration')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comparison.inputs['HeatDemand(Q_th)|fixed_relative_profile'].fxplot.line(\n", + " title='Heat Demand by Configuration', colors='viridis'\n", + ")" ] }, { @@ -253,13 +249,16 @@ "metadata": {}, "outputs": [], "source": [ - "# Build storage charge state dataset\n", - "storage_soc = xr.concat(\n", - " [fs.solution['Storage|charge_state'] for fs in expanded.values()], dim=pd.Index(expanded.keys(), name='case')\n", - ")\n", - "storage_soc.name = 'Storage Charge State'\n", - "\n", - "storage_soc.fxplot.line(color='case', title='Storage State of Charge')" + "comparison.solution['Storage|charge_state'].fxplot.line(color='case', title='Storage State of Charge')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "comparison.statistics.plot.storage('Storage').data.sum('time').to_pandas()" ] }, { @@ -274,7 +273,11 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "jupyter": { + "is_executing": true + } + }, "outputs": [], "source": [ "# Collect metrics from clustered systems\n", From a78d5376a26b70aafebdfcea1bf56a1aefb1c295 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 16:18:18 +0100 Subject: [PATCH 13/15] Added notebook to docs --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index 65cddd742..6ab1d1685 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -74,6 +74,7 @@ nav: - Clustering: - Introduction: notebooks/08c1-clustering.ipynb - Storage Modes: notebooks/08c2-clustering-storage-modes.ipynb + - Comparison: notebooks/08c3-clustering-comparison.ipynb - Multi-Period: notebooks/08d-clustering-multiperiod.ipynb - Internals: notebooks/08e-clustering-internals.ipynb - Results: From abe5dee61de33d126135e50b73359f6f8b0702ad Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 16:22:48 +0100 Subject: [PATCH 14/15] =?UTF-8?q?=E2=8F=BA=20Done.=20Fixed=20all=20issues?= =?UTF-8?q?=20from=20the=20code=20review:?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1. expand_solution() → expand() in: - CHANGELOG.md (2 occurrences) - tests/test_cluster_reduce_expand.py (3 occurrences) - tests/test_clustering_io.py (2 occurrences) - docs/notebooks/08c3-clustering-comparison.ipynb (1 occurrence in best practices) 2. Notebook description fix: Changed "full year of hourly data (8760 timesteps)" to "quarter of hourly data (2190 timesteps)" to match duration='quarter' 3. Segmentation naming fix: - Comment: "8 clusters" → "16 clusters" (to match n_clusters=16) - Timesteps calculation: 8 * 6 → 16 * 6 --- CHANGELOG.md | 6 +++--- docs/notebooks/08c3-clustering-comparison.ipynb | 8 ++++---- tests/test_cluster_reduce_expand.py | 14 +++++++------- tests/test_clustering_io.py | 6 +++--- 4 files changed, 17 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 97dd247fe..dac4192ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -71,7 +71,7 @@ fs_segmented = flow_system.transform.cluster( n_segments=6, # Enables segmentation with 6 segments per cluster ) fs_segmented.optimize(solver) -fs_expanded = fs_segmented.transform.expand_solution() +fs_expanded = fs_segmented.transform.expand() ``` **New Parameters**: @@ -85,7 +85,7 @@ fs_expanded = fs_segmented.transform.expand_solution() - **Variable segment durations**: Each segment can have different duration (in hours), automatically determined by tsam based on time series characteristics - **Full storage integration**: Works with all storage `cluster_mode` options including `'intercluster_cyclic'` -- **Solution expansion**: `expand_solution()` correctly maps segmented results back to original timesteps +- **Solution expansion**: `expand()` correctly maps segmented results back to original timesteps - **RangeIndex timesteps**: Segmented FlowSystems use `RangeIndex` instead of `DatetimeIndex` for the time dimension - **`is_segmented` property**: Check if a FlowSystem uses segmentation via `flow_system.is_segmented` @@ -109,7 +109,7 @@ fs_segmented = flow_system.transform.cluster( fs_segmented.optimize(solver) # Expand back to full resolution -fs_expanded = fs_segmented.transform.expand_solution() +fs_expanded = fs_segmented.transform.expand() ``` !!! tip "When to Use Segmentation" diff --git a/docs/notebooks/08c3-clustering-comparison.ipynb b/docs/notebooks/08c3-clustering-comparison.ipynb index 0f000ac48..8baa9a34e 100644 --- a/docs/notebooks/08c3-clustering-comparison.ipynb +++ b/docs/notebooks/08c3-clustering-comparison.ipynb @@ -39,7 +39,7 @@ "source": [ "## Setup\n", "\n", - "District heating system with a full year of hourly data (8760 timesteps):" + "District heating system with a quarter of hourly data (2190 timesteps):" ] }, { @@ -95,7 +95,7 @@ " fs.optimize(solver)\n", " results[f'{n_clusters} clusters'] = {'fs': fs, 'time': timeit.default_timer() - start, 'timesteps': n_clusters * 24}\n", "\n", - "# Segmentation (8 clusters with 6 segments each)\n", + "# Segmentation (16 clusters with 6 segments each)\n", "start = timeit.default_timer()\n", "fs_seg = flow_system.transform.cluster(\n", " n_clusters=16,\n", @@ -105,7 +105,7 @@ ")\n", "fs_seg.name = '16x6 segmented'\n", "fs_seg.optimize(solver)\n", - "results['16x6 segmented'] = {'fs': fs_seg, 'time': timeit.default_timer() - start, 'timesteps': 8 * 6}" + "results['16x6 segmented'] = {'fs': fs_seg, 'time': timeit.default_timer() - start, 'timesteps': 16 * 6}" ] }, { @@ -375,7 +375,7 @@ "### Best practice:\n", "\n", "- Always use `time_series_for_high_peaks` to capture extreme demand days\n", - "- Use `expand_solution()` to validate results at full resolution" + "- Use `expand()` to validate results at full resolution" ] } ], diff --git a/tests/test_cluster_reduce_expand.py b/tests/test_cluster_reduce_expand.py index 0b3a0c505..bfba5b63c 100644 --- a/tests/test_cluster_reduce_expand.py +++ b/tests/test_cluster_reduce_expand.py @@ -942,8 +942,8 @@ def test_segmented_system_optimize(self, solver_fixture, timesteps_8_days): # time dimension = n_segments + 1 (extra timestep) assert flow.sizes['time'] == 7 # 6 segments + 1 extra - def test_segmented_expand_solution_restores_full_timesteps(self, solver_fixture, timesteps_8_days): - """Test that expand_solution works for segmented systems.""" + def test_segmented_expand_restores_full_timesteps(self, solver_fixture, timesteps_8_days): + """Test that expand works for segmented systems.""" fs = create_simple_system(timesteps_8_days) fs_segmented = fs.transform.cluster( @@ -954,7 +954,7 @@ def test_segmented_expand_solution_restores_full_timesteps(self, solver_fixture, fs_segmented.optimize(solver_fixture) # Expand back to full - fs_expanded = fs_segmented.transform.expand_solution() + fs_expanded = fs_segmented.transform.expand() # Should have original timesteps (DatetimeIndex) assert isinstance(fs_expanded.timesteps, pd.DatetimeIndex) @@ -979,7 +979,7 @@ def test_segmented_expanded_statistics_match(self, solver_fixture, timesteps_8_d reduced_flow_hours = reduced_fh.sum().item() # Expand and get statistics (no cluster_weight needed for expanded FlowSystem) - fs_expanded = fs_segmented.transform.expand_solution() + fs_expanded = fs_segmented.transform.expand() expanded_flow_hours = fs_expanded.statistics.flow_hours['Boiler(Q_th)'].sum().item() # Flow hours should match @@ -1007,8 +1007,8 @@ def test_segmented_storage_intercluster_cyclic(self, solver_fixture, timesteps_8 # Verify solution is valid assert fs_segmented.solution is not None - def test_segmented_storage_expand_solution(self, solver_fixture, timesteps_8_days): - """Test that expand_solution works for segmented storage systems.""" + def test_segmented_storage_expand(self, solver_fixture, timesteps_8_days): + """Test that expand works for segmented storage systems.""" fs = create_system_with_storage(timesteps_8_days, cluster_mode='intercluster_cyclic') fs_segmented = fs.transform.cluster( @@ -1019,7 +1019,7 @@ def test_segmented_storage_expand_solution(self, solver_fixture, timesteps_8_day fs_segmented.optimize(solver_fixture) # Expand - fs_expanded = fs_segmented.transform.expand_solution() + fs_expanded = fs_segmented.transform.expand() # Should have original timesteps assert len(fs_expanded.timesteps) == 192 diff --git a/tests/test_clustering_io.py b/tests/test_clustering_io.py index 756978d6a..f5fe415da 100644 --- a/tests/test_clustering_io.py +++ b/tests/test_clustering_io.py @@ -562,7 +562,7 @@ def test_segmentation_netcdf_roundtrip(self, simple_system_8_days, solver_fixtur assert fs_loaded.clustering.result.cluster_structure.segment_timestep_counts is not None def test_segmentation_expand_after_roundtrip(self, simple_system_8_days, solver_fixture, tmp_path): - """Test that expand_solution works after netCDF roundtrip for segmented systems.""" + """Test that expand works after netCDF roundtrip for segmented systems.""" fs = simple_system_8_days fs_segmented = fs.transform.cluster(n_clusters=2, cluster_duration='1D', n_segments=6) fs_segmented.optimize(solver_fixture) @@ -573,7 +573,7 @@ def test_segmentation_expand_after_roundtrip(self, simple_system_8_days, solver_ fs_loaded = fx.FlowSystem.from_netcdf(path) # Expand solution - fs_expanded = fs_loaded.transform.expand_solution() + fs_expanded = fs_loaded.transform.expand() # Verify expansion assert isinstance(fs_expanded.timesteps, pd.DatetimeIndex) @@ -638,6 +638,6 @@ def test_segmentation_with_periods_scenarios_roundtrip(self, solver_fixture, tmp assert list(fs_loaded.scenarios) == list(fs_segmented.scenarios) # Expand should work - fs_expanded = fs_loaded.transform.expand_solution() + fs_expanded = fs_loaded.transform.expand() assert len(fs_expanded.timesteps) == 8 * 24 assert fs_expanded.solution is not None From 866735ab72f84b469bf46a3be1361662dd211122 Mon Sep 17 00:00:00 2001 From: FBumann <117816358+FBumann@users.noreply.github.com> Date: Wed, 7 Jan 2026 16:28:25 +0100 Subject: [PATCH 15/15] 1. Added time_dim_size = cs.n_segments if cs.is_segmented else cs.timesteps_per_cluster to use the correct time dimension size 2. Updated the reshape and coordinate creation to use time_dim_size instead of timesteps_per_cluster --- flixopt/clustering/base.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/flixopt/clustering/base.py b/flixopt/clustering/base.py index 2efb323d4..7c6f28464 100644 --- a/flixopt/clustering/base.py +++ b/flixopt/clustering/base.py @@ -879,7 +879,8 @@ def clusters( resolved_variables = self._resolve_variables(variables) n_clusters = int(cs.n_clusters) if isinstance(cs.n_clusters, (int, np.integer)) else int(cs.n_clusters.values) - timesteps_per_cluster = cs.timesteps_per_cluster + # For segmented systems, use n_segments for the time dimension size + time_dim_size = cs.n_segments if cs.is_segmented else cs.timesteps_per_cluster # Check dimensions of all variables for consistency has_cluster_dim = None @@ -921,11 +922,11 @@ def clusters( data_by_cluster = da.values else: # Data has (time,) dim - reshape to (cluster, time) - data_by_cluster = da.values.reshape(n_clusters, timesteps_per_cluster) + data_by_cluster = da.values.reshape(n_clusters, time_dim_size) data_vars[var] = xr.DataArray( data_by_cluster, dims=['cluster', 'time'], - coords={'cluster': cluster_labels, 'time': range(timesteps_per_cluster)}, + coords={'cluster': cluster_labels, 'time': range(time_dim_size)}, ) ds = xr.Dataset(data_vars)