Merged
3 changes: 3 additions & 0 deletions .cspell/custom-dictionary-workspace.txt
@@ -25,6 +25,7 @@ axvspan
backprop
Backpropagate
backpropagation
backsteps
Basepath
Batpred
battemperature
@@ -162,6 +163,7 @@ isort
itemtype
ivtime
jsyaml
kaiming
killall
kopt
Kostal
@@ -239,6 +241,7 @@ oninput
onmouseout
onmouseover
openweathermap
overfitting
ownerapi
pdata
pdetails
5 changes: 2 additions & 3 deletions apps/predbat/inverter.py
@@ -1105,12 +1105,11 @@ def update_status(self, minutes_now, quiet=False):
pdetails = self.rest_data["Power"]
if "Power" in pdetails:
ppdetails = pdetails["Power"]
# self.log("DEBUG: Power details from REST: {}".format(ppdetails))
self.battery_power = float(ppdetails.get("Battery_Power", 0.0))
self.pv_power = float(ppdetails.get("PV_Power", 0.0))
self.grid_power = float(ppdetails.get("Grid_Power", 0.0))
# Calculate load from energy balance instead of using inverter register (which is incorrect during grid charging)
# Load = PV + Grid + Battery (battery negative when charging, positive when discharging)
self.load_power = self.pv_power + self.grid_power + self.battery_power
self.load_power = float(ppdetails.get("Load_Power", 0.0))
Copilot AI Feb 19, 2026

Reading REST "Load_Power" directly changes semantics and breaks the existing assumption (and fixtures) that GivTCP load is unreliable during some modes (e.g., grid charging). The repo’s REST fixtures show Load_Power != PV_Power + Grid_Power + Battery_Power (e.g., coverage/cases/rest_v2.json has Load_Power=624 but balance=590; rest_v3.json has 197 vs balance=233), and tests assert the energy-balance value. Consider reverting to the balance calculation, or make using Load_Power an opt-in config with a safe fallback when it disagrees significantly with the energy balance.

Suggested change
self.load_power = float(ppdetails.get("Load_Power", 0.0))
# Derive load power from energy balance rather than trusting REST Load_Power directly
self.load_power = self.pv_power + self.grid_power + self.battery_power

if self.rest_v3:
self.battery_voltage = float(ppdetails.get("Battery_Voltage", 0.0))
else:
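
To make the review suggestion concrete, here is a minimal sketch of the opt-in variant: trust the REST `Load_Power` register only when it roughly agrees with the energy balance. The `use_rest_load_power` flag and the 5% tolerance are illustrative assumptions, not part of the merged change.

```python
# Sketch only: `use_rest_load_power` and the 5% tolerance are hypothetical,
# not part of this PR. Battery power is negative when charging.
def derive_load_power(ppdetails, use_rest_load_power=False, tolerance=0.05):
    pv = float(ppdetails.get("PV_Power", 0.0))
    grid = float(ppdetails.get("Grid_Power", 0.0))
    battery = float(ppdetails.get("Battery_Power", 0.0))
    balance = pv + grid + battery  # Load = PV + Grid + Battery

    if use_rest_load_power:
        reported = float(ppdetails.get("Load_Power", 0.0))
        # Only trust the register when it agrees with the energy balance
        if abs(reported - balance) <= tolerance * max(abs(balance), 1.0):
            return reported
    return balance

# With the rest_v2.json figures quoted in the comment, the register
# disagrees by 34 W (624 vs 590), so this sketch falls back to the balance:
print(derive_load_power({"PV_Power": 10, "Grid_Power": -79,
                         "Battery_Power": 659, "Load_Power": 624},
                        use_rest_load_power=True))  # -> 590.0
```
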
5 changes: 3 additions & 2 deletions apps/predbat/load_ml_component.py
@@ -63,6 +63,7 @@ def initialize(self, load_ml_enable, load_ml_source=True):
self.ml_time_decay_days = 7
self.ml_max_load_kw = 50.0
self.ml_max_model_age_hours = 48
self.ml_weight_decay = 0.01

# Data state
self.load_data = None
@@ -98,7 +99,7 @@ def initialize(self, load_ml_enable, load_ml_source=True):

def _init_predictor(self):
"""Initialize or reinitialize the predictor."""
self.predictor = LoadPredictor(log_func=self.log, learning_rate=self.ml_learning_rate, max_load_kw=self.ml_max_load_kw)
self.predictor = LoadPredictor(log_func=self.log, learning_rate=self.ml_learning_rate, max_load_kw=self.ml_max_load_kw, weight_decay=self.ml_weight_decay)

# Determine model save path
if self.config_root:
@@ -124,7 +125,7 @@ def _init_predictor(self):
# Model load failed (version mismatch, architecture change, etc.)
# Reinitialize predictor to ensure clean state
self.log("ML Component: Failed to load model, reinitializing predictor")
self.predictor = LoadPredictor(log_func=self.log, learning_rate=self.ml_learning_rate, max_load_kw=self.ml_max_load_kw)
self.predictor = LoadPredictor(log_func=self.log, learning_rate=self.ml_learning_rate, max_load_kw=self.ml_max_load_kw, weight_decay=self.ml_weight_decay)

async def _fetch_load_data(self):
"""
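
For reference, a usage sketch of the constructor with the new keyword; the import path is assumed from this repo's layout (`apps/predbat/load_predictor.py`) and the values mirror the component defaults set in this diff:

```python
from load_predictor import LoadPredictor  # path assumed from this repo

predictor = LoadPredictor(
    log_func=print,       # any logging callable
    learning_rate=0.001,  # AdamW step size
    max_load_kw=50.0,     # predictions are clipped to this ceiling
    weight_decay=0.01,    # L2 coefficient; 0.0 disables the decay term
)
```
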
20 changes: 13 additions & 7 deletions apps/predbat/load_predictor.py
@@ -81,18 +81,20 @@ class LoadPredictor:
- Placeholder for future exogenous features (temperature, solar)
"""

def __init__(self, log_func=None, learning_rate=0.001, max_load_kw=23.0):
def __init__(self, log_func=None, learning_rate=0.001, max_load_kw=23.0, weight_decay=0.01):
"""
Initialize the load predictor.

Args:
log_func: Logging function (defaults to print)
learning_rate: Learning rate for Adam optimizer
max_load_kw: Maximum load in kW for clipping predictions
weight_decay: L2 regularization coefficient for AdamW (0.0 disables)
"""
self.log = log_func if log_func else print
self.learning_rate = learning_rate
self.max_load_kw = max_load_kw
self.weight_decay = weight_decay

# Model weights (initialized on first train)
self.weights = None
@@ -120,7 +122,7 @@ def __init__(self, log_func=None, learning_rate=0.001, max_load_kw=23.0):
self.model_initialized = False

def _initialize_weights(self):
"""Initialize network weights using Xavier initialization"""
"""Initialize network weights using He initialization (optimal for ReLU)"""
np.random.seed(42) # For reproducibility

layer_sizes = [TOTAL_FEATURES] + HIDDEN_SIZES + [OUTPUT_STEPS]
@@ -136,8 +138,8 @@ def _initialize_weights(self):
fan_in = layer_sizes[i]
fan_out = layer_sizes[i + 1]

# Xavier initialization
std = np.sqrt(2.0 / (fan_in + fan_out))
# He initialization (optimal for ReLU activations)
std = np.sqrt(2.0 / fan_in)
w = np.random.randn(fan_in, fan_out).astype(np.float32) * std
b = np.zeros(fan_out, dtype=np.float32)

@@ -220,7 +222,7 @@ def _backward(self, y_true, activations, pre_activations, sample_weights=None):

def _adam_update(self, weight_grads, bias_grads, beta1=0.9, beta2=0.999, epsilon=1e-8):
"""
Update weights using Adam optimizer.
Update weights using Adam optimizer with optional weight decay (AdamW).

Args:
weight_grads: Gradients for weights
@@ -240,9 +242,13 @@ def _adam_update(self, weight_grads, bias_grads, beta1=0.9, beta2=0.999, epsilon
m_hat = self.m_weights[i] / (1 - beta1**self.adam_t)
v_hat = self.v_weights[i] / (1 - beta2**self.adam_t)

# Update weights
# Update weights with Adam step
self.weights[i] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

# Apply weight decay (AdamW-style L2 regularization)
if self.weight_decay > 0:
self.weights[i] *= 1 - self.learning_rate * self.weight_decay

# Update momentum for biases
self.m_biases[i] = beta1 * self.m_biases[i] + (1 - beta1) * bias_grads[i]
self.v_biases[i] = beta2 * self.v_biases[i] + (1 - beta2) * (bias_grads[i] ** 2)
@@ -251,7 +257,7 @@ def _adam_update(self, weight_grads, bias_grads, beta1=0.9, beta2=0.999, epsilon
m_hat = self.m_biases[i] / (1 - beta1**self.adam_t)
v_hat = self.v_biases[i] / (1 - beta2**self.adam_t)

# Update biases
# Update biases (no weight decay on biases)
self.biases[i] -= self.learning_rate * m_hat / (np.sqrt(v_hat) + epsilon)

def _create_time_features(self, minute_of_day, day_of_week):
9 changes: 8 additions & 1 deletion apps/predbat/octopus.py
@@ -1164,6 +1164,11 @@ async def async_read_response_retry(self, response, url, ignore_errors=False):
"""
max_retries = OCTOPUS_MAX_RETRIES
for attempt in range(max_retries):
# Check for shutdown signal
if self.api_stop:
self.log("Octopus API: Aborting retry loop due to shutdown")
return None

data_as_json = await self.async_read_response(response, url, ignore_errors=ignore_errors)
if data_as_json is not None:
return data_as_json
@@ -1212,7 +1217,9 @@ async def async_read_response(self, response, url, ignore_errors=False):
if error_code == "KT-CT-1199":
msg = f'Warn: Octopus API: Rate limit error in request ({url}): {data_as_json["errors"]}'
self.log(msg)
await asyncio.sleep(5) # Sleep briefly to avoid hammering
# Don't sleep if shutting down
if not self.api_stop:
await asyncio.sleep(5) # Sleep briefly to avoid hammering
return None

# Return the response as-is - let caller handle other errors (including auth errors that need retry)
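
The pattern both octopus.py hunks introduce, bailing out of the retry loop and skipping the back-off sleep once shutdown is signalled, as a self-contained sketch; `is_shutting_down` stands in for `self.api_stop` and the helper names are illustrative:

```python
import asyncio

async def fetch_with_retries(fetch_once, is_shutting_down, max_retries=5, delay=5):
    """Retry an async fetch, aborting promptly when shutdown is requested."""
    for attempt in range(max_retries):
        if is_shutting_down():  # mirrors the `self.api_stop` check
            print("Aborting retry loop due to shutdown")
            return None
        result = await fetch_once()
        if result is not None:
            return result
        if not is_shutting_down():  # don't sleep while shutting down
            await asyncio.sleep(delay)
    return None

async def demo():
    calls = iter([None, None, {"ok": True}])
    async def fetch_once():
        return next(calls)
    return await fetch_with_retries(fetch_once, lambda: False, delay=0)

print(asyncio.run(demo()))  # -> {'ok': True} on the third attempt
```
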
2 changes: 1 addition & 1 deletion apps/predbat/predbat.py
@@ -27,7 +27,7 @@
import requests
import asyncio

THIS_VERSION = "v8.33.3"
THIS_VERSION = "v8.33.4"

# fmt: off
PREDBAT_FILES = ["predbat.py", "const.py", "hass.py", "config.py", "prediction.py", "gecloud.py", "utils.py", "inverter.py", "ha.py", "download.py", "web.py", "web_helper.py", "predheat.py", "futurerate.py", "octopus.py", "solcast.py", "execute.py", "plan.py", "fetch.py", "output.py", "userinterface.py", "energydataservice.py", "alertfeed.py", "compare.py", "db_manager.py", "db_engine.py", "plugin_system.py", "ohme.py", "components.py", "fox.py", "carbon.py", "temperature.py", "web_mcp.py", "component_base.py", "axle.py", "solax.py", "solis.py", "unit_test.py", "load_ml_component.py", "load_predictor.py"]
4 changes: 2 additions & 2 deletions apps/predbat/tests/test_inverter.py
@@ -1492,7 +1492,7 @@ def run_inverter_tests(my_predbat_dummy):
assert_soc_max=9.523,
assert_soc=3.333,
assert_pv_power=10,
assert_load_power=590, # Calculated from energy balance: PV(10) + Grid(-79) + Battery(659) = 590
assert_load_power=624,
assert_charge_start_time_minutes=1410,
assert_charge_end_time_minutes=1770,
assert_discharge_start_time_minutes=1380,
@@ -1513,7 +1513,7 @@ def run_inverter_tests(my_predbat_dummy):
assert_serial_number="EA2303G082",
assert_soc=7.62,
assert_pv_power=247.0,
assert_load_power=233.0, # Calculated from energy balance: PV(247) + Grid(3) + Battery(-17) = 233
assert_load_power=197.0,
assert_charge_start_time_minutes=1440,
assert_charge_end_time_minutes=1440,
assert_discharge_start_time_minutes=1445,
102 changes: 85 additions & 17 deletions docs/load-ml.md
@@ -23,7 +23,9 @@ The ML Load Prediction component uses a lightweight multi-layer perceptron (MLP)
- Learns daily and weekly patterns automatically
- Supports historical PV generation data as an input feature
- Supports temperature forecast data for improved accuracy
- Uses historical and future energy rates as an input feature
- Uses historical and future energy import/export rates as input features
- Deep neural network with 4 hidden layers [512, 256, 128, 64 neurons]
- Optimized with He initialization and AdamW weight decay for robust training
- Automatically trains on historical data (requires at least 1 day, recommended 7+ days)
- Fine-tunes periodically to adapt to changing patterns
- Model persists across restarts
@@ -33,7 +35,19 @@ The ML Load Prediction component uses a lightweight multi-layer perceptron (MLP)

### Architecture

The ML Load Predictor uses a deep neural network with an input layer, some hidden layers and an output layer.
The ML Load Predictor uses a deep multi-layer perceptron (MLP) with the following architecture:

- **Input Layer**: 1444 features (288 load + 288 PV + 288 temperature + 288 import rates + 288 export rates + 4 time features)
- **Hidden Layers**: 4 layers with [512, 256, 128, 64] neurons using ReLU activation
- **Output Layer**: 1 neuron (predicts next 5-minute step)
- **Total Parameters**: ~900,000 trainable weights and biases (matching the layer sizes above; see the sketch below)
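
A toy forward pass with these layer sizes makes the shape bookkeeping concrete (a NumPy sketch, not the component's actual code):

```python
import numpy as np

LAYER_SIZES = [1444, 512, 256, 128, 64, 1]  # input, four hidden, output

rng = np.random.default_rng(42)
weights = [rng.standard_normal((m, n)).astype(np.float32) * np.sqrt(2.0 / m)
           for m, n in zip(LAYER_SIZES[:-1], LAYER_SIZES[1:])]  # He init
biases = [np.zeros(n, dtype=np.float32) for n in LAYER_SIZES[1:]]

def forward(x):
    """ReLU MLP forward pass; the output layer is linear."""
    for i, (w, b) in enumerate(zip(weights, biases)):
        x = x @ w + b
        if i < len(weights) - 1:  # no activation on the output layer
            x = np.maximum(x, 0.0)
    return x

x = rng.standard_normal((1, 1444)).astype(np.float32)
print(forward(x).shape)  # (1, 1) -> one predicted 5-minute step
print(sum(w.size + b.size for w, b in zip(weights, biases)))  # 912385
```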

**Optimization Techniques:**

- **He Initialization**: Weights initialized using He/Kaiming method (`std = sqrt(2/fan_in)`), optimized for ReLU activations
- **AdamW Optimizer**: Adam optimization with weight decay (L2 regularization, default 0.01) to prevent overfitting (see the sketch after this list)
- **Early Stopping**: Training halts if validation error stops improving (patience=5 epochs)
- **Weighted Samples**: Recent data weighted more heavily (exponential decay over 7 days)
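
A sketch of the decoupled weight-decay update, reduced to a single weight matrix; it mirrors `_adam_update` in this PR, where decay is applied multiplicatively after the Adam step and never to biases:

```python
import numpy as np

def adamw_step(w, grad, m, v, t, lr=0.001, weight_decay=0.01,
               beta1=0.9, beta2=0.999, eps=1e-8):
    """One AdamW update for a single weight array; returns (w, m, v)."""
    m = beta1 * m + (1 - beta1) * grad       # first-moment estimate
    v = beta2 * v + (1 - beta2) * grad ** 2  # second-moment estimate
    m_hat = m / (1 - beta1 ** t)             # bias correction
    v_hat = v / (1 - beta2 ** t)
    w = w - lr * m_hat / (np.sqrt(v_hat) + eps)
    if weight_decay > 0:                     # decoupled L2 shrinkage
        w = w * (1 - lr * weight_decay)
    return w, m, v
```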

### Input Features

@@ -49,12 +63,19 @@ The neural network uses several types of input features to make predictions:
- Requires `pv_today` sensor to be configured

3. **Historical Temperature**
- Past 7 days and future 2 days of temperature data at 5-minute intervals
- Past 24 hours of temperature data at 5-minute intervals
- Helps correlate temperature with energy usage (heating/cooling)
- **Requires the Temperature component to be enabled**

4. **Cyclical Time Features** (4 features)
- Sin/Cos encoding of hour-of-day (captures daily patterns)
4. **Historical Import/Export Energy Rates** (288 + 288 features)
- Past 24 hours of electricity import rates at 5-minute intervals
- Past 24 hours of electricity export rates at 5-minute intervals
- Helps the model learn consumption patterns based on time-of-use pricing
- Automatically extracted from your configured Octopus Energy tariffs or other rate sources
- Particularly useful for homes that shift usage to cheaper rate periods

5. **Cyclical Time Features** (4 features)
- Sin/Cos encoding of minute-of-day (captures daily patterns with 5-min precision)
- Sin/Cos encoding of day-of-week (captures weekly patterns)
- These features help the network understand that 23:55 is close to 00:05 (a short encoding sketch follows this list)
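
A minimal sketch of the encoding:

```python
import numpy as np

def time_features(minute_of_day, day_of_week):
    """Cyclical sin/cos features: 23:55 and 00:05 land close together."""
    day_angle = 2 * np.pi * minute_of_day / 1440  # 1440 minutes per day
    week_angle = 2 * np.pi * day_of_week / 7
    return np.array([np.sin(day_angle), np.cos(day_angle),
                     np.sin(week_angle), np.cos(week_angle)])

print(np.round(time_features(1435, 0), 3))  # 23:55 -> [-0.022  1.  0.  1.]
print(np.round(time_features(5, 0), 3))     # 00:05 -> [ 0.022  1.  0.  1.]
```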

@@ -75,16 +96,26 @@ To prevent drift in long-range predictions, the model blends autoregressive pred
**Initial Training:**

- Requires at least 1 day of historical data (7+ days recommended)
- Uses 50 epochs with early stopping
- Uses 100 epochs with early stopping (patience=5)
- Batch size: 128 samples
- AdamW optimizer with learning rate 0.001 and weight decay 0.01
- Sample weighting: exponential time decay (recent data weighted more; see the sketch after this list)
- Validates on the last 24 hours of data
- Saves model to disk: `predbat_ml_model.npz`
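
The time-decay weighting can be sketched as follows; the exact decay form is an assumption, with the 7-day constant taken from `ml_time_decay_days`:

```python
import numpy as np

def sample_weights(age_days, decay_days=7.0):
    """Exponential time decay: a sample `decay_days` old gets weight 1/e."""
    return np.exp(-np.asarray(age_days, dtype=np.float32) / decay_days)

print(np.round(sample_weights([0, 1, 7, 14]), 3))  # [1. 0.867 0.368 0.135]
```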

**Regularization:**

- **Weight Decay**: L2 penalty (0.01) applied to network weights to prevent overfitting
- **Early Stopping**: Training halts if validation error doesn't improve for 5 consecutive epochs (outlined after this list)
- **Time-Weighted Samples**: Recent data has higher importance (7-day exponential decay)
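
In outline, early stopping looks like this (a sketch; the component's actual training loop may differ):

```python
def train_with_early_stopping(train_epoch, validate, max_epochs=100, patience=5):
    """Stop once validation MAE has not improved for `patience` epochs."""
    best_mae, best_state, stale = float("inf"), None, 0
    for epoch in range(max_epochs):
        state = train_epoch()      # one pass over the training data
        val_mae = validate(state)  # MAE on the held-out last 24 hours
        if val_mae < best_mae:
            best_mae, best_state, stale = val_mae, state, 0
        else:
            stale += 1
            if stale >= patience:  # no improvement for 5 epochs in a row
                break
    return best_state, best_mae
```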

**Fine-tuning:**

- Runs every 2 hours if enabled
- Uses last 24 hours of data
- Uses 2 epochs to quickly adapt to recent changes
- Uses 3 epochs to quickly adapt to recent changes
- Preserves learned patterns while adapting to new ones
- Same regularization techniques applied

**Model Validation:**

@@ -187,7 +218,7 @@ Check the Predbat logs for training progress:

```text
ML Component: Starting initial training
ML Predictor: Starting initial training with 50 epochs
ML Predictor: Starting initial training with 100 epochs
ML Predictor: Training complete, final val_mae=0.3245 kWh
ML Component: Initial training completed, validation MAE=0.3245 kWh
```
@@ -207,7 +238,7 @@ You can visualize these predictions in the Predbat web interface or by creating
The ML component tracks several status indicators:

- **Model Status**: `not_initialized`, `training`, `active`, `validation_failed`, `stale`
- **Validation MAE**: Mean Absolute Error on validation data (in kWh per 5-min step)
- **Validation MAE**: Mean Absolute Error on validation data (see [Understanding MAE](#understanding-mae-mean-absolute-error) for details)
- **Model Age**: How long since the model was last trained

You can check model status in the Predbat logs or via the component status page in the web interface.
@@ -216,11 +247,47 @@ You can check model status in the Predbat logs or via the component status page

Good predictions require:

1. **Sufficient Historical Data**: At least 7 days recommended
1. **Sufficient Historical Data**: At least 7 days recommended for stable patterns
2. **Consistent Patterns**: Regular daily/weekly routines improve accuracy
3. **Temperature Data**: Especially important for homes with electric heating/cooling
4. **Clean Data**: Avoid gaps or incorrect readings in historical data
5. **Recent Training**: Model should be retrained periodically (happens automatically)
3. **Temperature Data**: Especially important for homes with electric heating/cooling (requires Temperature component)
4. **Energy Rate Data**: Automatically included - helps model learn consumption patterns based on time-of-use tariffs
5. **PV Generation Data**: If you have solar panels, include `pv_today` sensor for better correlation
6. **Clean Data**: Avoid gaps or incorrect readings in historical data
7. **Recent Training**: Model should be retrained periodically (happens automatically every 2 hours)

### Understanding MAE (Mean Absolute Error)

The model's accuracy is measured using **MAE (Mean Absolute Error)**, which is the primary metric used for validation and monitoring.

**What is MAE?**

MAE measures the average absolute difference between predicted and actual energy consumption values. For example:

- If the model predicts 0.5 kWh for a 5-minute period and actual consumption is 0.7 kWh, the error is 0.2 kWh
- MAE is the average of these errors across all predictions, as the short computation below shows
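
In code, the metric and its conversion to average power for 5-minute steps:

```python
import numpy as np

predicted = np.array([0.50, 0.30, 0.45])  # kWh per 5-minute step
actual = np.array([0.70, 0.25, 0.40])

mae_kwh = np.mean(np.abs(predicted - actual))  # average absolute error
mae_kw = mae_kwh * 12                          # 12 five-minute steps per hour
print(round(float(mae_kwh), 3), round(float(mae_kw), 3))  # 0.1 1.2
```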

**How to interpret MAE:**

- **MAE is in kWh per 5-minute step** - this is the average prediction error for each 5-minute interval
- **Lower is better** - an MAE of 0.3 kWh means predictions are typically off by ±0.3 kWh per 5-minute period
- **Scale matters** - a 0.3 kWh error means different things for different households:
- Low consumption home (2 kW average): 0.3 kWh per 5-min ≈ 3.6 kW error → significant
- High consumption home (8 kW average): 0.3 kWh per 5-min ≈ 3.6 kW error → moderate

**Practical example:**

If your validation MAE is 0.4 kWh per 5-min step:

- Each 5-minute prediction is off by an average of 0.4 kWh (±80 Wh per minute)
- This translates to roughly ±4.8 kW average power error
- Over 1 hour (12 steps), cumulative error averages out but could be up to ±4.8 kWh
- The model learns patterns, so errors tend to cancel out over longer periods

**Why MAE is used:**

- **Easy to interpret**: Errors are in the same units as predictions (kWh)
- **Robust to outliers**: Unlike squared errors, large mistakes don't dominate the metric
- **Practical measure**: Directly relates to how much your battery plan might be affected

### Expected Accuracy

@@ -296,13 +363,14 @@ Access predictions via:

The trained model is saved to disk as `predbat_ml_model.npz` in your Predbat config directory. This file contains:

- Network weights and biases
- Normalization parameters (mean, standard deviation)
- Training metadata (epochs, timestamp, version)
- **Network weights and biases**: All 4 hidden layers plus output layer
- **Optimizer state**: Adam momentum terms for continuing fine-tuning
- **Normalization parameters**: Feature and target mean/standard deviation
- **Training metadata**: Epochs trained, timestamp, model version, architecture details

The model is automatically loaded on Predbat restart, allowing predictions to continue immediately without retraining.

If the model becomes unstable you can also delete this file to start again.
**Note**: If you update Predbat and the model architecture or version changes, the old model will be rejected and a new model will be trained from scratch. If the model becomes unstable, you can manually delete `predbat_ml_model.npz` to force retraining.
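
For reference, a sketch of the `.npz` round-trip; the key names here are illustrative, not the component's exact schema:

```python
import numpy as np

# Save: weight arrays plus scalar metadata (np.savez stores each as an array)
np.savez("predbat_ml_model.npz",
         w0=np.zeros((4, 2), dtype=np.float32),
         b0=np.zeros(2, dtype=np.float32),
         feat_mean=np.float32(0.42),
         version=np.int32(3))

# Load: check the version before trusting the weights
data = np.load("predbat_ml_model.npz")
if int(data["version"]) != 3:
    raise ValueError("Model version mismatch - retrain from scratch")
print(data["w0"].shape)  # (4, 2)
```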

---
