From ae090115c465637b799cdcfff1799e06a0bbed92 Mon Sep 17 00:00:00 2001
From: mfaferek93 <michal.faferek@selfpatch.ai>
Date: Tue, 30 Jun 2026 18:32:17 +0200
Subject: [PATCH 1/5] Add tamper-evident append-only fault audit log

Append-only, hash-chained audit log of fault state transitions in the
fault manager. Each transition appends one immutable row with
record_hash = sha256(prev_hash + canonical(event)) (OpenSSL EVP SHA-256),
a persisted chain head that resumes across restarts, a verify routine,
a read API, and retention that seals a segment anchor before pruning so
the surviving tail stays verifiable. Off by default.

Refs #483
---
 src/ros2_medkit_fault_manager/CHANGELOG.rst   |   4 +
 src/ros2_medkit_fault_manager/CMakeLists.txt  |   8 +
 src/ros2_medkit_fault_manager/README.md       |  18 +
 .../config/fault_manager.yaml                 |  14 +
 .../fault_audit_log.hpp                       | 144 +++++
 .../fault_manager_node.hpp                    |  17 +
 src/ros2_medkit_fault_manager/package.xml     |   1 +
 .../src/fault_audit_log.cpp                   | 522 ++++++++++++++++++
 .../src/fault_manager_node.cpp                | 108 +++-
 .../test/test_fault_audit_log.cpp             | 270 +++++++++
 .../test/test_fault_manager.cpp               | 119 ++++
 11 files changed, 1224 insertions(+), 1 deletion(-)
 create mode 100644 src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
 create mode 100644 src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
 create mode 100644 src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp

diff --git a/src/ros2_medkit_fault_manager/CHANGELOG.rst b/src/ros2_medkit_fault_manager/CHANGELOG.rst
index a6130582..902b8447 100644
--- a/src/ros2_medkit_fault_manager/CHANGELOG.rst
+++ b/src/ros2_medkit_fault_manager/CHANGELOG.rst
@@ -2,6 +2,10 @@
 Changelog for package ros2_medkit_fault_manager
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Forthcoming
+-----------
+* Optional tamper-evident, append-only audit log of fault state transitions: each transition appends one immutable, hash-chained row (``record_hash = sha256(prev_hash + canonical(event))`` via OpenSSL EVP SHA-256) with a persisted chain head, a ``verify`` routine, a read API, and retention that seals a segment anchor before pruning. Off by default (`#483 <https://github.com/selfpatch/ros2_medkit/issues/483>`_)
+
 0.6.0 (2026-06-22)
 ------------------
 * Bounded concurrent snapshot capture under fault storms with a ``CaptureThreadPool`` and configurable capture pool / queue / overflow-policy parameters. The rosbag leg is serialized and the cooldown map is bounded, so a burst of simultaneous faults can no longer exhaust capture threads or grow memory without limit (`#456 <https://github.com/selfpatch/ros2_medkit/pull/456>`_)
diff --git a/src/ros2_medkit_fault_manager/CMakeLists.txt b/src/ros2_medkit_fault_manager/CMakeLists.txt
index 23c4924c..9421ed7c 100644
--- a/src/ros2_medkit_fault_manager/CMakeLists.txt
+++ b/src/ros2_medkit_fault_manager/CMakeLists.txt
@@ -41,6 +41,8 @@ find_package(ros2_medkit_msgs REQUIRED)
 find_package(ros2_medkit_serialization REQUIRED)
 find_package(SQLite3 REQUIRED)
 find_package(nlohmann_json REQUIRED)
+# OpenSSL EVP SHA-256 for the tamper-evident audit log hash chain
+find_package(OpenSSL REQUIRED)
 # yaml-cpp is required as transitive dependency from ros2_medkit_serialization
 medkit_find_yaml_cpp()
 # rosbag2 for time-window snapshot recording
@@ -55,6 +57,7 @@ add_library(fault_manager_lib STATIC
   src/fault_manager_node.cpp
   src/fault_storage.cpp
   src/sqlite_fault_storage.cpp
+  src/fault_audit_log.cpp
   src/snapshot_capture.cpp
   src/rosbag_capture.cpp
   src/correlation/types.cpp
@@ -81,6 +84,7 @@ target_link_libraries(fault_manager_lib PUBLIC
   SQLite::SQLite3
   nlohmann_json::nlohmann_json
   yaml-cpp::yaml-cpp
+  OpenSSL::Crypto
 )
 
 medkit_apply_compat_defs(fault_manager_lib)
@@ -143,6 +147,10 @@ if(BUILD_TESTING)
   medkit_target_dependencies(test_sqlite_storage rclcpp ros2_medkit_msgs)
   medkit_set_test_domain(test_sqlite_storage)
 
+  # Fault audit log tests (hash chain, verify, rotation, reopen)
+  ament_add_gtest(test_fault_audit_log test/test_fault_audit_log.cpp)
+  target_link_libraries(test_fault_audit_log fault_manager_lib)
+
   # Snapshot capture tests
   ament_add_gtest(test_snapshot_capture test/test_snapshot_capture.cpp)
   target_link_libraries(test_snapshot_capture fault_manager_lib)
diff --git a/src/ros2_medkit_fault_manager/README.md b/src/ros2_medkit_fault_manager/README.md
index 8013acfe..cccaaa6a 100644
--- a/src/ros2_medkit_fault_manager/README.md
+++ b/src/ros2_medkit_fault_manager/README.md
@@ -53,6 +53,7 @@ ros2 service call /fault_manager/clear_fault ros2_medkit_msgs/srv/ClearFault \
 - **Debounce filtering** (optional): AUTOSAR DEM-style counter-based fault confirmation with per-entity threshold overrides
 - **Snapshot capture**: Captures topic data when faults are confirmed for debugging (snapshots are deleted when fault is cleared)
 - **Fault correlation** (optional): Root cause analysis with symptom muting and auto-clear
+- **Tamper-evident audit log** (optional): Append-only, hash-chained record of fault state transitions for verifiable history
 
 ## Parameters
 
@@ -109,6 +110,23 @@ patterns:
 
 **Memory**: Faults are stored in memory only. Useful for testing or when persistence is not required.
 
+## Advanced: Tamper-Evident Audit Log
+
+An optional append-only, hash-chained audit log records every fault state transition (`occurred`, `confirmed`, `cleared`) so the fault history is verifiable and any later edit or deletion is detectable. It is **off by default** because it adds a write and storage cost per transition.
+
+Each transition appends one immutable row holding `record_hash = sha256(prev_hash + canonical(event))` (OpenSSL EVP SHA-256), the `prev_hash` it links to, and a monotonic `seq`. The hash is computed once at insert and the stored value is never rewritten. A persisted chain head lets the chain resume across restarts. The log is stored in its own SQLite database (separate from the fault store) and is treated as append-only: the manager only inserts rows and, when retention is enabled, prunes a sealed prefix; it never updates an existing row.
+
+`verify()` walks the persisted chain oldest-first and recomputes every link: editing a row breaks its `record_hash`, deleting a row breaks the next row's `prev_hash` linkage, and deleting the newest row is caught by the persisted-head check.
+
+**Retention/rotation**: when more than `audit_log.retention_max_records` rows are retained, the oldest segment is *sealed* (its final `seq` + hash are persisted as an anchor) and then pruned. The surviving tail still verifies because the oldest retained row links back to the sealed anchor.
+
+| Parameter | Type | Default | Description |
+|-----------|------|---------|-------------|
+| `audit_log.enabled` | bool | `false` | Enable the tamper-evident audit log |
+| `audit_log.transitions` | string | `"all"` | Which transitions to record: `"all"` or `"confirmed_only"` |
+| `audit_log.database_path` | string | `""` | SQLite path. Empty => sibling `fault_audit.db` next to the fault DB (or `:memory:` for in-memory fault stores) |
+| `audit_log.retention_max_records` | int | `0` | Seal + prune the oldest segment beyond this many retained records (0 = unlimited) |
+
 ## Usage
 
 ### Launch
diff --git a/src/ros2_medkit_fault_manager/config/fault_manager.yaml b/src/ros2_medkit_fault_manager/config/fault_manager.yaml
index d421c0fd..3f94e71c 100644
--- a/src/ros2_medkit_fault_manager/config/fault_manager.yaml
+++ b/src/ros2_medkit_fault_manager/config/fault_manager.yaml
@@ -27,3 +27,17 @@ fault_manager:
     # snapshots.capture_pool_size: 2                  # max concurrent capture threads (>= 1)
     # snapshots.capture_queue_depth: 16               # max pending captures (>= 1)
     # snapshots.capture_queue_full_policy: reject_newest  # reject_newest | drop_oldest
+
+    # Tamper-evident, append-only audit log of fault state transitions
+    # (occurred/confirmed/cleared). OFF by default: it adds a write + storage cost
+    # per transition. When enabled, each transition appends one immutable,
+    # hash-chained row so any later edit or deletion is detectable via verify.
+    audit_log.enabled: false
+    # Which transitions to record: "all" or "confirmed_only".
+    # audit_log.transitions: all
+    # SQLite path. Empty => sibling "fault_audit.db" next to the fault DB
+    # (or :memory: when the fault store is in-memory).
+    # audit_log.database_path: ""
+    # Seal + prune the oldest segment beyond this many retained records
+    # (0 = unlimited). A sealed anchor keeps the surviving tail verifiable.
+    # audit_log.retention_max_records: 0
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
new file mode 100644
index 00000000..59cca09d
--- /dev/null
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
@@ -0,0 +1,144 @@
+// Copyright 2026 mfaferek93, bburda
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include <sqlite3.h>
+
+#include <cstdint>
+#include <mutex>
+#include <string>
+#include <vector>
+
+namespace ros2_medkit_fault_manager {
+
+/// A single fault state-transition to record in the audit log.
+///
+/// `transition` is one of the kTransition* constants below. The remaining
+/// fields describe the fault at the moment of the transition; all of them feed
+/// the canonical serialization that the hash chain is computed over, so any
+/// later edit to a stored row is detectable.
+struct AuditEvent {
+  std::string fault_code;
+  std::string transition;   ///< occurred | confirmed | cleared | ack
+  uint8_t severity{0};      ///< severity at the time of the transition
+  std::string status;       ///< resulting fault status (e.g. CONFIRMED)
+  std::string source_id;    ///< reporting source that drove the transition
+  std::string description;  ///< human-readable description
+  int64_t occurred_at_ns{0};  ///< wall-clock timestamp of the transition
+};
+
+/// Canonical transition kinds. Stored verbatim, so they are part of the hash.
+constexpr const char * kTransitionOccurred = "occurred";
+constexpr const char * kTransitionConfirmed = "confirmed";
+constexpr const char * kTransitionCleared = "cleared";
+constexpr const char * kTransitionAck = "ack";
+
+/// One immutable, hash-chained row read back from the audit log.
+struct AuditRecord {
+  int64_t seq{0};
+  AuditEvent event;
+  std::string prev_hash;
+  std::string record_hash;
+};
+
+/// Persisted head of the hash chain.
+struct ChainHead {
+  int64_t seq{0};            ///< 0 when the chain is empty
+  std::string record_hash;   ///< genesis hash when the chain is empty
+};
+
+/// Result of verifying the persisted chain.
+struct AuditVerifyResult {
+  bool ok{true};
+  int64_t checked{0};   ///< number of records walked
+  int64_t bad_seq{0};   ///< seq of the first offending record (0 if ok)
+  std::string error;    ///< human-readable reason when !ok
+};
+
+/// Append-only, hash-chained audit log of fault state transitions.
+///
+/// Each appended row stores `record_hash = sha256(prev_hash + canonical(seq, event))`
+/// using OpenSSL's EVP SHA-256 (the same primitive the gateway links). The hash
+/// is computed once at insert and the stored value is never rewritten. A persisted
+/// chain head lets the chain resume across process restarts, and rotation seals a
+/// segment by persisting an anchor (its final seq + hash) before pruning so the
+/// surviving history stays verifiable.
+///
+/// The table is treated as append-only: this class only ever INSERTs rows (and,
+/// on rotation, deletes a sealed prefix). It never UPDATEs an existing record.
+class FaultAuditLog {
+ public:
+  /// Open (or create) the audit log database.
+  /// @param db_path SQLite path. Use ":memory:" for an in-memory log.
+  /// @param retention_max_records Max records to retain before rotation seals and
+  ///        prunes the oldest segment. 0 or a negative value disables rotation.
+  /// @throws std::runtime_error if the database cannot be opened or initialized.
+  explicit FaultAuditLog(const std::string & db_path, int64_t retention_max_records = 0);
+
+  ~FaultAuditLog();
+
+  FaultAuditLog(const FaultAuditLog &) = delete;
+  FaultAuditLog & operator=(const FaultAuditLog &) = delete;
+  FaultAuditLog(FaultAuditLog &&) = delete;
+  FaultAuditLog & operator=(FaultAuditLog &&) = delete;
+
+  /// Append one transition: hash it, then insert the row and advance the persisted head in one transaction.
+  /// @return the monotonic seq assigned to the new record.
+  /// @throws std::runtime_error if hashing or any database step fails.
+  int64_t append(const AuditEvent & event);
+
+  /// Walk the persisted chain oldest-first and validate every link.
+  AuditVerifyResult verify() const;
+
+  /// Read records oldest-first.
+  /// @param limit Max records to return (0 = all).
+  /// @param after_seq Only return records with seq > after_seq (0 = from start).
+  std::vector<AuditRecord> read(int64_t limit = 0, int64_t after_seq = 0) const;
+
+  /// Current chain head, served from the in-memory copy that append() keeps in sync.
+  ChainHead head() const;
+
+  /// Number of records currently retained (excludes pruned/sealed rows).
+  int64_t record_count() const;
+
+  /// Deterministic canonical serialization of an event at a given seq.
+  /// Stable field order so verify is reproducible across processes.
+  static std::string canonicalize(int64_t seq, const AuditEvent & event);
+
+  /// Genesis hash used as prev_hash for the very first record.
+  static std::string genesis_hash();
+
+  /// SHA-256 of `data` as a lowercase hex string (OpenSSL EVP).
+  static std::string sha256_hex(const std::string & data);
+
+  const std::string & db_path() const {
+    return db_path_;
+  }
+
+ private:
+  void initialize_schema();
+  ChainHead load_head_locked() const;
+  void store_head_locked(const ChainHead & head_record);
+  /// Seal + prune the oldest segment if the retained count exceeds the limit.
+  void rotate_if_needed_locked();
+
+  std::string db_path_;
+  int64_t retention_max_records_{0};  ///< 0 = rotation disabled (negative input is clamped to 0)
+  sqlite3 * db_{nullptr};
+  mutable std::mutex mutex_;  ///< held by append/verify/read/head/record_count
+  ChainHead head_;  ///< cached head, kept in sync with the head table
+};
+
+}  // namespace ros2_medkit_fault_manager
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
index 9af908a7..7c603232 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
@@ -23,6 +23,7 @@
 #include "ros2_medkit_fault_manager/capture_thread_pool.hpp"
 #include "ros2_medkit_fault_manager/correlation/correlation_engine.hpp"
 #include "ros2_medkit_fault_manager/entity_threshold_resolver.hpp"
+#include "ros2_medkit_fault_manager/fault_audit_log.hpp"
 #include "ros2_medkit_fault_manager/fault_storage.hpp"
 #include "ros2_medkit_fault_manager/rosbag_capture.hpp"
 #include "ros2_medkit_fault_manager/snapshot_capture.hpp"
@@ -148,6 +149,18 @@ class FaultManagerNode : public rclcpp::Node {
   /// Falls back to global config if no entity-specific overrides match.
   DebounceConfig resolve_config(const std::string & source_id) const;
 
+  /// Create the tamper-evident audit log from parameters (nullptr if disabled).
+  std::unique_ptr<FaultAuditLog> create_audit_log();
+
+  /// Append a fault state-transition to the audit log when enabled. No-op when
+  /// the log is off, or when only confirmations are logged and this is not one.
+  /// @param transition One of the kTransition* constants.
+  /// @param fault The fault state at the time of the transition.
+  /// @param source_id The reporting source that drove the transition.
+  /// @param occurred_at_ns Wall-clock timestamp of the transition.
+  void audit_transition(const char * transition, const ros2_medkit_msgs::msg::Fault & fault,
+                        const std::string & source_id, int64_t occurred_at_ns);
+
   std::string storage_type_;
   std::string database_path_;
   int32_t confirmation_threshold_{-1};
@@ -162,6 +175,10 @@ class FaultManagerNode : public rclcpp::Node {
   std::unique_ptr<FaultStorage> storage_;
   std::unique_ptr<EntityThresholdResolver> threshold_resolver_;  ///< Per-entity threshold overrides
 
+  /// Tamper-evident audit log of fault transitions (nullptr when disabled).
+  std::unique_ptr<FaultAuditLog> audit_log_;
+  bool audit_confirmed_only_{false};  ///< When true, only "confirmed" transitions are logged
+
   rclcpp::Service<ros2_medkit_msgs::srv::ReportFault>::SharedPtr report_fault_srv_;
   rclcpp::Service<ros2_medkit_msgs::srv::ListFaults>::SharedPtr list_faults_srv_;
   rclcpp::Service<ros2_medkit_msgs::srv::GetFault>::SharedPtr get_fault_srv_;
diff --git a/src/ros2_medkit_fault_manager/package.xml b/src/ros2_medkit_fault_manager/package.xml
index 1658c527..723c7fed 100644
--- a/src/ros2_medkit_fault_manager/package.xml
+++ b/src/ros2_medkit_fault_manager/package.xml
@@ -16,6 +16,7 @@
   <depend>ros2_medkit_serialization</depend>
   <depend>libsqlite3-dev</depend>
   <depend>nlohmann-json-dev</depend>
+  <depend>libssl-dev</depend>
   <depend>rosbag2_cpp</depend>
   <depend>rosbag2_storage</depend>
 
diff --git a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
new file mode 100644
index 00000000..8799eddb
--- /dev/null
+++ b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
@@ -0,0 +1,522 @@
+// Copyright 2026 mfaferek93, bburda
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "ros2_medkit_fault_manager/fault_audit_log.hpp"
+
+#include <openssl/evp.h>
+
+#include <array>
+#include <limits>
+#include <nlohmann/json.hpp>
+#include <stdexcept>
+#include <string>
+
+namespace ros2_medkit_fault_manager {
+
+namespace {
+
+/// 64 hex zeros: prev_hash of the first record. A fixed, well-known anchor so a
+/// verifier can confirm the chain starts where it claims to.
+constexpr const char * kGenesisHash = "0000000000000000000000000000000000000000000000000000000000000000";
+
+/// RAII prepared SQLite statement (audit-log local copy): prepared or throws in the ctor, finalized in the dtor.
+class Stmt {
+ public:
+  Stmt(sqlite3 * db, const char * sql) : db_(db) {
+    if (sqlite3_prepare_v2(db, sql, -1, &stmt_, nullptr) != SQLITE_OK) {
+      throw std::runtime_error(std::string("audit: failed to prepare: ") + sqlite3_errmsg(db));
+    }
+  }
+
+  ~Stmt() {
+    sqlite3_finalize(stmt_);  // finalizing a null statement is a harmless no-op
+  }
+
+  Stmt(const Stmt &) = delete;
+  Stmt & operator=(const Stmt &) = delete;
+
+  void bind_text(int index, const std::string & value) {
+    if (value.size() > static_cast<std::size_t>(std::numeric_limits<int>::max())) {
+      throw std::runtime_error("audit: text too large to bind");
+    }
+    if (sqlite3_bind_text(stmt_, index, value.c_str(), static_cast<int>(value.size()), SQLITE_TRANSIENT) != SQLITE_OK) {
+      throw std::runtime_error(std::string("audit: failed to bind text: ") + sqlite3_errmsg(db_));
+    }
+  }
+
+  void bind_int64(int index, int64_t value) {
+    if (sqlite3_bind_int64(stmt_, index, value) != SQLITE_OK) {
+      throw std::runtime_error(std::string("audit: failed to bind int64: ") + sqlite3_errmsg(db_));
+    }
+  }
+
+  int step() {
+    return sqlite3_step(stmt_);
+  }
+
+  /// Uses the stored byte length so text with embedded NULs reads back exactly as bind_text() wrote it.
+  std::string column_text(int index) {
+    const auto * text = reinterpret_cast<const char *>(sqlite3_column_text(stmt_, index));
+    const auto length = static_cast<std::size_t>(sqlite3_column_bytes(stmt_, index));  // call after column_text
+    return text ? std::string(text, length) : std::string();
+  }
+
+  int64_t column_int64(int index) {
+    return sqlite3_column_int64(stmt_, index);
+  }
+
+ private:
+  sqlite3 * db_;
+  sqlite3_stmt * stmt_{nullptr};
+};
+
+void exec_or_throw(sqlite3 * db, const char * sql, const char * what) {
+  char * err_msg = nullptr;
+  if (sqlite3_exec(db, sql, nullptr, nullptr, &err_msg) != SQLITE_OK) {
+    std::string error = err_msg ? err_msg : "unknown error";
+    sqlite3_free(err_msg);
+    throw std::runtime_error(std::string("audit: ") + what + ": " + error);
+  }
+}
+
+}  // namespace
+
+FaultAuditLog::FaultAuditLog(const std::string & db_path, int64_t retention_max_records)
+    : db_path_(db_path), retention_max_records_(retention_max_records < 0 ? 0 : retention_max_records) {
+  int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX;
+  if (sqlite3_open_v2(db_path.c_str(), &db_, flags, nullptr) != SQLITE_OK) {
+    std::string error = db_ ? sqlite3_errmsg(db_) : "unknown error";
+    sqlite3_close(db_);  // open can leave a handle even on failure; closing nullptr is a no-op
+    throw std::runtime_error("audit: failed to open '" + db_path + "': " + error);
+  }
+  try {
+    sqlite3_busy_timeout(db_, 5000);  // set first so the WAL switch also waits on a busy database
+    exec_or_throw(db_, "PRAGMA journal_mode=WAL;", "enable WAL");
+    initialize_schema();
+    head_ = load_head_locked();
+  } catch (...) {
+    sqlite3_close(db_);  // the destructor does not run when the constructor throws, so close here
+    throw;
+  }
+}
+
+FaultAuditLog::~FaultAuditLog() {
+  if (db_) {
+    sqlite3_close(db_);
+  }
+}
+
+std::string FaultAuditLog::genesis_hash() {
+  return kGenesisHash;
+}
+
+std::string FaultAuditLog::sha256_hex(const std::string & data) {
+  EVP_MD_CTX * digest_ctx = EVP_MD_CTX_new();
+  if (digest_ctx == nullptr) {
+    throw std::runtime_error("audit: EVP_MD_CTX_new failed");
+  }
+
+  // Short-circuit through init/update/final, then free the context before any throw.
+  std::array<unsigned char, EVP_MAX_MD_SIZE> digest{};
+  unsigned int digest_len = 0;
+  bool hashed = EVP_DigestInit_ex(digest_ctx, EVP_sha256(), nullptr) == 1;
+  hashed = hashed && EVP_DigestUpdate(digest_ctx, data.data(), data.size()) == 1;
+  hashed = hashed && EVP_DigestFinal_ex(digest_ctx, digest.data(), &digest_len) == 1;
+  EVP_MD_CTX_free(digest_ctx);
+  if (!hashed) {
+    throw std::runtime_error("audit: SHA-256 digest failed");
+  }
+
+  static constexpr char kHexDigits[] = "0123456789abcdef";
+  std::string hex(static_cast<std::size_t>(digest_len) * 2, '0');
+  for (unsigned int i = 0; i < digest_len; ++i) {
+    hex[2 * i] = kHexDigits[digest[i] >> 4];  // high nibble first
+    hex[2 * i + 1] = kHexDigits[digest[i] & 0x0F];
+  }
+  return hex;
+}
+
+std::string FaultAuditLog::canonicalize(int64_t seq, const AuditEvent & event) {
+  // The returned bytes are exactly what the hash chain is computed over. Key
+  // order, JSON string escaping and plain-decimal numbers must therefore stay
+  // fixed: any change would invalidate every record_hash already stored.
+  // NOTE(review): nlohmann's dump() throws on invalid UTF-8 by default, so a
+  // non-UTF-8 string field would make this throw - confirm inputs are UTF-8.
+
+  // JSON-quote and escape one string field.
+  const auto quoted = [](const std::string & text) {
+    return nlohmann::json(text).dump();
+  };
+
+  std::string canonical;
+  canonical.reserve(192);
+  canonical.append("{\"seq\":").append(std::to_string(seq));
+  canonical.append(",\"ts\":").append(std::to_string(event.occurred_at_ns));
+  canonical.append(",\"code\":").append(quoted(event.fault_code));
+  canonical.append(",\"transition\":").append(quoted(event.transition));
+  canonical.append(",\"severity\":").append(std::to_string(static_cast<int>(event.severity)));
+  canonical.append(",\"status\":").append(quoted(event.status));
+  canonical.append(",\"source\":").append(quoted(event.source_id));
+  canonical.append(",\"description\":").append(quoted(event.description));
+  canonical.append("}");
+  return canonical;
+}
+
+void FaultAuditLog::initialize_schema() {
+  // Immutable transition rows. seq is the monotonic chain index, assigned explicitly by append().
+  exec_or_throw(db_,
+                R"(
+    CREATE TABLE IF NOT EXISTS audit_log (
+      seq INTEGER PRIMARY KEY,
+      occurred_at_ns INTEGER NOT NULL,
+      fault_code TEXT NOT NULL,
+      transition TEXT NOT NULL,
+      severity INTEGER NOT NULL,
+      status TEXT NOT NULL,
+      source_id TEXT NOT NULL,
+      description TEXT NOT NULL,
+      prev_hash TEXT NOT NULL,
+      record_hash TEXT NOT NULL
+    );
+    CREATE INDEX IF NOT EXISTS idx_audit_log_fault_code ON audit_log(fault_code);
+  )",
+                "create audit_log table");
+
+  // Single-row persisted chain head (the CHECK pins id to 1), so the chain resumes across restarts.
+  exec_or_throw(db_,
+                R"(
+    CREATE TABLE IF NOT EXISTS audit_chain_head (
+      id INTEGER PRIMARY KEY CHECK (id = 1),
+      seq INTEGER NOT NULL,
+      record_hash TEXT NOT NULL
+    );
+  )",
+                "create audit_chain_head table");
+
+  // Sealed-segment anchors written before pruning: the final (seq, hash) of a pruned prefix, so the
+  // surviving tail stays verifiable. sealed_at_ns holds the boundary record's own occurred_at_ns.
+  exec_or_throw(db_,
+                R"(
+    CREATE TABLE IF NOT EXISTS audit_anchors (
+      last_seq INTEGER PRIMARY KEY,
+      sealed_at_ns INTEGER NOT NULL,
+      last_hash TEXT NOT NULL
+    );
+  )",
+                "create audit_anchors table");
+}
+
+ChainHead FaultAuditLog::load_head_locked() const {
+  // Prefer the persisted head row.
+  {
+    Stmt stmt(db_, "SELECT seq, record_hash FROM audit_chain_head WHERE id = 1");
+    if (stmt.step() == SQLITE_ROW) {
+      ChainHead head_record;
+      head_record.seq = stmt.column_int64(0);
+      head_record.record_hash = stmt.column_text(1);
+      return head_record;
+    }
+  }
+
+  // No head row (not expected: append() commits the row and the head together).
+  // Defensively resume from the newest retained record if one exists.
+  {
+    Stmt stmt(db_, "SELECT seq, record_hash FROM audit_log ORDER BY seq DESC LIMIT 1");
+    if (stmt.step() == SQLITE_ROW) {
+      ChainHead head_record;
+      head_record.seq = stmt.column_int64(0);
+      head_record.record_hash = stmt.column_text(1);
+      return head_record;
+    }
+  }
+
+  return ChainHead{0, genesis_hash()};  // nothing stored yet: seq 0 chained from the genesis hash
+}
+
+void FaultAuditLog::store_head_locked(const ChainHead & head_record) {
+  Stmt stmt(db_,
+            "INSERT INTO audit_chain_head (id, seq, record_hash) VALUES (1, ?, ?) "
+            "ON CONFLICT(id) DO UPDATE SET seq = excluded.seq, record_hash = excluded.record_hash");
+  stmt.bind_int64(1, head_record.seq);
+  stmt.bind_text(2, head_record.record_hash);
+  if (stmt.step() != SQLITE_DONE) {
+    throw std::runtime_error(std::string("audit: failed to store head: ") + sqlite3_errmsg(db_));
+  }
+}
+
+int64_t FaultAuditLog::append(const AuditEvent & event) {
+  std::lock_guard<std::mutex> lock(mutex_);
+
+  const int64_t new_seq = head_.seq + 1;
+  const std::string prev_hash = head_.record_hash;
+  const std::string canonical = canonicalize(new_seq, event);
+  const std::string record_hash = sha256_hex(prev_hash + canonical);
+
+  exec_or_throw(db_, "BEGIN IMMEDIATE", "begin append");
+  try {
+    Stmt insert(db_,
+                "INSERT INTO audit_log (seq, occurred_at_ns, fault_code, transition, severity, status, "
+                "source_id, description, prev_hash, record_hash) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)");
+    insert.bind_int64(1, new_seq);
+    insert.bind_int64(2, event.occurred_at_ns);
+    insert.bind_text(3, event.fault_code);
+    insert.bind_text(4, event.transition);
+    insert.bind_int64(5, static_cast<int64_t>(event.severity));
+    insert.bind_text(6, event.status);
+    insert.bind_text(7, event.source_id);
+    insert.bind_text(8, event.description);
+    insert.bind_text(9, prev_hash);
+    insert.bind_text(10, record_hash);
+    if (insert.step() != SQLITE_DONE) {
+      throw std::runtime_error(std::string("audit: failed to insert record: ") + sqlite3_errmsg(db_));
+    }
+    // Same transaction as the insert, so the row and the head commit or roll back together.
+    store_head_locked(ChainHead{new_seq, record_hash});
+    exec_or_throw(db_, "COMMIT", "commit append");
+  } catch (...) {
+    // Best-effort ROLLBACK: if SQLite already rolled back, the failing ROLLBACK must not mask the real error.
+    sqlite3_exec(db_, "ROLLBACK", nullptr, nullptr, nullptr);
+    throw;
+  }
+  head_ = ChainHead{new_seq, record_hash};  // only after COMMIT, so the cache never runs ahead of disk
+  // Rotation runs after the commit: if it throws, the new record is nevertheless persisted.
+  rotate_if_needed_locked();
+  return new_seq;
+}
+
+void FaultAuditLog::rotate_if_needed_locked() {
+  if (retention_max_records_ <= 0) {
+    return;
+  }
+
+  // Count retained rows and find the oldest surviving seq.
+  int64_t count = 0;
+  int64_t min_seq = 0;
+  {
+    Stmt stmt(db_, "SELECT COUNT(*), COALESCE(MIN(seq), 0) FROM audit_log");
+    if (stmt.step() == SQLITE_ROW) {
+      count = stmt.column_int64(0);
+      min_seq = stmt.column_int64(1);
+    }
+  }
+  if (count <= retention_max_records_) {
+    return;
+  }
+
+  // Prune the oldest (count - retention_max_records_) rows. With contiguous seq values the
+  // boundary row - the highest seq being pruned - is min_seq + prune_count - 1; its hash becomes
+  // the sealed anchor that the first surviving row's prev_hash links back to.
+  const int64_t prune_count = count - retention_max_records_;
+  const int64_t boundary_seq = min_seq + prune_count - 1;
+
+  std::string boundary_hash;
+  int64_t sealed_at_ns = 0;
+  {
+    Stmt stmt(db_, "SELECT record_hash, occurred_at_ns FROM audit_log WHERE seq = ?");
+    stmt.bind_int64(1, boundary_seq);
+    if (stmt.step() != SQLITE_ROW) {
+      // Should not happen given the count; leave the log intact rather than
+      // prune without a valid anchor.
+      return;
+    }
+    boundary_hash = stmt.column_text(0);
+    sealed_at_ns = stmt.column_int64(1);  // the boundary record's own timestamp, not the current time
+  }
+
+  exec_or_throw(db_, "BEGIN IMMEDIATE", "begin rotate");
+  try {
+    Stmt anchor(db_,
+                "INSERT INTO audit_anchors (last_seq, sealed_at_ns, last_hash) VALUES (?, ?, ?) "
+                "ON CONFLICT(last_seq) DO NOTHING");
+    anchor.bind_int64(1, boundary_seq);
+    anchor.bind_int64(2, sealed_at_ns);
+    anchor.bind_text(3, boundary_hash);
+    if (anchor.step() != SQLITE_DONE) {
+      throw std::runtime_error(std::string("audit: failed to write anchor: ") + sqlite3_errmsg(db_));
+    }
+
+    Stmt del(db_, "DELETE FROM audit_log WHERE seq <= ?");
+    del.bind_int64(1, boundary_seq);
+    if (del.step() != SQLITE_DONE) {
+      throw std::runtime_error(std::string("audit: failed to prune records: ") + sqlite3_errmsg(db_));
+    }
+    exec_or_throw(db_, "COMMIT", "commit rotate");
+  } catch (...) {
+    sqlite3_exec(db_, "ROLLBACK", nullptr, nullptr, nullptr);  // best effort; must not mask the original error
+    throw;
+  }
+}
+
+std::vector<AuditRecord> FaultAuditLog::read(int64_t limit, int64_t after_seq) const {
+  std::lock_guard<std::mutex> lock(mutex_);
+
+  // SQLite treats a negative LIMIT as "no upper bound", so a single statement
+  // covers both the bounded case and limit <= 0 ("return everything").
+  Stmt stmt(db_,
+            "SELECT seq, occurred_at_ns, fault_code, transition, severity, status, source_id, description, "
+            "prev_hash, record_hash FROM audit_log WHERE seq > ? ORDER BY seq ASC LIMIT ?");
+  stmt.bind_int64(1, after_seq);
+  stmt.bind_int64(2, limit > 0 ? limit : -1);
+
+  std::vector<AuditRecord> result;
+  // Any step result other than ROW/DONE is an error: throw rather than hand
+  // the caller a silently truncated list.
+  for (int rc = stmt.step(); rc != SQLITE_DONE; rc = stmt.step()) {
+    if (rc != SQLITE_ROW) {
+      throw std::runtime_error(std::string("audit: failed to read records: ") + sqlite3_errmsg(db_));
+    }
+    AuditRecord rec;
+    rec.seq = stmt.column_int64(0);
+    rec.event.occurred_at_ns = stmt.column_int64(1);
+    rec.event.fault_code = stmt.column_text(2);
+    rec.event.transition = stmt.column_text(3);
+    rec.event.severity = static_cast<uint8_t>(stmt.column_int64(4));
+    rec.event.status = stmt.column_text(5);
+    rec.event.source_id = stmt.column_text(6);
+    rec.event.description = stmt.column_text(7);
+    rec.prev_hash = stmt.column_text(8);
+    rec.record_hash = stmt.column_text(9);
+    result.push_back(std::move(rec));
+  }
+  return result;
+}
+
+ChainHead FaultAuditLog::head() const {
+  std::lock_guard<std::mutex> lock(mutex_);
+  return head_;
+}
+
+int64_t FaultAuditLog::record_count() const {
+  std::lock_guard<std::mutex> lock(mutex_);
+
+  // COUNT(*) normally yields exactly one row; a failed step is reported as 0.
+  Stmt counter(db_, "SELECT COUNT(*) FROM audit_log");
+  const bool has_row = counter.step() == SQLITE_ROW;
+  return has_row ? counter.column_int64(0) : 0;
+}
+
+AuditVerifyResult FaultAuditLog::verify() const {
+  std::lock_guard<std::mutex> lock(mutex_);
+
+  AuditVerifyResult result;
+
+  // Walk every retained row oldest-first (ORDER BY seq ASC), recomputing each link; stop at the first failure.
+  Stmt stmt(db_,
+            "SELECT seq, occurred_at_ns, fault_code, transition, severity, status, source_id, description, "
+            "prev_hash, record_hash FROM audit_log ORDER BY seq ASC");
+
+  bool first = true;  // true until the oldest retained row has been checked against genesis or an anchor
+  int64_t expected_seq = 0;
+  std::string expected_prev;  // record_hash of the row verified last; the next row's prev_hash must equal it
+
+  while (stmt.step() == SQLITE_ROW) {
+    AuditRecord rec;
+    rec.seq = stmt.column_int64(0);
+    rec.event.occurred_at_ns = stmt.column_int64(1);
+    rec.event.fault_code = stmt.column_text(2);
+    rec.event.transition = stmt.column_text(3);
+    rec.event.severity = static_cast<uint8_t>(stmt.column_int64(4));  // NOTE(review): wraps values outside 0..255
+    rec.event.status = stmt.column_text(5);
+    rec.event.source_id = stmt.column_text(6);
+    rec.event.description = stmt.column_text(7);
+    rec.prev_hash = stmt.column_text(8);
+    rec.record_hash = stmt.column_text(9);
+
+    if (first) {
+      first = false;
+      // The oldest retained row must link either to genesis (when it is seq 1) or to the sealed
+      // audit_anchors row whose last_seq == rec.seq - 1 (when older rows were pruned).
+      if (rec.seq == 1) {
+        if (rec.prev_hash != genesis_hash()) {
+          result.ok = false;
+          result.bad_seq = rec.seq;
+          result.error = "first record does not link to genesis";
+          return result;
+        }
+      } else {
+        Stmt anchor(db_, "SELECT last_hash FROM audit_anchors WHERE last_seq = ?");
+        anchor.bind_int64(1, rec.seq - 1);
+        if (anchor.step() != SQLITE_ROW) {
+          result.ok = false;
+          result.bad_seq = rec.seq;
+          result.error = "no sealed anchor for the oldest retained record (history truncated)";
+          return result;
+        }
+        const std::string anchor_hash = anchor.column_text(0);
+        if (rec.prev_hash != anchor_hash) {
+          result.ok = false;
+          result.bad_seq = rec.seq;
+          result.error = "oldest retained record does not link to its sealed anchor";
+          return result;
+        }
+      }
+      expected_seq = rec.seq;
+      expected_prev = rec.prev_hash;  // both are reassigned after the hash check below
+    } else {
+      // Every later row must have seq == previous seq + 1 and prev_hash == previous record_hash.
+      if (rec.seq != expected_seq) {
+        result.ok = false;
+        result.bad_seq = rec.seq;
+        result.error = "non-contiguous seq (record deleted or reordered)";
+        return result;
+      }
+      if (rec.prev_hash != expected_prev) {
+        result.ok = false;
+        result.bad_seq = rec.seq;
+        result.error = "prev_hash does not match previous record_hash (chain broken)";
+        return result;
+      }
+    }
+
+    const std::string recomputed = sha256_hex(rec.prev_hash + canonicalize(rec.seq, rec.event));
+    if (recomputed != rec.record_hash) {
+      result.ok = false;
+      result.bad_seq = rec.seq;
+      result.error = "record_hash mismatch (record tampered)";
+      return result;
+    }
+
+    ++result.checked;
+    expected_prev = rec.record_hash;
+    expected_seq = rec.seq + 1;
+  }
+
+  // head_ must name the last row walked: expected_seq - 1 is that row's seq and expected_prev its
+  // record_hash. This catches deletion of the newest row(s), which the row walk alone cannot see.
+  if (result.checked > 0) {
+    if (head_.seq != expected_seq - 1 || head_.record_hash != expected_prev) {
+      result.ok = false;
+      result.bad_seq = head_.seq;
+      result.error = "persisted head does not match the last retained record";
+      return result;
+    }
+  } else {
+    // No rows retained: head_ must be genesis (seq 0, nothing ever written) or match a sealed
+    // anchor by both last_seq and last_hash (every row pruned).
+    if (head_.seq != 0) {
+      Stmt anchor(db_, "SELECT last_hash FROM audit_anchors WHERE last_seq = ?");
+      anchor.bind_int64(1, head_.seq);
+      if (anchor.step() != SQLITE_ROW || anchor.column_text(0) != head_.record_hash) {
+        result.ok = false;
+        result.bad_seq = head_.seq;
+        result.error = "head references a record that is neither retained nor sealed";
+        return result;
+      }
+    }
+  }
+
+  return result;
+}
+
+}  // namespace ros2_medkit_fault_manager
diff --git a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
index c704d989..5692693d 100644
--- a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
@@ -151,6 +151,9 @@ FaultManagerNode::FaultManagerNode(const rclcpp::NodeOptions & options) : Node("
   // Create storage backend
   storage_ = create_storage();
 
+  // Create the tamper-evident fault audit log (disabled by default)
+  audit_log_ = create_audit_log();
+
   // Apply snapshot limit to storage
   if (max_snapshots > 0) {
     storage_->set_max_snapshots_per_fault(static_cast<size_t>(max_snapshots));
@@ -347,6 +350,91 @@ std::unique_ptr<FaultStorage> FaultManagerNode::create_storage() {
   return std::make_unique<InMemoryFaultStorage>();
 }
 
+std::unique_ptr<FaultAuditLog> FaultManagerNode::create_audit_log() {
+  const bool enabled = declare_parameter<bool>("audit_log.enabled", false);
+
+  // Which transitions to record: "all" (occurred/confirmed/cleared) or "confirmed_only".
+  // Any other value is reported with a warning and treated as "all".
+  const std::string transitions = declare_parameter<std::string>("audit_log.transitions", "all");
+  audit_confirmed_only_ = (transitions == "confirmed_only");
+  if (transitions != "all" && transitions != "confirmed_only") {
+    RCLCPP_WARN(get_logger(), "audit_log.transitions '%s' invalid, using 'all'", transitions.c_str());
+    audit_confirmed_only_ = false;
+  }
+
+  // Retention: seal + prune the oldest segment beyond this many records (0 = off; negatives are treated as 0).
+  auto retention = declare_parameter<int64_t>("audit_log.retention_max_records", 0);
+  if (retention < 0) {
+    RCLCPP_WARN(get_logger(), "audit_log.retention_max_records < 0, disabling rotation");
+    retention = 0;
+  }
+
+  // Path: explicit override, else "fault_audit.db" next to the sqlite fault DB, else :memory:.
+  std::string audit_path = declare_parameter<std::string>("audit_log.database_path", "");
+
+  if (!enabled) {
+    return nullptr;  // Off: parameters stay declared, but no DB is opened and audit_transition() returns early.
+  }
+
+  if (audit_path.empty()) {
+    if (database_path_ == ":memory:" || storage_type_ != "sqlite") {
+      audit_path = ":memory:";
+    } else {
+      std::filesystem::path base(database_path_);
+      audit_path = (base.parent_path() / "fault_audit.db").string();
+    }
+  }
+
+  if (audit_path != ":memory:") {  // file-backed log: create the parent directory if it is missing
+    std::filesystem::path p(audit_path);
+    auto parent = p.parent_path();
+    if (!parent.empty() && !std::filesystem::exists(parent)) {
+      try {
+        std::filesystem::create_directories(parent);
+      } catch (const std::filesystem::filesystem_error & e) {
+        RCLCPP_ERROR(get_logger(), "Failed to create audit log directory '%s': %s", parent.string().c_str(), e.what());
+        throw;
+      }
+    }
+  }
+
+  try {  // NOTE(review): the %ld specifiers below assume int64_t is long; confirm for every target platform
+    auto log = std::make_unique<FaultAuditLog>(audit_path, retention);
+    RCLCPP_INFO(get_logger(), "Fault audit log enabled: %s (transitions=%s, retention=%ld, resume_seq=%ld)",
+                audit_path.c_str(), audit_confirmed_only_ ? "confirmed_only" : "all", retention, log->head().seq);
+    return log;
+  } catch (const std::exception & e) {
+    RCLCPP_ERROR(get_logger(), "Failed to open fault audit log '%s': %s", audit_path.c_str(), e.what());
+    throw;
+  }
+}
+
+void FaultManagerNode::audit_transition(const char * transition, const ros2_medkit_msgs::msg::Fault & fault,
+                                        const std::string & source_id, int64_t occurred_at_ns) {
+  if (!audit_log_) {  // no audit log was created (create_audit_log() returned nullptr)
+    return;
+  }
+  if (audit_confirmed_only_ && std::string(transition) != kTransitionConfirmed) {  // "confirmed_only" mode
+    return;
+  }
+
+  AuditEvent event;
+  event.fault_code = fault.fault_code;
+  event.transition = transition;
+  event.severity = fault.severity;
+  event.status = fault.status;
+  event.source_id = source_id;
+  event.description = fault.description;
+  event.occurred_at_ns = occurred_at_ns;
+
+  try {  // an append failure is logged below and not rethrown, so the caller never sees it
+    audit_log_->append(event);
+  } catch (const std::exception & e) {
+    RCLCPP_ERROR(get_logger(), "Failed to append audit record for '%s' (%s): %s", fault.fault_code.c_str(), transition,
+                 e.what());
+  }
+}
+
 void FaultManagerNode::handle_report_fault(
     const std::shared_ptr<ros2_medkit_msgs::srv::ReportFault::Request> & request,
     const std::shared_ptr<ros2_medkit_msgs::srv::ReportFault::Response> & response) {
@@ -390,9 +478,10 @@ void FaultManagerNode::handle_report_fault(
   auto resolved_config = resolve_config(request->source_id);
 
   // Report the fault event (use wall clock time, not sim time, for proper timestamps)
+  const rclcpp::Time event_time = get_wall_clock_time();
   bool is_new =
       storage_->report_fault_event(request->fault_code, request->event_type, request->severity, request->description,
-                                   request->source_id, get_wall_clock_time(), resolved_config);
+                                   request->source_id, event_time, resolved_config);
 
   response->accepted = true;
 
@@ -449,6 +538,16 @@ void FaultManagerNode::handle_report_fault(
     }
     // Note: PREFAILED/PREPASSED status changes don't emit events (debounce in progress)
 
+    // Append tamper-evident audit records for the transitions that just happened.
+    // Recorded regardless of correlation muting: muting affects display, not the
+    // fact that the state transition occurred.
+    if (is_new) {
+      audit_transition(kTransitionOccurred, *fault_after, request->source_id, event_time.nanoseconds());
+    }
+    if (just_confirmed) {
+      audit_transition(kTransitionConfirmed, *fault_after, request->source_id, event_time.nanoseconds());
+    }
+
     // Capture snapshots/rosbag when a fault confirms via the bounded pool (issue #441).
     // handle_report_fault runs on the single-threaded executor, so confirmations are
     // already serialized; last_capture_mutex_ only guards last_capture_times_ itself
@@ -657,6 +756,12 @@ void FaultManagerNode::handle_clear_fault(
     // Auto-clear correlated symptoms
     for (const auto & symptom_code : auto_cleared_codes) {
       storage_->clear_fault(symptom_code);
+      if (audit_log_) {
+        auto symptom = storage_->get_fault(symptom_code);
+        if (symptom) {
+          audit_transition(kTransitionCleared, *symptom, "clear_service", get_wall_clock_time().nanoseconds());
+        }
+      }
       RCLCPP_DEBUG(get_logger(), "Auto-cleared symptom: %s (root cause: %s)", symptom_code.c_str(),
                    request->fault_code.c_str());
       // Also cleanup rosbag for auto-cleared faults
@@ -684,6 +789,7 @@ void FaultManagerNode::handle_clear_fault(
     auto fault = storage_->get_fault(request->fault_code);
     if (fault) {
       publish_fault_event(ros2_medkit_msgs::msg::FaultEvent::EVENT_CLEARED, *fault, auto_cleared_codes);
+      audit_transition(kTransitionCleared, *fault, "clear_service", get_wall_clock_time().nanoseconds());
     }
   } else {
     response->message = "Fault not found: " + request->fault_code;
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
new file mode 100644
index 00000000..21fd3448
--- /dev/null
+++ b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
@@ -0,0 +1,270 @@
+// Copyright 2026 mfaferek93, bburda
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <gtest/gtest.h>
+#include <sqlite3.h>
+
+#include <filesystem>
+#include <memory>
+#include <random>
+#include <string>
+
+#include "ros2_medkit_fault_manager/fault_audit_log.hpp"
+
+using ros2_medkit_fault_manager::AuditEvent;
+using ros2_medkit_fault_manager::FaultAuditLog;
+using ros2_medkit_fault_manager::kTransitionCleared;
+using ros2_medkit_fault_manager::kTransitionConfirmed;
+using ros2_medkit_fault_manager::kTransitionOccurred;
+
+namespace {
+
+AuditEvent make_event(const std::string & code, const char * transition, int64_t ts) {
+  AuditEvent e;
+  e.fault_code = code;
+  e.transition = transition;
+  e.severity = 2;  // severity, status, source_id and description are fixed fixture values
+  e.status = "CONFIRMED";
+  e.source_id = "/robot/source";
+  e.description = "pump pressure low";
+  e.occurred_at_ns = ts;
+  return e;
+}
+
+/// Run SQL directly against the audit DB file through a separate sqlite3 connection, bypassing
+/// FaultAuditLog (used to simulate tampering an immutable row). Failures are reported via ASSERT_*.
+void raw_exec(const std::string & db_path, const std::string & sql) {
+  sqlite3 * db = nullptr;
+  ASSERT_EQ(sqlite3_open(db_path.c_str(), &db), SQLITE_OK);  // NOTE(review): a failure here skips sqlite3_close
+  char * err = nullptr;
+  int rc = sqlite3_exec(db, sql.c_str(), nullptr, nullptr, &err);
+  std::string err_str = err ? err : "";  // copy the message before sqlite3_free() releases it
+  sqlite3_free(err);
+  sqlite3_close(db);
+  ASSERT_EQ(rc, SQLITE_OK) << err_str;
+}
+
+}  // namespace
+
+class FaultAuditLogTest : public ::testing::Test {
+ protected:
+  void SetUp() override {  // picks a random DB file name in the temp directory; nothing is created yet
+    std::random_device rd;
+    std::mt19937_64 gen(rd());
+    std::uniform_int_distribution<uint64_t> dist;
+    path_ = (std::filesystem::temp_directory_path() / ("test_audit_" + std::to_string(dist(gen)) + ".db")).string();
+  }
+
+  void TearDown() override {  // removes the DB and its -wal/-shm side files; errors are ignored via ec
+    std::error_code ec;
+    std::filesystem::remove(path_, ec);
+    std::filesystem::remove(path_ + "-wal", ec);
+    std::filesystem::remove(path_ + "-shm", ec);
+  }
+
+  std::string path_;  // audit DB path used by the current test
+};
+
+// Each append() returns the next seq (1, 2, 3) and stores a row whose prev_hash is the prior record_hash.
+TEST_F(FaultAuditLogTest, AppendsChainedRowPerTransition) {
+  FaultAuditLog log(path_);
+
+  EXPECT_EQ(log.append(make_event("F1", kTransitionOccurred, 100)), 1);
+  EXPECT_EQ(log.append(make_event("F1", kTransitionConfirmed, 200)), 2);
+  EXPECT_EQ(log.append(make_event("F1", kTransitionCleared, 300)), 3);
+
+  auto records = log.read();
+  ASSERT_EQ(records.size(), 3u);
+
+  // First row links to genesis; each subsequent prev_hash equals the prior hash.
+  EXPECT_EQ(records[0].seq, 1);
+  EXPECT_EQ(records[0].prev_hash, FaultAuditLog::genesis_hash());
+  EXPECT_EQ(records[1].prev_hash, records[0].record_hash);
+  EXPECT_EQ(records[2].prev_hash, records[1].record_hash);
+
+  // record_hash is sha256_hex(prev_hash + canonicalize(seq, event)); recomputed here for seq 1.
+  const std::string expected =
+      FaultAuditLog::sha256_hex(records[0].prev_hash + FaultAuditLog::canonicalize(1, records[0].event));
+  EXPECT_EQ(records[0].record_hash, expected);
+
+  EXPECT_EQ(log.record_count(), 3);
+  EXPECT_EQ(log.head().seq, 3);
+  EXPECT_EQ(log.head().record_hash, records[2].record_hash);
+}
+
+// verify() accepts an untampered 10-record chain and reports all 10 records as checked.
+TEST_F(FaultAuditLogTest, VerifyUntamperedChain) {
+  FaultAuditLog log(path_);
+  for (int i = 0; i < 10; ++i) {
+    log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
+  }
+  auto result = log.verify();
+  EXPECT_TRUE(result.ok) << result.error;
+  EXPECT_EQ(result.checked, 10);
+}
+
+// Known SHA-256 vector proves the EVP wiring (sha256("") == e3b0c442...).
+TEST_F(FaultAuditLogTest, Sha256KnownVector) {
+  EXPECT_EQ(FaultAuditLog::sha256_hex(""),
+            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+}
+
+// Editing a past row makes verify fail.
+TEST_F(FaultAuditLogTest, EditingPastRowFailsVerify) {
+  {
+    FaultAuditLog log(path_);
+    log.append(make_event("F1", kTransitionOccurred, 100));
+    log.append(make_event("F2", kTransitionOccurred, 200));
+    log.append(make_event("F3", kTransitionOccurred, 300));
+    ASSERT_TRUE(log.verify().ok);
+  }
+
+  // Tamper through a separate connection: change a stored field without recomputing record_hash.
+  raw_exec(path_, "UPDATE audit_log SET description = 'forged' WHERE seq = 2");
+
+  FaultAuditLog reopened(path_);
+  auto result = reopened.verify();
+  EXPECT_FALSE(result.ok);
+  EXPECT_EQ(result.bad_seq, 2);
+}
+
+// Deleting a middle row (seq 2 of 3) leaves a seq gap that verify() rejects.
+TEST_F(FaultAuditLogTest, DeletingMiddleRowFailsVerify) {
+  {
+    FaultAuditLog log(path_);
+    log.append(make_event("F1", kTransitionOccurred, 100));
+    log.append(make_event("F2", kTransitionOccurred, 200));
+    log.append(make_event("F3", kTransitionOccurred, 300));
+  }
+
+  raw_exec(path_, "DELETE FROM audit_log WHERE seq = 2");
+
+  FaultAuditLog reopened(path_);
+  EXPECT_FALSE(reopened.verify().ok);
+}
+
+// Deleting the newest row keeps the surviving rows self-consistent; the persisted head (still seq 3) exposes it.
+TEST_F(FaultAuditLogTest, DeletingNewestRowFailsVerify) {
+  {
+    FaultAuditLog log(path_);
+    log.append(make_event("F1", kTransitionOccurred, 100));
+    log.append(make_event("F2", kTransitionOccurred, 200));
+    log.append(make_event("F3", kTransitionOccurred, 300));
+  }
+
+  // Drop the last row but leave the head pointing at seq 3.
+  raw_exec(path_, "DELETE FROM audit_log WHERE seq = 3");
+
+  FaultAuditLog reopened(path_);
+  EXPECT_FALSE(reopened.verify().ok);
+}
+
+// The chain head (seq and record_hash) survives closing and reopening the DB, and appends resume from it.
+TEST_F(FaultAuditLogTest, HeadPersistsAcrossReopen) {
+  std::string head_hash;
+  {
+    FaultAuditLog log(path_);
+    log.append(make_event("F1", kTransitionOccurred, 100));
+    log.append(make_event("F2", kTransitionConfirmed, 200));
+    head_hash = log.head().record_hash;
+    EXPECT_EQ(log.head().seq, 2);
+  }
+
+  FaultAuditLog reopened(path_);
+  EXPECT_EQ(reopened.head().seq, 2);
+  EXPECT_EQ(reopened.head().record_hash, head_hash);
+
+  // The next append continues the same chain.
+  EXPECT_EQ(reopened.append(make_event("F3", kTransitionCleared, 300)), 3);
+  auto records = reopened.read();
+  ASSERT_EQ(records.size(), 3u);
+  EXPECT_EQ(records[2].prev_hash, head_hash);
+  EXPECT_TRUE(reopened.verify().ok);
+}
+
+// With retention_max_records=5, 12 appends leave rows 8..12; verify() still passes via the sealed anchor.
+TEST_F(FaultAuditLogTest, RotationSealsAndPrunesButStaysVerifiable) {
+  FaultAuditLog log(path_, /*retention_max_records=*/5);
+  for (int i = 1; i <= 12; ++i) {
+    log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
+  }
+
+  // Only the most recent 5 rows are retained.
+  EXPECT_EQ(log.record_count(), 5);
+  auto records = log.read();
+  ASSERT_EQ(records.size(), 5u);
+  EXPECT_EQ(records.front().seq, 8);
+  EXPECT_EQ(records.back().seq, 12);
+  EXPECT_EQ(log.head().seq, 12);
+
+  // The surviving tail still verifies via the sealed anchor.
+  auto result = log.verify();
+  EXPECT_TRUE(result.ok) << result.error;
+  EXPECT_EQ(result.checked, 5);
+}
+
+// Tampering with a retained row (seq 10) after rotation is still detected and attributed to that seq.
+TEST_F(FaultAuditLogTest, RotationThenTamperFails) {
+  {
+    FaultAuditLog log(path_, /*retention_max_records=*/5);
+    for (int i = 1; i <= 12; ++i) {
+      log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
+    }
+  }
+  raw_exec(path_, "UPDATE audit_log SET source_id = 'forged' WHERE seq = 10");
+
+  FaultAuditLog reopened(path_, 5);
+  auto result = reopened.verify();
+  EXPECT_FALSE(result.ok);
+  EXPECT_EQ(result.bad_seq, 10);
+}
+
+// Deleting every audit_anchors row leaves the oldest retained row (seq > 1) with nothing to link to.
+TEST_F(FaultAuditLogTest, MissingAnchorFailsVerify) {
+  {
+    FaultAuditLog log(path_, /*retention_max_records=*/5);
+    for (int i = 1; i <= 12; ++i) {
+      log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
+    }
+  }
+  raw_exec(path_, "DELETE FROM audit_anchors");
+
+  FaultAuditLog reopened(path_, 5);
+  EXPECT_FALSE(reopened.verify().ok);
+}
+
+// canonicalize() returns identical bytes for identical (seq, event) input and different bytes when seq differs.
+TEST_F(FaultAuditLogTest, CanonicalizeDeterministic) {
+  auto e = make_event("F1", kTransitionConfirmed, 12345);
+  EXPECT_EQ(FaultAuditLog::canonicalize(7, e), FaultAuditLog::canonicalize(7, e));
+  // seq is part of the canonical form, so a different seq changes the bytes.
+  EXPECT_NE(FaultAuditLog::canonicalize(7, e), FaultAuditLog::canonicalize(8, e));
+}
+
+// read(limit=0, after_seq=3) returns every record with seq > 3 (here 4 and 5), oldest first.
+TEST_F(FaultAuditLogTest, ReadAfterSeq) {
+  FaultAuditLog log(path_);
+  for (int i = 1; i <= 5; ++i) {
+    log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
+  }
+  auto tail = log.read(/*limit=*/0, /*after_seq=*/3);
+  ASSERT_EQ(tail.size(), 2u);
+  EXPECT_EQ(tail[0].seq, 4);
+  EXPECT_EQ(tail[1].seq, 5);
+}
+
+int main(int argc, char ** argv) {
+  ::testing::InitGoogleTest(&argc, argv);  // hands the command line to gtest before any test runs
+  return RUN_ALL_TESTS();
+}
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
index 90d84af7..623dfdf9 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
@@ -16,12 +16,15 @@
 
 #include <atomic>
 #include <chrono>
+#include <filesystem>
 #include <memory>
 #include <optional>
+#include <random>
 #include <thread>
 #include <vector>
 
 #include "rclcpp/rclcpp.hpp"
+#include "ros2_medkit_fault_manager/fault_audit_log.hpp"
 #include "ros2_medkit_fault_manager/fault_manager_node.hpp"
 #include "ros2_medkit_fault_manager/fault_storage.hpp"
 #include "ros2_medkit_msgs/msg/fault.hpp"
@@ -1361,6 +1364,122 @@ TEST_F(SnapshotCooldownTest, CooldownPreventsRapidRecapture) {
   SUCCEED();
 }
 
+// End-to-end fixture: a FaultManagerNode (memory fault storage, audit log enabled on a temp file) and
+// a client node share a random namespace. The test reports a CRITICAL fault (immediate confirm =>
+// occurred + confirmed), clears it (=> cleared), and inspects the persisted audit DB by reopening it.
+class FaultAuditIntegrationTest : public ::testing::Test {
+ protected:
+  void SetUp() override {
+    std::random_device rd;
+    std::mt19937_64 gen(rd());
+    std::uniform_int_distribution<uint64_t> dist;
+    audit_path_ = (std::filesystem::temp_directory_path() / ("test_node_audit_" + std::to_string(dist(gen)) + ".db"))
+                      .string();
+    ns_ = "/test_audit_" + std::to_string(dist(gen));
+
+    rclcpp::NodeOptions fm_options;
+    fm_options.parameter_overrides({
+        {"storage_type", "memory"},
+        {"confirmation_threshold", -1},
+        {"audit_log.enabled", true},
+        {"audit_log.database_path", audit_path_},
+    });
+    fm_options.arguments({"--ros-args", "-r", "__ns:=" + ns_});
+    fault_manager_ = std::make_shared<FaultManagerNode>(fm_options);
+
+    rclcpp::NodeOptions test_options;
+    test_options.arguments({"--ros-args", "-r", "__ns:=" + ns_});
+    test_node_ = std::make_shared<rclcpp::Node>("test_audit_client", test_options);
+
+    report_client_ = test_node_->create_client<ReportFault>(ns_ + "/fault_manager/report_fault");
+    clear_client_ = test_node_->create_client<ClearFault>(ns_ + "/fault_manager/clear_fault");
+    ASSERT_TRUE(report_client_->wait_for_service(std::chrono::seconds(5)));
+    ASSERT_TRUE(clear_client_->wait_for_service(std::chrono::seconds(5)));
+  }
+
+  void TearDown() override {  // releases clients and nodes before deleting the audit DB files
+    report_client_.reset();
+    clear_client_.reset();
+    test_node_.reset();
+    fault_manager_.reset();
+    std::error_code ec;
+    std::filesystem::remove(audit_path_, ec);
+    std::filesystem::remove(audit_path_ + "-wal", ec);
+    std::filesystem::remove(audit_path_ + "-shm", ec);
+  }
+
+  template <typename FutureT>
+  bool spin_until_ready(FutureT & future) {  // spins both nodes for up to 2 s; returns false on timeout
+    auto start = std::chrono::steady_clock::now();
+    while (std::chrono::steady_clock::now() - start < std::chrono::seconds(2)) {
+      rclcpp::spin_some(fault_manager_);
+      rclcpp::spin_some(test_node_);
+      if (future.wait_for(std::chrono::milliseconds(0)) == std::future_status::ready) {
+        return true;
+      }
+      std::this_thread::sleep_for(std::chrono::milliseconds(10));
+    }
+    return false;
+  }
+
+  std::string audit_path_;
+  std::string ns_;
+  std::shared_ptr<FaultManagerNode> fault_manager_;
+  std::shared_ptr<rclcpp::Node> test_node_;
+  rclcpp::Client<ReportFault>::SharedPtr report_client_;
+  rclcpp::Client<ClearFault>::SharedPtr clear_client_;
+};
+
+TEST_F(FaultAuditIntegrationTest, TransitionsAppendVerifiableChain) {
+  auto report = std::make_shared<ReportFault::Request>();
+  report->fault_code = "AUDIT_FAULT";
+  report->event_type = ReportFault::Request::EVENT_FAILED;
+  report->severity = Fault::SEVERITY_CRITICAL;  // immediate confirm: one report yields occurred + confirmed
+  report->description = "overpressure";
+  report->source_id = "/plc/pump";
+  auto rf = report_client_->async_send_request(report);
+  ASSERT_TRUE(spin_until_ready(rf));
+  ASSERT_TRUE(rf.get()->accepted);
+
+  auto clear = std::make_shared<ClearFault::Request>();
+  clear->fault_code = "AUDIT_FAULT";
+  auto cf = clear_client_->async_send_request(clear);
+  ASSERT_TRUE(spin_until_ready(cf));
+  ASSERT_TRUE(cf.get()->success);
+
+  // Open a second FaultAuditLog on the same file (the node is still alive) and inspect the persisted chain.
+  ros2_medkit_fault_manager::FaultAuditLog audit(audit_path_);
+  auto records = audit.read();
+  ASSERT_EQ(records.size(), 3u);
+  EXPECT_EQ(records[0].event.transition, ros2_medkit_fault_manager::kTransitionOccurred);
+  EXPECT_EQ(records[1].event.transition, ros2_medkit_fault_manager::kTransitionConfirmed);
+  EXPECT_EQ(records[2].event.transition, ros2_medkit_fault_manager::kTransitionCleared);
+  EXPECT_EQ(records[0].event.fault_code, "AUDIT_FAULT");
+  EXPECT_EQ(records[1].event.source_id, "/plc/pump");
+
+  auto result = audit.verify();
+  EXPECT_TRUE(result.ok) << result.error;
+  EXPECT_EQ(result.checked, 3);
+}
+
+TEST(FaultAuditDisabledTest, NoAuditFileWhenDisabled) {
+  std::random_device rd;
+  std::mt19937_64 gen(rd());
+  std::uniform_int_distribution<uint64_t> dist;
+  const std::string audit_path =
+      (std::filesystem::temp_directory_path() / ("test_audit_off_" + std::to_string(dist(gen)) + ".db")).string();
+
+  rclcpp::NodeOptions options;
+  options.parameter_overrides({
+      {"storage_type", "memory"},
+      {"audit_log.database_path", audit_path},  // default enabled=false
+  });
+  auto node = std::make_shared<FaultManagerNode>(options);
+
+  // audit_log.enabled is left at its default (false), so constructing the node must not create the file.
+  EXPECT_FALSE(std::filesystem::exists(audit_path));
+}
+
 int main(int argc, char ** argv) {
   rclcpp::init(argc, argv);
   ::testing::InitGoogleTest(&argc, argv);

From 63976785d1deb3fb0bcd45c5384db7f7667c2cdf Mon Sep 17 00:00:00 2001
From: mfaferek93 <michal.faferek@selfpatch.ai>
Date: Tue, 30 Jun 2026 16:36:35 +0000
Subject: [PATCH 2/5] style: apply clang-format-18

---
 .../fault_audit_log.hpp                       | 20 +++++++++----------
 .../src/fault_audit_log.cpp                   |  2 +-
 .../src/fault_manager_node.cpp                |  5 ++---
 .../test/test_fault_audit_log.cpp             |  3 +--
 .../test/test_fault_manager.cpp               |  7 +++----
 5 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
index 59cca09d..039a12e5 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
@@ -31,11 +31,11 @@ namespace ros2_medkit_fault_manager {
 /// later edit to a stored row is detectable.
 struct AuditEvent {
   std::string fault_code;
-  std::string transition;   ///< occurred | confirmed | cleared | ack
-  uint8_t severity{0};      ///< severity at the time of the transition
-  std::string status;       ///< resulting fault status (e.g. CONFIRMED)
-  std::string source_id;    ///< reporting source that drove the transition
-  std::string description;  ///< human-readable description
+  std::string transition;     ///< occurred | confirmed | cleared | ack
+  uint8_t severity{0};        ///< severity at the time of the transition
+  std::string status;         ///< resulting fault status (e.g. CONFIRMED)
+  std::string source_id;      ///< reporting source that drove the transition
+  std::string description;    ///< human-readable description
   int64_t occurred_at_ns{0};  ///< wall-clock timestamp of the transition
 };
 
@@ -55,16 +55,16 @@ struct AuditRecord {
 
 /// Persisted head of the hash chain.
 struct ChainHead {
-  int64_t seq{0};            ///< 0 when the chain is empty
-  std::string record_hash;   ///< genesis hash when the chain is empty
+  int64_t seq{0};           ///< 0 when the chain is empty
+  std::string record_hash;  ///< genesis hash when the chain is empty
 };
 
 /// Result of verifying the persisted chain.
 struct AuditVerifyResult {
   bool ok{true};
-  int64_t checked{0};   ///< number of records walked
-  int64_t bad_seq{0};   ///< seq of the first offending record (0 if ok)
-  std::string error;    ///< human-readable reason when !ok
+  int64_t checked{0};  ///< number of records walked
+  int64_t bad_seq{0};  ///< seq of the first offending record (0 if ok)
+  std::string error;   ///< human-readable reason when !ok
 };
 
 /// Append-only, hash-chained audit log of fault state transitions.
diff --git a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
index 8799eddb..8f146704 100644
--- a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
@@ -93,7 +93,7 @@ void exec_or_throw(sqlite3 * db, const char * sql, const char * what) {
 }  // namespace
 
 FaultAuditLog::FaultAuditLog(const std::string & db_path, int64_t retention_max_records)
-    : db_path_(db_path), retention_max_records_(retention_max_records < 0 ? 0 : retention_max_records) {
+  : db_path_(db_path), retention_max_records_(retention_max_records < 0 ? 0 : retention_max_records) {
   int flags = SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE | SQLITE_OPEN_FULLMUTEX;
   if (sqlite3_open_v2(db_path.c_str(), &db_, flags, nullptr) != SQLITE_OK) {
     std::string error = db_ ? sqlite3_errmsg(db_) : "unknown error";
diff --git a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
index 5692693d..177d9133 100644
--- a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
@@ -479,9 +479,8 @@ void FaultManagerNode::handle_report_fault(
 
   // Report the fault event (use wall clock time, not sim time, for proper timestamps)
   const rclcpp::Time event_time = get_wall_clock_time();
-  bool is_new =
-      storage_->report_fault_event(request->fault_code, request->event_type, request->severity, request->description,
-                                   request->source_id, event_time, resolved_config);
+  bool is_new = storage_->report_fault_event(request->fault_code, request->event_type, request->severity,
+                                             request->description, request->source_id, event_time, resolved_config);
 
   response->accepted = true;
 
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
index 21fd3448..b0ac54b9 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
@@ -116,8 +116,7 @@ TEST_F(FaultAuditLogTest, VerifyUntamperedChain) {
 
 // Known SHA-256 vector proves the EVP wiring (sha256("") == e3b0c442...).
 TEST_F(FaultAuditLogTest, Sha256KnownVector) {
-  EXPECT_EQ(FaultAuditLog::sha256_hex(""),
-            "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+  EXPECT_EQ(FaultAuditLog::sha256_hex(""), "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
 }
 
 // Editing a past row makes verify fail.
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
index 623dfdf9..f53219cc 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
@@ -1373,8 +1373,8 @@ class FaultAuditIntegrationTest : public ::testing::Test {
     std::random_device rd;
     std::mt19937_64 gen(rd());
     std::uniform_int_distribution<uint64_t> dist;
-    audit_path_ = (std::filesystem::temp_directory_path() / ("test_node_audit_" + std::to_string(dist(gen)) + ".db"))
-                      .string();
+    audit_path_ =
+        (std::filesystem::temp_directory_path() / ("test_node_audit_" + std::to_string(dist(gen)) + ".db")).string();
     ns_ = "/test_audit_" + std::to_string(dist(gen));
 
     rclcpp::NodeOptions fm_options;
@@ -1471,8 +1471,7 @@ TEST(FaultAuditDisabledTest, NoAuditFileWhenDisabled) {
 
   rclcpp::NodeOptions options;
   options.parameter_overrides({
-      {"storage_type", "memory"},
-      {"audit_log.database_path", audit_path},  // default enabled=false
+      {"storage_type", "memory"}, {"audit_log.database_path", audit_path},  // default enabled=false
   });
   auto node = std::make_shared<FaultManagerNode>(options);
 

From 802413e3756be20e7d23f21048d0b7dd6325ab25 Mon Sep 17 00:00:00 2001
From: mfaferek93 <michal.faferek@selfpatch.ai>
Date: Tue, 30 Jun 2026 19:10:24 +0200
Subject: [PATCH 3/5] fix(fault-audit): close truncation bypass and audit timer
 confirmations

verify() now reads audit_chain_head directly and fails when the head row is
missing on a non-empty log, so deleting the newest row plus the head is caught.
Timer-driven PREFAILED->CONFIRMED confirmations now call audit_transition.
Adds append-only triggers (hardening, not a security boundary) and corrects the
unkeyed/single-file threat model in the docs.

Refs #483
---
 src/ros2_medkit_fault_manager/CHANGELOG.rst   |   2 +-
 src/ros2_medkit_fault_manager/README.md       |   8 +-
 .../config/fault_manager.yaml                 |   8 +-
 .../fault_audit_log.hpp                       |  18 ++-
 .../fault_manager_node.hpp                    |  10 ++
 .../fault_storage.hpp                         |   6 +-
 .../sqlite_fault_storage.hpp                  |   2 +-
 .../src/fault_audit_log.cpp                   | 114 ++++++++++++------
 .../src/fault_manager_node.cpp                |  16 ++-
 .../src/fault_storage.cpp                     |  10 +-
 .../src/sqlite_fault_storage.cpp              |  24 +++-
 .../test/test_fault_audit_log.cpp             |  79 +++++++++++-
 .../test/test_fault_manager.cpp               |  63 +++++++++-
 .../test/test_sqlite_storage.cpp              |   9 +-
 14 files changed, 293 insertions(+), 76 deletions(-)

diff --git a/src/ros2_medkit_fault_manager/CHANGELOG.rst b/src/ros2_medkit_fault_manager/CHANGELOG.rst
index 902b8447..1e884c91 100644
--- a/src/ros2_medkit_fault_manager/CHANGELOG.rst
+++ b/src/ros2_medkit_fault_manager/CHANGELOG.rst
@@ -4,7 +4,7 @@ Changelog for package ros2_medkit_fault_manager
 
 Forthcoming
 -----------
-* Optional tamper-evident, append-only audit log of fault state transitions: each transition appends one immutable, hash-chained row (``record_hash = sha256(prev_hash + canonical(event))`` via OpenSSL EVP SHA-256) with a persisted chain head, a ``verify`` routine, a read API, and retention that seals a segment anchor before pruning. Off by default (`#483 <https://github.com/selfpatch/ros2_medkit/issues/483>`_)
+* Optional append-only, hash-chained audit log of fault state transitions: each transition appends one immutable row (``record_hash = sha256(prev_hash + canonical(event))`` via OpenSSL EVP SHA-256) with a persisted chain head, a ``verify`` routine, a read API, and retention that seals a segment anchor before pruning. Time-based (PREFAILED->CONFIRMED) auto-confirmations are also audited. ``verify`` reads the chain head directly from the database, so deleting the newest row together with the head row is reported as tampering instead of silently recovering. ``BEFORE UPDATE`` / ``BEFORE DELETE`` triggers reject out-of-band edits as defense-in-depth. The chain is unkeyed and stored in a single writable file, so ``verify`` detects edits/deletions that did not recompute the chain (casual or accidental tampering); it is not a defense against an attacker who can rewrite the whole file. Off by default (`#483 <https://github.com/selfpatch/ros2_medkit/issues/483>`_)
 
 0.6.0 (2026-06-22)
 ------------------
diff --git a/src/ros2_medkit_fault_manager/README.md b/src/ros2_medkit_fault_manager/README.md
index cccaaa6a..4ca1cd98 100644
--- a/src/ros2_medkit_fault_manager/README.md
+++ b/src/ros2_medkit_fault_manager/README.md
@@ -112,11 +112,13 @@ patterns:
 
 ## Advanced: Tamper-Evident Audit Log
 
-An optional append-only, hash-chained audit log records every fault state transition (`occurred`, `confirmed`, `cleared`) so the fault history is verifiable and any later edit or deletion is detectable. It is **off by default** because it adds a write and storage cost per transition.
+An optional append-only, hash-chained audit log records every fault state transition (`occurred`, `confirmed`, `cleared`) so the fault history is independently verifiable. It is **off by default** because it adds a write and storage cost per transition.
 
-Each transition appends one immutable row holding `record_hash = sha256(prev_hash + canonical(event))` (OpenSSL EVP SHA-256), the `prev_hash` it links to, and a monotonic `seq`. The hash is computed once at insert and never recomputed. A persisted chain head lets the chain resume across restarts. The log is stored in its own SQLite database (separate from the fault store) and is treated as append-only - the manager only ever inserts rows.
+Each transition appends one immutable row holding `record_hash = sha256(prev_hash + canonical(event))` (OpenSSL EVP SHA-256), the `prev_hash` it links to, and a monotonic `seq`. The hash is computed once at insert and never recomputed. A persisted chain head lets the chain resume across restarts. The log is stored in its own SQLite database (separate from the fault store) and is treated as append-only: the manager only ever inserts rows, and `BEFORE UPDATE` / `BEFORE DELETE` triggers reject out-of-band edits (the guarded rotation prune excepted).
 
-`verify()` walks the persisted chain oldest-first and recomputes every link: editing a row breaks its `record_hash`, deleting a row breaks the next row's `prev_hash` linkage, and deleting the newest row is caught by the persisted-head check.
+`verify()` walks the persisted chain oldest-first and recomputes every link: editing a row breaks its `record_hash`, deleting a row breaks the next row's `prev_hash` linkage, and deleting the newest row is caught by the persisted-head check (the head row is read straight from the DB rather than from a cached copy).
+
+**Threat model (read this).** The chain is **unkeyed**, and the head and segment anchors live in the **same writable SQLite file** as the rows. `verify()` therefore catches edits or deletions that did **not** also recompute the chain - that is, casual or accidental tampering, and the bookkeeping bugs that would otherwise lose records. It does **not** stop an attacker with write access to the file: such an attacker can drop the triggers and recompute the entire chain (and head and anchors) to forge a self-consistent history. The append-only triggers are defense-in-depth, **not** a security boundary. True tamper-*proofing* requires a key or signature over the head (so it cannot be recomputed without the key) or external anchoring of the head hash to an append-only store you do not control; both are out of scope here and belong to the audit-log exporter / signing follow-up.
 
 **Retention/rotation**: when more than `audit_log.retention_max_records` rows are retained, the oldest segment is *sealed* (its final `seq` + hash are persisted as an anchor) and then pruned. The surviving tail still verifies because the oldest retained row links back to the sealed anchor.
 
diff --git a/src/ros2_medkit_fault_manager/config/fault_manager.yaml b/src/ros2_medkit_fault_manager/config/fault_manager.yaml
index 3f94e71c..13d1f57a 100644
--- a/src/ros2_medkit_fault_manager/config/fault_manager.yaml
+++ b/src/ros2_medkit_fault_manager/config/fault_manager.yaml
@@ -28,10 +28,14 @@ fault_manager:
     # snapshots.capture_queue_depth: 16               # max pending captures (>= 1)
     # snapshots.capture_queue_full_policy: reject_newest  # reject_newest | drop_oldest
 
-    # Tamper-evident, append-only audit log of fault state transitions
+    # Append-only, hash-chained audit log of fault state transitions
     # (occurred/confirmed/cleared). OFF by default: it adds a write + storage cost
     # per transition. When enabled, each transition appends one immutable,
-    # hash-chained row so any later edit or deletion is detectable via verify.
+    # hash-chained row, so verify() detects edits or deletions that did NOT also
+    # recompute the chain (casual/accidental tampering). The chain is unkeyed and
+    # lives in a single writable file, so it is NOT proof against an attacker who
+    # can rewrite the whole file; true tamper-proofing needs a signed head or
+    # external anchoring (out of scope here). See README "Threat model".
     audit_log.enabled: false
     # Which transitions to record: "all" or "confirmed_only".
     # audit_log.transitions: all
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
index 039a12e5..457bca9e 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
@@ -18,6 +18,7 @@
 
 #include <cstdint>
 #include <mutex>
+#include <optional>
 #include <string>
 #include <vector>
 
@@ -27,8 +28,8 @@ namespace ros2_medkit_fault_manager {
 ///
 /// `transition` is one of the kTransition* constants below. The remaining
 /// fields describe the fault at the moment of the transition; all of them feed
-/// the canonical serialization that the hash chain is computed over, so any
-/// later edit to a stored row is detectable.
+/// the canonical serialization that the hash chain is computed over, so an edit
+/// to a stored row that does not also recompute the chain breaks verify().
 struct AuditEvent {
   std::string fault_code;
   std::string transition;     ///< occurred | confirmed | cleared | ack
@@ -78,6 +79,15 @@ struct AuditVerifyResult {
 ///
 /// The table is treated as append-only: this class only ever INSERTs rows (and,
 /// on rotation, deletes a sealed prefix). It never UPDATEs an existing record.
+/// BEFORE UPDATE / BEFORE DELETE triggers reject out-of-band edits (the guarded
+/// rotation prune excepted) as defense-in-depth.
+///
+/// Threat model: the hash chain is UNKEYED and the head/anchors live in the same
+/// writable file. verify() catches edits or deletions that did not also recompute
+/// the chain (casual or accidental tampering), but anyone with write access to the
+/// file can recompute the whole chain (and drop the triggers) and forge a
+/// consistent history. True tamper-proofing needs a key/signature over the head or
+/// external anchoring; that is out of scope here.
 class FaultAuditLog {
  public:
   /// Open (or create) the audit log database.
@@ -129,6 +139,10 @@ class FaultAuditLog {
 
  private:
   void initialize_schema();
+  /// Read the persisted chain head row (audit_chain_head id=1) straight from the
+  /// DB. Returns nullopt when the row is absent. verify() relies on this so a
+  /// deleted head row is treated as tampering rather than silently recovered.
+  std::optional<ChainHead> read_head_row_locked() const;
   ChainHead load_head_locked() const;
   void store_head_locked(const ChainHead & head_record);
   /// Seal + prune the oldest segment if the retained count exceeds the limit.
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
index 7c603232..61cf227c 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
@@ -62,6 +62,16 @@ class FaultManagerNode : public rclcpp::Node {
     return *storage_;
   }
 
+  /// Get mutable access to fault storage (for testing only).
+  FaultStorage & get_storage_for_test() {
+    return *storage_;
+  }
+
+  /// Get the tamper-evident audit log (nullptr when disabled), for testing only.
+  const FaultAuditLog * get_audit_log_for_test() const {
+    return audit_log_.get();
+  }
+
   /// Get the storage type being used
   const std::string & get_storage_type() const {
     return storage_type_;
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp
index 8c66733f..935d573a 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_storage.hpp
@@ -143,8 +143,8 @@ class FaultStorage {
 
   /// Check and confirm PREFAILED faults that have been pending too long (time-based confirmation)
   /// @param current_time Current timestamp for age calculation
-  /// @return Number of faults that were confirmed
-  virtual size_t check_time_based_confirmation(const rclcpp::Time & current_time) = 0;
+  /// @return Fault codes that were confirmed by this call (so the caller can audit each).
+  virtual std::vector<std::string> check_time_based_confirmation(const rclcpp::Time & current_time) = 0;
 
   /// Set maximum snapshots per fault code (0 = unlimited)
   virtual void set_max_snapshots_per_fault(size_t /*max_count*/) {
@@ -223,7 +223,7 @@ class InMemoryFaultStorage : public FaultStorage {
 
   bool contains(const std::string & fault_code) const override;
 
-  size_t check_time_based_confirmation(const rclcpp::Time & current_time) override;
+  std::vector<std::string> check_time_based_confirmation(const rclcpp::Time & current_time) override;
 
   void set_max_snapshots_per_fault(size_t max_count) override;
 
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/sqlite_fault_storage.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/sqlite_fault_storage.hpp
index 0de89805..29a54bee 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/sqlite_fault_storage.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/sqlite_fault_storage.hpp
@@ -59,7 +59,7 @@ class SqliteFaultStorage : public FaultStorage {
 
   bool contains(const std::string & fault_code) const override;
 
-  size_t check_time_based_confirmation(const rclcpp::Time & current_time) override;
+  std::vector<std::string> check_time_based_confirmation(const rclcpp::Time & current_time) override;
 
   void set_max_snapshots_per_fault(size_t max_count) override;
 
diff --git a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
index 8f146704..88710a53 100644
--- a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
@@ -19,6 +19,7 @@
 #include <array>
 #include <limits>
 #include <nlohmann/json.hpp>
+#include <optional>
 #include <stdexcept>
 #include <string>
 
@@ -215,32 +216,54 @@ void FaultAuditLog::initialize_schema() {
     );
   )",
                 "create audit_anchors table");
+
+  // Append-only enforcement at the DB layer (defense-in-depth, NOT a security
+  // boundary: anyone able to write the file can also DROP these triggers). The
+  // single-row guard lets the in-process rotation prune delete a sealed prefix
+  // while every other UPDATE/DELETE on audit_log is rejected.
+  exec_or_throw(db_,
+                R"(
+    CREATE TABLE IF NOT EXISTS audit_prune_guard (
+      id INTEGER PRIMARY KEY CHECK (id = 1),
+      enabled INTEGER NOT NULL DEFAULT 0
+    );
+    INSERT INTO audit_prune_guard (id, enabled) VALUES (1, 0)
+      ON CONFLICT(id) DO NOTHING;
+    CREATE TRIGGER IF NOT EXISTS audit_log_no_update
+    BEFORE UPDATE ON audit_log
+    BEGIN
+      SELECT RAISE(ABORT, 'audit_log is append-only');
+    END;
+    CREATE TRIGGER IF NOT EXISTS audit_log_no_delete
+    BEFORE DELETE ON audit_log
+    WHEN (SELECT enabled FROM audit_prune_guard WHERE id = 1) IS NOT 1
+    BEGIN
+      SELECT RAISE(ABORT, 'audit_log is append-only');
+    END;
+  )",
+                "create append-only triggers");
 }
 
-ChainHead FaultAuditLog::load_head_locked() const {
-  // Prefer the persisted head row.
-  {
-    Stmt stmt(db_, "SELECT seq, record_hash FROM audit_chain_head WHERE id = 1");
-    if (stmt.step() == SQLITE_ROW) {
-      ChainHead head_record;
-      head_record.seq = stmt.column_int64(0);
-      head_record.record_hash = stmt.column_text(1);
-      return head_record;
-    }
+std::optional<ChainHead> FaultAuditLog::read_head_row_locked() const {
+  Stmt stmt(db_, "SELECT seq, record_hash FROM audit_chain_head WHERE id = 1");
+  if (stmt.step() == SQLITE_ROW) {
+    ChainHead head_record;
+    head_record.seq = stmt.column_int64(0);
+    head_record.record_hash = stmt.column_text(1);
+    return head_record;
   }
+  return std::nullopt;
+}
 
-  // No head row. Recover from the last retained record if any exist (defensive:
-  // a crash between INSERT and head update would land here on reopen).
-  {
-    Stmt stmt(db_, "SELECT seq, record_hash FROM audit_log ORDER BY seq DESC LIMIT 1");
-    if (stmt.step() == SQLITE_ROW) {
-      ChainHead head_record;
-      head_record.seq = stmt.column_int64(0);
-      head_record.record_hash = stmt.column_text(1);
-      return head_record;
-    }
+ChainHead FaultAuditLog::load_head_locked() const {
+  // Resume strictly from the persisted head row. There is deliberately no
+  // MAX(seq) fallback: append+head-update are written in one transaction, so a
+  // missing head row while rows exist means tampering, not a recoverable crash.
+  // verify() reports that case as a failure rather than silently fabricating a
+  // head from the surviving rows.
+  if (auto head_record = read_head_row_locked()) {
+    return *head_record;
   }
-
   return ChainHead{0, genesis_hash()};
 }
 
@@ -346,11 +369,18 @@ void FaultAuditLog::rotate_if_needed_locked() {
       throw std::runtime_error(std::string("audit: failed to write anchor: ") + sqlite3_errmsg(db_));
     }
 
+    // Open the prune guard so the BEFORE DELETE trigger allows this sealed-prefix
+    // delete. Both the guard flip and the delete are in this transaction, so a
+    // ROLLBACK restores the guard to its closed state.
+    exec_or_throw(db_, "UPDATE audit_prune_guard SET enabled = 1 WHERE id = 1", "open prune guard");
+
     Stmt del(db_, "DELETE FROM audit_log WHERE seq <= ?");
     del.bind_int64(1, boundary_seq);
     if (del.step() != SQLITE_DONE) {
       throw std::runtime_error(std::string("audit: failed to prune records: ") + sqlite3_errmsg(db_));
     }
+
+    exec_or_throw(db_, "UPDATE audit_prune_guard SET enabled = 0 WHERE id = 1", "close prune guard");
     exec_or_throw(db_, "COMMIT", "commit rotate");
   } catch (...) {
     exec_or_throw(db_, "ROLLBACK", "rollback rotate");
@@ -492,27 +522,37 @@ AuditVerifyResult FaultAuditLog::verify() const {
     expected_seq = rec.seq + 1;
   }
 
-  // The persisted head must match the last retained record (catches deletion of
-  // the newest row, which the row walk alone cannot see).
+  // Read the persisted head row DIRECTLY from the DB (not the cached head_). A
+  // missing head row cannot be silently recovered from MAX(seq), so deleting the
+  // newest record together with the head row is reported as tampering instead of
+  // verifying clean.
+  const std::optional<ChainHead> persisted = read_head_row_locked();
+
   if (result.checked > 0) {
-    if (head_.seq != expected_seq - 1 || head_.record_hash != expected_prev) {
+    // A non-empty log must carry its head row, and it must match the last record.
+    if (!persisted) {
       result.ok = false;
-      result.bad_seq = head_.seq;
+      result.bad_seq = expected_seq - 1;
+      result.error = "audit_chain_head row missing while audit_log is non-empty (head deleted / truncated)";
+      return result;
+    }
+    if (persisted->seq != expected_seq - 1 || persisted->record_hash != expected_prev) {
+      result.ok = false;
+      result.bad_seq = persisted->seq;
       result.error = "persisted head does not match the last retained record";
       return result;
     }
-  } else {
-    // Empty retained log: head must be either genesis (never written) or point
-    // at a sealed anchor (everything pruned).
-    if (head_.seq != 0) {
-      Stmt anchor(db_, "SELECT last_hash FROM audit_anchors WHERE last_seq = ?");
-      anchor.bind_int64(1, head_.seq);
-      if (anchor.step() != SQLITE_ROW || anchor.column_text(0) != head_.record_hash) {
-        result.ok = false;
-        result.bad_seq = head_.seq;
-        result.error = "head references a record that is neither retained nor sealed";
-        return result;
-      }
+  } else if (persisted && persisted->seq != 0) {
+    // Empty retained log with a head past genesis: everything was pruned, so the
+    // head must point at a sealed anchor. (No head row, or a genesis head, on an
+    // empty log is the never-written case and is fine.)
+    Stmt anchor(db_, "SELECT last_hash FROM audit_anchors WHERE last_seq = ?");
+    anchor.bind_int64(1, persisted->seq);
+    if (anchor.step() != SQLITE_ROW || anchor.column_text(0) != persisted->record_hash) {
+      result.ok = false;
+      result.bad_seq = persisted->seq;
+      result.error = "head references a record that is neither retained nor sealed";
+      return result;
     }
   }
 
diff --git a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
index 177d9133..912593a7 100644
--- a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
@@ -289,10 +289,20 @@ FaultManagerNode::FaultManagerNode(const rclcpp::NodeOptions & options) : Node("
   // Create auto-confirmation timer if enabled
   if (auto_confirm_after_sec_ > 0.0) {
     auto_confirm_timer_ = create_wall_timer(std::chrono::seconds(1), [this]() {
-      size_t confirmed = storage_->check_time_based_confirmation(get_wall_clock_time());
-      if (confirmed > 0) {
-        RCLCPP_INFO(get_logger(), "Auto-confirmed %zu PREFAILED fault(s) due to time threshold", confirmed);
+      const auto confirmed = storage_->check_time_based_confirmation(get_wall_clock_time());
+      if (confirmed.empty()) {
+        return;
       }
+      // Audit every timer-driven PREFAILED->CONFIRMED transition. Without this the
+      // confirmations would never be recorded in the audit log at all.
+      const int64_t confirmed_at_ns = get_wall_clock_time().nanoseconds();
+      for (const auto & fault_code : confirmed) {
+        auto fault = storage_->get_fault(fault_code);
+        if (fault) {
+          audit_transition(kTransitionConfirmed, *fault, "auto_confirm_timer", confirmed_at_ns);
+        }
+      }
+      RCLCPP_INFO(get_logger(), "Auto-confirmed %zu PREFAILED fault(s) due to time threshold", confirmed.size());
     });
     RCLCPP_INFO(get_logger(),
                 "FaultManager node started (storage=%s, confirmation_threshold=%d, "
diff --git a/src/ros2_medkit_fault_manager/src/fault_storage.cpp b/src/ros2_medkit_fault_manager/src/fault_storage.cpp
index c8db7e51..f83fc3bc 100644
--- a/src/ros2_medkit_fault_manager/src/fault_storage.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_storage.cpp
@@ -267,14 +267,14 @@ bool InMemoryFaultStorage::contains(const std::string & fault_code) const {
   return faults_.find(fault_code) != faults_.end();
 }
 
-size_t InMemoryFaultStorage::check_time_based_confirmation(const rclcpp::Time & current_time) {
+std::vector<std::string> InMemoryFaultStorage::check_time_based_confirmation(const rclcpp::Time & current_time) {
   std::lock_guard<std::mutex> lock(mutex_);
 
+  std::vector<std::string> confirmed;
   if (config_.auto_confirm_after_sec <= 0.0) {
-    return 0;  // Time-based confirmation disabled
+    return confirmed;  // Time-based confirmation disabled
   }
 
-  size_t confirmed_count = 0;
   const double threshold_ns = config_.auto_confirm_after_sec * 1e9;
 
   for (auto & [code, state] : faults_) {
@@ -282,12 +282,12 @@ size_t InMemoryFaultStorage::check_time_based_confirmation(const rclcpp::Time &
       const int64_t age_ns = (current_time - state.last_failed_time).nanoseconds();
       if (static_cast<double>(age_ns) >= threshold_ns) {
         state.status = ros2_medkit_msgs::msg::Fault::STATUS_CONFIRMED;
-        ++confirmed_count;
+        confirmed.push_back(code);
       }
     }
   }
 
-  return confirmed_count;
+  return confirmed;
 }
 
 void InMemoryFaultStorage::set_max_snapshots_per_fault(size_t max_count) {
diff --git a/src/ros2_medkit_fault_manager/src/sqlite_fault_storage.cpp b/src/ros2_medkit_fault_manager/src/sqlite_fault_storage.cpp
index 5295f028..92de8e1f 100644
--- a/src/ros2_medkit_fault_manager/src/sqlite_fault_storage.cpp
+++ b/src/ros2_medkit_fault_manager/src/sqlite_fault_storage.cpp
@@ -666,17 +666,35 @@ bool SqliteFaultStorage::contains(const std::string & fault_code) const {
   return stmt.step() == SQLITE_ROW;
 }
 
-size_t SqliteFaultStorage::check_time_based_confirmation(const rclcpp::Time & current_time) {
+std::vector<std::string> SqliteFaultStorage::check_time_based_confirmation(const rclcpp::Time & current_time) {
   std::lock_guard<std::mutex> lock(mutex_);
 
+  std::vector<std::string> confirmed;
   if (config_.auto_confirm_after_sec <= 0.0) {
-    return 0;  // Time-based confirmation disabled
+    return confirmed;  // Time-based confirmation disabled
   }
 
   int64_t current_ns = current_time.nanoseconds();
   int64_t threshold_ns = static_cast<int64_t>(config_.auto_confirm_after_sec * 1e9);
   int64_t cutoff_ns = current_ns - threshold_ns;
 
+  // Collect the codes that will flip first so the caller can audit each one. The
+  // SELECT predicate mirrors the UPDATE exactly, and both run under the same lock,
+  // so the returned list matches the rows actually confirmed below.
+  {
+    SqliteStatement select_stmt(
+        db_, "SELECT fault_code FROM faults WHERE status = ? AND last_failed_ns <= ? AND last_failed_ns > 0");
+    select_stmt.bind_text(1, ros2_medkit_msgs::msg::Fault::STATUS_PREFAILED);
+    select_stmt.bind_int64(2, cutoff_ns);
+    while (select_stmt.step() == SQLITE_ROW) {
+      confirmed.push_back(select_stmt.column_text(0));
+    }
+  }
+
+  if (confirmed.empty()) {
+    return confirmed;
+  }
+
   SqliteStatement update_stmt(
       db_, "UPDATE faults SET status = ? WHERE status = ? AND last_failed_ns <= ? AND last_failed_ns > 0");
   update_stmt.bind_text(1, ros2_medkit_msgs::msg::Fault::STATUS_CONFIRMED);
@@ -687,7 +705,7 @@ size_t SqliteFaultStorage::check_time_based_confirmation(const rclcpp::Time & cu
     throw std::runtime_error(std::string("Failed to confirm faults: ") + sqlite3_errmsg(db_));
   }
 
-  return static_cast<size_t>(sqlite3_changes(db_));
+  return confirmed;
 }
 
 void SqliteFaultStorage::set_max_snapshots_per_fault(size_t max_count) {
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
index b0ac54b9..6a959b8e 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
@@ -42,8 +42,8 @@ AuditEvent make_event(const std::string & code, const char * transition, int64_t
   return e;
 }
 
-/// Run a single SQL statement directly against the audit DB file (used to
-/// simulate tampering an immutable row).
+/// Run SQL directly against the audit DB file (used to simulate tampering with
+/// an immutable row). Asserts success.
 void raw_exec(const std::string & db_path, const std::string & sql) {
   sqlite3 * db = nullptr;
   ASSERT_EQ(sqlite3_open(db_path.c_str(), &db), SQLITE_OK);
@@ -55,6 +55,22 @@ void raw_exec(const std::string & db_path, const std::string & sql) {
   ASSERT_EQ(rc, SQLITE_OK) << err_str;
 }
 
+/// Run SQL directly and return the raw result code (does not assert), so a test
+/// can confirm the append-only triggers reject a write.
+int raw_exec_rc(const std::string & db_path, const std::string & sql) {
+  sqlite3 * db = nullptr;
+  EXPECT_EQ(sqlite3_open(db_path.c_str(), &db), SQLITE_OK);
+  int rc = sqlite3_exec(db, sql.c_str(), nullptr, nullptr, nullptr);
+  sqlite3_close(db);
+  return rc;
+}
+
+/// Drop the DB-level append-only triggers so a raw tamper write can land. This
+/// mimics an attacker who bypassed the (defense-in-depth, not security boundary)
+/// triggers; verify() must still detect the recompute-free edit afterwards.
+const char * const kDropTriggers =
+    "DROP TRIGGER IF EXISTS audit_log_no_update; DROP TRIGGER IF EXISTS audit_log_no_delete; ";
+
 }  // namespace
 
 class FaultAuditLogTest : public ::testing::Test {
@@ -130,7 +146,7 @@ TEST_F(FaultAuditLogTest, EditingPastRowFailsVerify) {
   }
 
   // Tamper: change a stored field without recomputing the hash.
-  raw_exec(path_, "UPDATE audit_log SET description = 'forged' WHERE seq = 2");
+  raw_exec(path_, std::string(kDropTriggers) + "UPDATE audit_log SET description = 'forged' WHERE seq = 2");
 
   FaultAuditLog reopened(path_);
   auto result = reopened.verify();
@@ -147,7 +163,7 @@ TEST_F(FaultAuditLogTest, DeletingMiddleRowFailsVerify) {
     log.append(make_event("F3", kTransitionOccurred, 300));
   }
 
-  raw_exec(path_, "DELETE FROM audit_log WHERE seq = 2");
+  raw_exec(path_, std::string(kDropTriggers) + "DELETE FROM audit_log WHERE seq = 2");
 
   FaultAuditLog reopened(path_);
   EXPECT_FALSE(reopened.verify().ok);
@@ -163,12 +179,34 @@ TEST_F(FaultAuditLogTest, DeletingNewestRowFailsVerify) {
   }
 
   // Drop the last row but leave the head pointing at seq 3.
-  raw_exec(path_, "DELETE FROM audit_log WHERE seq = 3");
+  raw_exec(path_, std::string(kDropTriggers) + "DELETE FROM audit_log WHERE seq = 3");
 
   FaultAuditLog reopened(path_);
   EXPECT_FALSE(reopened.verify().ok);
 }
 
+// Truncation bypass: deleting the newest row AND the head row must still FAIL.
+// The head row is read directly from the DB, so a missing head on a non-empty
+// log is treated as tampering rather than silently rebuilt from MAX(seq).
+TEST_F(FaultAuditLogTest, DeletingNewestRowAndHeadFailsVerify) {
+  {
+    FaultAuditLog log(path_);
+    log.append(make_event("F1", kTransitionOccurred, 100));
+    log.append(make_event("F2", kTransitionOccurred, 200));
+    log.append(make_event("F3", kTransitionOccurred, 300));
+    ASSERT_TRUE(log.verify().ok);
+  }
+
+  // Drop the newest row and the persisted head row together.
+  raw_exec(path_, std::string(kDropTriggers) +
+                      "DELETE FROM audit_log WHERE seq = 3; DELETE FROM audit_chain_head WHERE id = 1;");
+
+  FaultAuditLog reopened(path_);
+  auto result = reopened.verify();
+  EXPECT_FALSE(result.ok);
+  EXPECT_EQ(result.bad_seq, 2);  // last surviving row
+}
+
 // The chain head persists across a reopen and the chain resumes from it.
 TEST_F(FaultAuditLogTest, HeadPersistsAcrossReopen) {
   std::string head_hash;
@@ -221,7 +259,7 @@ TEST_F(FaultAuditLogTest, RotationThenTamperFails) {
       log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
     }
   }
-  raw_exec(path_, "UPDATE audit_log SET source_id = 'forged' WHERE seq = 10");
+  raw_exec(path_, std::string(kDropTriggers) + "UPDATE audit_log SET source_id = 'forged' WHERE seq = 10");
 
   FaultAuditLog reopened(path_, 5);
   auto result = reopened.verify();
@@ -263,6 +301,35 @@ TEST_F(FaultAuditLogTest, ReadAfterSeq) {
   EXPECT_EQ(tail[1].seq, 5);
 }
 
+// Defense-in-depth: the append-only triggers reject an out-of-band UPDATE or
+// DELETE on audit_log (no kDropTriggers prefix here), leaving the rows intact.
+TEST_F(FaultAuditLogTest, AppendOnlyTriggersRejectOutOfBandWrites) {
+  {
+    FaultAuditLog log(path_);
+    log.append(make_event("F1", kTransitionOccurred, 100));
+    log.append(make_event("F2", kTransitionOccurred, 200));
+  }
+
+  // Raw writes through raw_exec_rc (which opens its own handle): both must return a non-OK result code.
+  EXPECT_NE(raw_exec_rc(path_, "UPDATE audit_log SET description = 'forged' WHERE seq = 1"), SQLITE_OK);
+  EXPECT_NE(raw_exec_rc(path_, "DELETE FROM audit_log WHERE seq = 1"), SQLITE_OK);
+
+  // Both rows are still present and the chain still verifies after the rejected writes.
+  FaultAuditLog reopened(path_);
+  EXPECT_EQ(reopened.record_count(), 2);
+  EXPECT_TRUE(reopened.verify().ok);
+}
+
+// Rotation prune is exempt from the delete trigger: 8 appends at retention 3 leave 3 rows that verify.
+TEST_F(FaultAuditLogTest, RotationPrunePassesAppendOnlyTrigger) {
+  FaultAuditLog log(path_, /*retention_max_records=*/3);
+  for (int i = 1; i <= 8; ++i) {
+    log.append(make_event("F" + std::to_string(i), kTransitionOccurred, 100 + i));
+  }
+  EXPECT_EQ(log.record_count(), 3);
+  EXPECT_TRUE(log.verify().ok);
+}
+
 int main(int argc, char ** argv) {
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
index f53219cc..28411122 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
@@ -526,9 +526,9 @@ TEST_F(FaultStorageTest, TimeBasedConfirmationDisabledByDefault) {
 
   // Advance time and check - should not auto-confirm (auto_confirm_after_sec = 0)
   auto future_time = rclcpp::Time(clock.now().nanoseconds() + static_cast<int64_t>(20e9));
-  size_t confirmed = storage_.check_time_based_confirmation(future_time);
+  auto confirmed = storage_.check_time_based_confirmation(future_time);
 
-  EXPECT_EQ(confirmed, 0u);
+  EXPECT_TRUE(confirmed.empty());
 
   auto fault = storage_.get_fault("FAULT_1");
   ASSERT_TRUE(fault.has_value());
@@ -548,13 +548,14 @@ TEST_F(FaultStorageTest, TimeBasedConfirmationWhenEnabled) {
 
   // Check before timeout - should not confirm
   auto before_timeout = rclcpp::Time(now.nanoseconds() + static_cast<int64_t>(5e9));
-  size_t confirmed_early = storage_.check_time_based_confirmation(before_timeout);
-  EXPECT_EQ(confirmed_early, 0u);
+  auto confirmed_early = storage_.check_time_based_confirmation(before_timeout);
+  EXPECT_TRUE(confirmed_early.empty());
 
   // Check after timeout - should confirm
   auto after_timeout = rclcpp::Time(now.nanoseconds() + static_cast<int64_t>(15e9));
-  size_t confirmed = storage_.check_time_based_confirmation(after_timeout);
-  EXPECT_EQ(confirmed, 1u);
+  auto confirmed = storage_.check_time_based_confirmation(after_timeout);
+  ASSERT_EQ(confirmed.size(), 1u);
+  EXPECT_EQ(confirmed[0], "FAULT_1");
 
   auto fault = storage_.get_fault("FAULT_1");
   ASSERT_TRUE(fault.has_value());
@@ -1479,6 +1480,56 @@ TEST(FaultAuditDisabledTest, NoAuditFileWhenDisabled) {
   EXPECT_FALSE(std::filesystem::exists(audit_path));
 }
 
+// Timer-driven (PREFAILED->CONFIRMED) auto-confirmations must be audited, not
+// silently applied. Sets auto_confirm_after_sec and asserts a "confirmed" audit
+// row appears after the 1 Hz timer fires.
+TEST(FaultAuditTimerTest, TimerConfirmationAppendsConfirmedAuditRow) {
+  rclcpp::NodeOptions options;
+  options.parameter_overrides({
+      {"storage_type", "memory"},
+      {"confirmation_threshold", -3},  // keep the fault PREFAILED so only the timer confirms it
+      {"auto_confirm_after_sec", 0.2},
+      {"audit_log.enabled", true},  // in-memory audit DB (memory storage)
+  });
+  auto node = std::make_shared<FaultManagerNode>(options);
+  const auto * audit = node->get_audit_log_for_test();
+  ASSERT_NE(audit, nullptr);
+
+  // Seed a PREFAILED fault directly in storage; the node's auto-confirm timer must later confirm and audit it.
+  DebounceConfig config;
+  config.confirmation_threshold = -3;
+  config.auto_confirm_after_sec = 0.2;
+  rclcpp::Clock clock(RCL_SYSTEM_TIME);
+  node->get_storage_for_test().report_fault_event("AUTO_CONF_1", ReportFault::Request::EVENT_FAILED,
+                                                  Fault::SEVERITY_ERROR, "stuck", "/robot/src", clock.now(), config);
+  const auto seeded = node->get_storage().get_fault("AUTO_CONF_1");
+  ASSERT_TRUE(seeded.has_value());  // fail cleanly instead of dereferencing an empty optional
+  ASSERT_EQ(seeded->status, Fault::STATUS_PREFAILED);
+
+  // Spin until a confirmed audit row appears or the 5 s budget expires (the wall timer fires once per second).
+  bool saw_confirmed = false;
+  const auto deadline = std::chrono::steady_clock::now() + std::chrono::seconds(5);
+  while (!saw_confirmed && std::chrono::steady_clock::now() < deadline) {
+    rclcpp::spin_some(node);
+    for (const auto & rec : audit->read()) {
+      if (rec.event.fault_code == "AUTO_CONF_1" &&
+          rec.event.transition == ros2_medkit_fault_manager::kTransitionConfirmed) {
+        saw_confirmed = true;
+        break;
+      }
+    }
+    if (!saw_confirmed) {
+      std::this_thread::sleep_for(std::chrono::milliseconds(20));  // back off only while still waiting
+    }
+  }
+
+  EXPECT_TRUE(saw_confirmed) << "timer-driven confirmation was not audited";
+  const auto confirmed = node->get_storage().get_fault("AUTO_CONF_1");
+  ASSERT_TRUE(confirmed.has_value());  // guard the optional before reading its status
+  EXPECT_EQ(confirmed->status, Fault::STATUS_CONFIRMED);
+  EXPECT_TRUE(audit->verify().ok);
+}
+
 int main(int argc, char ** argv) {
   rclcpp::init(argc, argv);
   ::testing::InitGoogleTest(&argc, argv);
diff --git a/src/ros2_medkit_fault_manager/test/test_sqlite_storage.cpp b/src/ros2_medkit_fault_manager/test/test_sqlite_storage.cpp
index 1f414dff..ddb2f033 100644
--- a/src/ros2_medkit_fault_manager/test/test_sqlite_storage.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_sqlite_storage.cpp
@@ -568,13 +568,14 @@ TEST_F(SqliteFaultStorageTest, TimeBasedConfirmationWhenEnabled) {
 
   // Check before timeout - should not confirm
   auto before_timeout = rclcpp::Time(now.nanoseconds() + static_cast<int64_t>(5e9));
-  size_t confirmed_early = storage_->check_time_based_confirmation(before_timeout);
-  EXPECT_EQ(confirmed_early, 0u);
+  auto confirmed_early = storage_->check_time_based_confirmation(before_timeout);
+  EXPECT_TRUE(confirmed_early.empty());
 
   // Check after timeout - should confirm
   auto after_timeout = rclcpp::Time(now.nanoseconds() + static_cast<int64_t>(15e9));
-  size_t confirmed = storage_->check_time_based_confirmation(after_timeout);
-  EXPECT_EQ(confirmed, 1u);
+  auto confirmed = storage_->check_time_based_confirmation(after_timeout);
+  ASSERT_EQ(confirmed.size(), 1u);
+  EXPECT_EQ(confirmed[0], "FAULT_1");
 
   auto fault = storage_->get_fault("FAULT_1");
   ASSERT_TRUE(fault.has_value());

From 835b8621a499c0647a52c6ae21f43fd06b7fd595 Mon Sep 17 00:00:00 2001
From: mfaferek93 <michal.faferek@selfpatch.ai>
Date: Tue, 30 Jun 2026 19:43:52 +0200
Subject: [PATCH 4/5] fix(fault-audit): guard null DB handle and use PRId64 for
 int64 logs

raw_exec_rc no longer runs sqlite3_exec on a failed sqlite3_open; it
records the failure, closes the handle and returns early. The audit-log
enabled RCLCPP_INFO now formats int64_t retention/seq with PRId64
(<cinttypes>) to satisfy -Werror=format=2.

Refs #483
---
 src/ros2_medkit_fault_manager/src/fault_manager_node.cpp    | 4 +++-
 src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp | 6 +++++-
 2 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
index 912593a7..5103884d 100644
--- a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
@@ -17,6 +17,7 @@
 #include <yaml-cpp/yaml.h>
 
 #include <cctype>
+#include <cinttypes>
 #include <filesystem>
 #include <fstream>
 #include <nlohmann/json.hpp>
@@ -410,7 +411,8 @@ std::unique_ptr<FaultAuditLog> FaultManagerNode::create_audit_log() {
 
   try {
     auto log = std::make_unique<FaultAuditLog>(audit_path, retention);
-    RCLCPP_INFO(get_logger(), "Fault audit log enabled: %s (transitions=%s, retention=%ld, resume_seq=%ld)",
+    RCLCPP_INFO(get_logger(),
+                "Fault audit log enabled: %s (transitions=%s, retention=%" PRId64 ", resume_seq=%" PRId64 ")",
                 audit_path.c_str(), audit_confirmed_only_ ? "confirmed_only" : "all", retention, log->head().seq);
     return log;
   } catch (const std::exception & e) {
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
index 6a959b8e..581ba006 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
@@ -59,7 +59,11 @@ void raw_exec(const std::string & db_path, const std::string & sql) {
 /// can confirm the append-only triggers reject a write.
 int raw_exec_rc(const std::string & db_path, const std::string & sql) {
   sqlite3 * db = nullptr;
-  EXPECT_EQ(sqlite3_open(db_path.c_str(), &db), SQLITE_OK);
+  if (sqlite3_open(db_path.c_str(), &db) != SQLITE_OK) {
+    ADD_FAILURE() << "sqlite3_open failed: " << (db ? sqlite3_errmsg(db) : "out of memory");
+    sqlite3_close(db);  // sqlite3 allows close on a failed-open handle (incl. nullptr).
+    return SQLITE_ERROR;
+  }
   int rc = sqlite3_exec(db, sql.c_str(), nullptr, nullptr, nullptr);
   sqlite3_close(db);
   return rc;

From 887280053e23befed35ba7bd8f6a409f80464ce7 Mon Sep 17 00:00:00 2001
From: mfaferek93 <michal.faferek@selfpatch.ai>
Date: Tue, 30 Jun 2026 22:33:57 +0200
Subject: [PATCH 5/5] fix(fault-audit): audit auto-heal and stop silent
 append-failure gaps

Record auto-recovery as a distinct "healed" row (source auto_heal) so a
fault's end is in the chain. Append failures now bump a dropped-writes
health counter and clear an audit-healthy flag; audit_log.fail_closed
makes them a hard error. Drop the never-written "ack" kind (clear == ack),
protect audit_prune_guard with a trigger, add logging activate/deactivate
markers, and tighten the threat-model README.

Refs #483
---
 src/ros2_medkit_fault_manager/README.md       |   9 +-
 .../config/fault_manager.yaml                 |  19 +-
 .../fault_audit_log.hpp                       |  16 +-
 .../fault_manager_node.hpp                    |  27 ++-
 .../src/fault_audit_log.cpp                   |  36 ++++
 .../src/fault_manager_node.cpp                |  72 +++++++-
 .../test/test_fault_audit_log.cpp             |  39 +++++
 .../test/test_fault_manager.cpp               | 165 +++++++++++++++++-
 8 files changed, 358 insertions(+), 25 deletions(-)

diff --git a/src/ros2_medkit_fault_manager/README.md b/src/ros2_medkit_fault_manager/README.md
index 4ca1cd98..30d7fa99 100644
--- a/src/ros2_medkit_fault_manager/README.md
+++ b/src/ros2_medkit_fault_manager/README.md
@@ -112,22 +112,25 @@ patterns:
 
 ## Advanced: Tamper-Evident Audit Log
 
-An optional append-only, hash-chained audit log records every fault state transition (`occurred`, `confirmed`, `cleared`) so the fault history is independently verifiable. It is **off by default** because it adds a write and storage cost per transition.
+An optional append-only, hash-chained audit log records every fault state transition (`occurred`, `confirmed`, `healed`, `cleared`) so the fault history is independently verifiable. Auto-recovery (a fault reaching the healing threshold via PASSED events) is recorded as a distinct `healed` row with source `auto_heal`, so the fault's END is in the timeline and is not confused with a manual `cleared`. The manager has no acknowledge action separate from clearing, so `~/clear_fault` is recorded as `cleared` (clear == ack); there is no `ack` kind. The log also records its own lifecycle with `logging_activated` / `logging_deactivated` markers (CIR (EU) 2024/2690 sec. 3.2) at start and stop. It is **off by default** because it adds a write and storage cost per transition.
 
 Each transition appends one immutable row holding `record_hash = sha256(prev_hash + canonical(event))` (OpenSSL EVP SHA-256), the `prev_hash` it links to, and a monotonic `seq`. The hash is computed once at insert and never recomputed. A persisted chain head lets the chain resume across restarts. The log is stored in its own SQLite database (separate from the fault store) and is treated as append-only: the manager only ever inserts rows, and `BEFORE UPDATE` / `BEFORE DELETE` triggers reject out-of-band edits (the guarded rotation prune excepted).
 
+**Completeness is an integrity property.** `verify()` detects rows that were *deleted* from the chain, but it cannot detect a transition that was *never appended*, so a silently dropped append is a hole `verify()` can never see. Every transition on the write path is therefore audited (occurred, timer/threshold confirmations, auto-heal, and clears), and an append failure for a fault transition is never swallowed silently: it increments a dropped-writes health counter and clears an "audit healthy" flag. (The `logging_activated` / `logging_deactivated` lifecycle markers are best-effort: a failed marker append is only logged as a warning.) With `audit_log.fail_closed` set, an append failure is a **hard error** that aborts the operation, so a compliance-strict deployment learns the audit broke instead of losing records unnoticed. The default (`fail_closed=false`) keeps fault processing running but still surfaces the gap via the health counter.
+
 `verify()` walks the persisted chain oldest-first and recomputes every link: editing a row breaks its `record_hash`, deleting a row breaks the next row's `prev_hash` linkage, and deleting the newest row (the head row is read straight from the DB) is caught by the persisted-head check.
 
-**Threat model (read this).** The chain is **unkeyed**, and the head and segment anchors live in the **same writable SQLite file** as the rows. `verify()` therefore catches edits or deletions that did **not** also recompute the chain - that is, casual or accidental tampering, and the bookkeeping bugs that would otherwise lose records. It does **not** stop an attacker with write access to the file: such an attacker can drop the triggers and recompute the entire chain (and head and anchors) to forge a self-consistent history. The append-only triggers are defense-in-depth, **not** a security boundary. True tamper-*proofing* requires a key or signature over the head (so it cannot be recomputed without the key) or external anchoring of the head hash to an append-only store you do not control; both are out of scope here and belong to the audit-log exporter / signing follow-up.
+**Threat model (read this).** The chain is **unkeyed**, and the head and segment anchors live in the **same writable SQLite file** as the rows. `verify()` therefore catches edits or deletions that did **not** also recompute the chain - that is, casual or accidental tampering, and the bookkeeping bugs that would otherwise lose records. The append-only triggers are defense-in-depth: `audit_log` rejects out-of-band UPDATE/DELETE, and the rotation-prune guard (`audit_prune_guard`) is itself protected by a trigger so an external writer cannot simply flip it open and then delete a prefix - that flip is only permitted from the in-process connection, which registers the `audit_prune_authorized()` SQL function that the trigger calls. The single-row chain head (`audit_chain_head`) is intentionally **not** trigger-protected (a trigger there would block the legitimate head update inside the append transaction); a casual edit or delete of the head is instead caught by `verify()` via the seq/hash/head-mismatch checks. None of this stops an attacker with write access to the file: such an attacker can register the same function on their own connection or drop the triggers, and recompute the entire chain (head and anchors included) to forge a self-consistent history. The triggers are **not** a security boundary - this is tamper-**evident**, not tamper-**proof**. True tamper-*proofing* requires a key or signature over the head (so it cannot be recomputed without the key) or external anchoring of the head hash to an append-only store you do not control; both are out of scope here and belong to the audit-log exporter / signing follow-up.
 
 **Retention/rotation**: when more than `audit_log.retention_max_records` rows are retained, the oldest segment is *sealed* (its final `seq` + hash are persisted as an anchor) and then pruned. The surviving tail still verifies because the oldest retained row links back to the sealed anchor.
 
 | Parameter | Type | Default | Description |
 |-----------|------|---------|-------------|
 | `audit_log.enabled` | bool | `false` | Enable the tamper-evident audit log |
-| `audit_log.transitions` | string | `"all"` | Which transitions to record: `"all"` or `"confirmed_only"` |
+| `audit_log.transitions` | string | `"all"` | Which transitions to record: `"all"` (occurred/confirmed/healed/cleared) or `"confirmed_only"`. Lifecycle markers are always recorded. |
 | `audit_log.database_path` | string | `""` | SQLite path. Empty => sibling `fault_audit.db` next to the fault DB (or `:memory:` for in-memory fault stores) |
 | `audit_log.retention_max_records` | int | `0` | Seal + prune the oldest segment beyond this many retained records (0 = unlimited) |
+| `audit_log.fail_closed` | bool | `false` | When `true`, an audit append failure is a hard error that aborts the operation (compliance-strict). When `false`, the failure is logged and counted but fault processing continues. Either way the gap is visible via the dropped-writes health counter. |
 
 ## Usage
 
diff --git a/src/ros2_medkit_fault_manager/config/fault_manager.yaml b/src/ros2_medkit_fault_manager/config/fault_manager.yaml
index 13d1f57a..fde100d2 100644
--- a/src/ros2_medkit_fault_manager/config/fault_manager.yaml
+++ b/src/ros2_medkit_fault_manager/config/fault_manager.yaml
@@ -29,13 +29,13 @@ fault_manager:
     # snapshots.capture_queue_full_policy: reject_newest  # reject_newest | drop_oldest
 
     # Append-only, hash-chained audit log of fault state transitions
-    # (occurred/confirmed/cleared). OFF by default: it adds a write + storage cost
-    # per transition. When enabled, each transition appends one immutable,
-    # hash-chained row, so verify() detects edits or deletions that did NOT also
-    # recompute the chain (casual/accidental tampering). The chain is unkeyed and
-    # lives in a single writable file, so it is NOT proof against an attacker who
-    # can rewrite the whole file; true tamper-proofing needs a signed head or
-    # external anchoring (out of scope here). See README "Threat model".
+    # (occurred/confirmed/healed/cleared). OFF by default: it adds a write +
+    # storage cost per transition. When enabled, each transition appends one
+    # immutable, hash-chained row, so verify() detects edits or deletions that did
+    # NOT also recompute the chain (casual/accidental tampering). The chain is
+    # unkeyed and lives in a single writable file, so it is NOT proof against an
+    # attacker who can rewrite the whole file; true tamper-proofing needs a signed
+    # head or external anchoring (out of scope here). See README "Threat model".
     audit_log.enabled: false
     # Which transitions to record: "all" or "confirmed_only".
     # audit_log.transitions: all
@@ -45,3 +45,8 @@ fault_manager:
     # Seal + prune the oldest segment beyond this many retained records
     # (0 = unlimited). A sealed anchor keeps the surviving tail verifiable.
     # audit_log.retention_max_records: 0
+    # Fail-closed (compliance-strict): when true, an audit append failure is a hard
+    # error that aborts the operation instead of being logged and dropped. Default
+    # false keeps fault processing running; either way a dropped-writes health
+    # counter makes the gap visible.
+    # audit_log.fail_closed: false
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
index 457bca9e..3a954159 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_audit_log.hpp
@@ -32,7 +32,7 @@ namespace ros2_medkit_fault_manager {
 /// to a stored row that does not also recompute the chain breaks verify().
 struct AuditEvent {
   std::string fault_code;
-  std::string transition;     ///< occurred | confirmed | cleared | ack
+  std::string transition;     ///< one of the kTransition* constants below
   uint8_t severity{0};        ///< severity at the time of the transition
   std::string status;         ///< resulting fault status (e.g. CONFIRMED)
   std::string source_id;      ///< reporting source that drove the transition
@@ -44,7 +44,19 @@ struct AuditEvent {
 constexpr const char * kTransitionOccurred = "occurred";
 constexpr const char * kTransitionConfirmed = "confirmed";
 constexpr const char * kTransitionCleared = "cleared";
-constexpr const char * kTransitionAck = "ack";
+/// Auto-recovery: a fault reached the healing threshold via PASSED events. Kept
+/// distinct from kTransitionCleared so an automatic recovery is not mistaken for
+/// a manual clear in the timeline.
+constexpr const char * kTransitionHealed = "healed";
+/// Audit-log lifecycle markers (CIR (EU) 2024/2690 sec. 3.2: activation /
+/// deactivation of logging). Appended directly, independent of the per-fault
+/// transition filter, so the log records its own start and stop.
+constexpr const char * kTransitionLoggingActivated = "logging_activated";
+constexpr const char * kTransitionLoggingDeactivated = "logging_deactivated";
+// NOTE: there is deliberately no "ack" kind. The open fault_manager has no
+// acknowledge action separate from clearing: ~/clear_fault IS the acknowledge,
+// and it is recorded as kTransitionCleared (clear == ack). A separate "ack" kind
+// would never be written, so defining it would only mislead readers of the log.
 
 /// One immutable, hash-chained row read back from the audit log.
 struct AuditRecord {
diff --git a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
index 61cf227c..a2226537 100644
--- a/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
+++ b/src/ros2_medkit_fault_manager/include/ros2_medkit_fault_manager/fault_manager_node.hpp
@@ -14,7 +14,9 @@
 
 #pragma once
 
+#include <atomic>
 #include <chrono>
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <unordered_map>
@@ -72,6 +74,26 @@ class FaultManagerNode : public rclcpp::Node {
     return audit_log_.get();
   }
 
+  /// Health signal: count of fault-transition audit appends that failed (rows lost from the
+  /// chain); 0 while healthy or disabled. Failed lifecycle-marker appends are only logged.
+  uint64_t audit_dropped_writes() const {
+    return audit_dropped_writes_.load(std::memory_order_relaxed);
+  }
+
+  /// Health signal: starts true and reads false once an audit_transition() append has
+  /// failed. A compliance-strict deployment should treat false here (or a non-zero
+  /// audit_dropped_writes()) as the audit chain being incomplete.
+  bool audit_healthy() const {
+    return audit_healthy_.load(std::memory_order_relaxed);
+  }
+
+  /// Test-only hook: forwards to audit_transition() (trailing args fixed to "test", 0), i.e. the
+  /// same failure-handling path the service handlers use. Rethrows the append error when
+  /// audit_log.fail_closed is set; otherwise inspect audit_dropped_writes()/audit_healthy().
+  void audit_transition_for_test(const char * transition, const ros2_medkit_msgs::msg::Fault & fault) {
+    audit_transition(transition, fault, "test", 0);
+  }
+
   /// Get the storage type being used
   const std::string & get_storage_type() const {
     return storage_type_;
@@ -187,7 +209,10 @@ class FaultManagerNode : public rclcpp::Node {
 
   /// Tamper-evident audit log of fault transitions (nullptr when disabled).
   std::unique_ptr<FaultAuditLog> audit_log_;
-  bool audit_confirmed_only_{false};  ///< When true, only "confirmed" transitions are logged
+  bool audit_confirmed_only_{false};               ///< When true, only "confirmed" transitions are logged
+  bool audit_fail_closed_{false};                  ///< When true, an audit append failure aborts the operation
+  std::atomic<uint64_t> audit_dropped_writes_{0};  ///< Count of audit appends that failed (lost rows)
+  std::atomic<bool> audit_healthy_{true};          ///< Cleared on the first failed audit append
 
   rclcpp::Service<ros2_medkit_msgs::srv::ReportFault>::SharedPtr report_fault_srv_;
   rclcpp::Service<ros2_medkit_msgs::srv::ListFaults>::SharedPtr list_faults_srv_;
diff --git a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
index 88710a53..78b47c81 100644
--- a/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_audit_log.cpp
@@ -91,6 +91,15 @@ void exec_or_throw(sqlite3 * db, const char * sql, const char * what) {
   }
 }
 
+/// SQL function registered ONLY on this in-process connection; it ignores its
+/// arguments and always yields integer 1. The prune-guard protection trigger
+/// calls it, so an out-of-band connection (which never registered it) cannot flip
+/// the guard: its UPDATE fails on the unknown function. Connection-scoped, hence
+/// not a security boundary - an adversary can register it too or drop the trigger.
+void audit_prune_authorized(sqlite3_context * ctx, int /*argc*/, sqlite3_value ** /*argv*/) {
+  sqlite3_result_int(ctx, 1);
+}
+
 }  // namespace
 
 FaultAuditLog::FaultAuditLog(const std::string & db_path, int64_t retention_max_records)
@@ -108,6 +117,15 @@ FaultAuditLog::FaultAuditLog(const std::string & db_path, int64_t retention_max_
   exec_or_throw(db_, "PRAGMA journal_mode=WAL;", "enable WAL");
   sqlite3_busy_timeout(db_, 5000);
 
+  // Authorize this connection for the prune-guard protection trigger (see below).
+  if (sqlite3_create_function_v2(db_, "audit_prune_authorized", 0, SQLITE_UTF8, nullptr, &audit_prune_authorized,
+                                 nullptr, nullptr, nullptr) != SQLITE_OK) {
+    std::string error = sqlite3_errmsg(db_);
+    sqlite3_close(db_);
+    db_ = nullptr;
+    throw std::runtime_error("audit: failed to register prune-authorization function: " + error);
+  }
+
   initialize_schema();
   head_ = load_head_locked();
 }
@@ -242,6 +260,24 @@ void FaultAuditLog::initialize_schema() {
     END;
   )",
                 "create append-only triggers");
+
+  // Protect the prune guard itself. Without this, an external writer could simply
+  // `UPDATE audit_prune_guard SET enabled = 1` and then DELETE rows past the
+  // append-only delete trigger. The guard may only be flipped by a connection on
+  // which audit_prune_authorized() is registered (this in-process connection); an
+  // out-of-band UPDATE fails because that function is unknown to it. This is still
+  // defense-in-depth, not a security boundary: a write-capable adversary can
+  // register the same function or DROP this trigger. verify() remains the backstop.
+  exec_or_throw(db_,
+                R"(
+    CREATE TRIGGER IF NOT EXISTS audit_prune_guard_protect
+    BEFORE UPDATE ON audit_prune_guard
+    WHEN audit_prune_authorized() IS NOT 1
+    BEGIN
+      SELECT RAISE(ABORT, 'audit_prune_guard is protected (in-process prune only)');
+    END;
+  )",
+                "create prune-guard protection trigger");
 }
 
 std::optional<ChainHead> FaultAuditLog::read_head_row_locked() const {
diff --git a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
index 5103884d..e96d583c 100644
--- a/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
+++ b/src/ros2_medkit_fault_manager/src/fault_manager_node.cpp
@@ -327,6 +327,24 @@ FaultManagerNode::~FaultManagerNode() {
   if (rosbag_capture_) {
     rosbag_capture_->stop();
   }
+
+  // Close the chain with a "logging deactivated" marker (CIR (EU) 2024/2690
+  // sec. 3.2). Best-effort and appended directly (recorded even in confirmed_only
+  // mode); a failure here must not throw out of the destructor.
+  if (audit_log_) {
+    try {
+      AuditEvent marker;
+      marker.fault_code = "__audit__";
+      marker.transition = kTransitionLoggingDeactivated;
+      marker.status = "INACTIVE";
+      marker.source_id = "fault_manager";
+      marker.description = "audit logging deactivated";
+      marker.occurred_at_ns = get_wall_clock_time().nanoseconds();
+      audit_log_->append(marker);
+    } catch (const std::exception & e) {
+      RCLCPP_WARN(get_logger(), "Failed to append audit 'logging_deactivated' marker: %s", e.what());
+    }
+  }
 }
 
 std::unique_ptr<FaultStorage> FaultManagerNode::create_storage() {
@@ -380,6 +398,11 @@ std::unique_ptr<FaultAuditLog> FaultManagerNode::create_audit_log() {
     retention = 0;
   }
 
+  // Fail-closed: when set, an append failure is a hard error (the operation
+  // aborts and the audit is marked unhealthy) instead of being logged and
+  // silently dropped. Off by default so the audit never blocks fault processing.
+  audit_fail_closed_ = declare_parameter<bool>("audit_log.fail_closed", false);
+
   // Path: explicit override, else a sibling of the fault DB, else :memory:.
   std::string audit_path = declare_parameter<std::string>("audit_log.database_path", "");
 
@@ -412,8 +435,26 @@ std::unique_ptr<FaultAuditLog> FaultManagerNode::create_audit_log() {
   try {
     auto log = std::make_unique<FaultAuditLog>(audit_path, retention);
     RCLCPP_INFO(get_logger(),
-                "Fault audit log enabled: %s (transitions=%s, retention=%" PRId64 ", resume_seq=%" PRId64 ")",
-                audit_path.c_str(), audit_confirmed_only_ ? "confirmed_only" : "all", retention, log->head().seq);
+                "Fault audit log enabled: %s (transitions=%s, retention=%" PRId64
+                ", fail_closed=%s, resume_seq=%" PRId64 ")",
+                audit_path.c_str(), audit_confirmed_only_ ? "confirmed_only" : "all", retention,
+                audit_fail_closed_ ? "true" : "false", log->head().seq);
+
+    // Record a "logging activated" marker so the chain documents its own start
+    // (CIR (EU) 2024/2690 sec. 3.2). Appended directly, so it is recorded even in
+    // confirmed_only mode. Best-effort: a failure here does not abort startup.
+    AuditEvent marker;
+    marker.fault_code = "__audit__";
+    marker.transition = kTransitionLoggingActivated;
+    marker.status = "ACTIVE";
+    marker.source_id = "fault_manager";
+    marker.description = "audit logging activated";
+    marker.occurred_at_ns = get_wall_clock_time().nanoseconds();
+    try {
+      log->append(marker);
+    } catch (const std::exception & e) {
+      RCLCPP_WARN(get_logger(), "Failed to append audit 'logging_activated' marker: %s", e.what());
+    }
     return log;
   } catch (const std::exception & e) {
     RCLCPP_ERROR(get_logger(), "Failed to open fault audit log '%s': %s", audit_path.c_str(), e.what());
@@ -442,8 +483,22 @@ void FaultManagerNode::audit_transition(const char * transition, const ros2_medk
   try {
     audit_log_->append(event);
   } catch (const std::exception & e) {
-    RCLCPP_ERROR(get_logger(), "Failed to append audit record for '%s' (%s): %s", fault.fault_code.c_str(), transition,
-                 e.what());
+    // A dropped append is a hole in the chain that verify() can never see (it
+    // proves nothing was deleted, not that everything was written). Always make
+    // it loud and visible via the health counter so the gap is observable.
+    audit_dropped_writes_.fetch_add(1, std::memory_order_relaxed);
+    audit_healthy_.store(false, std::memory_order_relaxed);
+    const uint64_t dropped = audit_dropped_writes_.load(std::memory_order_relaxed);
+    RCLCPP_ERROR(get_logger(), "Failed to append audit record for '%s' (%s): %s [audit dropped_writes=%" PRIu64 "]",
+                 fault.fault_code.c_str(), transition, e.what(), dropped);
+    if (audit_fail_closed_) {
+      // Compliance-strict: refuse to proceed as if nothing happened. Abort the
+      // operation so the broken audit cannot be silently outlived.
+      RCLCPP_FATAL(get_logger(),
+                   "audit_log.fail_closed is set: aborting operation because the audit append failed for '%s' (%s)",
+                   fault.fault_code.c_str(), transition);
+      throw;
+    }
   }
 }
 
@@ -549,6 +604,12 @@ void FaultManagerNode::handle_report_fault(
     }
     // Note: PREFAILED/PREPASSED status changes don't emit events (debounce in progress)
 
+    // A PASSED event can drive an existing fault past the healing threshold to
+    // HEALED. That is the fault's END and must be audited (with a distinct kind
+    // and source) so the timeline is complete, not just occurred+confirmed.
+    const bool just_healed = !is_new && status_before != ros2_medkit_msgs::msg::Fault::STATUS_HEALED &&
+                             fault_after->status == ros2_medkit_msgs::msg::Fault::STATUS_HEALED;
+
     // Append tamper-evident audit records for the transitions that just happened.
     // Recorded regardless of correlation muting: muting affects display, not the
     // fact that the state transition occurred.
@@ -558,6 +619,9 @@ void FaultManagerNode::handle_report_fault(
     if (just_confirmed) {
       audit_transition(kTransitionConfirmed, *fault_after, request->source_id, event_time.nanoseconds());
     }
+    if (just_healed) {
+      audit_transition(kTransitionHealed, *fault_after, "auto_heal", event_time.nanoseconds());
+    }
 
     // Capture snapshots/rosbag when a fault confirms via the bounded pool (issue #441).
     // handle_report_fault runs on the single-threaded executor, so confirmations are
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
index 581ba006..d6e6ed06 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_audit_log.cpp
@@ -26,6 +26,7 @@ using ros2_medkit_fault_manager::AuditEvent;
 using ros2_medkit_fault_manager::FaultAuditLog;
 using ros2_medkit_fault_manager::kTransitionCleared;
 using ros2_medkit_fault_manager::kTransitionConfirmed;
+using ros2_medkit_fault_manager::kTransitionHealed;
 using ros2_medkit_fault_manager::kTransitionOccurred;
 
 namespace {
@@ -334,6 +335,44 @@ TEST_F(FaultAuditLogTest, RotationPrunePassesAppendOnlyTrigger) {
   EXPECT_TRUE(log.verify().ok);
 }
 
+// An auto-recovery "healed" row is hash-chained and verified like any other
+// transition, yet stays distinct from a "cleared" row so the timeline can tell them apart.
+TEST_F(FaultAuditLogTest, HealedTransitionChainVerifies) {
+  FaultAuditLog audit(path_);
+  audit.append(make_event("F1", kTransitionOccurred, 100));
+  audit.append(make_event("F1", kTransitionConfirmed, 200));
+  audit.append(make_event("F1", kTransitionHealed, 300));
+  auto rows = audit.read();
+  ASSERT_EQ(rows.size(), 3u);
+  const auto & healed_row = rows.back();  // index 2: the last of the three rows appended above
+  EXPECT_EQ(healed_row.event.transition, kTransitionHealed);
+  EXPECT_NE(healed_row.event.transition, std::string(kTransitionCleared));
+  EXPECT_TRUE(audit.verify().ok);
+}
+
+// Defense-in-depth: the prune guard is itself protected. An out-of-band
+// connection cannot flip audit_prune_guard open, and so cannot go on to delete a
+// prefix past the append-only delete trigger. The in-process prune still works.
+TEST_F(FaultAuditLogTest, GuardProtectTriggerBlocksExternalGuardFlip) {
+  {
+    FaultAuditLog writer(path_, /*retention_max_records=*/3);
+    for (int offset = 0; offset < 8; ++offset) {
+      writer.append(make_event("F" + std::to_string(offset + 1), kTransitionOccurred, 101 + offset));
+    }
+    EXPECT_EQ(writer.record_count(), 3);  // in-process prune succeeded despite the protect trigger
+    EXPECT_TRUE(writer.verify().ok);
+  }
+
+  // An external writer (no in-process temp unlock marker) must fail to open the guard.
+  EXPECT_NE(raw_exec_rc(path_, "UPDATE audit_prune_guard SET enabled = 1 WHERE id = 1"), SQLITE_OK);
+  // The guard is therefore still closed, so the delete trigger keeps rejecting a raw DELETE.
+  EXPECT_NE(raw_exec_rc(path_, "DELETE FROM audit_log"), SQLITE_OK);
+
+  FaultAuditLog after_reopen(path_, 3);
+  EXPECT_EQ(after_reopen.record_count(), 3);
+  EXPECT_TRUE(after_reopen.verify().ok);
+}
+
 int main(int argc, char ** argv) {
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
diff --git a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
index 28411122..9a9dbc37 100644
--- a/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
+++ b/src/ros2_medkit_fault_manager/test/test_fault_manager.cpp
@@ -13,6 +13,7 @@
 // limitations under the License.
 
 #include <gtest/gtest.h>
+#include <sqlite3.h>
 
 #include <atomic>
 #include <chrono>
@@ -20,6 +21,7 @@
 #include <memory>
 #include <optional>
 #include <random>
+#include <string>
 #include <thread>
 #include <vector>
 
@@ -1382,6 +1384,8 @@ class FaultAuditIntegrationTest : public ::testing::Test {
     fm_options.parameter_overrides({
         {"storage_type", "memory"},
         {"confirmation_threshold", -1},
+        {"healing_enabled", true},
+        {"healing_threshold", 1},  // counter >= 1 heals (two PASSED after one FAILED)
         {"audit_log.enabled", true},
         {"audit_log.database_path", audit_path_},
     });
@@ -1448,19 +1452,66 @@ TEST_F(FaultAuditIntegrationTest, TransitionsAppendVerifiableChain) {
   ASSERT_TRUE(spin_until_ready(cf));
   ASSERT_TRUE(cf.get()->success);
 
-  // Reopen the audit DB independently and inspect the persisted chain.
+  // Reopen the audit DB independently and inspect the persisted chain. The chain
+  // opens with a "logging_activated" lifecycle marker (seq 1), then the three
+  // fault transitions. The "logging_deactivated" marker is only appended when the
+  // node is destroyed (TearDown), so it is not visible to this read.
   ros2_medkit_fault_manager::FaultAuditLog audit(audit_path_);
   auto records = audit.read();
-  ASSERT_EQ(records.size(), 3u);
-  EXPECT_EQ(records[0].event.transition, ros2_medkit_fault_manager::kTransitionOccurred);
-  EXPECT_EQ(records[1].event.transition, ros2_medkit_fault_manager::kTransitionConfirmed);
-  EXPECT_EQ(records[2].event.transition, ros2_medkit_fault_manager::kTransitionCleared);
-  EXPECT_EQ(records[0].event.fault_code, "AUDIT_FAULT");
-  EXPECT_EQ(records[1].event.source_id, "/plc/pump");
+  ASSERT_EQ(records.size(), 4u);
+  EXPECT_EQ(records[0].event.transition, ros2_medkit_fault_manager::kTransitionLoggingActivated);
+  EXPECT_EQ(records[1].event.transition, ros2_medkit_fault_manager::kTransitionOccurred);
+  EXPECT_EQ(records[2].event.transition, ros2_medkit_fault_manager::kTransitionConfirmed);
+  EXPECT_EQ(records[3].event.transition, ros2_medkit_fault_manager::kTransitionCleared);
+  EXPECT_EQ(records[1].event.fault_code, "AUDIT_FAULT");
+  EXPECT_EQ(records[2].event.source_id, "/plc/pump");
+
+  auto result = audit.verify();
+  EXPECT_TRUE(result.ok) << result.error;
+  EXPECT_EQ(result.checked, 4);
+}
+
+// Completeness: an auto-healed fault must record its END. One FAILED confirms the
+// fault (occurred + confirmed); two PASSED drive the debounce counter to the
+// healing threshold, which must append a distinct "healed" row (source auto_heal),
+// and the full occurred -> confirmed -> healed chain must verify.
+TEST_F(FaultAuditIntegrationTest, AutoHealAppendsHealedRow) {
+  auto send_report = [&](uint8_t event_type) {  // sends one HEAL_FAULT report and waits for the reply
+    auto req = std::make_shared<ReportFault::Request>();
+    req->fault_code = "HEAL_FAULT";
+    req->event_type = event_type;
+    req->severity = Fault::SEVERITY_ERROR;  // not CRITICAL: goes through debounce
+    req->description = "intermittent sensor";
+    req->source_id = "/robot/sensor";
+    auto fut = report_client_->async_send_request(req);
+    ASSERT_TRUE(spin_until_ready(fut));  // NOTE: a fatal failure here only leaves this lambda, not the test
+    ASSERT_TRUE(fut.get()->accepted);
+  };
+
+  send_report(ReportFault::Request::EVENT_FAILED);  // counter -1, threshold -1 => CONFIRMED
+  send_report(ReportFault::Request::EVENT_PASSED);  // counter 0 (hysteresis): stays CONFIRMED
+  send_report(ReportFault::Request::EVENT_PASSED);  // counter 1 >= healing_threshold => HEALED
+
+  ros2_medkit_fault_manager::FaultAuditLog audit(audit_path_);  // separate reader on the same audit DB path
+  std::vector<std::string> transitions;  // HEAL_FAULT transitions, in chain order
+  std::string heal_source;  // source_id recorded on the "healed" row
+  for (const auto & rec : audit.read()) {
+    if (rec.event.fault_code == "HEAL_FAULT") {  // skip rows for other fault codes
+      transitions.push_back(rec.event.transition);
+      if (rec.event.transition == ros2_medkit_fault_manager::kTransitionHealed) {
+        heal_source = rec.event.source_id;
+      }
+    }
+  }
+
+  ASSERT_EQ(transitions.size(), 3u) << "expected occurred, confirmed, healed";
+  EXPECT_EQ(transitions[0], ros2_medkit_fault_manager::kTransitionOccurred);
+  EXPECT_EQ(transitions[1], ros2_medkit_fault_manager::kTransitionConfirmed);
+  EXPECT_EQ(transitions[2], ros2_medkit_fault_manager::kTransitionHealed);
+  EXPECT_EQ(heal_source, "auto_heal");
 
   auto result = audit.verify();
   EXPECT_TRUE(result.ok) << result.error;
-  EXPECT_EQ(result.checked, 3);
 }
 
 TEST(FaultAuditDisabledTest, NoAuditFileWhenDisabled) {
@@ -1530,6 +1581,104 @@ TEST(FaultAuditTimerTest, TimerConfirmationAppendsConfirmedAuditRow) {
   EXPECT_TRUE(audit->verify().ok);
 }
 
+namespace {
+
+/// Force the node's next audit append to fail: insert a row at the seq it will try
+/// next (MAX(seq)+1) so its INSERT collides on the seq PRIMARY KEY. Uses a separate
+/// connection (append-only triggers do not block INSERT), closed on every path.
+void poison_next_audit_seq(const std::string & db_path) {
+  const char * sql =
+      "INSERT INTO audit_log (seq, occurred_at_ns, fault_code, transition, severity, status, source_id, "
+      "description, prev_hash, record_hash) "
+      "SELECT COALESCE(MAX(seq), 0) + 1, 0, 'x', 'x', 0, 'x', 'x', 'x', 'x', 'x' FROM audit_log;";
+  sqlite3 * db = nullptr;
+  char * err = nullptr;
+  int rc = sqlite3_open(db_path.c_str(), &db);
+  rc = (rc == SQLITE_OK) ? sqlite3_exec(db, sql, nullptr, nullptr, &err) : rc;  // skip the INSERT if open failed
+  const std::string err_str = err ? err : sqlite3_errstr(rc);  // read before err is freed
+  sqlite3_free(err);
+  sqlite3_close(db);  // a failed sqlite3_open can still return a handle that must be released
+  ASSERT_EQ(rc, SQLITE_OK) << err_str;
+}
+
+Fault make_failclosed_fault() {
+  Fault injected;  // minimal CONFIRMED fault handed to the audit path by the fail-closed tests
+  injected.description = "injected";
+  injected.status = "CONFIRMED";
+  injected.severity = Fault::SEVERITY_ERROR;
+  injected.fault_code = "FAILCLOSED";
+  return injected;
+}
+
+std::string make_temp_audit_path(const char * prefix) {
+  std::random_device entropy;
+  std::mt19937_64 engine(entropy());
+  const uint64_t suffix = std::uniform_int_distribution<uint64_t>{}(engine);  // random, to avoid name clashes
+  return (std::filesystem::temp_directory_path() / (prefix + std::to_string(suffix) + ".db")).string();
+}
+
+void remove_audit_files(const std::string & path) {
+  std::error_code ignored;  // best-effort cleanup: removal errors are deliberately not reported
+  for (const char * suffix : {"", "-wal", "-shm"}) {
+    std::filesystem::remove(path + suffix, ignored);
+  }
+}
+
+}  // namespace
+
+// Silent-gap guard, default (fail-open) behaviour: with fail_closed false, an
+// append failure must NOT abort the operation, yet it must stay VISIBLE - the
+// dropped counter increments and the audit is flagged unhealthy (not silently lost).
+TEST(FaultAuditFailClosedTest, FailOpenFlagsButDoesNotThrow) {
+  const std::string audit_path = make_temp_audit_path("test_audit_failopen_");
+  {
+    const auto node_options = rclcpp::NodeOptions().parameter_overrides({
+        {"storage_type", "memory"},
+        {"audit_log.enabled", true},
+        {"audit_log.database_path", audit_path},
+        {"audit_log.fail_closed", false},
+    });
+    auto fm_node = std::make_shared<FaultManagerNode>(node_options);
+    ASSERT_TRUE(fm_node->audit_healthy());
+    EXPECT_EQ(fm_node->audit_dropped_writes(), 0u);
+
+    poison_next_audit_seq(audit_path);
+
+    EXPECT_NO_THROW(
+        fm_node->audit_transition_for_test(ros2_medkit_fault_manager::kTransitionConfirmed, make_failclosed_fault()));
+    EXPECT_EQ(fm_node->audit_dropped_writes(), 1u);
+    EXPECT_FALSE(fm_node->audit_healthy());
+  }
+  // The node went out of scope above, so its audit files can be removed now.
+  remove_audit_files(audit_path);
+}
+
+// Silent-gap guard, compliance-strict behaviour: with fail_closed true, an
+// injected append failure must abort the operation (throw) instead of silently
+// proceeding, and the same health signals must fire.
+TEST(FaultAuditFailClosedTest, FailClosedAbortsAndFlags) {
+  const std::string audit_path = make_temp_audit_path("test_audit_failclosed_");
+  {
+    const auto node_options = rclcpp::NodeOptions().parameter_overrides({
+        {"storage_type", "memory"},
+        {"audit_log.enabled", true},
+        {"audit_log.database_path", audit_path},
+        {"audit_log.fail_closed", true},
+    });
+    auto fm_node = std::make_shared<FaultManagerNode>(node_options);
+
+    poison_next_audit_seq(audit_path);
+
+    EXPECT_THROW(
+        fm_node->audit_transition_for_test(ros2_medkit_fault_manager::kTransitionConfirmed, make_failclosed_fault()),
+        std::exception);
+    EXPECT_EQ(fm_node->audit_dropped_writes(), 1u);
+    EXPECT_FALSE(fm_node->audit_healthy());
+  }
+  // The node went out of scope above, so its audit files can be removed now.
+  remove_audit_files(audit_path);
+}
+
 int main(int argc, char ** argv) {
   rclcpp::init(argc, argv);
   ::testing::InitGoogleTest(&argc, argv);