From bb71912939a81403c6239a401aab1f8f6a9ad0d4 Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Tue, 30 Jun 2026 13:39:28 +0200 Subject: [PATCH 1/8] Add shared fault-detection module and migrate OPC UA plugin Introduce ros2_medkit_fault_detection, a header-only evaluator that maps a raw value to faults via three composable modes: numeric threshold, status-word bit decode, and fault-code enum. Migrate the OPC UA plugin's threshold path onto it so it gains bit-decode and enum via new status_bits/fault_enum node-map blocks, with raise/clear edge detection in the shared tracker. Refs #481 --- .../CMakeLists.txt | 67 +++++ src/ros2_medkit_fault_detection/README.md | 55 ++++ .../fault_detection.hpp | 243 ++++++++++++++++++ src/ros2_medkit_fault_detection/package.xml | 22 ++ .../test/test_fault_detection.cpp | 177 +++++++++++++ .../ros2_medkit_opcua/CMakeLists.txt | 4 + .../ros2_medkit_opcua/README.md | 37 ++- .../include/ros2_medkit_opcua/node_map.hpp | 21 +- .../ros2_medkit_opcua/opcua_plugin.hpp | 5 +- .../ros2_medkit_opcua/opcua_poller.hpp | 13 +- .../ros2_medkit_opcua/package.xml | 1 + .../ros2_medkit_opcua/src/node_map.cpp | 74 +++++- .../ros2_medkit_opcua/src/opcua_plugin.cpp | 32 +-- .../ros2_medkit_opcua/src/opcua_poller.cpp | 73 +++--- .../ros2_medkit_opcua/test/test_node_map.cpp | 142 ++++++++++ 15 files changed, 900 insertions(+), 66 deletions(-) create mode 100644 src/ros2_medkit_fault_detection/CMakeLists.txt create mode 100644 src/ros2_medkit_fault_detection/README.md create mode 100644 src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp create mode 100644 src/ros2_medkit_fault_detection/package.xml create mode 100644 src/ros2_medkit_fault_detection/test/test_fault_detection.cpp diff --git a/src/ros2_medkit_fault_detection/CMakeLists.txt b/src/ros2_medkit_fault_detection/CMakeLists.txt new file mode 100644 index 000000000..8d0cbb089 --- /dev/null +++ b/src/ros2_medkit_fault_detection/CMakeLists.txt @@ -0,0 +1,67 @@ +# Copyright 2026 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.8) +project(ros2_medkit_fault_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_package(ros2_medkit_cmake REQUIRED) +include(ROS2MedkitWarnings) + +find_package(ament_cmake REQUIRED) + +# Header-only INTERFACE library: the detection logic is compiled directly into +# each consuming plugin (see header rationale). +add_library(ros2_medkit_fault_detection INTERFACE) +target_include_directories(ros2_medkit_fault_detection INTERFACE + $ + $ +) +target_compile_features(ros2_medkit_fault_detection INTERFACE cxx_std_17) + +install(TARGETS ros2_medkit_fault_detection + EXPORT export_${PROJECT_NAME} +) +install(DIRECTORY include/ + DESTINATION include +) + +ament_export_targets(export_${PROJECT_NAME}) +ament_export_include_directories(include) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + list(APPEND AMENT_LINT_AUTO_EXCLUDE + ament_cmake_uncrustify + ament_cmake_cpplint + ament_cmake_clang_tidy) + ament_lint_auto_find_test_dependencies() + + find_package(ament_cmake_clang_format QUIET) + if(ament_cmake_clang_format_FOUND) + file(GLOB_RECURSE _format_files "include/*.hpp" "test/*.cpp") + ament_clang_format(${_format_files} + CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../.clang-format") + endif() + + find_package(ament_cmake_gtest REQUIRED) + ament_add_gtest(test_fault_detection test/test_fault_detection.cpp) + target_link_libraries(test_fault_detection ros2_medkit_fault_detection) + + ros2_medkit_relax_vendor_warnings() +endif() + +ament_package() diff --git a/src/ros2_medkit_fault_detection/README.md b/src/ros2_medkit_fault_detection/README.md new file mode 100644 index 000000000..55c9ec513 --- /dev/null +++ b/src/ros2_medkit_fault_detection/README.md @@ -0,0 +1,55 @@ +# ros2_medkit_fault_detection + +Shared, protocol-agnostic fault-detection model for medkit gateway plugins. +A single evaluator maps a raw value read from any source (OPC UA, S7, Modbus, +ADS, ...) to the set of faults it implies, so every protocol plugin detects +faults the same way. + +## Detection modes + +Composable per mapped point in a plugin's node map: + +- **threshold** (`ThresholdRule`) - numeric above/below a setpoint. A boolean + point is alarm-on-true. +- **status word** (`StatusWordRule`) - decode named bits of an integer status + register; one fault per configured bit. +- **fault enum** (`EnumMapRule`) - map a fault-code register value to a fault + code + text; one fault per configured code, with an `ok_value` meaning + "no fault". + +## API + +```cpp +#include "ros2_medkit_fault_detection/fault_detection.hpp" +namespace fd = ros2_medkit::fault_detection; + +// Pure evaluator: value + rule -> set of (active/inactive) fault signals. +std::vector signals = fd::evaluate(value, rule); + +// Stateful raise/clear edge detector over successive evaluations. +fd::FaultTransitionTracker tracker; +std::vector transitions = tracker.apply(signals); // raises + clears +``` + +`evaluate` reports the full set of faults a rule governs (each flagged active +or inactive). `FaultTransitionTracker` keeps the last-known state per +`fault_code` and emits only the raise/clear edges, which a plugin forwards to +the fault manager as report/clear. + +## Placement and packaging + +This is a **header-only INTERFACE package**: the detection logic is compiled +directly into each consuming plugin. Protocol plugins are built as MODULE +libraries that resolve gateway symbols from the host process at load time +(`-Wl,--unresolved-symbols=ignore-all`); a separately linked shared library +would not be present in that host, so the logic is compiled in instead. The +header has no ROS or protocol dependencies, which keeps the evaluator a pure, +trivially unit-testable function. + +It lives in the open `ros2_medkit` repo so both the open OPC UA plugin +(`ros2_medkit_opcua`) and future protocol plugins can depend on it. + +## Consumers + +`ros2_medkit_opcua` lowers its `alarm:`, `status_bits:` and `fault_enum:` +node-map blocks onto `DetectionRule` and evaluates them through this module. diff --git a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp new file mode 100644 index 000000000..0fbd6f862 --- /dev/null +++ b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp @@ -0,0 +1,243 @@ +// Copyright 2026 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +/// Shared fault-detection model for ros2_medkit protocol plugins. +/// +/// A single, protocol-agnostic evaluator maps a raw value read from any +/// source (OPC UA, S7, Modbus, ADS, ...) into the set of faults it implies, +/// using one of three composable detection modes: +/// +/// * threshold - numeric above/below a setpoint. +/// * status word - decode named bits of an integer status register. +/// * fault enum - map a fault-code register value to a fault code + text. +/// +/// The evaluator (``evaluate``) is a pure function of ``(value, rule)`` and +/// has no ROS / protocol dependencies, so it is trivially unit-testable and +/// safe to compile into a dlopen-loaded plugin MODULE. Raise/clear edge +/// detection is layered on top via ``FaultTransitionTracker``. +/// +/// Header-only on purpose: protocol plugins are built as MODULE libraries +/// that resolve gateway symbols from the host process at load time +/// (``--unresolved-symbols=ignore-all``); a separately linked shared library +/// would not be present in that host, so the detection logic is compiled +/// directly into each plugin instead. + +#include +#include +#include +#include +#include +#include + +namespace ros2_medkit::fault_detection { + +/// A raw value read from a protocol source. +using Value = std::variant; + +/// The identity + presentation of a fault a rule can raise. +struct FaultSpec { + std::string fault_code; + std::string severity{"ERROR"}; // "INFO" / "WARNING" / "ERROR" / "CRITICAL" + std::string message; // human-readable text; defaults to fault_code +}; + +/// Numeric threshold: raise ``fault`` when the value crosses ``threshold``. +struct ThresholdRule { + FaultSpec fault; + double threshold{0.0}; + bool above{true}; // true: active when value > threshold; false: value < threshold +}; + +/// One named bit of a status word. +struct BitRule { + unsigned bit{0}; // bit position, 0 = least significant + FaultSpec fault; +}; + +/// Status-word decode: one integer register, many named-bit faults. +struct StatusWordRule { + std::vector bits; +}; + +/// One entry of a fault-code enum register. +struct EnumRule { + std::int64_t code{0}; + FaultSpec fault; +}; + +/// Fault-code enum decode: a register value selects at most one fault. +struct EnumMapRule { + std::vector codes; + std::int64_t ok_value{0}; // value meaning "no fault" (no rule raised) +}; + +/// One of the composable detection modes for a single mapped point. +using DetectionRule = std::variant; + +/// A fault the evaluator says is currently active or inactive for a value. +/// The evaluator reports the full set of faults a rule governs (so a cleared +/// fault is reported as ``active == false``); edge detection is done by +/// ``FaultTransitionTracker``. +struct FaultSignal { + std::string fault_code; + std::string severity; + std::string message; + bool active{false}; +}; + +namespace detail { + +/// Coerce a value to double. bool -> 0/1, integers/doubles as-is. Strings are +/// not numerically comparable here and yield ``false`` (handled by caller). +inline bool as_double(const Value & v, double & out) { + return std::visit( + [&out](auto && x) -> bool { + using T = std::decay_t; + if constexpr (std::is_same_v) { + out = x ? 1.0 : 0.0; + return true; + } else if constexpr (std::is_same_v || std::is_same_v) { + out = static_cast(x); + return true; + } else { + return false; // string + } + }, + v); +} + +/// Coerce a value to a 64-bit integer for bit / enum decode. bool -> 0/1, +/// int as-is, double rounded to nearest. Strings are not decodable. +inline bool as_int(const Value & v, std::int64_t & out) { + return std::visit( + [&out](auto && x) -> bool { + using T = std::decay_t; + if constexpr (std::is_same_v) { + out = x ? 1 : 0; + return true; + } else if constexpr (std::is_same_v) { + out = x; + return true; + } else if constexpr (std::is_same_v) { + out = static_cast(std::llround(x)); + return true; + } else { + return false; // string + } + }, + v); +} + +inline FaultSignal make_signal(const FaultSpec & spec, bool active) { + FaultSignal s; + s.fault_code = spec.fault_code; + s.severity = spec.severity.empty() ? std::string{"ERROR"} : spec.severity; + s.message = spec.message.empty() ? spec.fault_code : spec.message; + s.active = active; + return s; +} + +} // namespace detail + +/// Evaluate a value against one detection rule. +/// +/// Returns the full set of faults the rule governs, each flagged active or +/// inactive. A threshold rule yields one signal; a status-word rule yields one +/// per configured bit; an enum rule yields one per configured code (the +/// matching code active, the rest inactive) so a transition tracker can clear +/// the previous code and raise the new one. +inline std::vector evaluate(const Value & value, const DetectionRule & rule) { + std::vector out; + + std::visit( + [&](auto && r) { + using T = std::decay_t; + + if constexpr (std::is_same_v) { + bool active = false; + if (std::holds_alternative(value)) { + // A boolean point is alarm-on-true, threshold ignored. Preserves + // the original OPC UA threshold-path behaviour for Bool tags. + active = std::get(value); + } else { + double d = 0.0; + if (detail::as_double(value, d)) { + active = r.above ? (d > r.threshold) : (d < r.threshold); + } + } + out.push_back(detail::make_signal(r.fault, active)); + + } else if constexpr (std::is_same_v) { + std::int64_t raw = 0; + const bool ok = detail::as_int(value, raw); + const auto mask = static_cast(raw); + for (const auto & b : r.bits) { + bool active = ok && b.bit < 64 && ((mask >> b.bit) & 0x1ULL); + out.push_back(detail::make_signal(b.fault, active)); + } + + } else if constexpr (std::is_same_v) { + std::int64_t code = 0; + const bool ok = detail::as_int(value, code); + const bool healthy = ok && code == r.ok_value; + for (const auto & e : r.codes) { + bool active = ok && !healthy && code == e.code; + out.push_back(detail::make_signal(e.fault, active)); + } + } + }, + rule); + + return out; +} + +/// Stateful raise/clear edge detector over a stream of evaluator outputs. +/// +/// Feed each ``evaluate`` result through ``apply``; it returns only the +/// signals whose active state changed since the last call (the raise and +/// clear transitions), which a plugin forwards to the fault manager as +/// report / clear. Keyed by ``fault_code``, so it tracks threshold, bit and +/// enum faults uniformly. +class FaultTransitionTracker { + public: + std::vector apply(const std::vector & signals) { + std::vector changes; + for (const auto & s : signals) { + auto it = last_active_.find(s.fault_code); + const bool known = it != last_active_.end(); + if (!known || it->second != s.active) { + last_active_[s.fault_code] = s.active; + // A fault unknown-and-inactive is not a transition (nothing was ever + // raised); only emit a clear for a fault we had previously raised. + if (s.active || known) { + changes.push_back(s); + } + } + } + return changes; + } + + /// Current active state of a fault (false if never seen). + bool active(const std::string & fault_code) const { + auto it = last_active_.find(fault_code); + return it != last_active_.end() && it->second; + } + + private: + std::unordered_map last_active_; +}; + +} // namespace ros2_medkit::fault_detection diff --git a/src/ros2_medkit_fault_detection/package.xml b/src/ros2_medkit_fault_detection/package.xml new file mode 100644 index 000000000..195093ca6 --- /dev/null +++ b/src/ros2_medkit_fault_detection/package.xml @@ -0,0 +1,22 @@ + + + + ros2_medkit_fault_detection + 0.6.0 + Shared, protocol-agnostic fault-detection model (threshold, status-bit, fault-code enum) for medkit gateway plugins + + mfaferek93 + Apache-2.0 + + ament_cmake + ros2_medkit_cmake + + ament_lint_auto + ament_lint_common + ament_cmake_clang_format + ament_cmake_gtest + + + ament_cmake + + diff --git a/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp b/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp new file mode 100644 index 000000000..d0496078e --- /dev/null +++ b/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp @@ -0,0 +1,177 @@ +// Copyright 2026 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ros2_medkit_fault_detection/fault_detection.hpp" + +#include +#include + +#include + +namespace fd = ros2_medkit::fault_detection; + +namespace { + +const fd::FaultSignal * find(const std::vector & v, const std::string & code) { + for (const auto & s : v) { + if (s.fault_code == code) { + return &s; + } + } + return nullptr; +} + +} // namespace + +// -- Threshold --------------------------------------------------------------- + +TEST(ThresholdRule, AboveSetpointRaises) { + fd::ThresholdRule r; + r.fault = {"HIGH_TEMP", "ERROR", "Over temperature"}; + r.threshold = 80.0; + r.above = true; + + auto hot = fd::evaluate(fd::Value{90.0}, r); + ASSERT_EQ(hot.size(), 1u); + EXPECT_TRUE(hot[0].active); + EXPECT_EQ(hot[0].fault_code, "HIGH_TEMP"); + EXPECT_EQ(hot[0].severity, "ERROR"); + EXPECT_EQ(hot[0].message, "Over temperature"); + + auto ok = fd::evaluate(fd::Value{70.0}, r); + ASSERT_EQ(ok.size(), 1u); + EXPECT_FALSE(ok[0].active); +} + +TEST(ThresholdRule, BelowSetpointRaises) { + fd::ThresholdRule r; + r.fault = {"LOW_LEVEL", "WARNING", ""}; + r.threshold = 100.0; + r.above = false; + + EXPECT_TRUE(fd::evaluate(fd::Value{static_cast(50)}, r)[0].active); + EXPECT_FALSE(fd::evaluate(fd::Value{static_cast(150)}, r)[0].active); + // Empty message falls back to the fault code. + EXPECT_EQ(fd::evaluate(fd::Value{static_cast(50)}, r)[0].message, "LOW_LEVEL"); +} + +TEST(ThresholdRule, BooleanPointIsAlarmOnTrue) { + fd::ThresholdRule r; + r.fault = {"TRIGGER", "ERROR", "Trigger set"}; + r.threshold = 0.0; + r.above = true; + + EXPECT_TRUE(fd::evaluate(fd::Value{true}, r)[0].active); + EXPECT_FALSE(fd::evaluate(fd::Value{false}, r)[0].active); +} + +// -- Status-word bit decode -------------------------------------------------- + +TEST(StatusWordRule, NamedBitDecode) { + fd::StatusWordRule r; + r.bits = { + {0u, {"E_STOP", "CRITICAL", "Emergency stop"}}, + {3u, {"PUMP_OVERLOAD", "ERROR", "Pump overload"}}, + {7u, {"FILTER_DIRTY", "WARNING", "Filter dirty"}}, + }; + + // 0b1001 -> bit 0 and bit 3 set, bit 7 clear. + auto out = fd::evaluate(fd::Value{static_cast(0b1001)}, r); + ASSERT_EQ(out.size(), 3u); + EXPECT_TRUE(find(out, "E_STOP")->active); + EXPECT_TRUE(find(out, "PUMP_OVERLOAD")->active); + EXPECT_FALSE(find(out, "FILTER_DIRTY")->active); + EXPECT_EQ(find(out, "PUMP_OVERLOAD")->message, "Pump overload"); +} + +TEST(StatusWordRule, AllClearWhenZero) { + fd::StatusWordRule r; + r.bits = {{2u, {"FAULT_A", "ERROR", "A"}}, {5u, {"FAULT_B", "ERROR", "B"}}}; + auto out = fd::evaluate(fd::Value{static_cast(0)}, r); + for (const auto & s : out) { + EXPECT_FALSE(s.active); + } +} + +// -- Fault-code enum --------------------------------------------------------- + +TEST(EnumMapRule, CodeMapsToFaultAndText) { + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = { + {10, {"VFD_OVERVOLTAGE", "ERROR", "DC bus overvoltage"}}, + {11, {"VFD_OVERCURRENT", "ERROR", "Output overcurrent"}}, + {12, {"VFD_OVERTEMP", "WARNING", "Heatsink over temperature"}}, + }; + + auto out = fd::evaluate(fd::Value{static_cast(11)}, r); + ASSERT_EQ(out.size(), 3u); + EXPECT_FALSE(find(out, "VFD_OVERVOLTAGE")->active); + EXPECT_TRUE(find(out, "VFD_OVERCURRENT")->active); + EXPECT_EQ(find(out, "VFD_OVERCURRENT")->message, "Output overcurrent"); + EXPECT_FALSE(find(out, "VFD_OVERTEMP")->active); +} + +TEST(EnumMapRule, OkValueRaisesNothing) { + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"VFD_OVERVOLTAGE", "ERROR", "x"}}}; + auto out = fd::evaluate(fd::Value{static_cast(0)}, r); + ASSERT_EQ(out.size(), 1u); + EXPECT_FALSE(out[0].active); +} + +// -- Raise / clear transitions ---------------------------------------------- + +TEST(TransitionTracker, EmitsRaiseThenClearOnce) { + fd::ThresholdRule r; + r.fault = {"HIGH_TEMP", "ERROR", "hot"}; + r.threshold = 80.0; + r.above = true; + + fd::FaultTransitionTracker tracker; + + // Healthy first read: no transition (nothing was ever raised). + EXPECT_TRUE(tracker.apply(fd::evaluate(fd::Value{70.0}, r)).empty()); + + // Crosses up: one raise. + auto up = tracker.apply(fd::evaluate(fd::Value{90.0}, r)); + ASSERT_EQ(up.size(), 1u); + EXPECT_TRUE(up[0].active); + EXPECT_TRUE(tracker.active("HIGH_TEMP")); + + // Still high: no repeat. + EXPECT_TRUE(tracker.apply(fd::evaluate(fd::Value{95.0}, r)).empty()); + + // Crosses down: one clear. + auto down = tracker.apply(fd::evaluate(fd::Value{60.0}, r)); + ASSERT_EQ(down.size(), 1u); + EXPECT_FALSE(down[0].active); + EXPECT_FALSE(tracker.active("HIGH_TEMP")); +} + +TEST(TransitionTracker, EnumCodeChangeClearsOldRaisesNew) { + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}, {11, {"OVERCURR", "ERROR", "oc"}}}; + + fd::FaultTransitionTracker tracker; + tracker.apply(fd::evaluate(fd::Value{static_cast(10)}, r)); // raise OVERVOLT + + // Code switches 10 -> 11: clear OVERVOLT, raise OVERCURR in one step. + auto changes = tracker.apply(fd::evaluate(fd::Value{static_cast(11)}, r)); + ASSERT_EQ(changes.size(), 2u); + EXPECT_FALSE(find(changes, "OVERVOLT")->active); + EXPECT_TRUE(find(changes, "OVERCURR")->active); +} diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt b/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt index d66c3ecce..973882a03 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt @@ -27,6 +27,7 @@ include(ROS2MedkitWarnings) find_package(ament_cmake REQUIRED) find_package(ros2_medkit_gateway REQUIRED) +find_package(ros2_medkit_fault_detection REQUIRED) find_package(ros2_medkit_msgs REQUIRED) find_package(rclcpp REQUIRED) find_package(std_msgs REQUIRED) @@ -98,6 +99,7 @@ target_include_directories(ros2_medkit_opcua_plugin PRIVATE medkit_target_dependencies(ros2_medkit_opcua_plugin ros2_medkit_msgs ros2_medkit_gateway + ros2_medkit_fault_detection rclcpp std_msgs ) @@ -168,6 +170,7 @@ if(BUILD_TESTING) medkit_target_dependencies(test_opcua_plugin ros2_medkit_msgs ros2_medkit_gateway + ros2_medkit_fault_detection rclcpp std_msgs ) @@ -188,6 +191,7 @@ if(BUILD_TESTING) ) medkit_target_dependencies(test_node_map ros2_medkit_gateway + ros2_medkit_fault_detection rclcpp ) target_link_libraries(test_node_map diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md index 9f6f0ac2a..6e34dd8a5 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md @@ -169,13 +169,48 @@ nodes: writable: true # Allow writes via x-plc-operations min_value: 0.0 # Optional: range validation for writes max_value: 100.0 - alarm: # Optional: map to SOVD fault + alarm: # Optional: numeric threshold -> SOVD fault fault_code: PLC_LOW_LEVEL severity: WARNING message: Tank level below minimum threshold: 100.0 above_threshold: false # Alarm when value < threshold + # Status-word bit decode: one integer register, one fault per named bit. + # Mutually exclusive per node with `alarm:` / `fault_enum:`. + - node_id: "ns=2;s=Pump.StatusWord" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 3 # bit position, 0 = least significant + fault_code: PUMP_OVERLOAD + severity: ERROR + message: Pump motor overload + - bit: 7 + fault_code: FILTER_DIRTY + severity: WARNING # message defaults to the fault_code + + # Fault-code enum: a register value selects at most one fault + text. + - node_id: "ns=2;s=Vfd.FaultCode" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 # value meaning "no fault" (default 0) + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE + severity: ERROR + message: DC bus overvoltage + - code: 11 + fault_code: VFD_OVERCURRENT + severity: ERROR + +# The three modes (`alarm`, `status_bits`, `fault_enum`) are evaluated by the +# shared `ros2_medkit_fault_detection` module so every protocol plugin maps raw +# values to faults identically. They are mutually exclusive on a single node. + # Native OPC-UA AlarmConditionType events (issue #386). Subscribes to alarms # defined inside the PLC (Siemens Program_Alarm / ProDiag, Beckhoff TF6100, # CodeSys 3.5+, Rockwell via FactoryTalk Linx). Mutually exclusive per entry diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp index 6b26fe8bc..e790dc87e 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp @@ -22,9 +22,15 @@ #include +#include "ros2_medkit_fault_detection/fault_detection.hpp" + namespace ros2_medkit_gateway { -/// Alarm configuration for a node that maps to a SOVD fault +/// Threshold-mode alarm configuration for a node that maps to a SOVD fault. +/// Retained for backward compatibility with the original ``alarm:`` node-map +/// block; at load time it is also lowered to a shared +/// ``fault_detection::ThresholdRule`` (see ``NodeMapEntry::detection``) so the +/// runtime evaluator is the same one used by status-bit and enum modes. struct AlarmConfig { std::string fault_code; std::string severity; // "ERROR", "WARNING", "INFO" @@ -73,9 +79,15 @@ struct NodeMapEntry { bool writable{false}; // Can be written via x-plc-operations std::optional min_value; // Optional range validation for writes std::optional max_value; - std::optional alarm; // Optional alarm -> fault mapping + std::optional alarm; // Optional threshold alarm -> fault mapping (back-compat) std::string ros2_topic; // ROS 2 topic for value bridging (auto: /plc/{entity}/{name}) + // Shared fault-detection rule for this point: threshold, status-word bit + // decode, or fault-code enum. Built at load time from the ``alarm:``, + // ``status_bits:`` or ``fault_enum:`` node-map block. The poller evaluates + // this through the shared ``fault_detection::evaluate`` regardless of mode. + std::optional detection; + bool has_range() const { return min_value.has_value() && max_value.has_value(); } @@ -121,6 +133,11 @@ class NodeMap { /// Get all entries that have threshold-based alarm configuration std::vector alarm_entries() const; + /// Get all entries carrying a shared fault-detection rule (threshold, + /// status-word bit decode, or fault-code enum). This is the set the poller + /// evaluates each cycle. + std::vector detection_entries() const; + /// Get all native OPC-UA AlarmConditionType event-mode entries (issue #386). const std::vector & event_alarms() const { return event_alarms_; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp index 5141243d7..1a20d154d 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp @@ -112,8 +112,9 @@ class OpcuaPlugin : public ros2_medkit_gateway::GatewayPlugin, void handle_plc_operations(const ros2_medkit_gateway::PluginRequest & req, ros2_medkit_gateway::PluginResponse & res); void handle_plc_status(const ros2_medkit_gateway::PluginRequest & req, ros2_medkit_gateway::PluginResponse & res); - // Alarm -> Fault bridge - void on_alarm_change(const std::string & fault_code, const AlarmConfig & config, bool active); + // Fault-detection signal (threshold / status-bit / enum) -> Fault bridge + void on_alarm_change(const std::string & entity_id, + const ros2_medkit::fault_detection::FaultSignal & signal); // Issue #386: native AlarmConditionType event lifecycle bridge. void on_event_alarm(const AlarmEventDelivery & delivery); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp index cbb3a6fb9..e716f3b20 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp @@ -42,9 +42,12 @@ struct PollSnapshot { uint64_t error_count{0}; }; -/// Callback when an alarm state changes (for fault reporting) -using AlarmChangeCallback = - std::function; +/// Callback when a fault-detection signal transitions (for fault reporting). +/// Fires once per raise/clear edge with the hosting entity and the shared +/// ``FaultSignal`` (fault code, severity, message, active) produced by the +/// shared evaluator. Threshold, status-bit and enum modes all use this path. +using AlarmChangeCallback = std::function; /// Callback when a native OPC-UA AlarmCondition lifecycle transitions to a /// new SOVD status (issue #386). Fires at most once per logical transition; @@ -166,7 +169,9 @@ class OpcuaPoller { mutable std::mutex alarm_mutex_; AlarmChangeCallback alarm_callback_; - std::unordered_map alarm_states_; // fault_code -> last known state + // Shared raise/clear edge detector across all detection modes, keyed by + // fault_code. Replaces the previous threshold-only last-state map. + ros2_medkit::fault_detection::FaultTransitionTracker alarm_tracker_; // Issue #386: event-mode AlarmCondition state. EventAlarmCallback event_alarm_callback_; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml b/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml index d4ef796e2..b509fe1ff 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml @@ -18,6 +18,7 @@ std_msgs ros2_medkit_msgs ros2_medkit_gateway + ros2_medkit_fault_detection nlohmann-json-dev yaml_cpp_vendor libssl-dev diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 755faafda..07cfe99ac 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -290,6 +290,22 @@ bool NodeMap::load(const std::string & yaml_path) { "/plc/" + sanitize_topic_segment(entry.entity_id) + "/" + sanitize_topic_segment(entry.data_name); } + // Fault-detection block. Three mutually exclusive modes per point, all + // lowered onto the shared ``fault_detection`` evaluator: + // alarm: numeric threshold (above/below) - original behaviour + // status_bits: decode named bits of an integer status word + // fault_enum: map a fault-code register value to a fault + text + namespace fd = ros2_medkit::fault_detection; + const int detection_modes = + (n["alarm"] ? 1 : 0) + (n["status_bits"] ? 1 : 0) + (n["fault_enum"] ? 1 : 0); + if (detection_modes > 1) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "Entry node_id=%s declares more than one of alarm/status_bits/fault_enum - " + "these detection modes are mutually exclusive", + entry.node_id_str.c_str()); + return false; + } + if (n["alarm"]) { AlarmConfig alarm; alarm.fault_code = n["alarm"]["fault_code"].as(); @@ -297,7 +313,53 @@ bool NodeMap::load(const std::string & yaml_path) { alarm.message = n["alarm"]["message"].as(alarm.fault_code); alarm.threshold = n["alarm"]["threshold"].as(0.0); alarm.above_threshold = n["alarm"]["above_threshold"].as(true); + + fd::ThresholdRule rule; + rule.fault = {alarm.fault_code, alarm.severity, alarm.message}; + rule.threshold = alarm.threshold; + rule.above = alarm.above_threshold; + entry.detection = std::move(rule); entry.alarm = std::move(alarm); + } else if (n["status_bits"]) { + fd::StatusWordRule rule; + for (const auto & b : n["status_bits"]) { + if (!b["bit"] || !b["fault_code"]) { + RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), + "status_bits entry on node_id=%s missing bit/fault_code - skipping", + entry.node_id_str.c_str()); + continue; + } + fd::BitRule br; + br.bit = b["bit"].as(); + br.fault = {b["fault_code"].as(), b["severity"].as("ERROR"), + b["message"].as(b["fault_code"].as())}; + rule.bits.push_back(std::move(br)); + } + if (!rule.bits.empty()) { + entry.detection = std::move(rule); + } + } else if (n["fault_enum"]) { + fd::EnumMapRule rule; + rule.ok_value = n["fault_enum"]["ok_value"].as(0); + const auto codes = n["fault_enum"]["codes"]; + if (codes && codes.IsSequence()) { + for (const auto & c : codes) { + if (!c["code"] || !c["fault_code"]) { + RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), + "fault_enum entry on node_id=%s missing code/fault_code - skipping", + entry.node_id_str.c_str()); + continue; + } + fd::EnumRule er; + er.code = c["code"].as(); + er.fault = {c["fault_code"].as(), c["severity"].as("ERROR"), + c["message"].as(c["fault_code"].as())}; + rule.codes.push_back(std::move(er)); + } + } + if (!rule.codes.empty()) { + entry.detection = std::move(rule); + } } size_t idx = entries_.size(); @@ -450,6 +512,16 @@ std::vector NodeMap::alarm_entries() const { return result; } +std::vector NodeMap::detection_entries() const { + std::vector result; + for (const auto & entry : entries_) { + if (entry.detection.has_value()) { + result.push_back(&entry); + } + } + return result; +} + void NodeMap::build_entity_defs() { entity_defs_.clear(); @@ -466,7 +538,7 @@ void NodeMap::build_entity_defs() { if (entry.writable) { def.writable_names.push_back(entry.data_name); } - if (entry.alarm.has_value()) { + if (entry.detection.has_value()) { def.has_faults = true; } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp index e4960df78..3ebfd7e37 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp @@ -188,9 +188,10 @@ void OpcuaPlugin::set_context(PluginContext & context) { } poller_ = std::make_unique(*client_, node_map_); - poller_->set_alarm_callback([this](const std::string & code, const AlarmConfig & cfg, bool active) { - on_alarm_change(code, cfg, active); - }); + poller_->set_alarm_callback( + [this](const std::string & entity_id, const ros2_medkit::fault_detection::FaultSignal & signal) { + on_alarm_change(entity_id, signal); + }); poller_->set_event_alarm_callback([this](const AlarmEventDelivery & delivery) { on_event_alarm(delivery); }); @@ -491,29 +492,18 @@ void OpcuaPlugin::handle_plc_status(const PluginRequest & req, PluginResponse & // -- Fault bridge -- -void OpcuaPlugin::on_alarm_change(const std::string & fault_code, const AlarmConfig & config, bool active) { +void OpcuaPlugin::on_alarm_change(const std::string & entity_id, + const ros2_medkit::fault_detection::FaultSignal & signal) { if (shutdown_requested_.load()) { return; } - std::string entity_id; - for (const auto & entry : node_map_.entries()) { - if (entry.alarm.has_value() && entry.alarm->fault_code == fault_code) { - entity_id = entry.entity_id; - break; - } - } - - if (entity_id.empty()) { - log_warn("Alarm " + fault_code + " has no entity mapping"); - return; - } - if (active) { - log_info("Alarm activated: " + fault_code + " on " + entity_id); - send_report_fault(entity_id, fault_code, config.severity, config.message); + if (signal.active) { + log_info("Alarm activated: " + signal.fault_code + " on " + entity_id); + send_report_fault(entity_id, signal.fault_code, signal.severity, signal.message); } else { - log_info("Alarm cleared: " + fault_code + " on " + entity_id); - send_clear_fault(fault_code); + log_info("Alarm cleared: " + signal.fault_code + " on " + entity_id); + send_clear_fault(signal.fault_code); } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp index c4b7213e6..00cf25285 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp @@ -582,53 +582,56 @@ void OpcuaPoller::do_poll() { } void OpcuaPoller::evaluate_alarms() { - auto alarm_entries = node_map_.alarm_entries(); - if (alarm_entries.empty()) { + namespace fd = ros2_medkit::fault_detection; + + auto detection_entries = node_map_.detection_entries(); + if (detection_entries.empty()) { return; } - // Collect state changes while holding snapshot mutex - struct AlarmChange { - std::string fault_code; - AlarmConfig config; - bool active; + // Translate a polled OPC-UA value into the shared, protocol-agnostic value + // type understood by the fault-detection evaluator. + auto to_fd_value = [](const OpcuaValue & v) -> fd::Value { + return std::visit( + [](auto && val) -> fd::Value { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return fd::Value{val}; + } else if constexpr (std::is_same_v) { + return fd::Value{val}; + } else if constexpr (std::is_floating_point_v) { + return fd::Value{static_cast(val)}; + } else { + return fd::Value{static_cast(val)}; + } + }, + v); + }; + + // Collect raise/clear edges while holding the snapshot mutex. + struct FaultChange { + std::string entity_id; + fd::FaultSignal signal; }; - std::vector changes; + std::vector changes; { std::lock_guard snap_lock(snapshot_mutex_); - for (const auto * entry : alarm_entries) { + for (const auto * entry : detection_entries) { auto it = snapshot_.values.find(entry->node_id_str); if (it == snapshot_.values.end()) { continue; } - const auto & alarm = *entry->alarm; - bool active = false; - - std::visit( - [&active, &alarm](auto && val) { - using T = std::decay_t; - if constexpr (std::is_same_v) { - active = val; - } else if constexpr (std::is_arithmetic_v) { - double dval = static_cast(val); - if (alarm.above_threshold) { - active = dval > alarm.threshold; - } else { - active = dval < alarm.threshold; - } - } - }, - it->second); - - auto & prev_state = alarm_states_[alarm.fault_code]; - snapshot_.alarms[alarm.fault_code] = active; - - if (active != prev_state) { - prev_state = active; - changes.push_back({alarm.fault_code, alarm, active}); + auto signals = fd::evaluate(to_fd_value(it->second), *entry->detection); + for (const auto & s : signals) { + snapshot_.alarms[s.fault_code] = s.active; + } + + // Edge-detect raise/clear; only transitions reach the callback. + for (auto & edge : alarm_tracker_.apply(signals)) { + changes.push_back({entry->entity_id, std::move(edge)}); } } } // snapshot_mutex_ released @@ -638,7 +641,7 @@ void OpcuaPoller::evaluate_alarms() { std::lock_guard alarm_lock(alarm_mutex_); if (alarm_callback_) { for (const auto & c : changes) { - alarm_callback_(c.fault_code, c.config, c.active); + alarm_callback_(c.entity_id, c.signal); } } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index 218da4843..91f3f8d4d 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -605,4 +605,146 @@ component_id: test EXPECT_FALSE(map.load(path)); } +// -- Shared fault-detection node-map wiring (issue #481) --------------------- + +namespace fd = ros2_medkit::fault_detection; + +TEST_F(NodeMapTest, ThresholdAlarmLoweredToDetectionRule) { + std::string path = "/tmp/test_node_map_threshold_detect.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: temp + alarm: + fault_code: HIGH_TEMP + severity: ERROR + message: "Over temp" + threshold: 80.0 + above_threshold: true +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(det.has_value()); + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + EXPECT_EQ(r.fault.fault_code, "HIGH_TEMP"); + EXPECT_DOUBLE_EQ(r.threshold, 80.0); + EXPECT_TRUE(r.above); +} + +TEST_F(NodeMapTest, StatusBitsParsedIntoDetection) { + std::string path = "/tmp/test_node_map_status_bits.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=StatusWord" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 3 + fault_code: PUMP_OVERLOAD + severity: ERROR + message: "Pump overload" + - bit: 7 + fault_code: FILTER_DIRTY + severity: WARNING +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(det.has_value()); + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.bits.size(), 2u); + EXPECT_EQ(r.bits[0].bit, 3u); + EXPECT_EQ(r.bits[0].fault.fault_code, "PUMP_OVERLOAD"); + EXPECT_EQ(r.bits[1].bit, 7u); + // Message defaults to the fault code when omitted. + EXPECT_EQ(r.bits[1].fault.message, "FILTER_DIRTY"); + + auto signals = fd::evaluate(fd::Value{static_cast(0b1000)}, *det); + ASSERT_EQ(signals.size(), 2u); + EXPECT_TRUE(signals[0].active); // bit 3 set + EXPECT_FALSE(signals[1].active); // bit 7 clear +} + +TEST_F(NodeMapTest, FaultEnumParsedIntoDetection) { + std::string path = "/tmp/test_node_map_fault_enum.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=FaultCode" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE + severity: ERROR + message: "DC bus overvoltage" + - code: 11 + fault_code: VFD_OVERCURRENT + severity: ERROR +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(det.has_value()); + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + EXPECT_EQ(r.ok_value, 0); + ASSERT_EQ(r.codes.size(), 2u); + EXPECT_EQ(r.codes[0].code, 10); + EXPECT_EQ(r.codes[0].fault.message, "DC bus overvoltage"); + + auto signals = fd::evaluate(fd::Value{static_cast(10)}, *det); + ASSERT_EQ(signals.size(), 2u); + EXPECT_TRUE(signals[0].active); // code 10 + EXPECT_FALSE(signals[1].active); // code 11 +} + +TEST_F(NodeMapTest, RejectsMultipleDetectionModesOnOneNode) { + std::string path = "/tmp/test_node_map_multi_mode.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: val1 + alarm: + fault_code: A + threshold: 1.0 + status_bits: + - bit: 0 + fault_code: B +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + } // namespace ros2_medkit_gateway From 9e70c9f945f079d4cc7c3666861d7dee4942a336 Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Tue, 30 Jun 2026 16:36:34 +0000 Subject: [PATCH 2/8] style: apply clang-format-18 --- .../include/ros2_medkit_opcua/opcua_plugin.hpp | 3 +-- .../include/ros2_medkit_opcua/opcua_poller.hpp | 4 ++-- src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp | 6 ++---- .../ros2_medkit_opcua/src/opcua_plugin.cpp | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp index 1a20d154d..91f03f568 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp @@ -113,8 +113,7 @@ class OpcuaPlugin : public ros2_medkit_gateway::GatewayPlugin, void handle_plc_status(const ros2_medkit_gateway::PluginRequest & req, ros2_medkit_gateway::PluginResponse & res); // Fault-detection signal (threshold / status-bit / enum) -> Fault bridge - void on_alarm_change(const std::string & entity_id, - const ros2_medkit::fault_detection::FaultSignal & signal); + void on_alarm_change(const std::string & entity_id, const ros2_medkit::fault_detection::FaultSignal & signal); // Issue #386: native AlarmConditionType event lifecycle bridge. void on_event_alarm(const AlarmEventDelivery & delivery); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp index e716f3b20..82c4738b0 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp @@ -46,8 +46,8 @@ struct PollSnapshot { /// Fires once per raise/clear edge with the hosting entity and the shared /// ``FaultSignal`` (fault code, severity, message, active) produced by the /// shared evaluator. Threshold, status-bit and enum modes all use this path. -using AlarmChangeCallback = std::function; +using AlarmChangeCallback = + std::function; /// Callback when a native OPC-UA AlarmCondition lifecycle transitions to a /// new SOVD status (issue #386). Fires at most once per logical transition; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 07cfe99ac..f550023da 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -296,8 +296,7 @@ bool NodeMap::load(const std::string & yaml_path) { // status_bits: decode named bits of an integer status word // fault_enum: map a fault-code register value to a fault + text namespace fd = ros2_medkit::fault_detection; - const int detection_modes = - (n["alarm"] ? 1 : 0) + (n["status_bits"] ? 1 : 0) + (n["fault_enum"] ? 1 : 0); + const int detection_modes = (n["alarm"] ? 1 : 0) + (n["status_bits"] ? 1 : 0) + (n["fault_enum"] ? 1 : 0); if (detection_modes > 1) { RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), "Entry node_id=%s declares more than one of alarm/status_bits/fault_enum - " @@ -325,8 +324,7 @@ bool NodeMap::load(const std::string & yaml_path) { for (const auto & b : n["status_bits"]) { if (!b["bit"] || !b["fault_code"]) { RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), - "status_bits entry on node_id=%s missing bit/fault_code - skipping", - entry.node_id_str.c_str()); + "status_bits entry on node_id=%s missing bit/fault_code - skipping", entry.node_id_str.c_str()); continue; } fd::BitRule br; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp index 3ebfd7e37..3ccef8733 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp @@ -493,7 +493,7 @@ void OpcuaPlugin::handle_plc_status(const PluginRequest & req, PluginResponse & // -- Fault bridge -- void OpcuaPlugin::on_alarm_change(const std::string & entity_id, - const ros2_medkit::fault_detection::FaultSignal & signal) { + const ros2_medkit::fault_detection::FaultSignal & signal) { if (shutdown_requested_.load()) { return; } From 05f6588663be24ad9d7bf66170e0c57c3c22b28a Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Tue, 30 Jun 2026 17:14:31 +0000 Subject: [PATCH 3/8] Enforce global fault-code uniqueness in OPC UA node map The shared FaultTransitionTracker is keyed by fault_code alone, so two detection entries emitting the same code flapped raise/clear every poll cycle. Reject duplicate detection codes at load and extend the event_alarms collision check to status_bits/fault_enum. Decode uint64_t status registers and warn on bit >= 64; README notes unsigned status words. Refs #481 --- .../fault_detection.hpp | 6 + .../ros2_medkit_opcua/README.md | 9 + .../ros2_medkit_opcua/src/node_map.cpp | 90 ++++- .../ros2_medkit_opcua/src/opcua_client.cpp | 8 + .../ros2_medkit_opcua/test/test_node_map.cpp | 345 ++++++++++++++++++ 5 files changed, 445 insertions(+), 13 deletions(-) diff --git a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp index 0fbd6f862..3e7a17d69 100644 --- a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp +++ b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp @@ -211,6 +211,12 @@ inline std::vector evaluate(const Value & value, const DetectionRul /// clear transitions), which a plugin forwards to the fault manager as /// report / clear. Keyed by ``fault_code``, so it tracks threshold, bit and /// enum faults uniformly. +/// +/// Contract: a single tracker may be shared across many points only if every +/// ``fault_code`` is globally unique. Because the key is the code alone, two +/// points emitting the same code would alternately raise and clear it each +/// cycle. Consumers that share one tracker (e.g. the OPC UA poller across all +/// node-map entries) must enforce that uniqueness at config-load time. class FaultTransitionTracker { public: std::vector apply(const std::vector & signals) { diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md index 6e34dd8a5..83ccf99a6 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md @@ -210,6 +210,15 @@ nodes: # The three modes (`alarm`, `status_bits`, `fault_enum`) are evaluated by the # shared `ros2_medkit_fault_detection` module so every protocol plugin maps raw # values to faults identically. They are mutually exclusive on a single node. +# +# Status words: expose the register as an UNSIGNED OPC-UA type (Byte / UInt16 / +# UInt32 / UInt64). A signed type (Int16/Int32) whose top bit is set is +# sign-extended when widened to the 64-bit decode register, which sets every +# bit above the register width and raises spurious bit faults. `bit:` positions +# are 0-based and must be < 64; higher positions are dead config and rejected at +# load. Fault codes must be globally unique across all `alarm` / `status_bits` / +# `fault_enum` entries (the loader rejects duplicates); reusing a code on two +# nodes makes its fault flap. # Native OPC-UA AlarmConditionType events (issue #386). Subscribes to alarms # defined inside the PLC (Siemens Program_Alarm / ProDiag, Beckhoff TF6100, diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index f550023da..98271d63a 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -25,7 +25,9 @@ #include #include #include +#include #include +#include #include #include @@ -329,6 +331,15 @@ bool NodeMap::load(const std::string & yaml_path) { } fd::BitRule br; br.bit = b["bit"].as(); + if (br.bit >= 64) { + // The status word is decoded as a 64-bit register, so bits at or + // above 64 can never be set and the rule would be dead config. + RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), + "status_bits entry on node_id=%s has bit=%u >= 64 (max status-word width); " + "this fault can never fire - skipping", + entry.node_id_str.c_str(), br.bit); + continue; + } br.fault = {b["fault_code"].as(), b["severity"].as("ERROR"), b["message"].as(b["fault_code"].as())}; rule.bits.push_back(std::move(br)); @@ -413,25 +424,78 @@ bool NodeMap::load(const std::string & yaml_path) { } } - // Cross-section collision check (bburda review on PR #387). The two - // alarm pipelines - threshold polling under ``nodes[*].alarm`` and - // native event subscription under ``event_alarms`` - must not address - // the same ``(entity_id, fault_code)``: fault_manager would receive - // both ``report_fault`` calls and the resulting status flapping is - // impossible to debug at runtime. The two paths produce different - // semantics (debounced vs state-machine driven) so the merge is not - // even well-defined. + // Fault-code collision validation. Two distinct concerns are checked here + // against the full set of fault codes the polled detection entries can + // emit (threshold + every status_bits bit + every fault_enum code), not + // just ``nodes[*].alarm`` as the original check did. + // + // 1. Global uniqueness of every emitted detection fault_code. The + // poller shares one ``FaultTransitionTracker`` keyed by fault_code + // alone, and clears are global by code, but evaluation runs + // per-entry. Two detection entries emitting the same fault_code + // (even on different entities) would therefore flap raise+clear + // every poll cycle. Reject duplicates so the intent - one code, one + // source - is documented and enforced at load. + // + // 2. Cross-pipeline collision with native ``event_alarms`` on the same + // ``(entity_id, fault_code)`` (bburda review on PR #387). The + // threshold/bit/enum polling path and the AlarmCondition + // subscription path produce different semantics (debounced vs + // state-machine driven), so fault_manager receiving both a polled + // report and a state-machine report for one SOVD fault flaps and is + // impossible to debug at runtime. { + namespace fd = ros2_medkit::fault_detection; + // (entity_id, fault_code) for every fault a detection entry can emit. + std::vector> detection_faults; + for (const auto & entry : entries_) { + if (!entry.detection.has_value()) { + continue; + } + std::visit( + [&](const auto & rule) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + detection_faults.emplace_back(entry.entity_id, rule.fault.fault_code); + } else if constexpr (std::is_same_v) { + for (const auto & b : rule.bits) { + detection_faults.emplace_back(entry.entity_id, b.fault.fault_code); + } + } else if constexpr (std::is_same_v) { + for (const auto & e : rule.codes) { + detection_faults.emplace_back(entry.entity_id, e.fault.fault_code); + } + } + }, + *entry.detection); + } + + // 1. Global uniqueness by fault_code across all detection entries. + std::set seen_codes; + for (const auto & fault : detection_faults) { + if (!seen_codes.insert(fault.second).second) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "fault_code '%s' is emitted by more than one detection entry " + "(threshold/status_bits/fault_enum); fault codes must be globally unique because " + "the shared fault tracker is keyed by code alone (a collision flaps raise/clear " + "every poll cycle) - rename one of them", + fault.second.c_str()); + return false; + } + } + + // 2. Cross-pipeline collision with event_alarms by (entity_id, fault_code). std::set> event_keys; for (const auto & cfg : event_alarms_) { event_keys.emplace(cfg.entity_id, cfg.fault_code); } - for (const auto & entry : entries_) { - if (entry.alarm.has_value() && event_keys.count({entry.entity_id, entry.alarm->fault_code}) > 0) { + for (const auto & [entity_id, code] : detection_faults) { + if (event_keys.count({entity_id, code}) > 0) { RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), - "Duplicate (entity_id=%s, fault_code=%s) declared by both nodes[*].alarm " - "(threshold mode) and event_alarms[*] (subscription mode) - mutually exclusive", - entry.entity_id.c_str(), entry.alarm->fault_code.c_str()); + "Duplicate (entity_id=%s, fault_code=%s) declared by both a polled detection entry " + "(threshold/status_bits/fault_enum) and event_alarms[*] (subscription mode) - " + "mutually exclusive", + entity_id.c_str(), code.c_str()); return false; } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp index f26a13e50..e3f35eea0 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp @@ -122,6 +122,14 @@ OpcuaValue variant_to_value(const opcua::Variant & var) { if (var.isType()) { return var.getScalarCopy(); } + if (var.isType()) { + // OpcuaValue has no unsigned 64-bit slot; reinterpret the bit pattern as + // int64_t. C++20 mandates two's complement, so the bits are preserved for + // status-word decode and for fault-enum codes that fit in int64_t. Values + // above INT64_MAX wrap to negative when compared numerically (enum mode); + // status-word bit decode is unaffected because it masks the raw bits. + return static_cast(var.getScalarCopy()); + } if (var.isType()) { return var.getScalarCopy(); } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index 91f3f8d4d..d790f9fca 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -16,8 +16,11 @@ #include +#include #include #include +#include +#include namespace ros2_medkit_gateway { @@ -724,6 +727,348 @@ component_id: test EXPECT_FALSE(signals[1].active); // code 11 } +TEST_F(NodeMapTest, RejectsDuplicateDetectionFaultCode) { + // The poller shares one FaultTransitionTracker keyed by fault_code alone, so + // two detection entries emitting the same code (here on different entities) + // would flap raise/clear every cycle. The loader must reject the whole file + // at load time. (issue #481) + std::string path = "/tmp/test_node_map_dup_fault_code.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent_a + data_name: temp_a + alarm: + fault_code: SHARED_CODE + threshold: 80.0 + - node_id: "ns=1;i=2" + entity_id: ent_b + data_name: temp_b + alarm: + fault_code: SHARED_CODE + threshold: 90.0 +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, RejectsDuplicateCodeAcrossStatusBitsAndEnum) { + // Uniqueness spans every emitted code, not just threshold alarms: a bit code + // colliding with an enum code on another node is still a shared-tracker + // collision. (issue #481) + std::string path = "/tmp/test_node_map_dup_mixed.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 2 + fault_code: COLLIDE + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: 7 + fault_code: COLLIDE +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, RejectsStatusBitsCollidingWithEventAlarm) { + // The author's original collision check only looked at nodes[*].alarm; a + // status_bits (or fault_enum) code colliding with an event_alarms + // (entity_id, fault_code) must be rejected too, otherwise fault_manager gets + // both a polled report and a state-machine report for one SOVD fault. + // (issue #481) + std::string path = "/tmp/test_node_map_bits_event_collision.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: tank_process + data_name: status_word + data_type: int + status_bits: + - bit: 1 + fault_code: PLC_OVERPRESSURE +event_alarms: + - alarm_source: "ns=2;s=Alarms.Overpressure" + entity_id: tank_process + fault_code: PLC_OVERPRESSURE +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, AcceptsDistinctCodesAcrossDetectionModes) { + // The uniqueness rule must not false-positive on genuinely distinct codes + // spread across threshold / status_bits / fault_enum entries. + std::string path = "/tmp/test_node_map_distinct_modes.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: tank + data_name: temp + alarm: + fault_code: HIGH_TEMP + threshold: 80.0 + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 3 + fault_code: PUMP_OVERLOAD + - bit: 7 + fault_code: FILTER_DIRTY + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + EXPECT_EQ(map.detection_entries().size(), 3u); +} + +TEST_F(NodeMapTest, StatusBitOutOfRangeSkipped) { + // A bit position >= 64 can never be set in the 64-bit decode register, so it + // is dead config: warn and drop the bit, keep the rest. (issue #481) + std::string path = "/tmp/test_node_map_bit_oob.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 64 + fault_code: NEVER_FIRES + - bit: 3 + fault_code: PUMP_OVERLOAD +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.bits.size(), 1u); + EXPECT_EQ(r.bits[0].bit, 3u); + EXPECT_EQ(r.bits[0].fault.fault_code, "PUMP_OVERLOAD"); +} + +// -- Poller-level shared-tracker integration (issue #481) -------------------- +// +// Exercises the exact loop OpcuaPoller::evaluate_alarms runs: one shared +// FaultTransitionTracker, iterating detection entries built by the real +// NodeMap loader and evaluated by the shared fault_detection evaluator. Proves +// there is no cross-entry flapping and that status-word bits transition +// independently. Driven without a live OPC UA server by feeding synthetic +// values keyed by node_id, mirroring the poller's snapshot. + +namespace { + +struct DetectionChange { + std::string entity_id; + fd::FaultSignal signal; +}; + +// Mirror of OpcuaPoller::evaluate_alarms: for each detection entry, look up its +// value, evaluate, and push the value through the SHARED tracker; return the +// raise/clear edges. Entries whose node has no value yet are skipped. +std::vector evaluate_cycle(const NodeMap & map, fd::FaultTransitionTracker & tracker, + const std::unordered_map & values) { + std::vector changes; + for (const auto * entry : map.detection_entries()) { + auto it = values.find(entry->node_id_str); + if (it == values.end()) { + continue; + } + auto signals = fd::evaluate(it->second, *entry->detection); + for (auto & edge : tracker.apply(signals)) { + changes.push_back({entry->entity_id, std::move(edge)}); + } + } + return changes; +} + +const DetectionChange * find_change(const std::vector & v, const std::string & code) { + for (const auto & c : v) { + if (c.signal.fault_code == code) { + return &c; + } + } + return nullptr; +} + +} // namespace + +TEST_F(NodeMapTest, PollerNoCrossEntryFlapping) { + std::string path = "/tmp/test_node_map_poller_two_entries.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=TempA" + entity_id: ent_a + data_name: temp_a + data_type: float + alarm: + fault_code: A_HIGH + threshold: 80.0 + - node_id: "ns=1;s=TempB" + entity_id: ent_b + data_name: temp_b + data_type: float + alarm: + fault_code: B_HIGH + threshold: 80.0 +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 2u); + + fd::FaultTransitionTracker tracker; + std::unordered_map values; + + // Both healthy: no transitions. + values["ns=1;s=TempA"] = fd::Value{70.0}; + values["ns=1;s=TempB"] = fd::Value{70.0}; + EXPECT_TRUE(evaluate_cycle(map, tracker, values).empty()); + + // A crosses high, B stays healthy: exactly one raise for A. + values["ns=1;s=TempA"] = fd::Value{90.0}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + EXPECT_EQ(changes[0].entity_id, "ent_a"); + EXPECT_EQ(changes[0].signal.fault_code, "A_HIGH"); + EXPECT_TRUE(changes[0].signal.active); + } + + // Steady state with A still high: the shared tracker must NOT clear B (which + // it never raised) nor re-emit A. This is the regression the fix guards. + for (int i = 0; i < 5; ++i) { + EXPECT_TRUE(evaluate_cycle(map, tracker, values).empty()); + } + + // Now B crosses high too: exactly one raise for B, A untouched. + values["ns=1;s=TempB"] = fd::Value{95.0}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + EXPECT_EQ(changes[0].entity_id, "ent_b"); + EXPECT_EQ(changes[0].signal.fault_code, "B_HIGH"); + EXPECT_TRUE(changes[0].signal.active); + } + + // A clears, B stays high: exactly one clear for A. + values["ns=1;s=TempA"] = fd::Value{60.0}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + EXPECT_EQ(changes[0].signal.fault_code, "A_HIGH"); + EXPECT_FALSE(changes[0].signal.active); + } +} + +TEST_F(NodeMapTest, PollerStatusWordBitsTransitionIndependently) { + std::string path = "/tmp/test_node_map_poller_status_word.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=StatusWord" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 0 + fault_code: PUMP_OVERLOAD + - bit: 3 + fault_code: FILTER_DIRTY +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + + fd::FaultTransitionTracker tracker; + std::unordered_map values; + + // All clear. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0)}; + EXPECT_TRUE(evaluate_cycle(map, tracker, values).empty()); + + // Bit 0 set only: raise PUMP_OVERLOAD, FILTER_DIRTY stays clear. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0b0001)}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + const auto * overload = find_change(changes, "PUMP_OVERLOAD"); + ASSERT_NE(overload, nullptr); + EXPECT_TRUE(overload->signal.active); + } + + // Bit 3 also set: raise FILTER_DIRTY only, PUMP_OVERLOAD stays raised. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0b1001)}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + const auto * dirty = find_change(changes, "FILTER_DIRTY"); + ASSERT_NE(dirty, nullptr); + EXPECT_TRUE(dirty->signal.active); + } + + // Bit 0 clears, bit 3 stays: clear PUMP_OVERLOAD only. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0b1000)}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + const auto * overload = find_change(changes, "PUMP_OVERLOAD"); + ASSERT_NE(overload, nullptr); + EXPECT_FALSE(overload->signal.active); + } +} + TEST_F(NodeMapTest, RejectsMultipleDetectionModesOnOneNode) { std::string path = "/tmp/test_node_map_multi_mode.yaml"; std::ofstream f(path); From 7e9ea8c4f6a2bbf631771a87c2f8736573fe32d1 Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Tue, 30 Jun 2026 19:47:49 +0200 Subject: [PATCH 4/8] Fix duplicate clang_format lint test and review nits Exclude ament_cmake_clang_format from ament_lint_auto so the explicit ament_clang_format() hook is the only clang_format test (was registered twice, failing configure), and fix its CONFIG_FILE path to the repo root. Add include, make the uint64->int64 reinterpret well-defined via memcpy under C++17, and correct the out-of-range bit README wording. Refs #481 --- src/ros2_medkit_fault_detection/CMakeLists.txt | 7 +++++-- .../fault_detection.hpp | 1 + .../ros2_medkit_opcua/README.md | 5 +++-- .../ros2_medkit_opcua/src/opcua_client.cpp | 14 +++++++++----- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/src/ros2_medkit_fault_detection/CMakeLists.txt b/src/ros2_medkit_fault_detection/CMakeLists.txt index 8d0cbb089..e094c9f3b 100644 --- a/src/ros2_medkit_fault_detection/CMakeLists.txt +++ b/src/ros2_medkit_fault_detection/CMakeLists.txt @@ -44,17 +44,20 @@ ament_export_include_directories(include) if(BUILD_TESTING) find_package(ament_lint_auto REQUIRED) + # clang_format is registered explicitly below; excluding it here prevents + # ament_lint_auto from registering a second clang_format test. list(APPEND AMENT_LINT_AUTO_EXCLUDE ament_cmake_uncrustify ament_cmake_cpplint - ament_cmake_clang_tidy) + ament_cmake_clang_tidy + ament_cmake_clang_format) ament_lint_auto_find_test_dependencies() find_package(ament_cmake_clang_format QUIET) if(ament_cmake_clang_format_FOUND) file(GLOB_RECURSE _format_files "include/*.hpp" "test/*.cpp") ament_clang_format(${_format_files} - CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../.clang-format") + CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../../.clang-format") endif() find_package(ament_cmake_gtest REQUIRED) diff --git a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp index 3e7a17d69..ffc73afec 100644 --- a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp +++ b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md index 83ccf99a6..095243ae4 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md @@ -215,8 +215,9 @@ nodes: # UInt32 / UInt64). A signed type (Int16/Int32) whose top bit is set is # sign-extended when widened to the 64-bit decode register, which sets every # bit above the register width and raises spurious bit faults. `bit:` positions -# are 0-based and must be < 64; higher positions are dead config and rejected at -# load. Fault codes must be globally unique across all `alarm` / `status_bits` / +# are 0-based and must be < 64; higher positions are dead config, so the loader +# logs a warning and skips that bit rule while still loading the rest of the +# config. Fault codes must be globally unique across all `alarm` / `status_bits` / # `fault_enum` entries (the loader rejects duplicates); reusing a code on two # nodes makes its fault flap. diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp index e3f35eea0..e0768c59d 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp @@ -124,11 +124,15 @@ OpcuaValue variant_to_value(const opcua::Variant & var) { } if (var.isType()) { // OpcuaValue has no unsigned 64-bit slot; reinterpret the bit pattern as - // int64_t. C++20 mandates two's complement, so the bits are preserved for - // status-word decode and for fault-enum codes that fit in int64_t. Values - // above INT64_MAX wrap to negative when compared numerically (enum mode); - // status-word bit decode is unaffected because it masks the raw bits. - return static_cast(var.getScalarCopy()); + // int64_t. A plain static_cast of a value above INT64_MAX is only + // implementation-defined under C++17, so copy the bits via memcpy to make + // the reinterpretation well-defined. The raw bits are preserved for + // status-word bit decode (which masks them) and for fault-enum codes that + // fit in int64_t; codes above INT64_MAX read as negative in enum mode. + const uint64_t raw = var.getScalarCopy(); + int64_t signed_bits; + std::memcpy(&signed_bits, &raw, sizeof(signed_bits)); + return signed_bits; } if (var.isType()) { return var.getScalarCopy(); From f844aa9242eb24b7f51f51e3273ba7a8ce3eadf4 Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Tue, 30 Jun 2026 22:28:16 +0200 Subject: [PATCH 5/8] test: drive status_bits and fault_enum through live OPC UA E2E Extend test_alarm_server with writable StatusWord/FaultCode Int32 registers and a set command, and add poll-path bit-decode plus enum-transition scenarios to run_alarm_tests.sh asserting raise/clear via SOVD /api/v1/faults. Refs #481 --- .../docker/scripts/run_alarm_tests.sh | 51 ++++++++++++++++- .../test_alarm_server/test_alarm_server.cpp | 57 ++++++++++++++++++- 2 files changed, 105 insertions(+), 3 deletions(-) diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh index b237069f1..e8f09b6d1 100755 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh @@ -227,7 +227,34 @@ mkdir -p /tmp/alarm_test_config cat >/tmp/alarm_test_config/alarm_nodes.yaml < `` CLI command. Fault codes are globally unique vs the +# event_alarms codes so the shared fault tracker never flaps. +nodes: + - node_id: "ns=2;s=StatusWord" + entity_id: tank_process + data_name: status_word + data_type: int + status_bits: + - bit: 3 + fault_code: PLC_PUMP_OVERLOAD + severity: ERROR + message: "Pump overload" + - node_id: "ns=2;s=FaultCode" + entity_id: tank_process + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 + codes: + - code: 10 + fault_code: PLC_VFD_OVERVOLTAGE + severity: ERROR + - code: 11 + fault_code: PLC_VFD_OVERCURRENT + severity: ERROR event_alarms: - alarm_source: "ns=2;s=Alarms.Overpressure" entity_id: tank_process @@ -353,6 +380,28 @@ echo "fire SensorLost 900" >&3 wait_until_status PLC_SENSOR_LOST CONFIRMED 30 echo " OK PLC_SENSOR_LOST re-armed after Enable" +echo " [scenario] status-word bit decode (poll path)" +# bit 3 set => value 8 raises PLC_PUMP_OVERLOAD; clearing the word clears it. +echo "set StatusWord 8" >&3 +wait_until_status PLC_PUMP_OVERLOAD CONFIRMED 30 +echo "set StatusWord 0" >&3 +wait_no_fault PLC_PUMP_OVERLOAD 30 +echo " OK PLC_PUMP_OVERLOAD raised on bit3 then cleared" + +echo " [scenario] fault-code enum decode (poll path)" +# code 11 => PLC_VFD_OVERCURRENT; transition to code 10 must raise +# PLC_VFD_OVERVOLTAGE and clear PLC_VFD_OVERCURRENT; ok_value clears both. +echo "set FaultCode 11" >&3 +wait_until_status PLC_VFD_OVERCURRENT CONFIRMED 30 +echo "set FaultCode 10" >&3 +wait_until_status PLC_VFD_OVERVOLTAGE CONFIRMED 30 +wait_no_fault PLC_VFD_OVERCURRENT 30 +echo " OK enum transition 11->10: PLC_VFD_OVERVOLTAGE raised, PLC_VFD_OVERCURRENT cleared" +echo "set FaultCode 0" >&3 +wait_no_fault PLC_VFD_OVERVOLTAGE 30 +wait_no_fault PLC_VFD_OVERCURRENT 30 +echo " OK both VFD faults cleared on ok_value" + echo " [scenario] reconnect re-subscribes after server restart" # This scenario does NOT verify ConditionRefresh re-emit - the test_alarm_server # is in-memory and loses condition state on restart, so the natural diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp index 765394592..bf58e533a 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp @@ -136,6 +136,27 @@ UA_StatusCode add_condition(UA_Server * server, const std::string & name, UA_UIn return rc; } +// Add a writable Int32 variable under ObjectsFolder in the user namespace with +// a predictable string NodeId ``ns;s=`` so the gateway's polled +// detection config (``node_id: "ns=2;s=StatusWord"``) can address it. Exercises +// the status_bits / fault_enum poll path that is independent of the +// AlarmConditionType event path above. +UA_StatusCode add_int32_variable(UA_Server * server, const std::string & name, UA_UInt16 ns) { + UA_VariableAttributes attr = UA_VariableAttributes_default; + UA_Int32 initial = 0; + UA_Variant_setScalar(&attr.value, &initial, &UA_TYPES[UA_TYPES_INT32]); + attr.dataType = UA_TYPES[UA_TYPES_INT32].typeId; + attr.accessLevel = UA_ACCESSLEVELMASK_READ | UA_ACCESSLEVELMASK_WRITE; + attr.displayName = UA_LOCALIZEDTEXT(const_cast("en"), const_cast(name.c_str())); + UA_NodeId requested = UA_NODEID_STRING_ALLOC(ns, name.c_str()); + UA_QualifiedName qname = UA_QUALIFIEDNAME(ns, const_cast(name.c_str())); + UA_StatusCode rc = UA_Server_addVariableNode( + server, requested, UA_NODEID_NUMERIC(0, UA_NS0ID_OBJECTSFOLDER), UA_NODEID_NUMERIC(0, UA_NS0ID_ORGANIZES), qname, + UA_NODEID_NUMERIC(0, UA_NS0ID_BASEDATAVARIABLETYPE), attr, nullptr, nullptr); + UA_NodeId_clear(&requested); + return rc; +} + UA_StatusCode set_two_state(UA_Server * server, const UA_NodeId & cond, const char * field, UA_Boolean value) { UA_Variant v; UA_Variant_setScalar(&v, &value, &UA_TYPES[UA_TYPES_BOOLEAN]); @@ -300,7 +321,7 @@ UA_StatusCode handle_enable(UA_Server * server, const Condition & c, bool enable return set_two_state(server, c.node, "EnabledState", enable); } -void cli_loop(UA_Server * server) { +void cli_loop(UA_Server * server, UA_UInt16 ns) { std::string line; while (g_running && std::getline(std::cin, line)) { std::istringstream iss(line); @@ -312,6 +333,28 @@ void cli_loop(UA_Server * server) { break; } std::lock_guard guard(g_mutex); + // ``set `` writes a polled Int32 variable (StatusWord / + // FaultCode). Handled before the condition lookup because these nodes are + // plain variables, not AlarmCondition instances in g_conditions. + if (cmd == "set") { + long val = 0; + if (!(iss >> val)) { + std::cout << "ERR set_missing_value:" << name << std::endl; + continue; + } + UA_Int32 v32 = static_cast(val); + UA_Variant var; + UA_Variant_setScalar(&var, &v32, &UA_TYPES[UA_TYPES_INT32]); + UA_NodeId target = UA_NODEID_STRING_ALLOC(ns, name.c_str()); + UA_StatusCode rc = UA_Server_writeValue(server, target, var); + UA_NodeId_clear(&target); + if (rc == UA_STATUSCODE_GOOD) { + std::cout << "OK " << name << "=" << val << std::endl; + } else { + std::cout << "ERR " << name << ":" << UA_StatusCode_name(rc) << std::endl; + } + continue; + } auto it = g_conditions.find(name); if (cmd != "quit" && it == g_conditions.end()) { std::cout << "ERR unknown_condition:" << name << std::endl; @@ -416,8 +459,18 @@ int main(int argc, char ** argv) { g_conditions[oh.name] = oh; g_conditions[sl.name] = sl; + // Polled Int32 registers for the status_bits / fault_enum detection path + // (independent of the AlarmConditionType events above). Driven via the + // ``set `` CLI command. + if (add_int32_variable(server, "StatusWord", ns) != UA_STATUSCODE_GOOD || + add_int32_variable(server, "FaultCode", ns) != UA_STATUSCODE_GOOD) { + UA_LOG_ERROR(UA_Log_Stdout, UA_LOGCATEGORY_USERLAND, "Failed to register detection variables"); + UA_Server_delete(server); + return 1; + } + std::cout << "READY port=" << port << " namespace=" << ns << std::endl; - std::thread cli(cli_loop, server); + std::thread cli(cli_loop, server, ns); UA_StatusCode rc = UA_Server_run(server, reinterpret_cast(&g_running)); g_running = false; From f202c527c1572f611a226b115cef9e9b91f876ba Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Tue, 30 Jun 2026 22:55:34 +0200 Subject: [PATCH 6/8] Hold tracker state on undecidable reads; label unmapped enum codes NaN/inf/uncoercible values now emit no signal, so a transition tracker holds a standing fault instead of a bad read clearing it. Enum decode adds a catch-all for unmapped non-ok codes, status-word decode an optional width mask, and node-map malformed scalars warn not default. Refs #481 --- .../fault_detection.hpp | 79 ++++-- .../test/test_fault_detection.cpp | 141 +++++++++++ .../include/ros2_medkit_opcua/node_map.hpp | 4 +- .../ros2_medkit_opcua/src/node_map.cpp | 180 ++++++++++++-- .../ros2_medkit_opcua/test/test_node_map.cpp | 233 +++++++++++++++++- 5 files changed, 593 insertions(+), 44 deletions(-) diff --git a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp index ffc73afec..045ddcc69 100644 --- a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp +++ b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp @@ -71,6 +71,11 @@ struct BitRule { /// Status-word decode: one integer register, many named-bit faults. struct StatusWordRule { std::vector bits; + /// Source register width in bits. 0 = treat the decoded value as a full + /// 64-bit word (no masking). When set in [1, 63] the raw value is masked to + /// that width before decode, so a signed register read with its sign bit set + /// (which sign-extends into int64) does not light up spurious high bits. + unsigned width{0}; }; /// One entry of a fault-code enum register. @@ -83,6 +88,11 @@ struct EnumRule { struct EnumMapRule { std::vector codes; std::int64_t ok_value{0}; // value meaning "no fault" (no rule raised) + /// Catch-all raised when the register reports a non-ok value that matches no + /// configured code (firmware revisions routinely add codes the config has + /// not enumerated). Empty ``fault_code`` disables the catch-all. When its + /// ``message`` is empty the evaluator fills in "unmapped fault code ". + FaultSpec unknown_fault{}; }; /// One of the composable detection modes for a single mapped point. @@ -121,7 +131,11 @@ inline bool as_double(const Value & v, double & out) { } /// Coerce a value to a 64-bit integer for bit / enum decode. bool -> 0/1, -/// int as-is, double rounded to nearest. Strings are not decodable. +/// int as-is, double rounded to nearest. Non-finite doubles (NaN / inf) and +/// doubles outside the int64 range are *undecidable* and yield ``false`` so the +/// caller can hold state rather than fabricate garbage bits / enum codes (e.g. +/// llround(+inf) is INT64_MAX, which would light up every status-word bit). +/// Strings are not decodable. inline bool as_int(const Value & v, std::int64_t & out) { return std::visit( [&out](auto && x) -> bool { @@ -133,6 +147,12 @@ inline bool as_int(const Value & v, std::int64_t & out) { out = x; return true; } else if constexpr (std::is_same_v) { + // -2^63 (INT64_MIN, exact in double) .. 2^63 (one past INT64_MAX). + constexpr double kMin = -9223372036854775808.0; + constexpr double kMax = 9223372036854775808.0; + if (!std::isfinite(x) || x < kMin || x >= kMax) { + return false; + } out = static_cast(std::llround(x)); return true; } else { @@ -158,8 +178,15 @@ inline FaultSignal make_signal(const FaultSpec & spec, bool active) { /// Returns the full set of faults the rule governs, each flagged active or /// inactive. A threshold rule yields one signal; a status-word rule yields one /// per configured bit; an enum rule yields one per configured code (the -/// matching code active, the rest inactive) so a transition tracker can clear -/// the previous code and raise the new one. +/// matching code active, the rest inactive) plus an optional catch-all, so a +/// transition tracker can clear the previous code and raise the new one. +/// +/// Undecidable input holds state. A value that cannot be coerced for the rule - +/// a non-finite double (NaN / inf, e.g. a disconnected analog sensor or a +/// div-by-zero on the PLC) or an uncoercible value (a string, a failed numeric +/// conversion) - yields an *empty* vector: the evaluator emits NO signal, so a +/// ``FaultTransitionTracker`` preserves the prior state instead of clearing a +/// standing fault. A bad read must never mask a real alarm. inline std::vector evaluate(const Value & value, const DetectionRule & rule) { std::vector out; @@ -168,36 +195,56 @@ inline std::vector evaluate(const Value & value, const DetectionRul using T = std::decay_t; if constexpr (std::is_same_v) { - bool active = false; if (std::holds_alternative(value)) { - // A boolean point is alarm-on-true, threshold ignored. Preserves - // the original OPC UA threshold-path behaviour for Bool tags. - active = std::get(value); + // A boolean point honours the direction: above=true is + // alarm-on-true, above=false is alarm-on-false (a normally-closed + // contact, a watchdog-OK / ready bit whose FAULT state is false). + const bool b = std::get(value); + out.push_back(detail::make_signal(r.fault, r.above ? b : !b)); } else { double d = 0.0; - if (detail::as_double(value, d)) { - active = r.above ? (d > r.threshold) : (d < r.threshold); + if (detail::as_double(value, d) && std::isfinite(d)) { + out.push_back(detail::make_signal(r.fault, r.above ? (d > r.threshold) : (d < r.threshold))); } + // else: undecidable (string / NaN / inf) -> emit nothing, hold last state. } - out.push_back(detail::make_signal(r.fault, active)); } else if constexpr (std::is_same_v) { std::int64_t raw = 0; - const bool ok = detail::as_int(value, raw); - const auto mask = static_cast(raw); + if (!detail::as_int(value, raw)) { + return; // undecidable -> emit nothing, hold last state + } + auto mask = static_cast(raw); + if (r.width > 0 && r.width < 64) { + mask &= (1ULL << r.width) - 1ULL; // ignore sign-extended high bits + } for (const auto & b : r.bits) { - bool active = ok && b.bit < 64 && ((mask >> b.bit) & 0x1ULL); + const bool active = b.bit < 64 && (((mask >> b.bit) & 0x1ULL) != 0ULL); out.push_back(detail::make_signal(b.fault, active)); } } else if constexpr (std::is_same_v) { std::int64_t code = 0; - const bool ok = detail::as_int(value, code); - const bool healthy = ok && code == r.ok_value; + if (!detail::as_int(value, code)) { + return; // undecidable -> emit nothing, hold last state + } + const bool healthy = code == r.ok_value; + bool matched = false; for (const auto & e : r.codes) { - bool active = ok && !healthy && code == e.code; + const bool active = !healthy && code == e.code; + matched = matched || active; out.push_back(detail::make_signal(e.fault, active)); } + // Catch-all so a non-ok value with no configured label stays visible + // rather than reading as healthy. + if (!r.unknown_fault.fault_code.empty()) { + const bool active = !healthy && !matched; + FaultSignal s = detail::make_signal(r.unknown_fault, active); + if (active && r.unknown_fault.message.empty()) { + s.message = "unmapped fault code " + std::to_string(code); + } + out.push_back(std::move(s)); + } } }, rule); diff --git a/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp b/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp index d0496078e..7859a9a75 100644 --- a/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp +++ b/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp @@ -15,6 +15,7 @@ #include "ros2_medkit_fault_detection/fault_detection.hpp" #include +#include #include #include @@ -76,6 +77,42 @@ TEST(ThresholdRule, BooleanPointIsAlarmOnTrue) { EXPECT_FALSE(fd::evaluate(fd::Value{false}, r)[0].active); } +TEST(ThresholdRule, BooleanHonoursDirection) { + // above=false expresses the common case where logic-true is healthy and the + // FAULT is false (normally-closed contact, watchdog-OK / ready bit). + fd::ThresholdRule r; + r.fault = {"NOT_READY", "ERROR", "Drive not ready"}; + r.threshold = 0.0; + r.above = false; + + EXPECT_TRUE(fd::evaluate(fd::Value{false}, r)[0].active); + EXPECT_FALSE(fd::evaluate(fd::Value{true}, r)[0].active); +} + +TEST(ThresholdRule, NonFiniteValueHoldsState) { + // A disconnected analog sensor reports NaN/inf. The evaluator must emit + // nothing (not active=false), so the tracker holds a previously-raised fault + // instead of a bad read clearing a real alarm. + fd::ThresholdRule r; + r.fault = {"HIGH_TEMP", "ERROR", "hot"}; + r.threshold = 80.0; + r.above = true; + + fd::FaultTransitionTracker tracker; + ASSERT_EQ(tracker.apply(fd::evaluate(fd::Value{90.0}, r)).size(), 1u); + ASSERT_TRUE(tracker.active("HIGH_TEMP")); + + auto nan_out = fd::evaluate(fd::Value{std::numeric_limits::quiet_NaN()}, r); + EXPECT_TRUE(nan_out.empty()); + EXPECT_TRUE(tracker.apply(nan_out).empty()); + EXPECT_TRUE(tracker.active("HIGH_TEMP")); + + auto inf_out = fd::evaluate(fd::Value{std::numeric_limits::infinity()}, r); + EXPECT_TRUE(inf_out.empty()); + EXPECT_TRUE(tracker.apply(inf_out).empty()); + EXPECT_TRUE(tracker.active("HIGH_TEMP")); +} + // -- Status-word bit decode -------------------------------------------------- TEST(StatusWordRule, NamedBitDecode) { @@ -104,6 +141,47 @@ TEST(StatusWordRule, AllClearWhenZero) { } } +TEST(StatusWordRule, NonFiniteValueHoldsState) { + // +inf rounded to int64 is INT64_MAX, which would light up every bit; NaN + // rounds to 0 (all clear) and would clear a standing fault. Both must instead + // emit nothing so the tracker preserves prior state. + fd::StatusWordRule r; + r.bits = {{0u, {"E_STOP", "CRITICAL", "Emergency stop"}}, {3u, {"PUMP_OVERLOAD", "ERROR", "Pump overload"}}}; + + fd::FaultTransitionTracker tracker; + ASSERT_EQ(tracker.apply(fd::evaluate(fd::Value{static_cast(0b1)}, r)).size(), 1u); + ASSERT_TRUE(tracker.active("E_STOP")); + + auto inf_out = fd::evaluate(fd::Value{std::numeric_limits::infinity()}, r); + EXPECT_TRUE(inf_out.empty()); // no flood + EXPECT_TRUE(tracker.apply(inf_out).empty()); + EXPECT_TRUE(tracker.active("E_STOP")); + EXPECT_FALSE(tracker.active("PUMP_OVERLOAD")); + + auto nan_out = fd::evaluate(fd::Value{std::numeric_limits::quiet_NaN()}, r); + EXPECT_TRUE(nan_out.empty()); + EXPECT_TRUE(tracker.apply(nan_out).empty()); + EXPECT_TRUE(tracker.active("E_STOP")); +} + +TEST(StatusWordRule, WidthMasksSignExtendedHighBits) { + // A 16-bit signed status word with bit15 set, read as int64, sign-extends and + // sets bits 16..63. A configured width masks them so only in-range bits fire. + fd::StatusWordRule r; + r.width = 16; + r.bits = {{15u, {"REAL_FAULT", "ERROR", "real"}}, {20u, {"SPURIOUS", "ERROR", "spurious"}}}; + + const auto sign_extended = static_cast(0xFFFFFFFFFFFF8000ULL); + auto out = fd::evaluate(fd::Value{sign_extended}, r); + EXPECT_TRUE(find(out, "REAL_FAULT")->active); + EXPECT_FALSE(find(out, "SPURIOUS")->active); + + // Without a width the same value would spuriously fire bit 20. + fd::StatusWordRule wide; + wide.bits = {{20u, {"SPURIOUS", "ERROR", "spurious"}}}; + EXPECT_TRUE(find(fd::evaluate(fd::Value{sign_extended}, wide), "SPURIOUS")->active); +} + // -- Fault-code enum --------------------------------------------------------- TEST(EnumMapRule, CodeMapsToFaultAndText) { @@ -132,6 +210,69 @@ TEST(EnumMapRule, OkValueRaisesNothing) { EXPECT_FALSE(out[0].active); } +TEST(EnumMapRule, UnmappedNonOkRaisesCatchAll) { + // A non-ok value with no configured label must stay visible via the catch-all + // (firmware adds codes the config has not enumerated), not read as healthy. + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}, {11, {"OVERCURR", "ERROR", "oc"}}}; + r.unknown_fault = {"VFD_UNMAPPED", "ERROR", ""}; + + auto out = fd::evaluate(fd::Value{static_cast(99)}, r); + ASSERT_EQ(out.size(), 3u); + EXPECT_FALSE(find(out, "OVERVOLT")->active); + EXPECT_FALSE(find(out, "OVERCURR")->active); + const auto * unk = find(out, "VFD_UNMAPPED"); + ASSERT_NE(unk, nullptr); + EXPECT_TRUE(unk->active); + EXPECT_EQ(unk->message, "unmapped fault code 99"); + + // ok and mapped values keep the catch-all inactive so the tracker clears it. + EXPECT_FALSE(find(fd::evaluate(fd::Value{static_cast(0)}, r), "VFD_UNMAPPED")->active); + auto mapped = fd::evaluate(fd::Value{static_cast(10)}, r); + EXPECT_TRUE(find(mapped, "OVERVOLT")->active); + EXPECT_FALSE(find(mapped, "VFD_UNMAPPED")->active); +} + +TEST(EnumMapRule, NonFiniteValueHoldsState) { + // NaN decodes to 0 == ok_value (reads healthy) and inf to INT64_MAX; either + // would clear a standing enum fault. Both must emit nothing instead. + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}}; + r.unknown_fault.fault_code = "UNMAPPED"; + + fd::FaultTransitionTracker tracker; + ASSERT_EQ(tracker.apply(fd::evaluate(fd::Value{static_cast(10)}, r)).size(), 1u); + ASSERT_TRUE(tracker.active("OVERVOLT")); + + auto nan_out = fd::evaluate(fd::Value{std::numeric_limits::quiet_NaN()}, r); + EXPECT_TRUE(nan_out.empty()); + EXPECT_TRUE(tracker.apply(nan_out).empty()); + EXPECT_TRUE(tracker.active("OVERVOLT")); + + EXPECT_TRUE(fd::evaluate(fd::Value{std::numeric_limits::infinity()}, r).empty()); + EXPECT_TRUE(tracker.active("OVERVOLT")); +} + +TEST(Evaluator, UncoercibleStringHoldsState) { + // A string payload (wrong datatype, error string, partial poll) is undecidable + // for every rule kind: emit nothing so a standing fault is preserved. + fd::ThresholdRule t; + t.fault = {"HIGH_TEMP", "ERROR", "hot"}; + t.threshold = 80.0; + EXPECT_TRUE(fd::evaluate(fd::Value{std::string{"sensor error"}}, t).empty()); + + fd::StatusWordRule s; + s.bits = {{0u, {"E_STOP", "CRITICAL", "e"}}}; + EXPECT_TRUE(fd::evaluate(fd::Value{std::string{"bad"}}, s).empty()); + + fd::EnumMapRule e; + e.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}}; + e.unknown_fault.fault_code = "UNK"; + EXPECT_TRUE(fd::evaluate(fd::Value{std::string{"bad"}}, e).empty()); +} + // -- Raise / clear transitions ---------------------------------------------- TEST(TransitionTracker, EmitsRaiseThenClearOnce) { diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp index e790dc87e..99162f2a8 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp @@ -130,7 +130,9 @@ class NodeMap { /// Find entry by OPC-UA node ID string const NodeMapEntry * find_by_node_id(const std::string & node_id_str) const; - /// Get all entries that have threshold-based alarm configuration + /// Get all entries that have threshold-based alarm configuration. + /// Test / back-compat only - the live fault-evaluation path is + /// detection_entries(); nothing reads this at runtime. std::vector alarm_entries() const; /// Get all entries carrying a shared fault-detection rule (threshold, diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 98271d63a..75fe68e6b 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -199,6 +199,69 @@ bool NodeMap::load(const std::string & yaml_path) { return false; } + const auto logger = rclcpp::get_logger("opcua.node_map"); + + // Defensive scalar decode: when a numeric/boolean field is *present* but + // fails to convert, warn (naming node_id) and fall back to the default + // rather than silently substituting a semantics-changing value (e.g. + // ``threshold: "100 bar"`` -> 0.0 inverts a below-threshold alarm). yaml-cpp + // ``as(fallback)`` cannot tell "absent" from "present but bad". (#481) + auto parse_double = [logger](const YAML::Node & node, double def, const char * field, + const std::string & node_id) -> double { + if (!node) { + return def; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-numeric %s ignored - using default", node_id.c_str(), field); + return def; + } + }; + auto parse_bool = [logger](const YAML::Node & node, bool def, const char * field, + const std::string & node_id) -> bool { + if (!node) { + return def; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-boolean %s ignored - using default", node_id.c_str(), field); + return def; + } + }; + auto parse_int64 = [logger](const YAML::Node & node, std::int64_t def, const char * field, + const std::string & node_id) -> std::int64_t { + if (!node) { + return def; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-integer %s ignored - using default", node_id.c_str(), field); + return def; + } + }; + // Severity must be one of the documented SOVD buckets; warn + default to + // ERROR on a typo (e.g. ``SEVERE`` / wrong case) so it is not misrouted. (#481) + auto validate_severity = [logger](const std::string & sev, const std::string & node_id) -> std::string { + if (sev != "INFO" && sev != "WARNING" && sev != "ERROR" && sev != "CRITICAL") { + RCLCPP_WARN(logger, "node_id=%s: unknown severity '%s' - defaulting to ERROR", node_id.c_str(), sev.c_str()); + return "ERROR"; + } + return sev; + }; + // Deterministic, unique-per-point catch-all code for an unmapped enum value. + auto derive_unknown_code = [](const std::string & entity, const std::string & data) -> std::string { + std::string s = entity + "_" + data + "_UNMAPPED"; + for (auto & c : s) { + c = (std::isalnum(static_cast(c)) != 0) + ? static_cast(std::toupper(static_cast(c))) + : '_'; + } + return s; + }; + for (size_t i = 0; i < nodes.size(); ++i) { const auto & n = nodes[i]; @@ -310,10 +373,11 @@ bool NodeMap::load(const std::string & yaml_path) { if (n["alarm"]) { AlarmConfig alarm; alarm.fault_code = n["alarm"]["fault_code"].as(); - alarm.severity = n["alarm"]["severity"].as("ERROR"); + alarm.severity = validate_severity(n["alarm"]["severity"].as("ERROR"), entry.node_id_str); alarm.message = n["alarm"]["message"].as(alarm.fault_code); - alarm.threshold = n["alarm"]["threshold"].as(0.0); - alarm.above_threshold = n["alarm"]["above_threshold"].as(true); + alarm.threshold = parse_double(n["alarm"]["threshold"], 0.0, "alarm.threshold", entry.node_id_str); + alarm.above_threshold = + parse_bool(n["alarm"]["above_threshold"], true, "alarm.above_threshold", entry.node_id_str); fd::ThresholdRule rule; rule.fault = {alarm.fault_code, alarm.severity, alarm.message}; @@ -323,51 +387,108 @@ bool NodeMap::load(const std::string & yaml_path) { entry.alarm = std::move(alarm); } else if (n["status_bits"]) { fd::StatusWordRule rule; - for (const auto & b : n["status_bits"]) { - if (!b["bit"] || !b["fault_code"]) { - RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), - "status_bits entry on node_id=%s missing bit/fault_code - skipping", entry.node_id_str.c_str()); - continue; + // Optional source register width: mask off sign-extended high bits so a + // signed status word read with its sign bit set does not fire spurious + // faults above the real register width. (#481) + if (n["status_word_width"]) { + const auto w = parse_int64(n["status_word_width"], 0, "status_word_width", entry.node_id_str); + if (w >= 1 && w <= 64) { + rule.width = static_cast(w); + } else { + RCLCPP_WARN(logger, "status_word_width=%lld on node_id=%s out of range (1..64) - ignoring", + static_cast(w), entry.node_id_str.c_str()); } - fd::BitRule br; - br.bit = b["bit"].as(); - if (br.bit >= 64) { - // The status word is decoded as a 64-bit register, so bits at or - // above 64 can never be set and the rule would be dead config. - RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), - "status_bits entry on node_id=%s has bit=%u >= 64 (max status-word width); " - "this fault can never fire - skipping", - entry.node_id_str.c_str(), br.bit); - continue; + } + const auto & bits_node = n["status_bits"]; + if (bits_node.IsSequence()) { + for (const auto & b : bits_node) { + if (!b["bit"] || !b["fault_code"]) { + RCLCPP_WARN(logger, "status_bits entry on node_id=%s missing bit/fault_code - skipping", + entry.node_id_str.c_str()); + continue; + } + fd::BitRule br; + // A wrong-typed bit value skips just this bit (consistent with the + // missing-field path above), never aborts the whole file. (#481) + try { + br.bit = b["bit"].as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "status_bits entry on node_id=%s has non-integer bit - skipping", + entry.node_id_str.c_str()); + continue; + } + if (br.bit >= 64) { + // The status word is decoded as a 64-bit register, so bits at or + // above 64 can never be set and the rule would be dead config. + RCLCPP_WARN(logger, + "status_bits entry on node_id=%s has bit=%u >= 64 (max status-word width); " + "this fault can never fire - skipping", + entry.node_id_str.c_str(), br.bit); + continue; + } + if (rule.width > 0 && br.bit >= rule.width) { + RCLCPP_WARN(logger, + "status_bits entry on node_id=%s has bit=%u >= status_word_width=%u; " + "this fault can never fire - skipping", + entry.node_id_str.c_str(), br.bit, rule.width); + continue; + } + br.fault = {b["fault_code"].as(), + validate_severity(b["severity"].as("ERROR"), entry.node_id_str), + b["message"].as(b["fault_code"].as())}; + rule.bits.push_back(std::move(br)); } - br.fault = {b["fault_code"].as(), b["severity"].as("ERROR"), - b["message"].as(b["fault_code"].as())}; - rule.bits.push_back(std::move(br)); } if (!rule.bits.empty()) { entry.detection = std::move(rule); + } else { + RCLCPP_WARN(logger, + "status_bits on node_id=%s declared but produced no usable rules - " + "no faults will be detected for this point", + entry.node_id_str.c_str()); } } else if (n["fault_enum"]) { fd::EnumMapRule rule; - rule.ok_value = n["fault_enum"]["ok_value"].as(0); + rule.ok_value = parse_int64(n["fault_enum"]["ok_value"], 0, "fault_enum.ok_value", entry.node_id_str); const auto codes = n["fault_enum"]["codes"]; if (codes && codes.IsSequence()) { for (const auto & c : codes) { if (!c["code"] || !c["fault_code"]) { - RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), - "fault_enum entry on node_id=%s missing code/fault_code - skipping", + RCLCPP_WARN(logger, "fault_enum entry on node_id=%s missing code/fault_code - skipping", entry.node_id_str.c_str()); continue; } fd::EnumRule er; - er.code = c["code"].as(); - er.fault = {c["fault_code"].as(), c["severity"].as("ERROR"), + // A wrong-typed code value skips just this entry, never aborts. (#481) + try { + er.code = c["code"].as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "fault_enum entry on node_id=%s has non-integer code - skipping", + entry.node_id_str.c_str()); + continue; + } + er.fault = {c["fault_code"].as(), + validate_severity(c["severity"].as("ERROR"), entry.node_id_str), c["message"].as(c["fault_code"].as())}; rule.codes.push_back(std::move(er)); } } if (!rule.codes.empty()) { + // Catch-all: a non-ok value matching no configured code stays visible + // rather than reading as healthy. Code is YAML-overridable, else + // derived deterministically from the point. (#481) + rule.unknown_fault.fault_code = n["fault_enum"]["unknown_fault_code"] + ? n["fault_enum"]["unknown_fault_code"].as() + : derive_unknown_code(entry.entity_id, entry.data_name); + rule.unknown_fault.severity = + validate_severity(n["fault_enum"]["unknown_severity"].as("ERROR"), entry.node_id_str); + rule.unknown_fault.message = n["fault_enum"]["unknown_message"].as(""); entry.detection = std::move(rule); + } else { + RCLCPP_WARN(logger, + "fault_enum on node_id=%s declared but produced no usable rules - " + "no faults will be detected for this point", + entry.node_id_str.c_str()); } } @@ -465,6 +586,9 @@ bool NodeMap::load(const std::string & yaml_path) { for (const auto & e : rule.codes) { detection_faults.emplace_back(entry.entity_id, e.fault.fault_code); } + if (!rule.unknown_fault.fault_code.empty()) { + detection_faults.emplace_back(entry.entity_id, rule.unknown_fault.fault_code); + } } }, *entry.detection); @@ -564,6 +688,10 @@ const NodeMapEntry * NodeMap::find_by_node_id(const std::string & node_id_str) c return nullptr; } +// Test / back-compat only: the live fault-evaluation path is +// detection_entries() (see opcua_poller.cpp). This returns just the +// threshold-mode subset carrying the legacy ``alarm`` block and is not consulted +// at runtime; do not wire new callers to it. std::vector NodeMap::alarm_entries() const { std::vector result; for (const auto & entry : entries_) { diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index d790f9fca..5970f76ff 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -722,9 +722,11 @@ component_id: test EXPECT_EQ(r.codes[0].fault.message, "DC bus overvoltage"); auto signals = fd::evaluate(fd::Value{static_cast(10)}, *det); - ASSERT_EQ(signals.size(), 2u); + // Two configured codes plus the auto-added unmapped catch-all. + ASSERT_EQ(signals.size(), 3u); EXPECT_TRUE(signals[0].active); // code 10 EXPECT_FALSE(signals[1].active); // code 11 + EXPECT_FALSE(signals[2].active); // catch-all (a mapped code is not "unmapped") } TEST_F(NodeMapTest, RejectsDuplicateDetectionFaultCode) { @@ -1069,6 +1071,235 @@ component_id: test } } +// -- Undecidable input, catch-all, and malformed-config handling (issue #481) - + +TEST_F(NodeMapTest, EmptyStatusBitsBlockLoadsWithoutDetection) { + // A declared-but-empty status_bits block yields zero rules. The loader must + // keep the data point but emit no detection (warned), not silently ship a + // non-functional fault map and not reject the whole file. + std::string path = "/tmp/test_node_map_empty_status_bits.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: [] +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.entries().size(), 1u); + EXPECT_TRUE(map.detection_entries().empty()); +} + +TEST_F(NodeMapTest, FaultEnumWithoutCodesLoadsWithoutDetection) { + std::string path = "/tmp/test_node_map_empty_enum.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.entries().size(), 1u); + EXPECT_TRUE(map.detection_entries().empty()); +} + +TEST_F(NodeMapTest, WrongTypedBitSkippedNotAborted) { + // A single mistyped bit must skip just that bit (consistent with the + // missing-field path), never tear down the entire node map. + std::string path = "/tmp/test_node_map_bad_bit.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: "three" + fault_code: BAD_BIT + - bit: 3 + fault_code: PUMP_OVERLOAD +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.bits.size(), 1u); + EXPECT_EQ(r.bits[0].fault.fault_code, "PUMP_OVERLOAD"); +} + +TEST_F(NodeMapTest, WrongTypedEnumCodeSkippedNotAborted) { + std::string path = "/tmp/test_node_map_bad_code.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: high + fault_code: BAD_CODE + - code: 10 + fault_code: VFD_OVERVOLTAGE +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.codes.size(), 1u); + EXPECT_EQ(r.codes[0].code, 10); +} + +TEST_F(NodeMapTest, FaultEnumGetsCatchAllUnknownFault) { + std::string path = "/tmp/test_node_map_enum_catchall.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_FALSE(r.unknown_fault.fault_code.empty()); + + // An unenumerated non-ok value raises the catch-all rather than reading ok. + auto out = fd::evaluate(fd::Value{static_cast(77)}, *det); + const fd::FaultSignal * unk = nullptr; + for (const auto & s : out) { + if (s.fault_code == r.unknown_fault.fault_code) { + unk = &s; + } + } + ASSERT_NE(unk, nullptr); + EXPECT_TRUE(unk->active); + EXPECT_EQ(unk->message, "unmapped fault code 77"); +} + +TEST_F(NodeMapTest, NonNumericThresholdFallsBackAndLoads) { + // A present-but-unparseable threshold must warn and fall back, not silently + // invert/disable detection and not abort the load. + std::string path = "/tmp/test_node_map_bad_threshold.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: val1 + alarm: + fault_code: BAD_THRESH + threshold: "100 bar" +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_TRUE(map.entries()[0].alarm.has_value()); + EXPECT_DOUBLE_EQ(map.entries()[0].alarm->threshold, 0.0); +} + +TEST_F(NodeMapTest, InvalidSeverityDefaultsToError) { + std::string path = "/tmp/test_node_map_bad_severity.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: val1 + alarm: + fault_code: SEV_TEST + severity: SEVERE + threshold: 50.0 +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_TRUE(map.entries()[0].alarm.has_value()); + EXPECT_EQ(map.entries()[0].alarm->severity, "ERROR"); +} + +TEST_F(NodeMapTest, StatusWordWidthMasksSignExtension) { + std::string path = "/tmp/test_node_map_status_width.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: drive + data_name: status_word + data_type: int + status_word_width: 16 + status_bits: + - bit: 15 + fault_code: DRIVE_FAULT +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + EXPECT_EQ(r.width, 16u); + + // bit15 set in a sign-extended 16-bit word: only DRIVE_FAULT, no spurious bits. + const auto sign_extended = static_cast(0xFFFFFFFFFFFF8000ULL); + auto out = fd::evaluate(fd::Value{sign_extended}, *det); + ASSERT_EQ(out.size(), 1u); + EXPECT_TRUE(out[0].active); +} + TEST_F(NodeMapTest, RejectsMultipleDetectionModesOnOneNode) { std::string path = "/tmp/test_node_map_multi_mode.yaml"; std::ofstream f(path); From 383f6e6e284a8754e333e370ac1f264fa69b77b2 Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Wed, 1 Jul 2026 21:37:04 +0200 Subject: [PATCH 7/8] Harden OPC UA node-map validation and fault-code uniqueness Fail plugin configuration when the node map is rejected so an invalid map disables the plugin instead of running the poller. Enforce fault-code uniqueness globally across detection entries and event_alarms (fault manager keys by code alone), and guard detection string reads so one malformed field skips a node rather than aborting the whole file. Document status_word_width and fault_enum catch-all keys; add the fault_detection changelog and design/docs wiring. Refs #481 --- docs/changelog.rst | 2 + docs/design/index.rst | 1 + .../ros2_medkit_fault_detection/index.rst | 71 +++++++ src/ros2_medkit_fault_detection/CHANGELOG.rst | 28 +++ .../ros2_medkit_opcua/README.md | 42 +++- .../ros2_medkit_opcua/src/node_map.cpp | 197 +++++++++++------- .../ros2_medkit_opcua/src/opcua_plugin.cpp | 13 +- .../ros2_medkit_opcua/test/test_node_map.cpp | 107 ++++++++++ .../test/test_opcua_plugin.cpp | 35 ++++ 9 files changed, 410 insertions(+), 86 deletions(-) create mode 100644 docs/design/ros2_medkit_fault_detection/index.rst create mode 100644 src/ros2_medkit_fault_detection/CHANGELOG.rst diff --git a/docs/changelog.rst b/docs/changelog.rst index 27d4363ff..13ab432ae 100644 --- a/docs/changelog.rst +++ b/docs/changelog.rst @@ -12,6 +12,8 @@ This page aggregates changelogs from all ros2_medkit packages. .. include:: ../src/ros2_medkit_fault_reporter/CHANGELOG.rst +.. include:: ../src/ros2_medkit_fault_detection/CHANGELOG.rst + .. include:: ../src/ros2_medkit_diagnostic_bridge/CHANGELOG.rst .. include:: ../src/ros2_medkit_log_bridge/CHANGELOG.rst diff --git a/docs/design/index.rst b/docs/design/index.rst index effe35d05..29ab9016f 100644 --- a/docs/design/index.rst +++ b/docs/design/index.rst @@ -9,6 +9,7 @@ This section contains design documentation for the ros2_medkit project packages. ros2_medkit_beacon_common/index ros2_medkit_cmake/index ros2_medkit_diagnostic_bridge/index + ros2_medkit_fault_detection/index ros2_medkit_fault_manager/index ros2_medkit_fault_reporter/index ros2_medkit_gateway/index diff --git a/docs/design/ros2_medkit_fault_detection/index.rst b/docs/design/ros2_medkit_fault_detection/index.rst new file mode 100644 index 000000000..4a79c1761 --- /dev/null +++ b/docs/design/ros2_medkit_fault_detection/index.rst @@ -0,0 +1,71 @@ +ros2_medkit_fault_detection +=========================== + +This section contains design documentation for the ros2_medkit_fault_detection package. + +Overview +-------- + +The ``ros2_medkit_fault_detection`` package provides a single, protocol-agnostic +model for turning a raw value read from any source (OPC UA, S7, Modbus, ADS, ...) +into the set of faults that value implies. Every protocol plugin lowers its own +configuration onto this shared model, so a threshold, a status-word bit, or a +fault-code register is mapped to a SOVD fault identically regardless of the +transport it came from. + +The package is a header-only INTERFACE library (``cxx_std_17``). Protocol +plugins are built as ``MODULE`` libraries that resolve gateway symbols from the +host process at load time (``--unresolved-symbols=ignore-all``); a separately +linked shared library would not be present in that host, so the detection logic +is compiled directly into each plugin instead. + +Detection modes +--------------- + +A single mapped point carries exactly one of three composable detection rules, +expressed as a ``DetectionRule`` variant: + +``ThresholdRule`` + Numeric above/below a setpoint. A boolean point honours the direction, so a + normally-closed contact or a watchdog-OK bit can alarm on ``false``. + +``StatusWordRule`` + Decode named bits of an integer status register, one fault per bit. An + optional source-register ``width`` masks the raw value to that many low bits + before decode, so a signed register read whose sign bit is set does not light + up spurious high bits. + +``EnumMapRule`` + Map a fault-code register value to at most one fault code and text. An + optional catch-all keeps a non-ok value that matches no configured code + visible rather than reading as healthy. + +Evaluator contract +------------------ + +``evaluate(value, rule)`` is a pure function of its inputs with no ROS or +protocol dependencies, which makes it trivially unit-testable. It returns the +full set of faults the rule governs, each flagged active or inactive, so a +transition tracker can clear a previous code and raise a new one in the same +cycle. + +Undecidable input holds state. A value that cannot be coerced for the rule - a +non-finite double (NaN / inf from a disconnected analog sensor or a div-by-zero +on the PLC), a string, or a failed numeric conversion - yields an empty result. +The evaluator emits no signal, so a transition tracker preserves the prior state +instead of clearing a standing fault. A bad read must never mask a real alarm. + +Transition tracking and global uniqueness +----------------------------------------- + +``FaultTransitionTracker`` layers stateful raise/clear edge detection on top of +the evaluator. It returns only the signals whose active state changed since the +last call, which a plugin forwards to the fault manager as report / clear. + +The tracker is keyed by ``fault_code`` alone, matching the fault manager, which +also keys and clears faults by code alone. A single tracker may therefore be +shared across many points only if every ``fault_code`` is globally unique; two +points emitting the same code would alternately raise and clear it each cycle. +Consumers that share one tracker (for example the OPC UA poller across all +node-map entries and event alarms) must enforce that uniqueness at +config-load time and reject a colliding configuration before anything runs. diff --git a/src/ros2_medkit_fault_detection/CHANGELOG.rst b/src/ros2_medkit_fault_detection/CHANGELOG.rst new file mode 100644 index 000000000..1f92dc33c --- /dev/null +++ b/src/ros2_medkit_fault_detection/CHANGELOG.rst @@ -0,0 +1,28 @@ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Changelog for package ros2_medkit_fault_detection +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +0.6.0 (2026-06-22) +------------------ +* Initial release: shared, protocol-agnostic fault-detection model for medkit + gateway plugins. A single header-only evaluator maps a raw value read from any + source (OPC UA, S7, Modbus, ADS, ...) into the set of faults it implies, using + one of three composable detection modes: ``ThresholdRule`` (numeric + above/below a setpoint), ``StatusWordRule`` (decode named bits of an integer + status register, with optional source-width masking to drop sign-extended high + bits), and ``EnumMapRule`` (map a fault-code register value to a fault code + + text, with an optional catch-all for unmapped values) + (`#481 `_). +* ``evaluate(value, rule)`` is a pure function with no ROS / protocol + dependencies, so it is trivially unit-testable and safe to compile into a + dlopen-loaded plugin MODULE. Undecidable input (a non-finite double, a string, + a failed numeric conversion) yields an empty result so a transition tracker + holds the prior state instead of clearing a standing fault - a bad read never + masks a real alarm. +* ``FaultTransitionTracker`` layers stateful raise/clear edge detection on top, + keyed by ``fault_code`` alone; consumers that share one tracker across many + points must enforce global fault-code uniqueness at config-load time. +* Shipped as a header-only INTERFACE library (``cxx_std_17``); the ``OPC UA`` + plugin is the first consumer and migrates its threshold / status-bit / enum + detection onto this module. +* Contributors: @mfaferek93, @bburda diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md index 095243ae4..c6bac608e 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md @@ -182,6 +182,11 @@ nodes: entity_id: pump data_name: status_word data_type: int + status_word_width: 16 # optional: source register width in bits. + # Masks off sign-extended high bits so a + # signed 16-bit word read with its top bit + # set does not fire spurious faults above + # bit 15. Range 1..64. See the note below. status_bits: - bit: 3 # bit position, 0 = least significant fault_code: PUMP_OVERLOAD @@ -198,6 +203,16 @@ nodes: data_type: int fault_enum: ok_value: 0 # value meaning "no fault" (default 0) + # Catch-all for a non-ok register value that matches no listed code + # (firmware revisions routinely add codes the config has not enumerated). + # All three keys are optional: + # unknown_fault_code: code raised for the unmapped value. Defaults to a + # deterministic __UNMAPPED. + # unknown_severity: SOVD bucket for it (default ERROR). + # unknown_message: text (default "unmapped fault code "). + unknown_fault_code: VFD_UNKNOWN_FAULT + unknown_severity: WARNING + unknown_message: Undocumented VFD fault code codes: - code: 10 fault_code: VFD_OVERVOLTAGE @@ -211,15 +226,24 @@ nodes: # shared `ros2_medkit_fault_detection` module so every protocol plugin maps raw # values to faults identically. They are mutually exclusive on a single node. # -# Status words: expose the register as an UNSIGNED OPC-UA type (Byte / UInt16 / -# UInt32 / UInt64). A signed type (Int16/Int32) whose top bit is set is -# sign-extended when widened to the 64-bit decode register, which sets every -# bit above the register width and raises spurious bit faults. `bit:` positions -# are 0-based and must be < 64; higher positions are dead config, so the loader -# logs a warning and skips that bit rule while still loading the rest of the -# config. Fault codes must be globally unique across all `alarm` / `status_bits` / -# `fault_enum` entries (the loader rejects duplicates); reusing a code on two -# nodes makes its fault flap. +# Status words: prefer exposing the register as an UNSIGNED OPC-UA type (Byte / +# UInt16 / UInt32 / UInt64). A signed type (Int16/Int32) whose top bit is set is +# sign-extended when widened to the 64-bit decode register, which sets every bit +# above the register width and raises spurious bit faults. When you cannot change +# the server type, set `status_word_width:` (1..64) on the node instead: the +# loader masks the raw value to that many low bits before decoding, so the +# sign-extended high bits are dropped in-config. `bit:` positions are 0-based and +# must be < 64 (and < `status_word_width` when set); higher positions are dead +# config, so the loader logs a warning and skips that bit rule while still loading +# the rest of the config. +# +# Fault codes must be globally unique across ALL fault sources - every `alarm` / +# `status_bits` / `fault_enum` entry AND every `event_alarms` entry - regardless +# of which entity owns them. The fault manager keys and clears faults by +# fault_code alone, so a code reused on two sources (even on different entities, +# even one polled and one event-driven) would flap raise/clear or clear the other +# source's fault. The loader rejects the whole file at load with an actionable +# error naming both sources. # Native OPC-UA AlarmConditionType events (issue #386). Subscribes to alarms # defined inside the PLC (Siemens Program_Alarm / ProDiag, Beckhoff TF6100, diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 75fe68e6b..225964978 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -21,11 +21,11 @@ #include #include #include -#include #include #include #include #include +#include #include #include @@ -242,6 +242,25 @@ bool NodeMap::load(const std::string & yaml_path) { return def; } }; + // Defensive required-string read: a missing or wrong-typed (sequence / map / + // null) string field warns naming node_id + field and returns nullopt so + // the caller skips just this node / bit / code. An unguarded + // ``.as()`` throws YAML::TypedBadConversion, which the outer + // catch turns into a whole-file abort that discards every other valid + // node. (#481) + auto parse_string = [logger](const YAML::Node & node, const char * field, + const std::string & node_id) -> std::optional { + if (!node) { + RCLCPP_WARN(logger, "node_id=%s: missing %s - skipping", node_id.c_str(), field); + return std::nullopt; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-string %s - skipping", node_id.c_str(), field); + return std::nullopt; + } + }; // Severity must be one of the documented SOVD buckets; warn + default to // ERROR on a typo (e.g. ``SEVERE`` / wrong case) so it is not misrouted. (#481) auto validate_severity = [logger](const std::string & sev, const std::string & node_id) -> std::string { @@ -274,14 +293,31 @@ bool NodeMap::load(const std::string & yaml_path) { NodeMapEntry entry; - entry.node_id_str = n["node_id"].as(); + // Required string fields use guarded reads: a present-but-wrong-typed + // value (a sequence / map where a scalar is expected) warns and skips + // just this entry rather than aborting the whole file via the outer + // catch. (#481) + { + auto node_id_str = parse_string(n["node_id"], "node_id", "entry " + std::to_string(i)); + if (!node_id_str) { + continue; + } + entry.node_id_str = *node_id_str; + } entry.node_id = parse_node_id(entry.node_id_str); if (entry.node_id_str.empty()) { RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), "Entry %zu has empty node_id - skipping", i); continue; } - entry.entity_id = n["entity_id"].as(); - entry.data_name = n["data_name"].as(); + { + auto entity_id = parse_string(n["entity_id"], "entity_id", entry.node_id_str); + auto data_name = parse_string(n["data_name"], "data_name", entry.node_id_str); + if (!entity_id || !data_name) { + continue; + } + entry.entity_id = *entity_id; + entry.data_name = *data_name; + } entry.display_name = n["display_name"].as(entry.data_name); entry.unit = n["unit"].as(""); entry.data_type = n["data_type"].as("float"); @@ -371,20 +407,27 @@ bool NodeMap::load(const std::string & yaml_path) { } if (n["alarm"]) { - AlarmConfig alarm; - alarm.fault_code = n["alarm"]["fault_code"].as(); - alarm.severity = validate_severity(n["alarm"]["severity"].as("ERROR"), entry.node_id_str); - alarm.message = n["alarm"]["message"].as(alarm.fault_code); - alarm.threshold = parse_double(n["alarm"]["threshold"], 0.0, "alarm.threshold", entry.node_id_str); - alarm.above_threshold = - parse_bool(n["alarm"]["above_threshold"], true, "alarm.above_threshold", entry.node_id_str); - - fd::ThresholdRule rule; - rule.fault = {alarm.fault_code, alarm.severity, alarm.message}; - rule.threshold = alarm.threshold; - rule.above = alarm.above_threshold; - entry.detection = std::move(rule); - entry.alarm = std::move(alarm); + auto fault_code = parse_string(n["alarm"]["fault_code"], "alarm.fault_code", entry.node_id_str); + if (!fault_code) { + // No detection for this point; the data point itself still loads. + RCLCPP_WARN(logger, "alarm on node_id=%s has no usable fault_code - no fault detection for this point", + entry.node_id_str.c_str()); + } else { + AlarmConfig alarm; + alarm.fault_code = *fault_code; + alarm.severity = validate_severity(n["alarm"]["severity"].as("ERROR"), entry.node_id_str); + alarm.message = n["alarm"]["message"].as(alarm.fault_code); + alarm.threshold = parse_double(n["alarm"]["threshold"], 0.0, "alarm.threshold", entry.node_id_str); + alarm.above_threshold = + parse_bool(n["alarm"]["above_threshold"], true, "alarm.above_threshold", entry.node_id_str); + + fd::ThresholdRule rule; + rule.fault = {alarm.fault_code, alarm.severity, alarm.message}; + rule.threshold = alarm.threshold; + rule.above = alarm.above_threshold; + entry.detection = std::move(rule); + entry.alarm = std::move(alarm); + } } else if (n["status_bits"]) { fd::StatusWordRule rule; // Optional source register width: mask off sign-extended high bits so a @@ -433,9 +476,12 @@ bool NodeMap::load(const std::string & yaml_path) { entry.node_id_str.c_str(), br.bit, rule.width); continue; } - br.fault = {b["fault_code"].as(), - validate_severity(b["severity"].as("ERROR"), entry.node_id_str), - b["message"].as(b["fault_code"].as())}; + auto bit_fault_code = parse_string(b["fault_code"], "status_bits.fault_code", entry.node_id_str); + if (!bit_fault_code) { + continue; // wrong-typed fault_code: skip just this bit + } + br.fault = {*bit_fault_code, validate_severity(b["severity"].as("ERROR"), entry.node_id_str), + b["message"].as(*bit_fault_code)}; rule.bits.push_back(std::move(br)); } } @@ -467,9 +513,12 @@ bool NodeMap::load(const std::string & yaml_path) { entry.node_id_str.c_str()); continue; } - er.fault = {c["fault_code"].as(), - validate_severity(c["severity"].as("ERROR"), entry.node_id_str), - c["message"].as(c["fault_code"].as())}; + auto code_fault_code = parse_string(c["fault_code"], "fault_enum.fault_code", entry.node_id_str); + if (!code_fault_code) { + continue; // wrong-typed fault_code: skip just this enum code + } + er.fault = {*code_fault_code, validate_severity(c["severity"].as("ERROR"), entry.node_id_str), + c["message"].as(*code_fault_code)}; rule.codes.push_back(std::move(er)); } } @@ -477,9 +526,14 @@ bool NodeMap::load(const std::string & yaml_path) { // Catch-all: a non-ok value matching no configured code stays visible // rather than reading as healthy. Code is YAML-overridable, else // derived deterministically from the point. (#481) - rule.unknown_fault.fault_code = n["fault_enum"]["unknown_fault_code"] - ? n["fault_enum"]["unknown_fault_code"].as() - : derive_unknown_code(entry.entity_id, entry.data_name); + if (n["fault_enum"]["unknown_fault_code"]) { + auto override_code = + parse_string(n["fault_enum"]["unknown_fault_code"], "fault_enum.unknown_fault_code", entry.node_id_str); + rule.unknown_fault.fault_code = + override_code ? *override_code : derive_unknown_code(entry.entity_id, entry.data_name); + } else { + rule.unknown_fault.fault_code = derive_unknown_code(entry.entity_id, entry.data_name); + } rule.unknown_fault.severity = validate_severity(n["fault_enum"]["unknown_severity"].as("ERROR"), entry.node_id_str); rule.unknown_fault.message = n["fault_enum"]["unknown_message"].as(""); @@ -545,81 +599,76 @@ bool NodeMap::load(const std::string & yaml_path) { } } - // Fault-code collision validation. Two distinct concerns are checked here - // against the full set of fault codes the polled detection entries can - // emit (threshold + every status_bits bit + every fault_enum code), not - // just ``nodes[*].alarm`` as the original check did. + // Global fault-code uniqueness validation. fault_manager keys and clears + // faults by ``fault_code`` ALONE (clear_fault(fault_code) / + // get_fault(fault_code)); the poller shares one ``FaultTransitionTracker`` + // that is likewise keyed by code alone. So a fault_code is a global + // identifier and must be unique across EVERY source that can emit it, + // regardless of entity_id: // - // 1. Global uniqueness of every emitted detection fault_code. The - // poller shares one ``FaultTransitionTracker`` keyed by fault_code - // alone, and clears are global by code, but evaluation runs - // per-entry. Two detection entries emitting the same fault_code - // (even on different entities) would therefore flap raise+clear - // every poll cycle. Reject duplicates so the intent - one code, one - // source - is documented and enforced at load. + // * every polled detection fault (threshold + each status_bits bit + + // each fault_enum code + the enum catch-all), and + // * every native ``event_alarms`` subscription (issue #386). // - // 2. Cross-pipeline collision with native ``event_alarms`` on the same - // ``(entity_id, fault_code)`` (bburda review on PR #387). The - // threshold/bit/enum polling path and the AlarmCondition - // subscription path produce different semantics (debounced vs - // state-machine driven), so fault_manager receiving both a polled - // report and a state-machine report for one SOVD fault flaps and is - // impossible to debug at runtime. + // Two sources sharing a code - even a polled code on entity_a and an + // event_alarms code on entity_b - would collide at fault_manager: one + // source's clear wipes the other's fault, or the two flap raise/clear + // every cycle. The (entity_id, fault_code) pair the earlier check keyed on + // is NOT sufficient, because the fault manager never sees entity_id in its + // key. Reject the whole file at load with an actionable error so the intent + // - one code, one source - is enforced before anything runs. { namespace fd = ros2_medkit::fault_detection; - // (entity_id, fault_code) for every fault a detection entry can emit. - std::vector> detection_faults; + struct EmittedFault { + std::string entity_id; + const char * pipeline; // "polled detection" or "event_alarms" + }; + std::unordered_map seen; // fault_code -> first source + auto check_unique = [&](const std::string & code, const std::string & entity_id, const char * pipeline) -> bool { + auto [it, inserted] = seen.emplace(code, EmittedFault{entity_id, pipeline}); + if (!inserted) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "fault_code '%s' is emitted by more than one source (%s on entity '%s' and %s on " + "entity '%s'); fault codes must be globally unique across all detection entries and " + "event_alarms because the shared fault manager is keyed by code alone (a collision " + "clears the other source's fault or flaps raise/clear) - rename one of them", + code.c_str(), it->second.pipeline, it->second.entity_id.c_str(), pipeline, entity_id.c_str()); + return false; + } + return true; + }; + for (const auto & entry : entries_) { if (!entry.detection.has_value()) { continue; } + bool ok = true; std::visit( [&](const auto & rule) { using T = std::decay_t; if constexpr (std::is_same_v) { - detection_faults.emplace_back(entry.entity_id, rule.fault.fault_code); + ok = ok && check_unique(rule.fault.fault_code, entry.entity_id, "polled detection"); } else if constexpr (std::is_same_v) { for (const auto & b : rule.bits) { - detection_faults.emplace_back(entry.entity_id, b.fault.fault_code); + ok = ok && check_unique(b.fault.fault_code, entry.entity_id, "polled detection"); } } else if constexpr (std::is_same_v) { for (const auto & e : rule.codes) { - detection_faults.emplace_back(entry.entity_id, e.fault.fault_code); + ok = ok && check_unique(e.fault.fault_code, entry.entity_id, "polled detection"); } if (!rule.unknown_fault.fault_code.empty()) { - detection_faults.emplace_back(entry.entity_id, rule.unknown_fault.fault_code); + ok = ok && check_unique(rule.unknown_fault.fault_code, entry.entity_id, "polled detection"); } } }, *entry.detection); - } - - // 1. Global uniqueness by fault_code across all detection entries. - std::set seen_codes; - for (const auto & fault : detection_faults) { - if (!seen_codes.insert(fault.second).second) { - RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), - "fault_code '%s' is emitted by more than one detection entry " - "(threshold/status_bits/fault_enum); fault codes must be globally unique because " - "the shared fault tracker is keyed by code alone (a collision flaps raise/clear " - "every poll cycle) - rename one of them", - fault.second.c_str()); + if (!ok) { return false; } } - // 2. Cross-pipeline collision with event_alarms by (entity_id, fault_code). - std::set> event_keys; for (const auto & cfg : event_alarms_) { - event_keys.emplace(cfg.entity_id, cfg.fault_code); - } - for (const auto & [entity_id, code] : detection_faults) { - if (event_keys.count({entity_id, code}) > 0) { - RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), - "Duplicate (entity_id=%s, fault_code=%s) declared by both a polled detection entry " - "(threshold/status_bits/fault_enum) and event_alarms[*] (subscription mode) - " - "mutually exclusive", - entity_id.c_str(), code.c_str()); + if (!check_unique(cfg.fault_code, cfg.entity_id, "event_alarms")) { return false; } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp index 3ccef8733..32f4cbd63 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace ros2_medkit_gateway { @@ -143,11 +144,17 @@ void OpcuaPlugin::configure(const nlohmann::json & config) { if (!node_map_path_.empty()) { if (!node_map_.load(node_map_path_)) { + // FATAL: a node map that fails to load or validate (duplicate/colliding + // fault codes, conflicting detection modes, oversized file, ...) must + // NOT run. The global fault-code uniqueness that prevents raise/clear + // flapping is only enforced at load, so starting the poller against a + // rejected map would defeat it. Throw so PluginManager disables the + // plugin instead of proceeding to set_context()/starting the poller. log_error("Failed to load node map from: " + node_map_path_); - } else { - log_info("Loaded node map with " + std::to_string(node_map_.entries().size()) + - " entries from: " + node_map_path_); + throw std::runtime_error("OPC-UA node map failed to load or validate: " + node_map_path_ + + " (see preceding log line for the specific rejection)"); } + log_info("Loaded node map with " + std::to_string(node_map_.entries().size()) + " entries from: " + node_map_path_); } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index 5970f76ff..ba6a0ae24 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -861,6 +861,113 @@ component_id: test EXPECT_EQ(map.detection_entries().size(), 3u); } +TEST_F(NodeMapTest, RejectsCodeAcrossPipelinesDifferentEntities) { + // Global-by-code uniqueness (issue #481): a polled detection code on + // entity_a and an event_alarms code on entity_b share the SAME fault_code. + // fault_manager keys and clears by code alone, so the two collide even + // though the entities differ; the (entity_id, fault_code) pair the earlier + // guard keyed on let this through. The loader must reject the whole file. + std::string path = "/tmp/test_node_map_cross_entity_pipeline.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: entity_a + data_name: pressure + alarm: + fault_code: SHARED_CODE + threshold: 90.0 +event_alarms: + - alarm_source: "ns=2;s=Alarms.Something" + entity_id: entity_b + fault_code: SHARED_CODE +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, MalformedDetectionStringSkipsRuleNotFile) { + // issue #481: a wrong-typed string field (here a sequence where a scalar + // fault_code is expected) must warn and skip just that rule, not throw + // YAML::TypedBadConversion into the outer catch and discard every other + // node in the file. + std::string path = "/tmp/test_node_map_malformed_string.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=BadEnum" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: 5 + fault_code: [not, a, scalar] + - node_id: "ns=1;s=GoodTemp" + entity_id: tank + data_name: temp + alarm: + fault_code: GOOD_HIGH_TEMP + threshold: 80.0 +)"; + f.close(); + + NodeMap map; + // Whole file still loads; the malformed enum code is dropped so that node + // just has no detection, and the second valid node keeps its rule. + ASSERT_TRUE(map.load(path)); + EXPECT_EQ(map.entries().size(), 2u); + const auto * bad = map.find_by_node_id("ns=1;s=BadEnum"); + ASSERT_NE(bad, nullptr); + EXPECT_FALSE(bad->detection.has_value()); + const auto * good = map.find_by_node_id("ns=1;s=GoodTemp"); + ASSERT_NE(good, nullptr); + ASSERT_TRUE(good->detection.has_value()); + EXPECT_TRUE(std::holds_alternative(*good->detection)); +} + +TEST_F(NodeMapTest, AlarmMissingFaultCodeSkipsDetectionNotFile) { + // issue #481: an ``alarm:`` block with no fault_code used to abort the whole + // file via an unguarded .as(). Now it warns, skips detection + // for that point, and keeps loading the rest. + std::string path = "/tmp/test_node_map_alarm_no_code.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: tank + data_name: pressure + alarm: + threshold: 90.0 + - node_id: "ns=1;i=2" + entity_id: tank + data_name: level + alarm: + fault_code: LEVEL_LOW + threshold: 10.0 + above_threshold: false +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + EXPECT_EQ(map.entries().size(), 2u); + const auto * p = map.find_by_node_id("ns=1;i=1"); + ASSERT_NE(p, nullptr); + EXPECT_FALSE(p->detection.has_value()); + const auto * l = map.find_by_node_id("ns=1;i=2"); + ASSERT_NE(l, nullptr); + EXPECT_TRUE(l->detection.has_value()); +} + TEST_F(NodeMapTest, StatusBitOutOfRangeSkipped) { // A bit position >= 64 can never be set in the 64-bit decode register, so it // is dead config: warn and drop the bit, keep the rest. (issue #481) diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp index bda73f832..0d8ee6728 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -294,4 +295,38 @@ TEST_F(OpcuaPluginTest, GetFaultNotFound) { EXPECT_EQ(result.error().code, FaultProviderError::FaultNotFound); } +// -- configure() validation (issue #481) -- + +TEST(OpcuaPluginConfigureTest, ThrowsOnInvalidNodeMap) { + // A node map that fails validation (here a duplicate fault_code) must make + // configure() throw so PluginManager disables the plugin instead of starting + // the poller against a map whose global fault-code uniqueness was rejected. + std::string path = "/tmp/test_opcua_plugin_invalid_nodemap.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent_a + data_name: a + alarm: + fault_code: DUP_CODE + threshold: 1.0 + - node_id: "ns=1;i=2" + entity_id: ent_b + data_name: b + alarm: + fault_code: DUP_CODE + threshold: 2.0 +)"; + f.close(); + + OpcuaPlugin plugin; + nlohmann::json config; + config["node_map_path"] = path; + config["endpoint_url"] = "opc.tcp://nonexistent:4840"; + EXPECT_THROW(plugin.configure(config), std::runtime_error); +} + } // namespace ros2_medkit_gateway From 1ca393edaffdab09906e5522e3a901188994ec7f Mon Sep 17 00:00:00 2001 From: mfaferek93 Date: Wed, 1 Jul 2026 21:53:55 +0200 Subject: [PATCH 8/8] Guard event_alarms required strings against whole-file abort A present-but-wrong-typed alarm_source/entity_id/fault_code used an unguarded .as() that threw into the outer catch, discarding every valid node and disabling the plugin. Route the three through the same parse_string() warn+skip helper as the nodes block so one malformed entry is skipped instead. Refs #481 --- .../ros2_medkit_opcua/src/node_map.cpp | 17 +++++++-- .../ros2_medkit_opcua/test/test_node_map.cpp | 36 +++++++++++++++++++ 2 files changed, 50 insertions(+), 3 deletions(-) diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 225964978..806c10531 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -571,11 +571,22 @@ bool NodeMap::load(const std::string & yaml_path) { "event_alarms[%zu] missing alarm_source/entity_id/fault_code - skipping", i); continue; } + // Required strings routed through the same guarded read as the nodes + // block: a present-but-wrong-typed value warns and skips just this + // event_alarms entry rather than throwing into the outer catch, which + // would discard every valid node and disable the plugin. (#481) + const std::string alarm_label = "event_alarms[" + std::to_string(i) + "]"; + auto alarm_source = parse_string(a["alarm_source"], "alarm_source", alarm_label); + auto alarm_entity_id = parse_string(a["entity_id"], "entity_id", alarm_label); + auto alarm_fault_code = parse_string(a["fault_code"], "fault_code", alarm_label); + if (!alarm_source || !alarm_entity_id || !alarm_fault_code) { + continue; + } AlarmEventConfig cfg; - cfg.source_node_id_str = a["alarm_source"].as(); + cfg.source_node_id_str = *alarm_source; cfg.source_node_id = parse_node_id(cfg.source_node_id_str); - cfg.entity_id = a["entity_id"].as(); - cfg.fault_code = a["fault_code"].as(); + cfg.entity_id = *alarm_entity_id; + cfg.fault_code = *alarm_fault_code; cfg.severity_override = a["severity_override"].as(""); cfg.message_override = a["message"].as(""); event_alarms_.push_back(std::move(cfg)); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index ba6a0ae24..d5d329fb5 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -1287,6 +1287,42 @@ component_id: test EXPECT_EQ(r.codes[0].code, 10); } +TEST_F(NodeMapTest, WrongTypedEventAlarmStringSkipsEntryNotFile) { + // A present-but-wrong-typed required string on one ``event_alarms`` entry + // must skip just that entry, consistent with the nodes block, rather than + // throwing into the outer catch, which would discard every valid node and + // alarm and disable the whole plugin. (#481) + std::string path = "/tmp/test_node_map_bad_event_alarm.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Pressure" + entity_id: tank_process + data_name: pressure + alarm: + fault_code: PLC_OVERPRESSURE + threshold: 90.0 +event_alarms: + - alarm_source: "ns=2;s=Alarms.Bad" + entity_id: [not, a, string] + fault_code: BAD_ALARM + - alarm_source: "ns=2;s=Alarms.Good" + entity_id: tank_process + fault_code: PLC_OVERHEAT +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + // Valid node survived the malformed event_alarms entry. + ASSERT_EQ(map.detection_entries().size(), 1u); + // Only the well-formed alarm was kept; the wrong-typed one was skipped. + ASSERT_EQ(map.event_alarms().size(), 1u); + EXPECT_EQ(map.event_alarms()[0].fault_code, "PLC_OVERHEAT"); +} + TEST_F(NodeMapTest, FaultEnumGetsCatchAllUnknownFault) { std::string path = "/tmp/test_node_map_enum_catchall.yaml"; std::ofstream f(path);