diff --git a/src/ros2_medkit_fault_detection/CMakeLists.txt b/src/ros2_medkit_fault_detection/CMakeLists.txt new file mode 100644 index 000000000..e094c9f3b --- /dev/null +++ b/src/ros2_medkit_fault_detection/CMakeLists.txt @@ -0,0 +1,70 @@ +# Copyright 2026 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cmake_minimum_required(VERSION 3.8) +project(ros2_medkit_fault_detection) + +set(CMAKE_CXX_STANDARD 17) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +find_package(ros2_medkit_cmake REQUIRED) +include(ROS2MedkitWarnings) + +find_package(ament_cmake REQUIRED) + +# Header-only INTERFACE library: the detection logic is compiled directly into +# each consuming plugin (see header rationale). +add_library(ros2_medkit_fault_detection INTERFACE) +target_include_directories(ros2_medkit_fault_detection INTERFACE + $ + $ +) +target_compile_features(ros2_medkit_fault_detection INTERFACE cxx_std_17) + +install(TARGETS ros2_medkit_fault_detection + EXPORT export_${PROJECT_NAME} +) +install(DIRECTORY include/ + DESTINATION include +) + +ament_export_targets(export_${PROJECT_NAME}) +ament_export_include_directories(include) + +if(BUILD_TESTING) + find_package(ament_lint_auto REQUIRED) + # clang_format is registered explicitly below; excluding it here prevents + # ament_lint_auto from registering a second clang_format test. + list(APPEND AMENT_LINT_AUTO_EXCLUDE + ament_cmake_uncrustify + ament_cmake_cpplint + ament_cmake_clang_tidy + ament_cmake_clang_format) + ament_lint_auto_find_test_dependencies() + + find_package(ament_cmake_clang_format QUIET) + if(ament_cmake_clang_format_FOUND) + file(GLOB_RECURSE _format_files "include/*.hpp" "test/*.cpp") + ament_clang_format(${_format_files} + CONFIG_FILE "${CMAKE_CURRENT_SOURCE_DIR}/../../.clang-format") + endif() + + find_package(ament_cmake_gtest REQUIRED) + ament_add_gtest(test_fault_detection test/test_fault_detection.cpp) + target_link_libraries(test_fault_detection ros2_medkit_fault_detection) + + ros2_medkit_relax_vendor_warnings() +endif() + +ament_package() diff --git a/src/ros2_medkit_fault_detection/README.md b/src/ros2_medkit_fault_detection/README.md new file mode 100644 index 000000000..55c9ec513 --- /dev/null +++ b/src/ros2_medkit_fault_detection/README.md @@ -0,0 +1,55 @@ +# ros2_medkit_fault_detection + +Shared, protocol-agnostic fault-detection model for medkit gateway plugins. +A single evaluator maps a raw value read from any source (OPC UA, S7, Modbus, +ADS, ...) to the set of faults it implies, so every protocol plugin detects +faults the same way. + +## Detection modes + +Composable per mapped point in a plugin's node map: + +- **threshold** (`ThresholdRule`) - numeric above/below a setpoint. A boolean + point is alarm-on-true. +- **status word** (`StatusWordRule`) - decode named bits of an integer status + register; one fault per configured bit. +- **fault enum** (`EnumMapRule`) - map a fault-code register value to a fault + code + text; one fault per configured code, with an `ok_value` meaning + "no fault". + +## API + +```cpp +#include "ros2_medkit_fault_detection/fault_detection.hpp" +namespace fd = ros2_medkit::fault_detection; + +// Pure evaluator: value + rule -> set of (active/inactive) fault signals. +std::vector signals = fd::evaluate(value, rule); + +// Stateful raise/clear edge detector over successive evaluations. +fd::FaultTransitionTracker tracker; +std::vector transitions = tracker.apply(signals); // raises + clears +``` + +`evaluate` reports the full set of faults a rule governs (each flagged active +or inactive). `FaultTransitionTracker` keeps the last-known state per +`fault_code` and emits only the raise/clear edges, which a plugin forwards to +the fault manager as report/clear. + +## Placement and packaging + +This is a **header-only INTERFACE package**: the detection logic is compiled +directly into each consuming plugin. Protocol plugins are built as MODULE +libraries that resolve gateway symbols from the host process at load time +(`-Wl,--unresolved-symbols=ignore-all`); a separately linked shared library +would not be present in that host, so the logic is compiled in instead. The +header has no ROS or protocol dependencies, which keeps the evaluator a pure, +trivially unit-testable function. + +It lives in the open `ros2_medkit` repo so both the open OPC UA plugin +(`ros2_medkit_opcua`) and future protocol plugins can depend on it. + +## Consumers + +`ros2_medkit_opcua` lowers its `alarm:`, `status_bits:` and `fault_enum:` +node-map blocks onto `DetectionRule` and evaluates them through this module. diff --git a/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp new file mode 100644 index 000000000..045ddcc69 --- /dev/null +++ b/src/ros2_medkit_fault_detection/include/ros2_medkit_fault_detection/fault_detection.hpp @@ -0,0 +1,297 @@ +// Copyright 2026 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +/// Shared fault-detection model for ros2_medkit protocol plugins. +/// +/// A single, protocol-agnostic evaluator maps a raw value read from any +/// source (OPC UA, S7, Modbus, ADS, ...) into the set of faults it implies, +/// using one of three composable detection modes: +/// +/// * threshold - numeric above/below a setpoint. +/// * status word - decode named bits of an integer status register. +/// * fault enum - map a fault-code register value to a fault code + text. +/// +/// The evaluator (``evaluate``) is a pure function of ``(value, rule)`` and +/// has no ROS / protocol dependencies, so it is trivially unit-testable and +/// safe to compile into a dlopen-loaded plugin MODULE. Raise/clear edge +/// detection is layered on top via ``FaultTransitionTracker``. +/// +/// Header-only on purpose: protocol plugins are built as MODULE libraries +/// that resolve gateway symbols from the host process at load time +/// (``--unresolved-symbols=ignore-all``); a separately linked shared library +/// would not be present in that host, so the detection logic is compiled +/// directly into each plugin instead. + +#include +#include +#include +#include +#include +#include +#include + +namespace ros2_medkit::fault_detection { + +/// A raw value read from a protocol source. +using Value = std::variant; + +/// The identity + presentation of a fault a rule can raise. +struct FaultSpec { + std::string fault_code; + std::string severity{"ERROR"}; // "INFO" / "WARNING" / "ERROR" / "CRITICAL" + std::string message; // human-readable text; defaults to fault_code +}; + +/// Numeric threshold: raise ``fault`` when the value crosses ``threshold``. +struct ThresholdRule { + FaultSpec fault; + double threshold{0.0}; + bool above{true}; // true: active when value > threshold; false: value < threshold +}; + +/// One named bit of a status word. +struct BitRule { + unsigned bit{0}; // bit position, 0 = least significant + FaultSpec fault; +}; + +/// Status-word decode: one integer register, many named-bit faults. +struct StatusWordRule { + std::vector bits; + /// Source register width in bits. 0 = treat the decoded value as a full + /// 64-bit word (no masking). When set in [1, 63] the raw value is masked to + /// that width before decode, so a signed register read with its sign bit set + /// (which sign-extends into int64) does not light up spurious high bits. + unsigned width{0}; +}; + +/// One entry of a fault-code enum register. +struct EnumRule { + std::int64_t code{0}; + FaultSpec fault; +}; + +/// Fault-code enum decode: a register value selects at most one fault. +struct EnumMapRule { + std::vector codes; + std::int64_t ok_value{0}; // value meaning "no fault" (no rule raised) + /// Catch-all raised when the register reports a non-ok value that matches no + /// configured code (firmware revisions routinely add codes the config has + /// not enumerated). Empty ``fault_code`` disables the catch-all. When its + /// ``message`` is empty the evaluator fills in "unmapped fault code ". + FaultSpec unknown_fault{}; +}; + +/// One of the composable detection modes for a single mapped point. +using DetectionRule = std::variant; + +/// A fault the evaluator says is currently active or inactive for a value. +/// The evaluator reports the full set of faults a rule governs (so a cleared +/// fault is reported as ``active == false``); edge detection is done by +/// ``FaultTransitionTracker``. +struct FaultSignal { + std::string fault_code; + std::string severity; + std::string message; + bool active{false}; +}; + +namespace detail { + +/// Coerce a value to double. bool -> 0/1, integers/doubles as-is. Strings are +/// not numerically comparable here and yield ``false`` (handled by caller). +inline bool as_double(const Value & v, double & out) { + return std::visit( + [&out](auto && x) -> bool { + using T = std::decay_t; + if constexpr (std::is_same_v) { + out = x ? 1.0 : 0.0; + return true; + } else if constexpr (std::is_same_v || std::is_same_v) { + out = static_cast(x); + return true; + } else { + return false; // string + } + }, + v); +} + +/// Coerce a value to a 64-bit integer for bit / enum decode. bool -> 0/1, +/// int as-is, double rounded to nearest. Non-finite doubles (NaN / inf) and +/// doubles outside the int64 range are *undecidable* and yield ``false`` so the +/// caller can hold state rather than fabricate garbage bits / enum codes (e.g. +/// llround(+inf) is INT64_MAX, which would light up every status-word bit). +/// Strings are not decodable. +inline bool as_int(const Value & v, std::int64_t & out) { + return std::visit( + [&out](auto && x) -> bool { + using T = std::decay_t; + if constexpr (std::is_same_v) { + out = x ? 1 : 0; + return true; + } else if constexpr (std::is_same_v) { + out = x; + return true; + } else if constexpr (std::is_same_v) { + // -2^63 (INT64_MIN, exact in double) .. 2^63 (one past INT64_MAX). + constexpr double kMin = -9223372036854775808.0; + constexpr double kMax = 9223372036854775808.0; + if (!std::isfinite(x) || x < kMin || x >= kMax) { + return false; + } + out = static_cast(std::llround(x)); + return true; + } else { + return false; // string + } + }, + v); +} + +inline FaultSignal make_signal(const FaultSpec & spec, bool active) { + FaultSignal s; + s.fault_code = spec.fault_code; + s.severity = spec.severity.empty() ? std::string{"ERROR"} : spec.severity; + s.message = spec.message.empty() ? spec.fault_code : spec.message; + s.active = active; + return s; +} + +} // namespace detail + +/// Evaluate a value against one detection rule. +/// +/// Returns the full set of faults the rule governs, each flagged active or +/// inactive. A threshold rule yields one signal; a status-word rule yields one +/// per configured bit; an enum rule yields one per configured code (the +/// matching code active, the rest inactive) plus an optional catch-all, so a +/// transition tracker can clear the previous code and raise the new one. +/// +/// Undecidable input holds state. A value that cannot be coerced for the rule - +/// a non-finite double (NaN / inf, e.g. a disconnected analog sensor or a +/// div-by-zero on the PLC) or an uncoercible value (a string, a failed numeric +/// conversion) - yields an *empty* vector: the evaluator emits NO signal, so a +/// ``FaultTransitionTracker`` preserves the prior state instead of clearing a +/// standing fault. A bad read must never mask a real alarm. +inline std::vector evaluate(const Value & value, const DetectionRule & rule) { + std::vector out; + + std::visit( + [&](auto && r) { + using T = std::decay_t; + + if constexpr (std::is_same_v) { + if (std::holds_alternative(value)) { + // A boolean point honours the direction: above=true is + // alarm-on-true, above=false is alarm-on-false (a normally-closed + // contact, a watchdog-OK / ready bit whose FAULT state is false). + const bool b = std::get(value); + out.push_back(detail::make_signal(r.fault, r.above ? b : !b)); + } else { + double d = 0.0; + if (detail::as_double(value, d) && std::isfinite(d)) { + out.push_back(detail::make_signal(r.fault, r.above ? (d > r.threshold) : (d < r.threshold))); + } + // else: undecidable (string / NaN / inf) -> emit nothing, hold last state. + } + + } else if constexpr (std::is_same_v) { + std::int64_t raw = 0; + if (!detail::as_int(value, raw)) { + return; // undecidable -> emit nothing, hold last state + } + auto mask = static_cast(raw); + if (r.width > 0 && r.width < 64) { + mask &= (1ULL << r.width) - 1ULL; // ignore sign-extended high bits + } + for (const auto & b : r.bits) { + const bool active = b.bit < 64 && (((mask >> b.bit) & 0x1ULL) != 0ULL); + out.push_back(detail::make_signal(b.fault, active)); + } + + } else if constexpr (std::is_same_v) { + std::int64_t code = 0; + if (!detail::as_int(value, code)) { + return; // undecidable -> emit nothing, hold last state + } + const bool healthy = code == r.ok_value; + bool matched = false; + for (const auto & e : r.codes) { + const bool active = !healthy && code == e.code; + matched = matched || active; + out.push_back(detail::make_signal(e.fault, active)); + } + // Catch-all so a non-ok value with no configured label stays visible + // rather than reading as healthy. + if (!r.unknown_fault.fault_code.empty()) { + const bool active = !healthy && !matched; + FaultSignal s = detail::make_signal(r.unknown_fault, active); + if (active && r.unknown_fault.message.empty()) { + s.message = "unmapped fault code " + std::to_string(code); + } + out.push_back(std::move(s)); + } + } + }, + rule); + + return out; +} + +/// Stateful raise/clear edge detector over a stream of evaluator outputs. +/// +/// Feed each ``evaluate`` result through ``apply``; it returns only the +/// signals whose active state changed since the last call (the raise and +/// clear transitions), which a plugin forwards to the fault manager as +/// report / clear. Keyed by ``fault_code``, so it tracks threshold, bit and +/// enum faults uniformly. +/// +/// Contract: a single tracker may be shared across many points only if every +/// ``fault_code`` is globally unique. Because the key is the code alone, two +/// points emitting the same code would alternately raise and clear it each +/// cycle. Consumers that share one tracker (e.g. the OPC UA poller across all +/// node-map entries) must enforce that uniqueness at config-load time. +class FaultTransitionTracker { + public: + std::vector apply(const std::vector & signals) { + std::vector changes; + for (const auto & s : signals) { + auto it = last_active_.find(s.fault_code); + const bool known = it != last_active_.end(); + if (!known || it->second != s.active) { + last_active_[s.fault_code] = s.active; + // A fault unknown-and-inactive is not a transition (nothing was ever + // raised); only emit a clear for a fault we had previously raised. + if (s.active || known) { + changes.push_back(s); + } + } + } + return changes; + } + + /// Current active state of a fault (false if never seen). + bool active(const std::string & fault_code) const { + auto it = last_active_.find(fault_code); + return it != last_active_.end() && it->second; + } + + private: + std::unordered_map last_active_; +}; + +} // namespace ros2_medkit::fault_detection diff --git a/src/ros2_medkit_fault_detection/package.xml b/src/ros2_medkit_fault_detection/package.xml new file mode 100644 index 000000000..195093ca6 --- /dev/null +++ b/src/ros2_medkit_fault_detection/package.xml @@ -0,0 +1,22 @@ + + + + ros2_medkit_fault_detection + 0.6.0 + Shared, protocol-agnostic fault-detection model (threshold, status-bit, fault-code enum) for medkit gateway plugins + + mfaferek93 + Apache-2.0 + + ament_cmake + ros2_medkit_cmake + + ament_lint_auto + ament_lint_common + ament_cmake_clang_format + ament_cmake_gtest + + + ament_cmake + + diff --git a/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp b/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp new file mode 100644 index 000000000..7859a9a75 --- /dev/null +++ b/src/ros2_medkit_fault_detection/test/test_fault_detection.cpp @@ -0,0 +1,318 @@ +// Copyright 2026 mfaferek93 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "ros2_medkit_fault_detection/fault_detection.hpp" + +#include +#include +#include + +#include + +namespace fd = ros2_medkit::fault_detection; + +namespace { + +const fd::FaultSignal * find(const std::vector & v, const std::string & code) { + for (const auto & s : v) { + if (s.fault_code == code) { + return &s; + } + } + return nullptr; +} + +} // namespace + +// -- Threshold --------------------------------------------------------------- + +TEST(ThresholdRule, AboveSetpointRaises) { + fd::ThresholdRule r; + r.fault = {"HIGH_TEMP", "ERROR", "Over temperature"}; + r.threshold = 80.0; + r.above = true; + + auto hot = fd::evaluate(fd::Value{90.0}, r); + ASSERT_EQ(hot.size(), 1u); + EXPECT_TRUE(hot[0].active); + EXPECT_EQ(hot[0].fault_code, "HIGH_TEMP"); + EXPECT_EQ(hot[0].severity, "ERROR"); + EXPECT_EQ(hot[0].message, "Over temperature"); + + auto ok = fd::evaluate(fd::Value{70.0}, r); + ASSERT_EQ(ok.size(), 1u); + EXPECT_FALSE(ok[0].active); +} + +TEST(ThresholdRule, BelowSetpointRaises) { + fd::ThresholdRule r; + r.fault = {"LOW_LEVEL", "WARNING", ""}; + r.threshold = 100.0; + r.above = false; + + EXPECT_TRUE(fd::evaluate(fd::Value{static_cast(50)}, r)[0].active); + EXPECT_FALSE(fd::evaluate(fd::Value{static_cast(150)}, r)[0].active); + // Empty message falls back to the fault code. + EXPECT_EQ(fd::evaluate(fd::Value{static_cast(50)}, r)[0].message, "LOW_LEVEL"); +} + +TEST(ThresholdRule, BooleanPointIsAlarmOnTrue) { + fd::ThresholdRule r; + r.fault = {"TRIGGER", "ERROR", "Trigger set"}; + r.threshold = 0.0; + r.above = true; + + EXPECT_TRUE(fd::evaluate(fd::Value{true}, r)[0].active); + EXPECT_FALSE(fd::evaluate(fd::Value{false}, r)[0].active); +} + +TEST(ThresholdRule, BooleanHonoursDirection) { + // above=false expresses the common case where logic-true is healthy and the + // FAULT is false (normally-closed contact, watchdog-OK / ready bit). + fd::ThresholdRule r; + r.fault = {"NOT_READY", "ERROR", "Drive not ready"}; + r.threshold = 0.0; + r.above = false; + + EXPECT_TRUE(fd::evaluate(fd::Value{false}, r)[0].active); + EXPECT_FALSE(fd::evaluate(fd::Value{true}, r)[0].active); +} + +TEST(ThresholdRule, NonFiniteValueHoldsState) { + // A disconnected analog sensor reports NaN/inf. The evaluator must emit + // nothing (not active=false), so the tracker holds a previously-raised fault + // instead of a bad read clearing a real alarm. + fd::ThresholdRule r; + r.fault = {"HIGH_TEMP", "ERROR", "hot"}; + r.threshold = 80.0; + r.above = true; + + fd::FaultTransitionTracker tracker; + ASSERT_EQ(tracker.apply(fd::evaluate(fd::Value{90.0}, r)).size(), 1u); + ASSERT_TRUE(tracker.active("HIGH_TEMP")); + + auto nan_out = fd::evaluate(fd::Value{std::numeric_limits::quiet_NaN()}, r); + EXPECT_TRUE(nan_out.empty()); + EXPECT_TRUE(tracker.apply(nan_out).empty()); + EXPECT_TRUE(tracker.active("HIGH_TEMP")); + + auto inf_out = fd::evaluate(fd::Value{std::numeric_limits::infinity()}, r); + EXPECT_TRUE(inf_out.empty()); + EXPECT_TRUE(tracker.apply(inf_out).empty()); + EXPECT_TRUE(tracker.active("HIGH_TEMP")); +} + +// -- Status-word bit decode -------------------------------------------------- + +TEST(StatusWordRule, NamedBitDecode) { + fd::StatusWordRule r; + r.bits = { + {0u, {"E_STOP", "CRITICAL", "Emergency stop"}}, + {3u, {"PUMP_OVERLOAD", "ERROR", "Pump overload"}}, + {7u, {"FILTER_DIRTY", "WARNING", "Filter dirty"}}, + }; + + // 0b1001 -> bit 0 and bit 3 set, bit 7 clear. + auto out = fd::evaluate(fd::Value{static_cast(0b1001)}, r); + ASSERT_EQ(out.size(), 3u); + EXPECT_TRUE(find(out, "E_STOP")->active); + EXPECT_TRUE(find(out, "PUMP_OVERLOAD")->active); + EXPECT_FALSE(find(out, "FILTER_DIRTY")->active); + EXPECT_EQ(find(out, "PUMP_OVERLOAD")->message, "Pump overload"); +} + +TEST(StatusWordRule, AllClearWhenZero) { + fd::StatusWordRule r; + r.bits = {{2u, {"FAULT_A", "ERROR", "A"}}, {5u, {"FAULT_B", "ERROR", "B"}}}; + auto out = fd::evaluate(fd::Value{static_cast(0)}, r); + for (const auto & s : out) { + EXPECT_FALSE(s.active); + } +} + +TEST(StatusWordRule, NonFiniteValueHoldsState) { + // +inf rounded to int64 is INT64_MAX, which would light up every bit; NaN + // rounds to 0 (all clear) and would clear a standing fault. Both must instead + // emit nothing so the tracker preserves prior state. + fd::StatusWordRule r; + r.bits = {{0u, {"E_STOP", "CRITICAL", "Emergency stop"}}, {3u, {"PUMP_OVERLOAD", "ERROR", "Pump overload"}}}; + + fd::FaultTransitionTracker tracker; + ASSERT_EQ(tracker.apply(fd::evaluate(fd::Value{static_cast(0b1)}, r)).size(), 1u); + ASSERT_TRUE(tracker.active("E_STOP")); + + auto inf_out = fd::evaluate(fd::Value{std::numeric_limits::infinity()}, r); + EXPECT_TRUE(inf_out.empty()); // no flood + EXPECT_TRUE(tracker.apply(inf_out).empty()); + EXPECT_TRUE(tracker.active("E_STOP")); + EXPECT_FALSE(tracker.active("PUMP_OVERLOAD")); + + auto nan_out = fd::evaluate(fd::Value{std::numeric_limits::quiet_NaN()}, r); + EXPECT_TRUE(nan_out.empty()); + EXPECT_TRUE(tracker.apply(nan_out).empty()); + EXPECT_TRUE(tracker.active("E_STOP")); +} + +TEST(StatusWordRule, WidthMasksSignExtendedHighBits) { + // A 16-bit signed status word with bit15 set, read as int64, sign-extends and + // sets bits 16..63. A configured width masks them so only in-range bits fire. + fd::StatusWordRule r; + r.width = 16; + r.bits = {{15u, {"REAL_FAULT", "ERROR", "real"}}, {20u, {"SPURIOUS", "ERROR", "spurious"}}}; + + const auto sign_extended = static_cast(0xFFFFFFFFFFFF8000ULL); + auto out = fd::evaluate(fd::Value{sign_extended}, r); + EXPECT_TRUE(find(out, "REAL_FAULT")->active); + EXPECT_FALSE(find(out, "SPURIOUS")->active); + + // Without a width the same value would spuriously fire bit 20. + fd::StatusWordRule wide; + wide.bits = {{20u, {"SPURIOUS", "ERROR", "spurious"}}}; + EXPECT_TRUE(find(fd::evaluate(fd::Value{sign_extended}, wide), "SPURIOUS")->active); +} + +// -- Fault-code enum --------------------------------------------------------- + +TEST(EnumMapRule, CodeMapsToFaultAndText) { + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = { + {10, {"VFD_OVERVOLTAGE", "ERROR", "DC bus overvoltage"}}, + {11, {"VFD_OVERCURRENT", "ERROR", "Output overcurrent"}}, + {12, {"VFD_OVERTEMP", "WARNING", "Heatsink over temperature"}}, + }; + + auto out = fd::evaluate(fd::Value{static_cast(11)}, r); + ASSERT_EQ(out.size(), 3u); + EXPECT_FALSE(find(out, "VFD_OVERVOLTAGE")->active); + EXPECT_TRUE(find(out, "VFD_OVERCURRENT")->active); + EXPECT_EQ(find(out, "VFD_OVERCURRENT")->message, "Output overcurrent"); + EXPECT_FALSE(find(out, "VFD_OVERTEMP")->active); +} + +TEST(EnumMapRule, OkValueRaisesNothing) { + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"VFD_OVERVOLTAGE", "ERROR", "x"}}}; + auto out = fd::evaluate(fd::Value{static_cast(0)}, r); + ASSERT_EQ(out.size(), 1u); + EXPECT_FALSE(out[0].active); +} + +TEST(EnumMapRule, UnmappedNonOkRaisesCatchAll) { + // A non-ok value with no configured label must stay visible via the catch-all + // (firmware adds codes the config has not enumerated), not read as healthy. + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}, {11, {"OVERCURR", "ERROR", "oc"}}}; + r.unknown_fault = {"VFD_UNMAPPED", "ERROR", ""}; + + auto out = fd::evaluate(fd::Value{static_cast(99)}, r); + ASSERT_EQ(out.size(), 3u); + EXPECT_FALSE(find(out, "OVERVOLT")->active); + EXPECT_FALSE(find(out, "OVERCURR")->active); + const auto * unk = find(out, "VFD_UNMAPPED"); + ASSERT_NE(unk, nullptr); + EXPECT_TRUE(unk->active); + EXPECT_EQ(unk->message, "unmapped fault code 99"); + + // ok and mapped values keep the catch-all inactive so the tracker clears it. + EXPECT_FALSE(find(fd::evaluate(fd::Value{static_cast(0)}, r), "VFD_UNMAPPED")->active); + auto mapped = fd::evaluate(fd::Value{static_cast(10)}, r); + EXPECT_TRUE(find(mapped, "OVERVOLT")->active); + EXPECT_FALSE(find(mapped, "VFD_UNMAPPED")->active); +} + +TEST(EnumMapRule, NonFiniteValueHoldsState) { + // NaN decodes to 0 == ok_value (reads healthy) and inf to INT64_MAX; either + // would clear a standing enum fault. Both must emit nothing instead. + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}}; + r.unknown_fault.fault_code = "UNMAPPED"; + + fd::FaultTransitionTracker tracker; + ASSERT_EQ(tracker.apply(fd::evaluate(fd::Value{static_cast(10)}, r)).size(), 1u); + ASSERT_TRUE(tracker.active("OVERVOLT")); + + auto nan_out = fd::evaluate(fd::Value{std::numeric_limits::quiet_NaN()}, r); + EXPECT_TRUE(nan_out.empty()); + EXPECT_TRUE(tracker.apply(nan_out).empty()); + EXPECT_TRUE(tracker.active("OVERVOLT")); + + EXPECT_TRUE(fd::evaluate(fd::Value{std::numeric_limits::infinity()}, r).empty()); + EXPECT_TRUE(tracker.active("OVERVOLT")); +} + +TEST(Evaluator, UncoercibleStringHoldsState) { + // A string payload (wrong datatype, error string, partial poll) is undecidable + // for every rule kind: emit nothing so a standing fault is preserved. + fd::ThresholdRule t; + t.fault = {"HIGH_TEMP", "ERROR", "hot"}; + t.threshold = 80.0; + EXPECT_TRUE(fd::evaluate(fd::Value{std::string{"sensor error"}}, t).empty()); + + fd::StatusWordRule s; + s.bits = {{0u, {"E_STOP", "CRITICAL", "e"}}}; + EXPECT_TRUE(fd::evaluate(fd::Value{std::string{"bad"}}, s).empty()); + + fd::EnumMapRule e; + e.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}}; + e.unknown_fault.fault_code = "UNK"; + EXPECT_TRUE(fd::evaluate(fd::Value{std::string{"bad"}}, e).empty()); +} + +// -- Raise / clear transitions ---------------------------------------------- + +TEST(TransitionTracker, EmitsRaiseThenClearOnce) { + fd::ThresholdRule r; + r.fault = {"HIGH_TEMP", "ERROR", "hot"}; + r.threshold = 80.0; + r.above = true; + + fd::FaultTransitionTracker tracker; + + // Healthy first read: no transition (nothing was ever raised). + EXPECT_TRUE(tracker.apply(fd::evaluate(fd::Value{70.0}, r)).empty()); + + // Crosses up: one raise. + auto up = tracker.apply(fd::evaluate(fd::Value{90.0}, r)); + ASSERT_EQ(up.size(), 1u); + EXPECT_TRUE(up[0].active); + EXPECT_TRUE(tracker.active("HIGH_TEMP")); + + // Still high: no repeat. + EXPECT_TRUE(tracker.apply(fd::evaluate(fd::Value{95.0}, r)).empty()); + + // Crosses down: one clear. + auto down = tracker.apply(fd::evaluate(fd::Value{60.0}, r)); + ASSERT_EQ(down.size(), 1u); + EXPECT_FALSE(down[0].active); + EXPECT_FALSE(tracker.active("HIGH_TEMP")); +} + +TEST(TransitionTracker, EnumCodeChangeClearsOldRaisesNew) { + fd::EnumMapRule r; + r.ok_value = 0; + r.codes = {{10, {"OVERVOLT", "ERROR", "ov"}}, {11, {"OVERCURR", "ERROR", "oc"}}}; + + fd::FaultTransitionTracker tracker; + tracker.apply(fd::evaluate(fd::Value{static_cast(10)}, r)); // raise OVERVOLT + + // Code switches 10 -> 11: clear OVERVOLT, raise OVERCURR in one step. + auto changes = tracker.apply(fd::evaluate(fd::Value{static_cast(11)}, r)); + ASSERT_EQ(changes.size(), 2u); + EXPECT_FALSE(find(changes, "OVERVOLT")->active); + EXPECT_TRUE(find(changes, "OVERCURR")->active); +} diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt b/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt index d66c3ecce..973882a03 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt @@ -27,6 +27,7 @@ include(ROS2MedkitWarnings) find_package(ament_cmake REQUIRED) find_package(ros2_medkit_gateway REQUIRED) +find_package(ros2_medkit_fault_detection REQUIRED) find_package(ros2_medkit_msgs REQUIRED) find_package(rclcpp REQUIRED) find_package(std_msgs REQUIRED) @@ -98,6 +99,7 @@ target_include_directories(ros2_medkit_opcua_plugin PRIVATE medkit_target_dependencies(ros2_medkit_opcua_plugin ros2_medkit_msgs ros2_medkit_gateway + ros2_medkit_fault_detection rclcpp std_msgs ) @@ -168,6 +170,7 @@ if(BUILD_TESTING) medkit_target_dependencies(test_opcua_plugin ros2_medkit_msgs ros2_medkit_gateway + ros2_medkit_fault_detection rclcpp std_msgs ) @@ -188,6 +191,7 @@ if(BUILD_TESTING) ) medkit_target_dependencies(test_node_map ros2_medkit_gateway + ros2_medkit_fault_detection rclcpp ) target_link_libraries(test_node_map diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md index 9f6f0ac2a..095243ae4 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md @@ -169,13 +169,58 @@ nodes: writable: true # Allow writes via x-plc-operations min_value: 0.0 # Optional: range validation for writes max_value: 100.0 - alarm: # Optional: map to SOVD fault + alarm: # Optional: numeric threshold -> SOVD fault fault_code: PLC_LOW_LEVEL severity: WARNING message: Tank level below minimum threshold: 100.0 above_threshold: false # Alarm when value < threshold + # Status-word bit decode: one integer register, one fault per named bit. + # Mutually exclusive per node with `alarm:` / `fault_enum:`. + - node_id: "ns=2;s=Pump.StatusWord" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 3 # bit position, 0 = least significant + fault_code: PUMP_OVERLOAD + severity: ERROR + message: Pump motor overload + - bit: 7 + fault_code: FILTER_DIRTY + severity: WARNING # message defaults to the fault_code + + # Fault-code enum: a register value selects at most one fault + text. + - node_id: "ns=2;s=Vfd.FaultCode" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 # value meaning "no fault" (default 0) + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE + severity: ERROR + message: DC bus overvoltage + - code: 11 + fault_code: VFD_OVERCURRENT + severity: ERROR + +# The three modes (`alarm`, `status_bits`, `fault_enum`) are evaluated by the +# shared `ros2_medkit_fault_detection` module so every protocol plugin maps raw +# values to faults identically. They are mutually exclusive on a single node. +# +# Status words: expose the register as an UNSIGNED OPC-UA type (Byte / UInt16 / +# UInt32 / UInt64). A signed type (Int16/Int32) whose top bit is set is +# sign-extended when widened to the 64-bit decode register, which sets every +# bit above the register width and raises spurious bit faults. `bit:` positions +# are 0-based and must be < 64; higher positions are dead config, so the loader +# logs a warning and skips that bit rule while still loading the rest of the +# config. Fault codes must be globally unique across all `alarm` / `status_bits` / +# `fault_enum` entries (the loader rejects duplicates); reusing a code on two +# nodes makes its fault flap. + # Native OPC-UA AlarmConditionType events (issue #386). Subscribes to alarms # defined inside the PLC (Siemens Program_Alarm / ProDiag, Beckhoff TF6100, # CodeSys 3.5+, Rockwell via FactoryTalk Linx). Mutually exclusive per entry diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh index b237069f1..e8f09b6d1 100755 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/scripts/run_alarm_tests.sh @@ -227,7 +227,34 @@ mkdir -p /tmp/alarm_test_config cat >/tmp/alarm_test_config/alarm_nodes.yaml < `` CLI command. Fault codes are globally unique vs the +# event_alarms codes so the shared fault tracker never flaps. +nodes: + - node_id: "ns=2;s=StatusWord" + entity_id: tank_process + data_name: status_word + data_type: int + status_bits: + - bit: 3 + fault_code: PLC_PUMP_OVERLOAD + severity: ERROR + message: "Pump overload" + - node_id: "ns=2;s=FaultCode" + entity_id: tank_process + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 + codes: + - code: 10 + fault_code: PLC_VFD_OVERVOLTAGE + severity: ERROR + - code: 11 + fault_code: PLC_VFD_OVERCURRENT + severity: ERROR event_alarms: - alarm_source: "ns=2;s=Alarms.Overpressure" entity_id: tank_process @@ -353,6 +380,28 @@ echo "fire SensorLost 900" >&3 wait_until_status PLC_SENSOR_LOST CONFIRMED 30 echo " OK PLC_SENSOR_LOST re-armed after Enable" +echo " [scenario] status-word bit decode (poll path)" +# bit 3 set => value 8 raises PLC_PUMP_OVERLOAD; clearing the word clears it. +echo "set StatusWord 8" >&3 +wait_until_status PLC_PUMP_OVERLOAD CONFIRMED 30 +echo "set StatusWord 0" >&3 +wait_no_fault PLC_PUMP_OVERLOAD 30 +echo " OK PLC_PUMP_OVERLOAD raised on bit3 then cleared" + +echo " [scenario] fault-code enum decode (poll path)" +# code 11 => PLC_VFD_OVERCURRENT; transition to code 10 must raise +# PLC_VFD_OVERVOLTAGE and clear PLC_VFD_OVERCURRENT; ok_value clears both. +echo "set FaultCode 11" >&3 +wait_until_status PLC_VFD_OVERCURRENT CONFIRMED 30 +echo "set FaultCode 10" >&3 +wait_until_status PLC_VFD_OVERVOLTAGE CONFIRMED 30 +wait_no_fault PLC_VFD_OVERCURRENT 30 +echo " OK enum transition 11->10: PLC_VFD_OVERVOLTAGE raised, PLC_VFD_OVERCURRENT cleared" +echo "set FaultCode 0" >&3 +wait_no_fault PLC_VFD_OVERVOLTAGE 30 +wait_no_fault PLC_VFD_OVERCURRENT 30 +echo " OK both VFD faults cleared on ok_value" + echo " [scenario] reconnect re-subscribes after server restart" # This scenario does NOT verify ConditionRefresh re-emit - the test_alarm_server # is in-memory and loses condition state on restart, so the natural diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp index 6b26fe8bc..99162f2a8 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp @@ -22,9 +22,15 @@ #include +#include "ros2_medkit_fault_detection/fault_detection.hpp" + namespace ros2_medkit_gateway { -/// Alarm configuration for a node that maps to a SOVD fault +/// Threshold-mode alarm configuration for a node that maps to a SOVD fault. +/// Retained for backward compatibility with the original ``alarm:`` node-map +/// block; at load time it is also lowered to a shared +/// ``fault_detection::ThresholdRule`` (see ``NodeMapEntry::detection``) so the +/// runtime evaluator is the same one used by status-bit and enum modes. struct AlarmConfig { std::string fault_code; std::string severity; // "ERROR", "WARNING", "INFO" @@ -73,9 +79,15 @@ struct NodeMapEntry { bool writable{false}; // Can be written via x-plc-operations std::optional min_value; // Optional range validation for writes std::optional max_value; - std::optional alarm; // Optional alarm -> fault mapping + std::optional alarm; // Optional threshold alarm -> fault mapping (back-compat) std::string ros2_topic; // ROS 2 topic for value bridging (auto: /plc/{entity}/{name}) + // Shared fault-detection rule for this point: threshold, status-word bit + // decode, or fault-code enum. Built at load time from the ``alarm:``, + // ``status_bits:`` or ``fault_enum:`` node-map block. The poller evaluates + // this through the shared ``fault_detection::evaluate`` regardless of mode. + std::optional detection; + bool has_range() const { return min_value.has_value() && max_value.has_value(); } @@ -118,9 +130,16 @@ class NodeMap { /// Find entry by OPC-UA node ID string const NodeMapEntry * find_by_node_id(const std::string & node_id_str) const; - /// Get all entries that have threshold-based alarm configuration + /// Get all entries that have threshold-based alarm configuration. + /// Test / back-compat only - the live fault-evaluation path is + /// detection_entries(); nothing reads this at runtime. std::vector alarm_entries() const; + /// Get all entries carrying a shared fault-detection rule (threshold, + /// status-word bit decode, or fault-code enum). This is the set the poller + /// evaluates each cycle. + std::vector detection_entries() const; + /// Get all native OPC-UA AlarmConditionType event-mode entries (issue #386). const std::vector & event_alarms() const { return event_alarms_; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp index 5141243d7..91f03f568 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp @@ -112,8 +112,8 @@ class OpcuaPlugin : public ros2_medkit_gateway::GatewayPlugin, void handle_plc_operations(const ros2_medkit_gateway::PluginRequest & req, ros2_medkit_gateway::PluginResponse & res); void handle_plc_status(const ros2_medkit_gateway::PluginRequest & req, ros2_medkit_gateway::PluginResponse & res); - // Alarm -> Fault bridge - void on_alarm_change(const std::string & fault_code, const AlarmConfig & config, bool active); + // Fault-detection signal (threshold / status-bit / enum) -> Fault bridge + void on_alarm_change(const std::string & entity_id, const ros2_medkit::fault_detection::FaultSignal & signal); // Issue #386: native AlarmConditionType event lifecycle bridge. void on_event_alarm(const AlarmEventDelivery & delivery); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp index cbb3a6fb9..82c4738b0 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp @@ -42,9 +42,12 @@ struct PollSnapshot { uint64_t error_count{0}; }; -/// Callback when an alarm state changes (for fault reporting) +/// Callback when a fault-detection signal transitions (for fault reporting). +/// Fires once per raise/clear edge with the hosting entity and the shared +/// ``FaultSignal`` (fault code, severity, message, active) produced by the +/// shared evaluator. Threshold, status-bit and enum modes all use this path. using AlarmChangeCallback = - std::function; + std::function; /// Callback when a native OPC-UA AlarmCondition lifecycle transitions to a /// new SOVD status (issue #386). Fires at most once per logical transition; @@ -166,7 +169,9 @@ class OpcuaPoller { mutable std::mutex alarm_mutex_; AlarmChangeCallback alarm_callback_; - std::unordered_map alarm_states_; // fault_code -> last known state + // Shared raise/clear edge detector across all detection modes, keyed by + // fault_code. Replaces the previous threshold-only last-state map. + ros2_medkit::fault_detection::FaultTransitionTracker alarm_tracker_; // Issue #386: event-mode AlarmCondition state. EventAlarmCallback event_alarm_callback_; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml b/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml index d4ef796e2..b509fe1ff 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml @@ -18,6 +18,7 @@ std_msgs ros2_medkit_msgs ros2_medkit_gateway + ros2_medkit_fault_detection nlohmann-json-dev yaml_cpp_vendor libssl-dev diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 755faafda..75fe68e6b 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -25,7 +25,9 @@ #include #include #include +#include #include +#include #include #include @@ -197,6 +199,69 @@ bool NodeMap::load(const std::string & yaml_path) { return false; } + const auto logger = rclcpp::get_logger("opcua.node_map"); + + // Defensive scalar decode: when a numeric/boolean field is *present* but + // fails to convert, warn (naming node_id) and fall back to the default + // rather than silently substituting a semantics-changing value (e.g. + // ``threshold: "100 bar"`` -> 0.0 inverts a below-threshold alarm). yaml-cpp + // ``as(fallback)`` cannot tell "absent" from "present but bad". (#481) + auto parse_double = [logger](const YAML::Node & node, double def, const char * field, + const std::string & node_id) -> double { + if (!node) { + return def; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-numeric %s ignored - using default", node_id.c_str(), field); + return def; + } + }; + auto parse_bool = [logger](const YAML::Node & node, bool def, const char * field, + const std::string & node_id) -> bool { + if (!node) { + return def; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-boolean %s ignored - using default", node_id.c_str(), field); + return def; + } + }; + auto parse_int64 = [logger](const YAML::Node & node, std::int64_t def, const char * field, + const std::string & node_id) -> std::int64_t { + if (!node) { + return def; + } + try { + return node.as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "node_id=%s: non-integer %s ignored - using default", node_id.c_str(), field); + return def; + } + }; + // Severity must be one of the documented SOVD buckets; warn + default to + // ERROR on a typo (e.g. ``SEVERE`` / wrong case) so it is not misrouted. (#481) + auto validate_severity = [logger](const std::string & sev, const std::string & node_id) -> std::string { + if (sev != "INFO" && sev != "WARNING" && sev != "ERROR" && sev != "CRITICAL") { + RCLCPP_WARN(logger, "node_id=%s: unknown severity '%s' - defaulting to ERROR", node_id.c_str(), sev.c_str()); + return "ERROR"; + } + return sev; + }; + // Deterministic, unique-per-point catch-all code for an unmapped enum value. + auto derive_unknown_code = [](const std::string & entity, const std::string & data) -> std::string { + std::string s = entity + "_" + data + "_UNMAPPED"; + for (auto & c : s) { + c = (std::isalnum(static_cast(c)) != 0) + ? static_cast(std::toupper(static_cast(c))) + : '_'; + } + return s; + }; + for (size_t i = 0; i < nodes.size(); ++i) { const auto & n = nodes[i]; @@ -290,14 +355,141 @@ bool NodeMap::load(const std::string & yaml_path) { "/plc/" + sanitize_topic_segment(entry.entity_id) + "/" + sanitize_topic_segment(entry.data_name); } + // Fault-detection block. Three mutually exclusive modes per point, all + // lowered onto the shared ``fault_detection`` evaluator: + // alarm: numeric threshold (above/below) - original behaviour + // status_bits: decode named bits of an integer status word + // fault_enum: map a fault-code register value to a fault + text + namespace fd = ros2_medkit::fault_detection; + const int detection_modes = (n["alarm"] ? 1 : 0) + (n["status_bits"] ? 1 : 0) + (n["fault_enum"] ? 1 : 0); + if (detection_modes > 1) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "Entry node_id=%s declares more than one of alarm/status_bits/fault_enum - " + "these detection modes are mutually exclusive", + entry.node_id_str.c_str()); + return false; + } + if (n["alarm"]) { AlarmConfig alarm; alarm.fault_code = n["alarm"]["fault_code"].as(); - alarm.severity = n["alarm"]["severity"].as("ERROR"); + alarm.severity = validate_severity(n["alarm"]["severity"].as("ERROR"), entry.node_id_str); alarm.message = n["alarm"]["message"].as(alarm.fault_code); - alarm.threshold = n["alarm"]["threshold"].as(0.0); - alarm.above_threshold = n["alarm"]["above_threshold"].as(true); + alarm.threshold = parse_double(n["alarm"]["threshold"], 0.0, "alarm.threshold", entry.node_id_str); + alarm.above_threshold = + parse_bool(n["alarm"]["above_threshold"], true, "alarm.above_threshold", entry.node_id_str); + + fd::ThresholdRule rule; + rule.fault = {alarm.fault_code, alarm.severity, alarm.message}; + rule.threshold = alarm.threshold; + rule.above = alarm.above_threshold; + entry.detection = std::move(rule); entry.alarm = std::move(alarm); + } else if (n["status_bits"]) { + fd::StatusWordRule rule; + // Optional source register width: mask off sign-extended high bits so a + // signed status word read with its sign bit set does not fire spurious + // faults above the real register width. (#481) + if (n["status_word_width"]) { + const auto w = parse_int64(n["status_word_width"], 0, "status_word_width", entry.node_id_str); + if (w >= 1 && w <= 64) { + rule.width = static_cast(w); + } else { + RCLCPP_WARN(logger, "status_word_width=%lld on node_id=%s out of range (1..64) - ignoring", + static_cast(w), entry.node_id_str.c_str()); + } + } + const auto & bits_node = n["status_bits"]; + if (bits_node.IsSequence()) { + for (const auto & b : bits_node) { + if (!b["bit"] || !b["fault_code"]) { + RCLCPP_WARN(logger, "status_bits entry on node_id=%s missing bit/fault_code - skipping", + entry.node_id_str.c_str()); + continue; + } + fd::BitRule br; + // A wrong-typed bit value skips just this bit (consistent with the + // missing-field path above), never aborts the whole file. (#481) + try { + br.bit = b["bit"].as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "status_bits entry on node_id=%s has non-integer bit - skipping", + entry.node_id_str.c_str()); + continue; + } + if (br.bit >= 64) { + // The status word is decoded as a 64-bit register, so bits at or + // above 64 can never be set and the rule would be dead config. + RCLCPP_WARN(logger, + "status_bits entry on node_id=%s has bit=%u >= 64 (max status-word width); " + "this fault can never fire - skipping", + entry.node_id_str.c_str(), br.bit); + continue; + } + if (rule.width > 0 && br.bit >= rule.width) { + RCLCPP_WARN(logger, + "status_bits entry on node_id=%s has bit=%u >= status_word_width=%u; " + "this fault can never fire - skipping", + entry.node_id_str.c_str(), br.bit, rule.width); + continue; + } + br.fault = {b["fault_code"].as(), + validate_severity(b["severity"].as("ERROR"), entry.node_id_str), + b["message"].as(b["fault_code"].as())}; + rule.bits.push_back(std::move(br)); + } + } + if (!rule.bits.empty()) { + entry.detection = std::move(rule); + } else { + RCLCPP_WARN(logger, + "status_bits on node_id=%s declared but produced no usable rules - " + "no faults will be detected for this point", + entry.node_id_str.c_str()); + } + } else if (n["fault_enum"]) { + fd::EnumMapRule rule; + rule.ok_value = parse_int64(n["fault_enum"]["ok_value"], 0, "fault_enum.ok_value", entry.node_id_str); + const auto codes = n["fault_enum"]["codes"]; + if (codes && codes.IsSequence()) { + for (const auto & c : codes) { + if (!c["code"] || !c["fault_code"]) { + RCLCPP_WARN(logger, "fault_enum entry on node_id=%s missing code/fault_code - skipping", + entry.node_id_str.c_str()); + continue; + } + fd::EnumRule er; + // A wrong-typed code value skips just this entry, never aborts. (#481) + try { + er.code = c["code"].as(); + } catch (const YAML::Exception &) { + RCLCPP_WARN(logger, "fault_enum entry on node_id=%s has non-integer code - skipping", + entry.node_id_str.c_str()); + continue; + } + er.fault = {c["fault_code"].as(), + validate_severity(c["severity"].as("ERROR"), entry.node_id_str), + c["message"].as(c["fault_code"].as())}; + rule.codes.push_back(std::move(er)); + } + } + if (!rule.codes.empty()) { + // Catch-all: a non-ok value matching no configured code stays visible + // rather than reading as healthy. Code is YAML-overridable, else + // derived deterministically from the point. (#481) + rule.unknown_fault.fault_code = n["fault_enum"]["unknown_fault_code"] + ? n["fault_enum"]["unknown_fault_code"].as() + : derive_unknown_code(entry.entity_id, entry.data_name); + rule.unknown_fault.severity = + validate_severity(n["fault_enum"]["unknown_severity"].as("ERROR"), entry.node_id_str); + rule.unknown_fault.message = n["fault_enum"]["unknown_message"].as(""); + entry.detection = std::move(rule); + } else { + RCLCPP_WARN(logger, + "fault_enum on node_id=%s declared but produced no usable rules - " + "no faults will be detected for this point", + entry.node_id_str.c_str()); + } } size_t idx = entries_.size(); @@ -353,25 +545,81 @@ bool NodeMap::load(const std::string & yaml_path) { } } - // Cross-section collision check (bburda review on PR #387). The two - // alarm pipelines - threshold polling under ``nodes[*].alarm`` and - // native event subscription under ``event_alarms`` - must not address - // the same ``(entity_id, fault_code)``: fault_manager would receive - // both ``report_fault`` calls and the resulting status flapping is - // impossible to debug at runtime. The two paths produce different - // semantics (debounced vs state-machine driven) so the merge is not - // even well-defined. + // Fault-code collision validation. Two distinct concerns are checked here + // against the full set of fault codes the polled detection entries can + // emit (threshold + every status_bits bit + every fault_enum code), not + // just ``nodes[*].alarm`` as the original check did. + // + // 1. Global uniqueness of every emitted detection fault_code. The + // poller shares one ``FaultTransitionTracker`` keyed by fault_code + // alone, and clears are global by code, but evaluation runs + // per-entry. Two detection entries emitting the same fault_code + // (even on different entities) would therefore flap raise+clear + // every poll cycle. Reject duplicates so the intent - one code, one + // source - is documented and enforced at load. + // + // 2. Cross-pipeline collision with native ``event_alarms`` on the same + // ``(entity_id, fault_code)`` (bburda review on PR #387). The + // threshold/bit/enum polling path and the AlarmCondition + // subscription path produce different semantics (debounced vs + // state-machine driven), so fault_manager receiving both a polled + // report and a state-machine report for one SOVD fault flaps and is + // impossible to debug at runtime. { + namespace fd = ros2_medkit::fault_detection; + // (entity_id, fault_code) for every fault a detection entry can emit. + std::vector> detection_faults; + for (const auto & entry : entries_) { + if (!entry.detection.has_value()) { + continue; + } + std::visit( + [&](const auto & rule) { + using T = std::decay_t; + if constexpr (std::is_same_v) { + detection_faults.emplace_back(entry.entity_id, rule.fault.fault_code); + } else if constexpr (std::is_same_v) { + for (const auto & b : rule.bits) { + detection_faults.emplace_back(entry.entity_id, b.fault.fault_code); + } + } else if constexpr (std::is_same_v) { + for (const auto & e : rule.codes) { + detection_faults.emplace_back(entry.entity_id, e.fault.fault_code); + } + if (!rule.unknown_fault.fault_code.empty()) { + detection_faults.emplace_back(entry.entity_id, rule.unknown_fault.fault_code); + } + } + }, + *entry.detection); + } + + // 1. Global uniqueness by fault_code across all detection entries. + std::set seen_codes; + for (const auto & fault : detection_faults) { + if (!seen_codes.insert(fault.second).second) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "fault_code '%s' is emitted by more than one detection entry " + "(threshold/status_bits/fault_enum); fault codes must be globally unique because " + "the shared fault tracker is keyed by code alone (a collision flaps raise/clear " + "every poll cycle) - rename one of them", + fault.second.c_str()); + return false; + } + } + + // 2. Cross-pipeline collision with event_alarms by (entity_id, fault_code). std::set> event_keys; for (const auto & cfg : event_alarms_) { event_keys.emplace(cfg.entity_id, cfg.fault_code); } - for (const auto & entry : entries_) { - if (entry.alarm.has_value() && event_keys.count({entry.entity_id, entry.alarm->fault_code}) > 0) { + for (const auto & [entity_id, code] : detection_faults) { + if (event_keys.count({entity_id, code}) > 0) { RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), - "Duplicate (entity_id=%s, fault_code=%s) declared by both nodes[*].alarm " - "(threshold mode) and event_alarms[*] (subscription mode) - mutually exclusive", - entry.entity_id.c_str(), entry.alarm->fault_code.c_str()); + "Duplicate (entity_id=%s, fault_code=%s) declared by both a polled detection entry " + "(threshold/status_bits/fault_enum) and event_alarms[*] (subscription mode) - " + "mutually exclusive", + entity_id.c_str(), code.c_str()); return false; } } @@ -440,6 +688,10 @@ const NodeMapEntry * NodeMap::find_by_node_id(const std::string & node_id_str) c return nullptr; } +// Test / back-compat only: the live fault-evaluation path is +// detection_entries() (see opcua_poller.cpp). This returns just the +// threshold-mode subset carrying the legacy ``alarm`` block and is not consulted +// at runtime; do not wire new callers to it. std::vector NodeMap::alarm_entries() const { std::vector result; for (const auto & entry : entries_) { @@ -450,6 +702,16 @@ std::vector NodeMap::alarm_entries() const { return result; } +std::vector NodeMap::detection_entries() const { + std::vector result; + for (const auto & entry : entries_) { + if (entry.detection.has_value()) { + result.push_back(&entry); + } + } + return result; +} + void NodeMap::build_entity_defs() { entity_defs_.clear(); @@ -466,7 +728,7 @@ void NodeMap::build_entity_defs() { if (entry.writable) { def.writable_names.push_back(entry.data_name); } - if (entry.alarm.has_value()) { + if (entry.detection.has_value()) { def.has_faults = true; } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp index f26a13e50..e0768c59d 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp @@ -122,6 +122,18 @@ OpcuaValue variant_to_value(const opcua::Variant & var) { if (var.isType()) { return var.getScalarCopy(); } + if (var.isType()) { + // OpcuaValue has no unsigned 64-bit slot; reinterpret the bit pattern as + // int64_t. A plain static_cast of a value above INT64_MAX is only + // implementation-defined under C++17, so copy the bits via memcpy to make + // the reinterpretation well-defined. The raw bits are preserved for + // status-word bit decode (which masks them) and for fault-enum codes that + // fit in int64_t; codes above INT64_MAX read as negative in enum mode. + const uint64_t raw = var.getScalarCopy(); + int64_t signed_bits; + std::memcpy(&signed_bits, &raw, sizeof(signed_bits)); + return signed_bits; + } if (var.isType()) { return var.getScalarCopy(); } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp index e4960df78..3ccef8733 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp @@ -188,9 +188,10 @@ void OpcuaPlugin::set_context(PluginContext & context) { } poller_ = std::make_unique(*client_, node_map_); - poller_->set_alarm_callback([this](const std::string & code, const AlarmConfig & cfg, bool active) { - on_alarm_change(code, cfg, active); - }); + poller_->set_alarm_callback( + [this](const std::string & entity_id, const ros2_medkit::fault_detection::FaultSignal & signal) { + on_alarm_change(entity_id, signal); + }); poller_->set_event_alarm_callback([this](const AlarmEventDelivery & delivery) { on_event_alarm(delivery); }); @@ -491,29 +492,18 @@ void OpcuaPlugin::handle_plc_status(const PluginRequest & req, PluginResponse & // -- Fault bridge -- -void OpcuaPlugin::on_alarm_change(const std::string & fault_code, const AlarmConfig & config, bool active) { +void OpcuaPlugin::on_alarm_change(const std::string & entity_id, + const ros2_medkit::fault_detection::FaultSignal & signal) { if (shutdown_requested_.load()) { return; } - std::string entity_id; - for (const auto & entry : node_map_.entries()) { - if (entry.alarm.has_value() && entry.alarm->fault_code == fault_code) { - entity_id = entry.entity_id; - break; - } - } - - if (entity_id.empty()) { - log_warn("Alarm " + fault_code + " has no entity mapping"); - return; - } - if (active) { - log_info("Alarm activated: " + fault_code + " on " + entity_id); - send_report_fault(entity_id, fault_code, config.severity, config.message); + if (signal.active) { + log_info("Alarm activated: " + signal.fault_code + " on " + entity_id); + send_report_fault(entity_id, signal.fault_code, signal.severity, signal.message); } else { - log_info("Alarm cleared: " + fault_code + " on " + entity_id); - send_clear_fault(fault_code); + log_info("Alarm cleared: " + signal.fault_code + " on " + entity_id); + send_clear_fault(signal.fault_code); } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp index c4b7213e6..00cf25285 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp @@ -582,53 +582,56 @@ void OpcuaPoller::do_poll() { } void OpcuaPoller::evaluate_alarms() { - auto alarm_entries = node_map_.alarm_entries(); - if (alarm_entries.empty()) { + namespace fd = ros2_medkit::fault_detection; + + auto detection_entries = node_map_.detection_entries(); + if (detection_entries.empty()) { return; } - // Collect state changes while holding snapshot mutex - struct AlarmChange { - std::string fault_code; - AlarmConfig config; - bool active; + // Translate a polled OPC-UA value into the shared, protocol-agnostic value + // type understood by the fault-detection evaluator. + auto to_fd_value = [](const OpcuaValue & v) -> fd::Value { + return std::visit( + [](auto && val) -> fd::Value { + using T = std::decay_t; + if constexpr (std::is_same_v) { + return fd::Value{val}; + } else if constexpr (std::is_same_v) { + return fd::Value{val}; + } else if constexpr (std::is_floating_point_v) { + return fd::Value{static_cast(val)}; + } else { + return fd::Value{static_cast(val)}; + } + }, + v); + }; + + // Collect raise/clear edges while holding the snapshot mutex. + struct FaultChange { + std::string entity_id; + fd::FaultSignal signal; }; - std::vector changes; + std::vector changes; { std::lock_guard snap_lock(snapshot_mutex_); - for (const auto * entry : alarm_entries) { + for (const auto * entry : detection_entries) { auto it = snapshot_.values.find(entry->node_id_str); if (it == snapshot_.values.end()) { continue; } - const auto & alarm = *entry->alarm; - bool active = false; - - std::visit( - [&active, &alarm](auto && val) { - using T = std::decay_t; - if constexpr (std::is_same_v) { - active = val; - } else if constexpr (std::is_arithmetic_v) { - double dval = static_cast(val); - if (alarm.above_threshold) { - active = dval > alarm.threshold; - } else { - active = dval < alarm.threshold; - } - } - }, - it->second); - - auto & prev_state = alarm_states_[alarm.fault_code]; - snapshot_.alarms[alarm.fault_code] = active; - - if (active != prev_state) { - prev_state = active; - changes.push_back({alarm.fault_code, alarm, active}); + auto signals = fd::evaluate(to_fd_value(it->second), *entry->detection); + for (const auto & s : signals) { + snapshot_.alarms[s.fault_code] = s.active; + } + + // Edge-detect raise/clear; only transitions reach the callback. + for (auto & edge : alarm_tracker_.apply(signals)) { + changes.push_back({entry->entity_id, std::move(edge)}); } } } // snapshot_mutex_ released @@ -638,7 +641,7 @@ void OpcuaPoller::evaluate_alarms() { std::lock_guard alarm_lock(alarm_mutex_); if (alarm_callback_) { for (const auto & c : changes) { - alarm_callback_(c.fault_code, c.config, c.active); + alarm_callback_(c.entity_id, c.signal); } } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp index 765394592..bf58e533a 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/fixtures/test_alarm_server/test_alarm_server.cpp @@ -136,6 +136,27 @@ UA_StatusCode add_condition(UA_Server * server, const std::string & name, UA_UIn return rc; } +// Add a writable Int32 variable under ObjectsFolder in the user namespace with +// a predictable string NodeId ``ns;s=`` so the gateway's polled +// detection config (``node_id: "ns=2;s=StatusWord"``) can address it. Exercises +// the status_bits / fault_enum poll path that is independent of the +// AlarmConditionType event path above. +UA_StatusCode add_int32_variable(UA_Server * server, const std::string & name, UA_UInt16 ns) { + UA_VariableAttributes attr = UA_VariableAttributes_default; + UA_Int32 initial = 0; + UA_Variant_setScalar(&attr.value, &initial, &UA_TYPES[UA_TYPES_INT32]); + attr.dataType = UA_TYPES[UA_TYPES_INT32].typeId; + attr.accessLevel = UA_ACCESSLEVELMASK_READ | UA_ACCESSLEVELMASK_WRITE; + attr.displayName = UA_LOCALIZEDTEXT(const_cast("en"), const_cast(name.c_str())); + UA_NodeId requested = UA_NODEID_STRING_ALLOC(ns, name.c_str()); + UA_QualifiedName qname = UA_QUALIFIEDNAME(ns, const_cast(name.c_str())); + UA_StatusCode rc = UA_Server_addVariableNode( + server, requested, UA_NODEID_NUMERIC(0, UA_NS0ID_OBJECTSFOLDER), UA_NODEID_NUMERIC(0, UA_NS0ID_ORGANIZES), qname, + UA_NODEID_NUMERIC(0, UA_NS0ID_BASEDATAVARIABLETYPE), attr, nullptr, nullptr); + UA_NodeId_clear(&requested); + return rc; +} + UA_StatusCode set_two_state(UA_Server * server, const UA_NodeId & cond, const char * field, UA_Boolean value) { UA_Variant v; UA_Variant_setScalar(&v, &value, &UA_TYPES[UA_TYPES_BOOLEAN]); @@ -300,7 +321,7 @@ UA_StatusCode handle_enable(UA_Server * server, const Condition & c, bool enable return set_two_state(server, c.node, "EnabledState", enable); } -void cli_loop(UA_Server * server) { +void cli_loop(UA_Server * server, UA_UInt16 ns) { std::string line; while (g_running && std::getline(std::cin, line)) { std::istringstream iss(line); @@ -312,6 +333,28 @@ void cli_loop(UA_Server * server) { break; } std::lock_guard guard(g_mutex); + // ``set `` writes a polled Int32 variable (StatusWord / + // FaultCode). Handled before the condition lookup because these nodes are + // plain variables, not AlarmCondition instances in g_conditions. + if (cmd == "set") { + long val = 0; + if (!(iss >> val)) { + std::cout << "ERR set_missing_value:" << name << std::endl; + continue; + } + UA_Int32 v32 = static_cast(val); + UA_Variant var; + UA_Variant_setScalar(&var, &v32, &UA_TYPES[UA_TYPES_INT32]); + UA_NodeId target = UA_NODEID_STRING_ALLOC(ns, name.c_str()); + UA_StatusCode rc = UA_Server_writeValue(server, target, var); + UA_NodeId_clear(&target); + if (rc == UA_STATUSCODE_GOOD) { + std::cout << "OK " << name << "=" << val << std::endl; + } else { + std::cout << "ERR " << name << ":" << UA_StatusCode_name(rc) << std::endl; + } + continue; + } auto it = g_conditions.find(name); if (cmd != "quit" && it == g_conditions.end()) { std::cout << "ERR unknown_condition:" << name << std::endl; @@ -416,8 +459,18 @@ int main(int argc, char ** argv) { g_conditions[oh.name] = oh; g_conditions[sl.name] = sl; + // Polled Int32 registers for the status_bits / fault_enum detection path + // (independent of the AlarmConditionType events above). Driven via the + // ``set `` CLI command. + if (add_int32_variable(server, "StatusWord", ns) != UA_STATUSCODE_GOOD || + add_int32_variable(server, "FaultCode", ns) != UA_STATUSCODE_GOOD) { + UA_LOG_ERROR(UA_Log_Stdout, UA_LOGCATEGORY_USERLAND, "Failed to register detection variables"); + UA_Server_delete(server); + return 1; + } + std::cout << "READY port=" << port << " namespace=" << ns << std::endl; - std::thread cli(cli_loop, server); + std::thread cli(cli_loop, server, ns); UA_StatusCode rc = UA_Server_run(server, reinterpret_cast(&g_running)); g_running = false; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index 218da4843..5970f76ff 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -16,8 +16,11 @@ #include +#include #include #include +#include +#include namespace ros2_medkit_gateway { @@ -605,4 +608,719 @@ component_id: test EXPECT_FALSE(map.load(path)); } +// -- Shared fault-detection node-map wiring (issue #481) --------------------- + +namespace fd = ros2_medkit::fault_detection; + +TEST_F(NodeMapTest, ThresholdAlarmLoweredToDetectionRule) { + std::string path = "/tmp/test_node_map_threshold_detect.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: temp + alarm: + fault_code: HIGH_TEMP + severity: ERROR + message: "Over temp" + threshold: 80.0 + above_threshold: true +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(det.has_value()); + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + EXPECT_EQ(r.fault.fault_code, "HIGH_TEMP"); + EXPECT_DOUBLE_EQ(r.threshold, 80.0); + EXPECT_TRUE(r.above); +} + +TEST_F(NodeMapTest, StatusBitsParsedIntoDetection) { + std::string path = "/tmp/test_node_map_status_bits.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=StatusWord" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 3 + fault_code: PUMP_OVERLOAD + severity: ERROR + message: "Pump overload" + - bit: 7 + fault_code: FILTER_DIRTY + severity: WARNING +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(det.has_value()); + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.bits.size(), 2u); + EXPECT_EQ(r.bits[0].bit, 3u); + EXPECT_EQ(r.bits[0].fault.fault_code, "PUMP_OVERLOAD"); + EXPECT_EQ(r.bits[1].bit, 7u); + // Message defaults to the fault code when omitted. + EXPECT_EQ(r.bits[1].fault.message, "FILTER_DIRTY"); + + auto signals = fd::evaluate(fd::Value{static_cast(0b1000)}, *det); + ASSERT_EQ(signals.size(), 2u); + EXPECT_TRUE(signals[0].active); // bit 3 set + EXPECT_FALSE(signals[1].active); // bit 7 clear +} + +TEST_F(NodeMapTest, FaultEnumParsedIntoDetection) { + std::string path = "/tmp/test_node_map_fault_enum.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=FaultCode" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE + severity: ERROR + message: "DC bus overvoltage" + - code: 11 + fault_code: VFD_OVERCURRENT + severity: ERROR +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(det.has_value()); + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + EXPECT_EQ(r.ok_value, 0); + ASSERT_EQ(r.codes.size(), 2u); + EXPECT_EQ(r.codes[0].code, 10); + EXPECT_EQ(r.codes[0].fault.message, "DC bus overvoltage"); + + auto signals = fd::evaluate(fd::Value{static_cast(10)}, *det); + // Two configured codes plus the auto-added unmapped catch-all. + ASSERT_EQ(signals.size(), 3u); + EXPECT_TRUE(signals[0].active); // code 10 + EXPECT_FALSE(signals[1].active); // code 11 + EXPECT_FALSE(signals[2].active); // catch-all (a mapped code is not "unmapped") +} + +TEST_F(NodeMapTest, RejectsDuplicateDetectionFaultCode) { + // The poller shares one FaultTransitionTracker keyed by fault_code alone, so + // two detection entries emitting the same code (here on different entities) + // would flap raise/clear every cycle. The loader must reject the whole file + // at load time. (issue #481) + std::string path = "/tmp/test_node_map_dup_fault_code.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent_a + data_name: temp_a + alarm: + fault_code: SHARED_CODE + threshold: 80.0 + - node_id: "ns=1;i=2" + entity_id: ent_b + data_name: temp_b + alarm: + fault_code: SHARED_CODE + threshold: 90.0 +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, RejectsDuplicateCodeAcrossStatusBitsAndEnum) { + // Uniqueness spans every emitted code, not just threshold alarms: a bit code + // colliding with an enum code on another node is still a shared-tracker + // collision. (issue #481) + std::string path = "/tmp/test_node_map_dup_mixed.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 2 + fault_code: COLLIDE + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: 7 + fault_code: COLLIDE +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, RejectsStatusBitsCollidingWithEventAlarm) { + // The author's original collision check only looked at nodes[*].alarm; a + // status_bits (or fault_enum) code colliding with an event_alarms + // (entity_id, fault_code) must be rejected too, otherwise fault_manager gets + // both a polled report and a state-machine report for one SOVD fault. + // (issue #481) + std::string path = "/tmp/test_node_map_bits_event_collision.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: tank_process + data_name: status_word + data_type: int + status_bits: + - bit: 1 + fault_code: PLC_OVERPRESSURE +event_alarms: + - alarm_source: "ns=2;s=Alarms.Overpressure" + entity_id: tank_process + fault_code: PLC_OVERPRESSURE +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, AcceptsDistinctCodesAcrossDetectionModes) { + // The uniqueness rule must not false-positive on genuinely distinct codes + // spread across threshold / status_bits / fault_enum entries. + std::string path = "/tmp/test_node_map_distinct_modes.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: tank + data_name: temp + alarm: + fault_code: HIGH_TEMP + threshold: 80.0 + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 3 + fault_code: PUMP_OVERLOAD + - bit: 7 + fault_code: FILTER_DIRTY + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + EXPECT_EQ(map.detection_entries().size(), 3u); +} + +TEST_F(NodeMapTest, StatusBitOutOfRangeSkipped) { + // A bit position >= 64 can never be set in the 64-bit decode register, so it + // is dead config: warn and drop the bit, keep the rest. (issue #481) + std::string path = "/tmp/test_node_map_bit_oob.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 64 + fault_code: NEVER_FIRES + - bit: 3 + fault_code: PUMP_OVERLOAD +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.bits.size(), 1u); + EXPECT_EQ(r.bits[0].bit, 3u); + EXPECT_EQ(r.bits[0].fault.fault_code, "PUMP_OVERLOAD"); +} + +// -- Poller-level shared-tracker integration (issue #481) -------------------- +// +// Exercises the exact loop OpcuaPoller::evaluate_alarms runs: one shared +// FaultTransitionTracker, iterating detection entries built by the real +// NodeMap loader and evaluated by the shared fault_detection evaluator. Proves +// there is no cross-entry flapping and that status-word bits transition +// independently. Driven without a live OPC UA server by feeding synthetic +// values keyed by node_id, mirroring the poller's snapshot. + +namespace { + +struct DetectionChange { + std::string entity_id; + fd::FaultSignal signal; +}; + +// Mirror of OpcuaPoller::evaluate_alarms: for each detection entry, look up its +// value, evaluate, and push the value through the SHARED tracker; return the +// raise/clear edges. Entries whose node has no value yet are skipped. +std::vector evaluate_cycle(const NodeMap & map, fd::FaultTransitionTracker & tracker, + const std::unordered_map & values) { + std::vector changes; + for (const auto * entry : map.detection_entries()) { + auto it = values.find(entry->node_id_str); + if (it == values.end()) { + continue; + } + auto signals = fd::evaluate(it->second, *entry->detection); + for (auto & edge : tracker.apply(signals)) { + changes.push_back({entry->entity_id, std::move(edge)}); + } + } + return changes; +} + +const DetectionChange * find_change(const std::vector & v, const std::string & code) { + for (const auto & c : v) { + if (c.signal.fault_code == code) { + return &c; + } + } + return nullptr; +} + +} // namespace + +TEST_F(NodeMapTest, PollerNoCrossEntryFlapping) { + std::string path = "/tmp/test_node_map_poller_two_entries.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=TempA" + entity_id: ent_a + data_name: temp_a + data_type: float + alarm: + fault_code: A_HIGH + threshold: 80.0 + - node_id: "ns=1;s=TempB" + entity_id: ent_b + data_name: temp_b + data_type: float + alarm: + fault_code: B_HIGH + threshold: 80.0 +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 2u); + + fd::FaultTransitionTracker tracker; + std::unordered_map values; + + // Both healthy: no transitions. + values["ns=1;s=TempA"] = fd::Value{70.0}; + values["ns=1;s=TempB"] = fd::Value{70.0}; + EXPECT_TRUE(evaluate_cycle(map, tracker, values).empty()); + + // A crosses high, B stays healthy: exactly one raise for A. + values["ns=1;s=TempA"] = fd::Value{90.0}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + EXPECT_EQ(changes[0].entity_id, "ent_a"); + EXPECT_EQ(changes[0].signal.fault_code, "A_HIGH"); + EXPECT_TRUE(changes[0].signal.active); + } + + // Steady state with A still high: the shared tracker must NOT clear B (which + // it never raised) nor re-emit A. This is the regression the fix guards. + for (int i = 0; i < 5; ++i) { + EXPECT_TRUE(evaluate_cycle(map, tracker, values).empty()); + } + + // Now B crosses high too: exactly one raise for B, A untouched. + values["ns=1;s=TempB"] = fd::Value{95.0}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + EXPECT_EQ(changes[0].entity_id, "ent_b"); + EXPECT_EQ(changes[0].signal.fault_code, "B_HIGH"); + EXPECT_TRUE(changes[0].signal.active); + } + + // A clears, B stays high: exactly one clear for A. + values["ns=1;s=TempA"] = fd::Value{60.0}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + EXPECT_EQ(changes[0].signal.fault_code, "A_HIGH"); + EXPECT_FALSE(changes[0].signal.active); + } +} + +TEST_F(NodeMapTest, PollerStatusWordBitsTransitionIndependently) { + std::string path = "/tmp/test_node_map_poller_status_word.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=StatusWord" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: 0 + fault_code: PUMP_OVERLOAD + - bit: 3 + fault_code: FILTER_DIRTY +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + + fd::FaultTransitionTracker tracker; + std::unordered_map values; + + // All clear. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0)}; + EXPECT_TRUE(evaluate_cycle(map, tracker, values).empty()); + + // Bit 0 set only: raise PUMP_OVERLOAD, FILTER_DIRTY stays clear. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0b0001)}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + const auto * overload = find_change(changes, "PUMP_OVERLOAD"); + ASSERT_NE(overload, nullptr); + EXPECT_TRUE(overload->signal.active); + } + + // Bit 3 also set: raise FILTER_DIRTY only, PUMP_OVERLOAD stays raised. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0b1001)}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + const auto * dirty = find_change(changes, "FILTER_DIRTY"); + ASSERT_NE(dirty, nullptr); + EXPECT_TRUE(dirty->signal.active); + } + + // Bit 0 clears, bit 3 stays: clear PUMP_OVERLOAD only. + values["ns=1;s=StatusWord"] = fd::Value{static_cast(0b1000)}; + { + auto changes = evaluate_cycle(map, tracker, values); + ASSERT_EQ(changes.size(), 1u); + const auto * overload = find_change(changes, "PUMP_OVERLOAD"); + ASSERT_NE(overload, nullptr); + EXPECT_FALSE(overload->signal.active); + } +} + +// -- Undecidable input, catch-all, and malformed-config handling (issue #481) - + +TEST_F(NodeMapTest, EmptyStatusBitsBlockLoadsWithoutDetection) { + // A declared-but-empty status_bits block yields zero rules. The loader must + // keep the data point but emit no detection (warned), not silently ship a + // non-functional fault map and not reject the whole file. + std::string path = "/tmp/test_node_map_empty_status_bits.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: [] +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.entries().size(), 1u); + EXPECT_TRUE(map.detection_entries().empty()); +} + +TEST_F(NodeMapTest, FaultEnumWithoutCodesLoadsWithoutDetection) { + std::string path = "/tmp/test_node_map_empty_enum.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.entries().size(), 1u); + EXPECT_TRUE(map.detection_entries().empty()); +} + +TEST_F(NodeMapTest, WrongTypedBitSkippedNotAborted) { + // A single mistyped bit must skip just that bit (consistent with the + // missing-field path), never tear down the entire node map. + std::string path = "/tmp/test_node_map_bad_bit.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: pump + data_name: status_word + data_type: int + status_bits: + - bit: "three" + fault_code: BAD_BIT + - bit: 3 + fault_code: PUMP_OVERLOAD +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.bits.size(), 1u); + EXPECT_EQ(r.bits[0].fault.fault_code, "PUMP_OVERLOAD"); +} + +TEST_F(NodeMapTest, WrongTypedEnumCodeSkippedNotAborted) { + std::string path = "/tmp/test_node_map_bad_code.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + codes: + - code: high + fault_code: BAD_CODE + - code: 10 + fault_code: VFD_OVERVOLTAGE +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_EQ(r.codes.size(), 1u); + EXPECT_EQ(r.codes[0].code, 10); +} + +TEST_F(NodeMapTest, FaultEnumGetsCatchAllUnknownFault) { + std::string path = "/tmp/test_node_map_enum_catchall.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Fault" + entity_id: vfd + data_name: fault_code + data_type: int + fault_enum: + ok_value: 0 + codes: + - code: 10 + fault_code: VFD_OVERVOLTAGE +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + ASSERT_FALSE(r.unknown_fault.fault_code.empty()); + + // An unenumerated non-ok value raises the catch-all rather than reading ok. + auto out = fd::evaluate(fd::Value{static_cast(77)}, *det); + const fd::FaultSignal * unk = nullptr; + for (const auto & s : out) { + if (s.fault_code == r.unknown_fault.fault_code) { + unk = &s; + } + } + ASSERT_NE(unk, nullptr); + EXPECT_TRUE(unk->active); + EXPECT_EQ(unk->message, "unmapped fault code 77"); +} + +TEST_F(NodeMapTest, NonNumericThresholdFallsBackAndLoads) { + // A present-but-unparseable threshold must warn and fall back, not silently + // invert/disable detection and not abort the load. + std::string path = "/tmp/test_node_map_bad_threshold.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: val1 + alarm: + fault_code: BAD_THRESH + threshold: "100 bar" +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_TRUE(map.entries()[0].alarm.has_value()); + EXPECT_DOUBLE_EQ(map.entries()[0].alarm->threshold, 0.0); +} + +TEST_F(NodeMapTest, InvalidSeverityDefaultsToError) { + std::string path = "/tmp/test_node_map_bad_severity.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: val1 + alarm: + fault_code: SEV_TEST + severity: SEVERE + threshold: 50.0 +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_TRUE(map.entries()[0].alarm.has_value()); + EXPECT_EQ(map.entries()[0].alarm->severity, "ERROR"); +} + +TEST_F(NodeMapTest, StatusWordWidthMasksSignExtension) { + std::string path = "/tmp/test_node_map_status_width.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;s=Status" + entity_id: drive + data_name: status_word + data_type: int + status_word_width: 16 + status_bits: + - bit: 15 + fault_code: DRIVE_FAULT +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.detection_entries().size(), 1u); + const auto & det = map.detection_entries()[0]->detection; + ASSERT_TRUE(std::holds_alternative(*det)); + const auto & r = std::get(*det); + EXPECT_EQ(r.width, 16u); + + // bit15 set in a sign-extended 16-bit word: only DRIVE_FAULT, no spurious bits. + const auto sign_extended = static_cast(0xFFFFFFFFFFFF8000ULL); + auto out = fd::evaluate(fd::Value{sign_extended}, *det); + ASSERT_EQ(out.size(), 1u); + EXPECT_TRUE(out[0].active); +} + +TEST_F(NodeMapTest, RejectsMultipleDetectionModesOnOneNode) { + std::string path = "/tmp/test_node_map_multi_mode.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: ent1 + data_name: val1 + alarm: + fault_code: A + threshold: 1.0 + status_bits: + - bit: 0 + fault_code: B +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + } // namespace ros2_medkit_gateway