diff --git a/.github/workflows/opcua-plugin.yml b/.github/workflows/opcua-plugin.yml index 398084079..43cf4a39d 100644 --- a/.github/workflows/opcua-plugin.yml +++ b/.github/workflows/opcua-plugin.yml @@ -256,3 +256,89 @@ jobs: echo "=== ${c} logs ===" docker logs "${c}" 2>&1 | tail -80 || true done + + integration-secured: + name: Integration (Secured A&C) + # Issue #477: proves the OPC-UA client security code's + # apply_security_config() SUCCESS path - a real Basic256Sha256 + + # SignAndEncrypt SecureChannel with a username token against the + # encryption-enabled test_alarm_server fixture. Needs fault_manager_node + # built alongside the gateway, openssl for cert generation, and + # ROS2_MEDKIT_OPCUA_SECURE_REQUIRE=1 so the test runs for real instead of + # skipping with CTest code 77. + runs-on: ubuntu-latest + container: + image: ubuntu:noble + timeout-minutes: 60 + defaults: + run: + shell: bash + steps: + - name: Install Git + run: | + apt-get update + apt-get install -y git + + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Pre-install ROS 2 apt source + uses: ./.github/actions/ros-apt-source + + - name: Set up ROS 2 jazzy + uses: ros-tooling/setup-ros@v0.7 + with: + required-ros-distributions: jazzy + + - name: Install ccache + run: apt-get install -y ccache + + - name: Cache ccache + uses: actions/cache@v4 + with: + path: /root/.cache/ccache + key: ccache-opcua-secured-${{ github.sha }} + restore-keys: | + ccache-opcua-secured- + + - name: Install dependencies + run: | + apt-get update + # openssl: cert generation. libssl-dev: encryption backend for both + # the plugin's open62541pp and the fixture's static open62541. + apt-get install -y ros-jazzy-test-msgs libyaml-cpp-dev libssl-dev openssl + source /opt/ros/jazzy/setup.bash + rosdep update + rosdep install --from-paths src --ignore-src -y \ + --skip-keys='nav2_msgs ament_cmake_clang_format ament_cmake_clang_tidy' + + - name: Build gateway + OPC-UA plugin + fault_manager + env: + CCACHE_DIR: /root/.cache/ccache + CCACHE_MAXSIZE: 500M + CCACHE_SLOPPINESS: pch_defines,time_macros + run: | + source /opt/ros/jazzy/setup.bash + # fault_manager_node is a sibling package the secured test launches; + # it is not a build dependency of the plugin, so name it explicitly. + colcon build --symlink-install \ + --packages-up-to ros2_medkit_opcua ros2_medkit_fault_manager \ + --cmake-args -DCMAKE_BUILD_TYPE=Release \ + --event-handlers console_direct+ + ccache -s + + - name: Run secured A&C integration test + timeout-minutes: 15 + env: + ROS2_MEDKIT_OPCUA_SECURE_REQUIRE: '1' + run: | + source /opt/ros/jazzy/setup.bash + source install/setup.bash + colcon test --return-code-on-test-failure \ + --packages-select ros2_medkit_opcua \ + --ctest-args -R test_opcua_secured \ + --event-handlers console_direct+ + + - name: Show test results + if: always() + run: colcon test-result --verbose diff --git a/src/ros2_medkit_gateway/config/gateway_params.secure.yaml b/src/ros2_medkit_gateway/config/gateway_params.secure.yaml new file mode 100644 index 000000000..8dc739dab --- /dev/null +++ b/src/ros2_medkit_gateway/config/gateway_params.secure.yaml @@ -0,0 +1,117 @@ +# ROS 2 Medkit Gateway - secure field profile +# +# Hardened parameter preset for on-prem / plant-network (appliance) +# deployments. It turns ON every control that the default development config +# leaves OFF: JWT auth, TLS, restricted CORS, and rate limiting. Use this file +# instead of gateway_params.yaml for any deployment reachable from an +# untrusted network. +# +# ros2 run ros2_medkit_gateway gateway_node \ +# --ros-args --params-file gateway_params.secure.yaml +# +# Items marked "REQUIRED" below must be set before the gateway will start / +# accept clients. See design/hardening.rst for the full checklist and +# credential / certificate provisioning steps. + +ros2_medkit_gateway: + ros__parameters: + server: + # Appliance binds all interfaces; TLS + auth below protect the surface. + # Narrow to a management interface IP when the deployment allows it. + host: "0.0.0.0" + port: 8443 + + # TLS/HTTPS - REQUIRED. Provision a real certificate (see + # scripts/generate_dev_certs.sh for the dev-only equivalent). The key + # file must be chmod 600 and owned by the gateway service user. + tls: + enabled: true + cert_file: "/etc/ros2_medkit/certs/cert.pem" # REQUIRED + key_file: "/etc/ros2_medkit/certs/key.pem" # REQUIRED (chmod 600) + ca_file: "" + # 1.3 preferred on a controlled fleet; drop to 1.2 only for legacy + # clients that cannot negotiate 1.3. + min_version: "1.3" + + # CORS - restrict to the explicit origins that serve the operator UI. + # Never use ["*"] with credentials. Empty list disables CORS entirely + # (correct for API-only appliances with no browser UI). + cors: + allowed_origins: ["https://medkit-ui.local"] # REQUIRED if a browser UI is used; else [""] + allowed_methods: ["GET", "PUT", "POST", "DELETE", "OPTIONS"] + allowed_headers: ["Content-Type", "Accept", "Authorization"] + allow_credentials: true + max_age_seconds: 600 + + # Authentication (JWT + RBAC) - REQUIRED on. + auth: + enabled: true + # HS256 shared secret (>= 32 chars) or, for RS256, the private key path. + # REQUIRED - inject from a secret store / env at deploy time; do NOT + # commit a real secret to source control. + jwt_secret: "" # REQUIRED + jwt_public_key: "" + jwt_algorithm: "HS256" + token_expiry_seconds: 3600 + refresh_token_expiry_seconds: 86400 + # "all" forces auth on every request (reads + writes). Use "write" only + # when unauthenticated reads are explicitly acceptable on this network. + require_auth_for: "all" + issuer: "ros2_medkit_gateway" + # Provision the minimum set of role-scoped clients. Format: + # "client_id:client_secret:role" (roles: viewer/operator/configurator/admin). + # REQUIRED - replace with real, rotated credentials. + clients: [""] # REQUIRED + + # Rate limiting - ON to bound abuse / runaway clients. + rate_limiting: + enabled: true + global_requests_per_minute: 600 + client_requests_per_minute: 120 + # Tighten mutating endpoints (operations / data writes). + endpoint_limits: ["/api/v1/*/operations/*:30"] + client_cleanup_interval_seconds: 300 + client_max_idle_seconds: 600 + + # Diagnostic scripts - disabled by default on an appliance. Enable + # deliberately and keep uploads off (manifest-defined scripts only). + scripts: + scripts_dir: "" + allow_uploads: false + max_file_size_mb: 10 + max_concurrent_executions: 5 + default_timeout_sec: 300 + max_execution_history: 100 + + # Bulk data uploads - cap the payload size; raise only if the deployment + # genuinely needs large uploads. + bulk_data: + storage_dir: "/var/lib/ros2_medkit/bulk_data" + max_upload_size: 26214400 # 25 MiB + categories: [""] + + # SOVD resource locking on, so concurrent operators cannot stamp on each + # other's mutations. + locking: + enabled: true + default_max_expiration: 3600 + cleanup_interval: 30 + defaults: + components: + lock_required_scopes: ["operations"] + breakable: true + apps: + lock_required_scopes: ["operations"] + breakable: true + + # OpenAPI /docs endpoints off on a hardened appliance (reduce surface). + docs: + enabled: false + + # Peer aggregation: if used across hosts, require TLS and do NOT forward + # client tokens to mDNS-discovered peers unless every peer is trusted. + aggregation: + enabled: false + require_tls: true + forward_auth: false + peer_scheme: "https" diff --git a/src/ros2_medkit_gateway/design/hardening.rst b/src/ros2_medkit_gateway/design/hardening.rst new file mode 100644 index 000000000..16311c0f3 --- /dev/null +++ b/src/ros2_medkit_gateway/design/hardening.rst @@ -0,0 +1,124 @@ +Gateway hardening (secure field profile) +======================================== + +The gateway ships every transport and access control needed for a hardened +deployment - JWT authentication with RBAC, TLS/HTTPS, restricted CORS, and +token-bucket rate limiting - but they are **disabled by default** so local +development works out of the box. A gateway exposed on a plant network with the +defaults is wide open: unauthenticated reads and writes over cleartext HTTP. + +For any deployment reachable from an untrusted network, start from the secure +field profile preset ``config/gateway_params.secure.yaml`` instead of +``config/gateway_params.yaml``: + +.. code-block:: bash + + ros2 run ros2_medkit_gateway gateway_node \ + --ros-args --params-file gateway_params.secure.yaml \ + -p auth.jwt_secret:="$MEDKIT_JWT_SECRET" \ + -p 'auth.clients:=["operator:'"$OP_SECRET"':operator"]' + +What the secure profile turns on +-------------------------------- + +================================ ============== =========================================== +Control Default Secure profile +================================ ============== =========================================== +``auth.enabled`` false true +``auth.require_auth_for`` write all (auth on reads + writes) +``server.tls.enabled`` false true (HTTPS, min TLS 1.3) +``cors.allowed_origins`` ``[]`` explicit origin list (no wildcard) +``rate_limiting.enabled`` false true (global + per-client + per-endpoint) +``scripts.allow_uploads`` true false (manifest-defined scripts only) +``docs.enabled`` true false (reduced surface) +``bulk_data.max_upload_size`` 100 MiB 25 MiB +``locking`` on operations none lock required before mutation +================================ ============== =========================================== + +Credential and certificate provisioning +---------------------------------------- + +1. **TLS certificate.** Provision a real server certificate + private key and + point ``server.tls.cert_file`` / ``server.tls.key_file`` at them. The key + file must be ``chmod 600`` and owned by the gateway service user. For a + dev/test box only, ``scripts/generate_dev_certs.sh`` emits a self-signed + ``cert.pem`` / ``key.pem`` / ``ca.pem`` (never use these in production). + +2. **JWT secret.** Generate a high-entropy secret of at least 32 characters + (HS256) or provision an RS256 key pair. Inject it at deploy time from a + secret store or environment variable - do not commit it to source control. + + .. warning:: + + ``auth.jwt_secret`` is a plain readable ROS 2 parameter. Beyond source + control it is exposed on two planes: + + - **DDS control plane.** Any peer on the ROS 2 graph can read it with + ``ros2 param get / auth.jwt_secret`` - the parameter is + declared readable and the DDS domain is unauthenticated by default. + - **Process table.** Passing it inline (``-p auth.jwt_secret:=...`` as in + the launch example above) also leaks the value via ``ps`` and + ``/proc//cmdline``. + + Injecting from an environment variable / params file instead of an inline + ``-p`` closes the process-table leak, but the value still lands in a + readable parameter, so it remains exposed on the DDS plane. To close that, + lock down the control plane: ROS 2 security (SROS2) with an access-control + policy that denies parameter reads to untrusted participants, or a + dedicated / firewalled ``ROS_DOMAIN_ID`` (optionally with + ``ROS_LOCALHOST_ONLY=1``) that no untrusted peer can join. + +3. **Role-scoped clients.** Create the minimum set of clients in + ``auth.clients`` (``client_id:client_secret:role``). Roles, least to most + privileged: ``viewer`` (read), ``operator`` (+ trigger ops / ack faults / + publish), ``configurator`` (+ modify configs), ``admin`` (+ auth + management). Rotate secrets periodically. + +4. **Obtain a token** and call the API over HTTPS: + + .. code-block:: bash + + curl -sk -X POST https://gateway:8443/api/v1/auth/authorize \ + -H 'Content-Type: application/json' \ + -d '{"client_id":"operator","client_secret":"...","grant_type":"client_credentials"}' + # use the returned access_token: + curl -sk https://gateway:8443/api/v1/faults -H "Authorization: Bearer $TOKEN" + +Hardening checklist +------------------- + +Before exposing a gateway on a shared / plant network, confirm: + +- [ ] ``auth.enabled: true`` and ``auth.require_auth_for`` is ``all`` (or + ``write`` only if unauthenticated reads are explicitly acceptable). +- [ ] ``auth.jwt_secret`` is set to a >= 32-char secret injected from a secret + store (not the placeholder, not in version control). +- [ ] ``auth.clients`` lists only the role-scoped clients you need; default / + example credentials removed; secrets rotated. +- [ ] ``server.tls.enabled: true`` with a real certificate; private key is + ``chmod 600``; ``min_version`` is ``1.3`` (or ``1.2`` only for legacy + clients). +- [ ] ``cors.allowed_origins`` is an explicit list (no ``*``); ``*`` is never + combined with ``allow_credentials: true``. +- [ ] ``rate_limiting.enabled: true`` with per-client and mutating-endpoint + limits tuned to the deployment. +- [ ] ``scripts.allow_uploads: false`` unless remote script upload is a + required, reviewed capability. +- [ ] ``bulk_data.max_upload_size`` bounded to what the deployment needs. +- [ ] If peer aggregation is used: ``aggregation.require_tls: true`` and + ``forward_auth`` only enabled when every peer is trusted. +- [ ] Bind ``server.host`` to a management interface where the network layout + allows, and place the gateway behind the plant firewall / segmentation. +- [ ] Back the gateway with persistent storage on a volume with restricted + permissions (faults DB, triggers DB, rosbag snapshots). + +OPC-UA plugin (southbound) hardening +------------------------------------ + +The gateway controls the northbound REST surface; the OPC-UA plugin controls +the southbound connection to the PLC. Harden both. The plugin supports +SecurityPolicy (Basic256Sha256 / Aes128 / Aes256), MessageSecurityMode +(Sign / SignAndEncrypt), a client application-instance certificate, a server +trust store with reject-untrusted, and user identity (anonymous / +username-password / X.509). See ``ros2_medkit_opcua`` README, section +"OPC-UA client security". diff --git a/src/ros2_medkit_gateway/design/index.rst b/src/ros2_medkit_gateway/design/index.rst index 09bb0c204..a78928315 100644 --- a/src/ros2_medkit_gateway/design/index.rst +++ b/src/ros2_medkit_gateway/design/index.rst @@ -674,6 +674,7 @@ Additional Design Documents aggregation dto_contract entity_cache_architecture + hardening lifecycle plugin_entity_notifications ros2_subscription_architecture diff --git a/src/ros2_medkit_gateway/src/gateway_node.cpp b/src/ros2_medkit_gateway/src/gateway_node.cpp index 716b5e8ea..e87071b4a 100644 --- a/src/ros2_medkit_gateway/src/gateway_node.cpp +++ b/src/ros2_medkit_gateway/src/gateway_node.cpp @@ -496,8 +496,15 @@ GatewayNode::GatewayNode(const rclcpp::NodeOptions & options) : Node("ros2_medki .build(); // Note: HttpServerManager will log TLS configuration details } catch (const std::exception & e) { - RCLCPP_ERROR(get_logger(), "Invalid TLS configuration: %s. TLS disabled.", e.what()); - tls_config_ = TlsConfig{}; // Disabled + // Fail closed: TLS was explicitly requested but could not be built (e.g. + // missing/invalid cert or key). Refuse to start rather than silently + // serving plaintext HTTP. Disabling here would expose the API in the + // clear under a configuration that asked for encryption. + RCLCPP_FATAL(get_logger(), + "TLS is enabled (server.tls.enabled=true) but the TLS configuration is invalid: %s. " + "Refusing to start in plaintext - fix the certificate/key configuration or disable TLS.", + e.what()); + throw std::runtime_error(std::string("Invalid TLS configuration: ") + e.what()); } } else { RCLCPP_INFO(get_logger(), "TLS/HTTPS: disabled"); @@ -551,8 +558,15 @@ GatewayNode::GatewayNode(const rclcpp::NodeOptions & options) : Node("ros2_medki algorithm_to_string(auth_config_.jwt_algorithm).c_str(), get_parameter("auth.require_auth_for").as_string().c_str()); } catch (const std::exception & e) { - RCLCPP_ERROR(get_logger(), "Invalid authentication configuration: %s. Authentication disabled.", e.what()); - auth_config_ = AuthConfig{}; // Disabled + // Fail closed: authentication was explicitly requested but could not be + // built (e.g. empty jwt_secret). Refuse to start rather than silently + // serving an unauthenticated API under a configuration that asked for + // auth. + RCLCPP_FATAL(get_logger(), + "Authentication is enabled (auth.enabled=true) but the auth configuration is invalid: %s. " + "Refusing to start unauthenticated - fix the auth configuration or disable auth.", + e.what()); + throw std::runtime_error(std::string("Invalid authentication configuration: ") + e.what()); } } else { RCLCPP_INFO(get_logger(), "Authentication: disabled"); diff --git a/src/ros2_medkit_gateway/test/test_gateway_node.cpp b/src/ros2_medkit_gateway/test/test_gateway_node.cpp index e467c7849..d0451a58a 100644 --- a/src/ros2_medkit_gateway/test/test_gateway_node.cpp +++ b/src/ros2_medkit_gateway/test/test_gateway_node.cpp @@ -908,6 +908,47 @@ TEST_F(TestGatewayNode, test_invalid_function_id_bad_request) { EXPECT_EQ(res->status, 400); } +// ============================================================================= +// Secure-profile fail-closed tests (issue #477): when auth/TLS is explicitly +// enabled but its configuration is invalid, the node must refuse to start +// rather than silently fall back to an unauthenticated / plaintext server. +// ============================================================================= + +TEST_F(TestGatewayNode, auth_enabled_with_empty_secret_fails_closed) { + // auth.enabled=true with an empty HS256 secret: the AuthConfigBuilder rejects + // it, and the node must propagate that as a fatal (throw) instead of disabling + // auth and serving unauthenticated. + node_.reset(); + int free_port = reserve_free_port(); + ASSERT_NE(free_port, 0); + + rclcpp::NodeOptions options; + options.parameter_overrides({ + rclcpp::Parameter("server.port", free_port), + rclcpp::Parameter("auth.enabled", true), + rclcpp::Parameter("auth.jwt_secret", std::string("")), + rclcpp::Parameter("auth.jwt_algorithm", std::string("HS256")), + }); + EXPECT_THROW(std::make_shared(options), std::exception); +} + +TEST_F(TestGatewayNode, tls_enabled_with_missing_cert_fails_closed) { + // server.tls.enabled=true with no cert/key: the TlsConfigBuilder rejects it, + // and the node must refuse to start in plaintext. + node_.reset(); + int free_port = reserve_free_port(); + ASSERT_NE(free_port, 0); + + rclcpp::NodeOptions options; + options.parameter_overrides({ + rclcpp::Parameter("server.port", free_port), + rclcpp::Parameter("server.tls.enabled", true), + rclcpp::Parameter("server.tls.cert_file", std::string("")), + rclcpp::Parameter("server.tls.key_file", std::string("")), + }); + EXPECT_THROW(std::make_shared(options), std::exception); +} + // ============================================================================= // Entity type path extraction tests // ============================================================================= diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt b/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt index d66c3ecce..8407a6790 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/CMakeLists.txt @@ -55,6 +55,12 @@ fetchcontent_declare( set(UAPP_INTERNAL_OPEN62541 ON CACHE BOOL "" FORCE) set(CMAKE_POSITION_INDEPENDENT_CODE ON CACHE BOOL "" FORCE) +# Enable the OpenSSL encryption backend so the client can negotiate signed / +# encrypted SecureChannels (SecurityPolicy Basic256Sha256 etc.) and present a +# client application-instance certificate. Without this open62541 is built +# SecurityPolicy=None only and the encrypted ClientConfig overloads compile out. +set(UA_ENABLE_ENCRYPTION "OPENSSL" CACHE STRING "" FORCE) + # open62541 + open62541pp are third-party code pulled via FetchContent. # ROS2MedkitWarnings promotes -Wswitch-enum, -Wnull-dereference, and other # warnings to errors project-wide, and those fire on upstream C sources that @@ -262,6 +268,11 @@ if(BUILD_TESTING) -DBUILD_SHARED_LIBS=OFF -DUA_ENABLE_SUBSCRIPTIONS_EVENTS=ON -DUA_ENABLE_SUBSCRIPTIONS_ALARMS_CONDITIONS=ON + # Secured A&C test (issue #477): the fixture also serves an + # encryption-only endpoint (Basic256Sha256 Sign/SignAndEncrypt) so the + # gateway's apply_security_config() success path can be exercised + # against a real SecureChannel. Requires the OpenSSL backend. + -DUA_ENABLE_ENCRYPTION=OPENSSL -DUA_NAMESPACE_ZERO=FULL -DUA_BUILD_EXAMPLES=OFF -DUA_BUILD_TOOLS=OFF @@ -285,8 +296,10 @@ if(BUILD_TESTING) add_dependencies(test_alarm_server alarm_open62541_ep) target_include_directories(test_alarm_server SYSTEM PRIVATE "${_alarm_o62_include}") + # The static open62541 above is built with UA_ENABLE_ENCRYPTION=OPENSSL, so + # the fixture links OpenSSL for the secured-endpoint code path. target_link_libraries(test_alarm_server PRIVATE - "${_alarm_o62_lib}" Threads::Threads) + "${_alarm_o62_lib}" OpenSSL::SSL OpenSSL::Crypto Threads::Threads) set_target_properties(test_alarm_server PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}") target_compile_options(test_alarm_server PRIVATE -w) @@ -313,6 +326,31 @@ if(BUILD_TESTING) LABELS "integration" SKIP_RETURN_CODE 77 TIMEOUT 60) + + # ---- Secured A&C integration test (issue #477) ------------------------ + # Drives the gateway's apply_security_config() success path against the + # encryption-enabled fixture above: negotiates a Basic256Sha256 + + # SignAndEncrypt SecureChannel with a username token, asserts a secured + # read + a CONFIRMED alarm, and that a wrong password fails closed. + # + # Skips with CTest exit 77 unless ROS2_MEDKIT_OPCUA_SECURE_REQUIRE is set + # (mirrors the asyncua smoke-test skip). It also needs gateway_node + + # fault_manager_node + openssl on PATH, so the default unit-tests job (which + # does not build fault_manager) skips cleanly while the dedicated secured CI + # job runs it for real. + install(PROGRAMS + test/integration/gen_test_certs.sh + test/integration/test_opcua_secured.test.py + DESTINATION lib/${PROJECT_NAME}) + add_test(NAME test_opcua_secured + COMMAND "${Python3_EXECUTABLE}" + "${CMAKE_CURRENT_SOURCE_DIR}/test/integration/test_opcua_secured.test.py" + "${CMAKE_BINARY_DIR}/test_alarm_server" + "${CMAKE_CURRENT_SOURCE_DIR}/test/integration/gen_test_certs.sh") + set_tests_properties(test_opcua_secured PROPERTIES + LABELS "integration" + SKIP_RETURN_CODE 77 + TIMEOUT 300) endif() ros2_medkit_relax_vendor_warnings() diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md index 9f6f0ac2a..8da6efd33 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/README.md @@ -181,11 +181,41 @@ nodes: # CodeSys 3.5+, Rockwell via FactoryTalk Linx). Mutually exclusive per entry # with the threshold-based alarm form above. event_alarms: + # Simple form: one source -> one fault. - alarm_source: "ns=4;s=Alarms.Overpressure" entity_id: tank_process fault_code: PLC_OVERPRESSURE + + # Multi-alarm form (issue #389): one source emits many conditions (e.g. the + # Server object as a catch-all, or one Object owning several Program_Alarms) + # routed to distinct faults by condition identity. `mappings` are evaluated + # in order; the first whose non-empty match fields all equal the observed + # event wins, falling back to the source-level `fault_code` (if set). + - alarm_source: "ns=0;i=2253" # Server object (system-wide) + entity_id: line_1 + fault_code: PLC_GENERIC_ALARM # catch-all fallback (optional) + mappings: + - condition_name: "Overpressure" # match ConditionType.ConditionName + fault_code: PLC_OVERPRESSURE + severity_override: ERROR + message: "Tank overpressure" + - source_node: "ns=3;s=PumpA" # or match by event SourceNode + event_type: "ns=0;i=2915" # and/or EventType (AND-combined) + fault_code: PLC_PUMP_A_FAULT + # Append vendor associated values (Siemens Program_Alarm SD_1..SD_n) to the + # fault description as "label=value". + associated_values: + - "SD_1" + - name: "SD_2" + label: "Setpoint" ``` +Mapping precedence: a mapping with more (non-empty) match fields is matched +only when all of them equal the observed event; declaration order breaks ties. +An event matching no mapping uses the source-level `fault_code`; if that is also +empty the event is ignored. A mapping-level `severity_override` / `message` +overrides the source-level one; otherwise the source-level value is inherited. + The plugin auto-registers `acknowledge_fault` and `confirm_fault` operations on every entity that has at least one `event_alarms` entry. Invoke them with: @@ -216,6 +246,93 @@ ros2_medkit_gateway: | `poll_interval_ms` | `1000` | Polling interval in ms (clamped to [100, 60000]) | | `prefer_subscriptions` | `false` | Use OPC-UA subscriptions instead of polling | | `subscription_interval_ms` | `500` | Publishing interval for OPC-UA subscriptions when `prefer_subscriptions: true` | +| `condition_replay_strategy` | `auto` | Active-condition replay on reconnect: `method`, `read`, `auto`, `off` (see below) | +| `require_confirm_for_clear` | `true` | Require both Acknowledge AND Confirm before a native alarm auto-clears. Set `false` for Confirm-less servers (e.g. Siemens S7-1500) so alarms clear on Acknowledge alone (see below) | + +### OPC-UA client security (SecurityPolicy, certificates, user auth) + +By default the client connects with `SecurityPolicy=None` and an `Anonymous` +user (unencrypted, unauthenticated). This is fine for an isolated lab LAN but +not for a hardened server. The following parameters enable a signed/encrypted +SecureChannel with a client application-instance certificate, a server trust +store, and a user identity. All are opt-in; leaving them unset reproduces the +legacy behaviour. + +```yaml +plugins.opcua.security_policy: "Basic256Sha256" # None | Basic256Sha256 | Aes128Sha256RsaOaep | Aes256Sha256RsaPss +plugins.opcua.security_mode: "SignAndEncrypt" # None | Sign | SignAndEncrypt +plugins.opcua.client_cert_path: "/etc/ros2_medkit/opcua/client_cert.der" # X.509 v3, DER +plugins.opcua.client_key_path: "/etc/ros2_medkit/opcua/client_key.pem" # private key, PEM +plugins.opcua.application_uri: "urn:selfpatch:medkit:opcua-client" # MUST match the cert SAN URI +plugins.opcua.trust_list_paths: ["/etc/ros2_medkit/opcua/server_cert.der"] # DER trust store +plugins.opcua.reject_untrusted: true # false = accept ANY server cert (INSECURE, lab only; not trust-on-first-use, nothing pinned) +plugins.opcua.user_auth_mode: "UsernamePassword" # Anonymous | UsernamePassword | X509 +plugins.opcua.username: "medkit" +plugins.opcua.password: "..." # inject from a secret store at deploy time +plugins.opcua.user_cert_path: "/etc/ros2_medkit/opcua/user_cert.der" # X509 user token only +``` + +Notes: +- A non-None `security_policy` requires `client_cert_path` + `client_key_path`; + a secured connection with no cert fails fast (before contacting the server). +- `application_uri` must equal the URI entry in the client certificate's + SubjectAltName or the server rejects the channel with + `BadCertificateUriInvalid`. +- The client cert (its public part) must be added to the server's trusted + client list, and the server cert (DER) added to `trust_list_paths`, unless + `reject_untrusted: false`. +- The encryption backend (OpenSSL) is compiled in; a build without it logs an + error and refuses any secured profile. + +### Active-condition replay on reconnect (issue #389/#478) + +When the gateway (re)subscribes after a drop or restart, conditions that are +already active on the server would otherwise not be re-reported (only live +transitions flow). The standard recovery is OPC-UA `ConditionRefresh`. Per +Part 9 §5.5.7 it is a method of the **ConditionType** node (`i=2782`), so the +Call is issued there (not the Server object) - calling it on the Server object +was the root cause of `BadNodeIdUnknown` rejections, including on Siemens +S7-1500, which documents `ConditionRefresh` as supported (only the optional +`ConditionRefresh2` is not). With the corrected target, `ConditionRefresh` is +expected to work on conformant servers and the read fallback should rarely be +needed. The `condition_replay_strategy` parameter controls recovery: + +| Value | Behaviour | +|-------|-----------| +| `method` | Call `ConditionRefresh` only (warns if rejected) | +| `read` | Skip the method; browse each `alarm_source`, read current condition state, reconcile the fault set | +| `auto` (default) | Try `ConditionRefresh`; on rejection fall back to `read` | +| `off` | No replay (only live transitions surface) | + +The read fallback browses each `alarm_source` for AlarmCondition instances - +both hierarchical Object children and targets of the `HasCondition` reference +(`i=9006`), recursing one level - and reads their state. It mirrors +`ConditionRefresh` semantics: only conditions with `Retain==true` form the +active set, a `Disabled` condition (`EnabledState/Id=false`) is never treated +as active, and a transient read failure keeps the condition rather than +dropping it. + +**Safety guarantee (#478):** the read fallback NEVER clears a tracked fault for +a source that has not positively exposed at least one Condition instance node. +An EventNotifier-only server (e.g. S7-1500, which models no per-condition +instance nodes) makes the browse return zero conditions; rather than wiping the +active alarm set, the gateway preserves the last-known faults, logs an operator +warning that read-fallback is unsupported on this server, and keeps live +transitions flowing. Use `ConditionRefresh` (`method`/`auto`) on such servers. +The read fallback does not recover the live `EventId`, so an operator +ack/confirm may need the next live event before it succeeds. + +### Confirm-less servers and `require_confirm_for_clear` (issue #478) + +By default a native alarm auto-clears only after the operator has both +Acknowledged AND Confirmed it (OPC-UA Part 9 §5.7). Some servers do not +implement the optional `Confirm` transition - Siemens S7-1500 supports +`Acknowledge` / `ConditionRefresh` / `AddComment` but not `Confirm`, so +`ConfirmedState` never becomes true and an inactive alarm would latch forever. +Set `require_confirm_for_clear: false` (or `OPCUA_REQUIRE_CONFIRM_FOR_CLEAR=0`) +so the alarm clears on `Acknowledge` alone. The default (`true`) is unchanged +and spec-strict; the relaxed path still requires acknowledgement and needs +real-S7-1500 validation. Node map entries also support an optional `ros2_topic` field to override the auto-generated ROS 2 topic name for the PLC value bridge: @@ -241,6 +358,17 @@ Write operations use the `set_` prefix convention: |----------|-------------| | `OPCUA_ENDPOINT_URL` | OPC-UA server URL | | `OPCUA_NODE_MAP_PATH` | Path to node map YAML | +| `OPCUA_SECURITY_POLICY` | `None` / `Basic256Sha256` / `Aes128Sha256RsaOaep` / `Aes256Sha256RsaPss` | +| `OPCUA_SECURITY_MODE` | `None` / `Sign` / `SignAndEncrypt` | +| `OPCUA_CLIENT_CERT` / `OPCUA_CLIENT_KEY` | Client cert (DER) / key (PEM) paths | +| `OPCUA_APPLICATION_URI` | Client application URI (must match cert SAN) | +| `OPCUA_TRUST_LIST` | Colon-separated DER trust-store paths | +| `OPCUA_REJECT_UNTRUSTED` | `false`/`0`/`no` to accept any server cert (lab only) | +| `OPCUA_USER_AUTH` | `Anonymous` / `UsernamePassword` / `X509` | +| `OPCUA_USERNAME` / `OPCUA_PASSWORD` | Username-password identity | +| `OPCUA_USER_CERT` | X.509 user-token cert (DER) | +| `OPCUA_CONDITION_REPLAY` | `method` / `read` / `auto` / `off` | +| `OPCUA_REQUIRE_CONFIRM_FOR_CLEAR` | `0`/`false`/`no`/`off` to clear native alarms on Acknowledge alone (Confirm-less servers) | ## Hardware Deployment @@ -348,16 +476,22 @@ bash scripts/stop.sh | Error handling | 3 | Unknown entity, unknown operation, invalid JSON | | **Total** | **16** | | -## Security Limitations +## Security -**Current version uses Anonymous auth with SecurityPolicy=None.** All OPC-UA communication is unencrypted and unauthenticated. This is acceptable for isolated LANs and demo environments but NOT suitable for production networks exposed to untrusted traffic. +The client supports a signed/encrypted SecureChannel (`SecurityPolicy` +Basic256Sha256 / Aes128 / Aes256, `MessageSecurityMode` Sign / SignAndEncrypt), +a client application-instance certificate, a server trust store with +reject-untrusted, and user identity (Anonymous / Username-Password / X.509). +See "OPC-UA client security" above for configuration. -Planned for future versions: -- OPC-UA certificate-based authentication (Basic256Sha256) -- Username/password authentication -- Configurable security policy per connection +**The default remains `SecurityPolicy=None` + `Anonymous` (unencrypted, +unauthenticated)** for backward compatibility and isolated-LAN demos; enable a +secured profile for any network exposed to untrusted traffic. The plugin logs +the effective security profile at startup and warns when running unsecured or +with `reject_untrusted: false`. -The plugin logs a startup message indicating the auth mode in use. +Northbound (gateway REST) hardening is documented separately in the gateway's +`design/hardening.rst` and the `gateway_params.secure.yaml` field profile. Write operations include configurable `min_value`/`max_value` range validation to prevent out-of-range values being sent to PLC actuators. diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/design/index.rst b/src/ros2_medkit_plugins/ros2_medkit_opcua/design/index.rst index 77ecfa33b..994d49280 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/design/index.rst +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/design/index.rst @@ -249,15 +249,27 @@ Decision order, first match wins: | | | below) | +-----+--------------------------------------+---------------------------------------+ | 5b | ``ActiveState == false``, | ``CLEARED`` | -| | ``Acked == true``, | | -| | ``Confirmed == true`` | | +| | ``Acked == true`` and | | +| | (``Confirmed == true`` or | | +| | not ``require_confirm_for_clear``) | | +-----+--------------------------------------+---------------------------------------+ -``Retain`` is intentionally NOT used for state determination. Per Part 9 -§5.5.2.10 it controls visibility during ``ConditionRefresh`` bursts only; -lifecycle is driven entirely by Active / Acked / Confirmed. The SOVD -``PREFAILED`` state has no native equivalent and is reserved for the -threshold-polling pre-trigger path. +``require_confirm_for_clear`` (default ``true``) gates rule 5b on both +Acknowledge and Confirm. Some servers do not implement the optional ``Confirm`` +transition - Siemens S7-1500 supports ``Acknowledge`` / ``ConditionRefresh`` / +``AddComment`` but not ``Confirm`` - so ``ConfirmedState`` never becomes true +and the alarm would latch in ``HEALED`` forever. Setting the flag ``false`` +(YAML ``require_confirm_for_clear`` / env ``OPCUA_REQUIRE_CONFIRM_FOR_CLEAR``) +clears such an alarm on ``Acknowledge`` alone. The default is unchanged +(spec-strict, no regression) and the relaxed path still requires +acknowledgement; it needs real-S7-1500 validation (issue #478). + +``Retain`` is NOT used by this live state-machine table (lifecycle is driven by +Active / Acked / Confirmed; per Part 9 §5.5.2.10 ``Retain`` controls visibility +during ``ConditionRefresh`` bursts). It IS used in the read-based reconnect +replay path (below) as the interesting-state filter, mirroring what +``ConditionRefresh`` would replay. The SOVD ``PREFAILED`` state has no native +equivalent and is reserved for the threshold-polling pre-trigger path. .. note:: @@ -298,15 +310,45 @@ ConditionRefresh ---------------- After creating event monitored items the plugin invokes ``ConditionRefresh`` -(Server object ``i=2253``, method ``i=3875``) so the server pushes any -condition that fired before the subscription started. The same call fires -on every successful reconnect. +so the server pushes any condition that fired before the subscription started. +The same call fires on every successful reconnect. + +Per Part 9 §5.5.7 ``ConditionRefresh`` is a Method of the **ConditionType**, so +the Call's ObjectId is the well-known ConditionType node ``i=2782`` (method +``i=3875``), NOT the Server object ``i=2253``. Calling it on the Server object +was the root cause of ``BadNodeIdUnknown`` rejections on conformant servers, +including Siemens S7-1500, which documents ``ConditionRefresh`` as supported +(only the optional ``ConditionRefresh2`` is not). With the corrected target the +method is expected to succeed on Siemens, making the read fallback unnecessary +there (issue #478). The bracketing ``RefreshStartEventType`` (i=2787) and ``RefreshEndEventType`` (i=2788) are recognized and used to set a diagnostic flag; live notifications arriving during the burst are applied normally because the state machine is driven by per-condition ``ConditionId`` and runs idempotently. +Read-based replay fallback +-------------------------- + +When ``ConditionRefresh`` is rejected (``condition_replay_strategy`` = +``read`` / ``auto``) the plugin browses each ``alarm_source`` for AlarmCondition +instances - hierarchical Object children plus targets of the ``HasCondition`` +reference (``i=9006``), recursing one level - reads their current state, and +drives the same state machine. It mirrors ``ConditionRefresh`` semantics: + +- only conditions with ``Retain == true`` form the active set; +- a ``Disabled`` condition (``EnabledState/Id = false``) is never active; +- a transient browse/read failure keeps the condition (it is not dropped and + must not be reconciled away). + +**Safety gate (issue #478).** The reconcile that clears faults for conditions +no longer present NEVER fires for a source that has not positively exposed at +least one Condition instance node. An EventNotifier-only server (S7-1500 models +no per-condition instance nodes) returns a successful but empty browse; rather +than wiping the tracked active alarms, the plugin preserves them, logs an +operator warning, and relies on ``ConditionRefresh`` / live transitions. This +prevents the net-negative outcome where a reconnect erased a still-active alarm. + Acknowledge / Confirm round-trip -------------------------------- @@ -336,7 +378,13 @@ Vendor matrix | Vendor / runtime | AlarmConditionType | Notes | +======================+=====================+===============================+ | Siemens S7-1500 | yes (FW V2.9+) | ProDiag, Program_Alarm, | -| | | system diagnostics | +| | | system diagnostics. | +| | | ConditionRefresh + | +| | | Acknowledge supported, NO | +| | | Confirm (set | +| | | require_confirm_for_clear | +| | | false). EventNotifier-only: | +| | | no condition instance nodes | +----------------------+---------------------+-------------------------------+ | Beckhoff TwinCAT 3 | yes (TF6100) | ``Confirm`` propagates to | | | | PLC code; ``Ack`` does not | diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/Dockerfile.gateway b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/Dockerfile.gateway index bb8d68a78..2887336b1 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/Dockerfile.gateway +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/Dockerfile.gateway @@ -64,5 +64,11 @@ COPY --from=builder ${COLCON_WS}/install/ ${COLCON_WS}/install/ RUN mkdir -p /config COPY src/ros2_medkit_plugins/ros2_medkit_opcua/config/tank_demo_nodes.yaml /config/tank_nodes.yaml COPY src/ros2_medkit_plugins/ros2_medkit_opcua/docker/gateway_params.yaml /config/gateway_params.yaml +# Secure field profile for appliance deployments (auth + TLS + restricted CORS +# + rate limiting on). The demo gateway_params.yaml above is wide open and is +# for local testing only. Appliance / plant-network deployments must launch +# with --params-file /config/gateway_params.secure.yaml and provision the +# REQUIRED secrets + certificate (see ros2_medkit_gateway design/hardening.rst). +COPY src/ros2_medkit_gateway/config/gateway_params.secure.yaml /config/gateway_params.secure.yaml EXPOSE 8080 diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/gateway_params.yaml b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/gateway_params.yaml index 9ea92f6d3..a331166cb 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/gateway_params.yaml +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/gateway_params.yaml @@ -1,3 +1,6 @@ +# DEMO ONLY - wide open (no auth, no TLS, CORS "*"). For appliance / plant +# deployments use /config/gateway_params.secure.yaml instead (auth + TLS + +# restricted CORS + rate limiting). See ros2_medkit_gateway design/hardening.rst. ros2_medkit_gateway: ros__parameters: server: diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/test_alarm_server/Dockerfile b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/test_alarm_server/Dockerfile index 775a6477f..e21c6a5ce 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/test_alarm_server/Dockerfile +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/docker/test_alarm_server/Dockerfile @@ -13,7 +13,7 @@ FROM ubuntu:24.04 AS builder ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends \ - build-essential cmake git python3 ca-certificates \ + build-essential cmake git python3 ca-certificates libssl-dev \ && rm -rf /var/lib/apt/lists/* # Pin matches the SHA used by the plugin's open62541pp dependency @@ -34,6 +34,7 @@ RUN cmake -S /src/open62541 -B /tmp/o62-build \ -DBUILD_SHARED_LIBS=OFF \ -DUA_ENABLE_SUBSCRIPTIONS_EVENTS=ON \ -DUA_ENABLE_SUBSCRIPTIONS_ALARMS_CONDITIONS=ON \ + -DUA_ENABLE_ENCRYPTION=OPENSSL \ -DUA_NAMESPACE_ZERO=FULL \ -DUA_BUILD_EXAMPLES=OFF \ -DUA_BUILD_TOOLS=OFF \ @@ -42,15 +43,16 @@ RUN cmake -S /src/open62541 -B /tmp/o62-build \ -DCMAKE_C_FLAGS=-w \ && cmake --build /tmp/o62-build -j"$(nproc)" --target install +# The static lib is built with the OpenSSL encryption backend, so link OpenSSL. RUN g++ -O2 -std=c++17 -w \ -I/opt/open62541/include \ /src/test_alarm_server/test_alarm_server.cpp \ /opt/open62541/lib/libopen62541.a \ - -lpthread -o /opt/test_alarm_server + -lssl -lcrypto -lpthread -o /opt/test_alarm_server FROM ubuntu:24.04 RUN apt-get update && apt-get install -y --no-install-recommends \ - libstdc++6 ca-certificates \ + libstdc++6 ca-certificates libssl3 openssl \ && rm -rf /var/lib/apt/lists/* COPY --from=builder /opt/test_alarm_server /usr/local/bin/test_alarm_server EXPOSE 4842 diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/alarm_state_machine.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/alarm_state_machine.hpp index 3b7119c0a..37579cd30 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/alarm_state_machine.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/alarm_state_machine.hpp @@ -86,6 +86,15 @@ struct AlarmEventInput { /// 5. ActiveState == false -> Healed or Cleared based on /// Acked + Confirmed /// +/// ``require_confirm_for_clear`` (default true) controls rule 5: a fully +/// cleared alarm needs both Acked AND Confirmed. Some servers do NOT implement +/// the optional Confirm transition (Siemens S7-1500 supports Acknowledge / +/// ConditionRefresh / AddComment but not Confirm), so ConfirmedState never +/// becomes true and the alarm would latch in Healed forever. Setting the flag +/// false lets such a deployment clear on Acknowledge alone. Default true keeps +/// the spec-strict behaviour for conformant servers (no regression). Needs +/// real-S7-1500 validation. +/// /// Retain is intentionally not modeled by this state machine and does not /// affect ``compute()``. Per Part 9 §5.5.2.10 it controls visibility during /// ConditionRefresh bursts rather than the lifecycle mapping implemented @@ -100,7 +109,8 @@ class AlarmStateMachine { AlarmAction action; }; - static Outcome compute(SovdAlarmStatus prev_status, const AlarmEventInput & in) { + static Outcome compute(SovdAlarmStatus prev_status, const AlarmEventInput & in, + bool require_confirm_for_clear = true) { // Rule 1: branch events are recorded in the fault_manager event log // (caller's responsibility) but never advance the primary lifecycle. if (in.branch_id_present) { @@ -141,8 +151,11 @@ class AlarmStateMachine { // Rule 5: ActiveState=false. Cleared only when both Acked AND Confirmed // have been completed by the operator; until then the alarm is latched - // (HEALED) so the operator sees the unfinished workflow item. - if (in.acked_state && in.confirmed_state) { + // (HEALED) so the operator sees the unfinished workflow item. When + // ``require_confirm_for_clear`` is false (Confirm-less server, e.g. S7-1500) + // the Confirmed gate is dropped so Acknowledge alone clears the alarm. + const bool clear_ready = in.acked_state && (in.confirmed_state || !require_confirm_for_clear); + if (clear_ready) { if (prev_status == SovdAlarmStatus::Cleared || prev_status == SovdAlarmStatus::Suppressed) { return {SovdAlarmStatus::Cleared, AlarmAction::NoOp}; } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp index 6b26fe8bc..d34ce6d6c 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/node_map.hpp @@ -33,6 +33,33 @@ struct AlarmConfig { bool above_threshold{true}; // true = alarm when value > threshold }; +/// Reference to one extra OPC-UA event field to select and surface as an +/// associated value (issue #389). Used for vendor associated values such as +/// Siemens Program_Alarm ``SD_1``..``SD_n``. ``label`` is what appears in the +/// fault description; ``name``/``namespace_index`` address the event field via +/// a SimpleAttributeOperand on BaseEventType. +struct AssociatedValueRef { + uint16_t namespace_index{0}; + std::string name; + std::string label; // defaults to ``name`` when unset +}; + +/// One condition-identity mapping inside an ``event_alarms`` source (issue +/// #389). A single OPC-UA source (e.g. the Server object as a catch-all, or a +/// real owning Object) can emit many distinct conditions; each mapping routes +/// a subset to its own SOVD fault. Match fields are AND-combined; an empty +/// match field is a wildcard. The first mapping whose non-empty match fields +/// all equal the observed event wins (declaration order = precedence). +struct AlarmMapping { + std::string match_condition_name; // ConditionType.ConditionName (empty = any) + std::string match_source_node; // event SourceNode id string (empty = any) + std::string match_event_type; // event EventType id string (empty = any) + + std::string fault_code; + std::string severity_override; + std::string message_override; +}; + /// Configuration for a native OPC-UA AlarmConditionType event subscription /// (issue #386). The plugin subscribes to events emitted from /// ``alarm_source`` and bridges them through ``AlarmStateMachine`` into @@ -48,7 +75,9 @@ struct AlarmEventConfig { /// SOVD entity that should host the resulting fault. std::string entity_id; - /// SOVD fault code (e.g. ``PLC_OVERPRESSURE``). + /// Source-level / fallback SOVD fault code (e.g. ``PLC_OVERPRESSURE``). + /// Used when no ``mappings`` entry matches an observed event. May be empty + /// when every alarm is routed through ``mappings``. std::string fault_code; /// Optional severity override. When empty, ``AlarmStateMachine`` derives @@ -59,6 +88,24 @@ struct AlarmEventConfig { /// Optional friendly message override; falls back to the event's /// ``Message`` field when empty. std::string message_override; + + /// Issue #389: per-condition-identity mappings (multi-alarm). Resolved in + /// declaration order; first match wins, falling back to the source-level + /// ``fault_code`` above. + std::vector mappings; + + /// Issue #389: extra event fields to append to the fault description. + std::vector associated_values; +}; + +/// Result of resolving an observed event against an ``AlarmEventConfig`` +/// (issue #389). ``matched`` is false when neither a mapping nor the +/// source-level fault_code applies (the event should be ignored). +struct ResolvedAlarm { + std::string fault_code; + std::string severity_override; + std::string message_override; + bool matched{false}; }; /// Mapping entry: OPC-UA NodeId -> SOVD entity data point @@ -127,9 +174,16 @@ class NodeMap { } /// Find an event-mode alarm by ``(entity_id, fault_code)`` (used by the - /// SOVD ``acknowledge_fault`` / ``confirm_fault`` operations). + /// SOVD ``acknowledge_fault`` / ``confirm_fault`` operations). Matches the + /// source-level fault_code or any of the entry's mapping fault_codes. const AlarmEventConfig * find_event_alarm(const std::string & entity_id, const std::string & fault_code) const; + /// Resolve an observed event against a config's mappings (issue #389). + /// First matching mapping wins; falls back to the source-level fault_code. + /// Pure / static so the precedence rules are unit-testable without a server. + static ResolvedAlarm resolve_alarm(const AlarmEventConfig & cfg, const std::string & condition_name, + const std::string & source_node, const std::string & event_type); + /// Get derived SOVD entity definitions const std::vector & entity_defs() const { return entity_defs_; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_client.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_client.hpp index ce058ad77..3c46f69c2 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_client.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_client.hpp @@ -42,12 +42,64 @@ struct ReadResult { /// Callback for subscription data changes using DataChangeCallback = std::function; +/// OPC-UA SecurityPolicy selector. Maps to the policy URI sent during the +/// SecureChannel handshake. ``None`` is the only value that works without a +/// client application-instance certificate; everything else requires +/// ``client_cert_path`` + ``client_key_path`` and an encryption-enabled build. +enum class SecurityPolicy { None, Basic256Sha256, Aes128Sha256RsaOaep, Aes256Sha256RsaPss }; + +/// OPC-UA MessageSecurityMode. ``None`` = cleartext; ``Sign`` = signed but not +/// encrypted; ``SignAndEncrypt`` = signed + encrypted. Independent of the +/// SecurityPolicy selector above (the server endpoint must offer the pair). +enum class SecurityMode { None, Sign, SignAndEncrypt }; + +/// Session user identity token type. ``Anonymous`` needs no credentials; +/// ``UsernamePassword`` uses ``username``/``password``; ``X509`` presents a +/// user certificate (``user_cert_path``). +enum class UserAuthMode { Anonymous, UsernamePassword, X509 }; + /// Configuration for OPC-UA connection struct OpcuaClientConfig { std::string endpoint_url = "opc.tcp://localhost:4840"; std::chrono::milliseconds connect_timeout{5000}; std::chrono::milliseconds reconnect_interval{3000}; - // TODO: OPC-UA security (certificates, Basic256Sha256) + + // --- SecureChannel security (opt-in; defaults reproduce the legacy + // anonymous + SecurityPolicy=None behaviour) --- + SecurityPolicy security_policy{SecurityPolicy::None}; + SecurityMode security_mode{SecurityMode::None}; + + /// Client application-instance certificate (X.509 v3, DER-encoded) and its + /// private key (PEM-encoded). Required for any SecurityPolicy other than + /// None. Empty when running unsecured. + std::string client_cert_path; + std::string client_key_path; + + /// Application URI advertised by the client. MUST match the URI entry in + /// the certificate's SubjectAltName, otherwise the server rejects the + /// SecureChannel with BadCertificateUriInvalid. Empty leaves the + /// open62541 default ("urn:open62541.client.application"). + std::string application_uri; + + /// Trusted server / CA certificates (DER-encoded) forming the trust store. + /// Used to validate the server certificate when ``reject_untrusted`` is + /// true. + std::vector trust_list_paths; + + /// When true (default) the server certificate must chain to an entry in + /// ``trust_list_paths``; an untrusted server is rejected. When false the + /// client accepts any server certificate (accept-any: INSECURE, lab only). + /// This is NOT trust-on-first-use: nothing is pinned, so a later cert change + /// is not detected. + bool reject_untrusted{true}; + + // --- Session user identity --- + UserAuthMode user_auth_mode{UserAuthMode::Anonymous}; + std::string username; + std::string password; + /// User X.509 token certificate (DER-encoded), used when + /// ``user_auth_mode == X509``. + std::string user_cert_path; }; /// RAII wrapper around open62541pp::Client with auto-reconnect @@ -210,9 +262,92 @@ class OpcuaClient { static tl::expected classify_call_result(uint32_t overall_status_code, const std::vector & arg_results); + /// Current state of one OPC-UA Condition instance, read directly from the + /// address space (issue #389 reconnect replay). Used as the read-based + /// fallback when ConditionRefresh is unavailable: instead of waiting for + /// the server to replay buffered events, the client browses the alarm + /// source for its condition instances and reads their state variables. + struct ConditionStateSnapshot { + opcua::NodeId condition_id; + std::string condition_name; // ConditionType.ConditionName (issue #389 mapping) + bool enabled_state{true}; + bool active_state{false}; + bool acked_state{true}; + bool confirmed_state{true}; + bool retain{false}; + uint16_t severity{0}; + std::string message; + /// True when the condition's ActiveState/Id node resolved but its value + /// could not be read this scan (transient read failure). The caller must + /// treat the condition as still present (do not clear it) even though its + /// state fields fell back to their defaults. + bool state_read_failed{false}; + }; + + /// Browse ``source_node`` for AlarmCondition instances and read their + /// current state (ActiveState/Id, AckedState/Id, ConfirmedState/Id, + /// EnabledState/Id, Retain, Severity, Message). Only immediate children + /// that expose an ``ActiveState`` node are treated as conditions; other + /// children are skipped. + /// + /// ``scan_ok`` (when provided) distinguishes a successful empty scan from a + /// failed one: it is set true only when the source browse completed, and + /// false when disconnected or when the browse raised a Bad status. Callers + /// must NOT reconcile (clear) active faults for a source whose scan failed, + /// otherwise a transient disconnect would falsely clear live alarms. + /// + /// NOTE: this read-based fallback assumes each AlarmCondition instance is + /// browseable as an immediate child of its alarm source via + /// HierarchicalReferences. That holds for the open62541 reference server, + /// but has NOT yet been validated against a real Siemens S7-1500 (whose + /// address-space layout for condition instances must be confirmed before + /// this path can be claimed to work there). + std::vector read_source_conditions(const opcua::NodeId & source_node, + bool * scan_ok = nullptr); + /// Get server description string (for status endpoint) std::string server_description() const; + // --- Security config parsing helpers (pure, unit-testable without a + // server). Case-insensitive; unknown input falls back to the safe default + // and sets ``*ok = false`` when provided. --- + + /// Parse a SecurityPolicy name ("None", "Basic256Sha256", + /// "Aes128Sha256RsaOaep", "Aes256Sha256RsaPss"). Falls back to None. + static SecurityPolicy parse_security_policy(const std::string & name, bool * ok = nullptr); + + /// Map a SecurityPolicy to its OPC-UA policy URI + /// (e.g. "http://opcfoundation.org/UA/SecurityPolicy#Basic256Sha256"). + static std::string security_policy_uri(SecurityPolicy policy); + + /// Parse a MessageSecurityMode name ("None", "Sign", "SignAndEncrypt"). + /// Falls back to None. + static SecurityMode parse_security_mode(const std::string & name, bool * ok = nullptr); + + /// Parse a user-identity mode ("Anonymous", "Username"/"UsernamePassword", + /// "X509"/"Certificate"). Falls back to Anonymous. + static UserAuthMode parse_user_auth_mode(const std::string & name, bool * ok = nullptr); + + /// True when the config requests a secured SecureChannel (any + /// SecurityPolicy other than None, or any MessageSecurityMode other than + /// None). Username/password or X.509 identity alone does NOT imply an + /// encrypted channel. + static bool requires_secure_channel(const OpcuaClientConfig & config); + + /// True when user credentials (username/password or X.509) would be sent + /// over an unencrypted SecureChannel: a non-anonymous identity combined with + /// no secured channel. Such credentials can be intercepted on the wire and + /// the caller is expected to warn loudly. + static bool credentials_sent_in_clear(const OpcuaClientConfig & config); + + /// True when SecurityPolicy and MessageSecurityMode are set inconsistently: + /// exactly one of them is None. OPC-UA (Part 4 §7.37) requires them to move + /// together - a mode of Sign/SignAndEncrypt with SecurityPolicy=None would + /// leave the policy URI unpinned (silently allowing a downgraded channel), + /// and a policy without a mode is equally malformed. A conflicting config is + /// rejected before contacting the server. + static bool security_config_conflict(const OpcuaClientConfig & config); + private: struct Impl; std::unique_ptr impl_; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp index 5141243d7..e655d49d1 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_plugin.hpp @@ -126,6 +126,10 @@ class OpcuaPlugin : public ros2_medkit_gateway::GatewayPlugin, // Publish PLC values to ROS 2 topics (called after each poll) void publish_values(const PollSnapshot & snap); + // Log the effective OPC-UA security profile (policy / mode / user auth) at + // startup; warns when running unsecured. + void log_security_profile() const; + // Build JSON response for data endpoint nlohmann::json build_data_response(const std::string & entity_id) const; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp index cbb3a6fb9..fbe7544fb 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/include/ros2_medkit_opcua/opcua_poller.hpp @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -54,9 +55,10 @@ struct AlarmEventDelivery { std::string entity_id; SovdAlarmStatus next_status; AlarmAction action; - uint16_t severity{0}; // raw OPC-UA Severity 1-1000 - std::string message; // event Message field (or AlarmEventConfig override) - std::string condition_id; // string form of OPC-UA ConditionId + uint16_t severity{0}; // raw OPC-UA Severity 1-1000 + std::string severity_override; // resolved severity bucket override (issue #389), empty = derive from severity + std::string message; // event Message field (or resolved override) + std::string condition_id; // string form of OPC-UA ConditionId }; using EventAlarmCallback = std::function; @@ -70,18 +72,45 @@ struct ConditionRuntime { opcua::ByteString latest_event_id; std::string entity_id; std::string fault_code; + /// String form of the alarm source NodeId that owns this condition. Used by + /// the read-based reconcile to skip clearing conditions whose source scan + /// failed (issue #479). + std::string source_id; SovdAlarmStatus last_status{SovdAlarmStatus::Suppressed}; }; /// Callback after each poll cycle (for publishing values to ROS 2 topics) using PollCallback = std::function; +/// Strategy for replaying already-active OPC-UA conditions on (re)subscribe +/// (issue #389). Some servers (e.g. Siemens S7-1500) reject the standard +/// ConditionRefresh method with BadNodeIdUnknown, so the active alarm set is +/// otherwise lost across a reconnect / gateway restart. +/// - Method: call ConditionRefresh only (Part 9 §5.5.7 standard path). +/// - Read: skip ConditionRefresh; browse the alarm sources and read each +/// condition's current state, then reconcile the fault set. +/// - Auto: try ConditionRefresh first; on rejection fall back to Read. +/// - Off: no replay (only live transitions surface). +enum class ConditionReplayStrategy { Method, Read, Auto, Off }; + /// Configuration for the poller struct PollerConfig { bool prefer_subscriptions{false}; // poll mode by default (subscriptions need event loop) double subscription_interval_ms{500.0}; std::chrono::milliseconds poll_interval{1000}; std::chrono::milliseconds reconnect_interval{5000}; + /// Active-condition replay strategy on (re)subscribe (issue #389). + /// Default Auto: ConditionRefresh with a read-based fallback so hardened + /// servers that reject the method still recover their active alarms. + ConditionReplayStrategy condition_replay_strategy{ConditionReplayStrategy::Auto}; + /// When true (default) a native AlarmCondition auto-clears only after BOTH + /// Acknowledge and Confirm (OPC-UA Part 9 §5.7). Set false for servers that + /// do not implement the optional Confirm transition (e.g. Siemens S7-1500, + /// which supports Acknowledge / ConditionRefresh / AddComment but not + /// Confirm) so the alarm clears on Acknowledge alone instead of latching + /// forever. Default true preserves the spec-strict behaviour. Issue #478; + /// needs real-S7-1500 validation. + bool require_confirm_for_clear{true}; /// Optional warn-level log sink for operator-visible failures inside the /// poll thread. Set by the plugin owning the poller to its log_warn /// helper so events like ``ConditionRefresh failed`` reach the ROS 2 log @@ -136,6 +165,51 @@ class OpcuaPoller { /// currently active for the entity. std::optional lookup_condition(const std::string & entity_id, const std::string & fault_code) const; + /// Parse a replay-strategy name ("method", "read", "auto", "off"). + /// Case-insensitive; unknown input falls back to Auto. + static ConditionReplayStrategy parse_replay_strategy(const std::string & name); + + /// How a read-scan condition snapshot participates in the read-based replay. + /// - Feed: interesting + reliable -> mark seen and drive the state + /// machine (mirrors a ConditionRefresh replay event). + /// - KeepOnly: present but unreliable (transient read/browse failure) -> + /// mark seen so reconcile does not clear it, but do NOT feed an + /// unreliable state into the state machine. + /// - Skip: not interesting (Disabled, or Retain=false) -> neither + /// marked seen nor fed, so a stale tracked fault can reconcile. + enum class ReadReplayDisposition { Feed, KeepOnly, Skip }; + + /// Classify one read-scan condition snapshot for reconnect replay (issue + /// #478). Mirrors ConditionRefresh's interesting-state filter: a Disabled + /// condition (EnabledState/Id=false) is never active, and only conditions + /// with Retain==true are replayed; a transient read failure is kept but not + /// trusted. Pure / static so the filter is unit-testable without a server. + static ReadReplayDisposition classify_read_snapshot(const OpcuaClient::ConditionStateSnapshot & snap); + + /// Decide whether a tracked condition should be cleared after a read-based + /// replay scan (issue #479 / #478). A condition is cleared only when it was + /// active, was NOT observed this scan (``seen``), its owning source scan + /// succeeded (not in ``failed_sources``), AND its source is positively known + /// to model Condition instances as address-space nodes (in + /// ``modeled_sources``). The last gate is the #478 safety guarantee: an + /// EventNotifier-only server (e.g. S7-1500) exposes no per-condition nodes, + /// so its empty read scan must never wipe the tracked fault set. + /// Pure and static so the false-clear guard is unit-testable without a server. + static bool should_clear_condition(SovdAlarmStatus last_status, const std::string & condition_id, + const std::string & source_id, const std::set & seen, + const std::set & failed_sources, + const std::set & modeled_sources); + + /// Decide whether a tracked condition should be cleared after a completed + /// ConditionRefresh burst (issue #480). Cleared only when it was active + /// (Confirmed/Healed) and its ConditionId was NOT among the ones the server + /// replayed during the burst (``seen``). A delivered RefreshEnd is an + /// authoritative, subscription-wide replay, so - unlike the read fallback - + /// no per-source modeling gate is needed. Pure and static so it is + /// unit-testable without a server. + static bool should_clear_after_refresh(SovdAlarmStatus last_status, const std::string & condition_id, + const std::set & seen); + private: void poll_loop(); void do_poll(); @@ -148,7 +222,54 @@ class OpcuaPoller { void on_event(const AlarmEventConfig & cfg, const std::vector & values, const opcua::NodeId & source_node, const opcua::NodeId & event_type, const opcua::NodeId & condition_id); - void condition_refresh(); + + /// Run the configured active-condition replay (issue #389). Dispatches to + /// ConditionRefresh and/or the read-based fallback per + /// ``condition_replay_strategy``. + void replay_active_conditions(); + /// Call OPC-UA ConditionRefresh. Returns true when the server accepted it. + bool try_condition_refresh(); + /// Read-based fallback: browse each alarm source, read current condition + /// state, drive the state machine, and reconcile the fault set. + void read_fallback_replay(); + /// Clear faults for conditions that were active before the replay but are + /// no longer present/active in the read scan (``seen`` = condition ids + /// observed this scan). Conditions whose owning source is in + /// ``failed_sources`` (its browse failed this scan) are left untouched so a + /// transient disconnect cannot falsely clear live alarms (issue #479). + /// ``modeled_sources`` lists the sources positively known to expose Condition + /// instance nodes; conditions whose source is absent from it are never + /// cleared, so an EventNotifier-only server cannot wipe live alarms (#478). + void reconcile_after_read(const std::set & seen, const std::set & failed_sources, + const std::set & modeled_sources); + + /// Reconcile the tracked fault set after a completed ConditionRefresh burst + /// (issue #480). ConditionRefresh (Part 9 §5.5.7) replays only Retain=true + /// conditions between RefreshStart and RefreshEnd, so a condition that fully + /// cleared while we were offline is silently absent from the burst and would + /// otherwise stay latched. ``seen`` is the set of ConditionIds the server + /// replayed during the burst; any tracked-active condition NOT in it cleared + /// offline and is cleared here. Unlike the read fallback this needs no + /// per-source modeling gate: a delivered RefreshEnd is an authoritative, + /// subscription-wide replay (works on EventNotifier-only servers too). + void reconcile_after_refresh(const std::set & seen); + + /// Dispatch a batch of ClearFault deliveries to the event-alarm callback. + /// Shared by the read- and refresh-based reconcile paths. Must be called with + /// no poller lock held (invokes the user callback). + void dispatch_condition_clears(const std::vector & clears); + + /// Emit an operator-visible warning via the configured ``log_warn`` sink, + /// falling back to the poller's ROS 2 logger. + void warn_operator(const std::string & msg); + + /// Apply one condition state observation (from a live event or a read scan) + /// to the tracked condition map + state machine, dispatching the resulting + /// fault action. ``event_id`` is the live EventId for ack/confirm (null for + /// read-based observations). + void apply_condition_state(const AlarmEventConfig & cfg, const opcua::NodeId & condition_id, + const AlarmEventInput & input, uint16_t severity, const std::string & message, + const opcua::ByteString * event_id); OpcuaClient & client_; const NodeMap & node_map_; @@ -178,16 +299,30 @@ class OpcuaPoller { mutable std::shared_mutex conditions_mutex_; std::unordered_map conditions_; // ConditionId stringForm -> runtime - // ConditionRefresh bracketing state. open62541 sends the buffered - // historical condition burst between RefreshStartEvent and - // RefreshEndEvent; we apply each event during the burst as normal but - // use this flag in tests / diagnostics. Production note: per Part 9 - // §5.5.7 the spec also requires the client to ignore ConditionRefresh - // notifications carrying ``Retain=false`` for non-current branches; the - // state machine already drops branches via BranchId, and the trampoline - // cannot distinguish refresh-burst events from live events without - // tracking the EventType, which we do explicitly below. + // Issue #478 read-fallback safety state. Touched only from the replay path + // (start() before the poll thread exists, then the poll thread on reconnect), + // never concurrently, so no extra lock is needed. + // + // ``read_modeled_sources_``: alarm sources positively known to expose + // Condition instances as address-space nodes (a read scan yielded >=1 + // ActiveState/Id condition). Only such sources are eligible for read-based + // clearing; EventNotifier-only servers (S7-1500) never enter this set, so + // their tracked faults survive an empty read scan. + std::set read_modeled_sources_; + // Sources already warned about as read-fallback-unsupported (warn once each). + std::set read_unsupported_warned_sources_; + + // ConditionRefresh bracketing state. open62541 sends the buffered historical + // condition burst between RefreshStartEvent and RefreshEndEvent; we apply each + // event during the burst as normal. On RefreshStart we clear ``refresh_seen_`` + // and accumulate every replayed ConditionId there; on RefreshEnd we reconcile + // (clear tracked-active conditions the server did NOT replay - they cleared + // offline). Per Part 9 §5.5.7 ConditionRefresh replays only Retain=true / + // current-branch conditions; the state machine already drops non-current + // branches via BranchId. Touched only on the poll thread (the burst is + // delivered via run_iterate there), same convention as read_modeled_sources_. std::atomic condition_refresh_in_progress_{false}; + std::set refresh_seen_; // Throttle the warn-level log emitted from condition_refresh() failures. // Reset to false on each fresh subscribe in setup_event_subscriptions() diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml b/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml index d4ef796e2..a91716cb0 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/package.xml @@ -31,6 +31,11 @@ ament_lint_common ament_cmake_clang_format ament_cmake_gtest + + ros2_medkit_fault_manager ament_cmake diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp index 755faafda..3fc90e2cd 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/node_map.cpp @@ -175,29 +175,34 @@ bool NodeMap::load(const std::string & yaml_path) { auto_browse_ = root["auto_browse"].as(); } - // Node entries - auto nodes = root["nodes"]; - if (!nodes || !nodes.IsSequence()) { - // Clear any stale state from previous load - entries_.clear(); - entity_index_.clear(); - node_id_index_.clear(); - entity_defs_.clear(); - return false; - } - + // Node entries. The ``nodes:`` section is optional: a config may declare + // only ``event_alarms:`` (native AlarmCondition subscriptions with no + // scalar data points to poll). The final emptiness check below rejects a + // file that declares neither. entries_.clear(); entity_index_.clear(); node_id_index_.clear(); - if (nodes.size() > 10000) { + auto nodes = root["nodes"]; + // A ``nodes:`` key that is present but neither null nor a sequence (e.g. a + // scalar or a map) is a config error, not an absent section. Fail loudly + // instead of silently dropping every node mapping. An explicitly empty + // ``nodes:`` (null) stays valid and is handled by the emptiness check below. + if (nodes && !nodes.IsNull() && !nodes.IsSequence()) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "'nodes:' must be a sequence of node entries - refusing to load"); + return false; + } + const bool has_nodes = nodes && nodes.IsSequence(); + + if (has_nodes && nodes.size() > 10000) { RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), "Node map has %zu entries (max 10000) - refusing to load to prevent resource exhaustion", nodes.size()); return false; } - for (size_t i = 0; i < nodes.size(); ++i) { + for (size_t i = 0; has_nodes && i < nodes.size(); ++i) { const auto & n = nodes[i]; // Validate required fields @@ -320,18 +325,64 @@ bool NodeMap::load(const std::string & yaml_path) { } for (size_t i = 0; i < event_alarms_node.size(); ++i) { const auto & a = event_alarms_node[i]; - if (!a["alarm_source"] || !a["entity_id"] || !a["fault_code"]) { + if (!a["alarm_source"] || !a["entity_id"]) { RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), - "event_alarms[%zu] missing alarm_source/entity_id/fault_code - skipping", i); + "event_alarms[%zu] missing alarm_source/entity_id - skipping", i); continue; } AlarmEventConfig cfg; cfg.source_node_id_str = a["alarm_source"].as(); cfg.source_node_id = parse_node_id(cfg.source_node_id_str); cfg.entity_id = a["entity_id"].as(); - cfg.fault_code = a["fault_code"].as(); + cfg.fault_code = a["fault_code"].as(""); cfg.severity_override = a["severity_override"].as(""); cfg.message_override = a["message"].as(""); + + // Issue #389: per-condition-identity mappings (multi-alarm). + if (a["mappings"] && a["mappings"].IsSequence()) { + for (const auto & m : a["mappings"]) { + if (!m["fault_code"]) { + RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), + "event_alarms[%zu] mapping missing fault_code - skipping", i); + continue; + } + AlarmMapping mapping; + mapping.match_condition_name = m["condition_name"].as(""); + mapping.match_source_node = m["source_node"].as(""); + mapping.match_event_type = m["event_type"].as(""); + mapping.fault_code = m["fault_code"].as(); + mapping.severity_override = m["severity_override"].as(""); + mapping.message_override = m["message"].as(""); + cfg.mappings.push_back(std::move(mapping)); + } + } + + // Issue #389: extra associated-value event fields (e.g. SD_1..SD_n). + if (a["associated_values"] && a["associated_values"].IsSequence()) { + for (const auto & v : a["associated_values"]) { + AssociatedValueRef ref; + if (v.IsScalar()) { + ref.name = v.as(); + } else { + ref.namespace_index = v["namespace_index"].as(0); + ref.name = v["name"].as(""); + ref.label = v["label"].as(""); + } + if (ref.name.empty()) { + continue; + } + if (ref.label.empty()) { + ref.label = ref.name; + } + cfg.associated_values.push_back(std::move(ref)); + } + } + + if (cfg.fault_code.empty() && cfg.mappings.empty()) { + RCLCPP_WARN(rclcpp::get_logger("opcua.node_map"), + "event_alarms[%zu] has neither fault_code nor mappings - skipping", i); + continue; + } event_alarms_.push_back(std::move(cfg)); } } @@ -363,8 +414,36 @@ bool NodeMap::load(const std::string & yaml_path) { // even well-defined. { std::set> event_keys; + // A given (entity_id, fault_code) must be produced by at most ONE event + // alarm mapping. Distinct OPC-UA conditions are tracked separately by + // ConditionId at runtime and each reports, but a ClearFault is keyed on + // fault_code alone: if two conditions resolve to the same code, one + // condition clearing wipes the shared fault while the other is still + // active, producing an undebuggable flap. Reject duplicates rather than + // silently deduping them into a set (bburda review on PR #387). + auto register_event_key = [&](const std::string & entity_id, const std::string & fault_code) -> bool { + if (fault_code.empty()) { + return true; + } + if (!event_keys.emplace(entity_id, fault_code).second) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "Duplicate (entity_id=%s, fault_code=%s) declared by more than one event_alarms " + "source/mapping - distinct conditions must not share a fault_code (a clear on one " + "would wipe the other)", + entity_id.c_str(), fault_code.c_str()); + return false; + } + return true; + }; for (const auto & cfg : event_alarms_) { - event_keys.emplace(cfg.entity_id, cfg.fault_code); + if (!register_event_key(cfg.entity_id, cfg.fault_code)) { + return false; + } + for (const auto & m : cfg.mappings) { + if (!register_event_key(cfg.entity_id, m.fault_code)) { + return false; + } + } } for (const auto & entry : entries_) { if (entry.alarm.has_value() && event_keys.count({entry.entity_id, entry.alarm->fault_code}) > 0) { @@ -377,6 +456,16 @@ bool NodeMap::load(const std::string & yaml_path) { } } + // A config that declares neither a 'nodes:' section nor any event alarms + // carries no mappings and is almost always a mistake (wrong file / typo). + // A present-but-empty 'nodes:' section is still a valid config. + if (!has_nodes && event_alarms_.empty()) { + RCLCPP_ERROR(rclcpp::get_logger("opcua.node_map"), + "Node map has neither 'nodes:' nor 'event_alarms:' entries - nothing to map"); + entity_defs_.clear(); + return false; + } + build_entity_defs(); return true; @@ -389,13 +478,53 @@ bool NodeMap::load(const std::string & yaml_path) { const AlarmEventConfig * NodeMap::find_event_alarm(const std::string & entity_id, const std::string & fault_code) const { for (const auto & cfg : event_alarms_) { - if (cfg.entity_id == entity_id && cfg.fault_code == fault_code) { + if (cfg.entity_id != entity_id) { + continue; + } + if (cfg.fault_code == fault_code) { return &cfg; } + for (const auto & m : cfg.mappings) { + if (m.fault_code == fault_code) { + return &cfg; + } + } } return nullptr; } +ResolvedAlarm NodeMap::resolve_alarm(const AlarmEventConfig & cfg, const std::string & condition_name, + const std::string & source_node, const std::string & event_type) { + ResolvedAlarm out; + // First mapping whose non-empty match fields all equal the observed event + // wins (declaration order = precedence). + for (const auto & m : cfg.mappings) { + if (!m.match_condition_name.empty() && m.match_condition_name != condition_name) { + continue; + } + if (!m.match_source_node.empty() && m.match_source_node != source_node) { + continue; + } + if (!m.match_event_type.empty() && m.match_event_type != event_type) { + continue; + } + out.fault_code = m.fault_code; + // A mapping-level override wins; otherwise inherit the source-level one. + out.severity_override = m.severity_override.empty() ? cfg.severity_override : m.severity_override; + out.message_override = m.message_override.empty() ? cfg.message_override : m.message_override; + out.matched = true; + return out; + } + // No mapping matched: fall back to the source-level fault_code (catch-all). + if (!cfg.fault_code.empty()) { + out.fault_code = cfg.fault_code; + out.severity_override = cfg.severity_override; + out.message_override = cfg.message_override; + out.matched = true; + } + return out; +} + std::vector NodeMap::entries_for_entity(const std::string & entity_id) const { std::vector result; auto it = entity_index_.find(entity_id); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp index f26a13e50..c11b16476 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_client.cpp @@ -16,8 +16,11 @@ #include #include +#include #include #include +#include +#include #include #include #include @@ -26,6 +29,10 @@ #include #include +#include +#ifdef UA_ENABLE_ENCRYPTION +#include +#endif #include #include @@ -134,6 +141,26 @@ OpcuaValue variant_to_value(const opcua::Variant & var) { return std::string(""); } +// Lower-case copy for case-insensitive config parsing. +std::string to_lower(const std::string & s) { + std::string out = s; + std::transform(out.begin(), out.end(), out.begin(), [](unsigned char c) { + return static_cast(std::tolower(c)); + }); + return out; +} + +// Read a whole file into an opcua::ByteString (binary-safe). Returns an empty +// ByteString on failure; the caller treats empty cert/key as a config error. +opcua::ByteString read_file_bytes(const std::string & path) { + std::ifstream f(path, std::ios::binary); + if (!f) { + return opcua::ByteString{}; + } + std::string data((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + return opcua::ByteString(std::string_view(data)); +} + opcua::Variant value_to_variant(const OpcuaValue & val) { return std::visit( [](auto && v) -> opcua::Variant { @@ -199,6 +226,238 @@ OpcuaClient::~OpcuaClient() { disconnect(); } +// --- Security config helpers (pure / unit-testable) --- + +SecurityPolicy OpcuaClient::parse_security_policy(const std::string & name, bool * ok) { + if (ok) { + *ok = true; + } + const std::string n = to_lower(name); + if (n.empty() || n == "none") { + return SecurityPolicy::None; + } + if (n == "basic256sha256") { + return SecurityPolicy::Basic256Sha256; + } + if (n == "aes128sha256rsaoaep" || n == "aes128_sha256_rsaoaep") { + return SecurityPolicy::Aes128Sha256RsaOaep; + } + if (n == "aes256sha256rsapss" || n == "aes256_sha256_rsapss") { + return SecurityPolicy::Aes256Sha256RsaPss; + } + if (ok) { + *ok = false; + } + return SecurityPolicy::None; +} + +std::string OpcuaClient::security_policy_uri(SecurityPolicy policy) { + switch (policy) { + case SecurityPolicy::None: + return "http://opcfoundation.org/UA/SecurityPolicy#None"; + case SecurityPolicy::Basic256Sha256: + return "http://opcfoundation.org/UA/SecurityPolicy#Basic256Sha256"; + case SecurityPolicy::Aes128Sha256RsaOaep: + return "http://opcfoundation.org/UA/SecurityPolicy#Aes128_Sha256_RsaOaep"; + case SecurityPolicy::Aes256Sha256RsaPss: + return "http://opcfoundation.org/UA/SecurityPolicy#Aes256_Sha256_RsaPss"; + } + return "http://opcfoundation.org/UA/SecurityPolicy#None"; +} + +SecurityMode OpcuaClient::parse_security_mode(const std::string & name, bool * ok) { + if (ok) { + *ok = true; + } + const std::string n = to_lower(name); + if (n.empty() || n == "none") { + return SecurityMode::None; + } + if (n == "sign") { + return SecurityMode::Sign; + } + if (n == "signandencrypt" || n == "sign_and_encrypt") { + return SecurityMode::SignAndEncrypt; + } + if (ok) { + *ok = false; + } + return SecurityMode::None; +} + +UserAuthMode OpcuaClient::parse_user_auth_mode(const std::string & name, bool * ok) { + if (ok) { + *ok = true; + } + const std::string n = to_lower(name); + if (n.empty() || n == "anonymous") { + return UserAuthMode::Anonymous; + } + if (n == "username" || n == "usernamepassword" || n == "username_password" || n == "password") { + return UserAuthMode::UsernamePassword; + } + if (n == "x509" || n == "certificate" || n == "cert") { + return UserAuthMode::X509; + } + if (ok) { + *ok = false; + } + return UserAuthMode::Anonymous; +} + +bool OpcuaClient::requires_secure_channel(const OpcuaClientConfig & config) { + return config.security_policy != SecurityPolicy::None || config.security_mode != SecurityMode::None; +} + +bool OpcuaClient::credentials_sent_in_clear(const OpcuaClientConfig & config) { + return config.user_auth_mode != UserAuthMode::Anonymous && !requires_secure_channel(config); +} + +bool OpcuaClient::security_config_conflict(const OpcuaClientConfig & config) { + const bool policy_none = config.security_policy == SecurityPolicy::None; + const bool mode_none = config.security_mode == SecurityMode::None; + return policy_none != mode_none; +} + +namespace { + +#ifdef UA_ENABLE_ENCRYPTION +opcua::MessageSecurityMode to_ua_security_mode(SecurityMode mode) { + switch (mode) { + case SecurityMode::None: + return opcua::MessageSecurityMode::None; + case SecurityMode::Sign: + return opcua::MessageSecurityMode::Sign; + case SecurityMode::SignAndEncrypt: + return opcua::MessageSecurityMode::SignAndEncrypt; + } + return opcua::MessageSecurityMode::None; +} +#endif + +// Rebuild ``client`` with the SecureChannel + user-identity profile requested +// by ``cfg``. A fresh ClientConfig is mandatory for secured connections +// because the application-instance certificate and trust list can only be +// supplied at construction (UA_ClientConfig_setDefaultEncryption). Returns +// false on a fatal configuration error (already logged); the caller then +// reports the connect as failed without contacting the server. +bool apply_security_config(opcua::Client & client, const OpcuaClientConfig & cfg) { + // Reject a contradictory SecurityPolicy / MessageSecurityMode pairing before + // building anything. Accepting e.g. security_mode=Sign with + // security_policy=None would leave the policy URI unpinned and silently + // defeat the explicit-selection guarantee below (OPC-UA Part 4 §7.37). + if (OpcuaClient::security_config_conflict(cfg)) { + const bool policy_none = cfg.security_policy == SecurityPolicy::None; + RCLCPP_ERROR(opcua_client_logger(), + "OPC-UA security config is contradictory: %s. security_policy and " + "message_security_mode must both be None or both be set. Refusing to connect.", + policy_none ? "message_security_mode is Sign/SignAndEncrypt but security_policy is None" + : "security_policy is set but message_security_mode is None"); + return false; + } + + const bool secure = OpcuaClient::requires_secure_channel(cfg); + + if (!secure) { + // Unsecured channel (SecurityPolicy=None, MessageSecurityMode=None). A + // username/password or X.509 identity may still be applied below. + client = opcua::Client(); + } else { +#ifndef UA_ENABLE_ENCRYPTION + RCLCPP_ERROR(opcua_client_logger(), + "OPC-UA SecurityPolicy/MessageSecurityMode requested but this build has no " + "encryption support (UA_ENABLE_ENCRYPTION is off)"); + return false; +#else + if (cfg.client_cert_path.empty() || cfg.client_key_path.empty()) { + RCLCPP_ERROR(opcua_client_logger(), "OPC-UA secured connection requires client_cert_path and client_key_path"); + return false; + } + auto cert = read_file_bytes(cfg.client_cert_path); + auto key = read_file_bytes(cfg.client_key_path); + if (cert.empty() || key.empty()) { + RCLCPP_ERROR(opcua_client_logger(), "OPC-UA: failed to read client cert/key ('%s' / '%s')", + cfg.client_cert_path.c_str(), cfg.client_key_path.c_str()); + return false; + } + + std::vector trust; + trust.reserve(cfg.trust_list_paths.size()); + for (const auto & p : cfg.trust_list_paths) { + auto b = read_file_bytes(p); + if (b.empty()) { + RCLCPP_WARN(opcua_client_logger(), "OPC-UA: skipping unreadable trust-list entry '%s'", p.c_str()); + continue; + } + trust.push_back(std::move(b)); + } + + opcua::ClientConfig cc(cert, key, trust, {}); + + // Force the requested SecurityPolicy. An empty URI lets open62541 pick the + // highest the server offers; we want explicit selection so a downgraded + // server endpoint cannot silently weaken the channel. + const std::string policy_uri = OpcuaClient::security_policy_uri(cfg.security_policy); + if (cfg.security_policy != SecurityPolicy::None && !policy_uri.empty()) { + UA_String_clear(&cc.handle()->securityPolicyUri); + cc.handle()->securityPolicyUri = UA_STRING_ALLOC(policy_uri.c_str()); + } + + cc.setSecurityMode(to_ua_security_mode(cfg.security_mode)); + + // The application URI must match the URI SAN in the client certificate or + // the server rejects the channel with BadCertificateUriInvalid. + if (!cfg.application_uri.empty()) { + UA_String_clear(&cc.handle()->clientDescription.applicationUri); + cc.handle()->clientDescription.applicationUri = UA_STRING_ALLOC(cfg.application_uri.c_str()); + } + + // Trust handling: by default validate the server certificate against the + // trust list. When reject_untrusted is false, accept any server certificate + // (accept-any: INSECURE, lab only). This is NOT trust-on-first-use - nothing + // is pinned, so a substituted or expired server cert is accepted on every + // connect with no chain / hostname / URI-SAN check. + if (!cfg.reject_untrusted) { + auto & cv = cc.handle()->certificateVerification; + if (cv.clear) { + cv.clear(&cv); + } + UA_CertificateVerification_AcceptAll(&cv); + } + + client = opcua::Client(std::move(cc)); +#endif + } + + // User identity token (applied regardless of channel encryption). + switch (cfg.user_auth_mode) { + case UserAuthMode::Anonymous: + client.config().setUserIdentityToken(opcua::AnonymousIdentityToken{}); + break; + case UserAuthMode::UsernamePassword: + client.config().setUserIdentityToken(opcua::UserNameIdentityToken(cfg.username, cfg.password)); + break; + case UserAuthMode::X509: { +#ifdef UA_ENABLE_ENCRYPTION + auto ucert = read_file_bytes(cfg.user_cert_path); + if (ucert.empty()) { + RCLCPP_ERROR(opcua_client_logger(), "OPC-UA X.509 user auth: failed to read user_cert_path '%s'", + cfg.user_cert_path.c_str()); + return false; + } + client.config().setUserIdentityToken(opcua::X509IdentityToken(std::move(ucert))); +#else + RCLCPP_ERROR(opcua_client_logger(), "OPC-UA X.509 user auth requires an encryption-enabled build"); + return false; +#endif + break; + } + } + return true; +} + +} // namespace + bool OpcuaClient::connect(const OpcuaClientConfig & config) { std::lock_guard lock(impl_->client_mutex); impl_->config = config; @@ -208,6 +467,14 @@ bool OpcuaClient::connect(const OpcuaClientConfig & config) { impl_->connected = true; return true; } + + // (Re)build the client with the requested security profile before every + // connect so reconnects re-apply the same SecureChannel settings. + if (!apply_security_config(impl_->client, config)) { + impl_->connected = false; + return false; + } + impl_->client.config().setTimeout(static_cast(config.connect_timeout.count())); impl_->client.connect(config.endpoint_url); impl_->connected = true; @@ -223,7 +490,8 @@ bool OpcuaClient::connect(const OpcuaClientConfig & config) { } return true; - } catch (const opcua::BadStatus &) { + } catch (const opcua::BadStatus & e) { + RCLCPP_WARN(opcua_client_logger(), "OPC-UA connect to '%s' failed: %s", config.endpoint_url.c_str(), e.what()); impl_->connected = false; return false; } @@ -556,6 +824,202 @@ std::string OpcuaClient::server_description() const { return impl_->server_desc; } +std::vector OpcuaClient::read_source_conditions(const opcua::NodeId & source_node, + bool * scan_ok) { + std::lock_guard lock(impl_->client_mutex); + std::vector out; + // Default to "scan failed" so an early return (disconnected) or a thrown + // browse never looks like a clean "no conditions" result to the caller. + if (scan_ok) { + *scan_ok = false; + } + if (!impl_->connected) { + return out; + } + + // A browse failure during state-variable resolution is "path absent" only + // for the explicit not-found status codes. Anything else (timeout, channel + // drop, server busy) is a transient failure that must NOT be read as "this + // child is not a condition" - otherwise a momentary glitch silently drops a + // live condition and lets reconcile clear its fault. + auto is_path_absent_code = [](UA_StatusCode code) { + return code == UA_STATUSCODE_BADNOMATCH || code == UA_STATUSCODE_BADNODEIDUNKNOWN || + code == UA_STATUSCODE_BADNOTFOUND || code == UA_STATUSCODE_BADBROWSENAMEINVALID; + }; + + // Read a Boolean two-step state variable (e.g. ActiveState/Id). Returns the + // fallback when the path is absent or not Boolean. ``resolved`` reports + // whether the ``state_name/Id`` path browsed successfully (used to decide if + // a child is a condition); ``read_failed`` reports that the path resolved but + // its value could not be read this scan (transient failure, distinct from the + // path simply not existing). A transient BROWSE failure (as opposed to a + // genuine path-absent code) is also treated as resolved+read_failed so the + // condition is kept and flagged rather than dropped (issue #478). + auto read_state_id = [&is_path_absent_code](opcua::Node & cond, const char * state_name, bool fallback, + bool * resolved, bool * read_failed) -> bool { + try { + auto node = cond.browseChild({{0, state_name}, {0, "Id"}}); + if (resolved) { + *resolved = true; + } + try { + auto val = node.readValue(); + if (val.isType()) { + return val.getScalarCopy(); + } + return fallback; + } catch (const opcua::BadStatus &) { + // Path exists but the read failed transiently: keep the condition. + if (read_failed) { + *read_failed = true; + } + return fallback; + } + } catch (const opcua::BadStatus & e) { + if (is_path_absent_code(e.code())) { + // Path genuinely absent: this child is not an alarm condition. + if (resolved) { + *resolved = false; + } + return fallback; + } + // Transient browse failure: conservatively keep the condition and flag it + // so the caller does not drop it (and let reconcile wrongly clear it). + if (resolved) { + *resolved = true; + } + if (read_failed) { + *read_failed = true; + } + return fallback; + } + }; + + // Collect candidate condition nodes from the source. AlarmCondition + // instances may be linked as hierarchical Object children OR via the + // non-hierarchical HasCondition reference (i=9006, Part 9 §5.5.3); we follow + // both and recurse one level through HasCondition so conditions owned by an + // intermediate Object are still found. De-duplicated by NodeId string. + constexpr uint32_t kHasConditionRefId = 9006; + auto collect_candidates = [&](opcua::Node & src, std::vector> & out_nodes, + std::set & seen_ids) { + auto add = [&](opcua::Node n) { + if (seen_ids.insert(n.id().toString()).second) { + out_nodes.push_back(std::move(n)); + } + }; + auto hier = src.browseChildren(opcua::ReferenceTypeId::HierarchicalReferences, opcua::NodeClass::Object); + for (auto & c : hier) { + add(c); + } + try { + auto linked = src.browseChildren(opcua::NodeId(0, kHasConditionRefId), opcua::NodeClass::Object); + for (auto & c : linked) { + add(c); + } + } catch (const opcua::BadStatus &) { + // HasCondition browse unsupported on this server: hierarchical children + // already collected, continue. + } + // One-level recursion through HasCondition from each hierarchical child. + for (auto & c : hier) { + try { + auto linked = c.browseChildren(opcua::NodeId(0, kHasConditionRefId), opcua::NodeClass::Object); + for (auto & gc : linked) { + add(gc); + } + } catch (const opcua::BadStatus &) { + } + } + }; + + try { + opcua::Node src(impl_->client, source_node); + std::vector> children; + std::set candidate_ids; + collect_candidates(src, children, candidate_ids); + for (auto & child : children) { + ConditionStateSnapshot snap; + snap.condition_id = child.id(); + + // A child is only treated as an alarm condition when ActiveState/Id + // resolves; everything else under the source is ignored. A transient + // read failure on a resolved ActiveState keeps the condition (flagged so + // the caller does not drop/clear it) instead of silently discarding it. + bool is_condition = false; + bool active_read_failed = false; + snap.active_state = read_state_id(child, "ActiveState", false, &is_condition, &active_read_failed); + if (!is_condition) { + continue; + } + snap.state_read_failed = active_read_failed; + + snap.enabled_state = read_state_id(child, "EnabledState", true, nullptr, nullptr); + snap.acked_state = read_state_id(child, "AckedState", true, nullptr, nullptr); + snap.confirmed_state = read_state_id(child, "ConfirmedState", true, nullptr, nullptr); + + try { + auto retain_node = child.browseChild({{0, "Retain"}}); + try { + auto retain = retain_node.readValue(); + if (retain.isType()) { + snap.retain = retain.getScalarCopy(); + } + } catch (const opcua::BadStatus &) { + // Retain node resolved but its value could not be read this scan: a + // transient failure. Do NOT let retain silently default to false + // (which would Skip an inactive-but-retained condition and let + // reconcile clear a still-tracked fault). Flag the snapshot unreliable + // so it is kept (KeepOnly), not dropped (issue #478). + snap.state_read_failed = true; + } + } catch (const opcua::BadStatus & e) { + // A genuinely absent Retain path (optional on some servers) leaves + // retain=false. A transient browse failure resolving it is unreliable, + // so keep-and-flag rather than trust the default. + if (!is_path_absent_code(e.code())) { + snap.state_read_failed = true; + } + } + try { + auto sev = child.browseChild({{0, "Severity"}}).readValue(); + if (sev.isType()) { + snap.severity = sev.getScalarCopy(); + } + } catch (const opcua::BadStatus &) { + } + try { + auto cname = child.browseChild({{0, "ConditionName"}}).readValue(); + if (cname.isType()) { + snap.condition_name = std::string(cname.getScalarCopy()); + } + } catch (const opcua::BadStatus &) { + } + try { + auto msg = child.browseChild({{0, "Message"}}).readValue(); + if (msg.isType()) { + snap.message = std::string(msg.getScalarCopy().getText()); + } else if (msg.isType()) { + snap.message = std::string(msg.getScalarCopy()); + } + } catch (const opcua::BadStatus &) { + } + + out.push_back(std::move(snap)); + } + // The source browse completed without throwing: this is a trustworthy + // scan (possibly with zero conditions), safe to reconcile against. + if (scan_ok) { + *scan_ok = true; + } + } catch (const opcua::BadStatus & e) { + maybe_mark_disconnected(impl_->connected, impl_->generation, e); + // scan_ok stays false: a browse failure must not be read as "no conditions". + } + + return out; +} + // ---------------------------------------------------------------------------- // Issue #386: native OPC-UA AlarmCondition event subscription primitives. // open62541pp v0.16 has no native EventFilter / event subscription API, so the @@ -678,7 +1142,18 @@ static void on_event_trampoline_c(UA_Client * /*client*/, UA_UInt32 sub_id, void } if (ctx->callback) { - ctx->callback(user_values, source_node, event_type, condition_id); + // The callback chain (on_event -> apply_condition_state -> fault report / + // clear) must never let an exception unwind through open62541's C frames: + // they are not exception-safe (leaks / inconsistent internal state; formally + // UB) and an escape past run_iterate would call std::terminate on the poll + // thread. Contain everything at the C boundary. + try { + ctx->callback(user_values, source_node, event_type, condition_id); + } catch (const std::exception & e) { + RCLCPP_ERROR(opcua_client_logger(), "OPC-UA event callback threw, dropping notification: %s", e.what()); + } catch (...) { + RCLCPP_ERROR(opcua_client_logger(), "OPC-UA event callback threw a non-std exception, dropping notification"); + } } } diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp index e4960df78..99aee3254 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_plugin.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace ros2_medkit_gateway { @@ -48,6 +49,44 @@ inline bool plugin_debug_enabled() { return rcutils_logging_logger_is_enabled_for("opcua.plugin", RCUTILS_LOG_SEVERITY_DEBUG); } +// Security-config parse wrappers. An unrecognized value must NOT silently fall +// back to the insecure default (SecurityPolicy::None / SecurityMode::None / +// anonymous auth): a typo would quietly downgrade a hardened link. Fail loud and +// refuse - the thrown exception disables the plugin in PluginManager::configure_plugins. +SecurityPolicy require_security_policy(const std::string & value) { + bool ok = true; + const auto parsed = OpcuaClient::parse_security_policy(value, &ok); + if (!ok) { + RCLCPP_ERROR(opcua_plugin_logger(), + "Unrecognized OPC-UA security_policy '%s'; refusing to start with an insecure fallback", + value.c_str()); + throw std::invalid_argument("opcua: unrecognized security_policy '" + value + "'"); + } + return parsed; +} + +SecurityMode require_security_mode(const std::string & value) { + bool ok = true; + const auto parsed = OpcuaClient::parse_security_mode(value, &ok); + if (!ok) { + RCLCPP_ERROR(opcua_plugin_logger(), + "Unrecognized OPC-UA security_mode '%s'; refusing to start with an insecure fallback", value.c_str()); + throw std::invalid_argument("opcua: unrecognized security_mode '" + value + "'"); + } + return parsed; +} + +UserAuthMode require_user_auth_mode(const std::string & value) { + bool ok = true; + const auto parsed = OpcuaClient::parse_user_auth_mode(value, &ok); + if (!ok) { + RCLCPP_ERROR(opcua_plugin_logger(), + "Unrecognized OPC-UA user_auth_mode '%s'; refusing to start with an insecure fallback", value.c_str()); + throw std::invalid_argument("opcua: unrecognized user_auth_mode '" + value + "'"); + } + return parsed; +} + /// Parse a JSON "value" field, coerce to the node's declared data_type, and /// validate against the optional min/max range. Shared by handle_plc_operations, /// DataProvider::write_data, and OperationProvider::execute_operation to keep @@ -115,8 +154,49 @@ void OpcuaPlugin::configure(const nlohmann::json & config) { if (config.contains("endpoint_url")) { client_config_.endpoint_url = config["endpoint_url"].get(); } - // NOTE: OPC-UA security (username/password, certificates) not yet implemented. - // Connect uses Anonymous auth with SecurityPolicy=None. + + // OPC-UA SecureChannel security + user identity. All opt-in; defaults keep + // the legacy anonymous + SecurityPolicy=None behaviour. + if (config.contains("security_policy")) { + client_config_.security_policy = require_security_policy(config["security_policy"].get()); + } + if (config.contains("security_mode") || config.contains("message_security_mode")) { + const std::string mode_str = config.contains("security_mode") ? config["security_mode"].get() + : config["message_security_mode"].get(); + client_config_.security_mode = require_security_mode(mode_str); + } + if (config.contains("client_cert_path")) { + client_config_.client_cert_path = config["client_cert_path"].get(); + } + if (config.contains("client_key_path")) { + client_config_.client_key_path = config["client_key_path"].get(); + } + if (config.contains("application_uri")) { + client_config_.application_uri = config["application_uri"].get(); + } + if (config.contains("trust_list_paths") && config["trust_list_paths"].is_array()) { + client_config_.trust_list_paths.clear(); + for (const auto & p : config["trust_list_paths"]) { + if (p.is_string() && !p.get().empty()) { + client_config_.trust_list_paths.push_back(p.get()); + } + } + } + if (config.contains("reject_untrusted")) { + client_config_.reject_untrusted = config["reject_untrusted"].get(); + } + if (config.contains("user_auth_mode")) { + client_config_.user_auth_mode = require_user_auth_mode(config["user_auth_mode"].get()); + } + if (config.contains("username")) { + client_config_.username = config["username"].get(); + } + if (config.contains("password")) { + client_config_.password = config["password"].get(); + } + if (config.contains("user_cert_path")) { + client_config_.user_cert_path = config["user_cert_path"].get(); + } if (config.contains("node_map_path")) { node_map_path_ = config["node_map_path"].get(); @@ -132,6 +212,23 @@ void OpcuaPlugin::configure(const nlohmann::json & config) { auto ms = std::clamp(config["poll_interval_ms"].get(), 100, 60000); poller_config_.poll_interval = std::chrono::milliseconds(ms); } + if (config.contains("condition_replay_strategy")) { + poller_config_.condition_replay_strategy = + OpcuaPoller::parse_replay_strategy(config["condition_replay_strategy"].get()); + } + if (auto * env = std::getenv("OPCUA_CONDITION_REPLAY")) { + poller_config_.condition_replay_strategy = OpcuaPoller::parse_replay_strategy(env); + } + // Issue #478: drop the Confirm gate for servers that do not implement the + // optional Confirm transition (e.g. Siemens S7-1500) so alarms clear on + // Acknowledge alone. Default true keeps the spec-strict behaviour. + if (config.contains("require_confirm_for_clear")) { + poller_config_.require_confirm_for_clear = config["require_confirm_for_clear"].get(); + } + if (auto * env = std::getenv("OPCUA_REQUIRE_CONFIRM_FOR_CLEAR")) { + const std::string v = env; + poller_config_.require_confirm_for_clear = !(v == "0" || v == "false" || v == "no" || v == "off"); + } // Environment variables override YAML config (for Docker) if (auto * env = std::getenv("OPCUA_ENDPOINT_URL")) { @@ -140,6 +237,58 @@ void OpcuaPlugin::configure(const nlohmann::json & config) { if (auto * env = std::getenv("OPCUA_NODE_MAP_PATH")) { node_map_path_ = env; } + // Security env overrides (for Docker / appliance deployments). + if (auto * env = std::getenv("OPCUA_SECURITY_POLICY")) { + client_config_.security_policy = require_security_policy(env); + } + if (auto * env = std::getenv("OPCUA_SECURITY_MODE")) { + client_config_.security_mode = require_security_mode(env); + } + if (auto * env = std::getenv("OPCUA_CLIENT_CERT")) { + client_config_.client_cert_path = env; + } + if (auto * env = std::getenv("OPCUA_CLIENT_KEY")) { + client_config_.client_key_path = env; + } + if (auto * env = std::getenv("OPCUA_APPLICATION_URI")) { + client_config_.application_uri = env; + } + if (auto * env = std::getenv("OPCUA_TRUST_LIST")) { + // Colon-separated list of DER trust-store paths. + client_config_.trust_list_paths.clear(); + std::string list = env; + size_t start = 0; + while (start <= list.size()) { + size_t sep = list.find(':', start); + std::string path = list.substr(start, sep == std::string::npos ? std::string::npos : sep - start); + if (!path.empty()) { + client_config_.trust_list_paths.push_back(path); + } + if (sep == std::string::npos) { + break; + } + start = sep + 1; + } + } + if (auto * env = std::getenv("OPCUA_REJECT_UNTRUSTED")) { + std::string v = env; + std::transform(v.begin(), v.end(), v.begin(), [](unsigned char c) { + return static_cast(std::tolower(c)); + }); + client_config_.reject_untrusted = !(v == "0" || v == "false" || v == "no"); + } + if (auto * env = std::getenv("OPCUA_USER_AUTH")) { + client_config_.user_auth_mode = require_user_auth_mode(env); + } + if (auto * env = std::getenv("OPCUA_USERNAME")) { + client_config_.username = env; + } + if (auto * env = std::getenv("OPCUA_PASSWORD")) { + client_config_.password = env; + } + if (auto * env = std::getenv("OPCUA_USER_CERT")) { + client_config_.user_cert_path = env; + } if (!node_map_path_.empty()) { if (!node_map_.load(node_map_path_)) { @@ -177,9 +326,7 @@ void OpcuaPlugin::set_context(PluginContext & context) { } } - log_warn( - "OPC-UA security: Anonymous auth, SecurityPolicy=None. " - "Not suitable for untrusted networks."); + log_security_profile(); if (client_->connect(client_config_)) { log_info("Connected to OPC-UA server: " + client_config_.endpoint_url); @@ -524,11 +671,11 @@ void OpcuaPlugin::on_event_alarm(const AlarmEventDelivery & delivery) { // Map raw OPC-UA Severity (1-1000) to SOVD severity bucket. // Selfpatch convention documented in design/index.rst; not from IEC 62682. - // Severity_override on the AlarmEventConfig wins when set. + // The resolved severity_override (mapping- or source-level, issue #389) + // wins when set. auto severity_str = [&]() { - auto * cfg = node_map_.find_event_alarm(delivery.entity_id, delivery.fault_code); - if (cfg && !cfg->severity_override.empty()) { - return cfg->severity_override; + if (!delivery.severity_override.empty()) { + return delivery.severity_override; } if (delivery.severity >= 801) { return std::string("CRITICAL"); @@ -660,6 +807,64 @@ void OpcuaPlugin::publish_values(const PollSnapshot & snap) { } } +void OpcuaPlugin::log_security_profile() const { + auto policy_name = [](SecurityPolicy p) -> std::string { + switch (p) { + case SecurityPolicy::None: + return "None"; + case SecurityPolicy::Basic256Sha256: + return "Basic256Sha256"; + case SecurityPolicy::Aes128Sha256RsaOaep: + return "Aes128Sha256RsaOaep"; + case SecurityPolicy::Aes256Sha256RsaPss: + return "Aes256Sha256RsaPss"; + } + return "None"; + }; + auto mode_name = [](SecurityMode m) -> std::string { + switch (m) { + case SecurityMode::None: + return "None"; + case SecurityMode::Sign: + return "Sign"; + case SecurityMode::SignAndEncrypt: + return "SignAndEncrypt"; + } + return "None"; + }; + auto auth_name = [](UserAuthMode a) -> std::string { + switch (a) { + case UserAuthMode::Anonymous: + return "Anonymous"; + case UserAuthMode::UsernamePassword: + return "Username/Password"; + case UserAuthMode::X509: + return "X.509"; + } + return "Anonymous"; + }; + + const std::string profile = "OPC-UA security: SecurityPolicy=" + policy_name(client_config_.security_policy) + + ", MessageSecurityMode=" + mode_name(client_config_.security_mode) + + ", user=" + auth_name(client_config_.user_auth_mode); + + const bool secure_channel = OpcuaClient::requires_secure_channel(client_config_); + + if (OpcuaClient::credentials_sent_in_clear(client_config_)) { + // Username/password or X.509 credentials sent without a Sign/SignAndEncrypt + // channel travel in the clear and can be intercepted on the wire. + log_warn(profile + + ". WARNING: user credentials cross an unencrypted OPC-UA channel (MessageSecurityMode=None) - " + "they can be intercepted. Use Sign or SignAndEncrypt on an untrusted network."); + } else if (!secure_channel) { + log_warn(profile + ". Unsecured - not suitable for untrusted networks."); + } else if (!client_config_.reject_untrusted) { + log_warn(profile + ". WARNING: reject_untrusted=false (accepts any server certificate)."); + } else { + log_info(profile); + } +} + nlohmann::json OpcuaPlugin::build_data_response(const std::string & entity_id) const { auto entries = node_map_.entries_for_entity(entity_id); auto snap = poller_->snapshot(); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp index c4b7213e6..f08d9862a 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/src/opcua_poller.cpp @@ -33,12 +33,6 @@ inline rclcpp::Logger opcua_poller_logger() { static auto logger = rclcpp::get_logger("opcua.poller"); return logger; } - -inline bool poller_debug_enabled() { - // rcutils API for Humble compatibility; Jazzy+ adds rclcpp::Logger:: - // get_effective_level but Humble does not. - return rcutils_logging_logger_is_enabled_for("opcua.poller", RCUTILS_LOG_SEVERITY_DEBUG); -} } // namespace namespace { @@ -68,7 +62,11 @@ constexpr size_t kFieldAckedState = 5; constexpr size_t kFieldConfirmedState = 6; constexpr size_t kFieldShelvingState = 7; constexpr size_t kFieldBranchId = 8; -constexpr size_t kAlarmFieldCount = 9; +constexpr size_t kFieldConditionName = 9; // issue #389 (multi-alarm identity) +constexpr size_t kAlarmFieldCount = 9; // count of fixed alarm-state fields (indices 0-8) +// ConditionName is always appended at index kFieldConditionName; configured +// associated values follow at index kFieldConditionName + 1 onward. +constexpr size_t kFirstAssociatedValueField = kFieldConditionName + 1; // Standard NodeIds for the types that *directly* define each field (open62541 // servers reject SAOs whose BrowsePath is inherited rather than direct). @@ -77,11 +75,11 @@ constexpr uint32_t kConditionType = 2782; constexpr uint32_t kAcknowledgeableConditionType = 2881; constexpr uint32_t kAlarmConditionType = 2915; -std::vector build_alarm_event_select_specs() { +std::vector build_alarm_event_select_specs(const AlarmEventConfig & cfg) { // Each clause carries the type that *directly* defines its first browse // segment - inheritance traversal is not honored by the open62541 server // validator (verified against 1.4.6 with FULL ns0). - return { + std::vector specs = { {opcua::NodeId(0, kBaseEventType), {{0, "EventId"}}, UA_ATTRIBUTEID_VALUE}, {opcua::NodeId(0, kBaseEventType), {{0, "Severity"}}, UA_ATTRIBUTEID_VALUE}, {opcua::NodeId(0, kBaseEventType), {{0, "Message"}}, UA_ATTRIBUTEID_VALUE}, @@ -93,7 +91,65 @@ std::vector build_alarm_event_select_specs() { {{0, "ShelvingState"}, {0, "CurrentState"}, {0, "Id"}}, UA_ATTRIBUTEID_VALUE}, {opcua::NodeId(0, kConditionType), {{0, "BranchId"}}, UA_ATTRIBUTEID_VALUE}, + // Issue #389: ConditionName, used to route distinct conditions from one + // source to distinct faults. + {opcua::NodeId(0, kConditionType), {{0, "ConditionName"}}, UA_ATTRIBUTEID_VALUE}, }; + // Issue #389: configured associated values (e.g. Siemens SD_1..SD_n) as + // BaseEventType properties. + for (const auto & av : cfg.associated_values) { + specs.push_back({opcua::NodeId(0, kBaseEventType), {{av.namespace_index, av.name}}, UA_ATTRIBUTEID_VALUE}); + } + return specs; +} + +// Render an arbitrary scalar event field value to a short string for inclusion +// in a fault description (associated values can be of any builtin type). +std::string variant_to_display(const opcua::Variant & v) { + if (v.isEmpty()) { + return ""; + } + if (v.isType()) { + return std::string(v.getScalarCopy().getText()); + } + if (v.isType()) { + return std::string(v.getScalarCopy()); + } + if (v.isType()) { + return v.getScalarCopy() ? "true" : "false"; + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + if (v.isType()) { + return std::to_string(v.getScalarCopy()); + } + return ""; +} + +std::string variant_to_string_scalar(const opcua::Variant & v) { + if (v.isType()) { + return std::string(v.getScalarCopy()); + } + if (v.isType()) { + return std::string(v.getScalarCopy().getText()); + } + return ""; } // Extract a scalar of type T from a Variant, returning the default if absent. @@ -243,10 +299,12 @@ void OpcuaPoller::setup_event_subscriptions() { return; } - const auto select_specs = build_alarm_event_select_specs(); event_monitored_item_ids_.clear(); for (const auto & cfg : node_map_.event_alarms()) { + // Per-source select specs so each source can carry its own associated + // values (issue #389) in addition to the fixed alarm-state fields. + const auto select_specs = build_alarm_event_select_specs(cfg); // Capture cfg BY VALUE: even though range-for ``const auto & cfg`` binds // to a vector element that outlives the loop, an `&cfg`-by-reference // capture would chain through a local reference variable whose name @@ -266,77 +324,348 @@ void OpcuaPoller::setup_event_subscriptions() { } } - // Fire ConditionRefresh so the server pushes any conditions that fired - // before our subscription started (Part 9 §5.5.7 mandates the bracketing - // RefreshStartEvent / RefreshEndEvent which we treat as ordinary events - // tagged by EventType). + // Replay conditions that were already active before this (re)subscribe so a + // reconnect / restart does not drop the live fault set. Strategy-driven: + // ConditionRefresh (Part 9 §5.5.7) and/or a read-based fallback (issue + // #389) for servers that reject the method. if (!event_monitored_item_ids_.empty()) { - condition_refresh(); + replay_active_conditions(); + } +} + +ConditionReplayStrategy OpcuaPoller::parse_replay_strategy(const std::string & name) { + std::string n = name; + std::transform(n.begin(), n.end(), n.begin(), [](unsigned char c) { + return static_cast(std::tolower(c)); + }); + if (n == "method") { + return ConditionReplayStrategy::Method; + } + if (n == "read" || n == "read_fallback" || n == "readfallback") { + return ConditionReplayStrategy::Read; + } + if (n == "off" || n == "none" || n == "disabled") { + return ConditionReplayStrategy::Off; } + return ConditionReplayStrategy::Auto; } -void OpcuaPoller::condition_refresh() { - // Server object NodeId (i=2253) hosts the ConditionRefresh method - // (i=3875 - per Part 9 §5.5.7). We use the standard NodeId; servers that - // diverge from the spec (rare) will return BadMethodInvalid which is - // logged but does not abort subscribing. - static constexpr uint32_t kServerObjectId = 2253; +void OpcuaPoller::warn_operator(const std::string & msg) { + if (config_.log_warn) { + config_.log_warn(msg); + } else { + RCLCPP_WARN(opcua_poller_logger(), "%s", msg.c_str()); + } +} + +void OpcuaPoller::replay_active_conditions() { + switch (config_.condition_replay_strategy) { + case ConditionReplayStrategy::Off: + return; + case ConditionReplayStrategy::Method: + if (!try_condition_refresh() && !condition_refresh_warned_) { + warn_operator( + "OPC-UA ConditionRefresh rejected and replay strategy is 'method'; active conditions " + "will NOT be replayed on reconnect with this server. Live transitions still flow. See issue #389."); + condition_refresh_warned_ = true; + } else if (condition_refresh_warned_) { + condition_refresh_warned_ = false; + } + return; + case ConditionReplayStrategy::Read: + read_fallback_replay(); + return; + case ConditionReplayStrategy::Auto: + if (try_condition_refresh()) { + condition_refresh_warned_ = false; + return; + } + if (!condition_refresh_warned_) { + warn_operator( + "OPC-UA ConditionRefresh rejected; using read-based active-condition replay fallback (issue #389)."); + condition_refresh_warned_ = true; + } + read_fallback_replay(); + return; + } +} + +bool OpcuaPoller::try_condition_refresh() { + // Per Part 9 §5.5.7 ConditionRefresh is a Method of the ConditionType, so the + // Call's ObjectId MUST be the well-known ConditionType node (i=2782), NOT the + // Server object (i=2253). Calling it on the Server object is the root cause of + // BadNodeIdUnknown / BadMethodInvalid rejections on conformant servers + // (including Siemens S7-1500, which documents ConditionRefresh as supported - + // only ConditionRefresh2 is unsupported). The method itself is i=3875 + // (ConditionType.ConditionRefresh) and takes the SubscriptionId argument. + static constexpr uint32_t kConditionTypeId = 2782; static constexpr uint32_t kConditionRefreshMethodId = 3875; std::vector args; args.push_back(opcua::Variant::fromScalar(static_cast(event_subscription_id_))); auto result = - client_.call_method(opcua::NodeId(0, kServerObjectId), opcua::NodeId(0, kConditionRefreshMethodId), args); - if (!result.has_value()) { - // Not fatal but operator-visible: when ConditionRefresh is rejected by - // the server (BadMethodInvalid in open62541 v1.4.x, BadNotImplemented - // on Siemens S7-1500, etc.) the gateway will not re-receive any active - // conditions on reconnect; only live transitions surface in /faults. - // Worth a single warn per connect so the operator knows their - // alarm-replay-on-reconnect contract is broken with this PLC. - if (!condition_refresh_warned_) { - const std::string msg = "OPC-UA ConditionRefresh rejected (" + result.error().message + - "); active conditions will NOT be replayed on reconnect with this server. " - "Live transitions still flow. See issue #389."; - if (config_.log_warn) { - config_.log_warn(msg); - } else { - // Fallback when the plugin did not wire log_warn through PollerConfig - // (e.g., direct unit tests). RCLCPP_WARN goes to /rosout instead of - // raw stderr so the warn integrates with normal log filtering. - RCLCPP_WARN(opcua_poller_logger(), "%s", msg.c_str()); + client_.call_method(opcua::NodeId(0, kConditionTypeId), opcua::NodeId(0, kConditionRefreshMethodId), args); + return result.has_value(); +} + +OpcuaPoller::ReadReplayDisposition +OpcuaPoller::classify_read_snapshot(const OpcuaClient::ConditionStateSnapshot & snap) { + // Transient read/browse failure: present but unreliable. Keep it (so reconcile + // does not clear) but do not feed an untrustworthy state into the machine. + if (snap.state_read_failed) { + return ReadReplayDisposition::KeepOnly; + } + // EnabledState=false: a Disabled condition is never active and is never + // replayed by ConditionRefresh (Part 9 §5.7.2). Exclude it entirely so a + // stale tracked fault can reconcile away. + if (!snap.enabled_state) { + return ReadReplayDisposition::Skip; + } + // A reliably-read ActiveState=true condition is always interesting and must + // never be Skipped on the Retain check below (issue #478). Retain is an + // optional node that can read false by default when it is unreadable this + // scan; Skipping a live, reliably-active condition on that basis would drop + // it from ``seen`` and let reconcile clear a still-active fault on a modeled + // source. ConditionRefresh always replays an active condition, so mirror it. + if (snap.active_state) { + return ReadReplayDisposition::Feed; + } + // Retain mirrors ConditionRefresh's interesting-state filter (Part 9 + // §5.5.2): only Retain==true conditions are replayed. An inactive condition + // that has gone quiet (Retain==false) is no longer of interest. + if (!snap.retain) { + return ReadReplayDisposition::Skip; + } + return ReadReplayDisposition::Feed; +} + +void OpcuaPoller::read_fallback_replay() { + // Browse each configured alarm source, read its conditions' current state, + // and drive the same state machine as live events. Conditions that are no + // longer active/interesting are reconciled (cleared) afterwards - but only + // for sources whose scan succeeded AND that positively expose Condition + // instance nodes, so neither a transient disconnect nor an EventNotifier-only + // server (S7-1500) can falsely clear live alarms (issue #478). + // + // This fallback assumes each AlarmCondition instance is browseable from its + // alarm source (hierarchical child or via HasCondition). Validated against + // the open62541 reference server; NOT yet validated against a real Siemens + // S7-1500, whose condition-instance address-space layout must be confirmed + // before this path can be relied on there (use ConditionRefresh on Siemens). + std::set seen; + std::set failed_sources; + for (const auto & cfg : node_map_.event_alarms()) { + bool scan_ok = false; + auto conditions = client_.read_source_conditions(cfg.source_node_id, &scan_ok); + if (!scan_ok) { + // Browse failed (disconnect / Bad status). Record the source so reconcile + // does not clear its still-tracked conditions on an empty/partial scan. + failed_sources.insert(cfg.source_node_id_str); + continue; + } + const bool has_reliable_condition = + std::any_of(conditions.begin(), conditions.end(), [](const OpcuaClient::ConditionStateSnapshot & s) { + return !s.state_read_failed; + }); + if (has_reliable_condition) { + // The source positively exposes at least one RELIABLY-read Condition + // instance node -> read-fallback is viable here and reconcile may clear + // stale faults for it. + read_modeled_sources_.insert(cfg.source_node_id_str); + read_unsupported_warned_sources_.erase(cfg.source_node_id_str); + } else if (read_modeled_sources_.find(cfg.source_node_id_str) == read_modeled_sources_.end()) { + // Either zero Condition instance nodes, or every node this scan was a + // transient (unreliable) read - and this source has never yielded a + // reliably-read condition. Treat an all-transient scan exactly like an + // empty/unsupported scan: an EventNotifier-only server (e.g. S7-1500) + // yields zero condition nodes, and a lone transient browse error on a + // non-condition child must NOT flip the source into 'modeled' (which + // would let reconcile wipe the live event-fault set, whose ConditionIds + // are never in ``seen``). Do NOT clear its tracked faults; warn once. + if (read_unsupported_warned_sources_.insert(cfg.source_node_id_str).second) { + warn_operator( + "OPC-UA read-based active-condition replay found no Condition instance nodes under source '" + + cfg.source_node_id_str + + "'. This server appears to expose alarms via EventNotifier only (e.g. Siemens S7-1500), so the " + "read fallback cannot recover the active set; tracked faults are preserved (not cleared) and live " + "transitions still flow. Prefer ConditionRefresh ('method' or 'auto'). See issue #478."); } - condition_refresh_warned_ = true; } - } else { - // Reset the throttle: a successful refresh means the server is - // cooperating again, so the next failure (e.g., after a restart of a - // server with a different config) earns a fresh warn. - condition_refresh_warned_ = false; + for (const auto & snap : conditions) { + const std::string cid = snap.condition_id.toString(); + const ReadReplayDisposition disp = classify_read_snapshot(snap); + if (disp == ReadReplayDisposition::Skip) { + // Not interesting (Disabled or Retain=false): leave out of ``seen`` so + // any stale tracked fault reconciles away. + continue; + } + // Mark every interesting/observed condition as seen, even if it matches + // no mapping below. Otherwise reconcile_after_read would treat a live (but + // unmapped) condition as "cleared while offline" and wrongly clear it. + seen.insert(cid); + if (disp == ReadReplayDisposition::KeepOnly) { + // Present but unreliable (transient read failure): kept, not fed. + continue; + } + + AlarmEventInput input; + input.enabled_state = snap.enabled_state; + input.active_state = snap.active_state; + input.acked_state = snap.acked_state; + input.confirmed_state = snap.confirmed_state; + // Read-based replay never observes a historical branch (we read the + // current branch state directly) and shelving is folded into the live + // event path; treat read snapshots as live, non-shelved state. + input.shelved = false; + input.branch_id_present = false; + + // Resolve identity to a specific fault. EventType is not available from + // a state read, so event_type-only mappings will not match here; routing + // by condition_name / source_node still works. + ResolvedAlarm resolved = + NodeMap::resolve_alarm(cfg, snap.condition_name, cfg.source_node_id_str, /*event_type=*/""); + if (!resolved.matched) { + continue; + } + AlarmEventConfig eff = cfg; + eff.fault_code = resolved.fault_code; + eff.severity_override = resolved.severity_override; + eff.message_override = resolved.message_override; + + apply_condition_state(eff, snap.condition_id, input, snap.severity, snap.message, /*event_id=*/nullptr); + } + } + reconcile_after_read(seen, failed_sources, read_modeled_sources_); +} + +bool OpcuaPoller::should_clear_condition(SovdAlarmStatus last_status, const std::string & condition_id, + const std::string & source_id, const std::set & seen, + const std::set & failed_sources, + const std::set & modeled_sources) { + const bool was_active = (last_status == SovdAlarmStatus::Confirmed || last_status == SovdAlarmStatus::Healed); + if (!was_active) { + return false; + } + // Never clear a condition whose source scan failed this cycle: its absence + // from ``seen`` means "not scanned", not "no longer active". + if (failed_sources.find(source_id) != failed_sources.end()) { + return false; + } + // Safety gate (issue #478): only clear when the source is positively known to + // model Condition instances as address-space nodes. An EventNotifier-only + // server (S7-1500) yields zero condition nodes, so an empty read scan must + // never wipe its tracked faults - this is the single most important guard. + if (modeled_sources.find(source_id) == modeled_sources.end()) { + return false; + } + return seen.find(condition_id) == seen.end(); +} + +bool OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus last_status, const std::string & condition_id, + const std::set & seen) { + const bool was_active = (last_status == SovdAlarmStatus::Confirmed || last_status == SovdAlarmStatus::Healed); + if (!was_active) { + return false; + } + return seen.find(condition_id) == seen.end(); +} + +void OpcuaPoller::reconcile_after_read(const std::set & seen, const std::set & failed_sources, + const std::set & modeled_sources) { + std::vector clears; + { + std::unique_lock lock(conditions_mutex_); + for (auto & [cid, runtime] : conditions_) { + if (should_clear_condition(runtime.last_status, cid, runtime.source_id, seen, failed_sources, modeled_sources)) { + // Tracked as active but absent from a successful read scan -> the alarm + // cleared while we were offline. Clear the SOVD fault. + runtime.last_status = SovdAlarmStatus::Cleared; + AlarmEventDelivery d; + d.fault_code = runtime.fault_code; + d.entity_id = runtime.entity_id; + d.next_status = SovdAlarmStatus::Cleared; + d.action = AlarmAction::ClearFault; + d.condition_id = cid; + clears.push_back(std::move(d)); + } + } + } + dispatch_condition_clears(clears); +} + +void OpcuaPoller::reconcile_after_refresh(const std::set & seen) { + std::vector clears; + { + std::unique_lock lock(conditions_mutex_); + for (auto & [cid, runtime] : conditions_) { + if (!should_clear_after_refresh(runtime.last_status, cid, seen)) { + continue; + } + // Tracked as active but the completed ConditionRefresh burst did not + // replay it -> the alarm cleared while we were offline. Clear the fault. + runtime.last_status = SovdAlarmStatus::Cleared; + AlarmEventDelivery d; + d.fault_code = runtime.fault_code; + d.entity_id = runtime.entity_id; + d.next_status = SovdAlarmStatus::Cleared; + d.action = AlarmAction::ClearFault; + d.condition_id = cid; + clears.push_back(std::move(d)); + } + } + dispatch_condition_clears(clears); +} + +void OpcuaPoller::dispatch_condition_clears(const std::vector & clears) { + if (clears.empty()) { + return; + } + EventAlarmCallback cb_copy; + { + std::lock_guard cb_lock(event_alarm_callback_mutex_); + cb_copy = event_alarm_callback_; + } + if (cb_copy) { + for (const auto & d : clears) { + cb_copy(d); + } } } void OpcuaPoller::on_event(const AlarmEventConfig & cfg, const std::vector & values, - const opcua::NodeId & /*source_node*/, const opcua::NodeId & event_type, + const opcua::NodeId & source_node, const opcua::NodeId & event_type, const opcua::NodeId & condition_id) { - RCLCPP_DEBUG_STREAM(opcua_poller_logger(), - "on_event fault=" << cfg.fault_code << " event_type=" << event_type.toString() - << " condition=" << condition_id.toString() << " values=" << values.size()); - // Detect ConditionRefresh bracketing per Part 9 §5.5.7. The flag is for - // diagnostics only; the state machine itself does not need to know - // because RefreshStart / RefreshEnd notifications carry no condition - // payload, so positional-empty values trip the early-return below. + RCLCPP_DEBUG_STREAM(opcua_poller_logger(), "on_event source_fault=" << cfg.fault_code + << " event_type=" << event_type.toString() + << " condition=" << condition_id.toString() + << " values=" << values.size()); + // Detect ConditionRefresh bracketing per Part 9 §5.5.7. RefreshStart / + // RefreshEnd carry no condition payload. Between them the server replays its + // active (Retain=true) condition set; we accumulate every replayed + // ConditionId in ``refresh_seen_`` so RefreshEnd can reconcile away tracked + // conditions that were NOT replayed - they cleared while we were offline and + // would otherwise stay latched Confirmed (issue #480). if (event_type.getNamespaceIndex() == 0 && event_type.getIdentifierType() == opcua::NodeIdType::Numeric) { auto numeric = event_type.getIdentifierAs(); if (numeric == kRefreshStartEventTypeId) { + refresh_seen_.clear(); condition_refresh_in_progress_.store(true, std::memory_order_release); return; } if (numeric == kRefreshEndEventTypeId) { condition_refresh_in_progress_.store(false, std::memory_order_release); + reconcile_after_refresh(refresh_seen_); + refresh_seen_.clear(); return; } } + // A condition (re)reported inside a refresh burst is still active; remember it + // so RefreshEnd does not clear it. Recorded before the field-count bail below + // so even a partial-field replay counts as "seen". + if (condition_refresh_in_progress_.load(std::memory_order_acquire)) { + refresh_seen_.insert(condition_id.toString()); + } + if (values.size() < kAlarmFieldCount) { // Server returned fewer fields than our filter requested - typical when // BadAttributeIdInvalid was returned for one or more select clauses. @@ -380,14 +709,69 @@ void OpcuaPoller::on_event(const AlarmEventConfig & cfg, const std::vector(values[kFieldSeverity], 0); + std::string message = variant_to_localized_text(values[kFieldMessage]); + + // Issue #389: resolve this event's identity (ConditionName / SourceNode / + // EventType) to a specific fault via the config's mappings. This lets one + // OPC-UA source emit many distinct conditions that surface as distinct + // SOVD faults with their own text. + std::string condition_name; + if (values.size() > kFieldConditionName) { + condition_name = variant_to_string_scalar(values[kFieldConditionName]); + } + ResolvedAlarm resolved = NodeMap::resolve_alarm(cfg, condition_name, source_node.toString(), event_type.toString()); + if (!resolved.matched) { + // No mapping and no source-level fault_code applies to this condition. + RCLCPP_DEBUG_STREAM(opcua_poller_logger(), + "on_event: no fault mapping for condition_name='" << condition_name << "' - ignoring"); + return; + } + + // Append configured associated values (e.g. SD_1..SD_n) to the description. + for (size_t i = 0; i < cfg.associated_values.size(); ++i) { + const size_t idx = kFirstAssociatedValueField + i; + if (idx >= values.size()) { + break; + } + const std::string rendered = variant_to_display(values[idx]); + if (rendered.empty()) { + continue; + } + if (!message.empty()) { + message += "; "; + } + message += cfg.associated_values[i].label + "=" + rendered; + } + + // Build the effective config for this specific event (resolved fault_code + + // overrides) so apply_condition_state tracks the right fault_code / entity. + AlarmEventConfig eff = cfg; + eff.fault_code = resolved.fault_code; + eff.severity_override = resolved.severity_override; + eff.message_override = resolved.message_override; + + // Capture the live EventId for spec-compliant Acknowledge / Confirm. + opcua::ByteString event_id; + bool have_event_id = false; + if (values[kFieldEventId].isType()) { + event_id = values[kFieldEventId].getScalarCopy(); + have_event_id = true; + } + + apply_condition_state(eff, condition_id, input, severity, message, have_event_id ? &event_id : nullptr); +} + +void OpcuaPoller::apply_condition_state(const AlarmEventConfig & cfg, const opcua::NodeId & condition_id, + const AlarmEventInput & input, uint16_t severity, const std::string & message, + const opcua::ByteString * event_id) { + // Key the runtime map on the ConditionId string form so distinct condition + // instances within the same event source remain separate (Part 9 + // §5.5.2.13). Shared by the live event path and the read-based replay. + const std::string condition_id_str = condition_id.toString(); - ConditionRuntime runtime_snapshot; - SovdAlarmStatus prev_status; + AlarmEventDelivery delivery; + bool dispatch = false; { std::unique_lock lock(conditions_mutex_); auto it = conditions_.find(condition_id_str); @@ -396,31 +780,17 @@ void OpcuaPoller::on_event(const AlarmEventConfig & cfg, const std::vectorsecond.last_status; - - // Track the latest EventId for spec-compliant Acknowledge calls. - if (values[kFieldEventId].isType()) { - it->second.latest_event_id = values[kFieldEventId].getScalarCopy(); - if (poller_debug_enabled()) { - std::ostringstream hex_oss; - const auto * bytes = it->second.latest_event_id.data(); - for (size_t i = 0; i < std::min(it->second.latest_event_id.length(), 16); ++i) { - char buf[3]; - std::snprintf(buf, sizeof(buf), "%02x", static_cast(bytes[i]) & 0xffu); - hex_oss << buf; - } - RCLCPP_DEBUG_STREAM(opcua_poller_logger(), - "captured EventId len=" << it->second.latest_event_id.length() << " hex=" << hex_oss.str()); - } - } else { - RCLCPP_DEBUG(opcua_poller_logger(), "EventId field not a ByteString"); + const SovdAlarmStatus prev_status = it->second.last_status; + + if (event_id != nullptr) { + it->second.latest_event_id = *event_id; } - auto outcome = AlarmStateMachine::compute(prev_status, input); + auto outcome = AlarmStateMachine::compute(prev_status, input, config_.require_confirm_for_clear); RCLCPP_DEBUG_STREAM(opcua_poller_logger(), "state machine: enabled=" << input.enabled_state << " active=" << input.active_state << " acked=" << input.acked_state @@ -428,42 +798,34 @@ void OpcuaPoller::on_event(const AlarmEventConfig & cfg, const std::vector" to stdout for the test harness to poll. See // docs in the header comment of fire(), clear(), ack(), etc. below. +#include #include #include #include @@ -31,7 +32,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -136,6 +139,23 @@ UA_StatusCode add_condition(UA_Server * server, const std::string & name, UA_UIn return rc; } +// A single readable Double variable (``ns=;s=Tank.Level`` = 42.0). The +// secured integration test maps this node and reads it back over the encrypted +// SecureChannel to prove a value read - not just an event subscription - +// traverses the Basic256Sha256 channel. +UA_StatusCode add_variable(UA_Server * server, UA_UInt16 ns) { + UA_VariableAttributes attr = UA_VariableAttributes_default; + UA_Double value = 42.0; + UA_Variant_setScalar(&attr.value, &value, &UA_TYPES[UA_TYPES_DOUBLE]); + attr.displayName = UA_LOCALIZEDTEXT(const_cast("en"), const_cast("TankLevel")); + attr.accessLevel = UA_ACCESSLEVELMASK_READ; + UA_NodeId node_id = UA_NODEID_STRING(ns, const_cast("Tank.Level")); + UA_QualifiedName qname = UA_QUALIFIEDNAME(ns, const_cast("TankLevel")); + return UA_Server_addVariableNode(server, node_id, UA_NODEID_NUMERIC(0, UA_NS0ID_OBJECTSFOLDER), + UA_NODEID_NUMERIC(0, UA_NS0ID_ORGANIZES), qname, + UA_NODEID_NUMERIC(0, UA_NS0ID_BASEDATAVARIABLETYPE), attr, nullptr, nullptr); +} + UA_StatusCode set_two_state(UA_Server * server, const UA_NodeId & cond, const char * field, UA_Boolean value) { UA_Variant v; UA_Variant_setScalar(&v, &value, &UA_TYPES[UA_TYPES_BOOLEAN]); @@ -300,6 +320,147 @@ UA_StatusCode handle_enable(UA_Server * server, const Condition & c, bool enable return set_two_state(server, c.node, "EnabledState", enable); } +#ifdef UA_ENABLE_ENCRYPTION + +// Read a whole file into a heap-allocated UA_ByteString. Returns an empty +// ByteString (length 0) on failure; the caller treats that as a fatal config +// error. open62541's OpenSSL backend auto-detects DER (0x30 0x82 magic) vs PEM +// for both certificates and keys, so the test harness can hand us a DER cert +// and a PEM key without us caring about the encoding here. +UA_ByteString load_file(const std::string & path) { + UA_ByteString out = UA_BYTESTRING_NULL; + std::ifstream f(path, std::ios::binary); + if (!f) { + return out; + } + std::string data((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + out.length = data.size(); + out.data = static_cast(UA_malloc(out.length)); + if (!out.data) { + out.length = 0; + return out; + } + std::memcpy(out.data, data.data(), out.length); + return out; +} + +// Chained ActivateSession callback. open62541's UA_AccessControl_default owns +// the real user-token check; we wrap it only to emit a deterministic, greppable +// line proving which SecurityPolicy + MessageSecurityMode the SecureChannel +// actually negotiated. The test harness asserts on ``securityMode=3`` (= +// SignAndEncrypt) so the suite cannot pass against a downgraded / None channel. +using ActivateSessionFn = UA_StatusCode (*)(UA_Server *, UA_AccessControl *, const UA_EndpointDescription *, + const UA_ByteString *, const UA_NodeId *, const UA_ExtensionObject *, + void **); +ActivateSessionFn g_orig_activate_session = nullptr; + +UA_StatusCode logging_activate_session(UA_Server * server, UA_AccessControl * ac, + const UA_EndpointDescription * endpoint, const UA_ByteString * channel_cert, + const UA_NodeId * session_id, const UA_ExtensionObject * user_token, + void ** session_ctx) { + std::string policy_uri; + int mode = -1; + if (endpoint) { + policy_uri.assign(reinterpret_cast(endpoint->securityPolicyUri.data), + endpoint->securityPolicyUri.length); + mode = static_cast(endpoint->securityMode); + } + std::cout << "SECURE_SESSION securityPolicyUri=" << policy_uri << " securityMode=" << mode + << " (1=None,2=Sign,3=SignAndEncrypt)" << std::endl; + UA_StatusCode rc = g_orig_activate_session ? g_orig_activate_session(server, ac, endpoint, channel_cert, session_id, + user_token, session_ctx) + : UA_STATUSCODE_BADINTERNALERROR; + std::cout << "SECURE_SESSION activate rc=" << UA_StatusCode_name(rc) << std::endl; + return rc; +} + +// Build a secured server config: Basic256Sha256 with Sign AND SignAndEncrypt, +// the client app-instance cert in the trust list, and username/password access +// control with anonymous login DISABLED. We use +// UA_ServerConfig_setDefaultWithSecurityPolicies, which (per OPC UA discovery) +// also exposes a SecurityPolicy=None endpoint - open62541's connect-by-URL +// client opens a transient None channel purely to GetEndpoints, then opens the +// real encrypted channel for the session. The negotiated session security is +// proven by the SECURE_SESSION log line (securityMode=3), not by the absence of +// a None endpoint. Returns a UA status code; on success the caller adds the +// conditions exactly as in the insecure path. +UA_StatusCode configure_secure(UA_ServerConfig * config, UA_UInt16 port, const std::string & cert_path, + const std::string & key_path, const std::string & trust_path, + const std::string & app_uri, const std::string & username, + const std::string & password) { + UA_ByteString cert = load_file(cert_path); + UA_ByteString key = load_file(key_path); + UA_ByteString trust = load_file(trust_path); + if (cert.length == 0 || key.length == 0) { + UA_LOG_ERROR(UA_Log_Stdout, UA_LOGCATEGORY_USERLAND, "secure: failed to read cert '%s' / key '%s'", + cert_path.c_str(), key_path.c_str()); + UA_ByteString_clear(&cert); + UA_ByteString_clear(&key); + UA_ByteString_clear(&trust); + return UA_STATUSCODE_BADCONFIGURATIONERROR; + } + + const UA_ByteString * trust_list = (trust.length > 0) ? &trust : nullptr; + size_t trust_list_size = (trust.length > 0) ? 1 : 0; + UA_StatusCode rc = UA_ServerConfig_setDefaultWithSecurityPolicies(config, port, &cert, &key, trust_list, + trust_list_size, nullptr, 0, nullptr, 0); + if (rc != UA_STATUSCODE_GOOD) { + UA_ByteString_clear(&cert); + UA_ByteString_clear(&key); + UA_ByteString_clear(&trust); + return rc; + } + + // The server's applicationUri MUST equal the URI SAN in its own certificate + // (setDefaultWithSecurityPolicies defaults it to urn:open62541.server.*). + // open62541 stamps it into every endpoint at startup. + UA_String_clear(&config->applicationDescription.applicationUri); + config->applicationDescription.applicationUri = UA_STRING_ALLOC(app_uri.c_str()); + + // Replace the permissive default access control (anonymous allowed) with a + // username/password policy that rejects anonymous logins outright. + // UA_AccessControl_default clears the previous access control itself (it + // calls accessControl.clear at entry), so we must NOT clear it here first - + // a manual clear double-frees the context and segfaults on open62541 1.3. + UA_UsernamePasswordLogin login; + login.username = UA_STRING_ALLOC(username.c_str()); + login.password = UA_STRING_ALLOC(password.c_str()); + // The user-token policy advertises Basic256Sha256 so the password is + // encrypted at the token layer too (on top of the encrypted channel). A null + // URI is NOT portable: open62541 1.3 dereferences it unconditionally and + // segfaults. Non-owning UA_STRING is fine - the plugin copies it. + UA_String token_policy_uri = + UA_STRING(const_cast("http://opcfoundation.org/UA/SecurityPolicy#Basic256Sha256")); + // open62541 <= 1.3 (bundled by open62541pp v0.16.0, used by the CMake fixture + // build) takes an extra UA_CertificateVerification* (the x509 user-token + // validator) before userTokenPolicyUri; 1.4+ (the docker image pin) dropped + // it. We only use username/password, so pass null for it either way. +#if defined(UA_OPEN62541_VER_MAJOR) && UA_OPEN62541_VER_MAJOR == 1 && UA_OPEN62541_VER_MINOR < 4 + rc = UA_AccessControl_default(config, false /*allowAnonymous*/, nullptr /*verifyX509*/, &token_policy_uri, 1, &login); +#else + rc = UA_AccessControl_default(config, false /*allowAnonymous*/, &token_policy_uri, 1, &login); +#endif + UA_String_clear(&login.username); + UA_String_clear(&login.password); + if (rc != UA_STATUSCODE_GOOD) { + UA_ByteString_clear(&cert); + UA_ByteString_clear(&key); + UA_ByteString_clear(&trust); + return rc; + } + + // Install the logging wrapper around the freshly-built access control. + g_orig_activate_session = config->accessControl.activateSession; + config->accessControl.activateSession = logging_activate_session; + + UA_ByteString_clear(&cert); + UA_ByteString_clear(&key); + UA_ByteString_clear(&trust); + return rc; +} + +#endif // UA_ENABLE_ENCRYPTION + void cli_loop(UA_Server * server) { std::string line; while (g_running && std::getline(std::cin, line)) { @@ -392,17 +553,53 @@ int main(int argc, char ** argv) { signal(SIGTERM, stop_handler); UA_UInt16 port = 4842; + bool secure = false; + std::string cert_path, key_path, trust_path, username = "medkit", password = "secret"; + std::string app_uri = "urn:test:alarms:server"; for (int i = 1; i < argc; ++i) { if (std::strcmp(argv[i], "--port") == 0 && i + 1 < argc) { port = static_cast(std::atoi(argv[++i])); + } else if (std::strcmp(argv[i], "--secure") == 0) { + secure = true; + } else if (std::strcmp(argv[i], "--cert") == 0 && i + 1 < argc) { + cert_path = argv[++i]; + } else if (std::strcmp(argv[i], "--key") == 0 && i + 1 < argc) { + key_path = argv[++i]; + } else if (std::strcmp(argv[i], "--trust") == 0 && i + 1 < argc) { + trust_path = argv[++i]; + } else if (std::strcmp(argv[i], "--app-uri") == 0 && i + 1 < argc) { + app_uri = argv[++i]; + } else if (std::strcmp(argv[i], "--username") == 0 && i + 1 < argc) { + username = argv[++i]; + } else if (std::strcmp(argv[i], "--password") == 0 && i + 1 < argc) { + password = argv[++i]; } } UA_Server * server = UA_Server_new(); UA_ServerConfig * config = UA_Server_getConfig(server); - UA_ServerConfig_setMinimal(config, port, nullptr); + if (secure) { +#ifdef UA_ENABLE_ENCRYPTION + UA_StatusCode src = configure_secure(config, port, cert_path, key_path, trust_path, app_uri, username, password); + if (src != UA_STATUSCODE_GOOD) { + UA_LOG_ERROR(UA_Log_Stdout, UA_LOGCATEGORY_USERLAND, "secure config failed: %s", UA_StatusCode_name(src)); + UA_Server_delete(server); + return 1; + } +#else + UA_LOG_ERROR(UA_Log_Stdout, UA_LOGCATEGORY_USERLAND, + "--secure requested but this build has no UA_ENABLE_ENCRYPTION support"); + UA_Server_delete(server); + return 1; +#endif + } else { + UA_ServerConfig_setMinimal(config, port, nullptr); + } UA_UInt16 ns = UA_Server_addNamespace(server, NS_URI); + if (add_variable(server, ns) != UA_STATUSCODE_GOOD) { + UA_LOG_WARNING(UA_Log_Stdout, UA_LOGCATEGORY_USERLAND, "Failed to register Tank.Level variable"); + } Condition op, oh, sl; if (add_condition(server, "Overpressure", ns, op) != UA_STATUSCODE_GOOD || @@ -416,7 +613,7 @@ int main(int argc, char ** argv) { g_conditions[oh.name] = oh; g_conditions[sl.name] = sl; - std::cout << "READY port=" << port << " namespace=" << ns << std::endl; + std::cout << "READY port=" << port << " namespace=" << ns << " secure=" << (secure ? "true" : "false") << std::endl; std::thread cli(cli_loop, server); UA_StatusCode rc = UA_Server_run(server, reinterpret_cast(&g_running)); diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/integration/gen_test_certs.sh b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/integration/gen_test_certs.sh new file mode 100755 index 000000000..4cd840994 --- /dev/null +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/integration/gen_test_certs.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# Copyright 2026 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Generate the throwaway OPC-UA application-instance certificates the secured +# A&C integration test (issue #477) needs. NOTHING is committed - the test +# regenerates these into a temp dir on every run. +# +# Produces, in the output directory: +# server_key.pem server_cert.der URI SAN urn:test:alarms:server +# client_key.pem client_cert.der URI SAN urn:selfpatch:medkit:opcua-client +# +# Certificates are DER (open62541 + the medkit client both auto-detect DER); +# keys are unencrypted RSA-2048 PEM. The server trust list gets the client +# cert; the gateway trust list gets the server cert. The URI SAN MUST equal the +# applicationUri each side advertises or open62541 rejects the channel with +# BadCertificateUriInvalid. +# +# Usage: gen_test_certs.sh [server-uri] [client-uri] + +set -euo pipefail + +OUT="${1:?usage: gen_test_certs.sh [server-uri] [client-uri]}" +SERVER_URI="${2:-urn:test:alarms:server}" +CLIENT_URI="${3:-urn:selfpatch:medkit:opcua-client}" + +mkdir -p "${OUT}" + +# Emit one self-signed application-instance cert (DER) + key (PEM). +# $1 basename $2 CN $3 URI SAN +gen_cert() { + local base="$1" cn="$2" uri="$3" + local cnf="${OUT}/${base}.cnf" + cat >"${cnf}" </dev/null 2>&1 + openssl x509 -in "${OUT}/${base}_cert.pem" -outform der -out "${OUT}/${base}_cert.der" +} + +gen_cert server "test-alarm-server" "${SERVER_URI}" +gen_cert client "selfpatch-medkit-opcua-client" "${CLIENT_URI}" + +echo "certs written to ${OUT}: server_cert.der client_cert.der (+ *_key.pem)" diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/integration/test_opcua_secured.test.py b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/integration/test_opcua_secured.test.py new file mode 100755 index 000000000..fa6b889c2 --- /dev/null +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/integration/test_opcua_secured.test.py @@ -0,0 +1,430 @@ +#!/usr/bin/env python3 +# Copyright 2026 mfaferek93 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Secured OPC-UA Alarms & Conditions integration test (issue #477). + +This is the success-path counterpart to the unit tests around +``apply_security_config()``: instead of only exercising the parsers, it +negotiates a real encrypted SecureChannel against a live open62541 server and +drives the native A&C bridge through it. + +Topology (all on the loopback, ephemeral ports): + + test_alarm_server --secure (Basic256Sha256 Sign + SignAndEncrypt, + | username/password, anonymous DISABLED) + | encrypted SecureChannel + UserName token + gateway_node (ros2_medkit_opcua) -> fault_manager_node + +POSITIVE (good credentials): + * SOVD x-plc-status reports connected == true (channel established). + * SOVD x-plc-data returns the Tank.Level value (a real read over the + encrypted channel, not just an event subscription). + * Firing an alarm over the server's stdin surfaces a CONFIRMED fault in + /api/v1/faults (secured A&C event subscription end-to-end). + * The server logs ``SECURE_SESSION ... securityMode=3`` (SignAndEncrypt), + proving the channel is encrypted and not None. + +NEGATIVE (wrong password): + * The gateway never reaches connected == true: fail-closed. + +Skips with the CTest convention (exit 77) unless +``ROS2_MEDKIT_OPCUA_SECURE_REQUIRE`` is set, mirroring the asyncua smoke-test +skip. When the flag is set, a missing prerequisite is a hard failure so a CI +job that loses a dependency cannot silently bypass the secured check. + +Usage: test_opcua_secured.test.py +""" + +import json +import os +from pathlib import Path +import shutil +import socket +import subprocess +import sys +import tempfile +import time +import urllib.error +import urllib.request + +CTEST_SKIP = 77 +USERNAME = 'medkit' +PASSWORD = 'secret' +WRONG_PASSWORD = 'definitely-not-the-password' +APP_URI_SERVER = 'urn:test:alarms:server' +APP_URI_CLIENT = 'urn:selfpatch:medkit:opcua-client' +ROS_DOMAIN_ID = '228' + +ALARM_CODE = 'PLC_OVERPRESSURE' + + +def require_flag(): + """Return True when the suite must run for real (CI) instead of skipping.""" + return os.environ.get('ROS2_MEDKIT_OPCUA_SECURE_REQUIRE', '0') not in ('', '0') + + +def skip_or_fail(reason): + """Return the CTest skip code, or fail hard when the run is required.""" + if require_flag(): + print(f'REQUIRED but prerequisite unmet: {reason}', file=sys.stderr) + return 1 + print(f'SKIP (set ROS2_MEDKIT_OPCUA_SECURE_REQUIRE to force): {reason}') + return CTEST_SKIP + + +def free_port(): + """Grab an OS-assigned free TCP port on the loopback and release it.""" + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('127.0.0.1', 0)) + return s.getsockname()[1] + + +def find_plugin(): + """Locate libros2_medkit_opcua_plugin.so under AMENT_PREFIX_PATH.""" + for prefix in os.environ.get('AMENT_PREFIX_PATH', '').split(os.pathsep): + if not prefix: + continue + for root, _dirs, files in os.walk(prefix): + if 'libros2_medkit_opcua_plugin.so' in files: + return os.path.join(root, 'libros2_medkit_opcua_plugin.so') + return None + + +def have_executable(package, executable): + """Return True when ``ros2 run `` is resolvable.""" + try: + out = subprocess.run( + ['ros2', 'pkg', 'executables', package], + capture_output=True, text=True, timeout=30, check=False, + ) + except (FileNotFoundError, subprocess.TimeoutExpired): + return False + return f'{package} {executable}' in out.stdout + + +def http_json(url, timeout=2): + """GET and parse JSON, or return None on any failure.""" + try: + with urllib.request.urlopen(url, timeout=timeout) as resp: + return json.loads(resp.read().decode()) + except (urllib.error.URLError, OSError, ValueError): + return None + + +def wait_json(url, predicate, deadline=60, period=2.0): + """Poll until predicate(json) is true; return the final json or None.""" + last = None + for _ in range(int(deadline / period) + 1): + last = http_json(url) + if last is not None: + try: + if predicate(last): + return last + except (KeyError, TypeError, ValueError): + pass + time.sleep(period) + return last + + +def wait_log(path, needle, deadline=30, period=0.5): + """Poll a log file until appears; return True/False.""" + for _ in range(int(deadline / period) + 1): + try: + if needle in Path(path).read_text(errors='replace'): + return True + except OSError: + pass + time.sleep(period) + return False + + +def write_node_map(path): + """Map the readable Tank.Level node + the 3 alarm sources into one entity.""" + path.write_text( + 'area_id: plc_systems\n' + 'component_id: alarm_test_runtime\n' + 'nodes:\n' + ' - node_id: "ns=2;s=Tank.Level"\n' + ' entity_id: tank_process\n' + ' data_name: tank_level\n' + ' data_type: float\n' + 'event_alarms:\n' + ' - alarm_source: "ns=2;s=Alarms.Overpressure"\n' + ' entity_id: tank_process\n' + ' fault_code: PLC_OVERPRESSURE\n' + ' - alarm_source: "ns=2;s=Alarms.Overheat"\n' + ' entity_id: tank_process\n' + ' fault_code: PLC_OVERHEAT\n' + ' - alarm_source: "ns=2;s=Alarms.SensorLost"\n' + ' entity_id: tank_process\n' + ' fault_code: PLC_SENSOR_LOST\n' + ) + + +def write_gateway_params(path, *, port, plugin, server_port, node_map, manifest, + certs, password): + """Render a gateway params file pointing the opcua plugin at the secure server.""" + path.write_text( + 'ros2_medkit_gateway:\n' + ' ros__parameters:\n' + ' server:\n' + ' host: "127.0.0.1"\n' + f' port: {port}\n' + ' plugins: ["opcua"]\n' + f' plugins.opcua.path: "{plugin}"\n' + f' plugins.opcua.endpoint_url: "opc.tcp://127.0.0.1:{server_port}"\n' + f' plugins.opcua.node_map_path: "{node_map}"\n' + ' plugins.opcua.poll_interval_ms: 500\n' + ' plugins.opcua.security_policy: "Basic256Sha256"\n' + ' plugins.opcua.security_mode: "SignAndEncrypt"\n' + f' plugins.opcua.client_cert_path: "{certs}/client_cert.der"\n' + f' plugins.opcua.client_key_path: "{certs}/client_key.pem"\n' + f' plugins.opcua.application_uri: "{APP_URI_CLIENT}"\n' + f' plugins.opcua.trust_list_paths: ["{certs}/server_cert.der"]\n' + ' plugins.opcua.reject_untrusted: true\n' + ' plugins.opcua.user_auth_mode: "UsernamePassword"\n' + f' plugins.opcua.username: "{USERNAME}"\n' + f' plugins.opcua.password: "{password}"\n' + ' discovery.mode: "hybrid"\n' + f' discovery.manifest_path: "{manifest}"\n' + ' discovery.manifest_strict_validation: false\n' + ) + + +def start_gateway(params_file, log_path, env): + """Launch a gateway_node bound to the given params file.""" + log = open(log_path, 'w') + proc = subprocess.Popen( + ['ros2', 'run', 'ros2_medkit_gateway', 'gateway_node', + '--ros-args', '--params-file', str(params_file)], + stdout=log, stderr=subprocess.STDOUT, env=env, + ) + proc._log = log # keep handle alive for close on teardown + return proc + + +def terminate(proc): + """Best-effort SIGTERM then SIGKILL of a child process.""" + if proc is None: + return + proc.terminate() + try: + proc.wait(timeout=8) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + log = getattr(proc, '_log', None) + if log is not None: + log.close() + + +def main(): + if len(sys.argv) < 3: + print('usage: test_opcua_secured.test.py ', + file=sys.stderr) + return 2 + server_bin = Path(sys.argv[1]).resolve() + gen_certs = Path(sys.argv[2]).resolve() + + # --- Prerequisite gating (skip 77 unless required) --------------------- + if shutil.which('openssl') is None: + return skip_or_fail('openssl CLI not available') + if shutil.which('ros2') is None: + return skip_or_fail('ros2 CLI not available') + if not (server_bin.is_file() and os.access(server_bin, os.X_OK)): + return skip_or_fail(f'secure fixture missing: {server_bin}') + if not gen_certs.is_file(): + return skip_or_fail(f'cert generator missing: {gen_certs}') + plugin = find_plugin() + if plugin is None: + return skip_or_fail('libros2_medkit_opcua_plugin.so not found') + if not have_executable('ros2_medkit_fault_manager', 'fault_manager_node'): + return skip_or_fail('ros2_medkit_fault_manager/fault_manager_node not built') + if not have_executable('ros2_medkit_gateway', 'gateway_node'): + return skip_or_fail('ros2_medkit_gateway/gateway_node not built') + + workdir = Path(tempfile.mkdtemp(prefix='opcua_secured_')) + certs = workdir / 'certs' + server_log = workdir / 'server.log' + env = dict(os.environ, ROS_DOMAIN_ID=ROS_DOMAIN_ID) + + server = fault_mgr = gw_good = gw_bad = None + try: + # --- Certificates (regenerated every run, never committed) --------- + subprocess.run(['bash', str(gen_certs), str(certs)], check=True, + capture_output=True, text=True) + + node_map = workdir / 'alarm_nodes.yaml' + manifest = workdir / 'manifest.yaml' + write_node_map(node_map) + manifest.write_text('manifest_version: "1.0"\n') + + server_port = free_port() + good_port = free_port() + bad_port = free_port() + + # --- Secure server fixture (stdin pipe for the alarm CLI) ---------- + slog = open(server_log, 'w') + server = subprocess.Popen( + [str(server_bin), '--secure', '--port', str(server_port), + '--cert', str(certs / 'server_cert.der'), + '--key', str(certs / 'server_key.pem'), + '--trust', str(certs / 'client_cert.der'), + '--app-uri', APP_URI_SERVER, + '--username', USERNAME, '--password', PASSWORD], + stdin=subprocess.PIPE, stdout=slog, stderr=subprocess.STDOUT, + text=True, + ) + if not wait_log(server_log, 'READY ', deadline=20): + print('secure server did not become READY', file=sys.stderr) + print(Path(server_log).read_text(errors='replace'), file=sys.stderr) + return 1 + + # --- fault_manager_node -------------------------------------------- + fault_mgr = subprocess.Popen( + ['ros2', 'run', 'ros2_medkit_fault_manager', 'fault_manager_node'], + stdout=open(workdir / 'fault_manager.log', 'w'), + stderr=subprocess.STDOUT, env=env, + ) + # Poll for the service the opcua plugin calls. + for _ in range(40): + out = subprocess.run(['ros2', 'service', 'list'], capture_output=True, + text=True, env=env, timeout=15, check=False) + if '/fault_manager/report_fault' in out.stdout: + break + time.sleep(0.5) + + # === POSITIVE: good credentials ==================================== + good_params = workdir / 'gateway_good.yaml' + write_gateway_params(good_params, port=good_port, plugin=plugin, + server_port=server_port, node_map=node_map, + manifest=manifest, certs=certs, password=PASSWORD) + gw_good = start_gateway(good_params, workdir / 'gateway_good.log', env) + + base = f'http://127.0.0.1:{good_port}/api/v1' + status = wait_json( + f'{base}/components/alarm_test_runtime/x-plc-status', + lambda j: j.get('connected') is True, deadline=70) + if not (status and status.get('connected') is True): + print('FAIL: gateway did not connect over the secure channel', file=sys.stderr) + print('server log:\n' + Path(server_log).read_text(errors='replace'), file=sys.stderr) + gw_tail = Path(workdir / 'gateway_good.log').read_text(errors='replace')[-3000:] + print('gateway log tail:\n' + gw_tail, file=sys.stderr) + return 1 + print(' OK secured connect: x-plc-status connected == true') + + # Proof the channel is encrypted (SignAndEncrypt = securityMode 3), + # emitted by the server on session activation. Fail hard on None. + if not wait_log(server_log, 'securityMode=3', deadline=20): + print('FAIL: server never logged an encrypted (SignAndEncrypt) session', + file=sys.stderr) + print(Path(server_log).read_text(errors='replace'), file=sys.stderr) + return 1 + if 'securityPolicyUri=http://opcfoundation.org/UA/SecurityPolicy#Basic256Sha256' \ + not in Path(server_log).read_text(errors='replace'): + print('FAIL: negotiated SecurityPolicy was not Basic256Sha256', file=sys.stderr) + print(Path(server_log).read_text(errors='replace'), file=sys.stderr) + return 1 + print(' OK encrypted channel: Basic256Sha256 securityMode=3 (SignAndEncrypt)') + + # Secured read: Tank.Level value over the encrypted channel. + data = wait_json( + f'{base}/apps/tank_process/x-plc-data', + lambda j: any(i.get('name') == 'tank_level' and i.get('value') is not None + for i in j.get('items', [])), deadline=30) + value = None + if data: + value = next((i.get('value') for i in data.get('items', []) + if i.get('name') == 'tank_level'), None) + if value is None: + print('FAIL: secured tag read returned no value', file=sys.stderr) + print(json.dumps(data), file=sys.stderr) + return 1 + print(f' OK secured read: tank_level = {value}') + + # Secured A&C: fire an alarm over the server stdin -> CONFIRMED fault. + server.stdin.write('fire Overpressure 750\n') + server.stdin.flush() + faults = wait_json( + f'{base}/faults', + lambda j: any(i.get('fault_code') == ALARM_CODE and i.get('status') == 'CONFIRMED' + for i in j.get('items', [])), deadline=40) + if not (faults and any(i.get('fault_code') == ALARM_CODE and i.get('status') == 'CONFIRMED' + for i in faults.get('items', []))): + print('FAIL: alarm did not surface as a CONFIRMED fault', file=sys.stderr) + print(json.dumps(faults), file=sys.stderr) + print('server log:\n' + Path(server_log).read_text(errors='replace'), file=sys.stderr) + return 1 + print(' OK secured A&C: PLC_OVERPRESSURE CONFIRMED via encrypted event subscription') + + terminate(gw_good) + gw_good = None + + # === NEGATIVE: wrong password must fail closed ===================== + bad_params = workdir / 'gateway_bad.yaml' + write_gateway_params(bad_params, port=bad_port, plugin=plugin, + server_port=server_port, node_map=node_map, + manifest=manifest, certs=certs, password=WRONG_PASSWORD) + bad_log = workdir / 'gateway_bad.log' + gw_bad = start_gateway(bad_params, bad_log, env) + + bad_base = f'http://127.0.0.1:{bad_port}/api/v1' + # Fail-closed = the server rejects the wrong password and NO secured read + # ever succeeds. We assert both: (1) the client logs BadUserAccessDenied + # (the session never activates), and (2) the tag value is never served. + # We key off the secured read rather than x-plc-status.connected because + # a rejected ActivateSession leaves the underlying SecureChannel briefly + # open, which can make the coarse connected flag flap; no authenticated + # read can ever complete over it, and that is the real security property. + rejected = wait_log(bad_log, 'BadUserAccessDenied', deadline=40) + read_leaked = False + for _ in range(10): + d = http_json(f'{bad_base}/apps/tank_process/x-plc-data') + if d and any(i.get('name') == 'tank_level' and i.get('value') is not None + for i in d.get('items', [])): + read_leaked = True + break + time.sleep(2) + if not rejected or read_leaked: + print('FAIL: wrong password did NOT fail closed ' + f'(auth_rejected={rejected}, read_leaked={read_leaked})', file=sys.stderr) + print('gateway(bad) log tail:\n' + + bad_log.read_text(errors='replace')[-3000:], file=sys.stderr) + return 1 + print(' OK fail-closed: wrong password rejected (BadUserAccessDenied)') + + print('PASS: secured OPC-UA A&C integration test') + return 0 + finally: + terminate(gw_good) + terminate(gw_bad) + terminate(fault_mgr) + if server is not None: + try: + if server.stdin: + server.stdin.close() + except OSError: + pass + terminate(server) + try: + slog.close() + except (NameError, OSError): + pass + shutil.rmtree(workdir, ignore_errors=True) + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_alarm_state_machine.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_alarm_state_machine.cpp index 918a68d40..207634c7c 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_alarm_state_machine.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_alarm_state_machine.cpp @@ -314,4 +314,46 @@ TEST(AlarmStateMachineTest, InactiveUnackedFromSuppressedReportsHealed) { EXPECT_EQ(out.action, AlarmAction::ReportHealed); } +// -- Issue #478: require_confirm_for_clear (Siemens Confirm-less clear path) -- + +TEST(AlarmStateMachineTest, AckedNotConfirmedLatchesWhenConfirmRequiredDefault) { + // Default behaviour (require_confirm_for_clear=true): an acked-but-not- + // confirmed inactive alarm stays latched (HEALED). No regression. + AlarmEventInput in; + in.enabled_state = true; + in.active_state = false; + in.acked_state = true; + in.confirmed_state = false; + auto out = AlarmStateMachine::compute(SovdAlarmStatus::Confirmed, in); // default true + EXPECT_EQ(out.next_status, SovdAlarmStatus::Healed); + EXPECT_EQ(out.action, AlarmAction::ReportHealed); +} + +TEST(AlarmStateMachineTest, AckedClearsWhenConfirmNotRequired) { + // Confirm-less server (S7-1500): with require_confirm_for_clear=false an + // acknowledged inactive alarm clears even though ConfirmedState never + // becomes true. + AlarmEventInput in; + in.enabled_state = true; + in.active_state = false; + in.acked_state = true; + in.confirmed_state = false; + auto out = AlarmStateMachine::compute(SovdAlarmStatus::Confirmed, in, /*require_confirm_for_clear=*/false); + EXPECT_EQ(out.next_status, SovdAlarmStatus::Cleared); + EXPECT_EQ(out.action, AlarmAction::ClearFault); +} + +TEST(AlarmStateMachineTest, UnackedStillLatchesWhenConfirmNotRequired) { + // Even with the Confirm gate dropped, an UN-acknowledged inactive alarm must + // still latch (HEALED): acknowledgement is the minimum operator action. + AlarmEventInput in; + in.enabled_state = true; + in.active_state = false; + in.acked_state = false; + in.confirmed_state = false; + auto out = AlarmStateMachine::compute(SovdAlarmStatus::Confirmed, in, /*require_confirm_for_clear=*/false); + EXPECT_EQ(out.next_status, SovdAlarmStatus::Healed); + EXPECT_EQ(out.action, AlarmAction::ReportHealed); +} + } // namespace ros2_medkit_gateway diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp index 218da4843..e4e4e0b8e 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_node_map.cpp @@ -605,4 +605,236 @@ component_id: test EXPECT_FALSE(map.load(path)); } +// --------------------------------------------------------------------------- +// Issue #389: multi-alarm mapping + associated values. +// --------------------------------------------------------------------------- + +TEST(ResolveAlarmTest, FallsBackToSourceFaultCode) { + AlarmEventConfig cfg; + cfg.entity_id = "tank"; + cfg.fault_code = "PLC_GENERIC"; + cfg.severity_override = "WARNING"; + auto r = NodeMap::resolve_alarm(cfg, "AnyName", "ns=2;s=Src", "ns=0;i=2915"); + EXPECT_TRUE(r.matched); + EXPECT_EQ(r.fault_code, "PLC_GENERIC"); + EXPECT_EQ(r.severity_override, "WARNING"); +} + +TEST(ResolveAlarmTest, NoMappingNoFallbackIsUnmatched) { + AlarmEventConfig cfg; + cfg.entity_id = "tank"; // no fault_code, no mappings + auto r = NodeMap::resolve_alarm(cfg, "X", "Y", "Z"); + EXPECT_FALSE(r.matched); +} + +TEST(ResolveAlarmTest, FirstMatchingMappingWinsByConditionName) { + AlarmEventConfig cfg; + cfg.entity_id = "tank"; + cfg.fault_code = "PLC_CATCHALL"; + cfg.mappings.push_back({/*cond*/ "Overpressure", "", "", "PLC_OVERPRESSURE", "ERROR", "Overpressure!"}); + cfg.mappings.push_back({/*cond*/ "LowLevel", "", "", "PLC_LOW_LEVEL", "WARNING", "Low level"}); + + auto r1 = NodeMap::resolve_alarm(cfg, "Overpressure", "ns=2;s=Tank", "ns=0;i=2915"); + EXPECT_TRUE(r1.matched); + EXPECT_EQ(r1.fault_code, "PLC_OVERPRESSURE"); + EXPECT_EQ(r1.severity_override, "ERROR"); + EXPECT_EQ(r1.message_override, "Overpressure!"); + + auto r2 = NodeMap::resolve_alarm(cfg, "LowLevel", "ns=2;s=Tank", "ns=0;i=2915"); + EXPECT_EQ(r2.fault_code, "PLC_LOW_LEVEL"); + + // Unmatched condition name -> source-level catch-all. + auto r3 = NodeMap::resolve_alarm(cfg, "Unknown", "ns=2;s=Tank", "ns=0;i=2915"); + EXPECT_TRUE(r3.matched); + EXPECT_EQ(r3.fault_code, "PLC_CATCHALL"); +} + +TEST(ResolveAlarmTest, MatchBySourceNodeAndEventType) { + AlarmEventConfig cfg; + cfg.entity_id = "tank"; + cfg.mappings.push_back({"", "ns=2;s=PumpA", "ns=0;i=2915", "PLC_PUMP_A", "", ""}); + // SourceNode mismatch -> unmatched (no fallback fault_code). + EXPECT_FALSE(NodeMap::resolve_alarm(cfg, "X", "ns=2;s=PumpB", "ns=0;i=2915").matched); + // Both match. + auto r = NodeMap::resolve_alarm(cfg, "X", "ns=2;s=PumpA", "ns=0;i=2915"); + EXPECT_TRUE(r.matched); + EXPECT_EQ(r.fault_code, "PLC_PUMP_A"); +} + +TEST(ResolveAlarmTest, MappingInheritsSourceLevelOverridesWhenUnset) { + AlarmEventConfig cfg; + cfg.entity_id = "tank"; + cfg.severity_override = "CRITICAL"; + cfg.message_override = "src-msg"; + cfg.mappings.push_back({"Cond", "", "", "PLC_X", "", ""}); // no per-mapping overrides + auto r = NodeMap::resolve_alarm(cfg, "Cond", "s", "e"); + EXPECT_EQ(r.severity_override, "CRITICAL"); + EXPECT_EQ(r.message_override, "src-msg"); +} + +TEST_F(NodeMapTest, LoadsMappingsAndAssociatedValues) { + std::string path = "/tmp/test_node_map_mappings.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +event_alarms: + - alarm_source: "ns=0;i=2253" + entity_id: plant + fault_code: PLC_GENERIC + mappings: + - condition_name: "Overpressure" + fault_code: PLC_OVERPRESSURE + severity_override: ERROR + message: "Tank overpressure" + - condition_name: "LowLevel" + fault_code: PLC_LOW_LEVEL + associated_values: + - "SD_1" + - name: "SD_2" + label: "Setpoint" +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.event_alarms().size(), 1u); + const auto & cfg = map.event_alarms()[0]; + EXPECT_EQ(cfg.fault_code, "PLC_GENERIC"); + ASSERT_EQ(cfg.mappings.size(), 2u); + EXPECT_EQ(cfg.mappings[0].match_condition_name, "Overpressure"); + EXPECT_EQ(cfg.mappings[0].fault_code, "PLC_OVERPRESSURE"); + ASSERT_EQ(cfg.associated_values.size(), 2u); + EXPECT_EQ(cfg.associated_values[0].name, "SD_1"); + EXPECT_EQ(cfg.associated_values[0].label, "SD_1"); // defaults to name + EXPECT_EQ(cfg.associated_values[1].label, "Setpoint"); + + // find_event_alarm resolves a mapping fault_code, not just the source code. + EXPECT_NE(map.find_event_alarm("plant", "PLC_LOW_LEVEL"), nullptr); + EXPECT_NE(map.find_event_alarm("plant", "PLC_GENERIC"), nullptr); +} + +TEST_F(NodeMapTest, MappingOnlyEntryNeedsNoSourceFaultCode) { + std::string path = "/tmp/test_node_map_mapping_only.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +event_alarms: + - alarm_source: "ns=0;i=2253" + entity_id: plant + mappings: + - condition_name: "A" + fault_code: PLC_A +)"; + f.close(); + + NodeMap map; + ASSERT_TRUE(map.load(path)); + ASSERT_EQ(map.event_alarms().size(), 1u); + EXPECT_TRUE(map.event_alarms()[0].fault_code.empty()); + EXPECT_EQ(map.event_alarms()[0].mappings.size(), 1u); +} + +TEST_F(NodeMapTest, RejectsMappingFaultCodeCollisionWithThreshold) { + // A mapping fault_code that collides with a threshold alarm fault_code on + // the same entity must be rejected, same as a source-level collision. + std::string path = "/tmp/test_node_map_mapping_collision.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +nodes: + - node_id: "ns=1;i=1" + entity_id: tank + data_name: pressure + alarm: + fault_code: PLC_OVERPRESSURE + threshold: 90.0 +event_alarms: + - alarm_source: "ns=0;i=2253" + entity_id: tank + mappings: + - condition_name: "Overpressure" + fault_code: PLC_OVERPRESSURE +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, RejectsDuplicateFaultCodeAcrossEventAlarms) { + // Issue #480: two event_alarms resolving to the same (entity_id, fault_code) + // must be rejected, not silently deduped. At runtime distinct ConditionIds + // are tracked separately and each reports, but a ClearFault is keyed on + // fault_code alone, so one condition clearing would wipe the other's fault + // while it is still active (flap). + std::string path = "/tmp/test_node_map_dup_event_alarm.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +event_alarms: + - alarm_source: "ns=2;s=Alarms.PumpA" + entity_id: pump + fault_code: PLC_PUMP_FAULT + - alarm_source: "ns=2;s=Alarms.PumpB" + entity_id: pump + fault_code: PLC_PUMP_FAULT +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, RejectsDuplicateFaultCodeAcrossEventAlarmMappings) { + // Same flap hazard when the duplicate comes from two mappings (in different + // sources) rather than two source-level fault_codes. + std::string path = "/tmp/test_node_map_dup_event_alarm_mapping.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +event_alarms: + - alarm_source: "ns=2;s=Alarms.A" + entity_id: line + mappings: + - condition_name: "Jam" + fault_code: PLC_LINE_STOP + - alarm_source: "ns=2;s=Alarms.B" + entity_id: line + mappings: + - condition_name: "EStop" + fault_code: PLC_LINE_STOP +)"; + f.close(); + + NodeMap map; + EXPECT_FALSE(map.load(path)); +} + +TEST_F(NodeMapTest, AcceptsSameFaultCodeOnDifferentEntities) { + // The guard keys on (entity_id, fault_code); reusing a fault_code across + // distinct entities is legitimate and must still load. + std::string path = "/tmp/test_node_map_same_code_diff_entity.yaml"; + std::ofstream f(path); + f << R"( +area_id: test +component_id: test +event_alarms: + - alarm_source: "ns=2;s=Alarms.PumpA" + entity_id: pump_a + fault_code: PLC_PUMP_FAULT + - alarm_source: "ns=2;s=Alarms.PumpB" + entity_id: pump_b + fault_code: PLC_PUMP_FAULT +)"; + f.close(); + + NodeMap map; + EXPECT_TRUE(map.load(path)); +} + } // namespace ros2_medkit_gateway diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_client.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_client.cpp index 66eb59056..d015014d7 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_client.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_client.cpp @@ -59,6 +59,24 @@ TEST(OpcuaClientTest, ReadWhenDisconnected) { EXPECT_FALSE(result.good); } +// Issue #389 (read-based active-condition replay): browsing for conditions on a +// disconnected client returns nothing rather than throwing. +TEST(OpcuaClientTest, ReadSourceConditionsWhenDisconnected) { + OpcuaClient client; + auto conds = client.read_source_conditions(opcua::NodeId(0, UA_NS0ID_SERVER)); + EXPECT_TRUE(conds.empty()); +} + +// Issue #479: a disconnected scan must report scan_ok=false so the poller does +// NOT treat the empty result as "no active conditions" and falsely clear faults. +TEST(OpcuaClientTest, ReadSourceConditionsReportsScanFailureWhenDisconnected) { + OpcuaClient client; + bool scan_ok = true; // must be flipped to false + auto conds = client.read_source_conditions(opcua::NodeId(0, UA_NS0ID_SERVER), &scan_ok); + EXPECT_TRUE(conds.empty()); + EXPECT_FALSE(scan_ok); +} + TEST(OpcuaClientTest, WriteWhenDisconnected) { OpcuaClient client; EXPECT_FALSE(client.write_value({1, "SomeNode"}, 42.0)); @@ -92,6 +110,144 @@ TEST(OpcuaClientTest, WriteValueWithTypeHintDisconnected) { EXPECT_EQ(result.error().code, OpcuaClient::WriteError::NotConnected); } +// --------------------------------------------------------------------------- +// Issue #389: OPC-UA client security config parsing (pure helpers, no server). +// --------------------------------------------------------------------------- + +TEST(OpcuaClientSecurity, ParseSecurityPolicy) { + bool ok = false; + EXPECT_EQ(OpcuaClient::parse_security_policy("None", &ok), SecurityPolicy::None); + EXPECT_TRUE(ok); + EXPECT_EQ(OpcuaClient::parse_security_policy("", &ok), SecurityPolicy::None); + EXPECT_TRUE(ok); + EXPECT_EQ(OpcuaClient::parse_security_policy("basic256sha256"), SecurityPolicy::Basic256Sha256); + EXPECT_EQ(OpcuaClient::parse_security_policy("Basic256Sha256"), SecurityPolicy::Basic256Sha256); + EXPECT_EQ(OpcuaClient::parse_security_policy("Aes128Sha256RsaOaep"), SecurityPolicy::Aes128Sha256RsaOaep); + EXPECT_EQ(OpcuaClient::parse_security_policy("Aes256_Sha256_RsaPss"), SecurityPolicy::Aes256Sha256RsaPss); + // Unknown -> safe default + ok=false. + EXPECT_EQ(OpcuaClient::parse_security_policy("Bogus", &ok), SecurityPolicy::None); + EXPECT_FALSE(ok); +} + +TEST(OpcuaClientSecurity, SecurityPolicyUri) { + EXPECT_EQ(OpcuaClient::security_policy_uri(SecurityPolicy::None), "http://opcfoundation.org/UA/SecurityPolicy#None"); + EXPECT_EQ(OpcuaClient::security_policy_uri(SecurityPolicy::Basic256Sha256), + "http://opcfoundation.org/UA/SecurityPolicy#Basic256Sha256"); + EXPECT_EQ(OpcuaClient::security_policy_uri(SecurityPolicy::Aes128Sha256RsaOaep), + "http://opcfoundation.org/UA/SecurityPolicy#Aes128_Sha256_RsaOaep"); + EXPECT_EQ(OpcuaClient::security_policy_uri(SecurityPolicy::Aes256Sha256RsaPss), + "http://opcfoundation.org/UA/SecurityPolicy#Aes256_Sha256_RsaPss"); +} + +TEST(OpcuaClientSecurity, ParseSecurityMode) { + bool ok = false; + EXPECT_EQ(OpcuaClient::parse_security_mode("none", &ok), SecurityMode::None); + EXPECT_TRUE(ok); + EXPECT_EQ(OpcuaClient::parse_security_mode("Sign"), SecurityMode::Sign); + EXPECT_EQ(OpcuaClient::parse_security_mode("SignAndEncrypt"), SecurityMode::SignAndEncrypt); + EXPECT_EQ(OpcuaClient::parse_security_mode("sign_and_encrypt"), SecurityMode::SignAndEncrypt); + EXPECT_EQ(OpcuaClient::parse_security_mode("bogus", &ok), SecurityMode::None); + EXPECT_FALSE(ok); +} + +TEST(OpcuaClientSecurity, ParseUserAuthMode) { + EXPECT_EQ(OpcuaClient::parse_user_auth_mode("anonymous"), UserAuthMode::Anonymous); + EXPECT_EQ(OpcuaClient::parse_user_auth_mode(""), UserAuthMode::Anonymous); + EXPECT_EQ(OpcuaClient::parse_user_auth_mode("username"), UserAuthMode::UsernamePassword); + EXPECT_EQ(OpcuaClient::parse_user_auth_mode("UsernamePassword"), UserAuthMode::UsernamePassword); + EXPECT_EQ(OpcuaClient::parse_user_auth_mode("x509"), UserAuthMode::X509); + EXPECT_EQ(OpcuaClient::parse_user_auth_mode("certificate"), UserAuthMode::X509); +} + +TEST(OpcuaClientSecurity, RequiresSecureChannel) { + OpcuaClientConfig cfg; + EXPECT_FALSE(OpcuaClient::requires_secure_channel(cfg)); // defaults: None/None + cfg.user_auth_mode = UserAuthMode::UsernamePassword; + // Username/password alone does NOT imply an encrypted channel. + EXPECT_FALSE(OpcuaClient::requires_secure_channel(cfg)); + cfg.security_mode = SecurityMode::Sign; + EXPECT_TRUE(OpcuaClient::requires_secure_channel(cfg)); + cfg.security_mode = SecurityMode::None; + cfg.security_policy = SecurityPolicy::Basic256Sha256; + EXPECT_TRUE(OpcuaClient::requires_secure_channel(cfg)); +} + +// Issue #478: non-anonymous credentials on an unencrypted channel must be +// flagged as cleartext so the plugin can warn loudly. +TEST(OpcuaClientSecurity, CredentialsSentInClear) { + OpcuaClientConfig cfg; + // Anonymous on a plain channel: nothing to leak. + EXPECT_FALSE(OpcuaClient::credentials_sent_in_clear(cfg)); + + // Username/password with no secured channel: credentials travel in clear. + cfg.user_auth_mode = UserAuthMode::UsernamePassword; + EXPECT_TRUE(OpcuaClient::credentials_sent_in_clear(cfg)); + + // X.509 identity with no secured channel: still cleartext. + cfg.user_auth_mode = UserAuthMode::X509; + EXPECT_TRUE(OpcuaClient::credentials_sent_in_clear(cfg)); + + // Once the channel is signed, credentials are protected. + cfg.security_mode = SecurityMode::Sign; + EXPECT_FALSE(OpcuaClient::credentials_sent_in_clear(cfg)); + + // Encryption via the policy alone (mode None) also protects them. + cfg.security_mode = SecurityMode::None; + cfg.security_policy = SecurityPolicy::Basic256Sha256; + EXPECT_FALSE(OpcuaClient::credentials_sent_in_clear(cfg)); + + // Anonymous is never cleartext regardless of channel. + cfg.user_auth_mode = UserAuthMode::Anonymous; + cfg.security_policy = SecurityPolicy::None; + EXPECT_FALSE(OpcuaClient::credentials_sent_in_clear(cfg)); +} + +TEST(OpcuaClientSecurity, SecurityConfigConflict) { + OpcuaClientConfig cfg; + // Both None (default): consistent, no conflict. + EXPECT_FALSE(OpcuaClient::security_config_conflict(cfg)); + + // Mode set but policy None: the policy URI would be left unpinned - conflict. + cfg.security_mode = SecurityMode::Sign; + EXPECT_TRUE(OpcuaClient::security_config_conflict(cfg)); + cfg.security_mode = SecurityMode::SignAndEncrypt; + EXPECT_TRUE(OpcuaClient::security_config_conflict(cfg)); + + // Both set: consistent. + cfg.security_policy = SecurityPolicy::Basic256Sha256; + EXPECT_FALSE(OpcuaClient::security_config_conflict(cfg)); + + // Policy set but mode None: equally malformed - conflict. + cfg.security_mode = SecurityMode::None; + EXPECT_TRUE(OpcuaClient::security_config_conflict(cfg)); +} + +TEST(OpcuaClientSecurity, ConnectRejectsContradictorySecurityConfig) { + // security_mode=Sign with security_policy=None must fail before touching the + // network (would otherwise silently run an unpinned-policy channel). + OpcuaClient client; + OpcuaClientConfig cfg; + cfg.endpoint_url = "opc.tcp://localhost:49997"; + cfg.connect_timeout = std::chrono::milliseconds(500); + cfg.security_mode = SecurityMode::Sign; + cfg.security_policy = SecurityPolicy::None; + EXPECT_FALSE(client.connect(cfg)); + EXPECT_FALSE(client.is_connected()); +} + +TEST(OpcuaClientSecurity, ConnectSecureWithoutCertFailsFast) { + // A secured profile with no client cert/key must fail config-time, before + // touching the network, and leave the client disconnected. + OpcuaClient client; + OpcuaClientConfig cfg; + cfg.endpoint_url = "opc.tcp://localhost:49998"; + cfg.connect_timeout = std::chrono::milliseconds(500); + cfg.security_policy = SecurityPolicy::Basic256Sha256; + cfg.security_mode = SecurityMode::SignAndEncrypt; + EXPECT_FALSE(client.connect(cfg)); + EXPECT_FALSE(client.is_connected()); +} + TEST(OpcuaClientTest, CurrentConfigPersistence) { OpcuaClient client; OpcuaClientConfig cfg; diff --git a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp index bda73f832..f0d0da550 100644 --- a/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp +++ b/src/ros2_medkit_plugins/ros2_medkit_opcua/test/test_opcua_plugin.cpp @@ -294,4 +294,177 @@ TEST_F(OpcuaPluginTest, GetFaultNotFound) { EXPECT_EQ(result.error().code, FaultProviderError::FaultNotFound); } +// Issue #389: condition-replay strategy parsing (used by the poller's +// reconnect / restart active-condition replay). +TEST(ConditionReplayStrategyParse, KnownValues) { + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("method"), ConditionReplayStrategy::Method); + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("read"), ConditionReplayStrategy::Read); + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("read_fallback"), ConditionReplayStrategy::Read); + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("off"), ConditionReplayStrategy::Off); + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("auto"), ConditionReplayStrategy::Auto); + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("AUTO"), ConditionReplayStrategy::Auto); + // Unknown -> safe default (Auto: method with read fallback). + EXPECT_EQ(OpcuaPoller::parse_replay_strategy("bogus"), ConditionReplayStrategy::Auto); +} + +// -- Issue #479/#478: read-replay reconcile must not falsely clear faults ----- + +TEST(ReconcileShouldClearCondition, ClearsActiveConditionAbsentFromSuccessfulScan) { + // An active condition that the (successful) scan did not observe is gone - + // but only when its source is positively known to model condition nodes. + std::set seen; // condition not seen this scan + std::set failed_sources; // its source scanned fine + std::set modeled_sources{"src-1"}; // source exposes condition nodes + EXPECT_TRUE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Confirmed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); + EXPECT_TRUE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Healed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); +} + +TEST(ReconcileShouldClearCondition, KeepsConditionStillSeen) { + std::set seen{"cond-1"}; + std::set failed_sources; + std::set modeled_sources{"src-1"}; + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Confirmed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); +} + +TEST(ReconcileShouldClearCondition, SkipsClearWhenSourceScanFailed) { + // The false-clear guard: source scan failed (browse error / disconnect), + // so the condition's absence from ``seen`` must NOT clear it. + std::set seen; // not observed... + std::set failed_sources{"src-1"}; // ...because its source failed + std::set modeled_sources{"src-1"}; + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Confirmed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Healed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); +} + +// Issue #478 safety-gate: an empty scan from a source that has NEVER yielded a +// condition instance node (EventNotifier-only server, e.g. S7-1500) must NOT +// clear the still-active tracked fault. This is the single most important +// outcome of the hardening. +TEST(ReconcileShouldClearCondition, NeverClearsWhenSourceNotPositivelyModeled) { + std::set seen; // condition not seen (server has no nodes) + std::set failed_sources; // the browse itself succeeded (empty) + std::set modeled_sources; // src-1 has never yielded a condition node + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Confirmed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Healed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); +} + +TEST(ReconcileShouldClearCondition, NeverClearsInactiveCondition) { + std::set seen; + std::set failed_sources; + std::set modeled_sources{"src-1"}; + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Cleared, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); + EXPECT_FALSE(OpcuaPoller::should_clear_condition(SovdAlarmStatus::Suppressed, "cond-1", "src-1", seen, failed_sources, + modeled_sources)); +} + +// -- Issue #480: ConditionRefresh burst reconcile must clear a tracked fault +// that the server did NOT replay (it cleared while we were offline). Unlike the +// read fallback, a delivered RefreshEnd is an authoritative subscription-wide +// replay, so there is no per-source modeling gate. -------------------------- + +TEST(ReconcileAfterRefresh, ClearsActiveConditionNotReplayed) { + // Active before the reconnect, absent from the RefreshStart..RefreshEnd burst + // => the alarm cleared offline and its latched fault must reconcile away. + std::set seen; // server replayed nothing (no active conditions) + EXPECT_TRUE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Confirmed, "cond-1", seen)); + EXPECT_TRUE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Healed, "cond-1", seen)); +} + +TEST(ReconcileAfterRefresh, KeepsConditionReplayedInBurst) { + // The server replayed this ConditionId during the burst => still active, keep. + std::set seen{"cond-1"}; + EXPECT_FALSE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Confirmed, "cond-1", seen)); + EXPECT_FALSE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Healed, "cond-1", seen)); +} + +TEST(ReconcileAfterRefresh, NeverClearsInactiveCondition) { + // A condition that was never active (Suppressed/Cleared) is not a latched + // fault and must not be touched even when absent from the burst. + std::set seen; + EXPECT_FALSE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Cleared, "cond-1", seen)); + EXPECT_FALSE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Suppressed, "cond-1", seen)); +} + +TEST(ReconcileAfterRefresh, KeepsOtherActiveConditionsWhenOneReplayed) { + // Peer condition (same fault_code family, different ConditionId) replayed; + // the un-replayed one still clears. Guards the distinct-ConditionId keying. + std::set seen{"cond-keep"}; + EXPECT_FALSE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Confirmed, "cond-keep", seen)); + EXPECT_TRUE(OpcuaPoller::should_clear_after_refresh(SovdAlarmStatus::Confirmed, "cond-gone", seen)); +} + +// -- Issue #478: read-scan snapshot classification (Retain / EnabledState / +// transient-keep filter that mirrors ConditionRefresh semantics). -------------- + +namespace { +OpcuaClient::ConditionStateSnapshot make_snap(bool enabled, bool active, bool retain, bool read_failed = false) { + OpcuaClient::ConditionStateSnapshot s; + s.enabled_state = enabled; + s.active_state = active; + s.retain = retain; + s.state_read_failed = read_failed; + return s; +} +} // namespace + +TEST(ReadSnapshotClassify, RetainTrueActiveIsFed) { + auto d = OpcuaPoller::classify_read_snapshot(make_snap(/*enabled=*/true, /*active=*/true, /*retain=*/true)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::Feed); +} + +TEST(ReadSnapshotClassify, RetainTrueInactiveStillInterestingIsFed) { + // ActiveState=false but Retain=true => unacked/unconfirmed, still of interest + // (ConditionRefresh would replay it). Fed so the HEALED latch is preserved. + auto d = OpcuaPoller::classify_read_snapshot(make_snap(/*enabled=*/true, /*active=*/false, /*retain=*/true)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::Feed); +} + +TEST(ReadSnapshotClassify, RetainFalseIsSkipped) { + // Retain=false => no longer interesting; not seen so a stale fault reconciles. + auto d = OpcuaPoller::classify_read_snapshot(make_snap(/*enabled=*/true, /*active=*/false, /*retain=*/false)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::Skip); +} + +TEST(ReadSnapshotClassify, DisabledIsSkippedEvenIfActiveAndRetained) { + // EnabledState=false must never be treated as active in the read path, even + // when ActiveState/Retain would otherwise mark it interesting. + auto d = OpcuaPoller::classify_read_snapshot(make_snap(/*enabled=*/false, /*active=*/true, /*retain=*/true)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::Skip); +} + +TEST(ReadSnapshotClassify, TransientReadFailureIsKeptNotFed) { + // A transient read/browse failure (flagged) keeps the condition (so reconcile + // does not clear it) but does not feed an unreliable state into the machine. + // KeepOnly takes precedence over every other field. + auto d = OpcuaPoller::classify_read_snapshot( + make_snap(/*enabled=*/false, /*active=*/false, /*retain=*/false, /*read_failed=*/true)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::KeepOnly); +} + +TEST(ReadSnapshotClassify, ActiveWithUnreadableRetainIsFedNotSkipped) { + // Regression (issue #478): Retain defaults false when its optional node is + // unreadable this scan. A reliably-active condition must be Fed (so it lands + // in ``seen``), never Skipped on the Retain check - otherwise reconcile would + // clear a still-active fault on a modeled source. + auto d = OpcuaPoller::classify_read_snapshot(make_snap(/*enabled=*/true, /*active=*/true, /*retain=*/false)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::Feed); +} + +TEST(ReadSnapshotClassify, ActiveButFlaggedUnreliableIsKeptNotFed) { + // KeepOnly still wins over the active fast-path: an active condition whose + // snapshot is flagged unreliable (e.g. a transient Retain read failure) is + // preserved via ``seen`` but not fed an untrustworthy state. + auto d = OpcuaPoller::classify_read_snapshot( + make_snap(/*enabled=*/true, /*active=*/true, /*retain=*/false, /*read_failed=*/true)); + EXPECT_EQ(d, OpcuaPoller::ReadReplayDisposition::KeepOnly); +} + } // namespace ros2_medkit_gateway