Fix executor thread-safety for feedback subscription content filter

minggangw · minggangw · commit b9a0540265d0 · 2026-03-30T18:25:04.000+08:00
The background executor thread re-enters rcl_wait immediately after
signaling the main thread, creating a data race when the main thread
modifies the feedback subscription's content filter during callback
processing. Add a condition variable so the background thread waits
until the main thread finishes ExecuteReadyHandles before re-entering
rcl_wait. Stop() also notifies the condition variable so that a waiting
background thread can exit.

Also fix the RMW content filter probe in the ActionClient constructor:
- Use node.destroySubscription() instead of probeSub.destroy() (which
  does not exist on Subscription), preventing the silent probe failure
  that was unconditionally disabling the optimization.
- Use interface_loader in the tests to access the FeedbackMessage type
  correctly.
- Change the test probe from before() to beforeEach() so that the node
  created by the outer beforeEach is available.
- Add a short delay (assertUtils.createDelay(50)) before the feedback
  publish in the test executeCallback to give the client time to set up
  the content filter after goal acceptance.
diff --git a/lib/action/client.js b/lib/action/client.js
@@ -118,6 +118,29 @@ class ActionClient extends Entity {
       this.qos.statusSubQosProfile
     );
 
+    // Probe that the RMW actually supports content filtering before enabling
+    // the optimization. If the RMW does not support content filtering, the
+    // rcl layer returns RCL_RET_UNSUPPORTED which disables the optimization
+    // gracefully. However, probing upfront avoids the warning log on first use.
+    if (this._enableFeedbackMsgOptimization) {
+      try {
+        const probeSub = node.createSubscription(
+          this._typeClass.impl.FeedbackMessage,
+          actionName + '/_action/feedback',
+          () => {}
+        );
+        const supported =
+          typeof probeSub.isContentFilterSupported === 'function' &&
+          probeSub.isContentFilterSupported();
+        node.destroySubscription(probeSub);
+        if (!supported) {
+          this._enableFeedbackMsgOptimization = false;
+        }
+      } catch {
+        this._enableFeedbackMsgOptimization = false;
+      }
+    }
+
     node._addActionClient(this);
   }
 
diff --git a/src/executor.cpp b/src/executor.cpp
@@ -48,7 +48,8 @@ Executor::Executor(Napi::Env env, HandleManager* handle_manager,
       handle_manager_(handle_manager),
       delegate_(delegate),
       context_(nullptr),
-      env_(env) {
+      env_(env),
+      work_pending_(false) {
   running_.store(false);
 }
 
@@ -105,6 +106,8 @@ void Executor::Stop() {
     // Stop thread first, and then uv_close
     // Make sure async_ is not used anymore
     running_.store(false);
+    // Wake the background thread in case it is waiting on the condvar.
+    work_done_cv_.notify_all();
     handle_manager_->StopWaitingHandles();
     uv_thread_join(&background_thread_);
 
@@ -134,6 +137,13 @@ bool Executor::IsMainThread() {
 void Executor::DoWork(uv_async_t* handle) {
   Executor* executor = reinterpret_cast<Executor*>(handle->data);
   executor->ExecuteReadyHandles();
+
+  // Clear work_pending_ so Run()'s wait predicate lets it re-enter rcl_wait.
+  {
+    std::lock_guard<std::mutex> lock(executor->work_done_mutex_);
+    executor->work_pending_ = false;
+  }
+  executor->work_done_cv_.notify_one();
 }
 
 void Executor::Run(void* arg) {
@@ -159,7 +169,23 @@ void Executor::Run(void* arg) {
 
       if (!uv_is_closing(reinterpret_cast<uv_handle_t*>(executor->async_)) &&
           handle_manager->ready_handles_count() > 0) {
+        // Tell the main thread there is work to do, then wait for it to
+        // finish before re-entering rcl_wait.  This prevents a data race
+        // where the background thread holds subscriptions in the wait set
+        // while the main thread modifies their state (e.g. content filter).
+        {
+          std::lock_guard<std::mutex> lock(executor->work_done_mutex_);
+          executor->work_pending_ = true;
+        }
         uv_async_send(executor->async_);
+
+        // Wait until DoWork() signals completion.
+        {
+          std::unique_lock<std::mutex> lock(executor->work_done_mutex_);
+          executor->work_done_cv_.wait(lock, [executor] {
+            return !executor->work_pending_ || !executor->running_.load();
+          });
+        }
       }
     }
 
diff --git a/src/executor.h b/src/executor.h
@@ -20,7 +20,9 @@
 #include <uv.h>
 
 #include <atomic>
+#include <condition_variable>
 #include <exception>
+#include <mutex>
 #include <vector>
 
 #include "rcl_handle.h"
@@ -72,6 +74,15 @@ class Executor {
   Napi::Env env_;
 
   std::atomic_bool running_;
+
+  // Synchronization: after uv_async_send the background thread waits until
+  // the main thread finishes ExecuteReadyHandles, so it cannot re-enter
+  // rcl_wait (which holds a reference to subscriptions) while subscription
+  // state (e.g. content filter) changes.  NOTE(review): Stop() updates
+  // running_ without work_done_mutex_; confirm no wakeup can be missed.
+  std::mutex work_done_mutex_;
+  std::condition_variable work_done_cv_;
+  bool work_pending_;  // true while the main thread is processing handles
 };
 
 }  // namespace rclnodejs
diff --git a/test/test-action-client.js b/test/test-action-client.js
@@ -31,6 +31,10 @@ describe('rclnodejs action client', function () {
   let publishFeedback = null;
 
   async function executeCallback(goalHandle) {
+    // Delay before publishing feedback to allow the client time to process
+    // the goal response and set up the content filter (if enabled).
+    await assertUtils.createDelay(50);
+
     if (
       publishFeedback &&
       ActionUuid.fromMessage(publishFeedback).toString() ===
@@ -317,6 +321,30 @@ describe('rclnodejs action client', function () {
       typeof nativeLoader.actionConfigureFeedbackSubFilterAddGoalId ===
       'function';
 
+    // Probe whether the RMW supports content filtering, matching the
+    // same check the ActionClient constructor performs.
+    let isContentFilterSupported = false;
+    beforeEach(function () {
+      if (isContentFilterSupported) return; // support confirmed; skip re-probe
+      if (isFeedbackFilterSupported()) {
+        try {
+          const loader = require('../lib/interface_loader.js');
+          const typeClass = loader.loadInterface(fibonacci);
+          const probeSub = node.createSubscription(
+            typeClass.impl.FeedbackMessage,
+            'fibonacci/_action/feedback',
+            () => {}
+          );
+          isContentFilterSupported =
+            typeof probeSub.isContentFilterSupported === 'function' &&
+            probeSub.isContentFilterSupported();
+          node.destroySubscription(probeSub);
+        } catch {
+          isContentFilterSupported = false;
+        }
+      }
+    });
+
     it('Test option defaults to false', function () {
       let client = new rclnodejs.ActionClient(node, fibonacci, 'fibonacci');
       assert.strictEqual(client._enableFeedbackMsgOptimization, false);
@@ -327,8 +355,8 @@ describe('rclnodejs action client', function () {
       let client = new rclnodejs.ActionClient(node, fibonacci, 'fibonacci', {
         enableFeedbackMsgOptimization: true,
       });
-      // Only enabled when native API exists
-      if (isFeedbackFilterSupported()) {
+      // Only enabled when native API exists AND the RMW supports content filtering
+      if (isFeedbackFilterSupported() && isContentFilterSupported) {
         assert.strictEqual(client._enableFeedbackMsgOptimization, true);
       } else {
         assert.strictEqual(client._enableFeedbackMsgOptimization, false);