Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions rclcpp/src/rclcpp/time_source.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -422,13 +422,35 @@ class TimeSource::NodeState final
// Destroy the subscription for the clock topic
void destroy_clock_sub()
{
std::lock_guard<std::mutex> guard(clock_sub_lock_);
if (clock_executor_thread_.joinable()) {
clock_executor_->cancel();
clock_executor_thread_.join();
clock_executor_->remove_callback_group(clock_callback_group_);
std::thread thread_to_join;
rclcpp::executors::SingleThreadedExecutor::SharedPtr executor_to_clean;
rclcpp::CallbackGroup::SharedPtr cbg_to_remove;

{
std::lock_guard<std::mutex> guard(clock_sub_lock_);
if (clock_executor_thread_.joinable()) {
clock_executor_->cancel();
// Move the thread out so we can join without holding clock_sub_lock_.
// Holding the lock during join() can deadlock if the executor thread's
// shutdown path contends on clock_sub_lock_ (see ros2/rclcpp#2962).
// The thread is unconditionally joined below — it is not dangling.
thread_to_join = std::move(clock_executor_thread_);
executor_to_clean = clock_executor_;
cbg_to_remove = clock_callback_group_;
}
}

// Join outside the lock. The subscription is intentionally kept alive
// until after join so the executor thread can finish any in-flight callback.
if (thread_to_join.joinable()) {
thread_to_join.join();
executor_to_clean->remove_callback_group(cbg_to_remove);
}

{
std::lock_guard<std::mutex> guard(clock_sub_lock_);
clock_subscription_.reset();
}
clock_subscription_.reset();
}

// On set Parameters callback handle
Expand Down
5 changes: 5 additions & 0 deletions rclcpp/test/rclcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,11 @@ ament_add_ros_isolated_gtest(test_time_source test_time_source.cpp
if(TARGET test_time_source)
target_link_libraries(test_time_source ${PROJECT_NAME} rcl::rcl)
endif()
ament_add_ros_isolated_gtest(test_time_source_deadlock test_time_source_deadlock.cpp
TIMEOUT 60)
if(TARGET test_time_source_deadlock)
target_link_libraries(test_time_source_deadlock ${PROJECT_NAME})
endif()

ament_add_ros_isolated_gtest(test_utilities test_utilities.cpp
APPEND_LIBRARY_DIRS "${append_library_dirs}")
Expand Down
137 changes: 137 additions & 0 deletions rclcpp/test/rclcpp/test_time_source_deadlock.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// Copyright 2026 Open Source Robotics Foundation, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

/**
* Stress tests for TimeSource::destroy_clock_sub deadlock fix.
*
* Exercises the conditions from ros2/rclcpp#2962: rapid node
* construction/destruction with use_sim_time enabled. If the
* deadlock is present, these tests will hang (caught by the test
* timeout). Under ThreadSanitizer, any data race or dangling-thread
* access will be flagged.
*/

#include <chrono>
#include <future>
#include <memory>
#include <thread>
#include <vector>

#include "gtest/gtest.h"

#include "rclcpp/rclcpp.hpp"

using namespace std::chrono_literals;

class TestTimeSourceDeadlock : public ::testing::Test
{
public:
static void SetUpTestCase()
{
rclcpp::init(0, nullptr);
}

static void TearDownTestCase()
{
rclcpp::shutdown();
}
};

/// Reproducer for ros2/rclcpp#2962: rapid construction and destruction
/// of a node with use_sim_time=true. The clock executor thread must
/// be joined without deadlock.
TEST_F(TestTimeSourceDeadlock, rapid_construct_destroy_sim_time)
{
for (int i = 0; i < 50; ++i) {
rclcpp::NodeOptions opts;
opts.parameter_overrides({rclcpp::Parameter("use_sim_time", true)});
auto node = std::make_shared<rclcpp::Node>(
"deadlock_test_" + std::to_string(i), "/test_ns", opts);

// Immediately destroy — the clock executor thread must be
// joined without deadlock.
node.reset();
}
}

/// Create and destroy nodes with use_sim_time from multiple threads
/// to stress the lock/join interaction.
TEST_F(TestTimeSourceDeadlock, concurrent_construct_destroy)
{
const int num_threads = 4;
const int iterations = 10;
std::vector<std::future<void>> futures;

for (int t = 0; t < num_threads; ++t) {
futures.push_back(std::async(std::launch::async, [t, iterations]() {
for (int i = 0; i < iterations; ++i) {
rclcpp::NodeOptions opts;
opts.parameter_overrides({rclcpp::Parameter("use_sim_time", true)});
auto node = std::make_shared<rclcpp::Node>(
"concurrent_test_" + std::to_string(t) + "_" + std::to_string(i),
"/test_ns", opts);
node.reset();
}
}));
}

for (auto & f : futures) {
ASSERT_NO_THROW(f.get());
}
}

/// Toggle use_sim_time parameter while spinning — exercises the
/// create_clock_sub / destroy_clock_sub interplay.
TEST_F(TestTimeSourceDeadlock, toggle_use_sim_time_while_spinning)
{
auto node = std::make_shared<rclcpp::Node>("toggle_test", "/test_ns");
rclcpp::executors::SingleThreadedExecutor executor;
executor.add_node(node);

// Spin in background
auto spin_future = std::async(std::launch::async, [&executor]() {
executor.spin();
});

// Toggle use_sim_time several times
for (int i = 0; i < 10; ++i) {
node->set_parameter(rclcpp::Parameter("use_sim_time", true));
std::this_thread::sleep_for(10ms);
node->set_parameter(rclcpp::Parameter("use_sim_time", false));
std::this_thread::sleep_for(10ms);
}

executor.cancel();
spin_future.get();
}

/// Verify the clock thread is properly cleaned up after destruction.
/// After destroying a sim-time node, no threads from its clock
/// executor should be lingering.
TEST_F(TestTimeSourceDeadlock, no_lingering_thread_after_destroy)
{
{
rclcpp::NodeOptions opts;
opts.parameter_overrides({rclcpp::Parameter("use_sim_time", true)});
auto node = std::make_shared<rclcpp::Node>(
"linger_test", "/test_ns", opts);
// Node destroyed here — clock thread must be fully joined
}

// If the thread were dangling, subsequent operations on the
// same context would be unreliable. Create another node to
// verify the context is clean.
auto node2 = std::make_shared<rclcpp::Node>("after_linger", "/test_ns");
ASSERT_NE(node2, nullptr);
}