File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -10,7 +10,7 @@ cmake_minimum_required(VERSION 3.20)
1010
1111project (canard)
1212enable_testing ()
13-
13+ set (CMAKE_CTEST_ARGUMENTS "-V" ) # Enable test outputs when running `make test`
1414set (CMAKE_EXPORT_COMPILE_COMMANDS ON )
1515
1616# Shared Clang-Format target for all subprojects.
Original file line number Diff line number Diff line change @@ -133,6 +133,30 @@ static byte_t popcount(const uint64_t x)
133133#endif
134134}
135135
136+ /// See ctz().
137+ static byte_t ctz_emulated (uint32_t x )
138+ {
139+ CANARD_ASSERT (x != 0U );
140+ byte_t v = 0 ;
141+ while ((x & 1U ) == 0U ) {
142+ x >>= 1U ;
143+ ++ v ;
144+ }
145+ return v ;
146+ }
147+
148+ /// Count trailing zeros (ctz), aka find first set (ffs), aka the index of the least-significant set bit.
149+ /// Undefined for zero argument.
150+ static byte_t ctz (const uint32_t x )
151+ {
152+ #if defined(__GNUC__ ) || defined(__clang__ ) || defined(__CC_ARM )
153+ CANARD_ASSERT (x != 0U );
154+ return (byte_t )__builtin_ctzl (x );
155+ #else
156+ return ctz_emulated (x );
157+ #endif
158+ }
159+
136160static void * mem_alloc (const canard_mem_t memory , const size_t size ) { return memory .vtable -> alloc (memory , size ); }
137161static void * mem_alloc_zero (const canard_mem_t memory , const size_t size )
138162{
Original file line number Diff line number Diff line change @@ -389,7 +389,15 @@ struct canard_t
389389 /// due to the limited bus capacity; at the same time, CAN is likely to be used with small memory-limited
390390 /// devices. Hence we introduce a design tradeoff favoring smaller memory footprint over insertion efficiency,
391391 /// which is reasonable on the assumption that the number of simultaneously enqueued transfers (sic! not frames)
392- /// is typically small, on the order of a couple dozen at most.
392+ /// is typically small, on the order of a couple dozen at most. At small N, linked lists are even expected to
393+ /// outperform BST lookup; given r=2 is the approximate complexity premium of BST lookup over list scan,
394+ /// assuming that an average list lookup ends halfway, then the complexity crossover point is about:
395+ ///
396+ /// N/2 = r log2(N)
397+ ///
398+ /// So for r=2, we expect linked lists to outperform BSTs below roughly 15 pending transfers per shard
399+ /// (by default, RPC-service and message transfers use different shards, and each priority level is separate).
400+ /// The number of frames per transfer is irrelevant here as it doesn't affect the asymptotic complexity.
393401 ///
394402 /// The structures are optimized to minimize the poll complexity, since it is on the hot path, at the expense
395403 /// of insertion and cancellation paths. Each pending queue is a simple FIFO; the priority ordering is done
You can’t perform that action at this time.
0 commit comments