From 82b528335c44168a1e51582cfc0207bd6ac50eb2 Mon Sep 17 00:00:00 2001
From: Ben Ye
Date: Fri, 8 May 2026 15:21:41 +0000
Subject: [PATCH] fix(ha): Fix flaky TestWatchPrefixNilPanicWithMemberlist

The test was flaky due to a race between the WatchPrefix watcher
registration in loop() and the CheckReplica call. StartAndAwaitRunning
returns before the WatchPrefix goroutine registers its watcher channel
in the memberlist KV. If CheckReplica's CAS + notifyWatchers fires
before the watcher is registered, the notification is lost and the key
never appears in the elected cache.

Fix by adding a 100ms sleep before CheckReplica to allow the WatchPrefix
goroutine to register its watcher channel (same pattern used in
memberlist_client_test.go), and increasing the poll timeout from 3s to
5s for CI robustness.

Signed-off-by: Ben Ye
Signed-off-by: Friedrich Gonzalez <1517449+friedrichg@users.noreply.github.com>
---
 pkg/ha/ha_tracker_test.go | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/pkg/ha/ha_tracker_test.go b/pkg/ha/ha_tracker_test.go
index 5376bb91ee..ea81927997 100644
--- a/pkg/ha/ha_tracker_test.go
+++ b/pkg/ha/ha_tracker_test.go
@@ -265,10 +265,16 @@ func TestWatchPrefixNilPanicWithMemberlist(t *testing.T) {
 	replica := "replica0"
 	key := userID + "/" + cluster
 
+	// Give the WatchPrefix goroutine in loop() time to register its watcher
+	// channel in the memberlist KV before we write. Without this, the CAS
+	// notification can fire before the watcher is registered, causing the
+	// key to never appear in the elected cache.
+	time.Sleep(100 * time.Millisecond)
+
 	now := time.Now()
 	require.NoError(t, tracker.CheckReplica(ctx, userID, cluster, replica, now))
 
-	test.Poll(t, 3*time.Second, nil, func() any {
+	test.Poll(t, 5*time.Second, nil, func() any {
 		tracker.electedLock.RLock()
 		defer tracker.electedLock.RUnlock()
 		if _, ok := tracker.elected[key]; !ok {