24 changes: 4 additions & 20 deletions go/pools/smartconnpool/pool.go
@@ -435,13 +435,9 @@ func (pool *ConnPool[C]) pop(stack *connStack[C]) *Pooled[C] {
 	// to expire this connection (even if it's still visible to them), so it's
 	// safe to return it
 	for conn, ok := stack.Pop(); ok; conn, ok = stack.Pop() {
-		if !conn.timeUsed.borrow() {
-			// Ignore the connection that couldn't be borrowed;
-			// it's being closed by the idle worker and replaced by a new connection.
-			continue
+		if conn.timeUsed.borrow() {
+			return conn
 		}
Comment on lines +438 to 440
Copilot AI Jan 15, 2026

The reverted logic may reintroduce the original connection leak issue. After the revert, when borrow() fails (the connection is expired or busy), the loop moves on to the next connection and the failed one is dropped without further handling. Consider documenting why this approach is acceptable in v21 and what trade-offs are being made by accepting potentially leaked connections versus the memory leak caused by the fix.

-
-		return conn
 	}
 	return nil
 }
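
To illustrate why borrow() can fail in this loop, here is a hypothetical, self-contained Go sketch of the kind of atomic claim that pop() and the idle worker race over. The names and semantics below (atomicTimestamp, claimed, expire) are illustrative assumptions, not the pool's actual timeUsed implementation.

// Hypothetical sketch, not the pool's actual timeUsed type: it only illustrates
// why borrow() can fail once the idle worker has already claimed a connection.
package main

import (
	"fmt"
	"sync/atomic"
)

// claimed is a sentinel meaning "this connection is already taken, either by a
// caller or by the idle worker".
const claimed = int64(-1)

// atomicTimestamp plays the role of a last-used timestamp that exactly one
// party may atomically claim.
type atomicTimestamp struct {
	v atomic.Int64
}

// borrow tries to claim the connection for a caller; it fails if the idle
// worker (or another caller) got there first.
func (t *atomicTimestamp) borrow() bool {
	for {
		cur := t.v.Load()
		if cur == claimed {
			return false
		}
		if t.v.CompareAndSwap(cur, claimed) {
			return true
		}
	}
}

// expire claims the connection for the idle worker if it has sat unused for
// longer than timeout (times are in arbitrary monotonic units).
func (t *atomicTimestamp) expire(now, timeout int64) bool {
	cur := t.v.Load()
	if cur == claimed || now-cur < timeout {
		return false
	}
	return t.v.CompareAndSwap(cur, claimed)
}

func main() {
	var ts atomicTimestamp
	ts.v.Store(100)

	// The idle worker wins the race, so a subsequent borrow() returns false;
	// that false branch is exactly what the reverted pop() loop skips over.
	fmt.Println("expired by idle worker:", ts.expire(200, 50)) // true
	fmt.Println("borrow succeeds:", ts.borrow())               // false
}
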
@@ -746,23 +742,11 @@ func (pool *ConnPool[C]) closeIdleResources(now time.Time) {
 		for conn := s.Peek(); conn != nil; conn = conn.next.Load() {
 			if conn.timeUsed.expired(mono, timeout) {
 				pool.Metrics.idleClosed.Add(1)

-				conn.Close()
-				pool.closedConn()
-
 				// Using context.Background() is fine since MySQL connection already enforces
 				// a connect timeout via the `db_connect_timeout_ms` config param.
Copilot AI Jan 15, 2026

This revert changes the behavior from creating new connections to reopening existing connection objects. Given that the original fix was reverted due to memory leak issues specific to v21, this code section should include a comment explaining why connReopen() is used here instead of the getNew() + tryReturnConn() approach, and what the known trade-offs are. This will help future maintainers understand the context when they encounter connection pool issues.

Suggested change
-				// a connect timeout via the `db_connect_timeout_ms` config param.
+				// a connect timeout via the `db_connect_timeout_ms` config param.
+				// NOTE: We intentionally call connReopen() here instead of using the
+				// getNew() + tryReturnConn() pattern. An earlier change switched this
+				// logic to allocate a new connection and return the old one to the pool,
+				// but that approach was reverted in v21 due to a version-specific memory
+				// leak in the connection pooling layer. Reopening the existing connection
+				// object avoids that leak at the cost of reusing connection structs
+				// more aggressively and potentially holding onto them longer. If this
+				// behavior is revisited in the future, carefully reassess memory usage,
+				// pool accounting, and leak risks before switching back to getNew() +
+				// tryReturnConn().

-				c, err := pool.getNew(context.Background())
-				if err != nil {
-					// If we couldn't open a new connection, just continue
-					continue
-				}
-
-				// opening a new connection might have raced with other goroutines,
-				// so it's possible that we got back `nil` here
-				if c != nil {
-					// Return the new connection to the pool
-					pool.tryReturnConn(c)
+				if err := pool.connReopen(context.Background(), conn, mono); err != nil {
+					pool.closedConn()
 				}
 			}
 		}
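
To make the trade-off discussed in the comment above concrete, here is a hypothetical, self-contained Go sketch contrasting the two strategies. fakePool, reopenInPlace, replaceWithNew, and dial are illustrative stand-ins, not the smartconnpool API.

// Hypothetical sketch of the two idle-handling strategies: reopening the same
// connection object in place versus closing it and returning a new one.
package main

import (
	"context"
	"errors"
	"fmt"
)

// dial stands in for actually (re)connecting; it is swappable so failures can
// be simulated.
var dial = func(ctx context.Context) error { return nil }

type fakeConn struct{ open bool }

type fakePool struct {
	active int         // connections the pool believes it owns
	idle   []*fakeConn // connections parked in the pool
}

// reopenInPlace mirrors the reverted behavior: the same connection object is
// re-dialed where it sits, and accounting only shrinks if that fails.
func (p *fakePool) reopenInPlace(ctx context.Context, c *fakeConn) {
	c.open = false
	if err := dial(ctx); err != nil {
		p.active-- // the slot is gone until demand recreates it
		return
	}
	c.open = true // same struct, now backed by a fresh connection
}

// replaceWithNew mirrors the approach being reverted away from: close and
// account for the idle connection, then try to hand a brand-new one to the pool.
func (p *fakePool) replaceWithNew(ctx context.Context, c *fakeConn) {
	c.open = false
	p.active--
	if err := dial(ctx); err != nil {
		return
	}
	p.active++
	p.idle = append(p.idle, &fakeConn{open: true})
}

func main() {
	p := &fakePool{active: 1}
	c := &fakeConn{open: true}

	p.reopenInPlace(context.Background(), c)
	fmt.Println("after reopen:", p.active, c.open) // 1 true

	dial = func(context.Context) error { return errors.New("dial failed") }
	p.reopenInPlace(context.Background(), c)
	fmt.Println("after failed reopen:", p.active, c.open) // 0 false
}
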
85 changes: 0 additions & 85 deletions go/pools/smartconnpool/pool_test.go
@@ -1280,88 +1280,3 @@ func TestCloseDuringWaitForConn(t *testing.T) {
require.EqualValues(t, 0, state.open.Load())
}
}

// TestIdleTimeoutConnectionLeak checks for leaked connections after idle timeout
func TestIdleTimeoutConnectionLeak(t *testing.T) {
var state TestState

// Slow connection creation to ensure idle timeout happens during reopening
state.chaos.delayConnect = 300 * time.Millisecond

p := NewPool(&Config[*TestConn]{
Capacity: 2,
IdleTimeout: 50 * time.Millisecond,
LogWait: state.LogWait,
}).Open(newConnector(&state), nil)

getCtx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
defer cancel()

// Get and return two connections
conn1, err := p.Get(getCtx, nil)
require.NoError(t, err)

conn2, err := p.Get(getCtx, nil)
require.NoError(t, err)

p.put(conn1)
p.put(conn2)

// At this point: Active=2, InUse=0, Available=2
require.EqualValues(t, 2, p.Active())
require.EqualValues(t, 0, p.InUse())
require.EqualValues(t, 2, p.Available())

// Wait for idle timeout to kick in and start expiring connections
require.EventuallyWithT(t, func(c *assert.CollectT) {
// Check the actual number of currently open connections
assert.Equal(c, int64(2), state.open.Load())
// Check the total number of closed connections
assert.Equal(c, int64(1), state.close.Load())
}, 100*time.Millisecond, 10*time.Millisecond)

// At this point, the idle timeout worker has expired the connections
// and is trying to reopen them (which takes 300ms due to delayConnect)

// Try to get connections while they're being reopened
// This should trigger the bug where connections get discarded
for i := 0; i < 2; i++ {
getCtx, cancel := context.WithTimeout(t.Context(), 50*time.Millisecond)
defer cancel()

conn, err := p.Get(getCtx, nil)
require.NoError(t, err)

p.put(conn)
}

// Wait a moment for all reopening to complete
require.EventuallyWithT(t, func(c *assert.CollectT) {
// Check the actual number of currently open connections
require.Equal(c, int64(2), state.open.Load())
// Check the total number of closed connections
require.Equal(c, int64(2), state.close.Load())
}, 400*time.Millisecond, 10*time.Millisecond)

// Check the pool state
assert.Equal(t, int64(2), p.Active())
assert.Equal(t, int64(0), p.InUse())
assert.Equal(t, int64(2), p.Available())
assert.Equal(t, int64(2), p.Metrics.IdleClosed())

// Try to close the pool - if there are leaked connections, this will timeout
closeCtx, cancel := context.WithTimeout(t.Context(), 500*time.Millisecond)
defer cancel()

err = p.CloseWithContext(closeCtx)
require.NoError(t, err)

// Pool should be completely closed now
assert.Equal(t, int64(0), p.Active())
assert.Equal(t, int64(0), p.InUse())
assert.Equal(t, int64(0), p.Available())
assert.Equal(t, int64(2), p.Metrics.IdleClosed())

assert.Equal(t, int64(0), state.open.Load())
assert.Equal(t, int64(4), state.close.Load())
}