From d44348e7810566f489664ed5edbaf393930ad37f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:21:42 +0000 Subject: [PATCH 1/7] Initial plan From 8eb0f7a2f7508bf4d4a00da946e09e43e0d3ad17 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:25:46 +0000 Subject: [PATCH 2/7] Fix timestamp overflow in lock cleanup operations Replace cleanupTS with abortTSFrom to handle overflow when startTS==^uint64(0). Add explicit overflow checks to prevent invalid abort timestamps that violate the FSM requirement that resolveTS > startTS, which could leave locks permanently unresolved. Co-authored-by: bootjp <1306365+bootjp@users.noreply.github.com> --- kv/shard_store.go | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/kv/shard_store.go b/kv/shard_store.go index 79e93e33..39e45abc 100644 --- a/kv/shard_store.go +++ b/kv/shard_store.go @@ -365,7 +365,12 @@ func (s *ShardStore) resolveTxnLockForKey(ctx context.Context, g *ShardGroup, ke case txnStatusCommitted: return applyTxnResolution(g, pb.Phase_COMMIT, lock.StartTS, commitTS, lock.PrimaryKey, [][]byte{key}) case txnStatusRolledBack: - return applyTxnResolution(g, pb.Phase_ABORT, lock.StartTS, cleanupTS(lock.StartTS), lock.PrimaryKey, [][]byte{key}) + abortTS := abortTSFrom(lock.StartTS, lock.StartTS) + if abortTS <= lock.StartTS { + // Overflow: can't choose an abort timestamp strictly greater than startTS. + return errors.Wrapf(ErrTxnLocked, "key: %s (timestamp overflow)", string(key)) + } + return applyTxnResolution(g, pb.Phase_ABORT, lock.StartTS, abortTS, lock.PrimaryKey, [][]byte{key}) case txnStatusPending: return errors.Wrapf(ErrTxnLocked, "key: %s", string(key)) default: @@ -528,7 +533,12 @@ func lockResolutionForStatus(state lockTxnStatus, lock txnLock, key []byte) (pb. case txnStatusCommitted: return pb.Phase_COMMIT, state.commitTS, nil case txnStatusRolledBack: - return pb.Phase_ABORT, cleanupTS(lock.StartTS), nil + abortTS := abortTSFrom(lock.StartTS, lock.StartTS) + if abortTS <= lock.StartTS { + // Overflow: can't choose an abort timestamp strictly greater than startTS. + return pb.Phase_NONE, 0, errors.Wrapf(ErrTxnLocked, "key: %s (timestamp overflow)", string(key)) + } + return pb.Phase_ABORT, abortTS, nil default: return pb.Phase_NONE, 0, errors.Wrapf(ErrTxnInvalidMeta, "unknown txn status for key %s", string(key)) } @@ -835,7 +845,12 @@ func (s *ShardStore) tryAbortExpiredPrimary(primaryKey []byte, startTS uint64) ( if !ok || pg == nil || pg.Txn == nil { return false, nil } - if err := applyTxnResolution(pg, pb.Phase_ABORT, startTS, cleanupTS(startTS), primaryKey, [][]byte{primaryKey}); err != nil { + abortTS := abortTSFrom(startTS, startTS) + if abortTS <= startTS { + // Overflow: can't choose an abort timestamp strictly greater than startTS. + return false, nil + } + if err := applyTxnResolution(pg, pb.Phase_ABORT, startTS, abortTS, primaryKey, [][]byte{primaryKey}); err != nil { return false, err } return true, nil @@ -859,14 +874,7 @@ func applyTxnResolution(g *ShardGroup, phase pb.Phase, startTS, commitTS uint64, return errors.WithStack(err) } -func cleanupTS(startTS uint64) uint64 { - now := hlcWallNow() - next := startTS + 1 - if now > next { - return now - } - return next -} + // ApplyMutations applies a batch of mutations to the correct shard store. // From 9b2ef8bd32d99561d976b3a134c8d68885cfd711 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:27:20 +0000 Subject: [PATCH 3/7] Use commitTS parameter in abortTSFrom calls where available Pass commitTS from primaryTxnStatus to abortTSFrom for semantic clarity and consistency with usage elsewhere in the codebase. Co-authored-by: bootjp <1306365+bootjp@users.noreply.github.com> --- kv/shard_store.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/kv/shard_store.go b/kv/shard_store.go index 39e45abc..88690d83 100644 --- a/kv/shard_store.go +++ b/kv/shard_store.go @@ -365,7 +365,7 @@ func (s *ShardStore) resolveTxnLockForKey(ctx context.Context, g *ShardGroup, ke case txnStatusCommitted: return applyTxnResolution(g, pb.Phase_COMMIT, lock.StartTS, commitTS, lock.PrimaryKey, [][]byte{key}) case txnStatusRolledBack: - abortTS := abortTSFrom(lock.StartTS, lock.StartTS) + abortTS := abortTSFrom(lock.StartTS, commitTS) if abortTS <= lock.StartTS { // Overflow: can't choose an abort timestamp strictly greater than startTS. return errors.Wrapf(ErrTxnLocked, "key: %s (timestamp overflow)", string(key)) @@ -533,7 +533,7 @@ func lockResolutionForStatus(state lockTxnStatus, lock txnLock, key []byte) (pb. case txnStatusCommitted: return pb.Phase_COMMIT, state.commitTS, nil case txnStatusRolledBack: - abortTS := abortTSFrom(lock.StartTS, lock.StartTS) + abortTS := abortTSFrom(lock.StartTS, state.commitTS) if abortTS <= lock.StartTS { // Overflow: can't choose an abort timestamp strictly greater than startTS. return pb.Phase_NONE, 0, errors.Wrapf(ErrTxnLocked, "key: %s (timestamp overflow)", string(key)) From 65ede5cb70054d3fa824758b8a6a4e604ec917ca Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:28:31 +0000 Subject: [PATCH 4/7] Add comment clarifying tryAbortExpiredPrimary timestamp handling Explain why startTS is passed twice to abortTSFrom: no commitTS is available when aborting an expired lock with no commit record. Co-authored-by: bootjp <1306365+bootjp@users.noreply.github.com> --- kv/shard_store.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kv/shard_store.go b/kv/shard_store.go index 88690d83..229268e7 100644 --- a/kv/shard_store.go +++ b/kv/shard_store.go @@ -845,6 +845,8 @@ func (s *ShardStore) tryAbortExpiredPrimary(primaryKey []byte, startTS uint64) ( if !ok || pg == nil || pg.Txn == nil { return false, nil } + // No commitTS available here; we're aborting an expired lock with no commit record. + // abortTSFrom will use startTS+1 if representable. abortTS := abortTSFrom(startTS, startTS) if abortTS <= startTS { // Overflow: can't choose an abort timestamp strictly greater than startTS. From 5099174a89162094a3ae6332a06a96f88b93201a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:29:26 +0000 Subject: [PATCH 5/7] Pass 0 for commitTS in tryAbortExpiredPrimary for clarity Use 0 instead of startTS as the second parameter to abortTSFrom to explicitly indicate that no commitTS is available in this context. Co-authored-by: bootjp <1306365+bootjp@users.noreply.github.com> --- kv/shard_store.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kv/shard_store.go b/kv/shard_store.go index 229268e7..a68beb19 100644 --- a/kv/shard_store.go +++ b/kv/shard_store.go @@ -846,8 +846,9 @@ func (s *ShardStore) tryAbortExpiredPrimary(primaryKey []byte, startTS uint64) ( return false, nil } // No commitTS available here; we're aborting an expired lock with no commit record. - // abortTSFrom will use startTS+1 if representable. - abortTS := abortTSFrom(startTS, startTS) + // Pass 0 for commitTS to explicitly indicate it's not available; abortTSFrom will + // use startTS+1 if representable. + abortTS := abortTSFrom(startTS, 0) if abortTS <= startTS { // Overflow: can't choose an abort timestamp strictly greater than startTS. return false, nil From 3ff3e3b0a29ec3b495a966b023c70f753c5e405b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:37:56 +0000 Subject: [PATCH 6/7] Fix gci linter error - remove extra blank lines Remove extra blank lines left after deleting cleanupTS function to fix golangci-lint gci formatting check. Co-authored-by: bootjp <1306365+bootjp@users.noreply.github.com> --- kv/shard_store.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/kv/shard_store.go b/kv/shard_store.go index a68beb19..f67a953c 100644 --- a/kv/shard_store.go +++ b/kv/shard_store.go @@ -877,8 +877,6 @@ func applyTxnResolution(g *ShardGroup, phase pb.Phase, startTS, commitTS uint64, return errors.WithStack(err) } - - // ApplyMutations applies a batch of mutations to the correct shard store. // // All mutations must belong to the same shard. Cross-shard mutation batches are From a3477b605c84e7774e5c681513b3bb5f46eca23a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Mon, 16 Feb 2026 21:41:32 +0000 Subject: [PATCH 7/7] Add defensive programming comments for overflow checks Clarify that while uint64 overflow is not expected in normal operation, the overflow checks are defensive programming to handle the edge case where startTS==^uint64(0) or a bug causes overflow, preventing violation of the FSM invariant resolveTS > startTS. Co-authored-by: bootjp <1306365+bootjp@users.noreply.github.com> --- kv/shard_store.go | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/kv/shard_store.go b/kv/shard_store.go index f67a953c..2b53ee92 100644 --- a/kv/shard_store.go +++ b/kv/shard_store.go @@ -367,7 +367,9 @@ func (s *ShardStore) resolveTxnLockForKey(ctx context.Context, g *ShardGroup, ke case txnStatusRolledBack: abortTS := abortTSFrom(lock.StartTS, commitTS) if abortTS <= lock.StartTS { - // Overflow: can't choose an abort timestamp strictly greater than startTS. + // Defensive check: While uint64 overflow is not expected in normal operation, + // this handles the edge case where startTS==^uint64(0) or a bug causes overflow. + // Prevents violating the FSM invariant resolveTS > startTS (fsm.go:258). return errors.Wrapf(ErrTxnLocked, "key: %s (timestamp overflow)", string(key)) } return applyTxnResolution(g, pb.Phase_ABORT, lock.StartTS, abortTS, lock.PrimaryKey, [][]byte{key}) @@ -535,7 +537,9 @@ func lockResolutionForStatus(state lockTxnStatus, lock txnLock, key []byte) (pb. case txnStatusRolledBack: abortTS := abortTSFrom(lock.StartTS, state.commitTS) if abortTS <= lock.StartTS { - // Overflow: can't choose an abort timestamp strictly greater than startTS. + // Defensive check: While uint64 overflow is not expected in normal operation, + // this handles the edge case where startTS==^uint64(0) or a bug causes overflow. + // Prevents violating the FSM invariant resolveTS > startTS (fsm.go:258). return pb.Phase_NONE, 0, errors.Wrapf(ErrTxnLocked, "key: %s (timestamp overflow)", string(key)) } return pb.Phase_ABORT, abortTS, nil @@ -850,7 +854,9 @@ func (s *ShardStore) tryAbortExpiredPrimary(primaryKey []byte, startTS uint64) ( // use startTS+1 if representable. abortTS := abortTSFrom(startTS, 0) if abortTS <= startTS { - // Overflow: can't choose an abort timestamp strictly greater than startTS. + // Defensive check: While uint64 overflow is not expected in normal operation, + // this handles the edge case where startTS==^uint64(0) or a bug causes overflow. + // Prevents violating the FSM invariant resolveTS > startTS (fsm.go:258). return false, nil } if err := applyTxnResolution(pg, pb.Phase_ABORT, startTS, abortTS, primaryKey, [][]byte{primaryKey}); err != nil {