From b26ace69f5d4fd50954037ca92022814d00f2be8 Mon Sep 17 00:00:00 2001 From: Zaid-edge Date: Thu, 2 Jul 2026 14:03:12 +0500 Subject: [PATCH] fix(mtree): warn on reaching max_diff_rows, matching table-diff (ACE-198) mtree table-diff stayed silent when the collected diffs equalled max_diff_rows: it only flagged truncation when a further row was refused, and it had no final-summary warning. table-diff flags at >= the cap and prints "additional differences may exist", so the two paths disagreed. - recordPairRowLocked now flags truncation once a pair reaches the cap, mirroring table-diff's incrementPairDiffRowsLocked (>= semantics). - Emit a final "mtree table-diff stopped after reaching max_diff_rows=N; additional differences may exist" warning before writing the report, the analog of table_diff.go's completion warning. - Update the boundary unit test to expect DiffRowLimitReached at the exact cap. --- internal/consistency/mtree/merkle.go | 24 ++++++++++++++--------- internal/consistency/mtree/merkle_test.go | 11 ++++++----- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/internal/consistency/mtree/merkle.go b/internal/consistency/mtree/merkle.go index f0333b1..f7775b0 100644 --- a/internal/consistency/mtree/merkle.go +++ b/internal/consistency/mtree/merkle.go @@ -605,10 +605,10 @@ func (m *MerkleTreeTask) pairCapReached(pairKey string) bool { } // recordPairRowLocked accounts for one collected differing row-pair. The caller -// must hold diffMutex. It does not flag truncation: reaching MaxDiffRows on an -// accepted row does not mean anything was dropped (the pair may have exactly -// MaxDiffRows diffs). Truncation is flagged only when a further row is actually -// refused, via notePairLimitReachedLocked at the stop checks. +// must hold diffMutex. Reaching MaxDiffRows flags the report truncated, matching +// table-diff: the cap is a report-size bound, so once a pair holds max_diff_rows +// rows we treat any further divergence as possible-but-unenumerated ("additional +// differences may exist"), even when this pair happens to hold exactly the cap. func (m *MerkleTreeTask) recordPairRowLocked(pairKey string) { if m.MaxDiffRows <= 0 { return @@ -617,11 +617,14 @@ func (m *MerkleTreeTask) recordPairRowLocked(pairKey string) { m.diffRowCounts = make(map[string]int64) } m.diffRowCounts[pairKey]++ + if m.diffRowCounts[pairKey] >= m.MaxDiffRows { + m.notePairLimitReachedLocked(pairKey) + } } // notePairLimitReachedLocked marks the report truncated and warns once. Call it -// only when a differing row is actually being dropped because the pair is -// already at MaxDiffRows. The caller must hold diffMutex. +// when the pair has reached MaxDiffRows, whether on the row that fills the cap +// or on a later row being dropped. The caller must hold diffMutex. func (m *MerkleTreeTask) notePairLimitReachedLocked(pairKey string) { m.DiffResult.Summary.DiffRowLimitReached = true if !m.diffLimitWarned { @@ -663,9 +666,9 @@ func (m *MerkleTreeTask) appendDiffs(nodePairKey string, work CompareRangesWorkI m.DiffResult.NodeDiffs[nodePairKey].Rows[node2Name] = []types.OrderedMap{} } - // Each loop stops (and flags truncation) at its top check the moment it is - // asked to record a row while the pair is already full, so hitting the cap - // on the last accepted row does not spuriously mark the report truncated. + // Each loop stops at its top check the moment it is asked to record a row + // while the pair is already full; recordPairRowLocked flags truncation as + // soon as a pair reaches the cap, matching table-diff. var currentDiffRowsForPair int for _, row := range diffResult.Node1OnlyRows { @@ -2360,6 +2363,9 @@ func (m *MerkleTreeTask) DiffMtree() (err error) { m.DiffResult.Summary.EndTime = endTime.Format(time.RFC3339) m.DiffResult.Summary.TimeTaken = endTime.Sub(m.StartTime).String() m.DiffResult.Summary.PrimaryKey = m.Key + if m.DiffResult.Summary.DiffRowLimitReached { + logger.Warn("mtree table-diff stopped after reaching max_diff_rows=%d; additional differences may exist", m.MaxDiffRows) + } if diffPath, _, writeErr := utils.WriteDiffReport(m.DiffResult, m.Schema, m.Table, m.Output); writeErr != nil { return writeErr } else if diffPath != "" { diff --git a/internal/consistency/mtree/merkle_test.go b/internal/consistency/mtree/merkle_test.go index deace82..9154764 100644 --- a/internal/consistency/mtree/merkle_test.go +++ b/internal/consistency/mtree/merkle_test.go @@ -54,9 +54,10 @@ func TestMtreeDiffEnforcesMaxDiffRows(t *testing.T) { } } -// A pair with exactly max_diff_rows diffs collects them all and is NOT marked -// truncated: reaching the cap on the last row does not mean anything was dropped. -func TestMtreeDiffExactCapNotTruncated(t *testing.T) { +// A pair with exactly max_diff_rows diffs collects them all and IS marked +// truncated, matching table-diff: reaching the cap is a report-size bound, so we +// warn that additional differences may exist even at the exact boundary. +func TestMtreeDiffExactCapMarksTruncated(t *testing.T) { const cap = 5 m := mtreeTaskWithDiffState(cap) work := CompareRangesWorkItem{ @@ -71,8 +72,8 @@ func TestMtreeDiffExactCapNotTruncated(t *testing.T) { if got := len(m.DiffResult.NodeDiffs["n1/n2"].Rows["n1"]); got != cap { t.Errorf("collected %d rows for n1, want all %d", got, cap) } - if m.DiffResult.Summary.DiffRowLimitReached { - t.Errorf("did not expect DiffRowLimitReached when diffs exactly equal the cap") + if !m.DiffResult.Summary.DiffRowLimitReached { + t.Errorf("expected DiffRowLimitReached=true when diffs exactly equal the cap") } }