diff --git a/packages/layout-engine/layout-bridge/src/remeasure.ts b/packages/layout-engine/layout-bridge/src/remeasure.ts index 0e9eeec135..1356979fae 100644 --- a/packages/layout-engine/layout-bridge/src/remeasure.ts +++ b/packages/layout-engine/layout-bridge/src/remeasure.ts @@ -789,14 +789,49 @@ const applyTabLayoutToLines = ( const alignmentTabStopsPx = tabStops .map((stop, index) => ({ stop, index })) .filter(({ stop }) => stop.val === 'end' || stop.val === 'center' || stop.val === 'decimal'); + + // Per-line-segment tab counts. Segments are delimited by explicit because + // pPr/tabs apply per line, not per paragraph. sd-1480: "Page\t2
Page\t5" must + // bind the trailing tab of EACH segment to the alignment stop, not just the + // paragraph-final tab. + const tabSegmentInfo = new Map(); + { + let segmentTabRunIndices: number[] = []; + const closeSegment = () => { + const total = segmentTabRunIndices.length; + segmentTabRunIndices.forEach((runIdx, ord) => { + tabSegmentInfo.set(runIdx, { localOrdinal: ord, segmentTotal: total }); + }); + segmentTabRunIndices = []; + }; + for (let i = 0; i < runs.length; i++) { + const r = runs[i]; + if (r.kind === 'lineBreak' || (r.kind === 'break' && (r as { breakType?: string }).breakType === 'line')) { + closeSegment(); + } else if (r.kind === 'tab') { + segmentTabRunIndices.push(i); + } + } + closeSegment(); + } + // Word-compat heuristic (not ECMA-376 17.3.3.32): the last N tab characters in a - // paragraph bind to the last N explicit end/center/decimal stops. Needed for TOC + // line bind to the last N explicit end/center/decimal stops. Needed for TOC // entries where a right-aligned dot-leader stop coexists with default grid stops. // Mirrored in measuring/dom/src/index.ts. - const getAlignmentStopForOrdinal = (ordinal: number): { stop: TabStopPx; index: number } | null => { + const getAlignmentStopForOrdinal = (ordinal: number, runIdx?: number): { stop: TabStopPx; index: number } | null => { if (alignmentTabStopsPx.length === 0 || totalTabRuns === 0 || !Number.isFinite(ordinal)) return null; - if (ordinal < 0 || ordinal >= totalTabRuns) return null; - const remainingTabs = totalTabRuns - ordinal - 1; + let scopeOrdinal = ordinal; + let scopeTotal = totalTabRuns; + if (runIdx !== undefined) { + const info = tabSegmentInfo.get(runIdx); + if (info) { + scopeOrdinal = info.localOrdinal; + scopeTotal = info.segmentTotal; + } + } + if (scopeOrdinal < 0 || scopeOrdinal >= scopeTotal) return null; + const remainingTabs = scopeTotal - scopeOrdinal - 1; const targetIndex = alignmentTabStopsPx.length - 1 - remainingTabs; if (targetIndex < 0 || targetIndex >= alignmentTabStopsPx.length) return null; return alignmentTabStopsPx[targetIndex]; @@ -828,22 +863,34 @@ const applyTabLayoutToLines = ( /** * Processes a tab character, calculating position and handling alignment. */ - const applyTab = (startRunIndex: number, startChar: number, run?: Run, tabOrdinal?: number): void => { + const applyTab = ( + startRunIndex: number, + startChar: number, + run?: Run, + tabOrdinal?: number, + tabRunIdx?: number, + ): void => { const originX = cursorX; const absCurrentX = cursorX + effectiveIndent; let stop: TabStopPx | undefined; let target: number; + // Mirror of measuring/dom: only force the SD-2447 heuristic when greedy + // would land on a `source:default` stop (synthetic 0.5" grid). Explicit + // start stops should win greedy. + const greedy = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); + const greedyOnDefault = greedy.stop?.source === 'default'; const forcedAlignment = - typeof tabOrdinal === 'number' && Number.isFinite(tabOrdinal) ? getAlignmentStopForOrdinal(tabOrdinal) : null; + greedyOnDefault && typeof tabOrdinal === 'number' && Number.isFinite(tabOrdinal) + ? getAlignmentStopForOrdinal(tabOrdinal, tabRunIdx) + : null; if (forcedAlignment && forcedAlignment.stop.pos > absCurrentX + TAB_EPSILON) { stop = forcedAlignment.stop; target = forcedAlignment.stop.pos; tabStopCursor = forcedAlignment.index + 1; } else { - const next = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); - stop = next.stop; - target = next.target; - tabStopCursor = next.nextIndex; + stop = greedy.stop; + target = greedy.target; + tabStopCursor = greedy.nextIndex; } const clampedTarget = Number.isFinite(maxAbsWidth) ? Math.min(target, maxAbsWidth) : target; const relativeTarget = clampedTarget - effectiveIndent; @@ -901,7 +948,7 @@ const applyTabLayoutToLines = ( if (run.kind === 'tab') { const tabRun = run as TabRun; const ordinal = consumeTabOrdinal(tabRun.tabIndex); - applyTab(runIndex + 1, 0, run, ordinal); + applyTab(runIndex + 1, 0, run, ordinal, runIndex); continue; } diff --git a/packages/layout-engine/layout-engine/src/column-balancing.test.ts b/packages/layout-engine/layout-engine/src/column-balancing.test.ts index 7aa7431135..9b308ce7d1 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.test.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.test.ts @@ -9,7 +9,6 @@ import { calculateBalancedColumnHeight, shouldBalanceColumns, shouldSkipBalancing, - balancePageColumns, DEFAULT_BALANCING_CONFIG, type BalancingContext, type BalancingBlock, @@ -307,21 +306,10 @@ describe('DEFAULT_BALANCING_CONFIG', () => { }); // ============================================================================ -// balancePageColumns Tests (Post-Layout Balancing) +// balanceSectionOnPage Tests (Section-scoped balancing) // ============================================================================ -/** - * Helper to create a mock fragment for balancePageColumns testing. - */ -function createFragment( - blockId: string, - x: number, - y: number, - width: number, - kind: string = 'para', -): { x: number; y: number; width: number; kind: string; blockId: string } { - return { x, y, width, kind, blockId }; -} +import { balanceSectionOnPage } from './column-balancing.js'; /** * Helper to create measure data for paragraph fragments. @@ -333,228 +321,179 @@ function createMeasure(kind: string, lineHeights: number[]): { kind: string; lin }; } -describe('balancePageColumns', () => { - describe('basic balancing', () => { - it('should distribute fragments across 2 columns based on target height', () => { - // 4 fragments, each 20px tall = 80px total, target = 40px per column - // With >= condition: switch when adding would reach/exceed 40px - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 >= 40, switch! column 1, height=20 - // Block 3, 4: stay in column 1 - // Result: 1 in column 0, 3 in column 1 - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 40, measureMap); - - // Block 1 stays in column 0 - expect(fragments[0].x).toBe(96); - // Blocks 2, 3, 4 move to column 1 - expect(fragments[1].x).toBe(432); - expect(fragments[2].x).toBe(432); - expect(fragments[3].x).toBe(432); +describe('balanceSectionOnPage', () => { + type TestFragment = { blockId: string; x: number; y: number; width: number; kind: string }; + + /** Build a fragment + section mapping for section-scoped tests. */ + function buildSectionFixture( + sectionIndex: number, + count: number, + height = 20, + startY = 96, + ): { + fragments: TestFragment[]; + measureMap: Map }>; + blockSectionMap: Map; + } { + const fragments: TestFragment[] = []; + const measureMap = new Map }>(); + const blockSectionMap = new Map(); + for (let i = 0; i < count; i++) { + const id = `s${sectionIndex}-b${i}`; + fragments.push({ blockId: id, x: 96, y: startY + i * height, width: 624, kind: 'para' }); + measureMap.set(id, createMeasure('paragraph', [height])); + blockSectionMap.set(id, sectionIndex); + } + return { fragments, measureMap, blockSectionMap }; + } + + it('balances the target section and returns the tallest balanced column bottom', () => { + // 6 equal paragraphs in a 2-col section → 3+3 balanced, tallest col ends at top + 3×20 = top + 60. + const top = 96; + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6, 20, top); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: top, + columnWidth: 288, + availableHeight: 60, + measureMap, }); - it('should set fragment width to column width', () => { - const fragments = [createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624)]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ]); + // Returned maxY is the bottom of the tallest balanced column. + expect(result).not.toBeNull(); + expect(result!.maxY).toBe(top + 60); - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 30, measureMap); + // Observable outcome: fragments split evenly across two columns. + const col0 = fragments.filter((f) => f.x === 96).length; + const col1 = fragments.filter((f) => f.x === 96 + 288 + 48).length; + expect(col0).toBe(3); + expect(col1).toBe(3); + }); - // Both fragments should have width set to column width - expect(fragments[0].width).toBe(288); - expect(fragments[1].width).toBe(288); + it('returns null and leaves fragments untouched when section has <= 1 column', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 3); + const snapshot = fragments.map((f) => ({ x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 1, gap: 0, width: 624 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 624, + availableHeight: 720, + measureMap, }); - it('should reset Y positions in each column to start from top margin', () => { - // Use 6 fragments so we get a more even split for Y testing - // 6 * 20px = 120px total, target = 60px - // Blocks 1, 2 = 40px, block 3 would make 60px >= 60px, switch! - // Column 0: blocks 1, 2. Column 1: blocks 3, 4, 5, 6 - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - createFragment('block-5', 96, 176, 624), - createFragment('block-6', 96, 196, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ['block-5', createMeasure('paragraph', [20])], - ['block-6', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 50, measureMap); - - // Column 0: blocks 1, 2 - Y positions stack from top - expect(fragments[0].y).toBe(96); - expect(fragments[1].y).toBe(116); - // Column 1: blocks 3+ - Y resets to top margin - expect(fragments[2].y).toBe(96); - expect(fragments[3].y).toBe(116); + expect(result).toBeNull(); + fragments.forEach((f, i) => { + expect(f.x).toBe(snapshot[i].x); + expect(f.y).toBe(snapshot[i].y); }); }); - describe('column switching threshold', () => { - it('should switch columns when target height is REACHED (not just exceeded)', () => { - // This tests the >= vs > fix: Word switches when target is reached, not exceeded. - // 6 fragments: 3 at 20px each = 60px, we want exactly 30px per column - // With >= condition: switch happens when 30px is reached - // With > condition: would need 30+ to switch - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - createFragment('block-5', 96, 176, 624), - createFragment('block-6', 96, 196, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ['block-5', createMeasure('paragraph', [20])], - ['block-6', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 50, measureMap); - - // With >= condition: target = 120px / 2 = 60px per column - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 < 60, stay in column 0 - // Block 3 (20px): 40+20=60 >= 60, SWITCH to column 1 - // Blocks 4,5,6 stay in column 1 - expect(fragments[0].x).toBe(96); - expect(fragments[1].x).toBe(96); - expect(fragments[2].x).toBe(432); // Switched because 40+20=60 >= 60 - expect(fragments[3].x).toBe(432); - expect(fragments[4].x).toBe(432); - expect(fragments[5].x).toBe(432); + it('returns null when section contains an explicit column break', () => { + // Author-placed column breaks override balancing — preserve their intent. + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6); + const snapshot = fragments.map((f) => f.x); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: true, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, }); - it('should match Word behavior with uneven height distribution', () => { - // Simulates the sd-1480 test document scenario: - // Block 1: 21px (1 line) - // Block 2: 42px (2 lines) - // Block 3: 21px (1 line) - // Block 4: 21px (1 line) - // Block 5: 42px (2 lines) - // Block 6: 21px (1 line) - // Total: 168px, target: 84px - // With >= : blocks 1,2 (63px) + block 3 (21px) = 84px >= 84px → switch - // Word puts blocks 1,2 in column 0, blocks 3,4,5,6 in column 1 - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 117, 624), - createFragment('block-3', 96, 159, 624), - createFragment('block-4', 96, 180, 624), - createFragment('block-5', 96, 201, 624), - createFragment('block-6', 96, 243, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [21])], - ['block-2', createMeasure('paragraph', [21, 21])], // 2 lines - ['block-3', createMeasure('paragraph', [21])], - ['block-4', createMeasure('paragraph', [21])], - ['block-5', createMeasure('paragraph', [21, 21])], // 2 lines - ['block-6', createMeasure('paragraph', [21])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 70, measureMap); - - // Blocks 1, 2 should be in column 0 - expect(fragments[0].x).toBe(96); - expect(fragments[1].x).toBe(96); - // Blocks 3, 4, 5, 6 should be in column 1 - expect(fragments[2].x).toBe(432); - expect(fragments[3].x).toBe(432); - expect(fragments[4].x).toBe(432); - expect(fragments[5].x).toBe(432); - }); + expect(result).toBeNull(); + fragments.forEach((f, i) => expect(f.x).toBe(snapshot[i])); }); - describe('edge cases', () => { - it('should skip balancing for single column layout', () => { - const fragments = [createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624)]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ]); - - // Original positions - const origX1 = fragments[0].x; - const origX2 = fragments[1].x; + it('returns null when section has unequal explicit column widths', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 4); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288, equalWidth: false, widths: [200, 376] }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); - balancePageColumns(fragments, { count: 1, gap: 0, width: 624 }, { left: 96 }, 96, 1000, measureMap); + expect(result).toBeNull(); + }); - // Should not modify positions for single column - expect(fragments[0].x).toBe(origX1); - expect(fragments[1].x).toBe(origX2); + it('only moves fragments of the target section when the page has mixed sections', () => { + // Page has 3 fragments in section 1 (already positioned in col 0) and 6 in section 2. + // Balancing section 2 must not touch section 1 fragments. + const sec1 = buildSectionFixture(1, 3, 20, 96); + const sec2 = buildSectionFixture(2, 6, 20, 160); + const fragments = [...sec1.fragments, ...sec2.fragments]; + const measureMap = new Map([...sec1.measureMap, ...sec2.measureMap]); + const blockSectionMap = new Map([...sec1.blockSectionMap, ...sec2.blockSectionMap]); + const sec1Snapshot = sec1.fragments.map((f) => ({ id: f.blockId, x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 160, + columnWidth: 288, + availableHeight: 60, + measureMap, }); - it('should skip balancing for empty fragments array', () => { - const fragments: { x: number; y: number; width: number; kind: string; blockId: string }[] = []; - const measureMap = new Map }>(); + expect(result).not.toBeNull(); - // Should not throw - expect(() => - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 1000, measureMap), - ).not.toThrow(); - }); + // Section 1 fragments unchanged. + for (const s of sec1Snapshot) { + const f = fragments.find((x) => x.blockId === s.id)!; + expect(f.x).toBe(s.x); + expect(f.y).toBe(s.y); + } - it('should handle fragments with missing measure data', () => { - const fragments = [createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624)]; - // Only provide measure for first block - const measureMap = new Map([['block-1', createMeasure('paragraph', [20])]]); + // Section 2 fragments now split across two columns. + const sec2Xs = new Set(sec2.fragments.map((f) => f.x)); + expect(sec2Xs.size).toBe(2); + }); - // Should not throw - block-2 will have height 0 - expect(() => - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 10, measureMap), - ).not.toThrow(); + it('returns null when no fragments on the page belong to the target section', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(1, 3); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 99, // different section + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, }); - it('should handle 3-column layout', () => { - // 6 fragments for 3 columns = 2 per column - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - createFragment('block-5', 96, 176, 624), - createFragment('block-6', 96, 196, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ['block-5', createMeasure('paragraph', [20])], - ['block-6', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 3, gap: 24, width: 192 }, { left: 96 }, 96, 30, measureMap); - - // Verify 3 columns are used - const colXValues = new Set(fragments.map((f) => f.x)); - expect(colXValues.size).toBeLessThanOrEqual(3); - }); + expect(result).toBeNull(); }); }); diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index cb14ae8a2c..a367d3c823 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -156,64 +156,58 @@ export function calculateBalancedColumnHeight( }; } - // Calculate total content height + // Calculate total content height and block-height extremes const totalHeight = ctx.contentBlocks.reduce((sum, b) => sum + b.measuredHeight, 0); + const maxBlockHeight = ctx.contentBlocks.reduce((m, b) => Math.max(m, b.measuredHeight), 0); // Early exit: content is very small, no need to balance if (totalHeight < config.minColumnHeight * ctx.columnCount) { return createSingleColumnResult(ctx); } - // Initial target: evenly divide content - let targetHeight = Math.ceil(totalHeight / ctx.columnCount); - - // Ensure target meets minimum column height - targetHeight = Math.max(targetHeight, config.minColumnHeight); - - // Don't exceed available height - targetHeight = Math.min(targetHeight, ctx.availableHeight); + // Binary-search for the minimum column height H such that a greedy + // left-to-right fill places every block with every column ≤ H. This matches + // Word's observed behavior: left columns are filled as tightly as possible + // against the minimum viable height, leaving the last column shorter when + // content doesn't divide evenly (e.g. 7 blocks across 3 columns → 3+3+1, + // not 2+2+3). Both splits have the same max column height, but Word prefers + // left-heavy packing for visual rhythm. + let lo = Math.max(maxBlockHeight, config.minColumnHeight); + let hi = Math.min(totalHeight, ctx.availableHeight); + if (lo > hi) lo = hi; let bestResult: SimulationResult | null = null; - let bestScore = Infinity; - - for (let i = 0; i < config.maxIterations; i++) { - const simulation = simulateBalancedLayout(ctx, targetHeight, config); - - // Calculate balance score (lower is better) - const score = calculateBalanceScore(simulation.columnHeights, config.tolerance); - - if (score < bestScore) { - bestScore = score; - bestResult = simulation; - } - - // Check if we've achieved acceptable balance - if (isBalanced(simulation.columnHeights, config.tolerance)) { - return { - targetColumnHeight: targetHeight, - columnAssignments: simulation.assignments, - success: true, - iterations: i + 1, - blockBreakPoints: simulation.breakPoints.size > 0 ? simulation.breakPoints : undefined, - }; + let bestH = hi; + let iterations = 0; + + while (lo <= hi) { + iterations++; + const mid = Math.floor((lo + hi) / 2); + const sim = simulateBalancedLayout(ctx, mid, config); + const maxCol = Math.max(...sim.columnHeights); + const placed = sim.assignments.size === ctx.contentBlocks.length; + if (placed && maxCol <= mid) { + bestResult = sim; + bestH = mid; + hi = mid - 1; + } else { + lo = mid + 1; } - - // Adjust target based on simulation results - targetHeight = adjustTargetHeight(simulation, targetHeight, ctx, config); + if (iterations >= config.maxIterations) break; } - // Use best result found if (bestResult) { return { - targetColumnHeight: targetHeight, + targetColumnHeight: bestH, columnAssignments: bestResult.assignments, - success: false, // Didn't converge within iterations - iterations: config.maxIterations, + success: true, + iterations, blockBreakPoints: bestResult.breakPoints.size > 0 ? bestResult.breakPoints : undefined, }; } - // Fallback: simple sequential layout + // Fallback: simple sequential layout if binary search never found a valid H + // (e.g. availableHeight too small to fit content). return createSequentialResult(ctx); } @@ -351,73 +345,6 @@ function calculateParagraphBreakPoint( return { breakAfterLine: lines.length - 1, canBreak: true }; } -/** - * Check if column heights are balanced within tolerance. - */ -function isBalanced(columnHeights: number[], tolerance: number): boolean { - if (columnHeights.length <= 1) return true; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return true; - - const maxHeight = Math.max(...nonEmptyHeights); - const minHeight = Math.min(...nonEmptyHeights); - - return maxHeight - minHeight <= tolerance; -} - -/** - * Calculate a balance score (lower is better). - * Used to track best result across iterations. - */ -function calculateBalanceScore(columnHeights: number[], tolerance: number): number { - if (columnHeights.length <= 1) return 0; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return 0; - - // Score based on variance from mean - const mean = nonEmptyHeights.reduce((a, b) => a + b, 0) / nonEmptyHeights.length; - const variance = nonEmptyHeights.reduce((sum, h) => sum + Math.pow(h - mean, 2), 0); - - // Penalize empty columns - const emptyPenalty = (columnHeights.length - nonEmptyHeights.length) * tolerance * 10; - - return variance + emptyPenalty; -} - -/** - * Adjust target height based on simulation results. - */ -function adjustTargetHeight( - simulation: SimulationResult, - currentTarget: number, - ctx: BalancingContext, - config: ColumnBalancingConfig, -): number { - const heights = simulation.columnHeights; - const maxHeight = Math.max(...heights); - const minHeight = Math.min(...heights.filter((h) => h > 0)); - - // If last column is significantly taller, increase target - if (heights[heights.length - 1] > maxHeight * 0.9 && heights[heights.length - 1] > currentTarget) { - return Math.min(currentTarget + (maxHeight - currentTarget) / 2, ctx.availableHeight); - } - - // If first columns are too tall and last is too short, decrease target - if (heights[0] > currentTarget && heights[heights.length - 1] < currentTarget * 0.5) { - return Math.max(currentTarget - (currentTarget - minHeight) / 2, config.minColumnHeight); - } - - // Binary search style adjustment - const diff = maxHeight - minHeight; - if (maxHeight > currentTarget) { - return Math.min(currentTarget + diff / 4, ctx.availableHeight); - } else { - return Math.max(currentTarget - diff / 4, config.minColumnHeight); - } -} - // ============================================================================ // Helper Functions // ============================================================================ @@ -562,7 +489,7 @@ export function shouldSkipBalancing( * Fragment with required properties for column balancing. * Represents a positioned content block that can be redistributed across columns. */ -interface BalancingFragment { +export interface BalancingFragment { /** Horizontal position in pixels from left edge of page */ x: number; /** Vertical position in pixels from top edge of page */ @@ -585,13 +512,15 @@ interface BalancingFragment { * Measure data used to calculate fragment heights. * Contains layout measurements from the measuring phase. */ -interface MeasureData { +export interface MeasureData { /** Type of measure: 'paragraph', 'image', etc. */ kind: string; /** Line measurements for paragraph content */ lines?: Array<{ lineHeight: number }>; - /** Total height for non-paragraph content */ + /** Total height for non-paragraph content (image, drawing) */ height?: number; + /** Total height for table content (TableMeasure stores this rather than `height`). */ + totalHeight?: number; } /** @@ -632,14 +561,27 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map 0) { return fragment.height; } const measure = measureMap.get(fragment.blockId); - if (measure && typeof measure.height === 'number') { - return measure.height; + if (measure) { + if (typeof measure.height === 'number' && measure.height > 0) { + return measure.height; + } + if (fragment.kind === 'table' && typeof measure.totalHeight === 'number') { + return measure.totalHeight; + } + } + if (typeof fragment.height === 'number') { + return fragment.height; } } @@ -647,145 +589,457 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map= comparison to match Word's behavior). + * Return the fragment height that the column balancer should use. * - * @param fragments - Page fragments to balance (mutated in place) - * @param columns - Column configuration with count, gap between columns, and column width - * @param margins - Page margins (left margin determines column 0 start position) - * @param topMargin - Top margin where content starts vertically - * @param measureMap - Map of block IDs to measure data for height calculation + * Differs from `getFragmentHeight` for one case: an empty sectPr-marker + * paragraph. In OOXML a paragraph that exists solely to carry `` + * is invisible to Word's renderer, so it must take no vertical space in the + * balanced layout (ECMA-376 §17.18.77). The pm-adapter stamps these + * paragraphs with `attrs.sectPrMarker === true` (paragraph.ts), and the + * caller threads the resulting block-id set through here. * - * @example - * ```typescript - * balancePageColumns( - * page.fragments, - * { count: 2, gap: 48, width: 288 }, - * { left: 96 }, - * 96, - * measureMap - * ); - * // Fragments are now redistributed: first half at x=96, second half at x=432 - * ``` + * Earlier versions of this function tried to detect markers from line + * geometry (`line.width === 0`), but a regular blank paragraph also + * measures with width 0 and DOES occupy line height — collapsing those to + * 0 caused the next paragraph to overlap the blank line. The metadata-based + * gate is the only safe signal. */ -export function balancePageColumns( - fragments: BalancingFragment[], - columns: { count: number; gap: number; width: number }, - margins: { left: number }, - topMargin: number, - availableHeight: number, +function getBalancingHeight( + fragment: BalancingFragment, measureMap: Map, -): void { - // Skip balancing for single-column layouts or empty pages - if (columns.count <= 1 || fragments.length === 0) { - return; + sectPrMarkerBlockIds?: Set, +): number { + if (fragment.kind === 'para' && sectPrMarkerBlockIds && sectPrMarkerBlockIds.has(fragment.blockId)) { + return 0; } + return getFragmentHeight(fragment, measureMap); +} + +// ============================================================================ +// Section-scoped balancing +// ============================================================================ + +/** + * Column layout properties relevant to balancing decisions. + * Mirrors the subset of ColumnLayout that this module reads. + */ +export interface SectionColumnLayout { + count: number; + gap: number; + width?: number; + widths?: number[]; + equalWidth?: boolean; +} +export interface BalanceSectionOnPageArgs { + /** All fragments on the target page. Only those belonging to sectionIndex are balanced (mutated in place). */ + fragments: BalancingFragment[]; + /** Section whose content ends on this page. */ + sectionIndex: number; + /** Column layout of the ending section. */ + sectionColumns: SectionColumnLayout; + /** True if the section contains an explicit — skip balancing to preserve author intent. */ + sectionHasExplicitColumnBreak: boolean; + /** blockId -> sectionIndex map (built once per layout, shared across calls). */ + blockSectionMap: Map; + /** Left page margin, used to compute column X positions. */ + margins: { left: number }; + /** Y position where the section's region begins on this page. */ + topMargin: number; + /** Column width — passed to balancePageColumns so it can resize fragments. */ + columnWidth: number; + /** Available height from topMargin to content bottom. */ + availableHeight: number; + /** Measurement data for fragments (built from measures array). */ + measureMap: Map; /** - * Calculates the X position for a given column index. - * Column 0 starts at the left margin, subsequent columns offset by (width + gap). + * Block IDs of paragraphs that exist only to carry `` properties. + * These contribute zero height to balanced columns — see `getBalancingHeight`. + * Optional; when omitted no fragment is treated as a marker. */ - const columnX = (columnIndex: number): number => { - return margins.left + columnIndex * (columns.width + columns.gap); - }; + sectPrMarkerBlockIds?: Set; +} - // Group fragments by Y position into logical rows. - // Fragments at the same Y coordinate are part of the same row and move together. - const rowMap = new Map(); - fragments.forEach((fragment, idx) => { - // Round Y to handle floating point precision - const y = Math.round(fragment.y); - if (!rowMap.has(y)) { - rowMap.set(y, []); - } - const height = getFragmentHeight(fragment, measureMap); - rowMap.get(y)!.push({ - fragment, - height, - originalIndex: idx, - }); - }); +/** + * Balance the fragments of one section on one page. + * + * Returns the tallest balanced column's bottom Y, or null if balancing was skipped. + * Callers can use the returned Y to update paginator cursors so subsequent content + * starts just below the balanced section rather than below an unbalanced maxCursorY. + * + * Guards (skip balancing when): + * - Section has <= 1 column (nothing to balance) + * - Section contains an explicit column break (author intent wins) + * - Section uses unequal column widths (Word doesn't rebalance these) + * - No fragments on this page belong to the section + */ +export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: number } | null { + const { sectionColumns, sectionHasExplicitColumnBreak, sectionIndex, blockSectionMap, fragments } = args; - // Sort rows by Y position (top to bottom) - const sortedRows = [...rowMap.entries()].sort((a, b) => a[0] - b[0]); - - // Calculate total content height by summing max height of each row - let totalHeight = 0; - const contentBlocks: BalancingBlock[] = []; - for (const [, rowFragments] of sortedRows) { - const maxHeight = Math.max(...rowFragments.map((f) => f.height)); - totalHeight += maxHeight; - contentBlocks.push({ - blockId: rowFragments[0]?.fragment.blockId ?? `row-${contentBlocks.length}`, - measuredHeight: maxHeight, - canBreak: false, - keepWithNext: false, - keepTogether: true, - }); + if (sectionColumns.count <= 1) return null; + if (sectionHasExplicitColumnBreak) return null; + if (sectionColumns.equalWidth === false && Array.isArray(sectionColumns.widths) && sectionColumns.widths.length > 0) { + return null; } + // Filter to fragments of the target section on this page. + const sectionFragments = fragments.filter((f) => blockSectionMap.get(f.blockId) === sectionIndex); + if (sectionFragments.length === 0) return null; + + const columnCount = sectionColumns.count; + const columnGap = sectionColumns.gap; + const columnWidth = sectionColumns.width ?? 0; + if (columnWidth <= 0) return null; + + // Use the minimum Y of the section's fragments as the balancing origin — the + // section may start mid-page (e.g. section 0 is single-column and section 1 + // continues below it). Using topMargin unconditionally would stack balanced + // columns on top of earlier single-column content on the same page. + let sectionTopY = Number.POSITIVE_INFINITY; + for (const f of sectionFragments) { + if (f.y < sectionTopY) sectionTopY = f.y; + } + if (!Number.isFinite(sectionTopY)) sectionTopY = args.topMargin; + + // Remaining height from the section's actual top to the page content bottom. + const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); + if (remainingHeight <= 0) return null; + + // Pre-split a dominant table fragment at a row boundary before balancing. + // + // When a section's final page contains a single splittable table that's + // taller than (totalSectionHeight / columnCount), the atomic-block balancer + // can only place the whole table in one column — leaving the other column + // empty. Word's behavior per ECMA-376 §17.18.77 is to balance the + // REMAINING content, which for a narrow table means splitting at a row + // boundary so both columns carry roughly half the rows. + // + // We split ONCE per balance call (we only need two halves for a 2-col + // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a + // 515px / 28-row table; splitting into ~257px halves lets the balancer + // assign half to each column. + // + // The split takes the cumulative height of fragments preceding the table + // (in document order) so the per-column target accounts for content + // already destined for column 0. Without this, a 100px paragraph + 300px + // table in 2 cols hits target=200, splits the table at row=200 → 100+200 + // / 100; subtracting the leading 100 gives target=150 → 100+100 / 200. + // + // The split returns a rollback closure. We invoke it if the post-split + // shouldSkipBalancing check still rejects, so the page never carries a + // mutated half table when balancing was ultimately skipped. + let precedingHeightBeforeTable = 0; + for (const f of sectionFragments) { + if (f.kind === 'table') break; + precedingHeightBeforeTable += getBalancingHeight(f, args.measureMap, args.sectPrMarkerBlockIds); + } + const splitResult = splitDominantTableAtRowBoundary({ + sectionFragments, + fragments, + columnCount, + measureMap: args.measureMap, + sectPrMarkerBlockIds: args.sectPrMarkerBlockIds, + precedingHeight: precedingHeightBeforeTable, + }); + + // Order fragments in document order: by current column (x → left-to-right), + // then by y within each column. During unbalanced layout the paginator fills + // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the + // original sequence. + const ordered = [...sectionFragments].sort((a, b) => { + if (a.x !== b.x) return a.x - b.x; + return a.y - b.y; + }); + + // Treat each fragment as its own block for binary-search balancing. Grouping + // by y (as balancePageColumns does) would collapse fragments from different + // source columns that happen to share a y coordinate into a single row and + // re-stack them at one position — producing overlap. + // + // Use `getBalancingHeight` so empty sectPr-marker paragraphs contribute 0 + // to their column's cursor — matching Word's behavior of not rendering a + // blank line for such markers. + const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ + blockId: `${f.blockId}#${i}`, + measuredHeight: getBalancingHeight(f, args.measureMap, args.sectPrMarkerBlockIds), + canBreak: false, + keepWithNext: false, + keepTogether: true, + })); + if ( shouldSkipBalancing({ - columnCount: columns.count, - columnWidth: columns.width, - columnGap: columns.gap, - availableHeight, + columnCount, + columnWidth, + columnGap, + availableHeight: remainingHeight, contentBlocks, }) ) { - return; + splitResult?.rollback(); + return null; } - // Calculate target height per column for balanced distribution - const targetHeight = totalHeight / columns.count; - - // Skip balancing if target height is below minimum threshold - if (targetHeight < DEFAULT_BALANCING_CONFIG.minColumnHeight) { - return; + const result = calculateBalancedColumnHeight( + { columnCount, columnWidth, columnGap, availableHeight: remainingHeight, contentBlocks }, + DEFAULT_BALANCING_CONFIG, + ); + + const columnX = (columnIndex: number): number => args.margins.left + columnIndex * (columnWidth + columnGap); + + const colCursors = new Array(columnCount).fill(sectionTopY); + let maxY = sectionTopY; + for (let i = 0; i < ordered.length; i++) { + const f = ordered[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + f.x = columnX(col); + f.y = colCursors[col]; + f.width = columnWidth; + colCursors[col] += block.measuredHeight; + if (colCursors[col] > maxY) maxY = colCursors[col]; } + return { maxY }; +} - // Distribute rows across columns using greedy algorithm. - // Each row is assigned to the current column until adding it would - // reach or exceed the target height, then we advance to the next column. - let currentColumn = 0; - let currentColumnHeight = 0; - let currentY = topMargin; +/** + * Table measure shape used by the row-split preprocessor. + * + * Only the row-heights array is required. We access it through the runtime + * `measureMap` stored as `MeasureData`, which narrows the interface for the + * public balancer API — but the stored value is the full `TableMeasure` + * containing `rows: [{ height }]`, so a cast is safe. + */ +interface TableMeasureLike { + kind: string; + rows?: Array<{ height: number }>; +} - for (const [, rowFragments] of sortedRows) { - const rowHeight = Math.max(...rowFragments.map((f) => f.height)); +/** + * Row-boundary record matching the contract `TableRowBoundary` shape. + * + * The DOM renderer serializes these into the compact `{i,y,h,min,r}` keys + * for `data-table-boundaries`; storing them in the contract shape here + * keeps the layout-engine/contract boundary intact and prevents `undefined` + * row-boundary values from reaching the renderer when a table is split. + */ +interface RowBoundaryLike { + index: number; + y: number; + height: number; + minHeight: number; + resizable: boolean; +} - // Advance to next column when current column reaches target height. - // Uses >= to match Word's behavior: switch when target is reached, not just exceeded. - // This ensures balanced distribution where the first column doesn't exceed its share. - if ( - currentColumnHeight > 0 && - currentColumnHeight + rowHeight >= targetHeight && - currentColumn < columns.count - 1 - ) { - currentColumn++; - currentColumnHeight = 0; - currentY = topMargin; +/** + * In-place split of a dominant table fragment at a row boundary. + * + * Problem this solves: the column balancer treats each fragment as an atomic + * block. A multi-page two-column continuous section's final page can end up + * with a single table fragment that exceeds half the section's height. The + * balancer then places the whole table in one column and leaves the other + * empty — diverging from Word, which balances by splitting the table at a + * row boundary (ECMA-376 §17.18.77: continuous breaks balance the previous + * section's content). + * + * This preprocessor runs once per `balanceSectionOnPage` call. It detects a + * single dominant table fragment and splits it at the row whose cumulative + * height first meets or exceeds totalSectionHeight / columnCount. The two + * halves are inserted into both `sectionFragments` and the page's + * `fragments` array in place of the original; the rest of the balancer then + * runs on N + 1 similarly-sized blocks and naturally assigns one to each + * column. + * + * Guards: + * - Only one splittable table fragment on the section's page (skip if 0 or >1). + * - Table must span at least 2 rows (can't split a 1-row fragment). + * - Total height must exceed target (= total / columnCount) by more than a + * small epsilon; otherwise the atomic balancer already fits. + * + * Splitting is NOT appropriate when the author placed explicit column breaks + * or used unequal columns — those cases are already filtered by the caller. + * + * @param args Section fragments (already filtered to this section), the + * full page fragments (mutated in place), column count, and the + * measure map. + */ +/** + * Result of attempting a dominant-table split. When `applied` is true the + * caller is responsible for invoking `rollback()` if downstream balancing + * decides to skip — otherwise the page is left with overlapping table halves + * (the original table mutated to a partial range plus the inserted second + * half). Returns `null` when the split preconditions (single splittable + * table, rowSpan ≥ 2, oversized vs target) aren't met, in which case nothing + * was mutated and there is nothing to roll back. + */ +type DominantTableSplitResult = { + applied: true; + rollback: () => void; +} | null; + +function splitDominantTableAtRowBoundary(args: { + sectionFragments: BalancingFragment[]; + fragments: BalancingFragment[]; + columnCount: number; + measureMap: Map; + sectPrMarkerBlockIds?: Set; + /** + * Cumulative height of fragments already placed in earlier columns, BEFORE + * the dominant table. Subtracted from the per-column target so the split + * row produces halves that pack alongside preceding atomic blocks (e.g. + * a 100px paragraph + 300px table in 2 cols should split the table at + * row=100 → 100+100 vs 100+200, not row=200 → 100+200 vs 100). Defaults + * to 0 when caller doesn't track this. + */ + precedingHeight?: number; +}): DominantTableSplitResult { + const { sectionFragments, fragments, columnCount, measureMap, sectPrMarkerBlockIds } = args; + const precedingHeight = Math.max(0, args.precedingHeight ?? 0); + if (columnCount <= 1) return null; + + const tables = sectionFragments.filter((f) => f.kind === 'table'); + if (tables.length !== 1) return null; + const table = tables[0] as BalancingFragment & { + fromRow?: number; + toRow?: number; + height?: number; + continuesFromPrev?: boolean; + continuesOnNext?: boolean; + metadata?: { rowBoundaries?: RowBoundaryLike[]; columnBoundaries?: unknown; coordinateSystem?: string }; + }; + const fromRow = table.fromRow ?? 0; + const toRow = table.toRow ?? fromRow; + const rowSpan = toRow - fromRow; + if (rowSpan < 2) return null; + + const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return null; + + const totalSectionHeight = sectionFragments.reduce( + (sum, f) => sum + getBalancingHeight(f, measureMap, sectPrMarkerBlockIds), + 0, + ); + if (totalSectionHeight <= 0) return null; + // Per-column target. Subtract the height already placed in earlier columns + // before this table so the split row produces halves that pack alongside + // those preceding atomic blocks. Without this adjustment, e.g. a 100px + // paragraph + 300px table in 2 cols hits target=200, splits the table + // at row=200 → cols 100+200 vs 100, max=300; subtracting precedingHeight + // gives target=150 → splits at row=100 → cols 100+100 vs 200, max=200. + // Floor at 1 so the pathological "preceding height already exceeds + // target" case still yields a forward-progressing split row search. + const target = Math.max(1, totalSectionHeight / columnCount - precedingHeight); + + const tableHeight = getBalancingHeight(table, measureMap, sectPrMarkerBlockIds); + // Small-epsilon guard: if the table alone fits within the target, the + // atomic balancer already produces a correct assignment — splitting would + // only introduce visual churn. + if (tableHeight <= target + 1) return null; + + // Find the row K in [fromRow + 1, toRow) such that cumulative height from + // fromRow to K first reaches the target. Guaranteed to succeed because + // tableHeight > target and rowSpan ≥ 2. + let running = 0; + let splitRow = fromRow + 1; + for (let r = fromRow; r < toRow - 1; r++) { + const h = measure.rows[r]?.height ?? 0; + if (running + h >= target) { + splitRow = r + 1; + break; } - - // Position all fragments in this row within the current column - const colX = columnX(currentColumn); - for (const info of rowFragments) { - info.fragment.x = colX; - info.fragment.y = currentY; - info.fragment.width = columns.width; + running += h; + splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row + } + // Clamp to valid range (defensive — loop logic above should always hit the break). + if (splitRow <= fromRow || splitRow >= toRow) return null; + + const firstHalfRows = measure.rows.slice(fromRow, splitRow); + const secondHalfRows = measure.rows.slice(splitRow, toRow); + const firstHalfHeight = firstHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + const secondHalfHeight = secondHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + + // Regenerate rowBoundaries so the renderer draws horizontal dividers at the + // right y-offsets inside each half. rowBoundaries are 0-origin within the + // fragment; we walk the measure's rows for each half and accumulate. + // Use the contract `TableRowBoundary` shape ({index, y, height, minHeight, + // resizable}). The DOM renderer compresses these into {i,y,h,min,r} for the + // serialized data-table-boundaries attribute; emitting the compact shape + // here would produce undefined values after the renderer's projection and + // break interactive row resize handles for split fragments. + const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { + const out: RowBoundaryLike[] = []; + let y = 0.5; + for (let i = 0; i < rows.length; i++) { + const h = rows[i].height ?? 0; + out.push({ index: startIndex + i, y, height: h, minHeight: h, resizable: true }); + y += h; } + return out; + }; - currentColumnHeight += rowHeight; - currentY += rowHeight; - } + const originalMetadata = table.metadata; + const firstMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(firstHalfRows, 0), + } + : undefined; + const secondMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(secondHalfRows, 0), + } + : undefined; + + // Capture original mutable fields BEFORE applying the split. Required for: + // (a) Rollback: if the caller decides to skip balancing post-split, we + // restore the table fragment to its pre-split state. + // (b) Correctly inheriting `continuesOnNext` on the second half. Reading + // `table.continuesOnNext` AFTER setting `table.continuesOnNext = true` + // always yielded `true`, so the prior `? false : (… ?? false)` + // ternary collapsed to `false` and the second half could never inherit + // the original cross-page continuation. Capturing first preserves the + // original intent: if the source table continued onto a later page, + // the SECOND half is the one that now carries that continuation. + const originalToRow = table.toRow; + const originalHeight = table.height; + const originalContinuesOnNext = table.continuesOnNext ?? false; + + // Construct the first half by mutating the original (preserves object + // identity so `fragments.indexOf(table)` still works below). + table.toRow = splitRow; + table.height = firstHalfHeight; + table.continuesOnNext = true; + if (firstMetadata) table.metadata = firstMetadata; + + const secondHalf: BalancingFragment = { + ...table, + fromRow: splitRow, + toRow, + height: secondHalfHeight, + continuesFromPrev: true, + continuesOnNext: originalContinuesOnNext, + metadata: secondMetadata ?? table.metadata, + } as BalancingFragment; + + // Insert the second half right after the first in both arrays so the + // balancer's (x, y) ordering keeps them adjacent in document order. + const fragIdx = fragments.indexOf(table); + if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); + const sectIdx = sectionFragments.indexOf(table); + if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); + + return { + applied: true, + rollback: () => { + table.toRow = originalToRow; + table.height = originalHeight; + table.continuesOnNext = originalContinuesOnNext; + table.metadata = originalMetadata; + const fIdx = fragments.indexOf(secondHalf); + if (fIdx >= 0) fragments.splice(fIdx, 1); + const sIdx = sectionFragments.indexOf(secondHalf); + if (sIdx >= 0) sectionFragments.splice(sIdx, 1); + }, + }; } diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index 6d4dcb7228..3d4c736265 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2868,6 +2868,280 @@ describe('layoutDocument', () => { expect(singleColFragment?.width).not.toBeCloseTo(twoColFragment!.width, 0); }); }); + + describe('column balancing at section boundaries (SD-2452)', () => { + /** + * End-to-end tests for the column-balancing feature. + * + * These tests drive layoutDocument with synthetic blocks/measures and assert on + * the OBSERVABLE fragment positions produced by the full pipeline — not on internal + * helper calls. When Word's algorithm changes or when we swap out the balancing + * implementation, these tests continue to assert what users see. + */ + + const PAGE: LayoutOptions = { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }; + const LEFT_MARGIN = 72; + const CONTENT_WIDTH = 612 - 72 - 72; // 468 + const COLUMN_GAP = 48; + const TWO_COL_WIDTH = (CONTENT_WIDTH - COLUMN_GAP) / 2; // 210 + const TWO_COL_RIGHT_X = LEFT_MARGIN + TWO_COL_WIDTH + COLUMN_GAP; // 330 + + /** Build a 2-col section ending with a section break, surrounded by single-column context. */ + function buildTwoColumnSection(paragraphCount: number, lineHeight = 20) { + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + + for (let i = 0; i < paragraphCount; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + } + + blocks.push({ + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock); + measures.push({ kind: 'sectionBreak' }); + + blocks.push({ kind: 'paragraph', id: 'p-after', runs: [], attrs: { sectionIndex: 1 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + + return { blocks, measures }; + } + + it('distributes 6 equal paragraphs evenly across 2 columns (3+3)', () => { + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + + const sectionFragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + const col0 = sectionFragments.filter((f) => f.x === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => f.x === TWO_COL_RIGHT_X); + + expect(col0.length + col1.length).toBe(6); + // Minimum-height balance of 6 equal 20px paragraphs across 2 columns is 3+3 + // (tallest column = 60px). Any split closer to 1+5 or 2+4 produces a taller + // section than Word would render. This assertion fails if balancing is absent, + // uses an incorrect algorithm, or runs with a wrong available height. + expect(col0).toHaveLength(3); + expect(col1).toHaveLength(3); + }); + + it('places post-section single-column content just below the balanced columns', () => { + // Before the fix: "p-after" sat below all 6 paragraphs stacked in col 0 (y ~= top + 120). + // After the fix: columns balance to 3+3 (height = top + 60), so p-after starts at top + 60. + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + const firstSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p0', + ); + const afterSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p-after', + ); + expect(firstSectionPara).toBeDefined(); + expect(afterSectionPara).toBeDefined(); + + // p-after's Y reflects a balanced 3-row column (3 × 20px) above it. + const expectedBalancedBottom = firstSectionPara!.y + 3 * 20; + expect(afterSectionPara!.y).toBe(expectedBalancedBottom); + }); + + it('fills BOTH columns on every page of a multi-page 2-col continuous section', () => { + // ECMA-376 §17.18.77 (ST_SectionMark): a continuous section break + // "balances content of the previous section." Word's observable behavior + // is to fill col 0 to the balanced target, wrap to col 1 to the same + // target, then wrap to the next page — on EVERY page, not only the last. + // Regression for SD-2646: earlier pages must not stack content in col 0. + const lineHeight = 20; + const paraCount = 100; // ≈ 2000px, exceeds one page's content area + const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + for (const page of layout.pages) { + const sectionFragments = page.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length < 2) continue; // tail of last page may have <2 fragments + const col0 = sectionFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + expect(col0.length).toBeGreaterThan(0); + expect(col1.length).toBeGreaterThan(0); + } + }); + + it('flows a narrow multi-page table across both columns on every page (SD-2646 regression)', () => { + // IT-945 shape: a narrow table (one column wide) inside a 2-col continuous + // section, spanning multiple pages. Regression guard for the layout path + // once pm-adapter correctly places the table in the 2-col section. + const rowCount = 114; + const rowHeight = 18.4; + const cellWidth = TWO_COL_WIDTH / 2; + + const rows = Array.from({ length: rowCount }, (_, r) => ({ + id: `tbl-row-${r}`, + cells: [ + { id: `tbl-cell-${r}-0`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-0-p`, runs: [] } }, + { id: `tbl-cell-${r}-1`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-1-p`, runs: [] } }, + ], + })); + const tbl: TableBlock = { + kind: 'table', + id: 'tbl', + rows, + attrs: { sectionIndex: 0 }, + } as TableBlock; + const tblM: TableMeasure = { + kind: 'table', + rows: Array.from({ length: rowCount }, () => ({ + height: rowHeight, + cells: [ + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + ], + })), + columnWidths: [cellWidth, cellWidth], + totalWidth: cellWidth * 2, + totalHeight: rowHeight * rowCount, + }; + + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + tbl as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }, tblM, { kind: 'sectionBreak' }]; + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + let pagesWithBothColumns = 0; + for (const page of layout.pages) { + const tableFragments = page.fragments.filter((f) => f.kind === 'table'); + if (tableFragments.length === 0) continue; + const col0 = tableFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = tableFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + if (col0.length > 0 && col1.length > 0) pagesWithBothColumns++; + } + // At least one page must have fragments in both columns. A stricter + // assertion (every page) is made invalid by the tail of the final page + // which can reasonably hold <1 column's worth of content. + expect(pagesWithBothColumns).toBeGreaterThan(0); + }); + + it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { + const threeColStart: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 3, gap: 24 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock; + const sectionEnd: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock; + + const blocks: FlowBlock[] = [threeColStart]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + for (let i = 0; i < 6; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([20])); + } + blocks.push(sectionEnd); + measures.push({ kind: 'sectionBreak' }); + + const layout = layoutDocument(blocks, measures, PAGE); + + const fragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p'), + ); + const uniqueX = new Set(fragments.map((f) => Math.round(f.x))); + // All three columns should be used — no column is empty. + expect(uniqueX.size).toBe(3); + }); + + it('leaves fragments untouched when the section has an explicit column break', () => { + // Author-placed must override balancing. + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + { kind: 'paragraph', id: 'p1', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { kind: 'columnBreak', id: 'br', attrs: { sectionIndex: 0 } } as ColumnBreakBlock, + { kind: 'paragraph', id: 'p2', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [ + { kind: 'sectionBreak' }, + makeMeasure([20]), + { kind: 'columnBreak' }, + makeMeasure([20]), + { kind: 'sectionBreak' }, + ]; + + const layout = layoutDocument(blocks, measures, PAGE); + + const p1 = layout.pages[0].fragments.find((f) => f.blockId === 'p1') as ParaFragment; + const p2 = layout.pages[0].fragments.find((f) => f.blockId === 'p2') as ParaFragment; + + // Author's column break is preserved: p1 in col 0, p2 in col 1. + expect(p1.x).toBe(LEFT_MARGIN); + expect(p2.x).toBe(TWO_COL_RIGHT_X); + }); + }); }); describe('layoutHeaderFooter', () => { diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 89d87972e1..dc6cd88af1 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -52,7 +52,7 @@ import { normalizeFragmentsForRegion } from './normalize-header-footer-fragments import { createPaginator, type PageState, type ConstraintBoundary } from './paginator.js'; import { formatPageNumber } from './pageNumbering.js'; import { shouldSuppressSpacingForEmpty, shouldSuppressOwnSpacing } from './layout-utils.js'; -import { balancePageColumns } from './column-balancing.js'; +import { balanceSectionOnPage, type BalancingFragment, type MeasureData } from './column-balancing.js'; import { cloneColumnLayout, widthsEqual } from './column-utils.js'; type PageSize = { w: number; h: number }; @@ -1562,6 +1562,110 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options ); }; + // Build shared maps for column balancing. These are consumed both mid-layout + // (at continuous section-break boundaries) and post-layout (per-section final + // page), so we construct them once here rather than rebuilding in each pass. + const balancingMeasureMap = new Map(); + const blockSectionMap = new Map(); + const sectionColumnsMap = new Map(); + const sectionHasExplicitColumnBreak = new Set(); + // sectionIndex -> type of the section break that ENDS this section (per + // pm-adapter end-tagged semantics, ECMA-376 §17.6.17: a paragraph's sectPr + // describes the section ENDING at that paragraph, so SectionBreakBlock.type + // here is the type of the break that closes the section). Per ECMA-376 + // §17.18.77 only `continuous` breaks trigger column balancing — `nextPage`, + // `evenPage`, `oddPage` do not. Tracked here so the post-layout pass can + // skip the wrong section types. + const sectionEndBreakType = new Map(); + // sectionIndex -> whether `` was EXPLICIT in the source sectPr. + // Body sectPrs default to `continuous` when w:type is omitted; Word does + // NOT balance those single-page docs (sd-1655). Body sectPrs with explicit + // `` DO balance (sd-1480), even single-page. + // The flag carries the distinction across pm-adapter -> layout-engine. + const sectionTypeIsExplicit = new Map(); + // sectionIndex of the LAST section in the document. The body sectPr is + // always the final section break and represents the end of the document, + // not an actual mid-document break. Even when its type defaults to + // `continuous` (DEFAULT_BODY_SECTION_TYPE), there is no break AFTER the + // last section's content to trigger balancing. Excluding the last section + // matches Word: a 3-column doc with only a body sectPr (e.g. + // `sd-1655-col-sep-3-equal-columns`) is NOT balanced — content fills + // top-to-bottom by column. Without this guard the previous post-layout + // pass over-balanced single-section docs and split heading/body across + // columns when Word kept them together. + let lastSectionIdx: number | null = null; + // Block IDs of empty paragraphs that exist only to carry sectPr properties. + // These are invisible in Word's output and must contribute zero height to + // balanced columns (ECMA-376 §17.18.77). Threading explicit metadata avoids + // the older `line.width === 0` heuristic, which incorrectly collapsed normal + // blank paragraphs and caused overlap on the next paragraph. + const sectPrMarkerBlockIds = new Set(); + // True if any block in the document is a column break. Used as a guard for + // the document-wide balancing fallback (Nick comment 2): when callers use + // LayoutOptions.columns without section metadata, we still want Word's + // balanced-final-page behavior unless the author placed an explicit column + // break, in which case we preserve their intent. + let documentHasExplicitColumnBreak = false; + // True if any block in the document is a sectionBreak. The document-wide + // fallback only fires when there are NO sectionBreak blocks — otherwise the + // section-scoped path is the source of truth (even if pm-adapter or a + // synthetic caller didn't stamp `attrs.sectionIndex`, treating it as a + // single fallback section would clobber regions that the mid-page handler + // already balanced). + let documentHasAnySectionBreak = false; + // Tracks sections already balanced mid-page — the post-layout pass skips these + // to avoid double-balancing, which would overlap fragments at the same x/y. + const alreadyBalancedSections = new Set(); + // Walk blocks in document order. sectionBreak blocks carry attrs.sectionIndex and + // are emitted BEFORE the first paragraph of their section (see pm-adapter). Every + // subsequent content block belongs to that section until the next sectionBreak, + // so we track currentSectionIdx and stamp it on each block. This is required because + // pm-adapter only sets attrs.sectionIndex on sectionBreak blocks, not paragraphs. + let currentSectionIdx: number | null = null; + blocks.forEach((block, idx) => { + const measure = measures[idx]; + if (measure) { + balancingMeasureMap.set(block.id, measure as MeasureData); + } + const blockWithAttrs = block as { attrs?: { sectionIndex?: number; typeIsExplicit?: boolean } }; + const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; + if (block.kind === 'sectionBreak') { + documentHasAnySectionBreak = true; + if (typeof attrSectionIdx === 'number') { + currentSectionIdx = attrSectionIdx; + lastSectionIdx = attrSectionIdx; + if (block.columns) { + sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + } + if (typeof block.type === 'string') { + sectionEndBreakType.set(attrSectionIdx, block.type); + } + if (typeof blockWithAttrs.attrs?.typeIsExplicit === 'boolean') { + sectionTypeIsExplicit.set(attrSectionIdx, blockWithAttrs.attrs.typeIsExplicit); + } + } + } + if (currentSectionIdx !== null) { + blockSectionMap.set(block.id, currentSectionIdx); + if (block.kind === 'columnBreak') { + sectionHasExplicitColumnBreak.add(currentSectionIdx); + documentHasExplicitColumnBreak = true; + } + } else if (block.kind === 'columnBreak') { + documentHasExplicitColumnBreak = true; + } + // Block paragraphs that exist only to carry sectPr metadata (pm-adapter + // sets this attr on otherwise-empty section-property paragraphs). These + // are invisible in Word's renderer and must not contribute height when + // balancing columns. + if ( + block.kind === 'paragraph' && + (blockWithAttrs as { attrs?: { sectPrMarker?: boolean } }).attrs?.sectPrMarker === true + ) { + sectPrMarkerBlockIds.add(block.id); + } + }); + // Collect anchored drawings mapped to their anchor paragraphs const anchoredByParagraph = collectAnchoredDrawings(blocks, measures); // PASS 1C: collect anchored/floating tables mapped to their anchor paragraphs. @@ -1900,9 +2004,81 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const columnIndexBefore = state.columnIndex; const newColumns = updatedState.pendingColumns; - // If reducing column count and currently in a column that won't exist - // in the new layout, start a fresh page to avoid overwriting earlier columns - if (columnIndexBefore >= newColumns.count) { + // Identify the ending section from the current page's fragments. + // `activeSectionIndex` only updates at page boundaries, so for continuous + // mid-page section breaks it's stale. Walk back through page fragments + // to find the most recent section index that isn't the new one — that's + // the section that's ending. + let endingSectionIndex: number | null = null; + for (let i = state.page.fragments.length - 1; i >= 0; i--) { + const mapped = blockSectionMap.get(state.page.fragments[i].blockId); + if (typeof mapped === 'number' && mapped !== metadataIndex) { + endingSectionIndex = mapped; + break; + } + } + const endingSectionColumns = + endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; + const willBalance = + endingSectionIndex !== null && + !!endingSectionColumns && + endingSectionColumns.count > 1 && + !sectionHasExplicitColumnBreak.has(endingSectionIndex); + + // Balance BEFORE any forced page break. After balancing, all of the + // ending section's fragments are repositioned within the section's own + // vertical region — there's no risk of the new 1-col region overwriting + // prior column content, because the cursor moves to maxY below them. + // + // `willBalance` is a coarse approval: balanceSectionOnPage has its own + // late skip conditions (unequal column widths, zero remaining height, + // section content too small for shouldSkipBalancing's thresholds) that + // can return null even when willBalance was true. The page-break + // fallback below must consider the actual balance outcome, not just + // willBalance, otherwise we leave the new region starting on the same + // page from a stale column index and overwriting the previous + // section's column content. + let balanceResult: { maxY: number } | null = null; + if (willBalance) { + // The current region starts at the last constraint boundary's Y, or at + // the page's top margin if no mid-page region change has happened yet. + const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; + const activeRegionTop = lastBoundary?.y ?? activeTopMargin; + const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; + const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); + const normalized = normalizeColumns(endingSectionColumns!, contentWidth); + balanceResult = balanceSectionOnPage({ + fragments: state.page.fragments as BalancingFragment[], + sectionIndex: endingSectionIndex!, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: endingSectionColumns!.widths, + equalWidth: endingSectionColumns!.equalWidth, + }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeRegionTop, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + sectPrMarkerBlockIds, + }); + if (balanceResult) { + // Collapse both cursors to the balanced section bottom so the new + // region starts there, not below an unbalanced tallest column. + state.cursorY = balanceResult.maxY; + state.maxCursorY = balanceResult.maxY; + alreadyBalancedSections.add(endingSectionIndex!); + } + } + if (balanceResult === null && columnIndexBefore >= newColumns.count) { + // No balancing applied (either willBalance was false, or + // balanceSectionOnPage skipped late). Reducing column count without + // balancing means starting the new region at col 0 could overwrite + // earlier column content. Force a fresh page to avoid that. state = paginator.startNewPage(); } @@ -2514,109 +2690,190 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } - // Apply column balancing to pages with multi-column layout. - // This redistributes fragments to achieve balanced column heights, matching Word's behavior. - if (activeColumns.count > 1) { - const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); - const normalizedCols = normalizeColumns(activeColumns, contentWidth); - - // Build measure map for fragment height calculation during balancing - const measureMap = new Map; height?: number }>(); - // Build blockId -> sectionIndex map to filter fragments by section - const blockSectionMap = new Map(); - const sectionColumnsMap = new Map(); - blocks.forEach((block, idx) => { - const measure = measures[idx]; - if (measure) { - measureMap.set(block.id, measure as { kind: string; lines?: Array<{ lineHeight: number }>; height?: number }); - } - // Track section index for each block (for filtering during balancing) - // Not all block types have attrs, so access it safely - const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; - const sectionIdx = blockWithAttrs.attrs?.sectionIndex; - if (typeof sectionIdx === 'number') { - blockSectionMap.set(block.id, sectionIdx); - if (block.kind === 'sectionBreak' && block.columns) { - sectionColumnsMap.set(sectionIdx, cloneColumnLayout(block.columns)); - } - } - }); - - for (const page of pages) { - // Balance the last page (section ends at document end). - // TODO: Track section boundaries and balance at each continuous section break. - if (page === pages[pages.length - 1] && page.fragments.length > 0) { - const finalSectionColumns = sectionColumnsMap.get(activeSectionIndex) ?? activeColumns; - // Word does not rebalance the final page for sections that use explicit - // per-column widths. Preserve the natural left-to-right fill order there. - const hasExplicitColumnWidths = - finalSectionColumns?.equalWidth === false && - Array.isArray(finalSectionColumns.widths) && - finalSectionColumns.widths.length > 0; - - if (hasExplicitColumnWidths) { - continue; - } - - // Skip balancing if fragments are already in multiple columns (e.g., explicit column breaks). - // Balancing should only apply when all content flows naturally in column 0. - const uniqueXPositions = new Set(page.fragments.map((f) => Math.round(f.x))); - const hasExplicitColumnStructure = uniqueXPositions.size > 1; - - if (hasExplicitColumnStructure) { - continue; - } - - // Skip balancing if fragments have different widths (indicating different column configs - // from multiple sections). Balancing would incorrectly apply the final section's width to all. - const uniqueWidths = new Set(page.fragments.map((f) => Math.round(f.width))); - const hasMixedColumnWidths = uniqueWidths.size > 1; - - if (hasMixedColumnWidths) { - continue; - } - - // Check if page has content from multiple sections. - // If so, only balance fragments from the final multi-column section. - const fragmentSections = new Set(); - for (const f of page.fragments) { - const section = blockSectionMap.get(f.blockId); - if (section !== undefined) { - fragmentSections.add(section); - } - } + // Apply column balancing per section. For each section with a multi-column layout, + // find the final page that carries any of its fragments and balance those fragments. + // Earlier pages of a multi-page section are always fully filled (content overflowed + // to reach them), so balancing is a no-op there. This replaces the previous + // "last page of document" heuristic with proper per-section balancing — required + // to match Word's behavior when a document has multiple multi-column sections + // separated by continuous or next-page breaks. + // + // Mid-page continuous breaks are handled in the layout loop itself (see the + // forceMidPageRegion branch above). This post-layout pass handles sections that + // end at a page boundary or at document end. + // + // Document-wide fallback: when callers pass `LayoutOptions.columns` directly + // without sectionBreak metadata, pm-adapter never stamps sectionIndex on any + // block and `sectionColumnsMap` stays empty. Synthesize a single virtual + // section that spans the whole document so multi-column callers still get + // their final page balanced (preserves the pre-SD-2452 behavior). Skip when + // the document carries an explicit column break — author intent wins. + const FALLBACK_SECTION_IDX = -1; + if ( + sectionColumnsMap.size === 0 && + !documentHasAnySectionBreak && + activeColumns.count > 1 && + !documentHasExplicitColumnBreak + ) { + sectionColumnsMap.set(FALLBACK_SECTION_IDX, cloneColumnLayout(activeColumns)); + for (const block of blocks) { + blockSectionMap.set(block.id, FALLBACK_SECTION_IDX); + } + } - // Only balance fragments from the final section when there are mixed sections - const hasMixedSections = fragmentSections.size > 1; - const fragmentsToBalance = hasMixedSections - ? page.fragments.filter((f) => { - const fragSection = blockSectionMap.get(f.blockId); - return fragSection === activeSectionIndex; - }) - : page.fragments; - - if (fragmentsToBalance.length > 0) { - const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; - balancePageColumns( - fragmentsToBalance as { - x: number; - y: number; - width: number; - kind: string; - blockId: string; - fromLine?: number; - toLine?: number; - height?: number; - }[], - normalizedCols, - { left: activeLeftMargin }, - activeTopMargin, - availableHeight, - measureMap, - ); + for (const [sectionIdx, sectionCols] of sectionColumnsMap) { + if (sectionCols.count <= 1) continue; + if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; + if (alreadyBalancedSections.has(sectionIdx)) continue; + + // Gate balancing per ECMA-376 §17.18.77 + empirical Word behavior. The + // section type defaults to `nextPage` for any sectPr without ``, + // so we lean on `typeIsExplicit` to know what was actually authored: + // + // - Explicit `` ending the section (or + // anywhere in the doc) signals continuous flow. Word balances the + // adjacent multi-column sections. + // - A multi-page multi-column section is balanced on its last page + // regardless of explicitness — this is the long-standing + // two_column_two_page-arial p17 behavior driven by SD-2646. + // + // Skip-when-not-allowed is the default. The three allowed scenarios: + // + // 1. Mid-doc explicit continuous: section's own end-break is + // `continuous` AND it is not the last section. Covers spec-test-1..5 + // and sd-2326 (explicit continuous mid-doc). + // + // 2. Doc-wide explicit continuous + non-explicitly-non-continuous + // section: the doc has at least one EXPLICIT continuous break + // somewhere AND this section's type was NOT explicitly set to a + // page-forcing type. Covers sd-1480-two-col-tab-positions: section 0 + // ends with default `nextPage` but the body sectPr has explicit + // `continuous` — Word balances 6 entries 3+3 on a single page. + // + // 3. Multi-page section: any section whose content spans more than one + // page. Covers `two_column_two_page-arial 2` p17 (body default, + // single section, 17 pages → balanced 3+2 on the final page). + // + // Skip path covers `sd-1655-col-sep-3-equal-columns` (single section, + // body without ``, single page, 3-col): no scenario fires → + // Word fills column-by-column without balancing. + // + // FALLBACK_SECTION_IDX (-1) bypasses the gate — synthesized only when + // pm-adapter emitted no section metadata at all. + if (sectionIdx !== FALLBACK_SECTION_IDX) { + const endBreakType = sectionEndBreakType.get(sectionIdx); + const typeIsExplicit = sectionTypeIsExplicit.get(sectionIdx) === true; + const isLast = lastSectionIdx !== null && sectionIdx === lastSectionIdx; + + // Per ECMA-376 §17.18.77, a continuous break balances the section it + // ENDS — i.e., the section BEFORE the break, not the section that + // contains or follows it. When the body sectPr authors an explicit + // continuous break, the affected section is the one IMMEDIATELY + // preceding the body. Compare: + // + // sd-1480: 2 sections; body (section 1) is explicit-continuous, + // section 0 has the 2-col content. Word balances section 0 + // (3+3) — exactly bodyExplicitContinuousIdx - 1. + // mixed-columns-tabs-tnr: body explicit-continuous, body has the + // 2-col Test list, section 0 is 1-col descriptions. Word + // does NOT balance section 1 (14+5 column-flow); the + // body-as-trigger applies to section 0 (single-col, no-op). + // + // Earlier this rule used a doc-wide `docHasExplicitContinuous` flag, + // which over-fired for any multi-col section in the document whenever + // some other section was explicit-continuous — including a single-page + // body section with omitted `` that should match sd-1655's + // skip rule. Tying it to bodyExplicitContinuousIdx − 1 (the section + // the break actually ends) restores ECMA-correct scope. + const bodyExplicitContinuousIdx = + lastSectionIdx !== null && + sectionTypeIsExplicit.get(lastSectionIdx) === true && + sectionEndBreakType.get(lastSectionIdx) === 'continuous' + ? lastSectionIdx + : null; + + const isExplicitNonContinuous = + typeIsExplicit && (endBreakType === 'nextPage' || endBreakType === 'evenPage' || endBreakType === 'oddPage'); + + // Page-count probe used by both the multi-page allow rule (3) and the + // mid-doc multi-page skip below. Computed once and short-circuits at >1. + let sectionPagesCount = 0; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + sectionPagesCount += 1; + if (sectionPagesCount > 1) break; } } + const isMultiPage = sectionPagesCount > 1; + + // Mid-doc multi-page multi-column sections: Word does NOT balance the + // last page. ECMA's "minimum section height" balancing makes sense for + // single-page sections (rebalancing visibly shrinks the section) but + // not for multi-page sections whose height is already pinned by the + // page boundary — last-page rebalancing would just reshuffle a + // handful of overflow fragments. Verified against: + // layout/ivosass-sub p3 (section 1, mid-doc, 2-page, 4 overflow + // fragments → Word leaves them in col 0). + // lists/saas_original p4 (similar — overflow content stays single). + // Multi-page LAST sections still balance via rule 3 below + // (two_column_two_page-arial 2 p17 keeps its 3+2 split). + if (isMultiPage && !isLast) continue; + + const allowedByMidDocContinuous = endBreakType === 'continuous' && !isLast; + // Body-explicit-continuous balances the section IT ENDS, which is the + // section immediately preceding the body. No doc-wide flag. + const allowedByBodyExplicitContinuous = + bodyExplicitContinuousIdx !== null && sectionIdx === bodyExplicitContinuousIdx - 1 && !isExplicitNonContinuous; + const allowedByMultiPage = isMultiPage; + + if (!allowedByMidDocContinuous && !allowedByBodyExplicitContinuous && !allowedByMultiPage) continue; + } + + // Find the last page carrying any fragments from this section. + let lastPageForSection: (typeof pages)[number] | null = null; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + lastPageForSection = p; + } } + if (!lastPageForSection) continue; + + // Section-local page geometry. Each page snapshots its own margins and size + // at startNewPage time (paginator.ts), so different sections with different + // page setups (margins, paper size, orientation) carry their own values on + // their pages. Earlier code derived the content box from the FINAL active* + // state, which silently rewrote earlier sections' fragments using the last + // section's content width and left margin. Use the target page's metrics. + const sectionPageSize = lastPageForSection.size ?? pageSize; + const sectionPageMargins = lastPageForSection.margins; + const sectionLeftMargin = sectionPageMargins?.left ?? activeLeftMargin; + const sectionRightMargin = sectionPageMargins?.right ?? activeRightMargin; + const sectionTopMarginPx = sectionPageMargins?.top ?? activeTopMargin; + const sectionBottomMargin = sectionPageMargins?.bottom ?? activeBottomMargin; + const sectionContentWidth = sectionPageSize.w - (sectionLeftMargin + sectionRightMargin); + const sectionAvailableHeight = sectionPageSize.h - sectionBottomMargin - sectionTopMarginPx; + + const normalized = normalizeColumns(sectionCols, sectionContentWidth); + + balanceSectionOnPage({ + fragments: lastPageForSection.fragments as BalancingFragment[], + sectionIndex: sectionIdx, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: sectionCols.widths, + equalWidth: sectionCols.equalWidth, + }, + sectionHasExplicitColumnBreak: false, // already filtered above + blockSectionMap, + margins: { left: sectionLeftMargin }, + topMargin: sectionTopMarginPx, + columnWidth: normalized.width, + availableHeight: sectionAvailableHeight, + measureMap: balancingMeasureMap, + sectPrMarkerBlockIds, + }); } // Serialize constraint boundaries into page.columnRegions so DomPainter can diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index e73d655adc..35b2467be1 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1924,6 +1924,43 @@ describe('measureBlock', () => { expect(leader.to).toBeCloseTo(300 - pageNumWidth, 0); }); + it('emits leaders on both lines when a paragraph contains a between tab groups (sd-1480)', async () => { + // Repro for sd-1480-two-col-tab-positions: a single paragraph with right-aligned + // dot-leader tab stop and a soft line break. The tabs/pPr applies per line, so + // BOTH lines must emit a dot leader. + const rightStopTwips = 4306; + const block: FlowBlock = { + kind: 'paragraph', + id: 'multiline-leader', + runs: [ + { text: 'Page', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 0, pmStart: 4, pmEnd: 5 }, + { text: '2', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'lineBreak' }, + { text: 'Page', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 1, pmStart: 11, pmEnd: 12 }, + { text: '5', fontFamily: 'Arial', fontSize: 13.333 }, + ], + attrs: { + tabs: [{ pos: rightStopTwips, val: 'end', leader: 'dot' }], + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 800)); + expect(measure.lines.length).toBeGreaterThanOrEqual(2); + + const line1 = measure.lines[0]; + const line2 = measure.lines[1]; + + expect(line1.leaders, 'line 1 should have a dot leader').toBeDefined(); + expect(line1.leaders).toHaveLength(1); + expect(line1.leaders?.[0]?.style).toBe('dot'); + + expect(line2.leaders, 'line 2 should have a dot leader').toBeDefined(); + expect(line2.leaders).toHaveLength(1); + expect(line2.leaders?.[0]?.style).toBe('dot'); + }); + it('preserves trailing spaces after tabs when line breaks', async () => { const block: FlowBlock = { kind: 'paragraph', diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 1e78634953..ed7ce518d6 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -1375,17 +1375,52 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P } }; + // Per-line-segment tab counts. The heuristic below binds the last N tabs of a + // segment to the last N alignment stops; segments are delimited by explicit + // runs because pPr/tabs apply per line, not per paragraph. + // sd-1480-two-col-tab-positions: a single paragraph "Page\t2
Page\t5" + // must emit a leader on BOTH lines, not only the last. + const tabSegmentInfo = new Map(); + { + let segmentTabRunIndices: number[] = []; + const closeSegment = () => { + const total = segmentTabRunIndices.length; + segmentTabRunIndices.forEach((runIdx, ord) => { + tabSegmentInfo.set(runIdx, { localOrdinal: ord, segmentTotal: total }); + }); + segmentTabRunIndices = []; + }; + for (let i = 0; i < runsToProcess.length; i++) { + const r = runsToProcess[i]; + if (isLineBreakRun(r) || (r.kind === 'break' && (r as { breakType?: string }).breakType === 'line')) { + closeSegment(); + } else if (isTabRun(r)) { + segmentTabRunIndices.push(i); + } + } + closeSegment(); + } + // Word-compat heuristic (not ECMA-376 17.3.3.32): the last N tab characters in a - // paragraph bind to the last N explicit end/center/decimal stops. Needed for TOC + // line bind to the last N explicit end/center/decimal stops. Needed for TOC // entries where a right-aligned dot-leader stop coexists with default grid stops — // strict greedy next-stop resolution would land the trailing tab on a default stop // instead of the leader stop. Mirrored in layout-bridge/src/remeasure.ts. - const getAlignmentStopForOrdinal = (ordinal: number): { stop: TabStopPx; index: number } | null => { + const getAlignmentStopForOrdinal = (ordinal: number, runIdx?: number): { stop: TabStopPx; index: number } | null => { if (alignmentTabStopsPx.length === 0 || totalTabRuns === 0 || !Number.isFinite(ordinal)) { return null; } - if (ordinal < 0 || ordinal >= totalTabRuns) return null; - const remainingTabs = totalTabRuns - ordinal - 1; + let scopeOrdinal = ordinal; + let scopeTotal = totalTabRuns; + if (runIdx !== undefined) { + const info = tabSegmentInfo.get(runIdx); + if (info) { + scopeOrdinal = info.localOrdinal; + scopeTotal = info.segmentTotal; + } + } + if (scopeOrdinal < 0 || scopeOrdinal >= scopeTotal) return null; + const remainingTabs = scopeTotal - scopeOrdinal - 1; const targetIndex = alignmentTabStopsPx.length - 1 - remainingTabs; if (targetIndex < 0 || targetIndex >= alignmentTabStopsPx.length) return null; return alignmentTabStopsPx[targetIndex]; @@ -1529,16 +1564,26 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P // inputs (explicit + synthetic TabRuns) don't produce out-of-order ordinals. // Mirrors consumeTabOrdinal() in layout-bridge/src/remeasure.ts. sequentialTabIndex = Math.max(sequentialTabIndex, resolvedTabIndex + 1); - const forcedAlignment = getAlignmentStopForOrdinal(resolvedTabIndex); + // Compute greedy first so we can decide whether the SD-2447 heuristic is + // actually needed. The heuristic exists because when tabStops are seeded + // with synthetic 0.5" defaults from origin (TOC styles with only an + // alignment stop), greedy lands on a default before reaching the + // alignment stop. When the paragraph has an explicit start-aligned stop + // ahead of the alignment stop (e.g. TOC1 with `start@740, end@9360`), + // greedy already finds the correct stop and the heuristic over-fires. + // Only force the heuristic when greedy would land on a `source:default` + // stop — which is precisely the SD-2447 condition. + const greedy = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); + const greedyOnDefault = greedy.stop?.source === 'default'; + const forcedAlignment = greedyOnDefault ? getAlignmentStopForOrdinal(resolvedTabIndex, runIndex) : null; if (forcedAlignment && forcedAlignment.stop.pos > absCurrentX + TAB_EPSILON) { stop = forcedAlignment.stop; target = forcedAlignment.stop.pos; tabStopCursor = forcedAlignment.index + 1; } else { - const nextStop = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); - target = nextStop.target; - tabStopCursor = nextStop.nextIndex; - stop = nextStop.stop; + target = greedy.target; + tabStopCursor = greedy.nextIndex; + stop = greedy.stop; } const maxAbsWidth = currentLine.maxWidth + effectiveIndent; const clampedTarget = Math.min(target, maxAbsWidth); diff --git a/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts b/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts index 6b8c2e27e2..8d90167c3c 100644 --- a/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts +++ b/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts @@ -1,14 +1,29 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { createDomPainter } from './index.js'; -import type { ColumnRegion, Layout, Page } from '@superdoc/contracts'; +import type { ColumnRegion, Fragment, Layout, Page } from '@superdoc/contracts'; // These tests pin down DomPainter's column-separator rendering: // - the fallback path (page.columns only, no mid-page regions) // - the region-aware path (page.columnRegions supersedes page.columns) -// - all five early-return guards inside renderColumnSeparators +// - the early-return guards inside renderColumnSeparators +// - the content-presence gate: per Word's behavior, a separator is +// suppressed when the column to its right is empty within the region. // The layout-engine tests cover which data reaches the painter; these tests // cover what the painter does with it. +// Minimal fragment factory for separator-presence assertions. We only need x +// (column placement) and y (region membership). The other fields are required +// by the contract but not read by renderColumnSeparators. +const fragAt = (x: number, y: number = 100): Fragment => ({ + kind: 'para', + blockId: `frag-${x}-${y}`, + fromLine: 0, + toLine: 1, + x, + y, + width: 100, +}); + const buildPage = (overrides: Partial = {}): Page => ({ number: 1, fragments: [], @@ -50,7 +65,12 @@ describe('DomPainter renderColumnSeparators', () => { describe('fallback path (page.columns only)', () => { it('draws a single separator centered in the gap for 2 equal columns', () => { - const page = buildPage({ columns: { count: 2, gap: 48, withSeparator: true } }); + // 2 cols at x=96 and x=384 (96+288). Fragments in both → separator + // gate is satisfied; the test pins down geometry, not the gate. + const page = buildPage({ + columns: { count: 2, gap: 48, withSeparator: true }, + fragments: [fragAt(96), fragAt(432)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -64,7 +84,10 @@ describe('DomPainter renderColumnSeparators', () => { }); it('draws count-1 separators for 3 equal columns', () => { - const page = buildPage({ columns: { count: 3, gap: 48, withSeparator: true } }); + const page = buildPage({ + columns: { count: 3, gap: 48, withSeparator: true }, + fragments: [fragAt(96), fragAt(320), fragAt(544)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -77,6 +100,7 @@ describe('DomPainter renderColumnSeparators', () => { it('uses explicit column widths when drawing separators for page.columns', () => { const page = buildPage({ columns: { count: 2, gap: 48, widths: [200, 952], equalWidth: false, withSeparator: true }, + fragments: [fragAt(96), fragAt(244)], }); paintOnce(buildLayout(page), mount); @@ -146,6 +170,9 @@ describe('DomPainter renderColumnSeparators', () => { const page = buildPage({ columns: regions[0].columns, columnRegions: regions, + // Region 0 has fragments in both 2-col positions; Region 1 has one + // in each of three columns. + fragments: [fragAt(96, 200), fragAt(432, 200), fragAt(96, 500), fragAt(320, 500), fragAt(544, 500)], }); paintOnce(buildLayout(page), mount); @@ -173,7 +200,17 @@ describe('DomPainter renderColumnSeparators', () => { { yStart: 400, yEnd: 700, columns: { count: 2, gap: 48, withSeparator: false } }, { yStart: 700, yEnd: 960, columns: { count: 2, gap: 48, withSeparator: true } }, ]; - const page = buildPage({ columnRegions: regions }); + const page = buildPage({ + columnRegions: regions, + fragments: [ + fragAt(96, 200), + fragAt(432, 200), + fragAt(96, 500), + fragAt(432, 500), + fragAt(96, 800), + fragAt(432, 800), + ], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -188,7 +225,10 @@ describe('DomPainter renderColumnSeparators', () => { { yStart: 96, yEnd: 400, columns: { count: 1, gap: 0, withSeparator: true } }, { yStart: 400, yEnd: 700, columns: { count: 2, gap: 48, withSeparator: true } }, ]; - const page = buildPage({ columnRegions: regions }); + const page = buildPage({ + columnRegions: regions, + fragments: [fragAt(96, 500), fragAt(432, 500)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -201,7 +241,10 @@ describe('DomPainter renderColumnSeparators', () => { { yStart: 96, yEnd: 96, columns: { count: 2, gap: 48, withSeparator: true } }, { yStart: 96, yEnd: 500, columns: { count: 2, gap: 48, withSeparator: true } }, ]; - const page = buildPage({ columnRegions: regions }); + const page = buildPage({ + columnRegions: regions, + fragments: [fragAt(96, 200), fragAt(432, 200)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -216,6 +259,7 @@ describe('DomPainter renderColumnSeparators', () => { const page = buildPage({ columns: { count: 2, gap: 48, withSeparator: false }, columnRegions: [{ yStart: 96, yEnd: 960, columns: { count: 2, gap: 48, withSeparator: true } }], + fragments: [fragAt(96), fragAt(432)], }); paintOnce(buildLayout(page), mount); @@ -234,6 +278,7 @@ describe('DomPainter renderColumnSeparators', () => { columns: { count: 2, gap: 48, widths: [200, 952], equalWidth: false, withSeparator: true }, }, ], + fragments: [fragAt(96, 200), fragAt(244, 200)], }); paintOnce(buildLayout(page), mount); @@ -244,4 +289,61 @@ describe('DomPainter renderColumnSeparators', () => { expect(seps[0].style.left).toBe('220px'); }); }); + + // The content-presence gate matches Word: a column separator is suppressed + // when the column to its right has no content within the region. This is + // observable in `multi-column-sections.docx` page 2 — Word draws no line + // because the section's content fits entirely in column 0. + describe('content-presence gate', () => { + it('suppresses the separator when no fragment sits past the column boundary', () => { + const page = buildPage({ + columns: { count: 2, gap: 48, withSeparator: true }, + // Only column 0 has content (x=96 < separatorX=408). Word draws nothing. + fragments: [fragAt(96), fragAt(96, 300)], + }); + paintOnce(buildLayout(page), mount); + + expect(querySeparators(mount)).toHaveLength(0); + }); + + it('draws only the separator whose right neighbor has content (3-col, col 3 empty)', () => { + // 3 cols at x=96, x=320, x=544. Separators at 296 and 520. + // Cols 1 and 2 have content; col 3 is empty. Only the 296 separator + // draws; the 520 separator (col 2 → col 3 boundary) is suppressed. + const page = buildPage({ + columns: { count: 3, gap: 48, withSeparator: true }, + fragments: [fragAt(96), fragAt(320)], + }); + paintOnce(buildLayout(page), mount); + + const seps = querySeparators(mount); + expect(seps).toHaveLength(1); + expect(seps[0].style.left).toBe('296px'); + }); + + it('checks fragment presence within the region only, not the whole page', () => { + // Region 0 (2-col): only col 0 has content → no separator. + // Region 1 (2-col): both cols have content → separator drawn. + // Without the y-bounded gate, region 0's separator would draw because + // region 1's col-1 fragment exists somewhere on the page. + const regions: ColumnRegion[] = [ + { yStart: 96, yEnd: 400, columns: { count: 2, gap: 48, withSeparator: true } }, + { yStart: 400, yEnd: 700, columns: { count: 2, gap: 48, withSeparator: true } }, + ]; + const page = buildPage({ + columnRegions: regions, + fragments: [ + fragAt(96, 200), // region 0, col 0 + fragAt(96, 500), // region 1, col 0 + fragAt(432, 500), // region 1, col 1 + ], + }); + paintOnce(buildLayout(page), mount); + + const seps = querySeparators(mount); + expect(seps).toHaveLength(1); + expect(seps[0].style.top).toBe('400px'); + expect(seps[0].style.height).toBe('300px'); + }); + }); }); diff --git a/packages/layout-engine/painters/dom/src/renderer.ts b/packages/layout-engine/painters/dom/src/renderer.ts index bb195212c8..02d670d4f8 100644 --- a/packages/layout-engine/painters/dom/src/renderer.ts +++ b/packages/layout-engine/painters/dom/src/renderer.ts @@ -2366,7 +2366,18 @@ export class DomPainter { const separatorPositions = this.getColumnSeparatorPositions(columns, leftMargin, contentWidth); if (separatorPositions.length === 0) continue; + // Word only renders the column separator between columns that both have + // content. For a 2-col page where col 1 is empty (e.g. the last page of + // a multi-column section that fits in col 0, or a `nextPage` section + // where Word fills col 0 first without balancing), Word draws no line + // even when the section's `w:cols` declared `w:sep="1"`. Gate each + // separator on whether any fragment sits past it within the region. + const fragmentsInRegion = page.fragments.filter((f) => f.y >= yStart - 0.5 && f.y < yEnd + 0.5); + for (const separatorX of separatorPositions) { + const hasContentPastSeparator = fragmentsInRegion.some((f) => f.x >= separatorX); + if (!hasContentPastSeparator) continue; + const separatorEl = this.doc.createElement('div'); separatorEl.dataset.superdocColumnSeparator = 'true'; diff --git a/packages/layout-engine/pm-adapter/README.md b/packages/layout-engine/pm-adapter/README.md index 27c0d1591c..e0653df917 100644 --- a/packages/layout-engine/pm-adapter/README.md +++ b/packages/layout-engine/pm-adapter/README.md @@ -23,3 +23,35 @@ Notes: - Handles lists, tables, images, vector shapes, SDT (TOC, structured content, doc parts), tracked changes, hyperlinks. - Consumes `@superdoc/style-engine` defaults and locale/tab interval hints from the PM document attrs. - `@superdoc/measuring-dom` is only a dependency for types; measurement happens later in the pipeline. + +## Section model (ECMA-376 §17.6, §17.18.77) + +Word uses **end-tagged** section semantics: a `` inside a paragraph defines the section that **ends with** that paragraph. The body-level `` defines the final section. All body children preceding a section-terminating paragraph — other paragraphs, **tables**, top-level drawings, SDT wrappers — belong to the section whose sectPr follows them. + +``` + + This is section A's first paragraph + + (section A props: 1-col, nextPage) + + ... ← section B (the NEXT sectPr) + + (section B props: 2-col continuous) + + This is section C's paragraph + (section C props: body-level) + +``` + +Section ranges are computed in `sections/analysis.ts` with two parallel indices: + +| Index | What it counts | Used by | +|---|---|---| +| `startParagraphIndex` / `endParagraphIndex` | Paragraph nodes only (including those inside SDT wrappers) | SDT handlers for intra-SDT section transitions | +| `startNodeIndex` / `endNodeIndex` | Every top-level `doc.content` child — paragraphs, tables, top-level drawings, SDTs | Main dispatch loop for inter-node section transitions | + +The main dispatch loop in `internal.ts` calls `maybeEmitNextSectionBreakForNode` BEFORE handling each top-level node. The helper uses `currentNodeIndex === nextSection.startNodeIndex` as its trigger, so non-paragraph nodes (tables especially) correctly get the sectionBreak emitted before them when they cross a section boundary. This is the fix for SD-2646. + +**When adding a new top-level block kind** (e.g., a new SDT type or a top-level drawing kind): do nothing. The dispatch-level hook covers you automatically. You only need a per-handler section check if your handler descends into children that carry their own sectPr markers — see the SDT handlers (`sdt/bibliography.ts`, `sdt/document-index.ts`, `sdt/table-of-authorities.ts`) for the intra-SDT pattern. + +Continuous section breaks (``) carry an extra requirement from §17.18.77: they "balance the content of the previous section." The balancing itself is the layout engine's responsibility (see `layout-engine/src/column-balancing.ts`); the adapter's job is only to ensure the right blocks are in the right section. diff --git a/packages/layout-engine/pm-adapter/src/attributes/borders.ts b/packages/layout-engine/pm-adapter/src/attributes/borders.ts index 3b5960fe9f..c080c0d9b4 100644 --- a/packages/layout-engine/pm-adapter/src/attributes/borders.ts +++ b/packages/layout-engine/pm-adapter/src/attributes/borders.ts @@ -36,7 +36,14 @@ type BorderConversionOptions = { * * Clamps results to reasonable bounds to prevent edge cases. */ -export const borderSizeToPx = (size?: number): number | undefined => eighthPointsToPixels(size, { clamp: true }); +export const borderSizeToPx = (size?: number): number | undefined => { + if (!isFiniteNumber(size)) return undefined; + if (size <= 0) return 0; + + const width = eighthPointsToPixels(size); + if (width == null) return undefined; + return clampPixelBorderWidth(width); +}; const clampPixelBorderWidth = (width: number): number => Math.min(MAX_BORDER_SIZE_PX, Math.max(MIN_BORDER_SIZE_PX, width)); diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 441b210e73..ba5363b936 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -1269,6 +1269,72 @@ describe('toFlowBlocks', () => { [first, second, third].forEach((b) => expect(b?.type).toBe('continuous')); }); }); + + describe('end-tagged section membership for non-paragraph nodes (SD-2646, ECMA-376 §17.6.17)', () => { + it('emits the next section break BEFORE a table that sits between two sectPr-marker paragraphs', () => { + // IT-945 shape: table lives between the paragraph that ends section A + // and the paragraph that ends section B. Per §17.6.17 the table + // belongs to section B, so the sectionBreak introducing B's columns + // must precede the table in the flow stream. + const pmDoc: PMNode = { + type: 'doc', + attrs: { bodySectPr: createTestBodySectPr() }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'This is my first section' }] }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'nextPage' } }, + { name: 'w:cols', attributes: { 'w:num': '1', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }] }, + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }] }, + ], + }, + ], + }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'continuous' } }, + { name: 'w:cols', attributes: { 'w:num': '2', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { type: 'paragraph', content: [{ type: 'text', text: 'This is my third section' }] }, + ], + } as never; + + const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); + + const tableIndex = blocks.findIndex((b) => b.kind === 'table'); + const twoColBreakIndex = blocks.findIndex((b) => b.kind === 'sectionBreak' && b.columns?.count === 2); + expect(tableIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeLessThan(tableIndex); + }); + }); }); describe('block id prefixing', () => { @@ -2111,6 +2177,12 @@ describe('toFlowBlocks', () => { expect(sectionBreaks).toHaveLength(1); expect(sectionBreaks[0].type).toBe('nextPage'); + // `typeIsExplicit` is only set on attrs when `` was authored. + // The body sectPr in this fixture has no ``, so the flag is + // omitted (undefined). The column-balancing gate treats absence as + // "defaulted" and skips balancing for default-nextPage body sections + // (sd-1655 behavior). + expect(sectionBreaks[0].attrs?.typeIsExplicit).toBeUndefined(); }); it('emits section breaks even when w:type element is missing (defaults to nextPage)', () => { @@ -2157,6 +2229,14 @@ describe('toFlowBlocks', () => { expect(sectionBreaks).toHaveLength(2); expect(sectionBreaks[0].type).toBe('nextPage'); expect(sectionBreaks[1].type).toBe('nextPage'); + // Section 1's sectPr writes `` explicitly + // so the flag is set true. Section 2's body sectPr omits `` + // so the flag is omitted. The column-balance gate uses this to tell + // explicit-nextPage (author intent: don't balance) from defaulted + // nextPage (could still balance if the doc has explicit continuous + // somewhere or is multi-page). + expect(sectionBreaks[0].attrs?.typeIsExplicit).toBe(true); + expect(sectionBreaks[1].attrs?.typeIsExplicit).toBeUndefined(); }); it('keeps final paragraph section break even without type when no body sectPr', () => { diff --git a/packages/layout-engine/pm-adapter/src/internal.test.ts b/packages/layout-engine/pm-adapter/src/internal.test.ts index e1972f240b..4c4eda6863 100644 --- a/packages/layout-engine/pm-adapter/src/internal.test.ts +++ b/packages/layout-engine/pm-adapter/src/internal.test.ts @@ -59,6 +59,10 @@ vi.mock('./sections/index.js', () => { id: nextBlockId('sectionBreak'), type: section.type, })), + maybeEmitNextSectionBreakForNode: vi.fn(() => { + // Mocked as no-op: this test file already provides zero section ranges + // via analyzeSectionRanges, so there is never a break to emit. + }), publishSectionMetadata: vi.fn(), }; }); diff --git a/packages/layout-engine/pm-adapter/src/internal.ts b/packages/layout-engine/pm-adapter/src/internal.ts index 1223d7ec7e..b7dde6420c 100644 --- a/packages/layout-engine/pm-adapter/src/internal.ts +++ b/packages/layout-engine/pm-adapter/src/internal.ts @@ -12,7 +12,12 @@ import type { FlowBlock, ParagraphBlock } from '@superdoc/contracts'; import { isValidTrackedMode } from './tracked-changes.js'; -import { analyzeSectionRanges, createSectionBreakBlock, publishSectionMetadata } from './sections/index.js'; +import { + analyzeSectionRanges, + createSectionBreakBlock, + maybeEmitNextSectionBreakForNode, + publishSectionMetadata, +} from './sections/index.js'; import { normalizePrefix, buildPositionMap, createBlockIdGenerator } from './utilities.js'; import { paragraphToFlowBlocks, @@ -208,6 +213,7 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): ranges: sectionRanges, currentSectionIndex: 0, currentParagraphIndex: 0, + currentNodeIndex: 0, }, converters, themeColors, @@ -216,12 +222,24 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): trackedListLastOrdinals: new Map(), }; - // Process nodes using handler dispatch pattern + // Process nodes using handler dispatch pattern. Before each top-level node + // we flush any pending section break whose `startNodeIndex` matches the + // current position — this keeps non-paragraph nodes (tables, top-level + // drawings) in their correct end-tagged section per ECMA-376 §17.6.17. doc.content.forEach((node) => { + maybeEmitNextSectionBreakForNode({ + sectionState: handlerContext.sectionState!, + nextBlockId, + pushBlock: (block) => { + blocks.push(block); + recordBlockKind(block.kind); + }, + }); const handler = nodeHandlers[node.type]; if (handler) { handler(node, handlerContext); } + handlerContext.sectionState!.currentNodeIndex++; }); // Ensure final body section is emitted only if not already emitted during paragraph processing. diff --git a/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts b/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts index facc5b8ef8..3ab4c4b17c 100644 --- a/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts +++ b/packages/layout-engine/pm-adapter/src/sdt/document-part-object.ts @@ -15,13 +15,9 @@ import { processTocChildren } from './toc.js'; * Processes TOC children for Table of Contents galleries. * For other gallery types (page numbers, etc.), processes child paragraphs normally. * - * If a preceding paragraph carried a `w:sectPr` whose next section starts at - * this SDT, emit the pending section break BEFORE processing children so the - * SDT's paragraphs render on the new page (see SD-2557). `findParagraphsWithSectPr` - * doesn't recurse into `documentPartObject`, so its child paragraphs don't bump - * `currentParagraphIndex` — and without this call, the deferred break would only - * fire on the next body paragraph AFTER the SDT, leaving e.g. a TOC on the - * prior page with the cover content. + * If a section transition occurs inside this SDT, child paragraph processing + * emits the pending break before the paragraph that starts the next section and + * advances `currentParagraphIndex` in step with `findParagraphsWithSectPr`. * * @param node - Document part object node to process * @param context - Shared handler context diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts index ac860ce7e6..97b6b5b2d7 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts @@ -296,8 +296,9 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 1, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(2); }); it('should find multiple paragraphs with sectPr', () => { @@ -315,12 +316,13 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(2); - expect(result.paragraphs[0]).toEqual({ index: 0, node: para1 }); - expect(result.paragraphs[1]).toEqual({ index: 2, node: para3 }); + expect(result.paragraphs[0]).toEqual({ index: 0, nodeIndex: 0, node: para1 }); + expect(result.paragraphs[1]).toEqual({ index: 2, nodeIndex: 2, node: para3 }); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(3); }); - it('should skip non-paragraph nodes', () => { + it('should skip non-paragraph nodes for paragraphIndex but count them in nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; @@ -334,11 +336,15 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + // paragraphIndex = 1 (second paragraph), nodeIndex = 2 (third top-level child + // after the heading). Non-paragraph top-level children count toward nodeIndex + // because ECMA-376 §17.6.17 sections span all body children. + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 2, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(4); }); - it('should include paragraphs inside index nodes', () => { + it('should include paragraphs inside index nodes and share the SDT nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; const para3 = { type: 'paragraph', content: [] }; @@ -352,8 +358,11 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); - expect(result.paragraphs).toEqual([{ index: 1, node: para2 }]); + // para2 is inside the SDT (index node) at top-level nodeIndex 1. + // paragraphIndex 1 still reflects paragraph order across descent. + expect(result.paragraphs).toEqual([{ index: 1, nodeIndex: 1, node: para2 }]); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(2); }); }); @@ -385,7 +394,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue(null); @@ -408,7 +417,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr', elements: [] }; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -450,8 +459,8 @@ describe('analysis', () => { }; const paragraphs = [ - { index: 2, node: para1 }, - { index: 5, node: para2 }, + { index: 2, nodeIndex: 2, node: para1 }, + { index: 5, nodeIndex: 5, node: para2 }, ]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr' }; @@ -487,7 +496,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.NEXT_PAGE, @@ -512,7 +521,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mock section data with page margins but no header/footer margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -550,7 +559,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only header margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -583,7 +592,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only footer margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -616,7 +625,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only top margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -649,7 +658,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Explicit zero values for all margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -688,7 +697,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mix of header/footer and page margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -725,7 +734,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ titlePg: false, @@ -747,7 +756,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.CONTINUOUS, diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 925b0561f5..c352abb6fe 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -65,15 +65,27 @@ export function shouldIgnoreSectionBreak( /** * Find all paragraphs in the document that contain sectPr elements. * + * Records two indices per match: + * - `paragraphIndex` counts only paragraph nodes (including those nested + * inside SDT wrappers), matching the long-standing contract callers use + * for SDT-internal section transitions. + * - `nodeIndex` counts every top-level `doc.content` child (paragraph, + * table, top-level drawing, SDT, …). This is required to fix SD-2646: + * a non-paragraph node between two sectPr markers belongs to the LATER + * marker's section per ECMA-376 §17.6.17, so the dispatch loop must + * know each section's bounds in top-level-node terms. + * * @param doc - ProseMirror document node - * @returns Object containing paragraphs with sectPr and total paragraph count + * @returns Paragraph matches plus both totals */ export function findParagraphsWithSectPr(doc: PMNode): { - paragraphs: Array<{ index: number; node: PMNode }>; + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>; totalCount: number; + totalNodeCount: number; } { - const paragraphs: Array<{ index: number; node: PMNode }> = []; + const paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }> = []; let paragraphIndex = 0; + let nodeIndex = 0; const getNodeChildren = (node: PMNode): PMNode[] => { if (Array.isArray(node.content)) return node.content; const content = node.content as { forEach?: (cb: (child: PMNode) => void) => void } | undefined; @@ -87,10 +99,10 @@ export function findParagraphsWithSectPr(doc: PMNode): { return []; }; - const visitNode = (node: PMNode): void => { + const visitNode = (node: PMNode, outerNodeIndex: number): void => { if (node.type === 'paragraph') { if (hasSectPr(node)) { - paragraphs.push({ index: paragraphIndex, node }); + paragraphs.push({ index: paragraphIndex, nodeIndex: outerNodeIndex, node }); } paragraphIndex++; return; @@ -101,6 +113,10 @@ export function findParagraphsWithSectPr(doc: PMNode): { // their handlers increment `currentParagraphIndex` + call the section-break // emission helper per child. // + // SDT descendants share the outer SDT's nodeIndex — dispatch-level + // section transitions fire on the SDT as a whole, while child handlers can + // still emit paragraph-index transitions within the SDT. + // // `documentPartObject` / `tableOfContents` are important for SD-2557: // Word stores the closing sectPr of a TOC section on the trailing empty // paragraph INSIDE the SDT. Without recursion, that sectPr is invisible to @@ -113,17 +129,18 @@ export function findParagraphsWithSectPr(doc: PMNode): { node.type === 'documentPartObject' || node.type === 'tableOfContents' ) { - getNodeChildren(node).forEach(visitNode); + getNodeChildren(node).forEach((child) => visitNode(child, outerNodeIndex)); } }; if (doc.content) { for (const node of doc.content) { - visitNode(node); + visitNode(node, nodeIndex); + nodeIndex++; } } - return { paragraphs, totalCount: paragraphIndex }; + return { paragraphs, totalCount: paragraphIndex, totalNodeCount: nodeIndex }; } /** @@ -144,11 +161,12 @@ export function findParagraphsWithSectPr(doc: PMNode): { * @returns Array of section ranges */ export function buildSectionRangesFromParagraphs( - paragraphs: Array<{ index: number; node: PMNode }>, + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>, hasBodySectPr: boolean, ): SectionRange[] { const ranges: SectionRange[] = []; let currentStart = 0; + let currentStartNode = 0; paragraphs.forEach((item, idx) => { if (shouldIgnoreSectionBreak(item.node, idx, paragraphs.length, hasBodySectPr)) { @@ -170,6 +188,8 @@ export function buildSectionRangesFromParagraphs( const range: SectionRange = { sectionIndex: idx, + startNodeIndex: currentStartNode, + endNodeIndex: item.nodeIndex, startParagraphIndex: currentStart, endParagraphIndex: item.index, sectPr, @@ -187,6 +207,7 @@ export function buildSectionRangesFromParagraphs( orientation: sectionData.orientation ?? null, columns: sectionData.columnsPx ?? null, type: (sectionData.type as SectionType) ?? DEFAULT_PARAGRAPH_SECTION_TYPE, + typeIsExplicit: sectionData.typeIsExplicit ?? false, titlePg: sectionData.titlePg ?? false, headerRefs: sectionData.headerRefs, footerRefs: sectionData.footerRefs, @@ -196,6 +217,7 @@ export function buildSectionRangesFromParagraphs( ranges.push(range); currentStart = item.index + 1; + currentStartNode = item.nodeIndex + 1; }); return ranges; @@ -244,6 +266,7 @@ export function createFinalSectionFromBodySectPr( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange | null { const clampedStart = Math.max(0, Math.min(currentStart, Math.max(totalParagraphs - 1, 0))); @@ -267,8 +290,15 @@ export function createFinalSectionFromBodySectPr( bodySectionData.bottomPx != null || bodySectionData.leftPx != null; + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds + ? Math.max(0, Math.min(nodeBounds.startNodeIndex, Math.max(totalNodes - 1, 0))) + : clampedStart; + return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: clampedStart, endParagraphIndex: totalParagraphs - 1, sectPr: bodySectPr, @@ -286,6 +316,7 @@ export function createFinalSectionFromBodySectPr( orientation: bodySectionData.orientation ?? null, columns: bodySectionData.columnsPx ?? null, type: (bodySectionData.type as SectionType) ?? DEFAULT_BODY_SECTION_TYPE, + typeIsExplicit: bodySectionData.typeIsExplicit ?? false, titlePg: bodySectionData.titlePg ?? false, headerRefs: bodySectionData.headerRefs, footerRefs: bodySectionData.footerRefs, @@ -307,9 +338,14 @@ export function createDefaultFinalSection( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange { + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds?.startNodeIndex ?? currentStart; return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: currentStart, endParagraphIndex: totalParagraphs - 1, sectPr: null, @@ -318,6 +354,7 @@ export function createDefaultFinalSection( orientation: null, columns: null, type: DEFAULT_BODY_SECTION_TYPE, + typeIsExplicit: false, titlePg: false, headerRefs: undefined, footerRefs: undefined, @@ -334,11 +371,13 @@ export function createDefaultFinalSection( * @returns Array of section ranges with backward-looking semantics */ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): SectionRange[] { - const { paragraphs, totalCount } = findParagraphsWithSectPr(doc); + const { paragraphs, totalCount, totalNodeCount } = findParagraphsWithSectPr(doc); const hasBody = Boolean(bodySectPr); const ranges = buildSectionRangesFromParagraphs(paragraphs, hasBody); - const currentStart = ranges.length > 0 ? ranges[ranges.length - 1].endParagraphIndex + 1 : 0; + const last = ranges[ranges.length - 1]; + const currentStart = last ? last.endParagraphIndex + 1 : 0; + const currentStartNode = last ? last.endNodeIndex + 1 : 0; // Always represent the final section defined by bodySectPr, even if there are // no remaining paragraphs after the last paragraph-level sectPr. This ensures @@ -349,12 +388,16 @@ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): Section Math.min(currentStart, totalCount), totalCount, ranges.length, + { startNodeIndex: Math.min(currentStartNode, totalNodeCount), totalNodeCount }, ); if (finalSection) { ranges.push(finalSection); } } else if (ranges.length > 0) { - const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length); + const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length, { + startNodeIndex: Math.min(currentStartNode, totalNodeCount), + totalNodeCount, + }); if (fallbackFinal) { fallbackFinal.type = DEFAULT_PARAGRAPH_SECTION_TYPE; ranges.push(fallbackFinal); diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index 4b4ed6f030..a841c3debb 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -130,6 +130,17 @@ export function createSectionBreakBlock( attrs: { source: 'sectPr', sectionIndex: section.sectionIndex, + // `typeIsExplicit` is set only when `` was authored in the + // source XML. We omit the field entirely when it would be `false` so + // we don't widen `attrs` for the (vast majority of) sectPrs that + // omit `` — that would produce a doc-wide snapshot diff + // against historical references on every existing fixture. + // The layout-engine's column-balance gate reads this to distinguish + // a body sectPr that defaulted to `nextPage` (Word does not balance, + // sd-1655-col-sep-3-equal-columns) from one with + // `` written out (Word balances, + // sd-1480-two-col-tab-positions, even single-page). + ...(section.typeIsExplicit ? { typeIsExplicit: true as const } : {}), ...extraAttrs, }, ...(section.pageSize && { pageSize: section.pageSize }), @@ -201,16 +212,52 @@ interface SectionStateMutable { currentParagraphIndex: number; } +/** + * Emit the next section's sectionBreak block if the dispatch loop has reached + * that section's starting top-level node index. + * + * ECMA-376 §17.6.17: a section is defined by its end-tagged ``. All + * body children preceding that tag — paragraphs, tables, top-level drawings — + * belong to the section that ENDS at the tag. This helper fires BEFORE any + * such node so the appropriate section config is active by the time the node + * is laid out. + * + * Calling it from the main dispatch loop covers every top-level node type — + * present and future — with no per-handler opt-in. Paragraph-index emission + * still handles transitions inside SDT child content. + */ +export function maybeEmitNextSectionBreakForNode(args: { + sectionState: { + ranges: SectionRange[]; + currentSectionIndex: number; + currentNodeIndex: number; + }; + nextBlockId: BlockIdGenerator; + pushBlock: (block: SectionBreakBlock) => void; +}): void { + const { sectionState, nextBlockId, pushBlock } = args; + if (sectionState.ranges.length === 0) return; + if (sectionState.currentSectionIndex >= sectionState.ranges.length - 1) return; + + const nextSection = sectionState.ranges[sectionState.currentSectionIndex + 1]; + if (!nextSection) return; + if (sectionState.currentNodeIndex !== nextSection.startNodeIndex) return; + + const currentSection = sectionState.ranges[sectionState.currentSectionIndex]; + const requiresPageBoundary = + shouldRequirePageBoundary(currentSection, nextSection) || hasIntrinsicBoundarySignals(nextSection); + const extraAttrs = requiresPageBoundary ? { requirePageBoundary: true } : undefined; + pushBlock(createSectionBreakBlock(nextSection, nextBlockId, extraAttrs)); + sectionState.currentSectionIndex++; +} + /** * Emit a pending section break before a paragraph if the current paragraph * index matches the start of the next section. * - * Centralizes the "check, emit, advance" pattern used by paragraph and SDT - * handlers. SDT handlers that process children as an opaque block (e.g. - * TOC/docPartObj where child paragraphs aren't counted by - * `findParagraphsWithSectPr`) should call this ONCE at the SDT boundary — - * if the SDT sits at a section boundary, this emits the break so the SDT's - * contents render on the new page. + * Centralizes the "check, emit, advance" pattern for handlers that process + * paragraph children directly, including SDT handlers. This keeps nested + * paragraph traversal in sync with `findParagraphsWithSectPr`. * * No-op when: * - sectionState is undefined or has no ranges diff --git a/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts new file mode 100644 index 0000000000..8d7cf42210 --- /dev/null +++ b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts @@ -0,0 +1,118 @@ +/** + * @file End-tagged section-membership tests (ECMA-376 §17.6.17, §17.18.77). + * + * OOXML rule: a `` inside a paragraph defines the section that + * ENDS with that paragraph. All body children preceding it (back to the + * previous section-terminating paragraph) belong to that section — + * including tables, drawings, and other non-paragraph nodes. + * + * These tests use real implementations with no module mocks so they assert + * observable behavior of the section analysis, not call patterns. + */ +import { describe, it, expect } from 'vitest'; +import { analyzeSectionRanges } from './analysis.js'; +import type { PMNode } from '../types.js'; +import type { SectPrElement } from './types.js'; + +const sectPr = (options: { + type?: 'continuous' | 'nextPage'; + cols?: number; + colSpace?: number; + pgSz?: { w: number; h: number }; +}): SectPrElement => { + const elements: SectPrElement['elements'] = []; + if (options.type) { + elements!.push({ + type: 'element', + name: 'w:type', + attributes: { 'w:val': options.type }, + }); + } + if (options.pgSz) { + elements!.push({ + type: 'element', + name: 'w:pgSz', + attributes: { 'w:w': String(options.pgSz.w), 'w:h': String(options.pgSz.h) }, + }); + } + elements!.push({ + type: 'element', + name: 'w:cols', + attributes: { + 'w:num': String(options.cols ?? 1), + 'w:space': String(options.colSpace ?? 720), + }, + }); + return { + type: 'element', + name: 'w:sectPr', + elements, + }; +}; + +const paragraph = (marker?: SectPrElement): PMNode => ({ + type: 'paragraph', + attrs: marker ? { paragraphProperties: { sectPr: marker } } : {}, + content: [], +}); + +const table = (): PMNode => ({ + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }], + }, + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }], + }, + ], + }, + ], +}); + +describe('analyzeSectionRanges — end-tagged membership for non-paragraph nodes (SD-2646)', () => { + it('places a table between two sectPr markers into the section of the LATER marker', () => { + // Arrange — IT-945 shape: + // p0 "first section" text + // p1(sectPr-A) empty marker, 1-col nextPage → ends Section A + // TABLE belongs to Section B per ECMA-376 §17.6.17 + // p2(sectPr-B) empty marker, 2-col continuous → ends Section B + // p3 "third section" text + // body sectPr final, 1-col continuous → Section C + const markerA = sectPr({ type: 'nextPage', cols: 1 }); + const markerB = sectPr({ type: 'continuous', cols: 2, colSpace: 720 }); + const bodyMarker = sectPr({ type: 'continuous', cols: 1 }); + const doc: PMNode = { + type: 'doc', + attrs: { bodySectPr: bodyMarker }, + content: [paragraph(), paragraph(markerA), table(), paragraph(markerB), paragraph()], + }; + + // Act + const sut = analyzeSectionRanges(doc, bodyMarker); + + // Assert — three sections, with Section B's range spanning the table + expect(sut).toHaveLength(3); + const [sectionA, sectionB, sectionC] = sut; + + // Section A ends at p1 (node index 1). Table at node index 2 is NOT in A. + expect(sectionA.endNodeIndex).toBe(1); + expect(sectionA.columns?.count ?? 1).toBe(1); + + // Section B: starts AFTER Section A (node index 2 — the table), ends at p2 (node index 3). + // The table must belong to Section B, not Section A. + expect(sectionB.startNodeIndex).toBe(2); + expect(sectionB.endNodeIndex).toBe(3); + expect(sectionB.columns?.count).toBe(2); + + // Section C (body): starts at node index 4 (p3). + expect(sectionC.startNodeIndex).toBe(4); + expect(sectionC.columns?.count ?? 1).toBe(1); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts b/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts index ce6a124e96..713a8a3746 100644 --- a/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts @@ -319,6 +319,11 @@ describe('extraction', () => { }); it('should default type to "nextPage" when sectPr exists but w:type is missing', () => { + // type still defaults to 'nextPage' (preserves the established + // pipeline behavior). The added `typeIsExplicit: false` flag tells + // the column-balancing gate the type was defaulted, not authored — + // critical for distinguishing sd-1655 (Word does NOT balance) from + // sd-1480 (explicit continuous → Word balances). const para: PMNode = { type: 'paragraph', attrs: { @@ -341,6 +346,7 @@ describe('extraction', () => { expect(result).not.toBeNull(); expect(result?.type).toBe('nextPage'); + expect(result?.typeIsExplicit).toBe(false); expect(result?.vAlign).toBe('bottom'); }); }); diff --git a/packages/layout-engine/pm-adapter/src/sections/extraction.ts b/packages/layout-engine/pm-adapter/src/sections/extraction.ts index da98daeec3..234d260aa5 100644 --- a/packages/layout-engine/pm-adapter/src/sections/extraction.ts +++ b/packages/layout-engine/pm-adapter/src/sections/extraction.ts @@ -79,7 +79,13 @@ function extractNormalizedMargins(attrs: Record): { /** * Extract section type from element. - * Defaults to 'nextPage' per OOXML spec when absent. + * Defaults to 'nextPage' per OOXML spec when absent. This preserves the + * historical body-sectPr type behavior — any change to that ripples + * through page-break placement, header/footer inheritance, and section + * flow across the entire layout pipeline. Callers that need to know + * whether `` was actually present should consult + * `extractSectionTypeWithSource` below; the existing `type` value stays + * `nextPage` regardless. */ function extractSectionType(elements: SectionElement[]): SectionType { const typeEl = elements.find((el) => el?.name === 'w:type'); @@ -90,6 +96,22 @@ function extractSectionType(elements: SectionElement[]): SectionType { return 'nextPage'; } +/** + * True iff `` is present in the source XML. + * + * The column-balancing gate (ECMA-376 §17.18.77) needs to distinguish a + * body sectPr whose `` defaulted to `nextPage` because it was + * omitted (sd-1655-col-sep-3-equal-columns: Word fills col-by-col, no + * balance) from one with explicit `` + * (sd-1480-two-col-tab-positions: Word balances 6 entries 3+3 on a + * single page). The type field alone can't carry this — both produce + * `'nextPage'` in the resolved type — so we surface it as a separate + * flag without touching the established type defaulting. + */ +function extractSectionTypeIsExplicit(elements: SectionElement[]): boolean { + return elements.some((el) => el?.name === 'w:type'); +} + /** * Extract page size and orientation from element. * Infers orientation from dimensions if not explicitly set. @@ -316,6 +338,8 @@ export function extractSectionData(para: PMNode): { bottomPx?: number; leftPx?: number; type?: SectionType; + /** True iff `` was present in the source XML (vs. type defaulted by the caller). */ + typeIsExplicit?: boolean; pageSizePx?: { w: number; h: number }; orientation?: Orientation; columnsPx?: ColumnLayout; @@ -348,8 +372,12 @@ export function extractSectionData(para: PMNode): { return headerPx == null && footerPx == null ? null : { headerPx, footerPx }; } - // Extract all section properties. Type always has a value (defaults to 'nextPage') + // Extract all section properties. type defaults to 'nextPage' per OOXML + // spec (and this preserves the historical pipeline behavior across page + // breaks, header/footer flow, etc). `typeIsExplicit` lets the + // column-balancing gate know whether `` was actually written. const type = extractSectionType(sectPrElements); + const typeIsExplicit = extractSectionTypeIsExplicit(sectPrElements); const { pageSizePx, orientation } = extractPageSizeAndOrientation(sectPrElements); const titlePg = sectPrElements.some((el) => el?.name === 'w:titlePg'); const fallbackMargins = extractFallbackMargins(sectPrElements, headerPx, footerPx); @@ -362,7 +390,10 @@ export function extractSectionData(para: PMNode): { const columnsPx = extractColumns(sectPrElements); const vAlign = extractVerticalAlign(sectPrElements); - // When sectPrElements exist, always return data (even if minimal) since type defaults to 'nextPage' + // When sectPrElements exist, always return data (even if minimal). The + // caller applies the appropriate default for `type` (paragraph default = + // nextPage, body default = continuous) and sees `typeIsExplicit` for the + // distinction. return { headerPx, footerPx, @@ -371,6 +402,7 @@ export function extractSectionData(para: PMNode): { bottomPx, leftPx, type, + typeIsExplicit, pageSizePx, orientation, columnsPx, diff --git a/packages/layout-engine/pm-adapter/src/sections/index.ts b/packages/layout-engine/pm-adapter/src/sections/index.ts index 5f293b3d9d..b72280d106 100644 --- a/packages/layout-engine/pm-adapter/src/sections/index.ts +++ b/packages/layout-engine/pm-adapter/src/sections/index.ts @@ -41,5 +41,6 @@ export { isSectionBreakBlock, signaturesEqual, shallowObjectEquals, + maybeEmitNextSectionBreakForNode, emitPendingSectionBreakForParagraph, } from './breaks.js'; diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index a4134b48d0..51006470af 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -89,11 +89,22 @@ export type SectionVerticalAlign = 'top' | 'center' | 'bottom' | 'both'; /** * Section range represents a contiguous section in the document. - * Word uses "end-tagged" section semantics: a paragraph's sectPr defines - * properties for the section ENDING at that paragraph, not starting after it. + * + * Word uses "end-tagged" section semantics (ECMA-376 §17.6.17): a paragraph's + * sectPr defines properties for the section ENDING at that paragraph, not + * starting after it. All body children preceding the section-terminating + * paragraph — paragraphs, tables, top-level drawings — belong to that section. + * + * `startNodeIndex`/`endNodeIndex` are computed over every top-level + * `doc.content` child and are the authoritative boundaries for dispatching + * section breaks at emission time. `startParagraphIndex`/`endParagraphIndex` + * are retained for callers (SDT handlers) that count only paragraphs during + * recursive descent. */ export interface SectionRange { sectionIndex: number; + startNodeIndex: number; + endNodeIndex: number; startParagraphIndex: number; endParagraphIndex: number; sectPr: SectPrElement | null; @@ -109,6 +120,12 @@ export interface SectionRange { orientation: 'portrait' | 'landscape' | null; columns: ColumnLayout | null; type: SectionType; + /** True iff the section's `` was explicitly written in the source. + * Distinguishes "the body sectPr defaulted to continuous because OOXML + * omitted w:type" (sd-1655) from "an explicit w:type=continuous on the + * body sectPr" (sd-1480). Only the explicit form triggers Word's + * end-of-document column balancing for single-page sections. */ + typeIsExplicit: boolean; titlePg: boolean; headerRefs?: Partial>; footerRefs?: Partial>; diff --git a/packages/layout-engine/pm-adapter/src/types.ts b/packages/layout-engine/pm-adapter/src/types.ts index d60515c39b..0aed51dd1d 100644 --- a/packages/layout-engine/pm-adapter/src/types.ts +++ b/packages/layout-engine/pm-adapter/src/types.ts @@ -316,6 +316,13 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + /** + * Index of the current top-level `doc.content` child being dispatched. + * Advanced by the main dispatch loop in internal.ts — drives end-tagged + * section transitions for non-paragraph nodes (tables, top-level + * drawings, …) per ECMA-376 §17.6.17. + */ + currentNodeIndex: number; }; // Converters for nested content diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts index bd02088604..1f0b9eabb1 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts @@ -165,16 +165,20 @@ function readOddEvenHeadersFlag(editor: Editor): boolean { } function createSyntheticRange(bodySectPr: XmlElement | null, paragraphCount: number): SectionRange { + const lastIndex = Math.max(paragraphCount - 1, 0); return { sectionIndex: 0, + startNodeIndex: 0, + endNodeIndex: lastIndex, startParagraphIndex: 0, - endParagraphIndex: Math.max(paragraphCount - 1, 0), + endParagraphIndex: lastIndex, sectPr: (bodySectPr as unknown as SectionRange['sectPr']) ?? null, margins: null, pageSize: null, orientation: null, columns: null, type: SectionType.CONTINUOUS, + typeIsExplicit: false, titlePg: false, headerRefs: undefined, footerRefs: undefined, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index dd2a9a7aac..089b398bff 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6429,7 +6429,6 @@ packages: '@microsoft/teamsapp-cli@3.0.2': resolution: {integrity: sha512-AowuJwrrUxeF9Bq/frxuy9YZjK/ECk3pi0UBXl3CQLZ4XNWfgWatiFi/UWpyHDLccFs+0Za3nNYATFvgsxEFwQ==} engines: {node: '>=12'} - deprecated: This package is deprecated and supported Node.js version is 18-22. Please use @microsoft/m365agentstoolkit-cli instead. hasBin: true '@microsoft/teamsfx-api@0.23.1':