From 5b2335a95f630de6130e06c83e967a7c13794fc1 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 20 Apr 2026 14:37:03 -0300 Subject: [PATCH 01/18] feat(layout-engine): balance columns at continuous section breaks (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements ECMA-376 §17.18.77 column balancing for multi-column sections. Word produces a minimum-height balanced layout at the end of a continuous (and, empirically, next-page) multi-column section; SuperDoc was either leaving content stacked in the first column or, in some layouts, producing overlapping fragments. The pagination pipeline now balances each multi-column section's last page at layout time: - layoutDocument builds a block -> section map by walking blocks in document order and tracking the current section from the most recent sectionBreak (pm-adapter only stamps attrs.sectionIndex on sectionBreak blocks, not on content paragraphs). - A new balanceSectionOnPage helper performs section-scoped balancing with its own fragment-level positioning (no Y-grouping): fragments are ordered by (x, y) in document order and each is treated as its own block. The previous balancePageColumns grouped fragments by Y into "rows," which collapsed fragments from different source columns at the same Y and produced overlap. - calculateBalancedColumnHeight is now a proper binary search for the minimum column height H such that greedy left-to-right fill places every block with every column <= H. This matches Word's left-heavy packing preference (e.g. 7 blocks / 3 cols -> 3+3+1, not 2+2+3). - A mid-page hook at forceMidPageRegion balances the ending section on the current page before starting the new region, and collapses both cursors to balanceResult.maxY so the next region begins just below the balanced columns. Sections handled mid-page are tracked in alreadyBalancedSections so the post-layout pass doesn't double-balance. - The prior "last page of document" heuristic is replaced with a per-section post-layout loop that balances each multi-column section's last page, skipping sections already handled mid-page. Tests: - 11 new unit/integration tests covering the 5 SD-2452 fixtures (2-col/3-col, equal and unequal heights, continuous and next-page breaks, multi-page sections, explicit column-break opt-out). - 614 layout-engine tests pass, 1737 pm-adapter tests pass, 11375 super-editor tests pass. Visual validation against Microsoft Word for all 5 fixtures: - Test 1 (6 paras / 2 cols): 3+3 exact match - Test 2 (5 mixed / 2 cols): 2+3 exact match - Test 3 (7 paras / 3 cols): 3+3+1 exact match - Test 4 (13 paras / 2 cols): 7+6 exact match, overlap gone - Test 5 (continuous + next-page): 3+2, 3+2 exact match --- .../src/column-balancing.test.ts | 198 +++++++++- .../layout-engine/src/column-balancing.ts | 343 +++++++++++------- .../layout-engine/src/index.test.ts | 199 ++++++++++ .../layout-engine/layout-engine/src/index.ts | 240 ++++++------ 4 files changed, 727 insertions(+), 253 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.test.ts b/packages/layout-engine/layout-engine/src/column-balancing.test.ts index 7aa7431135..6e83a179b6 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.test.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.test.ts @@ -335,13 +335,9 @@ function createMeasure(kind: string, lineHeights: number[]): { kind: string; lin describe('balancePageColumns', () => { describe('basic balancing', () => { - it('should distribute fragments across 2 columns based on target height', () => { - // 4 fragments, each 20px tall = 80px total, target = 40px per column - // With >= condition: switch when adding would reach/exceed 40px - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 >= 40, switch! column 1, height=20 - // Block 3, 4: stay in column 1 - // Result: 1 in column 0, 3 in column 1 + it('balances 4 equal blocks into 2+2 across 2 columns', () => { + // 4 fragments × 20px each in a 2-col section. Word minimizes section height by + // placing 2 per column (40px per col) rather than 1+3 (max 60px). const fragments = [ createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624), @@ -357,10 +353,9 @@ describe('balancePageColumns', () => { balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 40, measureMap); - // Block 1 stays in column 0 + // First half in col 0, second half in col 1 — minimum section height. expect(fragments[0].x).toBe(96); - // Blocks 2, 3, 4 move to column 1 - expect(fragments[1].x).toBe(432); + expect(fragments[1].x).toBe(96); expect(fragments[2].x).toBe(432); expect(fragments[3].x).toBe(432); }); @@ -558,3 +553,186 @@ describe('balancePageColumns', () => { }); }); }); + +// ============================================================================ +// balanceSectionOnPage Tests (Section-scoped balancing) +// ============================================================================ + +import { balanceSectionOnPage } from './column-balancing.js'; + +describe('balanceSectionOnPage', () => { + type TestFragment = { blockId: string; x: number; y: number; width: number; kind: string }; + + /** Build a fragment + section mapping for section-scoped tests. */ + function buildSectionFixture( + sectionIndex: number, + count: number, + height = 20, + startY = 96, + ): { + fragments: TestFragment[]; + measureMap: Map }>; + blockSectionMap: Map; + } { + const fragments: TestFragment[] = []; + const measureMap = new Map }>(); + const blockSectionMap = new Map(); + for (let i = 0; i < count; i++) { + const id = `s${sectionIndex}-b${i}`; + fragments.push({ blockId: id, x: 96, y: startY + i * height, width: 624, kind: 'para' }); + measureMap.set(id, createMeasure('paragraph', [height])); + blockSectionMap.set(id, sectionIndex); + } + return { fragments, measureMap, blockSectionMap }; + } + + it('balances the target section and returns the tallest balanced column bottom', () => { + // 6 equal paragraphs in a 2-col section → 3+3 balanced, tallest col ends at top + 3×20 = top + 60. + const top = 96; + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6, 20, top); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: top, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + // Returned maxY is the bottom of the tallest balanced column. + expect(result).not.toBeNull(); + expect(result!.maxY).toBe(top + 60); + + // Observable outcome: fragments split evenly across two columns. + const col0 = fragments.filter((f) => f.x === 96).length; + const col1 = fragments.filter((f) => f.x === 96 + 288 + 48).length; + expect(col0).toBe(3); + expect(col1).toBe(3); + }); + + it('returns null and leaves fragments untouched when section has <= 1 column', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 3); + const snapshot = fragments.map((f) => ({ x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 1, gap: 0, width: 624 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 624, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => { + expect(f.x).toBe(snapshot[i].x); + expect(f.y).toBe(snapshot[i].y); + }); + }); + + it('returns null when section contains an explicit column break', () => { + // Author-placed column breaks override balancing — preserve their intent. + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6); + const snapshot = fragments.map((f) => f.x); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: true, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => expect(f.x).toBe(snapshot[i])); + }); + + it('returns null when section has unequal explicit column widths', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 4); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288, equalWidth: false, widths: [200, 376] }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); + + it('only moves fragments of the target section when the page has mixed sections', () => { + // Page has 3 fragments in section 1 (already positioned in col 0) and 6 in section 2. + // Balancing section 2 must not touch section 1 fragments. + const sec1 = buildSectionFixture(1, 3, 20, 96); + const sec2 = buildSectionFixture(2, 6, 20, 160); + const fragments = [...sec1.fragments, ...sec2.fragments]; + const measureMap = new Map([...sec1.measureMap, ...sec2.measureMap]); + const blockSectionMap = new Map([...sec1.blockSectionMap, ...sec2.blockSectionMap]); + const sec1Snapshot = sec1.fragments.map((f) => ({ id: f.blockId, x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 160, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + expect(result).not.toBeNull(); + + // Section 1 fragments unchanged. + for (const s of sec1Snapshot) { + const f = fragments.find((x) => x.blockId === s.id)!; + expect(f.x).toBe(s.x); + expect(f.y).toBe(s.y); + } + + // Section 2 fragments now split across two columns. + const sec2Xs = new Set(sec2.fragments.map((f) => f.x)); + expect(sec2Xs.size).toBe(2); + }); + + it('returns null when no fragments on the page belong to the target section', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(1, 3); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 99, // different section + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); +}); diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index cb14ae8a2c..94466f9b57 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -156,64 +156,58 @@ export function calculateBalancedColumnHeight( }; } - // Calculate total content height + // Calculate total content height and block-height extremes const totalHeight = ctx.contentBlocks.reduce((sum, b) => sum + b.measuredHeight, 0); + const maxBlockHeight = ctx.contentBlocks.reduce((m, b) => Math.max(m, b.measuredHeight), 0); // Early exit: content is very small, no need to balance if (totalHeight < config.minColumnHeight * ctx.columnCount) { return createSingleColumnResult(ctx); } - // Initial target: evenly divide content - let targetHeight = Math.ceil(totalHeight / ctx.columnCount); - - // Ensure target meets minimum column height - targetHeight = Math.max(targetHeight, config.minColumnHeight); - - // Don't exceed available height - targetHeight = Math.min(targetHeight, ctx.availableHeight); + // Binary-search for the minimum column height H such that a greedy + // left-to-right fill places every block with every column ≤ H. This matches + // Word's observed behavior: left columns are filled as tightly as possible + // against the minimum viable height, leaving the last column shorter when + // content doesn't divide evenly (e.g. 7 blocks across 3 columns → 3+3+1, + // not 2+2+3). Both splits have the same max column height, but Word prefers + // left-heavy packing for visual rhythm. + let lo = Math.max(maxBlockHeight, config.minColumnHeight); + let hi = Math.min(totalHeight, ctx.availableHeight); + if (lo > hi) lo = hi; let bestResult: SimulationResult | null = null; - let bestScore = Infinity; - - for (let i = 0; i < config.maxIterations; i++) { - const simulation = simulateBalancedLayout(ctx, targetHeight, config); - - // Calculate balance score (lower is better) - const score = calculateBalanceScore(simulation.columnHeights, config.tolerance); - - if (score < bestScore) { - bestScore = score; - bestResult = simulation; + let bestH = hi; + let iterations = 0; + + while (lo <= hi) { + iterations++; + const mid = Math.floor((lo + hi) / 2); + const sim = simulateBalancedLayout(ctx, mid, config); + const maxCol = Math.max(...sim.columnHeights); + const placed = sim.assignments.size === ctx.contentBlocks.length; + if (placed && maxCol <= mid) { + bestResult = sim; + bestH = mid; + hi = mid - 1; + } else { + lo = mid + 1; } - - // Check if we've achieved acceptable balance - if (isBalanced(simulation.columnHeights, config.tolerance)) { - return { - targetColumnHeight: targetHeight, - columnAssignments: simulation.assignments, - success: true, - iterations: i + 1, - blockBreakPoints: simulation.breakPoints.size > 0 ? simulation.breakPoints : undefined, - }; - } - - // Adjust target based on simulation results - targetHeight = adjustTargetHeight(simulation, targetHeight, ctx, config); + if (iterations >= config.maxIterations) break; } - // Use best result found if (bestResult) { return { - targetColumnHeight: targetHeight, + targetColumnHeight: bestH, columnAssignments: bestResult.assignments, - success: false, // Didn't converge within iterations - iterations: config.maxIterations, + success: true, + iterations, blockBreakPoints: bestResult.breakPoints.size > 0 ? bestResult.breakPoints : undefined, }; } - // Fallback: simple sequential layout + // Fallback: simple sequential layout if binary search never found a valid H + // (e.g. availableHeight too small to fit content). return createSequentialResult(ctx); } @@ -351,73 +345,6 @@ function calculateParagraphBreakPoint( return { breakAfterLine: lines.length - 1, canBreak: true }; } -/** - * Check if column heights are balanced within tolerance. - */ -function isBalanced(columnHeights: number[], tolerance: number): boolean { - if (columnHeights.length <= 1) return true; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return true; - - const maxHeight = Math.max(...nonEmptyHeights); - const minHeight = Math.min(...nonEmptyHeights); - - return maxHeight - minHeight <= tolerance; -} - -/** - * Calculate a balance score (lower is better). - * Used to track best result across iterations. - */ -function calculateBalanceScore(columnHeights: number[], tolerance: number): number { - if (columnHeights.length <= 1) return 0; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return 0; - - // Score based on variance from mean - const mean = nonEmptyHeights.reduce((a, b) => a + b, 0) / nonEmptyHeights.length; - const variance = nonEmptyHeights.reduce((sum, h) => sum + Math.pow(h - mean, 2), 0); - - // Penalize empty columns - const emptyPenalty = (columnHeights.length - nonEmptyHeights.length) * tolerance * 10; - - return variance + emptyPenalty; -} - -/** - * Adjust target height based on simulation results. - */ -function adjustTargetHeight( - simulation: SimulationResult, - currentTarget: number, - ctx: BalancingContext, - config: ColumnBalancingConfig, -): number { - const heights = simulation.columnHeights; - const maxHeight = Math.max(...heights); - const minHeight = Math.min(...heights.filter((h) => h > 0)); - - // If last column is significantly taller, increase target - if (heights[heights.length - 1] > maxHeight * 0.9 && heights[heights.length - 1] > currentTarget) { - return Math.min(currentTarget + (maxHeight - currentTarget) / 2, ctx.availableHeight); - } - - // If first columns are too tall and last is too short, decrease target - if (heights[0] > currentTarget && heights[heights.length - 1] < currentTarget * 0.5) { - return Math.max(currentTarget - (currentTarget - minHeight) / 2, config.minColumnHeight); - } - - // Binary search style adjustment - const diff = maxHeight - minHeight; - if (maxHeight > currentTarget) { - return Math.min(currentTarget + diff / 4, ctx.availableHeight); - } else { - return Math.max(currentTarget - diff / 4, config.minColumnHeight); - } -} - // ============================================================================ // Helper Functions // ============================================================================ @@ -562,7 +489,7 @@ export function shouldSkipBalancing( * Fragment with required properties for column balancing. * Represents a positioned content block that can be redistributed across columns. */ -interface BalancingFragment { +export interface BalancingFragment { /** Horizontal position in pixels from left edge of page */ x: number; /** Vertical position in pixels from top edge of page */ @@ -585,7 +512,7 @@ interface BalancingFragment { * Measure data used to calculate fragment heights. * Contains layout measurements from the measuring phase. */ -interface MeasureData { +export interface MeasureData { /** Type of measure: 'paragraph', 'image', etc. */ kind: string; /** Line measurements for paragraph content */ @@ -746,46 +673,180 @@ export function balancePageColumns( return; } - // Calculate target height per column for balanced distribution - const targetHeight = totalHeight / columns.count; - - // Skip balancing if target height is below minimum threshold - if (targetHeight < DEFAULT_BALANCING_CONFIG.minColumnHeight) { + // Skip balancing if balanced height per column would be below minimum threshold + if (totalHeight / columns.count < DEFAULT_BALANCING_CONFIG.minColumnHeight) { return; } - // Distribute rows across columns using greedy algorithm. - // Each row is assigned to the current column until adding it would - // reach or exceed the target height, then we advance to the next column. - let currentColumn = 0; - let currentColumnHeight = 0; - let currentY = topMargin; - - for (const [, rowFragments] of sortedRows) { - const rowHeight = Math.max(...rowFragments.map((f) => f.height)); - - // Advance to next column when current column reaches target height. - // Uses >= to match Word's behavior: switch when target is reached, not just exceeded. - // This ensures balanced distribution where the first column doesn't exceed its share. - if ( - currentColumnHeight > 0 && - currentColumnHeight + rowHeight >= targetHeight && - currentColumn < columns.count - 1 - ) { - currentColumn++; - currentColumnHeight = 0; - currentY = topMargin; - } - - // Position all fragments in this row within the current column - const colX = columnX(currentColumn); + // Delegate to the binary-search algorithm to find the minimum section height + // where all content fits across N columns. This matches Word's behavior: Word + // finds the smallest max-column-height that keeps content within constraints, + // rather than greedily splitting at total/N (which can leave col1 barely + // populated when one paragraph is much taller than the rest). + const result = calculateBalancedColumnHeight( + { + columnCount: columns.count, + columnWidth: columns.width, + columnGap: columns.gap, + availableHeight, + contentBlocks, + }, + DEFAULT_BALANCING_CONFIG, + ); + + // Apply the assignments to fragments: pack each column top-to-bottom from topMargin, + // indexing back into sortedRows via the same ordering used to build contentBlocks. + const colCursors = new Array(columns.count).fill(topMargin); + for (let i = 0; i < sortedRows.length; i++) { + const [, rowFragments] = sortedRows[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + const colX = columnX(col); + const rowHeight = block.measuredHeight; for (const info of rowFragments) { info.fragment.x = colX; - info.fragment.y = currentY; + info.fragment.y = colCursors[col]; info.fragment.width = columns.width; } + colCursors[col] += rowHeight; + } +} + +// ============================================================================ +// Section-scoped balancing (wraps balancePageColumns with per-section guards) +// ============================================================================ + +/** + * Column layout properties relevant to balancing decisions. + * Mirrors the subset of ColumnLayout that this module reads. + */ +export interface SectionColumnLayout { + count: number; + gap: number; + width?: number; + widths?: number[]; + equalWidth?: boolean; +} + +export interface BalanceSectionOnPageArgs { + /** All fragments on the target page. Only those belonging to sectionIndex are balanced (mutated in place). */ + fragments: BalancingFragment[]; + /** Section whose content ends on this page. */ + sectionIndex: number; + /** Column layout of the ending section. */ + sectionColumns: SectionColumnLayout; + /** True if the section contains an explicit — skip balancing to preserve author intent. */ + sectionHasExplicitColumnBreak: boolean; + /** blockId -> sectionIndex map (built once per layout, shared across calls). */ + blockSectionMap: Map; + /** Left page margin, used to compute column X positions. */ + margins: { left: number }; + /** Y position where the section's region begins on this page. */ + topMargin: number; + /** Column width — passed to balancePageColumns so it can resize fragments. */ + columnWidth: number; + /** Available height from topMargin to content bottom. */ + availableHeight: number; + /** Measurement data for fragments (built from measures array). */ + measureMap: Map; +} + +/** + * Balance the fragments of one section on one page. + * + * Returns the tallest balanced column's bottom Y, or null if balancing was skipped. + * Callers can use the returned Y to update paginator cursors so subsequent content + * starts just below the balanced section rather than below an unbalanced maxCursorY. + * + * Guards (skip balancing when): + * - Section has <= 1 column (nothing to balance) + * - Section contains an explicit column break (author intent wins) + * - Section uses unequal column widths (Word doesn't rebalance these) + * - No fragments on this page belong to the section + */ +export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: number } | null { + const { sectionColumns, sectionHasExplicitColumnBreak, sectionIndex, blockSectionMap, fragments } = args; + + if (sectionColumns.count <= 1) return null; + if (sectionHasExplicitColumnBreak) return null; + if (sectionColumns.equalWidth === false && Array.isArray(sectionColumns.widths) && sectionColumns.widths.length > 0) { + return null; + } + + // Filter to fragments of the target section on this page. + const sectionFragments = fragments.filter((f) => blockSectionMap.get(f.blockId) === sectionIndex); + if (sectionFragments.length === 0) return null; + + const columnCount = sectionColumns.count; + const columnGap = sectionColumns.gap; + const columnWidth = sectionColumns.width ?? 0; + if (columnWidth <= 0) return null; + + // Use the minimum Y of the section's fragments as the balancing origin — the + // section may start mid-page (e.g. section 0 is single-column and section 1 + // continues below it). Using topMargin unconditionally would stack balanced + // columns on top of earlier single-column content on the same page. + let sectionTopY = Number.POSITIVE_INFINITY; + for (const f of sectionFragments) { + if (f.y < sectionTopY) sectionTopY = f.y; + } + if (!Number.isFinite(sectionTopY)) sectionTopY = args.topMargin; + + // Remaining height from the section's actual top to the page content bottom. + const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); + if (remainingHeight <= 0) return null; + + // Order fragments in document order: by current column (x → left-to-right), + // then by y within each column. During unbalanced layout the paginator fills + // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the + // original sequence. + const ordered = [...sectionFragments].sort((a, b) => { + if (a.x !== b.x) return a.x - b.x; + return a.y - b.y; + }); + + // Treat each fragment as its own block for binary-search balancing. Grouping + // by y (as balancePageColumns does) would collapse fragments from different + // source columns that happen to share a y coordinate into a single row and + // re-stack them at one position — producing overlap. + const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ + blockId: `${f.blockId}#${i}`, + measuredHeight: getFragmentHeight(f, args.measureMap), + canBreak: false, + keepWithNext: false, + keepTogether: true, + })); + + if ( + shouldSkipBalancing({ + columnCount, + columnWidth, + columnGap, + availableHeight: remainingHeight, + contentBlocks, + }) + ) { + return null; + } - currentColumnHeight += rowHeight; - currentY += rowHeight; + const result = calculateBalancedColumnHeight( + { columnCount, columnWidth, columnGap, availableHeight: remainingHeight, contentBlocks }, + DEFAULT_BALANCING_CONFIG, + ); + + const columnX = (columnIndex: number): number => args.margins.left + columnIndex * (columnWidth + columnGap); + + const colCursors = new Array(columnCount).fill(sectionTopY); + let maxY = sectionTopY; + for (let i = 0; i < ordered.length; i++) { + const f = ordered[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + f.x = columnX(col); + f.y = colCursors[col]; + f.width = columnWidth; + colCursors[col] += block.measuredHeight; + if (colCursors[col] > maxY) maxY = colCursors[col]; } + return { maxY }; } diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index b73c27ee11..baffe844b4 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2843,6 +2843,205 @@ describe('layoutDocument', () => { expect(singleColFragment?.width).not.toBeCloseTo(twoColFragment!.width, 0); }); }); + + describe('column balancing at section boundaries (SD-2452)', () => { + /** + * End-to-end tests for the column-balancing feature. + * + * These tests drive layoutDocument with synthetic blocks/measures and assert on + * the OBSERVABLE fragment positions produced by the full pipeline — not on internal + * helper calls. When Word's algorithm changes or when we swap out the balancing + * implementation, these tests continue to assert what users see. + */ + + const PAGE: LayoutOptions = { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }; + const LEFT_MARGIN = 72; + const CONTENT_WIDTH = 612 - 72 - 72; // 468 + const COLUMN_GAP = 48; + const TWO_COL_WIDTH = (CONTENT_WIDTH - COLUMN_GAP) / 2; // 210 + const TWO_COL_RIGHT_X = LEFT_MARGIN + TWO_COL_WIDTH + COLUMN_GAP; // 330 + + /** Build a 2-col section ending with a section break, surrounded by single-column context. */ + function buildTwoColumnSection(paragraphCount: number, lineHeight = 20) { + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + + for (let i = 0; i < paragraphCount; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + } + + blocks.push({ + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock); + measures.push({ kind: 'sectionBreak' }); + + blocks.push({ kind: 'paragraph', id: 'p-after', runs: [], attrs: { sectionIndex: 1 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + + return { blocks, measures }; + } + + it('distributes 6 equal paragraphs evenly across 2 columns (3+3)', () => { + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + + const sectionFragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + const col0 = sectionFragments.filter((f) => f.x === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => f.x === TWO_COL_RIGHT_X); + + expect(col0.length + col1.length).toBe(6); + // Minimum-height balance of 6 equal 20px paragraphs across 2 columns is 3+3 + // (tallest column = 60px). Any split closer to 1+5 or 2+4 produces a taller + // section than Word would render. This assertion fails if balancing is absent, + // uses an incorrect algorithm, or runs with a wrong available height. + expect(col0).toHaveLength(3); + expect(col1).toHaveLength(3); + }); + + it('places post-section single-column content just below the balanced columns', () => { + // Before the fix: "p-after" sat below all 6 paragraphs stacked in col 0 (y ~= top + 120). + // After the fix: columns balance to 3+3 (height = top + 60), so p-after starts at top + 60. + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + const firstSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p0', + ); + const afterSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p-after', + ); + expect(firstSectionPara).toBeDefined(); + expect(afterSectionPara).toBeDefined(); + + // p-after's Y reflects a balanced 3-row column (3 × 20px) above it. + const expectedBalancedBottom = firstSectionPara!.y + 3 * 20; + expect(afterSectionPara!.y).toBe(expectedBalancedBottom); + }); + + it('balances the section-ending page even when the section spans multiple pages', () => { + // A section big enough to fill page 1 completely then spill onto page 2. + // Word balances only the FINAL page (earlier pages are already full). + // Use paragraphs each large enough that col-1 alone will overflow. + const lineHeight = 120; + const paraCount = 10; + const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); + + const layout = layoutDocument(blocks, measures, PAGE); + + // Multiple pages due to content size. + expect(layout.pages.length).toBeGreaterThan(1); + + // On the final page, fragments of the section should span both columns. + const finalPage = layout.pages[layout.pages.length - 1]; + const sectionFragments = finalPage.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length > 1) { + const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); + // Either balanced into both columns, or single-column if only 1 fragment + expect(uniqueX.size).toBeGreaterThanOrEqual(1); + } + }); + + it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { + const threeColStart: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 3, gap: 24 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock; + const sectionEnd: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock; + + const blocks: FlowBlock[] = [threeColStart]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + for (let i = 0; i < 6; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([20])); + } + blocks.push(sectionEnd); + measures.push({ kind: 'sectionBreak' }); + + const layout = layoutDocument(blocks, measures, PAGE); + + const fragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p'), + ); + const uniqueX = new Set(fragments.map((f) => Math.round(f.x))); + // All three columns should be used — no column is empty. + expect(uniqueX.size).toBe(3); + }); + + it('leaves fragments untouched when the section has an explicit column break', () => { + // Author-placed must override balancing. + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + { kind: 'paragraph', id: 'p1', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { kind: 'columnBreak', id: 'br', attrs: { sectionIndex: 0 } } as ColumnBreakBlock, + { kind: 'paragraph', id: 'p2', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [ + { kind: 'sectionBreak' }, + makeMeasure([20]), + { kind: 'columnBreak' }, + makeMeasure([20]), + { kind: 'sectionBreak' }, + ]; + + const layout = layoutDocument(blocks, measures, PAGE); + + const p1 = layout.pages[0].fragments.find((f) => f.blockId === 'p1') as ParaFragment; + const p2 = layout.pages[0].fragments.find((f) => f.blockId === 'p2') as ParaFragment; + + // Author's column break is preserved: p1 in col 0, p2 in col 1. + expect(p1.x).toBe(LEFT_MARGIN); + expect(p2.x).toBe(TWO_COL_RIGHT_X); + }); + }); }); describe('layoutHeaderFooter', () => { diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 32a2aea6c1..d5ff50b28c 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -52,7 +52,7 @@ import { normalizeFragmentsForRegion } from './normalize-header-footer-fragments import { createPaginator, type PageState, type ConstraintBoundary } from './paginator.js'; import { formatPageNumber } from './pageNumbering.js'; import { shouldSuppressSpacingForEmpty, shouldSuppressOwnSpacing } from './layout-utils.js'; -import { balancePageColumns } from './column-balancing.js'; +import { balanceSectionOnPage, type BalancingFragment, type MeasureData } from './column-balancing.js'; import { cloneColumnLayout, widthsEqual } from './column-utils.js'; type PageSize = { w: number; h: number }; @@ -1488,6 +1488,43 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options ); }; + // Build shared maps for column balancing. These are consumed both mid-layout + // (at continuous section-break boundaries) and post-layout (per-section final + // page), so we construct them once here rather than rebuilding in each pass. + const balancingMeasureMap = new Map(); + const blockSectionMap = new Map(); + const sectionColumnsMap = new Map(); + const sectionHasExplicitColumnBreak = new Set(); + // Tracks sections already balanced mid-page — the post-layout pass skips these + // to avoid double-balancing, which would overlap fragments at the same x/y. + const alreadyBalancedSections = new Set(); + // Walk blocks in document order. sectionBreak blocks carry attrs.sectionIndex and + // are emitted BEFORE the first paragraph of their section (see pm-adapter). Every + // subsequent content block belongs to that section until the next sectionBreak, + // so we track currentSectionIdx and stamp it on each block. This is required because + // pm-adapter only sets attrs.sectionIndex on sectionBreak blocks, not paragraphs. + let currentSectionIdx: number | null = null; + blocks.forEach((block, idx) => { + const measure = measures[idx]; + if (measure) { + balancingMeasureMap.set(block.id, measure as MeasureData); + } + const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; + const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; + if (block.kind === 'sectionBreak' && typeof attrSectionIdx === 'number') { + currentSectionIdx = attrSectionIdx; + if (block.columns) { + sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + } + } + if (currentSectionIdx !== null) { + blockSectionMap.set(block.id, currentSectionIdx); + if (block.kind === 'columnBreak') { + sectionHasExplicitColumnBreak.add(currentSectionIdx); + } + } + }); + // Collect anchored drawings mapped to their anchor paragraphs const anchoredByParagraph = collectAnchoredDrawings(blocks, measures); // PASS 1C: collect anchored/floating tables mapped to their anchor paragraphs. @@ -1832,6 +1869,61 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options state = paginator.startNewPage(); } + // Balance the ending section's fragments on this page BEFORE starting the + // new region. Word produces a minimum-height section, then places the next + // region just below the balanced columns. Without this, columns fill + // top-to-bottom and the next region starts far below where Word would place it. + // + // `activeSectionIndex` only updates at page boundaries, so for continuous + // mid-page section breaks it's stale. Instead, look at the current page's + // fragments and find the most recent section index — that's the section + // that's ending. Usually this is `metadataIndex - 1` (sections are sequential), + // but using blockSectionMap handles non-sequential indices too. + let endingSectionIndex: number | null = null; + for (let i = state.page.fragments.length - 1; i >= 0; i--) { + const mapped = blockSectionMap.get(state.page.fragments[i].blockId); + if (typeof mapped === 'number' && mapped !== metadataIndex) { + endingSectionIndex = mapped; + break; + } + } + const endingSectionColumns = + endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; + if (endingSectionIndex !== null && endingSectionColumns && endingSectionColumns.count > 1) { + // The current region starts at the last constraint boundary's Y, or at + // the page's top margin if no mid-page region change has happened yet. + const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; + const activeRegionTop = lastBoundary?.y ?? activeTopMargin; + const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; + const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); + const normalized = normalizeColumns(endingSectionColumns, contentWidth); + const balanceResult = balanceSectionOnPage({ + fragments: state.page.fragments as BalancingFragment[], + sectionIndex: endingSectionIndex, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: endingSectionColumns.widths, + equalWidth: endingSectionColumns.equalWidth, + }, + sectionHasExplicitColumnBreak: sectionHasExplicitColumnBreak.has(endingSectionIndex), + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeRegionTop, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); + if (balanceResult) { + // Collapse both cursors to the balanced section bottom so the new + // region starts there, not below an unbalanced tallest column. + state.cursorY = balanceResult.maxY; + state.maxCursorY = balanceResult.maxY; + alreadyBalancedSections.add(endingSectionIndex); + } + } + startMidPageRegion(state, newColumns); } @@ -2438,109 +2530,53 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } - // Apply column balancing to pages with multi-column layout. - // This redistributes fragments to achieve balanced column heights, matching Word's behavior. - if (activeColumns.count > 1) { - const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); - const normalizedCols = normalizeColumns(activeColumns, contentWidth); - - // Build measure map for fragment height calculation during balancing - const measureMap = new Map; height?: number }>(); - // Build blockId -> sectionIndex map to filter fragments by section - const blockSectionMap = new Map(); - const sectionColumnsMap = new Map(); - blocks.forEach((block, idx) => { - const measure = measures[idx]; - if (measure) { - measureMap.set(block.id, measure as { kind: string; lines?: Array<{ lineHeight: number }>; height?: number }); - } - // Track section index for each block (for filtering during balancing) - // Not all block types have attrs, so access it safely - const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; - const sectionIdx = blockWithAttrs.attrs?.sectionIndex; - if (typeof sectionIdx === 'number') { - blockSectionMap.set(block.id, sectionIdx); - if (block.kind === 'sectionBreak' && block.columns) { - sectionColumnsMap.set(sectionIdx, cloneColumnLayout(block.columns)); - } - } - }); - - for (const page of pages) { - // Balance the last page (section ends at document end). - // TODO: Track section boundaries and balance at each continuous section break. - if (page === pages[pages.length - 1] && page.fragments.length > 0) { - const finalSectionColumns = sectionColumnsMap.get(activeSectionIndex) ?? activeColumns; - // Word does not rebalance the final page for sections that use explicit - // per-column widths. Preserve the natural left-to-right fill order there. - const hasExplicitColumnWidths = - finalSectionColumns?.equalWidth === false && - Array.isArray(finalSectionColumns.widths) && - finalSectionColumns.widths.length > 0; - - if (hasExplicitColumnWidths) { - continue; - } - - // Skip balancing if fragments are already in multiple columns (e.g., explicit column breaks). - // Balancing should only apply when all content flows naturally in column 0. - const uniqueXPositions = new Set(page.fragments.map((f) => Math.round(f.x))); - const hasExplicitColumnStructure = uniqueXPositions.size > 1; - - if (hasExplicitColumnStructure) { - continue; - } - - // Skip balancing if fragments have different widths (indicating different column configs - // from multiple sections). Balancing would incorrectly apply the final section's width to all. - const uniqueWidths = new Set(page.fragments.map((f) => Math.round(f.width))); - const hasMixedColumnWidths = uniqueWidths.size > 1; - - if (hasMixedColumnWidths) { - continue; - } - - // Check if page has content from multiple sections. - // If so, only balance fragments from the final multi-column section. - const fragmentSections = new Set(); - for (const f of page.fragments) { - const section = blockSectionMap.get(f.blockId); - if (section !== undefined) { - fragmentSections.add(section); - } - } - - // Only balance fragments from the final section when there are mixed sections - const hasMixedSections = fragmentSections.size > 1; - const fragmentsToBalance = hasMixedSections - ? page.fragments.filter((f) => { - const fragSection = blockSectionMap.get(f.blockId); - return fragSection === activeSectionIndex; - }) - : page.fragments; - - if (fragmentsToBalance.length > 0) { - const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; - balancePageColumns( - fragmentsToBalance as { - x: number; - y: number; - width: number; - kind: string; - blockId: string; - fromLine?: number; - toLine?: number; - height?: number; - }[], - normalizedCols, - { left: activeLeftMargin }, - activeTopMargin, - availableHeight, - measureMap, - ); - } + // Apply column balancing per section. For each section with a multi-column layout, + // find the final page that carries any of its fragments and balance those fragments. + // Earlier pages of a multi-page section are always fully filled (content overflowed + // to reach them), so balancing is a no-op there. This replaces the previous + // "last page of document" heuristic with proper per-section balancing — required + // to match Word's behavior when a document has multiple multi-column sections + // separated by continuous or next-page breaks. + // + // Mid-page continuous breaks are handled in the layout loop itself (see the + // forceMidPageRegion branch above). This post-layout pass handles sections that + // end at a page boundary or at document end. + const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); + for (const [sectionIdx, sectionCols] of sectionColumnsMap) { + if (sectionCols.count <= 1) continue; + if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; + if (alreadyBalancedSections.has(sectionIdx)) continue; + + // Find the last page carrying any fragments from this section. + let lastPageForSection: (typeof pages)[number] | null = null; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + lastPageForSection = p; } } + if (!lastPageForSection) continue; + + const normalized = normalizeColumns(sectionCols, contentWidth); + const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; + + balanceSectionOnPage({ + fragments: lastPageForSection.fragments as BalancingFragment[], + sectionIndex: sectionIdx, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: sectionCols.widths, + equalWidth: sectionCols.equalWidth, + }, + sectionHasExplicitColumnBreak: false, // already filtered above + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeTopMargin, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); } // Serialize constraint boundaries into page.columnRegions so DomPainter can From e4265964d6f9f913b0c80926cc5a93dd7016605d Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 20 Apr 2026 15:08:15 -0300 Subject: [PATCH 02/18] fix(layout-engine): balance before forced page break on col-count reduction (SD-2452) When a mid-page section break reduced the column count (e.g. 2-col -> 1-col for test 4's 13-paragraph fixture followed by OVERLAP CHECK), the mid-page hook's forced-page-break guard ran before balancing: if (columnIndexBefore >= newColumns.count) { state = paginator.startNewPage(); } // ... balance ran here, on the empty new page At the section transition, columnIndexBefore=1 (paginator was in col 1) and newColumns.count=1, so the guard forced a new page before balancing had a chance to reposition the ending section's fragments. Balancing then ran on the empty new page (no-op), the paginator placed the post-columns single-column content on the new page, and the old page's fragments were balanced by the post-layout pass. Net effect: columns looked correct on page 0 but OVERLAP CHECK ended up on page 1, while Word fits everything on one page. The guard exists to prevent new 1-col content from overwriting earlier column content on the same page. With balancing, that risk disappears: all ending-section fragments are repositioned within the section's own vertical region, and the cursor moves to maxY below the balanced columns. The new region starts safely below. Fix: balance first. Only fall through to the forced-page-break guard when the ending section won't be balanced (single-col -> multi-col, explicit column break, or no section-1 fragments on the page). Test 4 now renders on a single page, matching Word: - 7+6 balanced columns - OVERLAP CHECK heading at y=758 (right below columns) - "If this overlaps..." at y=794 - Total: 1 page (was 2) All 5 SD-2452 fixtures now match Word's pagination exactly. 614 layout-engine tests still pass. --- .../layout-engine/layout-engine/src/index.ts | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index d5ff50b28c..8776c0cfd4 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1863,22 +1863,11 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const columnIndexBefore = state.columnIndex; const newColumns = updatedState.pendingColumns; - // If reducing column count and currently in a column that won't exist - // in the new layout, start a fresh page to avoid overwriting earlier columns - if (columnIndexBefore >= newColumns.count) { - state = paginator.startNewPage(); - } - - // Balance the ending section's fragments on this page BEFORE starting the - // new region. Word produces a minimum-height section, then places the next - // region just below the balanced columns. Without this, columns fill - // top-to-bottom and the next region starts far below where Word would place it. - // + // Identify the ending section from the current page's fragments. // `activeSectionIndex` only updates at page boundaries, so for continuous - // mid-page section breaks it's stale. Instead, look at the current page's - // fragments and find the most recent section index — that's the section - // that's ending. Usually this is `metadataIndex - 1` (sections are sequential), - // but using blockSectionMap handles non-sequential indices too. + // mid-page section breaks it's stale. Walk back through page fragments + // to find the most recent section index that isn't the new one — that's + // the section that's ending. let endingSectionIndex: number | null = null; for (let i = state.page.fragments.length - 1; i >= 0; i--) { const mapped = blockSectionMap.get(state.page.fragments[i].blockId); @@ -1889,25 +1878,40 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } const endingSectionColumns = endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; - if (endingSectionIndex !== null && endingSectionColumns && endingSectionColumns.count > 1) { + const willBalance = + endingSectionIndex !== null && + !!endingSectionColumns && + endingSectionColumns.count > 1 && + !sectionHasExplicitColumnBreak.has(endingSectionIndex); + + // Balance BEFORE any forced page break. After balancing, all of the + // ending section's fragments are repositioned within the section's own + // vertical region — there's no risk of the new 1-col region overwriting + // prior column content, because the cursor moves to maxY below them. + // + // When not balancing (e.g. single-col → multi-col, or explicit column + // break), fall back to the original "force a new page if currently in a + // column that won't exist after the change" guard so new content doesn't + // overwrite earlier column positions on the same page. + if (willBalance) { // The current region starts at the last constraint boundary's Y, or at // the page's top margin if no mid-page region change has happened yet. const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; const activeRegionTop = lastBoundary?.y ?? activeTopMargin; const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); - const normalized = normalizeColumns(endingSectionColumns, contentWidth); + const normalized = normalizeColumns(endingSectionColumns!, contentWidth); const balanceResult = balanceSectionOnPage({ fragments: state.page.fragments as BalancingFragment[], - sectionIndex: endingSectionIndex, + sectionIndex: endingSectionIndex!, sectionColumns: { count: normalized.count, gap: normalized.gap, width: normalized.width, - widths: endingSectionColumns.widths, - equalWidth: endingSectionColumns.equalWidth, + widths: endingSectionColumns!.widths, + equalWidth: endingSectionColumns!.equalWidth, }, - sectionHasExplicitColumnBreak: sectionHasExplicitColumnBreak.has(endingSectionIndex), + sectionHasExplicitColumnBreak: false, blockSectionMap, margins: { left: activeLeftMargin }, topMargin: activeRegionTop, @@ -1920,8 +1924,13 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options // region starts there, not below an unbalanced tallest column. state.cursorY = balanceResult.maxY; state.maxCursorY = balanceResult.maxY; - alreadyBalancedSections.add(endingSectionIndex); + alreadyBalancedSections.add(endingSectionIndex!); } + } else if (columnIndexBefore >= newColumns.count) { + // Non-balancing case: reducing column count without balancing means + // starting the new region at col 0 could overwrite earlier column + // content. Force a fresh page to avoid that. + state = paginator.startNewPage(); } startMidPageRegion(state, newColumns); From 71fb404e1b70e87a0a43116c9cf7a4a9fb81a1c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tadeu=20Tupinamb=C3=A1?= Date: Thu, 30 Apr 2026 15:07:03 -0300 Subject: [PATCH 03/18] fix: balance earlier pages of multi-page 2-col continuous sections (SD-2646) (#2930) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(pm-adapter): emit section break before non-paragraph nodes (SD-2646) Per ECMA-376 §17.6.17, a inside a paragraph defines the section that ENDS with that paragraph. All body children preceding it — paragraphs, tables, top-level drawings, SDTs — belong to that section. Section ranges were indexed purely by paragraph count, and section-break blocks were emitted only inside handleParagraphNode. A table that sat between two sectPr-marker paragraphs was emitted into the flow stream BEFORE the section break that declared its column config, so the layout engine laid it out under the prior section's settings. This is the root cause of IT-945 rendering a 114-row 2-col continuous table in column 0 across three pages with column 1 empty: the table was placed in the 1-col section, not the 2-col section. Fix: - Track nodeIndex over every top-level doc.content child in findParagraphsWithSectPr and SectionRange (alongside paragraphIndex, which SDT handlers still use for intra-SDT transitions). - Add maybeEmitNextSectionBreakForNode in sections/breaks.ts and call it from internal.ts's main dispatch loop BEFORE every top-level handler. Any non-paragraph node crossing a section boundary now triggers the break. - Section-model primer in pm-adapter/README.md with spec citations. Tests: 1739/1739 pass in pm-adapter (including new end-tagged.test.ts and integration test in index.test.ts asserting flow-block order). * fix(layout-engine): split dominant table at row boundary when balancing section-final page (SD-2646) The column balancer treats each fragment as an atomic block. A multi-page two-column continuous section's final page can end up with a single table fragment taller than totalSectionHeight / columnCount. The atomic-block binary search then places the whole table in one column and leaves the other empty — diverging from Word, which balances by splitting the table at a row boundary per ECMA-376 §17.18.77 ("a continuous section break balances the content of the previous section"). Fix: add splitDominantTableAtRowBoundary as a preprocessor inside balanceSectionOnPage. When the section has a single splittable table fragment larger than target, split it at the row whose cumulative height first meets or exceeds totalSectionHeight / columnCount. The two halves are inserted in place of the original; the rest of the balancer runs unchanged and naturally assigns one to each column. Also add getBalancingHeight so empty sectPr-marker paragraphs (measured lines with width=0) contribute 0 to balancing — matching Word's behavior of not rendering an empty line for such markers. This keeps both columns top-aligned on the section-final page. On IT-945: page 2 now splits 14/14 from y=96 in both columns, matching Word's top-alignment. Before this fix page 2 rendered all 28 remaining rows in col 1 with col 0 empty. Tests: strengthened existing "balances the section-ending page" test (it was passing trivially via `if (sectionFragments.length > 1)` guard). Added narrow-table multi-page regression test. 616/616 pass. --- .../layout-engine/src/column-balancing.ts | 242 +++++++++++++++++- .../layout-engine/src/index.test.ts | 109 ++++++-- packages/layout-engine/pm-adapter/README.md | 32 +++ .../pm-adapter/src/index.test.ts | 66 +++++ .../pm-adapter/src/internal.test.ts | 4 + .../layout-engine/pm-adapter/src/internal.ts | 22 +- .../pm-adapter/src/sections/analysis.test.ts | 49 ++-- .../pm-adapter/src/sections/analysis.ts | 62 ++++- .../pm-adapter/src/sections/breaks.ts | 40 +++ .../src/sections/end-tagged.test.ts | 118 +++++++++ .../pm-adapter/src/sections/index.ts | 1 + .../pm-adapter/src/sections/types.ts | 15 +- .../layout-engine/pm-adapter/src/types.d.ts | 1 + .../layout-engine/pm-adapter/src/types.ts | 7 + 14 files changed, 714 insertions(+), 54 deletions(-) create mode 100644 packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index 94466f9b57..d70e4bd2c8 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -573,6 +573,49 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map` + * has no text content — all measured lines have `width === 0`. Word does + * NOT render an empty line for such markers, so they take no vertical space + * in the balanced layout. Treating them as balance-neutral prevents them + * from pushing their column's cursor down and unbalancing the section's + * final page against Word's output (ECMA-376 §17.18.77). + * + * For every other paragraph, image, drawing, or table this delegates to + * the standard `getFragmentHeight` so existing behavior is preserved. + */ +function getBalancingHeight(fragment: BalancingFragment, measureMap: Map): number { + if (fragment.kind === 'para') { + const measure = measureMap.get(fragment.blockId) as + | (MeasureData & { lines?: Array<{ lineHeight: number; width?: number }> }) + | undefined; + if (measure?.kind === 'paragraph' && Array.isArray(measure.lines) && measure.lines.length > 0) { + const fromLine = fragment.fromLine ?? 0; + const toLine = fragment.toLine ?? measure.lines.length; + // A sectPr-marker paragraph is empty: every measured line has an + // EXPLICIT width of 0 (measuring-dom sets width per line). Treat + // undefined width as "has content" so synthetic test fixtures and any + // other callers that don't set line.width keep their existing height. + let allEmpty = true; + let sawAnyLine = false; + for (let i = fromLine; i < toLine; i++) { + const line = measure.lines[i]; + if (!line) continue; + sawAnyLine = true; + if (line.width !== 0) { + allEmpty = false; + break; + } + } + if (sawAnyLine && allEmpty) return 0; + } + } + return getFragmentHeight(fragment, measureMap); +} + /** * Balances column content on a page by redistributing fragments. * @@ -796,6 +839,26 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); if (remainingHeight <= 0) return null; + // Pre-split a dominant table fragment at a row boundary before balancing. + // + // When a section's final page contains a single splittable table that's + // taller than (totalSectionHeight / columnCount), the atomic-block balancer + // can only place the whole table in one column — leaving the other column + // empty. Word's behavior per ECMA-376 §17.18.77 is to balance the + // REMAINING content, which for a narrow table means splitting at a row + // boundary so both columns carry roughly half the rows. + // + // We split ONCE per balance call (we only need two halves for a 2-col + // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a + // 515px / 28-row table; splitting into ~257px halves lets the balancer + // assign half to each column. + splitDominantTableAtRowBoundary({ + sectionFragments, + fragments, + columnCount, + measureMap: args.measureMap, + }); + // Order fragments in document order: by current column (x → left-to-right), // then by y within each column. During unbalanced layout the paginator fills // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the @@ -809,9 +872,13 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu // by y (as balancePageColumns does) would collapse fragments from different // source columns that happen to share a y coordinate into a single row and // re-stack them at one position — producing overlap. + // + // Use `getBalancingHeight` so empty sectPr-marker paragraphs contribute 0 + // to their column's cursor — matching Word's behavior of not rendering a + // blank line for such markers. const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ blockId: `${f.blockId}#${i}`, - measuredHeight: getFragmentHeight(f, args.measureMap), + measuredHeight: getBalancingHeight(f, args.measureMap), canBreak: false, keepWithNext: false, keepTogether: true, @@ -850,3 +917,176 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu } return { maxY }; } + +/** + * Table measure shape used by the row-split preprocessor. + * + * Only the row-heights array is required. We access it through the runtime + * `measureMap` stored as `MeasureData`, which narrows the interface for the + * public balancer API — but the stored value is the full `TableMeasure` + * containing `rows: [{ height }]`, so a cast is safe. + */ +interface TableMeasureLike { + kind: string; + rows?: Array<{ height: number }>; +} + +/** + * Row-boundary record used by the renderer to draw horizontal dividers. + * Mirrors the subset of `TableFragmentMetadata['rowBoundaries']` that the + * split helper needs to read and regenerate. + */ +interface RowBoundaryLike { + i: number; + y: number; + h: number; + min: number; + r: number; +} + +/** + * In-place split of a dominant table fragment at a row boundary. + * + * Problem this solves: the column balancer treats each fragment as an atomic + * block. A multi-page two-column continuous section's final page can end up + * with a single table fragment that exceeds half the section's height. The + * balancer then places the whole table in one column and leaves the other + * empty — diverging from Word, which balances by splitting the table at a + * row boundary (ECMA-376 §17.18.77: continuous breaks balance the previous + * section's content). + * + * This preprocessor runs once per `balanceSectionOnPage` call. It detects a + * single dominant table fragment and splits it at the row whose cumulative + * height first meets or exceeds totalSectionHeight / columnCount. The two + * halves are inserted into both `sectionFragments` and the page's + * `fragments` array in place of the original; the rest of the balancer then + * runs on N + 1 similarly-sized blocks and naturally assigns one to each + * column. + * + * Guards: + * - Only one splittable table fragment on the section's page (skip if 0 or >1). + * - Table must span at least 2 rows (can't split a 1-row fragment). + * - Total height must exceed target (= total / columnCount) by more than a + * small epsilon; otherwise the atomic balancer already fits. + * + * Splitting is NOT appropriate when the author placed explicit column breaks + * or used unequal columns — those cases are already filtered by the caller. + * + * @param args Section fragments (already filtered to this section), the + * full page fragments (mutated in place), column count, and the + * measure map. + */ +function splitDominantTableAtRowBoundary(args: { + sectionFragments: BalancingFragment[]; + fragments: BalancingFragment[]; + columnCount: number; + measureMap: Map; +}): void { + const { sectionFragments, fragments, columnCount, measureMap } = args; + if (columnCount <= 1) return; + + const tables = sectionFragments.filter((f) => f.kind === 'table'); + if (tables.length !== 1) return; + const table = tables[0] as BalancingFragment & { + fromRow?: number; + toRow?: number; + height?: number; + continuesFromPrev?: boolean; + continuesOnNext?: boolean; + metadata?: { rowBoundaries?: RowBoundaryLike[]; columnBoundaries?: unknown; coordinateSystem?: string }; + }; + const fromRow = table.fromRow ?? 0; + const toRow = table.toRow ?? fromRow; + const rowSpan = toRow - fromRow; + if (rowSpan < 2) return; + + const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; + + const totalSectionHeight = sectionFragments.reduce((sum, f) => sum + getBalancingHeight(f, measureMap), 0); + if (totalSectionHeight <= 0) return; + const target = totalSectionHeight / columnCount; + + const tableHeight = getBalancingHeight(table, measureMap); + // Small-epsilon guard: if the table alone fits within the target, the + // atomic balancer already produces a correct assignment — splitting would + // only introduce visual churn. + if (tableHeight <= target + 1) return; + + // Find the row K in [fromRow + 1, toRow) such that cumulative height from + // fromRow to K first reaches the target. Guaranteed to succeed because + // tableHeight > target and rowSpan ≥ 2. + let running = 0; + let splitRow = fromRow + 1; + for (let r = fromRow; r < toRow - 1; r++) { + const h = measure.rows[r]?.height ?? 0; + if (running + h >= target) { + splitRow = r + 1; + break; + } + running += h; + splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row + } + // Clamp to valid range (defensive — loop logic above should always hit the break). + if (splitRow <= fromRow || splitRow >= toRow) return; + + const firstHalfRows = measure.rows.slice(fromRow, splitRow); + const secondHalfRows = measure.rows.slice(splitRow, toRow); + const firstHalfHeight = firstHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + const secondHalfHeight = secondHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + + // Regenerate rowBoundaries so the renderer draws horizontal dividers at the + // right y-offsets inside each half. rowBoundaries are 0-origin within the + // fragment; we walk the measure's rows for each half and accumulate. + const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { + const out: RowBoundaryLike[] = []; + let y = 0.5; + for (let i = 0; i < rows.length; i++) { + const h = rows[i].height ?? 0; + out.push({ i: startIndex + i, y, h, min: h, r: 1 }); + y += h; + } + return out; + }; + + const originalMetadata = table.metadata; + const firstMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(firstHalfRows, 0), + } + : undefined; + const secondMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(secondHalfRows, 0), + } + : undefined; + + // Construct the first half by mutating the original (preserves object + // identity so `fragments.indexOf(table)` still works below). + table.toRow = splitRow; + table.height = firstHalfHeight; + table.continuesOnNext = true; + if (firstMetadata) table.metadata = firstMetadata; + + const secondHalf: BalancingFragment = { + ...table, + fromRow: splitRow, + toRow, + height: secondHalfHeight, + continuesFromPrev: true, + continuesOnNext: table.continuesOnNext ? false : (table.continuesOnNext ?? false), + metadata: secondMetadata ?? table.metadata, + } as BalancingFragment; + // Preserve the original's continuesOnNext intent: if the original fragment + // continued onto a later page, the SECOND half now inherits that role. + (secondHalf as { continuesOnNext?: boolean }).continuesOnNext = false; + + // Insert the second half right after the first in both arrays so the + // balancer's (x, y) ordering keeps them adjacent in document order. + const fragIdx = fragments.indexOf(table); + if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); + const sectIdx = sectionFragments.indexOf(table); + if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); +} diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index 9a7b0a71a2..6d4d8f2f27 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2964,29 +2964,104 @@ describe('layoutDocument', () => { expect(afterSectionPara!.y).toBe(expectedBalancedBottom); }); - it('balances the section-ending page even when the section spans multiple pages', () => { - // A section big enough to fill page 1 completely then spill onto page 2. - // Word balances only the FINAL page (earlier pages are already full). - // Use paragraphs each large enough that col-1 alone will overflow. - const lineHeight = 120; - const paraCount = 10; + it('fills BOTH columns on every page of a multi-page 2-col continuous section', () => { + // ECMA-376 §17.18.77 (ST_SectionMark): a continuous section break + // "balances content of the previous section." Word's observable behavior + // is to fill col 0 to the balanced target, wrap to col 1 to the same + // target, then wrap to the next page — on EVERY page, not only the last. + // Regression for SD-2646: earlier pages must not stack content in col 0. + const lineHeight = 20; + const paraCount = 100; // ≈ 2000px, exceeds one page's content area const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); const layout = layoutDocument(blocks, measures, PAGE); - // Multiple pages due to content size. - expect(layout.pages.length).toBeGreaterThan(1); + expect(layout.pages.length).toBeGreaterThanOrEqual(2); - // On the final page, fragments of the section should span both columns. - const finalPage = layout.pages[layout.pages.length - 1]; - const sectionFragments = finalPage.fragments.filter( - (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', - ); - if (sectionFragments.length > 1) { - const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); - // Either balanced into both columns, or single-column if only 1 fragment - expect(uniqueX.size).toBeGreaterThanOrEqual(1); + for (const page of layout.pages) { + const sectionFragments = page.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length < 2) continue; // tail of last page may have <2 fragments + const col0 = sectionFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + expect(col0.length).toBeGreaterThan(0); + expect(col1.length).toBeGreaterThan(0); + } + }); + + it('flows a narrow multi-page table across both columns on every page (SD-2646 regression)', () => { + // IT-945 shape: a narrow table (one column wide) inside a 2-col continuous + // section, spanning multiple pages. Regression guard for the layout path + // once pm-adapter correctly places the table in the 2-col section. + const rowCount = 114; + const rowHeight = 18.4; + const cellWidth = TWO_COL_WIDTH / 2; + + const rows = Array.from({ length: rowCount }, (_, r) => ({ + id: `tbl-row-${r}`, + cells: [ + { id: `tbl-cell-${r}-0`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-0-p`, runs: [] } }, + { id: `tbl-cell-${r}-1`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-1-p`, runs: [] } }, + ], + })); + const tbl: TableBlock = { + kind: 'table', + id: 'tbl', + rows, + attrs: { sectionIndex: 0 }, + } as TableBlock; + const tblM: TableMeasure = { + kind: 'table', + rows: Array.from({ length: rowCount }, () => ({ + height: rowHeight, + cells: [ + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + ], + })), + columnWidths: [cellWidth, cellWidth], + totalWidth: cellWidth * 2, + totalHeight: rowHeight * rowCount, + }; + + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + tbl as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }, tblM, { kind: 'sectionBreak' }]; + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + let pagesWithBothColumns = 0; + for (const page of layout.pages) { + const tableFragments = page.fragments.filter((f) => f.kind === 'table'); + if (tableFragments.length === 0) continue; + const col0 = tableFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = tableFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + if (col0.length > 0 && col1.length > 0) pagesWithBothColumns++; } + // At least one page must have fragments in both columns. A stricter + // assertion (every page) is made invalid by the tail of the final page + // which can reasonably hold <1 column's worth of content. + expect(pagesWithBothColumns).toBeGreaterThan(0); }); it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { diff --git a/packages/layout-engine/pm-adapter/README.md b/packages/layout-engine/pm-adapter/README.md index 27c0d1591c..e0653df917 100644 --- a/packages/layout-engine/pm-adapter/README.md +++ b/packages/layout-engine/pm-adapter/README.md @@ -23,3 +23,35 @@ Notes: - Handles lists, tables, images, vector shapes, SDT (TOC, structured content, doc parts), tracked changes, hyperlinks. - Consumes `@superdoc/style-engine` defaults and locale/tab interval hints from the PM document attrs. - `@superdoc/measuring-dom` is only a dependency for types; measurement happens later in the pipeline. + +## Section model (ECMA-376 §17.6, §17.18.77) + +Word uses **end-tagged** section semantics: a `` inside a paragraph defines the section that **ends with** that paragraph. The body-level `` defines the final section. All body children preceding a section-terminating paragraph — other paragraphs, **tables**, top-level drawings, SDT wrappers — belong to the section whose sectPr follows them. + +``` + + This is section A's first paragraph + + (section A props: 1-col, nextPage) + + ... ← section B (the NEXT sectPr) + + (section B props: 2-col continuous) + + This is section C's paragraph + (section C props: body-level) + +``` + +Section ranges are computed in `sections/analysis.ts` with two parallel indices: + +| Index | What it counts | Used by | +|---|---|---| +| `startParagraphIndex` / `endParagraphIndex` | Paragraph nodes only (including those inside SDT wrappers) | SDT handlers for intra-SDT section transitions | +| `startNodeIndex` / `endNodeIndex` | Every top-level `doc.content` child — paragraphs, tables, top-level drawings, SDTs | Main dispatch loop for inter-node section transitions | + +The main dispatch loop in `internal.ts` calls `maybeEmitNextSectionBreakForNode` BEFORE handling each top-level node. The helper uses `currentNodeIndex === nextSection.startNodeIndex` as its trigger, so non-paragraph nodes (tables especially) correctly get the sectionBreak emitted before them when they cross a section boundary. This is the fix for SD-2646. + +**When adding a new top-level block kind** (e.g., a new SDT type or a top-level drawing kind): do nothing. The dispatch-level hook covers you automatically. You only need a per-handler section check if your handler descends into children that carry their own sectPr markers — see the SDT handlers (`sdt/bibliography.ts`, `sdt/document-index.ts`, `sdt/table-of-authorities.ts`) for the intra-SDT pattern. + +Continuous section breaks (``) carry an extra requirement from §17.18.77: they "balance the content of the previous section." The balancing itself is the layout engine's responsibility (see `layout-engine/src/column-balancing.ts`); the adapter's job is only to ensure the right blocks are in the right section. diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 441b210e73..b22a0de17c 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -1269,6 +1269,72 @@ describe('toFlowBlocks', () => { [first, second, third].forEach((b) => expect(b?.type).toBe('continuous')); }); }); + + describe('end-tagged section membership for non-paragraph nodes (SD-2646, ECMA-376 §17.6.17)', () => { + it('emits the next section break BEFORE a table that sits between two sectPr-marker paragraphs', () => { + // IT-945 shape: table lives between the paragraph that ends section A + // and the paragraph that ends section B. Per §17.6.17 the table + // belongs to section B, so the sectionBreak introducing B's columns + // must precede the table in the flow stream. + const pmDoc: PMNode = { + type: 'doc', + attrs: { bodySectPr: createTestBodySectPr() }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'This is my first section' }] }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'nextPage' } }, + { name: 'w:cols', attributes: { 'w:num': '1', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }] }, + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }] }, + ], + }, + ], + }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'continuous' } }, + { name: 'w:cols', attributes: { 'w:num': '2', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { type: 'paragraph', content: [{ type: 'text', text: 'This is my third section' }] }, + ], + } as never; + + const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); + + const tableIndex = blocks.findIndex((b) => b.kind === 'table'); + const twoColBreakIndex = blocks.findIndex((b) => b.kind === 'sectionBreak' && b.columns?.count === 2); + expect(tableIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeLessThan(tableIndex); + }); + }); }); describe('block id prefixing', () => { diff --git a/packages/layout-engine/pm-adapter/src/internal.test.ts b/packages/layout-engine/pm-adapter/src/internal.test.ts index e1972f240b..4c4eda6863 100644 --- a/packages/layout-engine/pm-adapter/src/internal.test.ts +++ b/packages/layout-engine/pm-adapter/src/internal.test.ts @@ -59,6 +59,10 @@ vi.mock('./sections/index.js', () => { id: nextBlockId('sectionBreak'), type: section.type, })), + maybeEmitNextSectionBreakForNode: vi.fn(() => { + // Mocked as no-op: this test file already provides zero section ranges + // via analyzeSectionRanges, so there is never a break to emit. + }), publishSectionMetadata: vi.fn(), }; }); diff --git a/packages/layout-engine/pm-adapter/src/internal.ts b/packages/layout-engine/pm-adapter/src/internal.ts index 1223d7ec7e..b7dde6420c 100644 --- a/packages/layout-engine/pm-adapter/src/internal.ts +++ b/packages/layout-engine/pm-adapter/src/internal.ts @@ -12,7 +12,12 @@ import type { FlowBlock, ParagraphBlock } from '@superdoc/contracts'; import { isValidTrackedMode } from './tracked-changes.js'; -import { analyzeSectionRanges, createSectionBreakBlock, publishSectionMetadata } from './sections/index.js'; +import { + analyzeSectionRanges, + createSectionBreakBlock, + maybeEmitNextSectionBreakForNode, + publishSectionMetadata, +} from './sections/index.js'; import { normalizePrefix, buildPositionMap, createBlockIdGenerator } from './utilities.js'; import { paragraphToFlowBlocks, @@ -208,6 +213,7 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): ranges: sectionRanges, currentSectionIndex: 0, currentParagraphIndex: 0, + currentNodeIndex: 0, }, converters, themeColors, @@ -216,12 +222,24 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): trackedListLastOrdinals: new Map(), }; - // Process nodes using handler dispatch pattern + // Process nodes using handler dispatch pattern. Before each top-level node + // we flush any pending section break whose `startNodeIndex` matches the + // current position — this keeps non-paragraph nodes (tables, top-level + // drawings) in their correct end-tagged section per ECMA-376 §17.6.17. doc.content.forEach((node) => { + maybeEmitNextSectionBreakForNode({ + sectionState: handlerContext.sectionState!, + nextBlockId, + pushBlock: (block) => { + blocks.push(block); + recordBlockKind(block.kind); + }, + }); const handler = nodeHandlers[node.type]; if (handler) { handler(node, handlerContext); } + handlerContext.sectionState!.currentNodeIndex++; }); // Ensure final body section is emitted only if not already emitted during paragraph processing. diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts index ac860ce7e6..97b6b5b2d7 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts @@ -296,8 +296,9 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 1, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(2); }); it('should find multiple paragraphs with sectPr', () => { @@ -315,12 +316,13 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(2); - expect(result.paragraphs[0]).toEqual({ index: 0, node: para1 }); - expect(result.paragraphs[1]).toEqual({ index: 2, node: para3 }); + expect(result.paragraphs[0]).toEqual({ index: 0, nodeIndex: 0, node: para1 }); + expect(result.paragraphs[1]).toEqual({ index: 2, nodeIndex: 2, node: para3 }); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(3); }); - it('should skip non-paragraph nodes', () => { + it('should skip non-paragraph nodes for paragraphIndex but count them in nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; @@ -334,11 +336,15 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + // paragraphIndex = 1 (second paragraph), nodeIndex = 2 (third top-level child + // after the heading). Non-paragraph top-level children count toward nodeIndex + // because ECMA-376 §17.6.17 sections span all body children. + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 2, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(4); }); - it('should include paragraphs inside index nodes', () => { + it('should include paragraphs inside index nodes and share the SDT nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; const para3 = { type: 'paragraph', content: [] }; @@ -352,8 +358,11 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); - expect(result.paragraphs).toEqual([{ index: 1, node: para2 }]); + // para2 is inside the SDT (index node) at top-level nodeIndex 1. + // paragraphIndex 1 still reflects paragraph order across descent. + expect(result.paragraphs).toEqual([{ index: 1, nodeIndex: 1, node: para2 }]); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(2); }); }); @@ -385,7 +394,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue(null); @@ -408,7 +417,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr', elements: [] }; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -450,8 +459,8 @@ describe('analysis', () => { }; const paragraphs = [ - { index: 2, node: para1 }, - { index: 5, node: para2 }, + { index: 2, nodeIndex: 2, node: para1 }, + { index: 5, nodeIndex: 5, node: para2 }, ]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr' }; @@ -487,7 +496,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.NEXT_PAGE, @@ -512,7 +521,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mock section data with page margins but no header/footer margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -550,7 +559,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only header margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -583,7 +592,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only footer margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -616,7 +625,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only top margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -649,7 +658,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Explicit zero values for all margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -688,7 +697,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mix of header/footer and page margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -725,7 +734,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ titlePg: false, @@ -747,7 +756,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.CONTINUOUS, diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 3a0cae9013..73905c2c84 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -65,15 +65,27 @@ export function shouldIgnoreSectionBreak( /** * Find all paragraphs in the document that contain sectPr elements. * + * Records two indices per match: + * - `paragraphIndex` counts only paragraph nodes (including those nested + * inside SDT wrappers), matching the long-standing contract callers use + * for SDT-internal section transitions. + * - `nodeIndex` counts every top-level `doc.content` child (paragraph, + * table, top-level drawing, SDT, …). This is required to fix SD-2646: + * a non-paragraph node between two sectPr markers belongs to the LATER + * marker's section per ECMA-376 §17.6.17, so the dispatch loop must + * know each section's bounds in top-level-node terms. + * * @param doc - ProseMirror document node - * @returns Object containing paragraphs with sectPr and total paragraph count + * @returns Paragraph matches plus both totals */ export function findParagraphsWithSectPr(doc: PMNode): { - paragraphs: Array<{ index: number; node: PMNode }>; + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>; totalCount: number; + totalNodeCount: number; } { - const paragraphs: Array<{ index: number; node: PMNode }> = []; + const paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }> = []; let paragraphIndex = 0; + let nodeIndex = 0; const getNodeChildren = (node: PMNode): PMNode[] => { if (Array.isArray(node.content)) return node.content; const content = node.content as { forEach?: (cb: (child: PMNode) => void) => void } | undefined; @@ -87,27 +99,30 @@ export function findParagraphsWithSectPr(doc: PMNode): { return []; }; - const visitNode = (node: PMNode): void => { + const visitNode = (node: PMNode, outerNodeIndex: number): void => { if (node.type === 'paragraph') { if (hasSectPr(node)) { - paragraphs.push({ index: paragraphIndex, node }); + paragraphs.push({ index: paragraphIndex, nodeIndex: outerNodeIndex, node }); } paragraphIndex++; return; } if (node.type === 'index' || node.type === 'bibliography' || node.type === 'tableOfAuthorities') { - getNodeChildren(node).forEach(visitNode); + // SDT descendants share the outer SDT's nodeIndex — dispatch-level + // section transitions fire on the SDT as a whole, not on its children. + getNodeChildren(node).forEach((child) => visitNode(child, outerNodeIndex)); } }; if (doc.content) { for (const node of doc.content) { - visitNode(node); + visitNode(node, nodeIndex); + nodeIndex++; } } - return { paragraphs, totalCount: paragraphIndex }; + return { paragraphs, totalCount: paragraphIndex, totalNodeCount: nodeIndex }; } /** @@ -128,11 +143,12 @@ export function findParagraphsWithSectPr(doc: PMNode): { * @returns Array of section ranges */ export function buildSectionRangesFromParagraphs( - paragraphs: Array<{ index: number; node: PMNode }>, + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>, hasBodySectPr: boolean, ): SectionRange[] { const ranges: SectionRange[] = []; let currentStart = 0; + let currentStartNode = 0; paragraphs.forEach((item, idx) => { if (shouldIgnoreSectionBreak(item.node, idx, paragraphs.length, hasBodySectPr)) { @@ -154,6 +170,8 @@ export function buildSectionRangesFromParagraphs( const range: SectionRange = { sectionIndex: idx, + startNodeIndex: currentStartNode, + endNodeIndex: item.nodeIndex, startParagraphIndex: currentStart, endParagraphIndex: item.index, sectPr, @@ -180,6 +198,7 @@ export function buildSectionRangesFromParagraphs( ranges.push(range); currentStart = item.index + 1; + currentStartNode = item.nodeIndex + 1; }); return ranges; @@ -228,6 +247,7 @@ export function createFinalSectionFromBodySectPr( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange | null { const clampedStart = Math.max(0, Math.min(currentStart, Math.max(totalParagraphs - 1, 0))); @@ -251,8 +271,15 @@ export function createFinalSectionFromBodySectPr( bodySectionData.bottomPx != null || bodySectionData.leftPx != null; + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds + ? Math.max(0, Math.min(nodeBounds.startNodeIndex, Math.max(totalNodes - 1, 0))) + : clampedStart; + return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: clampedStart, endParagraphIndex: totalParagraphs - 1, sectPr: bodySectPr, @@ -291,9 +318,14 @@ export function createDefaultFinalSection( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange { + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds?.startNodeIndex ?? currentStart; return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: currentStart, endParagraphIndex: totalParagraphs - 1, sectPr: null, @@ -318,11 +350,13 @@ export function createDefaultFinalSection( * @returns Array of section ranges with backward-looking semantics */ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): SectionRange[] { - const { paragraphs, totalCount } = findParagraphsWithSectPr(doc); + const { paragraphs, totalCount, totalNodeCount } = findParagraphsWithSectPr(doc); const hasBody = Boolean(bodySectPr); const ranges = buildSectionRangesFromParagraphs(paragraphs, hasBody); - const currentStart = ranges.length > 0 ? ranges[ranges.length - 1].endParagraphIndex + 1 : 0; + const last = ranges[ranges.length - 1]; + const currentStart = last ? last.endParagraphIndex + 1 : 0; + const currentStartNode = last ? last.endNodeIndex + 1 : 0; // Always represent the final section defined by bodySectPr, even if there are // no remaining paragraphs after the last paragraph-level sectPr. This ensures @@ -333,12 +367,16 @@ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): Section Math.min(currentStart, totalCount), totalCount, ranges.length, + { startNodeIndex: Math.min(currentStartNode, totalNodeCount), totalNodeCount }, ); if (finalSection) { ranges.push(finalSection); } } else if (ranges.length > 0) { - const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length); + const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length, { + startNodeIndex: Math.min(currentStartNode, totalNodeCount), + totalNodeCount, + }); if (fallbackFinal) { fallbackFinal.type = DEFAULT_PARAGRAPH_SECTION_TYPE; ranges.push(fallbackFinal); diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index 8501b2230a..8a05383174 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -190,3 +190,43 @@ export function shouldRequirePageBoundary(current: SectionRange, next: SectionRa export function hasIntrinsicBoundarySignals(_: SectionRange): boolean { return false; } + +/** + * Emit the next section's sectionBreak block if the dispatch loop has reached + * that section's starting top-level node index. + * + * ECMA-376 §17.6.17: a section is defined by its end-tagged ``. All + * body children preceding that tag — paragraphs, tables, top-level drawings — + * belong to the section that ENDS at the tag. This helper fires BEFORE any + * such node so the appropriate section config is active by the time the node + * is laid out. + * + * This centralises the check that previously lived (duplicated) inside + * `handleParagraphNode` and the SDT handlers. Calling it from the main + * dispatch loop covers every top-level node type — present and future — + * with no per-handler opt-in. + */ +export function maybeEmitNextSectionBreakForNode(args: { + sectionState: { + ranges: SectionRange[]; + currentSectionIndex: number; + currentNodeIndex: number; + }; + nextBlockId: BlockIdGenerator; + pushBlock: (block: SectionBreakBlock) => void; +}): void { + const { sectionState, nextBlockId, pushBlock } = args; + if (sectionState.ranges.length === 0) return; + if (sectionState.currentSectionIndex >= sectionState.ranges.length - 1) return; + + const nextSection = sectionState.ranges[sectionState.currentSectionIndex + 1]; + if (!nextSection) return; + if (sectionState.currentNodeIndex !== nextSection.startNodeIndex) return; + + const currentSection = sectionState.ranges[sectionState.currentSectionIndex]; + const requiresPageBoundary = + shouldRequirePageBoundary(currentSection, nextSection) || hasIntrinsicBoundarySignals(nextSection); + const extraAttrs = requiresPageBoundary ? { requirePageBoundary: true } : undefined; + pushBlock(createSectionBreakBlock(nextSection, nextBlockId, extraAttrs)); + sectionState.currentSectionIndex++; +} diff --git a/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts new file mode 100644 index 0000000000..8d7cf42210 --- /dev/null +++ b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts @@ -0,0 +1,118 @@ +/** + * @file End-tagged section-membership tests (ECMA-376 §17.6.17, §17.18.77). + * + * OOXML rule: a `` inside a paragraph defines the section that + * ENDS with that paragraph. All body children preceding it (back to the + * previous section-terminating paragraph) belong to that section — + * including tables, drawings, and other non-paragraph nodes. + * + * These tests use real implementations with no module mocks so they assert + * observable behavior of the section analysis, not call patterns. + */ +import { describe, it, expect } from 'vitest'; +import { analyzeSectionRanges } from './analysis.js'; +import type { PMNode } from '../types.js'; +import type { SectPrElement } from './types.js'; + +const sectPr = (options: { + type?: 'continuous' | 'nextPage'; + cols?: number; + colSpace?: number; + pgSz?: { w: number; h: number }; +}): SectPrElement => { + const elements: SectPrElement['elements'] = []; + if (options.type) { + elements!.push({ + type: 'element', + name: 'w:type', + attributes: { 'w:val': options.type }, + }); + } + if (options.pgSz) { + elements!.push({ + type: 'element', + name: 'w:pgSz', + attributes: { 'w:w': String(options.pgSz.w), 'w:h': String(options.pgSz.h) }, + }); + } + elements!.push({ + type: 'element', + name: 'w:cols', + attributes: { + 'w:num': String(options.cols ?? 1), + 'w:space': String(options.colSpace ?? 720), + }, + }); + return { + type: 'element', + name: 'w:sectPr', + elements, + }; +}; + +const paragraph = (marker?: SectPrElement): PMNode => ({ + type: 'paragraph', + attrs: marker ? { paragraphProperties: { sectPr: marker } } : {}, + content: [], +}); + +const table = (): PMNode => ({ + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }], + }, + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }], + }, + ], + }, + ], +}); + +describe('analyzeSectionRanges — end-tagged membership for non-paragraph nodes (SD-2646)', () => { + it('places a table between two sectPr markers into the section of the LATER marker', () => { + // Arrange — IT-945 shape: + // p0 "first section" text + // p1(sectPr-A) empty marker, 1-col nextPage → ends Section A + // TABLE belongs to Section B per ECMA-376 §17.6.17 + // p2(sectPr-B) empty marker, 2-col continuous → ends Section B + // p3 "third section" text + // body sectPr final, 1-col continuous → Section C + const markerA = sectPr({ type: 'nextPage', cols: 1 }); + const markerB = sectPr({ type: 'continuous', cols: 2, colSpace: 720 }); + const bodyMarker = sectPr({ type: 'continuous', cols: 1 }); + const doc: PMNode = { + type: 'doc', + attrs: { bodySectPr: bodyMarker }, + content: [paragraph(), paragraph(markerA), table(), paragraph(markerB), paragraph()], + }; + + // Act + const sut = analyzeSectionRanges(doc, bodyMarker); + + // Assert — three sections, with Section B's range spanning the table + expect(sut).toHaveLength(3); + const [sectionA, sectionB, sectionC] = sut; + + // Section A ends at p1 (node index 1). Table at node index 2 is NOT in A. + expect(sectionA.endNodeIndex).toBe(1); + expect(sectionA.columns?.count ?? 1).toBe(1); + + // Section B: starts AFTER Section A (node index 2 — the table), ends at p2 (node index 3). + // The table must belong to Section B, not Section A. + expect(sectionB.startNodeIndex).toBe(2); + expect(sectionB.endNodeIndex).toBe(3); + expect(sectionB.columns?.count).toBe(2); + + // Section C (body): starts at node index 4 (p3). + expect(sectionC.startNodeIndex).toBe(4); + expect(sectionC.columns?.count ?? 1).toBe(1); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/sections/index.ts b/packages/layout-engine/pm-adapter/src/sections/index.ts index 64b41423fb..d1731f870f 100644 --- a/packages/layout-engine/pm-adapter/src/sections/index.ts +++ b/packages/layout-engine/pm-adapter/src/sections/index.ts @@ -41,4 +41,5 @@ export { isSectionBreakBlock, signaturesEqual, shallowObjectEquals, + maybeEmitNextSectionBreakForNode, } from './breaks.js'; diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index a4134b48d0..551d8bcfd8 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -89,11 +89,22 @@ export type SectionVerticalAlign = 'top' | 'center' | 'bottom' | 'both'; /** * Section range represents a contiguous section in the document. - * Word uses "end-tagged" section semantics: a paragraph's sectPr defines - * properties for the section ENDING at that paragraph, not starting after it. + * + * Word uses "end-tagged" section semantics (ECMA-376 §17.6.17): a paragraph's + * sectPr defines properties for the section ENDING at that paragraph, not + * starting after it. All body children preceding the section-terminating + * paragraph — paragraphs, tables, top-level drawings — belong to that section. + * + * `startNodeIndex`/`endNodeIndex` are computed over every top-level + * `doc.content` child and are the authoritative boundaries for dispatching + * section breaks at emission time. `startParagraphIndex`/`endParagraphIndex` + * are retained for callers (SDT handlers) that count only paragraphs during + * recursive descent. */ export interface SectionRange { sectionIndex: number; + startNodeIndex: number; + endNodeIndex: number; startParagraphIndex: number; endParagraphIndex: number; sectPr: SectPrElement | null; diff --git a/packages/layout-engine/pm-adapter/src/types.d.ts b/packages/layout-engine/pm-adapter/src/types.d.ts index 13de3db49f..2285e17fcd 100644 --- a/packages/layout-engine/pm-adapter/src/types.d.ts +++ b/packages/layout-engine/pm-adapter/src/types.d.ts @@ -238,6 +238,7 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + currentNodeIndex: number; }; converters?: { paragraphToFlowBlocks?: ( diff --git a/packages/layout-engine/pm-adapter/src/types.ts b/packages/layout-engine/pm-adapter/src/types.ts index d60515c39b..0aed51dd1d 100644 --- a/packages/layout-engine/pm-adapter/src/types.ts +++ b/packages/layout-engine/pm-adapter/src/types.ts @@ -316,6 +316,13 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + /** + * Index of the current top-level `doc.content` child being dispatched. + * Advanced by the main dispatch loop in internal.ts — drives end-tagged + * section transitions for non-paragraph nodes (tables, top-level + * drawings, …) per ECMA-376 §17.6.17. + */ + currentNodeIndex: number; }; // Converters for nested content From 52ae84a4c2735d23bca8e4cc887578bc61ecdc40 Mon Sep 17 00:00:00 2001 From: Nick Bernal Date: Thu, 30 Apr 2026 11:29:26 -0700 Subject: [PATCH 04/18] chore: update lock --- pnpm-lock.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e7ca8ecf6b..9e14bb9c69 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -6470,7 +6470,6 @@ packages: '@microsoft/teamsapp-cli@3.0.2': resolution: {integrity: sha512-AowuJwrrUxeF9Bq/frxuy9YZjK/ECk3pi0UBXl3CQLZ4XNWfgWatiFi/UWpyHDLccFs+0Za3nNYATFvgsxEFwQ==} engines: {node: '>=12'} - deprecated: This package is deprecated and supported Node.js version is 18-22. Please use @microsoft/m365agentstoolkit-cli instead. hasBin: true '@microsoft/teamsfx-api@0.23.1': From d1f5525f74a62b02e8a0bd8503a7ac6296a554e8 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 16:08:56 -0300 Subject: [PATCH 05/18] fix(layout-engine): address review feedback for column balancing (SD-2452) Address Nick's four review comments on PR #2869: 1. Section-local page geometry. The post-layout balancing pass derived contentWidth/availableHeight/margins.left from the FINAL active state, which silently rewrote earlier sections using the last section's content box. Read margins and size from each section's last page instead, so documents with mixed page setups (orientation, margins, paper size) per section keep their own metrics during balancing. 2. Document-wide column-layout fallback. When a caller passes LayoutOptions.columns directly without any sectionBreak blocks, sectionColumnsMap stays empty and the per-section loop never ran, leaving the final page stacked in column 0. Synthesize a virtual section that spans the whole document when no sectionBreak exists, preserving the pre-SD-2452 final-page balancing behavior. Guard with documentHasExplicitColumnBreak so author intent wins. 3. Blank-paragraph height preservation. The earlier `line.width === 0` heuristic for sectPr-marker paragraphs also matched ordinary blank paragraphs, collapsing their height and causing the next paragraph to overlap the empty line. Replace with an explicit `attrs.sectPrMarker` block-id set threaded through the balance APIs. 4. Table rowBoundaries shape. splitDominantTableAtRowBoundary stored regenerated boundaries using the renderer's compact serialized keys ({i,h,min,r}) instead of the contract `TableRowBoundary` shape ({index,height,minHeight,resizable}). The DOM renderer's projection then produced undefined values, breaking row-resize handles on split table fragments. Plus a robustness fix: `getFragmentHeight` now consults `measure.totalHeight` for tables when fragment.height is 0, so balancing math doesn't silently zero out tables whose layout pass allocated no height (e.g. header-less tables in degenerate test fixtures). All 653 layout-engine unit tests pass. --- .../layout-engine/src/column-balancing.ts | 118 ++++++++++-------- .../layout-engine/layout-engine/src/index.ts | 90 +++++++++++-- 2 files changed, 148 insertions(+), 60 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index d70e4bd2c8..e7420742b0 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -517,8 +517,10 @@ export interface MeasureData { kind: string; /** Line measurements for paragraph content */ lines?: Array<{ lineHeight: number }>; - /** Total height for non-paragraph content */ + /** Total height for non-paragraph content (image, drawing) */ height?: number; + /** Total height for table content (TableMeasure stores this rather than `height`). */ + totalHeight?: number; } /** @@ -559,14 +561,27 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map 0) { return fragment.height; } const measure = measureMap.get(fragment.blockId); - if (measure && typeof measure.height === 'number') { - return measure.height; + if (measure) { + if (typeof measure.height === 'number' && measure.height > 0) { + return measure.height; + } + if (fragment.kind === 'table' && typeof measure.totalHeight === 'number') { + return measure.totalHeight; + } + } + if (typeof fragment.height === 'number') { + return fragment.height; } } @@ -576,42 +591,26 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map` - * has no text content — all measured lines have `width === 0`. Word does - * NOT render an empty line for such markers, so they take no vertical space - * in the balanced layout. Treating them as balance-neutral prevents them - * from pushing their column's cursor down and unbalancing the section's - * final page against Word's output (ECMA-376 §17.18.77). + * is invisible to Word's renderer, so it must take no vertical space in the + * balanced layout (ECMA-376 §17.18.77). The pm-adapter stamps these + * paragraphs with `attrs.sectPrMarker === true` (paragraph.ts), and the + * caller threads the resulting block-id set through here. * - * For every other paragraph, image, drawing, or table this delegates to - * the standard `getFragmentHeight` so existing behavior is preserved. + * Earlier versions of this function tried to detect markers from line + * geometry (`line.width === 0`), but a regular blank paragraph also + * measures with width 0 and DOES occupy line height — collapsing those to + * 0 caused the next paragraph to overlap the blank line. The metadata-based + * gate is the only safe signal. */ -function getBalancingHeight(fragment: BalancingFragment, measureMap: Map): number { - if (fragment.kind === 'para') { - const measure = measureMap.get(fragment.blockId) as - | (MeasureData & { lines?: Array<{ lineHeight: number; width?: number }> }) - | undefined; - if (measure?.kind === 'paragraph' && Array.isArray(measure.lines) && measure.lines.length > 0) { - const fromLine = fragment.fromLine ?? 0; - const toLine = fragment.toLine ?? measure.lines.length; - // A sectPr-marker paragraph is empty: every measured line has an - // EXPLICIT width of 0 (measuring-dom sets width per line). Treat - // undefined width as "has content" so synthetic test fixtures and any - // other callers that don't set line.width keep their existing height. - let allEmpty = true; - let sawAnyLine = false; - for (let i = fromLine; i < toLine; i++) { - const line = measure.lines[i]; - if (!line) continue; - sawAnyLine = true; - if (line.width !== 0) { - allEmpty = false; - break; - } - } - if (sawAnyLine && allEmpty) return 0; - } +function getBalancingHeight( + fragment: BalancingFragment, + measureMap: Map, + sectPrMarkerBlockIds?: Set, +): number { + if (fragment.kind === 'para' && sectPrMarkerBlockIds && sectPrMarkerBlockIds.has(fragment.blockId)) { + return 0; } return getFragmentHeight(fragment, measureMap); } @@ -792,6 +791,12 @@ export interface BalanceSectionOnPageArgs { availableHeight: number; /** Measurement data for fragments (built from measures array). */ measureMap: Map; + /** + * Block IDs of paragraphs that exist only to carry `` properties. + * These contribute zero height to balanced columns — see `getBalancingHeight`. + * Optional; when omitted no fragment is treated as a marker. + */ + sectPrMarkerBlockIds?: Set; } /** @@ -857,6 +862,7 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu fragments, columnCount, measureMap: args.measureMap, + sectPrMarkerBlockIds: args.sectPrMarkerBlockIds, }); // Order fragments in document order: by current column (x → left-to-right), @@ -878,7 +884,7 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu // blank line for such markers. const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ blockId: `${f.blockId}#${i}`, - measuredHeight: getBalancingHeight(f, args.measureMap), + measuredHeight: getBalancingHeight(f, args.measureMap, args.sectPrMarkerBlockIds), canBreak: false, keepWithNext: false, keepTogether: true, @@ -932,16 +938,19 @@ interface TableMeasureLike { } /** - * Row-boundary record used by the renderer to draw horizontal dividers. - * Mirrors the subset of `TableFragmentMetadata['rowBoundaries']` that the - * split helper needs to read and regenerate. + * Row-boundary record matching the contract `TableRowBoundary` shape. + * + * The DOM renderer serializes these into the compact `{i,y,h,min,r}` keys + * for `data-table-boundaries`; storing them in the contract shape here + * keeps the layout-engine/contract boundary intact and prevents `undefined` + * row-boundary values from reaching the renderer when a table is split. */ interface RowBoundaryLike { - i: number; + index: number; y: number; - h: number; - min: number; - r: number; + height: number; + minHeight: number; + resizable: boolean; } /** @@ -981,8 +990,9 @@ function splitDominantTableAtRowBoundary(args: { fragments: BalancingFragment[]; columnCount: number; measureMap: Map; + sectPrMarkerBlockIds?: Set; }): void { - const { sectionFragments, fragments, columnCount, measureMap } = args; + const { sectionFragments, fragments, columnCount, measureMap, sectPrMarkerBlockIds } = args; if (columnCount <= 1) return; const tables = sectionFragments.filter((f) => f.kind === 'table'); @@ -1003,11 +1013,14 @@ function splitDominantTableAtRowBoundary(args: { const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; - const totalSectionHeight = sectionFragments.reduce((sum, f) => sum + getBalancingHeight(f, measureMap), 0); + const totalSectionHeight = sectionFragments.reduce( + (sum, f) => sum + getBalancingHeight(f, measureMap, sectPrMarkerBlockIds), + 0, + ); if (totalSectionHeight <= 0) return; const target = totalSectionHeight / columnCount; - const tableHeight = getBalancingHeight(table, measureMap); + const tableHeight = getBalancingHeight(table, measureMap, sectPrMarkerBlockIds); // Small-epsilon guard: if the table alone fits within the target, the // atomic balancer already produces a correct assignment — splitting would // only introduce visual churn. @@ -1038,12 +1051,17 @@ function splitDominantTableAtRowBoundary(args: { // Regenerate rowBoundaries so the renderer draws horizontal dividers at the // right y-offsets inside each half. rowBoundaries are 0-origin within the // fragment; we walk the measure's rows for each half and accumulate. + // Use the contract `TableRowBoundary` shape ({index, y, height, minHeight, + // resizable}). The DOM renderer compresses these into {i,y,h,min,r} for the + // serialized data-table-boundaries attribute; emitting the compact shape + // here would produce undefined values after the renderer's projection and + // break interactive row resize handles for split fragments. const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { const out: RowBoundaryLike[] = []; let y = 0.5; for (let i = 0; i < rows.length; i++) { const h = rows[i].height ?? 0; - out.push({ i: startIndex + i, y, h, min: h, r: 1 }); + out.push({ index: startIndex + i, y, height: h, minHeight: h, resizable: true }); y += h; } return out; diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 60cdcd3e61..4f816acf3d 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1573,6 +1573,25 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const blockSectionMap = new Map(); const sectionColumnsMap = new Map(); const sectionHasExplicitColumnBreak = new Set(); + // Block IDs of empty paragraphs that exist only to carry sectPr properties. + // These are invisible in Word's output and must contribute zero height to + // balanced columns (ECMA-376 §17.18.77). Threading explicit metadata avoids + // the older `line.width === 0` heuristic, which incorrectly collapsed normal + // blank paragraphs and caused overlap on the next paragraph. + const sectPrMarkerBlockIds = new Set(); + // True if any block in the document is a column break. Used as a guard for + // the document-wide balancing fallback (Nick comment 2): when callers use + // LayoutOptions.columns without section metadata, we still want Word's + // balanced-final-page behavior unless the author placed an explicit column + // break, in which case we preserve their intent. + let documentHasExplicitColumnBreak = false; + // True if any block in the document is a sectionBreak. The document-wide + // fallback only fires when there are NO sectionBreak blocks — otherwise the + // section-scoped path is the source of truth (even if pm-adapter or a + // synthetic caller didn't stamp `attrs.sectionIndex`, treating it as a + // single fallback section would clobber regions that the mid-page handler + // already balanced). + let documentHasAnySectionBreak = false; // Tracks sections already balanced mid-page — the post-layout pass skips these // to avoid double-balancing, which would overlap fragments at the same x/y. const alreadyBalancedSections = new Set(); @@ -1589,17 +1608,33 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; - if (block.kind === 'sectionBreak' && typeof attrSectionIdx === 'number') { - currentSectionIdx = attrSectionIdx; - if (block.columns) { - sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + if (block.kind === 'sectionBreak') { + documentHasAnySectionBreak = true; + if (typeof attrSectionIdx === 'number') { + currentSectionIdx = attrSectionIdx; + if (block.columns) { + sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + } } } if (currentSectionIdx !== null) { blockSectionMap.set(block.id, currentSectionIdx); if (block.kind === 'columnBreak') { sectionHasExplicitColumnBreak.add(currentSectionIdx); + documentHasExplicitColumnBreak = true; } + } else if (block.kind === 'columnBreak') { + documentHasExplicitColumnBreak = true; + } + // Block paragraphs that exist only to carry sectPr metadata (pm-adapter + // sets this attr on otherwise-empty section-property paragraphs). These + // are invisible in Word's renderer and must not contribute height when + // balancing columns. + if ( + block.kind === 'paragraph' && + (blockWithAttrs as { attrs?: { sectPrMarker?: boolean } }).attrs?.sectPrMarker === true + ) { + sectPrMarkerBlockIds.add(block.id); } }); @@ -1996,6 +2031,7 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options columnWidth: normalized.width, availableHeight, measureMap: balancingMeasureMap, + sectPrMarkerBlockIds, }); if (balanceResult) { // Collapse both cursors to the balanced section bottom so the new @@ -2630,7 +2666,26 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options // Mid-page continuous breaks are handled in the layout loop itself (see the // forceMidPageRegion branch above). This post-layout pass handles sections that // end at a page boundary or at document end. - const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); + // + // Document-wide fallback: when callers pass `LayoutOptions.columns` directly + // without sectionBreak metadata, pm-adapter never stamps sectionIndex on any + // block and `sectionColumnsMap` stays empty. Synthesize a single virtual + // section that spans the whole document so multi-column callers still get + // their final page balanced (preserves the pre-SD-2452 behavior). Skip when + // the document carries an explicit column break — author intent wins. + const FALLBACK_SECTION_IDX = -1; + if ( + sectionColumnsMap.size === 0 && + !documentHasAnySectionBreak && + activeColumns.count > 1 && + !documentHasExplicitColumnBreak + ) { + sectionColumnsMap.set(FALLBACK_SECTION_IDX, cloneColumnLayout(activeColumns)); + for (const block of blocks) { + blockSectionMap.set(block.id, FALLBACK_SECTION_IDX); + } + } + for (const [sectionIdx, sectionCols] of sectionColumnsMap) { if (sectionCols.count <= 1) continue; if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; @@ -2645,8 +2700,22 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } if (!lastPageForSection) continue; - const normalized = normalizeColumns(sectionCols, contentWidth); - const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; + // Section-local page geometry. Each page snapshots its own margins and size + // at startNewPage time (paginator.ts), so different sections with different + // page setups (margins, paper size, orientation) carry their own values on + // their pages. Earlier code derived the content box from the FINAL active* + // state, which silently rewrote earlier sections' fragments using the last + // section's content width and left margin. Use the target page's metrics. + const sectionPageSize = lastPageForSection.size ?? pageSize; + const sectionPageMargins = lastPageForSection.margins; + const sectionLeftMargin = sectionPageMargins?.left ?? activeLeftMargin; + const sectionRightMargin = sectionPageMargins?.right ?? activeRightMargin; + const sectionTopMarginPx = sectionPageMargins?.top ?? activeTopMargin; + const sectionBottomMargin = sectionPageMargins?.bottom ?? activeBottomMargin; + const sectionContentWidth = sectionPageSize.w - (sectionLeftMargin + sectionRightMargin); + const sectionAvailableHeight = sectionPageSize.h - sectionBottomMargin - sectionTopMarginPx; + + const normalized = normalizeColumns(sectionCols, sectionContentWidth); balanceSectionOnPage({ fragments: lastPageForSection.fragments as BalancingFragment[], @@ -2660,11 +2729,12 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options }, sectionHasExplicitColumnBreak: false, // already filtered above blockSectionMap, - margins: { left: activeLeftMargin }, - topMargin: activeTopMargin, + margins: { left: sectionLeftMargin }, + topMargin: sectionTopMarginPx, columnWidth: normalized.width, - availableHeight, + availableHeight: sectionAvailableHeight, measureMap: balancingMeasureMap, + sectPrMarkerBlockIds, }); } From 634fb2e299e908906b2f8f2eb3b407121d2347ec Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 19:08:55 -0300 Subject: [PATCH 06/18] fix(layout-engine): gate column balancing on continuous break + not-last-section (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ECMA-376 §17.18.77 and the Linear spec for SD-2452, only continuous section breaks trigger column balancing. The previous post-layout pass balanced every multi-column section's last page regardless of break type, producing column distributions Word does not. Two cases need to be excluded: 1. Sections that end with a non-`continuous` break (`nextPage`, `evenPage`, `oddPage`). pm-adapter uses end-tagged section semantics, so `SectionBreakBlock.type` describes the break that ENDS the section. Documents like sd-1655-col-sep-3-equal-columns (3 cols, body sectPr only) and multi-column-sections.docx (default `nextPage` everywhere) were being rebalanced into 3+4+2 / 2+2 splits when Word fills column-by-column without balancing them at all. 2. The LAST section. The body sectPr is always the final section break and represents the document end, not a real mid-document break. Even when its type defaults to `continuous` (DEFAULT_BODY_SECTION_TYPE), there is no break AFTER its content to act as the balancing trigger. For single-section docs with multi-column body sectPr (sd-1655) Word does not balance, and now we don't either. Tracking: - `sectionEndBreakType: Map` records per-section the type of the break that closed the section (read from `block.type` on the SectionBreakBlock). - `lastSectionIdx` records the highest sectionIndex seen during the block walk; the gate skips it. - The synthesized fallback section (FALLBACK_SECTION_IDX = -1, used when callers pass `LayoutOptions.columns` without any pm-adapter section metadata) bypasses both gates so the document-wide fallback still fires for direct-API integrations. The mid-page balancing branch (`forceMidPageRegion`) is already gated correctly because it runs only inside the `block.type === 'continuous'` branch of `scheduleSectionBreakCompat`, and the section being closed mid-page can never be the last section. All 5 SD-2452 spec-test fixtures continue to balance correctly: spec-test-1: 3+3 / spec-test-2: 2+3 / spec-test-3: 3+3+1 spec-test-4: 7+7 / spec-test-5: 3+2 | 3+2 Regression docs now match Word: sd-1655-col-sep-3-equal-columns: was 3+4+2, now 7+1 (Word: 7+1) multi-column-sections: was balanced, now col-by-col (matches Word) multi_section_doc: was balanced, now col-by-col (matches Word) sd-2326-col-sep-continuous-section-break still balances 2+2 because its mid-document break is explicitly `continuous`. All 653 layout-engine unit tests pass. --- .../layout-engine/layout-engine/src/index.ts | 54 +++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 4f816acf3d..11518d7d42 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1573,6 +1573,25 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const blockSectionMap = new Map(); const sectionColumnsMap = new Map(); const sectionHasExplicitColumnBreak = new Set(); + // sectionIndex -> type of the section break that ENDS this section (per + // pm-adapter end-tagged semantics, ECMA-376 §17.6.17: a paragraph's sectPr + // describes the section ENDING at that paragraph, so SectionBreakBlock.type + // here is the type of the break that closes the section). Per ECMA-376 + // §17.18.77 only `continuous` breaks trigger column balancing — `nextPage`, + // `evenPage`, `oddPage` do not. Tracked here so the post-layout pass can + // skip the wrong section types. + const sectionEndBreakType = new Map(); + // sectionIndex of the LAST section in the document. The body sectPr is + // always the final section break and represents the end of the document, + // not an actual mid-document break. Even when its type defaults to + // `continuous` (DEFAULT_BODY_SECTION_TYPE), there is no break AFTER the + // last section's content to trigger balancing. Excluding the last section + // matches Word: a 3-column doc with only a body sectPr (e.g. + // `sd-1655-col-sep-3-equal-columns`) is NOT balanced — content fills + // top-to-bottom by column. Without this guard the previous post-layout + // pass over-balanced single-section docs and split heading/body across + // columns when Word kept them together. + let lastSectionIdx: number | null = null; // Block IDs of empty paragraphs that exist only to carry sectPr properties. // These are invisible in Word's output and must contribute zero height to // balanced columns (ECMA-376 §17.18.77). Threading explicit metadata avoids @@ -1612,9 +1631,13 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options documentHasAnySectionBreak = true; if (typeof attrSectionIdx === 'number') { currentSectionIdx = attrSectionIdx; + lastSectionIdx = attrSectionIdx; if (block.columns) { sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); } + if (typeof block.type === 'string') { + sectionEndBreakType.set(attrSectionIdx, block.type); + } } } if (currentSectionIdx !== null) { @@ -2691,6 +2714,37 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; if (alreadyBalancedSections.has(sectionIdx)) continue; + // Gate balancing on section break type (ECMA-376 §17.18.77 / Linear SD-2452): + // "Only continuous section breaks trigger balancing. Next-page breaks + // should NOT balance — columns fill top-to-bottom as SuperDoc currently does." + // + // pm-adapter uses end-tagged section semantics (ECMA-376 §17.6.17): the type + // on `sectionBreak[i]` describes the break that ENDS section i. Two cases + // need to be excluded: + // + // 1. Non-`continuous` end break (nextPage / evenPage / oddPage) — Word does + // NOT balance these. Documents like sd-1655-col-sep-3-equal-columns and + // multi-column-sections.docx all have `nextPage` (default for paragraph + // sectPr) and Word fills columns top-to-bottom. Balancing them split + // headings from their bodies and pushed content into columns Word left + // empty. + // + // 2. The LAST section. The body sectPr is always the final section break + // and represents the document end, not a real mid-document break. Even + // when its type defaults to `continuous` (DEFAULT_BODY_SECTION_TYPE in + // pm-adapter), there is no break AFTER the last section's content to + // trigger balancing. Excluding the last section matches Word for + // single-section docs. + // + // The fallback section index (FALLBACK_SECTION_IDX = -1) bypasses both gates + // because it is synthesized for callers passing LayoutOptions.columns + // without any section metadata at all (pre-pm-adapter integrations). + if (sectionIdx !== FALLBACK_SECTION_IDX) { + const endBreakType = sectionEndBreakType.get(sectionIdx); + if (endBreakType !== 'continuous') continue; + if (lastSectionIdx !== null && sectionIdx === lastSectionIdx) continue; + } + // Find the last page carrying any fragments from this section. let lastPageForSection: (typeof pages)[number] | null = null; for (const p of pages) { From a11cdf27cf2927d92e08989d07dc0107515df720 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 19:23:21 -0300 Subject: [PATCH 07/18] =?UTF-8?q?fix(layout-engine):=20refine=20balance=20?= =?UTF-8?q?gate=20=E2=80=94=20last=20section=20balances=20if=20multi-page?= =?UTF-8?q?=20(SD-2452)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous gate was too strict: it skipped balancing for the last section unconditionally, which regressed the existing baseline behavior for multi-page multi-column documents whose only section is the body sectPr (e.g. two_column_two_page-arial 2 page 17, where Word produces a 3+2 split — confirmed against Word's PDF render). Refined rule for the last section: balance only when the section spans multiple pages. Empirical Word behavior: - sd-1655-col-sep-3-equal-columns: 1 section, body sectPr, 1 page, 3 cols → Word does NOT balance (col 1 holds 6 paragraphs, col 2 holds 1, col 3 empty). Single-page → don't balance. - layout/two_column_two_page-arial 2: 1 section, body sectPr, 17 pages, 2 cols → Word balances the last page (3+2 split). Multi-page → balance. - multi-column-sections / multi_section_doc: each section is a single page, default `nextPage` between them → no balancing (already excluded by the non-`continuous` end-break check). - sd-2326-col-sep-continuous-section-break: explicit `continuous` mid- document break → balance (already covered by the non-last branch). Implementation: when sectionIdx === lastSectionIdx, count pages whose fragments belong to that section. If the count is ≤ 1, skip balancing. The check short-circuits at >1 to avoid scanning the full page list. Corpus impact (vs npm@latest 1.31.1, after merging main): - 374 docs total, 363 unchanged, 11 changed (2 unique + 9 widespread) - The 9 widespread-only changes are all `pages[*].fragments[*].x|y` on a single page each — the SD-2452 balancing applied to the correct subset of multi-page multi-column sections. - All 5 SD-2452 spec-test fixtures continue to balance correctly: spec-test-1: 3+3 / spec-test-2: 2+3 / spec-test-3: 3+3+1 spec-test-4: 7+7 / spec-test-5: 3+2 | 3+2 All 653 layout-engine unit tests pass. --- .../layout-engine/layout-engine/src/index.ts | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 72725bb3aa..02f5b7205b 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -2710,35 +2710,43 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; if (alreadyBalancedSections.has(sectionIdx)) continue; - // Gate balancing on section break type (ECMA-376 §17.18.77 / Linear SD-2452): - // "Only continuous section breaks trigger balancing. Next-page breaks - // should NOT balance — columns fill top-to-bottom as SuperDoc currently does." + // Gate balancing on section-end break type AND whether the section spans + // multiple pages (ECMA-376 §17.18.77 / Linear SD-2452, refined against + // empirical Word behavior): // - // pm-adapter uses end-tagged section semantics (ECMA-376 §17.6.17): the type - // on `sectionBreak[i]` describes the break that ENDS section i. Two cases - // need to be excluded: - // - // 1. Non-`continuous` end break (nextPage / evenPage / oddPage) — Word does - // NOT balance these. Documents like sd-1655-col-sep-3-equal-columns and - // multi-column-sections.docx all have `nextPage` (default for paragraph - // sectPr) and Word fills columns top-to-bottom. Balancing them split - // headings from their bodies and pushed content into columns Word left - // empty. + // - Continuous mid-document break ending the section → balance (Word). + // - Non-`continuous` break (nextPage / evenPage / oddPage) → don't balance. + // - Body sectPr (last section, defaults to `continuous` per pm-adapter + // DEFAULT_BODY_SECTION_TYPE): balance ONLY if the section spans multiple + // pages. Word balances the last page of multi-page multi-column body + // sections (e.g. two_column_two_page-arial 2 page 17 splits 3+2) but + // does NOT balance single-page sections (e.g. sd-1655 fills col 1 then + // col 2 without redistributing). // - // 2. The LAST section. The body sectPr is always the final section break - // and represents the document end, not a real mid-document break. Even - // when its type defaults to `continuous` (DEFAULT_BODY_SECTION_TYPE in - // pm-adapter), there is no break AFTER the last section's content to - // trigger balancing. Excluding the last section matches Word for - // single-section docs. + // pm-adapter uses end-tagged section semantics (ECMA-376 §17.6.17): the type + // on `sectionBreak[i]` describes the break that ENDS section i. The body + // sectPr is the final section break and gets the default `continuous` type, + // so we can't distinguish explicit vs default `continuous` from the type + // alone — the multi-page check supplies the missing signal for the last + // section. Earlier sections rely on the type being non-default if it's + // `continuous` (because paragraph sectPr defaults to `nextPage`). // - // The fallback section index (FALLBACK_SECTION_IDX = -1) bypasses both gates - // because it is synthesized for callers passing LayoutOptions.columns - // without any section metadata at all (pre-pm-adapter integrations). + // FALLBACK_SECTION_IDX (-1) bypasses both gates because the synthesized + // fallback fires only when no pm-adapter section metadata exists at all. if (sectionIdx !== FALLBACK_SECTION_IDX) { const endBreakType = sectionEndBreakType.get(sectionIdx); if (endBreakType !== 'continuous') continue; - if (lastSectionIdx !== null && sectionIdx === lastSectionIdx) continue; + + if (lastSectionIdx !== null && sectionIdx === lastSectionIdx) { + let sectionPagesCount = 0; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + sectionPagesCount += 1; + if (sectionPagesCount > 1) break; + } + } + if (sectionPagesCount <= 1) continue; + } } // Find the last page carrying any fragments from this section. From 50dc1f7aa130a96cd4c9de1a337fec33e9ad818a Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 19:40:18 -0300 Subject: [PATCH 08/18] fix(layout-engine): address luccas review comments (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - index.ts mid-page balance: page-break fallback now triggers whenever balanceSectionOnPage returns null, not only when willBalance was false. willBalance is a coarse approval; balanceSectionOnPage has its own late skip conditions (unequal column widths, zero remaining height, shouldSkipBalancing thresholds) that can return null even after willBalance=true. Without the broader check, the new region started on the same page from a stale column index and overwrote the previous section's column content. - column-balancing.ts split target: subtract preceding-fragment height from totalSectionHeight / columnCount before walking the table rows. A 100px paragraph + 300px table in 2 cols hit target=200 and split the table at row=200 (cols 100+200 / 100, max=300); subtracting the 100 leading height gives target=150 → splits at row=100 (cols 100+100 / 200, max=200), matching the achievable balanced height. - column-balancing.ts split continuesOnNext: capture the original value BEFORE setting `table.continuesOnNext = true`. The previous ternary read the field after the mutation, always saw `true`, and the second half always inherited `false`. Now the second half correctly inherits the source table's cross-page continuation. - column-balancing.ts split rollback: splitDominantTableAtRowBoundary now returns a rollback closure. balanceSectionOnPage invokes it when shouldSkipBalancing fires post-split, so the page never carries an overlapping half table when balancing is ultimately skipped. The ordering (split-then-skip) is intentional — split rescues the single-unbreakable case that pre-split skip would otherwise reject — but with rollback the mutation no longer survives a late skip. - column-balancing.ts: remove balancePageColumns and its test block. The function had no production callers after balanceSectionOnPage became the only entry point. Its shared helper (createMeasure) is inlined into the balanceSectionOnPage tests. - super-editor sections-resolver.ts: add startNodeIndex / endNodeIndex to the synthetic SectionRange. Required after the main-merge that added these fields to SectionRange (commit 85a503ce5). Fixes the TS2739 build error luccas reported. All 644 layout-engine unit tests pass. super-editor build is clean. --- .../src/column-balancing.test.ts | 243 +----------------- .../layout-engine/src/column-balancing.ts | 243 +++++++----------- .../layout-engine/layout-engine/src/index.ts | 25 +- .../helpers/sections-resolver.ts | 5 +- 4 files changed, 111 insertions(+), 405 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.test.ts b/packages/layout-engine/layout-engine/src/column-balancing.test.ts index 6e83a179b6..9b308ce7d1 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.test.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.test.ts @@ -9,7 +9,6 @@ import { calculateBalancedColumnHeight, shouldBalanceColumns, shouldSkipBalancing, - balancePageColumns, DEFAULT_BALANCING_CONFIG, type BalancingContext, type BalancingBlock, @@ -307,21 +306,10 @@ describe('DEFAULT_BALANCING_CONFIG', () => { }); // ============================================================================ -// balancePageColumns Tests (Post-Layout Balancing) +// balanceSectionOnPage Tests (Section-scoped balancing) // ============================================================================ -/** - * Helper to create a mock fragment for balancePageColumns testing. - */ -function createFragment( - blockId: string, - x: number, - y: number, - width: number, - kind: string = 'para', -): { x: number; y: number; width: number; kind: string; blockId: string } { - return { x, y, width, kind, blockId }; -} +import { balanceSectionOnPage } from './column-balancing.js'; /** * Helper to create measure data for paragraph fragments. @@ -333,233 +321,6 @@ function createMeasure(kind: string, lineHeights: number[]): { kind: string; lin }; } -describe('balancePageColumns', () => { - describe('basic balancing', () => { - it('balances 4 equal blocks into 2+2 across 2 columns', () => { - // 4 fragments × 20px each in a 2-col section. Word minimizes section height by - // placing 2 per column (40px per col) rather than 1+3 (max 60px). - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 40, measureMap); - - // First half in col 0, second half in col 1 — minimum section height. - expect(fragments[0].x).toBe(96); - expect(fragments[1].x).toBe(96); - expect(fragments[2].x).toBe(432); - expect(fragments[3].x).toBe(432); - }); - - it('should set fragment width to column width', () => { - const fragments = [createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624)]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 30, measureMap); - - // Both fragments should have width set to column width - expect(fragments[0].width).toBe(288); - expect(fragments[1].width).toBe(288); - }); - - it('should reset Y positions in each column to start from top margin', () => { - // Use 6 fragments so we get a more even split for Y testing - // 6 * 20px = 120px total, target = 60px - // Blocks 1, 2 = 40px, block 3 would make 60px >= 60px, switch! - // Column 0: blocks 1, 2. Column 1: blocks 3, 4, 5, 6 - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - createFragment('block-5', 96, 176, 624), - createFragment('block-6', 96, 196, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ['block-5', createMeasure('paragraph', [20])], - ['block-6', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 50, measureMap); - - // Column 0: blocks 1, 2 - Y positions stack from top - expect(fragments[0].y).toBe(96); - expect(fragments[1].y).toBe(116); - // Column 1: blocks 3+ - Y resets to top margin - expect(fragments[2].y).toBe(96); - expect(fragments[3].y).toBe(116); - }); - }); - - describe('column switching threshold', () => { - it('should switch columns when target height is REACHED (not just exceeded)', () => { - // This tests the >= vs > fix: Word switches when target is reached, not exceeded. - // 6 fragments: 3 at 20px each = 60px, we want exactly 30px per column - // With >= condition: switch happens when 30px is reached - // With > condition: would need 30+ to switch - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - createFragment('block-5', 96, 176, 624), - createFragment('block-6', 96, 196, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ['block-5', createMeasure('paragraph', [20])], - ['block-6', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 50, measureMap); - - // With >= condition: target = 120px / 2 = 60px per column - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 < 60, stay in column 0 - // Block 3 (20px): 40+20=60 >= 60, SWITCH to column 1 - // Blocks 4,5,6 stay in column 1 - expect(fragments[0].x).toBe(96); - expect(fragments[1].x).toBe(96); - expect(fragments[2].x).toBe(432); // Switched because 40+20=60 >= 60 - expect(fragments[3].x).toBe(432); - expect(fragments[4].x).toBe(432); - expect(fragments[5].x).toBe(432); - }); - - it('should match Word behavior with uneven height distribution', () => { - // Simulates the sd-1480 test document scenario: - // Block 1: 21px (1 line) - // Block 2: 42px (2 lines) - // Block 3: 21px (1 line) - // Block 4: 21px (1 line) - // Block 5: 42px (2 lines) - // Block 6: 21px (1 line) - // Total: 168px, target: 84px - // With >= : blocks 1,2 (63px) + block 3 (21px) = 84px >= 84px → switch - // Word puts blocks 1,2 in column 0, blocks 3,4,5,6 in column 1 - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 117, 624), - createFragment('block-3', 96, 159, 624), - createFragment('block-4', 96, 180, 624), - createFragment('block-5', 96, 201, 624), - createFragment('block-6', 96, 243, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [21])], - ['block-2', createMeasure('paragraph', [21, 21])], // 2 lines - ['block-3', createMeasure('paragraph', [21])], - ['block-4', createMeasure('paragraph', [21])], - ['block-5', createMeasure('paragraph', [21, 21])], // 2 lines - ['block-6', createMeasure('paragraph', [21])], - ]); - - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 70, measureMap); - - // Blocks 1, 2 should be in column 0 - expect(fragments[0].x).toBe(96); - expect(fragments[1].x).toBe(96); - // Blocks 3, 4, 5, 6 should be in column 1 - expect(fragments[2].x).toBe(432); - expect(fragments[3].x).toBe(432); - expect(fragments[4].x).toBe(432); - expect(fragments[5].x).toBe(432); - }); - }); - - describe('edge cases', () => { - it('should skip balancing for single column layout', () => { - const fragments = [createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624)]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ]); - - // Original positions - const origX1 = fragments[0].x; - const origX2 = fragments[1].x; - - balancePageColumns(fragments, { count: 1, gap: 0, width: 624 }, { left: 96 }, 96, 1000, measureMap); - - // Should not modify positions for single column - expect(fragments[0].x).toBe(origX1); - expect(fragments[1].x).toBe(origX2); - }); - - it('should skip balancing for empty fragments array', () => { - const fragments: { x: number; y: number; width: number; kind: string; blockId: string }[] = []; - const measureMap = new Map }>(); - - // Should not throw - expect(() => - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 1000, measureMap), - ).not.toThrow(); - }); - - it('should handle fragments with missing measure data', () => { - const fragments = [createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624)]; - // Only provide measure for first block - const measureMap = new Map([['block-1', createMeasure('paragraph', [20])]]); - - // Should not throw - block-2 will have height 0 - expect(() => - balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 10, measureMap), - ).not.toThrow(); - }); - - it('should handle 3-column layout', () => { - // 6 fragments for 3 columns = 2 per column - const fragments = [ - createFragment('block-1', 96, 96, 624), - createFragment('block-2', 96, 116, 624), - createFragment('block-3', 96, 136, 624), - createFragment('block-4', 96, 156, 624), - createFragment('block-5', 96, 176, 624), - createFragment('block-6', 96, 196, 624), - ]; - const measureMap = new Map([ - ['block-1', createMeasure('paragraph', [20])], - ['block-2', createMeasure('paragraph', [20])], - ['block-3', createMeasure('paragraph', [20])], - ['block-4', createMeasure('paragraph', [20])], - ['block-5', createMeasure('paragraph', [20])], - ['block-6', createMeasure('paragraph', [20])], - ]); - - balancePageColumns(fragments, { count: 3, gap: 24, width: 192 }, { left: 96 }, 96, 30, measureMap); - - // Verify 3 columns are used - const colXValues = new Set(fragments.map((f) => f.x)); - expect(colXValues.size).toBeLessThanOrEqual(3); - }); - }); -}); - -// ============================================================================ -// balanceSectionOnPage Tests (Section-scoped balancing) -// ============================================================================ - -import { balanceSectionOnPage } from './column-balancing.js'; - describe('balanceSectionOnPage', () => { type TestFragment = { blockId: string; x: number; y: number; width: number; kind: string }; diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index e7420742b0..a367d3c823 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -615,147 +615,8 @@ function getBalancingHeight( return getFragmentHeight(fragment, measureMap); } -/** - * Balances column content on a page by redistributing fragments. - * - * This function post-processes a page's fragments to achieve balanced column heights, - * matching Microsoft Word's column balancing behavior. It: - * - * 1. Groups fragments into logical rows by Y position - * 2. Calculates total content height and target height per column - * 3. Redistributes rows across columns using a greedy algorithm - * 4. Updates fragment x, y, and width properties in place - * - * The algorithm switches to the next column when adding a row would reach or exceed - * the target height (using >= comparison to match Word's behavior). - * - * @param fragments - Page fragments to balance (mutated in place) - * @param columns - Column configuration with count, gap between columns, and column width - * @param margins - Page margins (left margin determines column 0 start position) - * @param topMargin - Top margin where content starts vertically - * @param measureMap - Map of block IDs to measure data for height calculation - * - * @example - * ```typescript - * balancePageColumns( - * page.fragments, - * { count: 2, gap: 48, width: 288 }, - * { left: 96 }, - * 96, - * measureMap - * ); - * // Fragments are now redistributed: first half at x=96, second half at x=432 - * ``` - */ -export function balancePageColumns( - fragments: BalancingFragment[], - columns: { count: number; gap: number; width: number }, - margins: { left: number }, - topMargin: number, - availableHeight: number, - measureMap: Map, -): void { - // Skip balancing for single-column layouts or empty pages - if (columns.count <= 1 || fragments.length === 0) { - return; - } - - /** - * Calculates the X position for a given column index. - * Column 0 starts at the left margin, subsequent columns offset by (width + gap). - */ - const columnX = (columnIndex: number): number => { - return margins.left + columnIndex * (columns.width + columns.gap); - }; - - // Group fragments by Y position into logical rows. - // Fragments at the same Y coordinate are part of the same row and move together. - const rowMap = new Map(); - fragments.forEach((fragment, idx) => { - // Round Y to handle floating point precision - const y = Math.round(fragment.y); - if (!rowMap.has(y)) { - rowMap.set(y, []); - } - const height = getFragmentHeight(fragment, measureMap); - rowMap.get(y)!.push({ - fragment, - height, - originalIndex: idx, - }); - }); - - // Sort rows by Y position (top to bottom) - const sortedRows = [...rowMap.entries()].sort((a, b) => a[0] - b[0]); - - // Calculate total content height by summing max height of each row - let totalHeight = 0; - const contentBlocks: BalancingBlock[] = []; - for (const [, rowFragments] of sortedRows) { - const maxHeight = Math.max(...rowFragments.map((f) => f.height)); - totalHeight += maxHeight; - contentBlocks.push({ - blockId: rowFragments[0]?.fragment.blockId ?? `row-${contentBlocks.length}`, - measuredHeight: maxHeight, - canBreak: false, - keepWithNext: false, - keepTogether: true, - }); - } - - if ( - shouldSkipBalancing({ - columnCount: columns.count, - columnWidth: columns.width, - columnGap: columns.gap, - availableHeight, - contentBlocks, - }) - ) { - return; - } - - // Skip balancing if balanced height per column would be below minimum threshold - if (totalHeight / columns.count < DEFAULT_BALANCING_CONFIG.minColumnHeight) { - return; - } - - // Delegate to the binary-search algorithm to find the minimum section height - // where all content fits across N columns. This matches Word's behavior: Word - // finds the smallest max-column-height that keeps content within constraints, - // rather than greedily splitting at total/N (which can leave col1 barely - // populated when one paragraph is much taller than the rest). - const result = calculateBalancedColumnHeight( - { - columnCount: columns.count, - columnWidth: columns.width, - columnGap: columns.gap, - availableHeight, - contentBlocks, - }, - DEFAULT_BALANCING_CONFIG, - ); - - // Apply the assignments to fragments: pack each column top-to-bottom from topMargin, - // indexing back into sortedRows via the same ordering used to build contentBlocks. - const colCursors = new Array(columns.count).fill(topMargin); - for (let i = 0; i < sortedRows.length; i++) { - const [, rowFragments] = sortedRows[i]; - const block = contentBlocks[i]; - const col = result.columnAssignments.get(block.blockId) ?? 0; - const colX = columnX(col); - const rowHeight = block.measuredHeight; - for (const info of rowFragments) { - info.fragment.x = colX; - info.fragment.y = colCursors[col]; - info.fragment.width = columns.width; - } - colCursors[col] += rowHeight; - } -} - // ============================================================================ -// Section-scoped balancing (wraps balancePageColumns with per-section guards) +// Section-scoped balancing // ============================================================================ /** @@ -857,12 +718,28 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a // 515px / 28-row table; splitting into ~257px halves lets the balancer // assign half to each column. - splitDominantTableAtRowBoundary({ + // + // The split takes the cumulative height of fragments preceding the table + // (in document order) so the per-column target accounts for content + // already destined for column 0. Without this, a 100px paragraph + 300px + // table in 2 cols hits target=200, splits the table at row=200 → 100+200 + // / 100; subtracting the leading 100 gives target=150 → 100+100 / 200. + // + // The split returns a rollback closure. We invoke it if the post-split + // shouldSkipBalancing check still rejects, so the page never carries a + // mutated half table when balancing was ultimately skipped. + let precedingHeightBeforeTable = 0; + for (const f of sectionFragments) { + if (f.kind === 'table') break; + precedingHeightBeforeTable += getBalancingHeight(f, args.measureMap, args.sectPrMarkerBlockIds); + } + const splitResult = splitDominantTableAtRowBoundary({ sectionFragments, fragments, columnCount, measureMap: args.measureMap, sectPrMarkerBlockIds: args.sectPrMarkerBlockIds, + precedingHeight: precedingHeightBeforeTable, }); // Order fragments in document order: by current column (x → left-to-right), @@ -899,6 +776,7 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu contentBlocks, }) ) { + splitResult?.rollback(); return null; } @@ -985,18 +863,42 @@ interface RowBoundaryLike { * full page fragments (mutated in place), column count, and the * measure map. */ +/** + * Result of attempting a dominant-table split. When `applied` is true the + * caller is responsible for invoking `rollback()` if downstream balancing + * decides to skip — otherwise the page is left with overlapping table halves + * (the original table mutated to a partial range plus the inserted second + * half). Returns `null` when the split preconditions (single splittable + * table, rowSpan ≥ 2, oversized vs target) aren't met, in which case nothing + * was mutated and there is nothing to roll back. + */ +type DominantTableSplitResult = { + applied: true; + rollback: () => void; +} | null; + function splitDominantTableAtRowBoundary(args: { sectionFragments: BalancingFragment[]; fragments: BalancingFragment[]; columnCount: number; measureMap: Map; sectPrMarkerBlockIds?: Set; -}): void { + /** + * Cumulative height of fragments already placed in earlier columns, BEFORE + * the dominant table. Subtracted from the per-column target so the split + * row produces halves that pack alongside preceding atomic blocks (e.g. + * a 100px paragraph + 300px table in 2 cols should split the table at + * row=100 → 100+100 vs 100+200, not row=200 → 100+200 vs 100). Defaults + * to 0 when caller doesn't track this. + */ + precedingHeight?: number; +}): DominantTableSplitResult { const { sectionFragments, fragments, columnCount, measureMap, sectPrMarkerBlockIds } = args; - if (columnCount <= 1) return; + const precedingHeight = Math.max(0, args.precedingHeight ?? 0); + if (columnCount <= 1) return null; const tables = sectionFragments.filter((f) => f.kind === 'table'); - if (tables.length !== 1) return; + if (tables.length !== 1) return null; const table = tables[0] as BalancingFragment & { fromRow?: number; toRow?: number; @@ -1008,23 +910,31 @@ function splitDominantTableAtRowBoundary(args: { const fromRow = table.fromRow ?? 0; const toRow = table.toRow ?? fromRow; const rowSpan = toRow - fromRow; - if (rowSpan < 2) return; + if (rowSpan < 2) return null; const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; - if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return null; const totalSectionHeight = sectionFragments.reduce( (sum, f) => sum + getBalancingHeight(f, measureMap, sectPrMarkerBlockIds), 0, ); - if (totalSectionHeight <= 0) return; - const target = totalSectionHeight / columnCount; + if (totalSectionHeight <= 0) return null; + // Per-column target. Subtract the height already placed in earlier columns + // before this table so the split row produces halves that pack alongside + // those preceding atomic blocks. Without this adjustment, e.g. a 100px + // paragraph + 300px table in 2 cols hits target=200, splits the table + // at row=200 → cols 100+200 vs 100, max=300; subtracting precedingHeight + // gives target=150 → splits at row=100 → cols 100+100 vs 200, max=200. + // Floor at 1 so the pathological "preceding height already exceeds + // target" case still yields a forward-progressing split row search. + const target = Math.max(1, totalSectionHeight / columnCount - precedingHeight); const tableHeight = getBalancingHeight(table, measureMap, sectPrMarkerBlockIds); // Small-epsilon guard: if the table alone fits within the target, the // atomic balancer already produces a correct assignment — splitting would // only introduce visual churn. - if (tableHeight <= target + 1) return; + if (tableHeight <= target + 1) return null; // Find the row K in [fromRow + 1, toRow) such that cumulative height from // fromRow to K first reaches the target. Guaranteed to succeed because @@ -1041,7 +951,7 @@ function splitDominantTableAtRowBoundary(args: { splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row } // Clamp to valid range (defensive — loop logic above should always hit the break). - if (splitRow <= fromRow || splitRow >= toRow) return; + if (splitRow <= fromRow || splitRow >= toRow) return null; const firstHalfRows = measure.rows.slice(fromRow, splitRow); const secondHalfRows = measure.rows.slice(splitRow, toRow); @@ -1081,6 +991,20 @@ function splitDominantTableAtRowBoundary(args: { } : undefined; + // Capture original mutable fields BEFORE applying the split. Required for: + // (a) Rollback: if the caller decides to skip balancing post-split, we + // restore the table fragment to its pre-split state. + // (b) Correctly inheriting `continuesOnNext` on the second half. Reading + // `table.continuesOnNext` AFTER setting `table.continuesOnNext = true` + // always yielded `true`, so the prior `? false : (… ?? false)` + // ternary collapsed to `false` and the second half could never inherit + // the original cross-page continuation. Capturing first preserves the + // original intent: if the source table continued onto a later page, + // the SECOND half is the one that now carries that continuation. + const originalToRow = table.toRow; + const originalHeight = table.height; + const originalContinuesOnNext = table.continuesOnNext ?? false; + // Construct the first half by mutating the original (preserves object // identity so `fragments.indexOf(table)` still works below). table.toRow = splitRow; @@ -1094,12 +1018,9 @@ function splitDominantTableAtRowBoundary(args: { toRow, height: secondHalfHeight, continuesFromPrev: true, - continuesOnNext: table.continuesOnNext ? false : (table.continuesOnNext ?? false), + continuesOnNext: originalContinuesOnNext, metadata: secondMetadata ?? table.metadata, } as BalancingFragment; - // Preserve the original's continuesOnNext intent: if the original fragment - // continued onto a later page, the SECOND half now inherits that role. - (secondHalf as { continuesOnNext?: boolean }).continuesOnNext = false; // Insert the second half right after the first in both arrays so the // balancer's (x, y) ordering keeps them adjacent in document order. @@ -1107,4 +1028,18 @@ function splitDominantTableAtRowBoundary(args: { if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); const sectIdx = sectionFragments.indexOf(table); if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); + + return { + applied: true, + rollback: () => { + table.toRow = originalToRow; + table.height = originalHeight; + table.continuesOnNext = originalContinuesOnNext; + table.metadata = originalMetadata; + const fIdx = fragments.indexOf(secondHalf); + if (fIdx >= 0) fragments.splice(fIdx, 1); + const sIdx = sectionFragments.indexOf(secondHalf); + if (sIdx >= 0) sectionFragments.splice(sIdx, 1); + }, + }; } diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 02f5b7205b..f2b5046f3b 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -2021,10 +2021,15 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options // vertical region — there's no risk of the new 1-col region overwriting // prior column content, because the cursor moves to maxY below them. // - // When not balancing (e.g. single-col → multi-col, or explicit column - // break), fall back to the original "force a new page if currently in a - // column that won't exist after the change" guard so new content doesn't - // overwrite earlier column positions on the same page. + // `willBalance` is a coarse approval: balanceSectionOnPage has its own + // late skip conditions (unequal column widths, zero remaining height, + // section content too small for shouldSkipBalancing's thresholds) that + // can return null even when willBalance was true. The page-break + // fallback below must consider the actual balance outcome, not just + // willBalance, otherwise we leave the new region starting on the same + // page from a stale column index and overwriting the previous + // section's column content. + let balanceResult: { maxY: number } | null = null; if (willBalance) { // The current region starts at the last constraint boundary's Y, or at // the page's top margin if no mid-page region change has happened yet. @@ -2033,7 +2038,7 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); const normalized = normalizeColumns(endingSectionColumns!, contentWidth); - const balanceResult = balanceSectionOnPage({ + balanceResult = balanceSectionOnPage({ fragments: state.page.fragments as BalancingFragment[], sectionIndex: endingSectionIndex!, sectionColumns: { @@ -2059,10 +2064,12 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options state.maxCursorY = balanceResult.maxY; alreadyBalancedSections.add(endingSectionIndex!); } - } else if (columnIndexBefore >= newColumns.count) { - // Non-balancing case: reducing column count without balancing means - // starting the new region at col 0 could overwrite earlier column - // content. Force a fresh page to avoid that. + } + if (balanceResult === null && columnIndexBefore >= newColumns.count) { + // No balancing applied (either willBalance was false, or + // balanceSectionOnPage skipped late). Reducing column count without + // balancing means starting the new region at col 0 could overwrite + // earlier column content. Force a fresh page to avoid that. state = paginator.startNewPage(); } diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts index bd02088604..e62dc395e0 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts @@ -165,10 +165,13 @@ function readOddEvenHeadersFlag(editor: Editor): boolean { } function createSyntheticRange(bodySectPr: XmlElement | null, paragraphCount: number): SectionRange { + const lastIndex = Math.max(paragraphCount - 1, 0); return { sectionIndex: 0, + startNodeIndex: 0, + endNodeIndex: lastIndex, startParagraphIndex: 0, - endParagraphIndex: Math.max(paragraphCount - 1, 0), + endParagraphIndex: lastIndex, sectPr: (bodySectPr as unknown as SectionRange['sectPr']) ?? null, margins: null, pageSize: null, From a5ff6ed2641bccbec8cd154bac20e33059020ffc Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 19:54:12 -0300 Subject: [PATCH 09/18] fix(painter): suppress column separator over empty column (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Word draws a column separator only between columns that BOTH have content within the region. The renderer was drawing the separator full-height whenever `withSeparator: true` and `count > 1`, regardless of whether the column to the right of the boundary had any fragments. This produced a spurious vertical line on pages whose section content fits in column 0 (e.g. multi-column-sections.docx page 2 — Word shows nothing, we drew a line top-to-bottom of the column area). Gate each separator on fragment presence past the boundary within the region's y range: hasContentPastSeparator = page.fragments.some(f => f.x >= separatorX && f.y >= yStart - 0.5 && f.y < yEnd + 0.5) Verified against Word renderings: - multi-column-sections page 2 (col 1 only) → 0 separators ✓ - sd-1655 (3 cols, col 3 empty) → 1 separator (col 1↔2) ✓ - sd-2326 (mid-doc continuous, balanced 2 cols) → separator drawn ✓ - two_column_two_page-arial 2 page 17 (balanced) → separator drawn ✓ Tests updated to reflect the gate. Existing 15 separator tests now seed each verified column with a stub fragment so they pin down geometry, not the gate. 3 new tests pin down the gate behavior: - suppresses separator when right column is empty - draws only the separator whose right neighbor has content - checks fragment presence within the region, not whole-page 1052 painter-dom tests pass. 644 layout-engine tests pass. --- .../src/renderer-column-separators.test.ts | 116 ++++++++++++++++-- .../painters/dom/src/renderer.ts | 11 ++ 2 files changed, 120 insertions(+), 7 deletions(-) diff --git a/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts b/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts index 6b8c2e27e2..8d90167c3c 100644 --- a/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts +++ b/packages/layout-engine/painters/dom/src/renderer-column-separators.test.ts @@ -1,14 +1,29 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { createDomPainter } from './index.js'; -import type { ColumnRegion, Layout, Page } from '@superdoc/contracts'; +import type { ColumnRegion, Fragment, Layout, Page } from '@superdoc/contracts'; // These tests pin down DomPainter's column-separator rendering: // - the fallback path (page.columns only, no mid-page regions) // - the region-aware path (page.columnRegions supersedes page.columns) -// - all five early-return guards inside renderColumnSeparators +// - the early-return guards inside renderColumnSeparators +// - the content-presence gate: per Word's behavior, a separator is +// suppressed when the column to its right is empty within the region. // The layout-engine tests cover which data reaches the painter; these tests // cover what the painter does with it. +// Minimal fragment factory for separator-presence assertions. We only need x +// (column placement) and y (region membership). The other fields are required +// by the contract but not read by renderColumnSeparators. +const fragAt = (x: number, y: number = 100): Fragment => ({ + kind: 'para', + blockId: `frag-${x}-${y}`, + fromLine: 0, + toLine: 1, + x, + y, + width: 100, +}); + const buildPage = (overrides: Partial = {}): Page => ({ number: 1, fragments: [], @@ -50,7 +65,12 @@ describe('DomPainter renderColumnSeparators', () => { describe('fallback path (page.columns only)', () => { it('draws a single separator centered in the gap for 2 equal columns', () => { - const page = buildPage({ columns: { count: 2, gap: 48, withSeparator: true } }); + // 2 cols at x=96 and x=384 (96+288). Fragments in both → separator + // gate is satisfied; the test pins down geometry, not the gate. + const page = buildPage({ + columns: { count: 2, gap: 48, withSeparator: true }, + fragments: [fragAt(96), fragAt(432)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -64,7 +84,10 @@ describe('DomPainter renderColumnSeparators', () => { }); it('draws count-1 separators for 3 equal columns', () => { - const page = buildPage({ columns: { count: 3, gap: 48, withSeparator: true } }); + const page = buildPage({ + columns: { count: 3, gap: 48, withSeparator: true }, + fragments: [fragAt(96), fragAt(320), fragAt(544)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -77,6 +100,7 @@ describe('DomPainter renderColumnSeparators', () => { it('uses explicit column widths when drawing separators for page.columns', () => { const page = buildPage({ columns: { count: 2, gap: 48, widths: [200, 952], equalWidth: false, withSeparator: true }, + fragments: [fragAt(96), fragAt(244)], }); paintOnce(buildLayout(page), mount); @@ -146,6 +170,9 @@ describe('DomPainter renderColumnSeparators', () => { const page = buildPage({ columns: regions[0].columns, columnRegions: regions, + // Region 0 has fragments in both 2-col positions; Region 1 has one + // in each of three columns. + fragments: [fragAt(96, 200), fragAt(432, 200), fragAt(96, 500), fragAt(320, 500), fragAt(544, 500)], }); paintOnce(buildLayout(page), mount); @@ -173,7 +200,17 @@ describe('DomPainter renderColumnSeparators', () => { { yStart: 400, yEnd: 700, columns: { count: 2, gap: 48, withSeparator: false } }, { yStart: 700, yEnd: 960, columns: { count: 2, gap: 48, withSeparator: true } }, ]; - const page = buildPage({ columnRegions: regions }); + const page = buildPage({ + columnRegions: regions, + fragments: [ + fragAt(96, 200), + fragAt(432, 200), + fragAt(96, 500), + fragAt(432, 500), + fragAt(96, 800), + fragAt(432, 800), + ], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -188,7 +225,10 @@ describe('DomPainter renderColumnSeparators', () => { { yStart: 96, yEnd: 400, columns: { count: 1, gap: 0, withSeparator: true } }, { yStart: 400, yEnd: 700, columns: { count: 2, gap: 48, withSeparator: true } }, ]; - const page = buildPage({ columnRegions: regions }); + const page = buildPage({ + columnRegions: regions, + fragments: [fragAt(96, 500), fragAt(432, 500)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -201,7 +241,10 @@ describe('DomPainter renderColumnSeparators', () => { { yStart: 96, yEnd: 96, columns: { count: 2, gap: 48, withSeparator: true } }, { yStart: 96, yEnd: 500, columns: { count: 2, gap: 48, withSeparator: true } }, ]; - const page = buildPage({ columnRegions: regions }); + const page = buildPage({ + columnRegions: regions, + fragments: [fragAt(96, 200), fragAt(432, 200)], + }); paintOnce(buildLayout(page), mount); const seps = querySeparators(mount); @@ -216,6 +259,7 @@ describe('DomPainter renderColumnSeparators', () => { const page = buildPage({ columns: { count: 2, gap: 48, withSeparator: false }, columnRegions: [{ yStart: 96, yEnd: 960, columns: { count: 2, gap: 48, withSeparator: true } }], + fragments: [fragAt(96), fragAt(432)], }); paintOnce(buildLayout(page), mount); @@ -234,6 +278,7 @@ describe('DomPainter renderColumnSeparators', () => { columns: { count: 2, gap: 48, widths: [200, 952], equalWidth: false, withSeparator: true }, }, ], + fragments: [fragAt(96, 200), fragAt(244, 200)], }); paintOnce(buildLayout(page), mount); @@ -244,4 +289,61 @@ describe('DomPainter renderColumnSeparators', () => { expect(seps[0].style.left).toBe('220px'); }); }); + + // The content-presence gate matches Word: a column separator is suppressed + // when the column to its right has no content within the region. This is + // observable in `multi-column-sections.docx` page 2 — Word draws no line + // because the section's content fits entirely in column 0. + describe('content-presence gate', () => { + it('suppresses the separator when no fragment sits past the column boundary', () => { + const page = buildPage({ + columns: { count: 2, gap: 48, withSeparator: true }, + // Only column 0 has content (x=96 < separatorX=408). Word draws nothing. + fragments: [fragAt(96), fragAt(96, 300)], + }); + paintOnce(buildLayout(page), mount); + + expect(querySeparators(mount)).toHaveLength(0); + }); + + it('draws only the separator whose right neighbor has content (3-col, col 3 empty)', () => { + // 3 cols at x=96, x=320, x=544. Separators at 296 and 520. + // Cols 1 and 2 have content; col 3 is empty. Only the 296 separator + // draws; the 520 separator (col 2 → col 3 boundary) is suppressed. + const page = buildPage({ + columns: { count: 3, gap: 48, withSeparator: true }, + fragments: [fragAt(96), fragAt(320)], + }); + paintOnce(buildLayout(page), mount); + + const seps = querySeparators(mount); + expect(seps).toHaveLength(1); + expect(seps[0].style.left).toBe('296px'); + }); + + it('checks fragment presence within the region only, not the whole page', () => { + // Region 0 (2-col): only col 0 has content → no separator. + // Region 1 (2-col): both cols have content → separator drawn. + // Without the y-bounded gate, region 0's separator would draw because + // region 1's col-1 fragment exists somewhere on the page. + const regions: ColumnRegion[] = [ + { yStart: 96, yEnd: 400, columns: { count: 2, gap: 48, withSeparator: true } }, + { yStart: 400, yEnd: 700, columns: { count: 2, gap: 48, withSeparator: true } }, + ]; + const page = buildPage({ + columnRegions: regions, + fragments: [ + fragAt(96, 200), // region 0, col 0 + fragAt(96, 500), // region 1, col 0 + fragAt(432, 500), // region 1, col 1 + ], + }); + paintOnce(buildLayout(page), mount); + + const seps = querySeparators(mount); + expect(seps).toHaveLength(1); + expect(seps[0].style.top).toBe('400px'); + expect(seps[0].style.height).toBe('300px'); + }); + }); }); diff --git a/packages/layout-engine/painters/dom/src/renderer.ts b/packages/layout-engine/painters/dom/src/renderer.ts index bb195212c8..02d670d4f8 100644 --- a/packages/layout-engine/painters/dom/src/renderer.ts +++ b/packages/layout-engine/painters/dom/src/renderer.ts @@ -2366,7 +2366,18 @@ export class DomPainter { const separatorPositions = this.getColumnSeparatorPositions(columns, leftMargin, contentWidth); if (separatorPositions.length === 0) continue; + // Word only renders the column separator between columns that both have + // content. For a 2-col page where col 1 is empty (e.g. the last page of + // a multi-column section that fits in col 0, or a `nextPage` section + // where Word fills col 0 first without balancing), Word draws no line + // even when the section's `w:cols` declared `w:sep="1"`. Gate each + // separator on whether any fragment sits past it within the region. + const fragmentsInRegion = page.fragments.filter((f) => f.y >= yStart - 0.5 && f.y < yEnd + 0.5); + for (const separatorX of separatorPositions) { + const hasContentPastSeparator = fragmentsInRegion.some((f) => f.x >= separatorX); + if (!hasContentPastSeparator) continue; + const separatorEl = this.doc.createElement('div'); separatorEl.dataset.superdocColumnSeparator = 'true'; From 6db27ad8c3cfe71515955ad3a51e6ac6f787beff Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 20:46:32 -0300 Subject: [PATCH 10/18] fix(layout-engine): balance multi-col sections when doc has explicit continuous (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Empirical Word behavior on docs with explicit `` on the body sectPr: balance any multi-column section whose content precedes the body, even when that section's own end-break is `nextPage` (default). The simplest reproducer is `tabs/sd-1480-two-col-tab-positions.docx`: - 5 paragraphs ending with an inline sectPr (no `` → default `nextPage`). - 1 empty paragraph followed by the body sectPr with explicit ``. - Word renders the 5 entries 3+3 across 2 columns on a single page. - Pre-fix: our gate skipped balancing because section 0's break is nextPage → the page rendered as 6+0. Distinguishing explicit vs. default `continuous` requires plumbing a `typeIsExplicit` flag from the OOXML parser through to the layout-engine: - `extractSectionType` now returns `null` when `` is absent, instead of defaulting to `nextPage`. Callers apply the correct default (paragraph sectPr → `nextPage`, body sectPr → `continuous`). - `extractSectionData` exposes `typeIsExplicit: boolean`. - `SectionRange.typeIsExplicit` carries the flag through analysis. - `createSectionBreakBlock` writes it onto `attrs.typeIsExplicit`. - `layoutDocument` reads it into `sectionTypeIsExplicit: Map`. Updated balance gate (per-section, count > 1): Balance if: (a) section's own end-break is `continuous` AND it is NOT the last section, OR (b) the doc contains any EXPLICIT continuous break (typically the body sectPr), OR (c) the section spans multiple pages. Otherwise skip — covers `sd-1655-col-sep-3-equal-columns` (single section, default body continuous, single page → Word fills col-by-col). Test fallout: three pm-adapter tests asserted that body sectPrs without `` defaulted to `nextPage`. That was a leak from the old `extractSectionType` paragraph-style default. The corrected default is `continuous` per OOXML body-sectPr semantics. Tests updated to assert the new behavior plus `typeIsExplicit: false`. Verification: - sd-1480 page 1: was 6+0, now 4+2 (Word shows 3+3 — balancing engages correctly; the 4+2 vs 3+3 distribution gap is residual binary-search behavior on uneven paragraph heights, separate from the gate). - sd-1655: still col-by-col (no balancing). ✓ - multi-column-sections, multi_section_doc: still col-by-col. ✓ - spec-test-1..5: 3+3 / 2+3 / 3+3+1 / 7+7 / 3+2|3+2. ✓ - sd-2326 (mid-doc continuous): still balanced 2+2. ✓ 644 layout-engine + 1802 pm-adapter unit tests pass. --- .../layout-engine/layout-engine/src/index.ts | 84 +++++++++++++------ .../pm-adapter/src/index.test.ts | 16 +++- .../pm-adapter/src/integration.test.ts | 6 +- .../pm-adapter/src/sections/analysis.ts | 3 + .../pm-adapter/src/sections/breaks.ts | 7 ++ .../src/sections/extraction.test.ts | 13 ++- .../pm-adapter/src/sections/extraction.ts | 28 +++++-- .../pm-adapter/src/sections/types.ts | 6 ++ .../helpers/sections-resolver.ts | 1 + 9 files changed, 128 insertions(+), 36 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index f2b5046f3b..80c0c12a6b 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1577,6 +1577,12 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options // `evenPage`, `oddPage` do not. Tracked here so the post-layout pass can // skip the wrong section types. const sectionEndBreakType = new Map(); + // sectionIndex -> whether `` was EXPLICIT in the source sectPr. + // Body sectPrs default to `continuous` when w:type is omitted; Word does + // NOT balance those single-page docs (sd-1655). Body sectPrs with explicit + // `` DO balance (sd-1480), even single-page. + // The flag carries the distinction across pm-adapter -> layout-engine. + const sectionTypeIsExplicit = new Map(); // sectionIndex of the LAST section in the document. The body sectPr is // always the final section break and represents the end of the document, // not an actual mid-document break. Even when its type defaults to @@ -1621,7 +1627,7 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (measure) { balancingMeasureMap.set(block.id, measure as MeasureData); } - const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; + const blockWithAttrs = block as { attrs?: { sectionIndex?: number; typeIsExplicit?: boolean } }; const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; if (block.kind === 'sectionBreak') { documentHasAnySectionBreak = true; @@ -1634,6 +1640,9 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (typeof block.type === 'string') { sectionEndBreakType.set(attrSectionIdx, block.type); } + if (typeof blockWithAttrs.attrs?.typeIsExplicit === 'boolean') { + sectionTypeIsExplicit.set(attrSectionIdx, blockWithAttrs.attrs.typeIsExplicit); + } } } if (currentSectionIdx !== null) { @@ -2717,43 +2726,70 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; if (alreadyBalancedSections.has(sectionIdx)) continue; - // Gate balancing on section-end break type AND whether the section spans - // multiple pages (ECMA-376 §17.18.77 / Linear SD-2452, refined against - // empirical Word behavior): + // Gate balancing on the break-type signals around the section + // (ECMA-376 §17.18.77 / Linear SD-2452, refined against empirical Word + // behavior). Three balance-allowed scenarios + one skip-the-last-section + // safeguard: + // + // 1. The section's own end-break is `continuous` AND it is NOT the last + // section. This is the spec-canonical case — a mid-document + // continuous break balances the preceding section. Covers + // spec-test-1..5, sd-2326, all `pagination/two_column_two_page-arial` + // page 17 scenarios. // - // - Continuous mid-document break ending the section → balance (Word). - // - Non-`continuous` break (nextPage / evenPage / oddPage) → don't balance. - // - Body sectPr (last section, defaults to `continuous` per pm-adapter - // DEFAULT_BODY_SECTION_TYPE): balance ONLY if the section spans multiple - // pages. Word balances the last page of multi-page multi-column body - // sections (e.g. two_column_two_page-arial 2 page 17 splits 3+2) but - // does NOT balance single-page sections (e.g. sd-1655 fills col 1 then - // col 2 without redistributing). + // 2. The doc contains at least one EXPLICIT `` + // somewhere. Word treats an explicit continuous break (typically the + // body sectPr) as a doc-wide signal to balance every multi-column + // section, even ones whose own break is `nextPage`. Covers + // sd-1480-two-col-tab-positions: section 0 ends with default + // `nextPage` but the body sectPr has explicit `continuous`, and Word + // balances 6 entries 3+3. // - // pm-adapter uses end-tagged section semantics (ECMA-376 §17.6.17): the type - // on `sectionBreak[i]` describes the break that ENDS section i. The body - // sectPr is the final section break and gets the default `continuous` type, - // so we can't distinguish explicit vs default `continuous` from the type - // alone — the multi-page check supplies the missing signal for the last - // section. Earlier sections rely on the type being non-default if it's - // `continuous` (because paragraph sectPr defaults to `nextPage`). + // 3. The section spans multiple pages. Word balances the last page of a + // multi-page multi-column body section regardless of break-type + // explicitness. Covers `two_column_two_page-arial 2` page 17 (17 + // pages, body default continuous, balanced 3+2 on the final page). // - // FALLBACK_SECTION_IDX (-1) bypasses both gates because the synthesized + // The skip-the-last-section safeguard handles `sd-1655-col-sep-3-equal-columns`: + // single section, body sectPr without ``, single page, + // 3-column → Word fills column-by-column without balancing. None of + // the three allowed scenarios fire here, so the skip path runs. + // + // FALLBACK_SECTION_IDX (-1) bypasses all of this — the synthesized // fallback fires only when no pm-adapter section metadata exists at all. if (sectionIdx !== FALLBACK_SECTION_IDX) { const endBreakType = sectionEndBreakType.get(sectionIdx); - if (endBreakType !== 'continuous') continue; + const isLast = lastSectionIdx !== null && sectionIdx === lastSectionIdx; + const isExplicitContinuous = endBreakType === 'continuous' && sectionTypeIsExplicit.get(sectionIdx) === true; + // Scan all sections once for a doc-wide explicit continuous break. + let docHasExplicitContinuous = false; + for (const [idx, explicit] of sectionTypeIsExplicit) { + if (explicit && sectionEndBreakType.get(idx) === 'continuous') { + docHasExplicitContinuous = true; + break; + } + } - if (lastSectionIdx !== null && sectionIdx === lastSectionIdx) { + const allowedByMidDocContinuous = endBreakType === 'continuous' && !isLast; + const allowedByDocWideExplicit = docHasExplicitContinuous; + let allowedByMultiPage = false; + if (!allowedByMidDocContinuous && !allowedByDocWideExplicit) { let sectionPagesCount = 0; for (const p of pages) { if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { sectionPagesCount += 1; - if (sectionPagesCount > 1) break; + if (sectionPagesCount > 1) { + allowedByMultiPage = true; + break; + } } } - if (sectionPagesCount <= 1) continue; } + + if (!allowedByMidDocContinuous && !allowedByDocWideExplicit && !allowedByMultiPage) continue; + // Suppress the `void` lint warning while keeping the variable available + // for future refinements that want a "section-own-explicit" gate. + void isExplicitContinuous; } // Find the last page carrying any fragments from this section. diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index b22a0de17c..27e1113c69 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -2175,8 +2175,15 @@ describe('toFlowBlocks', () => { const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); const sectionBreaks = blocks.filter((b: FlowBlock) => b.kind === 'sectionBreak' && !b.attrs?.isFirstSection); + // The retained break here is the FINAL section's, derived from the + // body sectPr produced by `createTestBodySectPr` (no ``). That + // section defaults to `continuous` per DEFAULT_BODY_SECTION_TYPE — Word's + // body-sectPr default. The earlier expectation of `'nextPage'` matched a + // bug where extractSectionType applied a paragraph-style default to body + // sectPrs. expect(sectionBreaks).toHaveLength(1); - expect(sectionBreaks[0].type).toBe('nextPage'); + expect(sectionBreaks[0].type).toBe('continuous'); + expect(sectionBreaks[0].attrs?.typeIsExplicit).toBe(false); }); it('emits section breaks even when w:type element is missing (defaults to nextPage)', () => { @@ -2220,9 +2227,14 @@ describe('toFlowBlocks', () => { const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); const sectionBreaks = blocks.filter((b: FlowBlock) => b.kind === 'sectionBreak' && !b.attrs?.isFirstSection); + // Two retained breaks: + // 1. The middle section's, ending at the explicit `nextPage` paragraph. + // 2. The final section's, derived from the body sectPr (no ), + // defaulting to `continuous` per OOXML body-sectPr semantics. expect(sectionBreaks).toHaveLength(2); expect(sectionBreaks[0].type).toBe('nextPage'); - expect(sectionBreaks[1].type).toBe('nextPage'); + expect(sectionBreaks[1].type).toBe('continuous'); + expect(sectionBreaks[1].attrs?.typeIsExplicit).toBe(false); }); it('keeps final paragraph section break even without type when no body sectPr', () => { diff --git a/packages/layout-engine/pm-adapter/src/integration.test.ts b/packages/layout-engine/pm-adapter/src/integration.test.ts index 065b52b128..d314d8f73c 100644 --- a/packages/layout-engine/pm-adapter/src/integration.test.ts +++ b/packages/layout-engine/pm-adapter/src/integration.test.ts @@ -227,7 +227,11 @@ describe('PM → FlowBlock → Measure integration', () => { expect(breaks.length).toBe(4); const last = breaks[breaks.length - 1] as never; - expect(last.type).toBe('nextPage'); + // The body sectPr in this fixture has no ``, so the final section + // takes pm-adapter's body default (DEFAULT_BODY_SECTION_TYPE = continuous). + // The earlier `nextPage` expectation matched a bug where extractSectionType + // applied a paragraph-style default to body sectPrs. + expect(last.type).toBe('continuous'); expect(last.orientation).toBe('landscape'); expect(last.pageSize).toBeDefined(); expect(Math.round(last.pageSize.w)).toBe(1056); // Landscape width (legal @ 96dpi) diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 51ab2e1d98..c352abb6fe 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -207,6 +207,7 @@ export function buildSectionRangesFromParagraphs( orientation: sectionData.orientation ?? null, columns: sectionData.columnsPx ?? null, type: (sectionData.type as SectionType) ?? DEFAULT_PARAGRAPH_SECTION_TYPE, + typeIsExplicit: sectionData.typeIsExplicit ?? false, titlePg: sectionData.titlePg ?? false, headerRefs: sectionData.headerRefs, footerRefs: sectionData.footerRefs, @@ -315,6 +316,7 @@ export function createFinalSectionFromBodySectPr( orientation: bodySectionData.orientation ?? null, columns: bodySectionData.columnsPx ?? null, type: (bodySectionData.type as SectionType) ?? DEFAULT_BODY_SECTION_TYPE, + typeIsExplicit: bodySectionData.typeIsExplicit ?? false, titlePg: bodySectionData.titlePg ?? false, headerRefs: bodySectionData.headerRefs, footerRefs: bodySectionData.footerRefs, @@ -352,6 +354,7 @@ export function createDefaultFinalSection( orientation: null, columns: null, type: DEFAULT_BODY_SECTION_TYPE, + typeIsExplicit: false, titlePg: false, headerRefs: undefined, footerRefs: undefined, diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index b7fe08ca74..f0cb0d1b93 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -130,6 +130,13 @@ export function createSectionBreakBlock( attrs: { source: 'sectPr', sectionIndex: section.sectionIndex, + // Surface whether `` was explicit in the source XML. The + // layout-engine's column-balance gate uses this to distinguish a + // body sectPr that defaulted to `continuous` (Word does not balance, + // e.g. sd-1655-col-sep-3-equal-columns) from one with + // `` written out (Word balances, e.g. + // sd-1480-two-col-tab-positions, even when the section is single-page). + typeIsExplicit: section.typeIsExplicit, ...extraAttrs, }, ...(section.pageSize && { pageSize: section.pageSize }), diff --git a/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts b/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts index ce6a124e96..b5040b0431 100644 --- a/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts @@ -318,7 +318,15 @@ describe('extraction', () => { expect(result).toBeNull(); }); - it('should default type to "nextPage" when sectPr exists but w:type is missing', () => { + it('returns type: undefined and typeIsExplicit: false when w:type is missing', () => { + // extractSectionData no longer assumes a section-type default — that + // belongs to the caller (paragraph sectPr defaults to nextPage, + // body sectPr defaults to continuous). Returning the absence as + // `type: undefined` plus `typeIsExplicit: false` lets the caller + // apply the correct default while still distinguishing "OOXML omitted + // w:type" from "OOXML wrote w:type=continuous". The distinction is + // load-bearing for column-balancing: Word balances explicit-continuous + // body sectPrs even single-page (sd-1480) but not defaulted ones (sd-1655). const para: PMNode = { type: 'paragraph', attrs: { @@ -340,7 +348,8 @@ describe('extraction', () => { const result = extractSectionData(para); expect(result).not.toBeNull(); - expect(result?.type).toBe('nextPage'); + expect(result?.type).toBeUndefined(); + expect(result?.typeIsExplicit).toBe(false); expect(result?.vAlign).toBe('bottom'); }); }); diff --git a/packages/layout-engine/pm-adapter/src/sections/extraction.ts b/packages/layout-engine/pm-adapter/src/sections/extraction.ts index da98daeec3..2aefd56f04 100644 --- a/packages/layout-engine/pm-adapter/src/sections/extraction.ts +++ b/packages/layout-engine/pm-adapter/src/sections/extraction.ts @@ -79,15 +79,21 @@ function extractNormalizedMargins(attrs: Record): { /** * Extract section type from element. - * Defaults to 'nextPage' per OOXML spec when absent. + * Returns the explicit value when present, or `null` when absent so callers + * can distinguish "OOXML omitted w:type" from "OOXML wrote nextPage". This + * matters for column balancing: Word balances a section ending in an + * explicit `w:type="continuous"` body sectPr but does NOT balance one that + * merely defaulted to continuous (per ECMA-376 §17.18.77 and observed + * behavior on `sd-1655-col-sep-3-equal-columns` vs `sd-1480-two-col-tab-positions`). */ -function extractSectionType(elements: SectionElement[]): SectionType { +function extractSectionType(elements: SectionElement[]): SectionType | null { const typeEl = elements.find((el) => el?.name === 'w:type'); - const val = typeEl?.attributes?.['w:val']; + if (!typeEl) return null; + const val = typeEl.attributes?.['w:val']; if (val === 'continuous' || val === 'nextPage' || val === 'evenPage' || val === 'oddPage') { return val; } - return 'nextPage'; + return null; } /** @@ -316,6 +322,8 @@ export function extractSectionData(para: PMNode): { bottomPx?: number; leftPx?: number; type?: SectionType; + /** True iff `` was present in the source XML (vs. type defaulted by the caller). */ + typeIsExplicit?: boolean; pageSizePx?: { w: number; h: number }; orientation?: Orientation; columnsPx?: ColumnLayout; @@ -348,8 +356,10 @@ export function extractSectionData(para: PMNode): { return headerPx == null && footerPx == null ? null : { headerPx, footerPx }; } - // Extract all section properties. Type always has a value (defaults to 'nextPage') - const type = extractSectionType(sectPrElements); + // Extract all section properties. type is null when is omitted. + const explicitType = extractSectionType(sectPrElements); + const type = explicitType ?? undefined; + const typeIsExplicit = explicitType !== null; const { pageSizePx, orientation } = extractPageSizeAndOrientation(sectPrElements); const titlePg = sectPrElements.some((el) => el?.name === 'w:titlePg'); const fallbackMargins = extractFallbackMargins(sectPrElements, headerPx, footerPx); @@ -362,7 +372,10 @@ export function extractSectionData(para: PMNode): { const columnsPx = extractColumns(sectPrElements); const vAlign = extractVerticalAlign(sectPrElements); - // When sectPrElements exist, always return data (even if minimal) since type defaults to 'nextPage' + // When sectPrElements exist, always return data (even if minimal). The + // caller applies the appropriate default for `type` (paragraph default = + // nextPage, body default = continuous) and sees `typeIsExplicit` for the + // distinction. return { headerPx, footerPx, @@ -371,6 +384,7 @@ export function extractSectionData(para: PMNode): { bottomPx, leftPx, type, + typeIsExplicit, pageSizePx, orientation, columnsPx, diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index 551d8bcfd8..51006470af 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -120,6 +120,12 @@ export interface SectionRange { orientation: 'portrait' | 'landscape' | null; columns: ColumnLayout | null; type: SectionType; + /** True iff the section's `` was explicitly written in the source. + * Distinguishes "the body sectPr defaulted to continuous because OOXML + * omitted w:type" (sd-1655) from "an explicit w:type=continuous on the + * body sectPr" (sd-1480). Only the explicit form triggers Word's + * end-of-document column balancing for single-page sections. */ + typeIsExplicit: boolean; titlePg: boolean; headerRefs?: Partial>; footerRefs?: Partial>; diff --git a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts index e62dc395e0..1f0b9eabb1 100644 --- a/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts +++ b/packages/super-editor/src/editors/v1/document-api-adapters/helpers/sections-resolver.ts @@ -178,6 +178,7 @@ function createSyntheticRange(bodySectPr: XmlElement | null, paragraphCount: num orientation: null, columns: null, type: SectionType.CONTINUOUS, + typeIsExplicit: false, titlePg: false, headerRefs: undefined, footerRefs: undefined, From 692811ffea19f9375a6a2346b6bc27d926d45165 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 21:30:01 -0300 Subject: [PATCH 11/18] fix(pm-adapter): surface typeIsExplicit only when authored (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous commit changed `extractSectionType` to return `null` when `` was missing, which let analysis.ts apply `DEFAULT_BODY_SECTION_TYPE = continuous` for body sectPrs. Most fixtures flipped from `nextPage` to `continuous`, rippling through page-break placement, header/footer flow, and column-flow decisions across the whole pipeline (541 of 374 corpus docs changed, 1204 visual diffs). Surgical revert: - `extractSectionType` returns the OOXML default (`'nextPage'`) again, matching the pre-PR pipeline behavior. The body-sectPr type is once more `'nextPage'` when `` is omitted. - A new `extractSectionTypeIsExplicit` helper returns `true` only when `` was actually written. `extractSectionData` exposes it as `typeIsExplicit`. - `SectionRange.typeIsExplicit` propagates through analysis (paragraph sectPrs, body sectPr, fallback final, synthetic ranges). - `createSectionBreakBlock` writes `attrs.typeIsExplicit: true` ONLY when the flag is true. Omitting the field for the (vast majority of) sectPrs without `` keeps the FlowBlock attrs schema backward-compatible with the published 1.31.1 layout snapshots. Refined column-balance gate (layout-engine/index.ts) reads the new flag. Balance if any of: 1. Section's own end-break is `continuous` AND not the last section (covers spec-test-1..5, sd-2326). 2. Doc has at least one EXPLICIT continuous break AND this section's type was NOT explicitly set to a page-forcing type. Covers sd-1480-two-col-tab-positions section 0 (default `nextPage` but body sectPr explicit continuous → Word balances). 3. Section spans multiple pages (covers `two_column_two_page-arial 2` p17, body default, multi-page). Otherwise skip — covers `sd-1655-col-sep-3-equal-columns` (single section, default body, single page → Word fills col-by-col). Corpus (vs npm@latest 1.31.1): 541 → 47 changed (13 unique + 34 widespread-only `attrs.typeIsExplicit` schema additions). Browser verification: - spec-test-1..5: 3+3 / 2+3 / 3+3+1 / 7+7 / 3+2|3+2 ✓ - sd-1655: 7+1 col-by-col ✓ - multi-column-sections / multi_section_doc: col-by-col ✓ - sd-1480: was 6+0, now 3+2 (Word: 3+3 — gate engages correctly; the remaining 3+2 vs 3+3 gap is balancer-algorithm behavior on uneven paragraph heights, separate from the gate). - sd-2326: 2+2 ✓ 644 layout-engine + 1802 pm-adapter unit tests pass. --- .../layout-engine/layout-engine/src/index.ts | 66 ++++++++++--------- .../pm-adapter/src/index.test.ts | 30 +++++---- .../pm-adapter/src/integration.test.ts | 6 +- .../pm-adapter/src/sections/breaks.ts | 18 +++-- .../src/sections/extraction.test.ts | 17 ++--- .../pm-adapter/src/sections/extraction.ts | 46 +++++++++---- 6 files changed, 102 insertions(+), 81 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 80c0c12a6b..b9e9cd0ba5 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -2726,41 +2726,45 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; if (alreadyBalancedSections.has(sectionIdx)) continue; - // Gate balancing on the break-type signals around the section - // (ECMA-376 §17.18.77 / Linear SD-2452, refined against empirical Word - // behavior). Three balance-allowed scenarios + one skip-the-last-section - // safeguard: + // Gate balancing per ECMA-376 §17.18.77 + empirical Word behavior. The + // section type defaults to `nextPage` for any sectPr without ``, + // so we lean on `typeIsExplicit` to know what was actually authored: // - // 1. The section's own end-break is `continuous` AND it is NOT the last - // section. This is the spec-canonical case — a mid-document - // continuous break balances the preceding section. Covers - // spec-test-1..5, sd-2326, all `pagination/two_column_two_page-arial` - // page 17 scenarios. + // - Explicit `` ending the section (or + // anywhere in the doc) signals continuous flow. Word balances the + // adjacent multi-column sections. + // - A multi-page multi-column section is balanced on its last page + // regardless of explicitness — this is the long-standing + // two_column_two_page-arial p17 behavior driven by SD-2646. // - // 2. The doc contains at least one EXPLICIT `` - // somewhere. Word treats an explicit continuous break (typically the - // body sectPr) as a doc-wide signal to balance every multi-column - // section, even ones whose own break is `nextPage`. Covers - // sd-1480-two-col-tab-positions: section 0 ends with default - // `nextPage` but the body sectPr has explicit `continuous`, and Word - // balances 6 entries 3+3. + // Skip-when-not-allowed is the default. The three allowed scenarios: // - // 3. The section spans multiple pages. Word balances the last page of a - // multi-page multi-column body section regardless of break-type - // explicitness. Covers `two_column_two_page-arial 2` page 17 (17 - // pages, body default continuous, balanced 3+2 on the final page). + // 1. Mid-doc explicit continuous: section's own end-break is + // `continuous` AND it is not the last section. Covers spec-test-1..5 + // and sd-2326 (explicit continuous mid-doc). // - // The skip-the-last-section safeguard handles `sd-1655-col-sep-3-equal-columns`: - // single section, body sectPr without ``, single page, - // 3-column → Word fills column-by-column without balancing. None of - // the three allowed scenarios fire here, so the skip path runs. + // 2. Doc-wide explicit continuous + non-explicitly-non-continuous + // section: the doc has at least one EXPLICIT continuous break + // somewhere AND this section's type was NOT explicitly set to a + // page-forcing type. Covers sd-1480-two-col-tab-positions: section 0 + // ends with default `nextPage` but the body sectPr has explicit + // `continuous` — Word balances 6 entries 3+3 on a single page. // - // FALLBACK_SECTION_IDX (-1) bypasses all of this — the synthesized - // fallback fires only when no pm-adapter section metadata exists at all. + // 3. Multi-page section: any section whose content spans more than one + // page. Covers `two_column_two_page-arial 2` p17 (body default, + // single section, 17 pages → balanced 3+2 on the final page). + // + // Skip path covers `sd-1655-col-sep-3-equal-columns` (single section, + // body without ``, single page, 3-col): no scenario fires → + // Word fills column-by-column without balancing. + // + // FALLBACK_SECTION_IDX (-1) bypasses the gate — synthesized only when + // pm-adapter emitted no section metadata at all. if (sectionIdx !== FALLBACK_SECTION_IDX) { const endBreakType = sectionEndBreakType.get(sectionIdx); + const typeIsExplicit = sectionTypeIsExplicit.get(sectionIdx) === true; const isLast = lastSectionIdx !== null && sectionIdx === lastSectionIdx; - const isExplicitContinuous = endBreakType === 'continuous' && sectionTypeIsExplicit.get(sectionIdx) === true; + // Scan all sections once for a doc-wide explicit continuous break. let docHasExplicitContinuous = false; for (const [idx, explicit] of sectionTypeIsExplicit) { @@ -2770,8 +2774,11 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } + const isExplicitNonContinuous = + typeIsExplicit && (endBreakType === 'nextPage' || endBreakType === 'evenPage' || endBreakType === 'oddPage'); + const allowedByMidDocContinuous = endBreakType === 'continuous' && !isLast; - const allowedByDocWideExplicit = docHasExplicitContinuous; + const allowedByDocWideExplicit = docHasExplicitContinuous && !isExplicitNonContinuous; let allowedByMultiPage = false; if (!allowedByMidDocContinuous && !allowedByDocWideExplicit) { let sectionPagesCount = 0; @@ -2787,9 +2794,6 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } if (!allowedByMidDocContinuous && !allowedByDocWideExplicit && !allowedByMultiPage) continue; - // Suppress the `void` lint warning while keeping the variable available - // for future refinements that want a "section-own-explicit" gate. - void isExplicitContinuous; } // Find the last page carrying any fragments from this section. diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 27e1113c69..ba5363b936 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -2175,15 +2175,14 @@ describe('toFlowBlocks', () => { const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); const sectionBreaks = blocks.filter((b: FlowBlock) => b.kind === 'sectionBreak' && !b.attrs?.isFirstSection); - // The retained break here is the FINAL section's, derived from the - // body sectPr produced by `createTestBodySectPr` (no ``). That - // section defaults to `continuous` per DEFAULT_BODY_SECTION_TYPE — Word's - // body-sectPr default. The earlier expectation of `'nextPage'` matched a - // bug where extractSectionType applied a paragraph-style default to body - // sectPrs. expect(sectionBreaks).toHaveLength(1); - expect(sectionBreaks[0].type).toBe('continuous'); - expect(sectionBreaks[0].attrs?.typeIsExplicit).toBe(false); + expect(sectionBreaks[0].type).toBe('nextPage'); + // `typeIsExplicit` is only set on attrs when `` was authored. + // The body sectPr in this fixture has no ``, so the flag is + // omitted (undefined). The column-balancing gate treats absence as + // "defaulted" and skips balancing for default-nextPage body sections + // (sd-1655 behavior). + expect(sectionBreaks[0].attrs?.typeIsExplicit).toBeUndefined(); }); it('emits section breaks even when w:type element is missing (defaults to nextPage)', () => { @@ -2227,14 +2226,17 @@ describe('toFlowBlocks', () => { const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); const sectionBreaks = blocks.filter((b: FlowBlock) => b.kind === 'sectionBreak' && !b.attrs?.isFirstSection); - // Two retained breaks: - // 1. The middle section's, ending at the explicit `nextPage` paragraph. - // 2. The final section's, derived from the body sectPr (no ), - // defaulting to `continuous` per OOXML body-sectPr semantics. expect(sectionBreaks).toHaveLength(2); expect(sectionBreaks[0].type).toBe('nextPage'); - expect(sectionBreaks[1].type).toBe('continuous'); - expect(sectionBreaks[1].attrs?.typeIsExplicit).toBe(false); + expect(sectionBreaks[1].type).toBe('nextPage'); + // Section 1's sectPr writes `` explicitly + // so the flag is set true. Section 2's body sectPr omits `` + // so the flag is omitted. The column-balance gate uses this to tell + // explicit-nextPage (author intent: don't balance) from defaulted + // nextPage (could still balance if the doc has explicit continuous + // somewhere or is multi-page). + expect(sectionBreaks[0].attrs?.typeIsExplicit).toBe(true); + expect(sectionBreaks[1].attrs?.typeIsExplicit).toBeUndefined(); }); it('keeps final paragraph section break even without type when no body sectPr', () => { diff --git a/packages/layout-engine/pm-adapter/src/integration.test.ts b/packages/layout-engine/pm-adapter/src/integration.test.ts index d314d8f73c..065b52b128 100644 --- a/packages/layout-engine/pm-adapter/src/integration.test.ts +++ b/packages/layout-engine/pm-adapter/src/integration.test.ts @@ -227,11 +227,7 @@ describe('PM → FlowBlock → Measure integration', () => { expect(breaks.length).toBe(4); const last = breaks[breaks.length - 1] as never; - // The body sectPr in this fixture has no ``, so the final section - // takes pm-adapter's body default (DEFAULT_BODY_SECTION_TYPE = continuous). - // The earlier `nextPage` expectation matched a bug where extractSectionType - // applied a paragraph-style default to body sectPrs. - expect(last.type).toBe('continuous'); + expect(last.type).toBe('nextPage'); expect(last.orientation).toBe('landscape'); expect(last.pageSize).toBeDefined(); expect(Math.round(last.pageSize.w)).toBe(1056); // Landscape width (legal @ 96dpi) diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index f0cb0d1b93..a841c3debb 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -130,13 +130,17 @@ export function createSectionBreakBlock( attrs: { source: 'sectPr', sectionIndex: section.sectionIndex, - // Surface whether `` was explicit in the source XML. The - // layout-engine's column-balance gate uses this to distinguish a - // body sectPr that defaulted to `continuous` (Word does not balance, - // e.g. sd-1655-col-sep-3-equal-columns) from one with - // `` written out (Word balances, e.g. - // sd-1480-two-col-tab-positions, even when the section is single-page). - typeIsExplicit: section.typeIsExplicit, + // `typeIsExplicit` is set only when `` was authored in the + // source XML. We omit the field entirely when it would be `false` so + // we don't widen `attrs` for the (vast majority of) sectPrs that + // omit `` — that would produce a doc-wide snapshot diff + // against historical references on every existing fixture. + // The layout-engine's column-balance gate reads this to distinguish + // a body sectPr that defaulted to `nextPage` (Word does not balance, + // sd-1655-col-sep-3-equal-columns) from one with + // `` written out (Word balances, + // sd-1480-two-col-tab-positions, even single-page). + ...(section.typeIsExplicit ? { typeIsExplicit: true as const } : {}), ...extraAttrs, }, ...(section.pageSize && { pageSize: section.pageSize }), diff --git a/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts b/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts index b5040b0431..713a8a3746 100644 --- a/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/extraction.test.ts @@ -318,15 +318,12 @@ describe('extraction', () => { expect(result).toBeNull(); }); - it('returns type: undefined and typeIsExplicit: false when w:type is missing', () => { - // extractSectionData no longer assumes a section-type default — that - // belongs to the caller (paragraph sectPr defaults to nextPage, - // body sectPr defaults to continuous). Returning the absence as - // `type: undefined` plus `typeIsExplicit: false` lets the caller - // apply the correct default while still distinguishing "OOXML omitted - // w:type" from "OOXML wrote w:type=continuous". The distinction is - // load-bearing for column-balancing: Word balances explicit-continuous - // body sectPrs even single-page (sd-1480) but not defaulted ones (sd-1655). + it('should default type to "nextPage" when sectPr exists but w:type is missing', () => { + // type still defaults to 'nextPage' (preserves the established + // pipeline behavior). The added `typeIsExplicit: false` flag tells + // the column-balancing gate the type was defaulted, not authored — + // critical for distinguishing sd-1655 (Word does NOT balance) from + // sd-1480 (explicit continuous → Word balances). const para: PMNode = { type: 'paragraph', attrs: { @@ -348,7 +345,7 @@ describe('extraction', () => { const result = extractSectionData(para); expect(result).not.toBeNull(); - expect(result?.type).toBeUndefined(); + expect(result?.type).toBe('nextPage'); expect(result?.typeIsExplicit).toBe(false); expect(result?.vAlign).toBe('bottom'); }); diff --git a/packages/layout-engine/pm-adapter/src/sections/extraction.ts b/packages/layout-engine/pm-adapter/src/sections/extraction.ts index 2aefd56f04..234d260aa5 100644 --- a/packages/layout-engine/pm-adapter/src/sections/extraction.ts +++ b/packages/layout-engine/pm-adapter/src/sections/extraction.ts @@ -79,21 +79,37 @@ function extractNormalizedMargins(attrs: Record): { /** * Extract section type from element. - * Returns the explicit value when present, or `null` when absent so callers - * can distinguish "OOXML omitted w:type" from "OOXML wrote nextPage". This - * matters for column balancing: Word balances a section ending in an - * explicit `w:type="continuous"` body sectPr but does NOT balance one that - * merely defaulted to continuous (per ECMA-376 §17.18.77 and observed - * behavior on `sd-1655-col-sep-3-equal-columns` vs `sd-1480-two-col-tab-positions`). + * Defaults to 'nextPage' per OOXML spec when absent. This preserves the + * historical body-sectPr type behavior — any change to that ripples + * through page-break placement, header/footer inheritance, and section + * flow across the entire layout pipeline. Callers that need to know + * whether `` was actually present should consult + * `extractSectionTypeWithSource` below; the existing `type` value stays + * `nextPage` regardless. */ -function extractSectionType(elements: SectionElement[]): SectionType | null { +function extractSectionType(elements: SectionElement[]): SectionType { const typeEl = elements.find((el) => el?.name === 'w:type'); - if (!typeEl) return null; - const val = typeEl.attributes?.['w:val']; + const val = typeEl?.attributes?.['w:val']; if (val === 'continuous' || val === 'nextPage' || val === 'evenPage' || val === 'oddPage') { return val; } - return null; + return 'nextPage'; +} + +/** + * True iff `` is present in the source XML. + * + * The column-balancing gate (ECMA-376 §17.18.77) needs to distinguish a + * body sectPr whose `` defaulted to `nextPage` because it was + * omitted (sd-1655-col-sep-3-equal-columns: Word fills col-by-col, no + * balance) from one with explicit `` + * (sd-1480-two-col-tab-positions: Word balances 6 entries 3+3 on a + * single page). The type field alone can't carry this — both produce + * `'nextPage'` in the resolved type — so we surface it as a separate + * flag without touching the established type defaulting. + */ +function extractSectionTypeIsExplicit(elements: SectionElement[]): boolean { + return elements.some((el) => el?.name === 'w:type'); } /** @@ -356,10 +372,12 @@ export function extractSectionData(para: PMNode): { return headerPx == null && footerPx == null ? null : { headerPx, footerPx }; } - // Extract all section properties. type is null when is omitted. - const explicitType = extractSectionType(sectPrElements); - const type = explicitType ?? undefined; - const typeIsExplicit = explicitType !== null; + // Extract all section properties. type defaults to 'nextPage' per OOXML + // spec (and this preserves the historical pipeline behavior across page + // breaks, header/footer flow, etc). `typeIsExplicit` lets the + // column-balancing gate know whether `` was actually written. + const type = extractSectionType(sectPrElements); + const typeIsExplicit = extractSectionTypeIsExplicit(sectPrElements); const { pageSizePx, orientation } = extractPageSizeAndOrientation(sectPrElements); const titlePg = sectPrElements.some((el) => el?.name === 'w:titlePg'); const fallbackMargins = extractFallbackMargins(sectPrElements, headerPx, footerPx); From 4dc4c621ac30bf201d139d24d5a99f9bfc070ef9 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 21:57:23 -0300 Subject: [PATCH 12/18] fix(layout-engine): exclude body-explicit-continuous from doc-wide rule (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ECMA-376 §17.18.77 a continuous break "balances the section it ENDS" — i.e., the section BEFORE the break, not the section the break belongs to. When the body sectPr itself is the explicit-continuous trigger, it balances the section preceding the body, not the body's own content. Bug: rule 2 ("doc has explicit continuous → balance any non-explicitly- non-continuous section") was firing on the body section itself when the body sectPr was the only explicit continuous in the doc. That caused `tabs/mixed-columns-tabs tnr` p1 to render 10+9 when Word renders 14+5 (column-flow without balancing): the body sectPr is explicit-continuous + 2-col, but the 2-col Test list IS the body — there is no preceding section for the body break to "balance". Compare with `tabs/sd-1480-two-col-tab-positions`: body sectPr is also explicit-continuous + 2-col, but the 2-col Page entries live in section 0 (a section BEFORE the body). The body break correctly balances section 0 — that produces 3+3 like Word. Fix: identify the body-explicit-continuous section (last section whose typeIsExplicit is true and whose end-break is `continuous`) and exclude it from rule 2. Section 0 of sd-1480 still balances. Section 1 (body) of mixed-columns-tabs-tnr does not. Body sections can still balance via rule 1 (they can't — last section can't be "not last") or rule 3 (multi-page check, e.g. two_column_two_page-arial 2 p17). Browser verification: - mixed-columns-tabs-tnr: was 10+9, now 14+5 (Word: 14+5) ✓ exact match - sd-1480: 3+2 unchanged (Word: 3+3, residual balancer-algorithm gap) - sd-1655: 7+1 col-by-col, unchanged ✓ - multi-column-sections: col-by-col, unchanged ✓ - sd-2326: 2+2, unchanged ✓ - spec-test-1..5: 3+3 / 2+3 / 3+3+1 / 7+7 / 3+2|3+2 ✓ Corpus (vs npm@latest 1.31.1): 47 changed total (12 unique + 35 widespread-only attrs.typeIsExplicit schema-only). Previously 47 with 13 unique — mixed-columns-tabs-tnr moved from "structural diff vs reference" to "matches reference behavior on the 2-col flow". 644 layout-engine + 1802 pm-adapter unit tests pass. --- .../layout-engine/layout-engine/src/index.ts | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index b9e9cd0ba5..68ca5d822c 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -2774,11 +2774,38 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } + // Per ECMA-376 §17.18.77, a continuous break balances the section it + // ENDS (i.e., the section BEFORE the break), not the section that + // CONTAINS or follows it. When the body sectPr itself is the explicit + // continuous trigger, it balances the section preceding the body, not + // the body section itself. Compare: + // + // sd-1480: body sectPr explicit-continuous + 2-col, section 0 has + // content. Word balances section 0 (3+3). + // mixed-columns-tabs-tnr: body sectPr explicit-continuous + 2-col, + // section 1 (the body) has the 2-col Test list, section 0 + // has 1-col descriptions. Word does NOT balance section 1 + // (14+5 column-flow); the body-as-trigger applies to + // section 0, which is single-col and so a no-op. + // + // Excluding the body-explicit-continuous section itself from rule 2 + // matches both: section 0 of sd-1480 still balances (it's not the + // body), section 1 of mixed-columns-tabs-tnr does not (it IS the + // body). The body section can still balance via rule 1 (impossible: + // last section can't satisfy "not last") or rule 3 (multi-page). + const bodyExplicitContinuousIdx = + lastSectionIdx !== null && + sectionTypeIsExplicit.get(lastSectionIdx) === true && + sectionEndBreakType.get(lastSectionIdx) === 'continuous' + ? lastSectionIdx + : null; + const isExplicitNonContinuous = typeIsExplicit && (endBreakType === 'nextPage' || endBreakType === 'evenPage' || endBreakType === 'oddPage'); const allowedByMidDocContinuous = endBreakType === 'continuous' && !isLast; - const allowedByDocWideExplicit = docHasExplicitContinuous && !isExplicitNonContinuous; + const allowedByDocWideExplicit = + docHasExplicitContinuous && !isExplicitNonContinuous && sectionIdx !== bodyExplicitContinuousIdx; let allowedByMultiPage = false; if (!allowedByMidDocContinuous && !allowedByDocWideExplicit) { let sectionPagesCount = 0; From e2d10559fe9306365edc31118b2ab2272244faea Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 4 May 2026 22:07:34 -0300 Subject: [PATCH 13/18] fix(layout-engine): skip mid-doc multi-page balance (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Word balances the LAST PAGE of a multi-page multi-column section only when that section is the final/body section. Mid-doc multi-page multi-column sections retain natural column-flow on every page, including the last — Word doesn't rebalance the overflow remainder. Verified: layout/ivosass-sub p3 — section 1 is mid-doc, 2-page, 2-col, explicit-continuous end-break. The last page has 4 overflow fragments. Word leaves them in column 0. Pre-fix our gate's rule 1 fired and balanced p3 to 2+2. Now mid-doc multi-page sections skip the gate and p3 stays in col 0. lists/saas_original p4 — same pattern: mid-doc 2-col section overflows to last page; Word doesn't rebalance. Multi-page LAST sections (two_column_two_page-arial 2 p17 — 17 pages, body default continuous) still balance via rule 3, matching Word's 3+2 split on the final page. Implementation: page-count probe runs once per section (short-circuits at >1) and feeds both the new mid-doc skip and the existing rule 3 multi-page allow. Browser: - ivosass-sub: was 2+2 on p3, now col 0 only (matches Word). - saas_original: was 2+2 on p4, now col 0 only (matches Word). - mixed-columns-tabs-tnr: 14+5 unchanged. - sd-1480: 3+2 unchanged. - sd-1655: 7+1 unchanged. - multi-column-sections: col-by-col unchanged. - sd-2326: 2+2 unchanged. - spec-test-1..5: 3+3 / 2+3 / 3+3+1 / 7+7 / 3+2|3+2. - two_column_two_page-arial p17: still balances. Corpus (vs npm@latest 1.31.1): 9 unique structural changes (down from 12) plus 38 widespread-only attrs.typeIsExplicit schema additions. The 9 remaining are intentional SD-2452 differences. 644 layout-engine + 1802 pm-adapter unit tests pass. --- .../layout-engine/layout-engine/src/index.ts | 38 ++++++++++++------- 1 file changed, 25 insertions(+), 13 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 68ca5d822c..4ae813d066 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -2803,22 +2803,34 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const isExplicitNonContinuous = typeIsExplicit && (endBreakType === 'nextPage' || endBreakType === 'evenPage' || endBreakType === 'oddPage'); + // Page-count probe used by both the multi-page allow rule (3) and the + // mid-doc multi-page skip below. Computed once and short-circuits at >1. + let sectionPagesCount = 0; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + sectionPagesCount += 1; + if (sectionPagesCount > 1) break; + } + } + const isMultiPage = sectionPagesCount > 1; + + // Mid-doc multi-page multi-column sections: Word does NOT balance the + // last page. ECMA's "minimum section height" balancing makes sense for + // single-page sections (rebalancing visibly shrinks the section) but + // not for multi-page sections whose height is already pinned by the + // page boundary — last-page rebalancing would just reshuffle a + // handful of overflow fragments. Verified against: + // layout/ivosass-sub p3 (section 1, mid-doc, 2-page, 4 overflow + // fragments → Word leaves them in col 0). + // lists/saas_original p4 (similar — overflow content stays single). + // Multi-page LAST sections still balance via rule 3 below + // (two_column_two_page-arial 2 p17 keeps its 3+2 split). + if (isMultiPage && !isLast) continue; + const allowedByMidDocContinuous = endBreakType === 'continuous' && !isLast; const allowedByDocWideExplicit = docHasExplicitContinuous && !isExplicitNonContinuous && sectionIdx !== bodyExplicitContinuousIdx; - let allowedByMultiPage = false; - if (!allowedByMidDocContinuous && !allowedByDocWideExplicit) { - let sectionPagesCount = 0; - for (const p of pages) { - if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { - sectionPagesCount += 1; - if (sectionPagesCount > 1) { - allowedByMultiPage = true; - break; - } - } - } - } + const allowedByMultiPage = isMultiPage; if (!allowedByMidDocContinuous && !allowedByDocWideExplicit && !allowedByMultiPage) continue; } From 315ab84e7484f5d4edae90e329299497f0ef822c Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Tue, 5 May 2026 08:02:09 -0300 Subject: [PATCH 14/18] fix(measuring): scope tab alignment heuristic per line segment (SD-1480) Tab leaders were missing on every line BEFORE the final in a paragraph that uses a right-aligned dot-leader stop. The 'last N tabs of the paragraph bind to the last N alignment stops' heuristic counted across the whole paragraph, so only the trailing tab (after the final soft line break) was bound. Earlier lines fell through to default grid stops, dropping their leaders. Two changes: 1. Scope the heuristic to per-line segments delimited by explicit runs. pPr/tabs apply per line, not per paragraph. 2. Strip trailing-empty runs (a tab at the end of a segment with no content after it). Word emits these as authoring artifacts; if they consumed an alignment-stop slot, the meaningful tab earlier in the line would fall to a default grid stop. Mirrored in measuring/dom and layout-bridge/remeasure (the two call sites that share this heuristic). Fixes the visible bug in tabs/sd-1480-two-col-tab-positions where 'Page
Page5' rendered with leaders only on the last line and 'Page 5' lost its leader entirely. Each line now matches Word's 'Page........N' rendering. --- .../layout-bridge/src/remeasure.ts | 72 +++++++++++++++++-- .../measuring/dom/src/index.test.ts | 69 ++++++++++++++++++ .../layout-engine/measuring/dom/src/index.ts | 64 +++++++++++++++-- 3 files changed, 193 insertions(+), 12 deletions(-) diff --git a/packages/layout-engine/layout-bridge/src/remeasure.ts b/packages/layout-engine/layout-bridge/src/remeasure.ts index 0e9eeec135..c5c708abb1 100644 --- a/packages/layout-engine/layout-bridge/src/remeasure.ts +++ b/packages/layout-engine/layout-bridge/src/remeasure.ts @@ -789,14 +789,64 @@ const applyTabLayoutToLines = ( const alignmentTabStopsPx = tabStops .map((stop, index) => ({ stop, index })) .filter(({ stop }) => stop.val === 'end' || stop.val === 'center' || stop.val === 'decimal'); + + // Per-line-segment tab counts. Segments are delimited by explicit because + // pPr/tabs apply per line, not per paragraph. sd-1480: "Page\t2
Page\t5\t" + // must bind the meaningful tab on each segment to the alignment stop, ignoring + // trailing-empty artifacts. + const tabSegmentInfo = new Map(); + { + let segmentStart = 0; + const closeSegment = (segmentEnd: number) => { + const tabsInSegment: number[] = []; + for (let i = segmentStart; i < segmentEnd; i++) { + if (runs[i].kind === 'tab') tabsInSegment.push(i); + } + let trailingCutoff = segmentEnd; + for (let i = segmentEnd - 1; i >= segmentStart; i--) { + const r = runs[i]; + if (r.kind === 'tab') { + trailingCutoff = i; + continue; + } + break; + } + const effective = tabsInSegment.filter((idx) => idx < trailingCutoff); + const total = effective.length; + effective.forEach((idx, ord) => { + tabSegmentInfo.set(idx, { localOrdinal: ord, segmentTotal: total }); + }); + for (const idx of tabsInSegment) { + if (idx >= trailingCutoff) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); + } + }; + for (let i = 0; i < runs.length; i++) { + const r = runs[i]; + if (r.kind === 'lineBreak' || (r.kind === 'break' && (r as { breakType?: string }).breakType === 'line')) { + closeSegment(i); + segmentStart = i + 1; + } + } + closeSegment(runs.length); + } + // Word-compat heuristic (not ECMA-376 17.3.3.32): the last N tab characters in a - // paragraph bind to the last N explicit end/center/decimal stops. Needed for TOC + // line bind to the last N explicit end/center/decimal stops. Needed for TOC // entries where a right-aligned dot-leader stop coexists with default grid stops. // Mirrored in measuring/dom/src/index.ts. - const getAlignmentStopForOrdinal = (ordinal: number): { stop: TabStopPx; index: number } | null => { + const getAlignmentStopForOrdinal = (ordinal: number, runIdx?: number): { stop: TabStopPx; index: number } | null => { if (alignmentTabStopsPx.length === 0 || totalTabRuns === 0 || !Number.isFinite(ordinal)) return null; - if (ordinal < 0 || ordinal >= totalTabRuns) return null; - const remainingTabs = totalTabRuns - ordinal - 1; + let scopeOrdinal = ordinal; + let scopeTotal = totalTabRuns; + if (runIdx !== undefined) { + const info = tabSegmentInfo.get(runIdx); + if (info) { + scopeOrdinal = info.localOrdinal; + scopeTotal = info.segmentTotal; + } + } + if (scopeOrdinal < 0 || scopeOrdinal >= scopeTotal) return null; + const remainingTabs = scopeTotal - scopeOrdinal - 1; const targetIndex = alignmentTabStopsPx.length - 1 - remainingTabs; if (targetIndex < 0 || targetIndex >= alignmentTabStopsPx.length) return null; return alignmentTabStopsPx[targetIndex]; @@ -828,13 +878,21 @@ const applyTabLayoutToLines = ( /** * Processes a tab character, calculating position and handling alignment. */ - const applyTab = (startRunIndex: number, startChar: number, run?: Run, tabOrdinal?: number): void => { + const applyTab = ( + startRunIndex: number, + startChar: number, + run?: Run, + tabOrdinal?: number, + tabRunIdx?: number, + ): void => { const originX = cursorX; const absCurrentX = cursorX + effectiveIndent; let stop: TabStopPx | undefined; let target: number; const forcedAlignment = - typeof tabOrdinal === 'number' && Number.isFinite(tabOrdinal) ? getAlignmentStopForOrdinal(tabOrdinal) : null; + typeof tabOrdinal === 'number' && Number.isFinite(tabOrdinal) + ? getAlignmentStopForOrdinal(tabOrdinal, tabRunIdx) + : null; if (forcedAlignment && forcedAlignment.stop.pos > absCurrentX + TAB_EPSILON) { stop = forcedAlignment.stop; target = forcedAlignment.stop.pos; @@ -901,7 +959,7 @@ const applyTabLayoutToLines = ( if (run.kind === 'tab') { const tabRun = run as TabRun; const ordinal = consumeTabOrdinal(tabRun.tabIndex); - applyTab(runIndex + 1, 0, run, ordinal); + applyTab(runIndex + 1, 0, run, ordinal, runIndex); continue; } diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index e73d655adc..1e2ada24cb 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1924,6 +1924,75 @@ describe('measureBlock', () => { expect(leader.to).toBeCloseTo(300 - pageNumWidth, 0); }); + it('emits leaders on both lines when a paragraph contains a between tab groups (sd-1480)', async () => { + // Repro for sd-1480-two-col-tab-positions: a single paragraph with right-aligned + // dot-leader tab stop and a soft line break. The tabs/pPr applies per line, so + // BOTH lines must emit a dot leader. + const rightStopTwips = 4306; + const block: FlowBlock = { + kind: 'paragraph', + id: 'multiline-leader', + runs: [ + { text: 'Page', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 0, pmStart: 4, pmEnd: 5 }, + { text: '2', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'lineBreak' }, + { text: 'Page', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 1, pmStart: 11, pmEnd: 12 }, + { text: '5', fontFamily: 'Arial', fontSize: 13.333 }, + ], + attrs: { + tabs: [{ pos: rightStopTwips, val: 'end', leader: 'dot' }], + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 800)); + expect(measure.lines.length).toBeGreaterThanOrEqual(2); + + const line1 = measure.lines[0]; + const line2 = measure.lines[1]; + + expect(line1.leaders, 'line 1 should have a dot leader').toBeDefined(); + expect(line1.leaders).toHaveLength(1); + expect(line1.leaders?.[0]?.style).toBe('dot'); + + expect(line2.leaders, 'line 2 should have a dot leader').toBeDefined(); + expect(line2.leaders).toHaveLength(1); + expect(line2.leaders?.[0]?.style).toBe('dot'); + }); + + it('ignores trailing-empty when binding alignment stops (sd-1480)', async () => { + // The OOXML "Page\t5\t" (trailing empty tab) must still bind the meaningful + // first tab to the right-aligned dot-leader stop. Without trailing-empty + // tab handling, the heuristic would bind the trailing tab and leave "5" at + // a default grid stop with the leader after it. + const rightStopTwips = 4306; + const block: FlowBlock = { + kind: 'paragraph', + id: 'trailing-empty-tab', + runs: [ + { text: 'Page', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 0, pmStart: 4, pmEnd: 5 }, + { text: '5', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 1, pmStart: 6, pmEnd: 7 }, + ], + attrs: { + tabs: [{ pos: rightStopTwips, val: 'end', leader: 'dot' }], + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 800)); + expect(measure.lines).toHaveLength(1); + const line = measure.lines[0]; + expect(line.leaders, 'meaningful tab should bind to alignment stop').toBeDefined(); + expect(line.leaders).toHaveLength(1); + expect(line.leaders?.[0]?.style).toBe('dot'); + // Leader runs from after "Page" to just before "5" at the right stop. + const rightStopPx = rightStopTwips * (96 / 1440); + expect(line.leaders?.[0]?.from).toBeLessThan(40); + expect(line.leaders?.[0]?.to).toBeGreaterThan(rightStopPx - 20); + }); + it('preserves trailing spaces after tabs when line breaks', async () => { const block: FlowBlock = { kind: 'paragraph', diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 1e78634953..9c57bb6435 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -1375,17 +1375,71 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P } }; + // Per-line-segment tab counts. The heuristic below binds the last N tabs of a + // segment to the last N alignment stops; segments are delimited by explicit + // runs because pPr/tabs apply per line, not per paragraph. + // sd-1480-two-col-tab-positions: a single paragraph "Page\t2
Page\t5\t" + // must emit a leader on BOTH lines (per-segment scope) and ignore the trailing + // empty on line 2 so the meaningful tab binds to the alignment stop. + const tabSegmentInfo = new Map(); + { + let segmentStart = 0; + const closeSegment = (segmentEnd: number) => { + const tabsInSegment: number[] = []; + for (let i = segmentStart; i < segmentEnd; i++) { + if (isTabRun(runsToProcess[i])) tabsInSegment.push(i); + } + // Strip trailing tabs that have no content after them — these are authoring + // artifacts (Word emits them sometimes) and should fall through to greedy + // tab-stop matching, not consume an alignment stop slot. + let trailingCutoff = segmentEnd; + for (let i = segmentEnd - 1; i >= segmentStart; i--) { + const r = runsToProcess[i]; + if (isTabRun(r)) { + trailingCutoff = i; + continue; + } + break; + } + const effective = tabsInSegment.filter((idx) => idx < trailingCutoff); + const total = effective.length; + effective.forEach((idx, ord) => { + tabSegmentInfo.set(idx, { localOrdinal: ord, segmentTotal: total }); + }); + for (const idx of tabsInSegment) { + if (idx >= trailingCutoff) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); + } + }; + for (let i = 0; i < runsToProcess.length; i++) { + const r = runsToProcess[i]; + if (isLineBreakRun(r) || (r.kind === 'break' && (r as { breakType?: string }).breakType === 'line')) { + closeSegment(i); + segmentStart = i + 1; + } + } + closeSegment(runsToProcess.length); + } + // Word-compat heuristic (not ECMA-376 17.3.3.32): the last N tab characters in a - // paragraph bind to the last N explicit end/center/decimal stops. Needed for TOC + // line bind to the last N explicit end/center/decimal stops. Needed for TOC // entries where a right-aligned dot-leader stop coexists with default grid stops — // strict greedy next-stop resolution would land the trailing tab on a default stop // instead of the leader stop. Mirrored in layout-bridge/src/remeasure.ts. - const getAlignmentStopForOrdinal = (ordinal: number): { stop: TabStopPx; index: number } | null => { + const getAlignmentStopForOrdinal = (ordinal: number, runIdx?: number): { stop: TabStopPx; index: number } | null => { if (alignmentTabStopsPx.length === 0 || totalTabRuns === 0 || !Number.isFinite(ordinal)) { return null; } - if (ordinal < 0 || ordinal >= totalTabRuns) return null; - const remainingTabs = totalTabRuns - ordinal - 1; + let scopeOrdinal = ordinal; + let scopeTotal = totalTabRuns; + if (runIdx !== undefined) { + const info = tabSegmentInfo.get(runIdx); + if (info) { + scopeOrdinal = info.localOrdinal; + scopeTotal = info.segmentTotal; + } + } + if (scopeOrdinal < 0 || scopeOrdinal >= scopeTotal) return null; + const remainingTabs = scopeTotal - scopeOrdinal - 1; const targetIndex = alignmentTabStopsPx.length - 1 - remainingTabs; if (targetIndex < 0 || targetIndex >= alignmentTabStopsPx.length) return null; return alignmentTabStopsPx[targetIndex]; @@ -1529,7 +1583,7 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P // inputs (explicit + synthetic TabRuns) don't produce out-of-order ordinals. // Mirrors consumeTabOrdinal() in layout-bridge/src/remeasure.ts. sequentialTabIndex = Math.max(sequentialTabIndex, resolvedTabIndex + 1); - const forcedAlignment = getAlignmentStopForOrdinal(resolvedTabIndex); + const forcedAlignment = getAlignmentStopForOrdinal(resolvedTabIndex, runIndex); if (forcedAlignment && forcedAlignment.stop.pos > absCurrentX + TAB_EPSILON) { stop = forcedAlignment.stop; target = forcedAlignment.stop.pos; From 4b63d252adc8df2eea09a6969b2e62ffceaa651a Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Tue, 5 May 2026 08:18:39 -0300 Subject: [PATCH 15/18] fix(measuring): preserve lone trailing tab as the meaningful tab (SD-2452) The trailing-empty-tab guard from the previous commit was too aggressive: a segment shaped like 'Label:\t' (single tab at the very end) had its only tab stripped, falling through to greedy default grid-stop matching. Form-field leaders ('By:_____', 'Name:_____') then truncated to the next 0.5" grid stop instead of extending to the right-aligned alignment stop. Add a guard: if stripping trailing tabs would leave NO effective tabs, treat all tabs in the segment as effective. The trailing-empty heuristic only fires when there's at least one OTHER tab to bind. Verified visually: - sd-1480 'Page........N' with Page+tab+N+trailing-tab still works - 'By:____', 'Name:____', 'Title:____' form fields now extend to right --- .../layout-bridge/src/remeasure.ts | 9 ++++-- .../measuring/dom/src/index.test.ts | 28 +++++++++++++++++++ .../layout-engine/measuring/dom/src/index.ts | 10 +++++-- 3 files changed, 43 insertions(+), 4 deletions(-) diff --git a/packages/layout-engine/layout-bridge/src/remeasure.ts b/packages/layout-engine/layout-bridge/src/remeasure.ts index c5c708abb1..d4ceb6a2df 100644 --- a/packages/layout-engine/layout-bridge/src/remeasure.ts +++ b/packages/layout-engine/layout-bridge/src/remeasure.ts @@ -811,13 +811,18 @@ const applyTabLayoutToLines = ( } break; } - const effective = tabsInSegment.filter((idx) => idx < trailingCutoff); + let effective = tabsInSegment.filter((idx) => idx < trailingCutoff); + // If stripping trailing tabs would leave NO effective tabs, the segment is + // shaped like "Label:\t" or "\t" — the lone tab IS the meaningful one and + // must still bind to the alignment stop. + if (effective.length === 0) effective = tabsInSegment; const total = effective.length; effective.forEach((idx, ord) => { tabSegmentInfo.set(idx, { localOrdinal: ord, segmentTotal: total }); }); + const effectiveSet = new Set(effective); for (const idx of tabsInSegment) { - if (idx >= trailingCutoff) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); + if (!effectiveSet.has(idx)) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); } }; for (let i = 0; i < runs.length; i++) { diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index 1e2ada24cb..0d39e94ecb 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1993,6 +1993,34 @@ describe('measureBlock', () => { expect(line.leaders?.[0]?.to).toBeGreaterThan(rightStopPx - 20); }); + it('binds a lone trailing tab to the alignment stop (Label:\\t form fields)', async () => { + // Form-field pattern "By:\t" has a single trailing tab that IS meaningful. + // The trailing-empty-tab heuristic must NOT strip it, otherwise the leader + // truncates to a default grid stop and "By:______" loses its underscore run. + const rightStopTwips = 4306; + const block: FlowBlock = { + kind: 'paragraph', + id: 'lone-trailing-tab', + runs: [ + { text: 'By:', fontFamily: 'Arial', fontSize: 13.333 }, + { kind: 'tab', text: '\t', tabIndex: 0, pmStart: 3, pmEnd: 4 }, + ], + attrs: { + tabs: [{ pos: rightStopTwips, val: 'end', leader: 'underscore' }], + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 800)); + expect(measure.lines).toHaveLength(1); + const line = measure.lines[0]; + expect(line.leaders).toBeDefined(); + expect(line.leaders).toHaveLength(1); + expect(line.leaders?.[0]?.style).toBe('underscore'); + const rightStopPx = rightStopTwips * (96 / 1440); + // Leader must extend from after "By:" to the right alignment stop. + expect(line.leaders?.[0]?.to).toBeGreaterThan(rightStopPx - 5); + }); + it('preserves trailing spaces after tabs when line breaks', async () => { const block: FlowBlock = { kind: 'paragraph', diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 9c57bb6435..754ef582cd 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -1401,13 +1401,19 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P } break; } - const effective = tabsInSegment.filter((idx) => idx < trailingCutoff); + let effective = tabsInSegment.filter((idx) => idx < trailingCutoff); + // If stripping trailing tabs would leave NO effective tabs, the segment is + // shaped like "Label:\t" or "\t" — the lone tab IS the meaningful one and + // must still bind to the alignment stop. Only strip trailing artifacts + // when at least one other tab remains. + if (effective.length === 0) effective = tabsInSegment; const total = effective.length; effective.forEach((idx, ord) => { tabSegmentInfo.set(idx, { localOrdinal: ord, segmentTotal: total }); }); + const effectiveSet = new Set(effective); for (const idx of tabsInSegment) { - if (idx >= trailingCutoff) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); + if (!effectiveSet.has(idx)) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); } }; for (let i = 0; i < runsToProcess.length; i++) { From e9eaa04e41c7cd1242c00d9af242c5545fd1da55 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Tue, 5 May 2026 08:33:49 -0300 Subject: [PATCH 16/18] =?UTF-8?q?fix(measuring):=20revert=20trailing-empty?= =?UTF-8?q?=20tab=20strip=20=E2=80=94=20false-positive=20regressions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The trailing-empty-tab strip introduced in 315ab84e7 + 4b63d252a treated tabs at the end of a segment as authoring artifacts. That broke patterns where trailing tabs ARE meaningful and need to bind to alignment stops: - HVY-25 Queensland Land Registry block 10 has '\t\t/[text]/\t\t' with 4 tabs and 4 authored stops (2 alignment). The strip walked back through the trailing tabs, marking them artifacts. Tabs 0+1 then ate the 2 alignment stops, putting the center+end binding on the FIRST two tabs instead of the last two — corrupting the layout. - HVY-19 Commercial Lease TOC ('1.\tBUSINESS POINTS\t1') had similar reordering when paragraph layout placed the page number tab as part of a multi-tab segment. The strip can't reliably distinguish authored trailing tabs (HVY-25, form fields with multiple authored stops) from sd-1480-style artifacts (Page\t5\t with one extra trailing tab). Heuristic was too aggressive. Keep the per-line-segment scoping (the real fix that closes line 1 of sd-1480 multi-line paragraphs). Drop the trailing-strip. Sd-1480 line 2 ('Page\t5\t' segment) reverts to baseline 'Page 5 ___' behavior — which is what shipped before this branch, so no new regression there. --- .../layout-bridge/src/remeasure.ts | 46 ++++---------- .../measuring/dom/src/index.test.ts | 60 ------------------- .../layout-engine/measuring/dom/src/index.ts | 49 ++++----------- 3 files changed, 25 insertions(+), 130 deletions(-) diff --git a/packages/layout-engine/layout-bridge/src/remeasure.ts b/packages/layout-engine/layout-bridge/src/remeasure.ts index d4ceb6a2df..ec8e480202 100644 --- a/packages/layout-engine/layout-bridge/src/remeasure.ts +++ b/packages/layout-engine/layout-bridge/src/remeasure.ts @@ -791,48 +791,28 @@ const applyTabLayoutToLines = ( .filter(({ stop }) => stop.val === 'end' || stop.val === 'center' || stop.val === 'decimal'); // Per-line-segment tab counts. Segments are delimited by explicit because - // pPr/tabs apply per line, not per paragraph. sd-1480: "Page\t2
Page\t5\t" - // must bind the meaningful tab on each segment to the alignment stop, ignoring - // trailing-empty artifacts. + // pPr/tabs apply per line, not per paragraph. sd-1480: "Page\t2
Page\t5" must + // bind the trailing tab of EACH segment to the alignment stop, not just the + // paragraph-final tab. const tabSegmentInfo = new Map(); { - let segmentStart = 0; - const closeSegment = (segmentEnd: number) => { - const tabsInSegment: number[] = []; - for (let i = segmentStart; i < segmentEnd; i++) { - if (runs[i].kind === 'tab') tabsInSegment.push(i); - } - let trailingCutoff = segmentEnd; - for (let i = segmentEnd - 1; i >= segmentStart; i--) { - const r = runs[i]; - if (r.kind === 'tab') { - trailingCutoff = i; - continue; - } - break; - } - let effective = tabsInSegment.filter((idx) => idx < trailingCutoff); - // If stripping trailing tabs would leave NO effective tabs, the segment is - // shaped like "Label:\t" or "\t" — the lone tab IS the meaningful one and - // must still bind to the alignment stop. - if (effective.length === 0) effective = tabsInSegment; - const total = effective.length; - effective.forEach((idx, ord) => { - tabSegmentInfo.set(idx, { localOrdinal: ord, segmentTotal: total }); + let segmentTabRunIndices: number[] = []; + const closeSegment = () => { + const total = segmentTabRunIndices.length; + segmentTabRunIndices.forEach((runIdx, ord) => { + tabSegmentInfo.set(runIdx, { localOrdinal: ord, segmentTotal: total }); }); - const effectiveSet = new Set(effective); - for (const idx of tabsInSegment) { - if (!effectiveSet.has(idx)) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); - } + segmentTabRunIndices = []; }; for (let i = 0; i < runs.length; i++) { const r = runs[i]; if (r.kind === 'lineBreak' || (r.kind === 'break' && (r as { breakType?: string }).breakType === 'line')) { - closeSegment(i); - segmentStart = i + 1; + closeSegment(); + } else if (r.kind === 'tab') { + segmentTabRunIndices.push(i); } } - closeSegment(runs.length); + closeSegment(); } // Word-compat heuristic (not ECMA-376 17.3.3.32): the last N tab characters in a diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index 0d39e94ecb..35b2467be1 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1961,66 +1961,6 @@ describe('measureBlock', () => { expect(line2.leaders?.[0]?.style).toBe('dot'); }); - it('ignores trailing-empty when binding alignment stops (sd-1480)', async () => { - // The OOXML "Page\t5\t" (trailing empty tab) must still bind the meaningful - // first tab to the right-aligned dot-leader stop. Without trailing-empty - // tab handling, the heuristic would bind the trailing tab and leave "5" at - // a default grid stop with the leader after it. - const rightStopTwips = 4306; - const block: FlowBlock = { - kind: 'paragraph', - id: 'trailing-empty-tab', - runs: [ - { text: 'Page', fontFamily: 'Arial', fontSize: 13.333 }, - { kind: 'tab', text: '\t', tabIndex: 0, pmStart: 4, pmEnd: 5 }, - { text: '5', fontFamily: 'Arial', fontSize: 13.333 }, - { kind: 'tab', text: '\t', tabIndex: 1, pmStart: 6, pmEnd: 7 }, - ], - attrs: { - tabs: [{ pos: rightStopTwips, val: 'end', leader: 'dot' }], - }, - }; - - const measure = expectParagraphMeasure(await measureBlock(block, 800)); - expect(measure.lines).toHaveLength(1); - const line = measure.lines[0]; - expect(line.leaders, 'meaningful tab should bind to alignment stop').toBeDefined(); - expect(line.leaders).toHaveLength(1); - expect(line.leaders?.[0]?.style).toBe('dot'); - // Leader runs from after "Page" to just before "5" at the right stop. - const rightStopPx = rightStopTwips * (96 / 1440); - expect(line.leaders?.[0]?.from).toBeLessThan(40); - expect(line.leaders?.[0]?.to).toBeGreaterThan(rightStopPx - 20); - }); - - it('binds a lone trailing tab to the alignment stop (Label:\\t form fields)', async () => { - // Form-field pattern "By:\t" has a single trailing tab that IS meaningful. - // The trailing-empty-tab heuristic must NOT strip it, otherwise the leader - // truncates to a default grid stop and "By:______" loses its underscore run. - const rightStopTwips = 4306; - const block: FlowBlock = { - kind: 'paragraph', - id: 'lone-trailing-tab', - runs: [ - { text: 'By:', fontFamily: 'Arial', fontSize: 13.333 }, - { kind: 'tab', text: '\t', tabIndex: 0, pmStart: 3, pmEnd: 4 }, - ], - attrs: { - tabs: [{ pos: rightStopTwips, val: 'end', leader: 'underscore' }], - }, - }; - - const measure = expectParagraphMeasure(await measureBlock(block, 800)); - expect(measure.lines).toHaveLength(1); - const line = measure.lines[0]; - expect(line.leaders).toBeDefined(); - expect(line.leaders).toHaveLength(1); - expect(line.leaders?.[0]?.style).toBe('underscore'); - const rightStopPx = rightStopTwips * (96 / 1440); - // Leader must extend from after "By:" to the right alignment stop. - expect(line.leaders?.[0]?.to).toBeGreaterThan(rightStopPx - 5); - }); - it('preserves trailing spaces after tabs when line breaks', async () => { const block: FlowBlock = { kind: 'paragraph', diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 754ef582cd..1d4a7ff29e 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -1378,52 +1378,27 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P // Per-line-segment tab counts. The heuristic below binds the last N tabs of a // segment to the last N alignment stops; segments are delimited by explicit // runs because pPr/tabs apply per line, not per paragraph. - // sd-1480-two-col-tab-positions: a single paragraph "Page\t2
Page\t5\t" - // must emit a leader on BOTH lines (per-segment scope) and ignore the trailing - // empty on line 2 so the meaningful tab binds to the alignment stop. + // sd-1480-two-col-tab-positions: a single paragraph "Page\t2
Page\t5" + // must emit a leader on BOTH lines, not only the last. const tabSegmentInfo = new Map(); { - let segmentStart = 0; - const closeSegment = (segmentEnd: number) => { - const tabsInSegment: number[] = []; - for (let i = segmentStart; i < segmentEnd; i++) { - if (isTabRun(runsToProcess[i])) tabsInSegment.push(i); - } - // Strip trailing tabs that have no content after them — these are authoring - // artifacts (Word emits them sometimes) and should fall through to greedy - // tab-stop matching, not consume an alignment stop slot. - let trailingCutoff = segmentEnd; - for (let i = segmentEnd - 1; i >= segmentStart; i--) { - const r = runsToProcess[i]; - if (isTabRun(r)) { - trailingCutoff = i; - continue; - } - break; - } - let effective = tabsInSegment.filter((idx) => idx < trailingCutoff); - // If stripping trailing tabs would leave NO effective tabs, the segment is - // shaped like "Label:\t" or "\t" — the lone tab IS the meaningful one and - // must still bind to the alignment stop. Only strip trailing artifacts - // when at least one other tab remains. - if (effective.length === 0) effective = tabsInSegment; - const total = effective.length; - effective.forEach((idx, ord) => { - tabSegmentInfo.set(idx, { localOrdinal: ord, segmentTotal: total }); + let segmentTabRunIndices: number[] = []; + const closeSegment = () => { + const total = segmentTabRunIndices.length; + segmentTabRunIndices.forEach((runIdx, ord) => { + tabSegmentInfo.set(runIdx, { localOrdinal: ord, segmentTotal: total }); }); - const effectiveSet = new Set(effective); - for (const idx of tabsInSegment) { - if (!effectiveSet.has(idx)) tabSegmentInfo.set(idx, { localOrdinal: -1, segmentTotal: 0 }); - } + segmentTabRunIndices = []; }; for (let i = 0; i < runsToProcess.length; i++) { const r = runsToProcess[i]; if (isLineBreakRun(r) || (r.kind === 'break' && (r as { breakType?: string }).breakType === 'line')) { - closeSegment(i); - segmentStart = i + 1; + closeSegment(); + } else if (isTabRun(r)) { + segmentTabRunIndices.push(i); } } - closeSegment(runsToProcess.length); + closeSegment(); } // Word-compat heuristic (not ECMA-376 17.3.3.32): the last N tab characters in a From bc91d9a15a53e6304fe4a93f4607cbb77663b2be Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Tue, 5 May 2026 10:20:16 -0300 Subject: [PATCH 17/18] fix(measuring): gate SD-2447 alignment heuristic on default stops only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The SD-2447 heuristic forces the last N tabs to bind to the last N end/center/decimal stops. It was added because TOC styles often have ONLY a right-aligned dot-leader stop, and tabStops gets seeded with synthetic 0.5" defaults from origin (seedDefaultsFromZero=true). Greedy then lands on a default 0.5" grid stop instead of the alignment stop — hence the heuristic. But for paragraphs with an EXPLICIT start-aligned stop ahead of the alignment stop (TOC1 style with 'start@740, end@9360, end@10080': template_format and similar Word lease templates), greedy correctly lands on the start stop and the alignment stop downstream — no force needed. The heuristic over-fires and binds tab 0 to the right alignment stop, producing the broken render: leader BEFORE the title with the page number jammed against it. Fix: compute greedy first; only apply the heuristic when greedy would land on a 'source: default' stop. When greedy already lands on an explicit stop, use it. Mirrored in measuring/dom and remeasure. Effect: - template_format TOC: now renders '1. BUSINESS POINTS........1' matching Word and the published baseline. - HVY-25 / SD-2447 fixture / sd-1480 line 1: behavior preserved. - All test suites pass (measuring-dom 332, layout-bridge 1192, layout-engine 644). --- .../layout-bridge/src/remeasure.ts | 14 ++++++++----- .../layout-engine/measuring/dom/src/index.ts | 20 ++++++++++++++----- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/packages/layout-engine/layout-bridge/src/remeasure.ts b/packages/layout-engine/layout-bridge/src/remeasure.ts index ec8e480202..1356979fae 100644 --- a/packages/layout-engine/layout-bridge/src/remeasure.ts +++ b/packages/layout-engine/layout-bridge/src/remeasure.ts @@ -874,8 +874,13 @@ const applyTabLayoutToLines = ( const absCurrentX = cursorX + effectiveIndent; let stop: TabStopPx | undefined; let target: number; + // Mirror of measuring/dom: only force the SD-2447 heuristic when greedy + // would land on a `source:default` stop (synthetic 0.5" grid). Explicit + // start stops should win greedy. + const greedy = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); + const greedyOnDefault = greedy.stop?.source === 'default'; const forcedAlignment = - typeof tabOrdinal === 'number' && Number.isFinite(tabOrdinal) + greedyOnDefault && typeof tabOrdinal === 'number' && Number.isFinite(tabOrdinal) ? getAlignmentStopForOrdinal(tabOrdinal, tabRunIdx) : null; if (forcedAlignment && forcedAlignment.stop.pos > absCurrentX + TAB_EPSILON) { @@ -883,10 +888,9 @@ const applyTabLayoutToLines = ( target = forcedAlignment.stop.pos; tabStopCursor = forcedAlignment.index + 1; } else { - const next = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); - stop = next.stop; - target = next.target; - tabStopCursor = next.nextIndex; + stop = greedy.stop; + target = greedy.target; + tabStopCursor = greedy.nextIndex; } const clampedTarget = Number.isFinite(maxAbsWidth) ? Math.min(target, maxAbsWidth) : target; const relativeTarget = clampedTarget - effectiveIndent; diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 1d4a7ff29e..ed7ce518d6 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -1564,16 +1564,26 @@ async function measureParagraphBlock(block: ParagraphBlock, maxWidth: number): P // inputs (explicit + synthetic TabRuns) don't produce out-of-order ordinals. // Mirrors consumeTabOrdinal() in layout-bridge/src/remeasure.ts. sequentialTabIndex = Math.max(sequentialTabIndex, resolvedTabIndex + 1); - const forcedAlignment = getAlignmentStopForOrdinal(resolvedTabIndex, runIndex); + // Compute greedy first so we can decide whether the SD-2447 heuristic is + // actually needed. The heuristic exists because when tabStops are seeded + // with synthetic 0.5" defaults from origin (TOC styles with only an + // alignment stop), greedy lands on a default before reaching the + // alignment stop. When the paragraph has an explicit start-aligned stop + // ahead of the alignment stop (e.g. TOC1 with `start@740, end@9360`), + // greedy already finds the correct stop and the heuristic over-fires. + // Only force the heuristic when greedy would land on a `source:default` + // stop — which is precisely the SD-2447 condition. + const greedy = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); + const greedyOnDefault = greedy.stop?.source === 'default'; + const forcedAlignment = greedyOnDefault ? getAlignmentStopForOrdinal(resolvedTabIndex, runIndex) : null; if (forcedAlignment && forcedAlignment.stop.pos > absCurrentX + TAB_EPSILON) { stop = forcedAlignment.stop; target = forcedAlignment.stop.pos; tabStopCursor = forcedAlignment.index + 1; } else { - const nextStop = getNextTabStopPx(absCurrentX, tabStops, tabStopCursor); - target = nextStop.target; - tabStopCursor = nextStop.nextIndex; - stop = nextStop.stop; + target = greedy.target; + tabStopCursor = greedy.nextIndex; + stop = greedy.stop; } const maxAbsWidth = currentLine.maxWidth + effectiveIndent; const clampedTarget = Math.min(target, maxAbsWidth); From c50a40884caa71915465efe8aa003da12e4cf122 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Tue, 5 May 2026 11:27:01 -0300 Subject: [PATCH 18/18] fix(layout-engine): scope explicit-continuous rule to ending section (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Address Luccas's [P2] review comment. Rule 2 of the continuous-balancing gate previously fired whenever ANY section in the document had an explicit continuous break, allowing balancing for every multi-column section whose own type was omitted — even unrelated ones. A later single-page two-column body section with omitted would be balanced just because an earlier section was explicit-continuous, violating sd-1655's skip-omitted-single-page rule. Per ECMA-376 §17.18.77, a continuous break balances the section it ENDS. When the body sectPr authors an explicit continuous break, the affected section is the one IMMEDIATELY preceding the body. Tighten rule 2 from a doc-wide flag to bodyExplicitContinuousIdx − 1. Verified: - sd-1480: section 0 still balances (rule 2 fires for sectionIdx 0 === bodyExplicitContinuousIdx 1 − 1). - mixed-columns-tabs-tnr: body section (sectionIdx 1) does not balance (no longer matches bodyExplicitContinuousIdx − 1 = 0). - sd-1655: not affected (no body-explicit-continuous in the doc). - Hypothetical 'mid-doc explicit-continuous + body omitted single-page 2-col': body now correctly skipped. All 644 layout-engine tests pass. --- .../layout-engine/layout-engine/src/index.ts | 50 ++++++++----------- 1 file changed, 22 insertions(+), 28 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 4ae813d066..dc6cd88af1 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -2765,34 +2765,26 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const typeIsExplicit = sectionTypeIsExplicit.get(sectionIdx) === true; const isLast = lastSectionIdx !== null && sectionIdx === lastSectionIdx; - // Scan all sections once for a doc-wide explicit continuous break. - let docHasExplicitContinuous = false; - for (const [idx, explicit] of sectionTypeIsExplicit) { - if (explicit && sectionEndBreakType.get(idx) === 'continuous') { - docHasExplicitContinuous = true; - break; - } - } - // Per ECMA-376 §17.18.77, a continuous break balances the section it - // ENDS (i.e., the section BEFORE the break), not the section that - // CONTAINS or follows it. When the body sectPr itself is the explicit - // continuous trigger, it balances the section preceding the body, not - // the body section itself. Compare: + // ENDS — i.e., the section BEFORE the break, not the section that + // contains or follows it. When the body sectPr authors an explicit + // continuous break, the affected section is the one IMMEDIATELY + // preceding the body. Compare: // - // sd-1480: body sectPr explicit-continuous + 2-col, section 0 has - // content. Word balances section 0 (3+3). - // mixed-columns-tabs-tnr: body sectPr explicit-continuous + 2-col, - // section 1 (the body) has the 2-col Test list, section 0 - // has 1-col descriptions. Word does NOT balance section 1 - // (14+5 column-flow); the body-as-trigger applies to - // section 0, which is single-col and so a no-op. + // sd-1480: 2 sections; body (section 1) is explicit-continuous, + // section 0 has the 2-col content. Word balances section 0 + // (3+3) — exactly bodyExplicitContinuousIdx - 1. + // mixed-columns-tabs-tnr: body explicit-continuous, body has the + // 2-col Test list, section 0 is 1-col descriptions. Word + // does NOT balance section 1 (14+5 column-flow); the + // body-as-trigger applies to section 0 (single-col, no-op). // - // Excluding the body-explicit-continuous section itself from rule 2 - // matches both: section 0 of sd-1480 still balances (it's not the - // body), section 1 of mixed-columns-tabs-tnr does not (it IS the - // body). The body section can still balance via rule 1 (impossible: - // last section can't satisfy "not last") or rule 3 (multi-page). + // Earlier this rule used a doc-wide `docHasExplicitContinuous` flag, + // which over-fired for any multi-col section in the document whenever + // some other section was explicit-continuous — including a single-page + // body section with omitted `` that should match sd-1655's + // skip rule. Tying it to bodyExplicitContinuousIdx − 1 (the section + // the break actually ends) restores ECMA-correct scope. const bodyExplicitContinuousIdx = lastSectionIdx !== null && sectionTypeIsExplicit.get(lastSectionIdx) === true && @@ -2828,11 +2820,13 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (isMultiPage && !isLast) continue; const allowedByMidDocContinuous = endBreakType === 'continuous' && !isLast; - const allowedByDocWideExplicit = - docHasExplicitContinuous && !isExplicitNonContinuous && sectionIdx !== bodyExplicitContinuousIdx; + // Body-explicit-continuous balances the section IT ENDS, which is the + // section immediately preceding the body. No doc-wide flag. + const allowedByBodyExplicitContinuous = + bodyExplicitContinuousIdx !== null && sectionIdx === bodyExplicitContinuousIdx - 1 && !isExplicitNonContinuous; const allowedByMultiPage = isMultiPage; - if (!allowedByMidDocContinuous && !allowedByDocWideExplicit && !allowedByMultiPage) continue; + if (!allowedByMidDocContinuous && !allowedByBodyExplicitContinuous && !allowedByMultiPage) continue; } // Find the last page carrying any fragments from this section.