From 5b2335a95f630de6130e06c83e967a7c13794fc1 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 20 Apr 2026 14:37:03 -0300 Subject: [PATCH 01/12] feat(layout-engine): balance columns at continuous section breaks (SD-2452) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implements ECMA-376 §17.18.77 column balancing for multi-column sections. Word produces a minimum-height balanced layout at the end of a continuous (and, empirically, next-page) multi-column section; SuperDoc was either leaving content stacked in the first column or, in some layouts, producing overlapping fragments. The pagination pipeline now balances each multi-column section's last page at layout time: - layoutDocument builds a block -> section map by walking blocks in document order and tracking the current section from the most recent sectionBreak (pm-adapter only stamps attrs.sectionIndex on sectionBreak blocks, not on content paragraphs). - A new balanceSectionOnPage helper performs section-scoped balancing with its own fragment-level positioning (no Y-grouping): fragments are ordered by (x, y) in document order and each is treated as its own block. The previous balancePageColumns grouped fragments by Y into "rows," which collapsed fragments from different source columns at the same Y and produced overlap. - calculateBalancedColumnHeight is now a proper binary search for the minimum column height H such that greedy left-to-right fill places every block with every column <= H. This matches Word's left-heavy packing preference (e.g. 7 blocks / 3 cols -> 3+3+1, not 2+2+3). - A mid-page hook at forceMidPageRegion balances the ending section on the current page before starting the new region, and collapses both cursors to balanceResult.maxY so the next region begins just below the balanced columns. Sections handled mid-page are tracked in alreadyBalancedSections so the post-layout pass doesn't double-balance. - The prior "last page of document" heuristic is replaced with a per-section post-layout loop that balances each multi-column section's last page, skipping sections already handled mid-page. Tests: - 11 new unit/integration tests covering the 5 SD-2452 fixtures (2-col/3-col, equal and unequal heights, continuous and next-page breaks, multi-page sections, explicit column-break opt-out). - 614 layout-engine tests pass, 1737 pm-adapter tests pass, 11375 super-editor tests pass. Visual validation against Microsoft Word for all 5 fixtures: - Test 1 (6 paras / 2 cols): 3+3 exact match - Test 2 (5 mixed / 2 cols): 2+3 exact match - Test 3 (7 paras / 3 cols): 3+3+1 exact match - Test 4 (13 paras / 2 cols): 7+6 exact match, overlap gone - Test 5 (continuous + next-page): 3+2, 3+2 exact match --- .../src/column-balancing.test.ts | 198 +++++++++- .../layout-engine/src/column-balancing.ts | 343 +++++++++++------- .../layout-engine/src/index.test.ts | 199 ++++++++++ .../layout-engine/layout-engine/src/index.ts | 240 ++++++------ 4 files changed, 727 insertions(+), 253 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.test.ts b/packages/layout-engine/layout-engine/src/column-balancing.test.ts index 7aa7431135..6e83a179b6 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.test.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.test.ts @@ -335,13 +335,9 @@ function createMeasure(kind: string, lineHeights: number[]): { kind: string; lin describe('balancePageColumns', () => { describe('basic balancing', () => { - it('should distribute fragments across 2 columns based on target height', () => { - // 4 fragments, each 20px tall = 80px total, target = 40px per column - // With >= condition: switch when adding would reach/exceed 40px - // Block 1 (20px): column 0, height=20 - // Block 2 (20px): 20+20=40 >= 40, switch! column 1, height=20 - // Block 3, 4: stay in column 1 - // Result: 1 in column 0, 3 in column 1 + it('balances 4 equal blocks into 2+2 across 2 columns', () => { + // 4 fragments × 20px each in a 2-col section. Word minimizes section height by + // placing 2 per column (40px per col) rather than 1+3 (max 60px). const fragments = [ createFragment('block-1', 96, 96, 624), createFragment('block-2', 96, 116, 624), @@ -357,10 +353,9 @@ describe('balancePageColumns', () => { balancePageColumns(fragments, { count: 2, gap: 48, width: 288 }, { left: 96 }, 96, 40, measureMap); - // Block 1 stays in column 0 + // First half in col 0, second half in col 1 — minimum section height. expect(fragments[0].x).toBe(96); - // Blocks 2, 3, 4 move to column 1 - expect(fragments[1].x).toBe(432); + expect(fragments[1].x).toBe(96); expect(fragments[2].x).toBe(432); expect(fragments[3].x).toBe(432); }); @@ -558,3 +553,186 @@ describe('balancePageColumns', () => { }); }); }); + +// ============================================================================ +// balanceSectionOnPage Tests (Section-scoped balancing) +// ============================================================================ + +import { balanceSectionOnPage } from './column-balancing.js'; + +describe('balanceSectionOnPage', () => { + type TestFragment = { blockId: string; x: number; y: number; width: number; kind: string }; + + /** Build a fragment + section mapping for section-scoped tests. */ + function buildSectionFixture( + sectionIndex: number, + count: number, + height = 20, + startY = 96, + ): { + fragments: TestFragment[]; + measureMap: Map }>; + blockSectionMap: Map; + } { + const fragments: TestFragment[] = []; + const measureMap = new Map }>(); + const blockSectionMap = new Map(); + for (let i = 0; i < count; i++) { + const id = `s${sectionIndex}-b${i}`; + fragments.push({ blockId: id, x: 96, y: startY + i * height, width: 624, kind: 'para' }); + measureMap.set(id, createMeasure('paragraph', [height])); + blockSectionMap.set(id, sectionIndex); + } + return { fragments, measureMap, blockSectionMap }; + } + + it('balances the target section and returns the tallest balanced column bottom', () => { + // 6 equal paragraphs in a 2-col section → 3+3 balanced, tallest col ends at top + 3×20 = top + 60. + const top = 96; + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6, 20, top); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: top, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + // Returned maxY is the bottom of the tallest balanced column. + expect(result).not.toBeNull(); + expect(result!.maxY).toBe(top + 60); + + // Observable outcome: fragments split evenly across two columns. + const col0 = fragments.filter((f) => f.x === 96).length; + const col1 = fragments.filter((f) => f.x === 96 + 288 + 48).length; + expect(col0).toBe(3); + expect(col1).toBe(3); + }); + + it('returns null and leaves fragments untouched when section has <= 1 column', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 3); + const snapshot = fragments.map((f) => ({ x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 1, gap: 0, width: 624 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 624, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => { + expect(f.x).toBe(snapshot[i].x); + expect(f.y).toBe(snapshot[i].y); + }); + }); + + it('returns null when section contains an explicit column break', () => { + // Author-placed column breaks override balancing — preserve their intent. + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 6); + const snapshot = fragments.map((f) => f.x); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: true, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + fragments.forEach((f, i) => expect(f.x).toBe(snapshot[i])); + }); + + it('returns null when section has unequal explicit column widths', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(2, 4); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288, equalWidth: false, widths: [200, 376] }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); + + it('only moves fragments of the target section when the page has mixed sections', () => { + // Page has 3 fragments in section 1 (already positioned in col 0) and 6 in section 2. + // Balancing section 2 must not touch section 1 fragments. + const sec1 = buildSectionFixture(1, 3, 20, 96); + const sec2 = buildSectionFixture(2, 6, 20, 160); + const fragments = [...sec1.fragments, ...sec2.fragments]; + const measureMap = new Map([...sec1.measureMap, ...sec2.measureMap]); + const blockSectionMap = new Map([...sec1.blockSectionMap, ...sec2.blockSectionMap]); + const sec1Snapshot = sec1.fragments.map((f) => ({ id: f.blockId, x: f.x, y: f.y })); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 2, + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 160, + columnWidth: 288, + availableHeight: 60, + measureMap, + }); + + expect(result).not.toBeNull(); + + // Section 1 fragments unchanged. + for (const s of sec1Snapshot) { + const f = fragments.find((x) => x.blockId === s.id)!; + expect(f.x).toBe(s.x); + expect(f.y).toBe(s.y); + } + + // Section 2 fragments now split across two columns. + const sec2Xs = new Set(sec2.fragments.map((f) => f.x)); + expect(sec2Xs.size).toBe(2); + }); + + it('returns null when no fragments on the page belong to the target section', () => { + const { fragments, measureMap, blockSectionMap } = buildSectionFixture(1, 3); + + const result = balanceSectionOnPage({ + fragments, + sectionIndex: 99, // different section + sectionColumns: { count: 2, gap: 48, width: 288 }, + sectionHasExplicitColumnBreak: false, + blockSectionMap, + margins: { left: 96 }, + topMargin: 96, + columnWidth: 288, + availableHeight: 720, + measureMap, + }); + + expect(result).toBeNull(); + }); +}); diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index cb14ae8a2c..94466f9b57 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -156,64 +156,58 @@ export function calculateBalancedColumnHeight( }; } - // Calculate total content height + // Calculate total content height and block-height extremes const totalHeight = ctx.contentBlocks.reduce((sum, b) => sum + b.measuredHeight, 0); + const maxBlockHeight = ctx.contentBlocks.reduce((m, b) => Math.max(m, b.measuredHeight), 0); // Early exit: content is very small, no need to balance if (totalHeight < config.minColumnHeight * ctx.columnCount) { return createSingleColumnResult(ctx); } - // Initial target: evenly divide content - let targetHeight = Math.ceil(totalHeight / ctx.columnCount); - - // Ensure target meets minimum column height - targetHeight = Math.max(targetHeight, config.minColumnHeight); - - // Don't exceed available height - targetHeight = Math.min(targetHeight, ctx.availableHeight); + // Binary-search for the minimum column height H such that a greedy + // left-to-right fill places every block with every column ≤ H. This matches + // Word's observed behavior: left columns are filled as tightly as possible + // against the minimum viable height, leaving the last column shorter when + // content doesn't divide evenly (e.g. 7 blocks across 3 columns → 3+3+1, + // not 2+2+3). Both splits have the same max column height, but Word prefers + // left-heavy packing for visual rhythm. + let lo = Math.max(maxBlockHeight, config.minColumnHeight); + let hi = Math.min(totalHeight, ctx.availableHeight); + if (lo > hi) lo = hi; let bestResult: SimulationResult | null = null; - let bestScore = Infinity; - - for (let i = 0; i < config.maxIterations; i++) { - const simulation = simulateBalancedLayout(ctx, targetHeight, config); - - // Calculate balance score (lower is better) - const score = calculateBalanceScore(simulation.columnHeights, config.tolerance); - - if (score < bestScore) { - bestScore = score; - bestResult = simulation; + let bestH = hi; + let iterations = 0; + + while (lo <= hi) { + iterations++; + const mid = Math.floor((lo + hi) / 2); + const sim = simulateBalancedLayout(ctx, mid, config); + const maxCol = Math.max(...sim.columnHeights); + const placed = sim.assignments.size === ctx.contentBlocks.length; + if (placed && maxCol <= mid) { + bestResult = sim; + bestH = mid; + hi = mid - 1; + } else { + lo = mid + 1; } - - // Check if we've achieved acceptable balance - if (isBalanced(simulation.columnHeights, config.tolerance)) { - return { - targetColumnHeight: targetHeight, - columnAssignments: simulation.assignments, - success: true, - iterations: i + 1, - blockBreakPoints: simulation.breakPoints.size > 0 ? simulation.breakPoints : undefined, - }; - } - - // Adjust target based on simulation results - targetHeight = adjustTargetHeight(simulation, targetHeight, ctx, config); + if (iterations >= config.maxIterations) break; } - // Use best result found if (bestResult) { return { - targetColumnHeight: targetHeight, + targetColumnHeight: bestH, columnAssignments: bestResult.assignments, - success: false, // Didn't converge within iterations - iterations: config.maxIterations, + success: true, + iterations, blockBreakPoints: bestResult.breakPoints.size > 0 ? bestResult.breakPoints : undefined, }; } - // Fallback: simple sequential layout + // Fallback: simple sequential layout if binary search never found a valid H + // (e.g. availableHeight too small to fit content). return createSequentialResult(ctx); } @@ -351,73 +345,6 @@ function calculateParagraphBreakPoint( return { breakAfterLine: lines.length - 1, canBreak: true }; } -/** - * Check if column heights are balanced within tolerance. - */ -function isBalanced(columnHeights: number[], tolerance: number): boolean { - if (columnHeights.length <= 1) return true; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return true; - - const maxHeight = Math.max(...nonEmptyHeights); - const minHeight = Math.min(...nonEmptyHeights); - - return maxHeight - minHeight <= tolerance; -} - -/** - * Calculate a balance score (lower is better). - * Used to track best result across iterations. - */ -function calculateBalanceScore(columnHeights: number[], tolerance: number): number { - if (columnHeights.length <= 1) return 0; - - const nonEmptyHeights = columnHeights.filter((h) => h > 0); - if (nonEmptyHeights.length <= 1) return 0; - - // Score based on variance from mean - const mean = nonEmptyHeights.reduce((a, b) => a + b, 0) / nonEmptyHeights.length; - const variance = nonEmptyHeights.reduce((sum, h) => sum + Math.pow(h - mean, 2), 0); - - // Penalize empty columns - const emptyPenalty = (columnHeights.length - nonEmptyHeights.length) * tolerance * 10; - - return variance + emptyPenalty; -} - -/** - * Adjust target height based on simulation results. - */ -function adjustTargetHeight( - simulation: SimulationResult, - currentTarget: number, - ctx: BalancingContext, - config: ColumnBalancingConfig, -): number { - const heights = simulation.columnHeights; - const maxHeight = Math.max(...heights); - const minHeight = Math.min(...heights.filter((h) => h > 0)); - - // If last column is significantly taller, increase target - if (heights[heights.length - 1] > maxHeight * 0.9 && heights[heights.length - 1] > currentTarget) { - return Math.min(currentTarget + (maxHeight - currentTarget) / 2, ctx.availableHeight); - } - - // If first columns are too tall and last is too short, decrease target - if (heights[0] > currentTarget && heights[heights.length - 1] < currentTarget * 0.5) { - return Math.max(currentTarget - (currentTarget - minHeight) / 2, config.minColumnHeight); - } - - // Binary search style adjustment - const diff = maxHeight - minHeight; - if (maxHeight > currentTarget) { - return Math.min(currentTarget + diff / 4, ctx.availableHeight); - } else { - return Math.max(currentTarget - diff / 4, config.minColumnHeight); - } -} - // ============================================================================ // Helper Functions // ============================================================================ @@ -562,7 +489,7 @@ export function shouldSkipBalancing( * Fragment with required properties for column balancing. * Represents a positioned content block that can be redistributed across columns. */ -interface BalancingFragment { +export interface BalancingFragment { /** Horizontal position in pixels from left edge of page */ x: number; /** Vertical position in pixels from top edge of page */ @@ -585,7 +512,7 @@ interface BalancingFragment { * Measure data used to calculate fragment heights. * Contains layout measurements from the measuring phase. */ -interface MeasureData { +export interface MeasureData { /** Type of measure: 'paragraph', 'image', etc. */ kind: string; /** Line measurements for paragraph content */ @@ -746,46 +673,180 @@ export function balancePageColumns( return; } - // Calculate target height per column for balanced distribution - const targetHeight = totalHeight / columns.count; - - // Skip balancing if target height is below minimum threshold - if (targetHeight < DEFAULT_BALANCING_CONFIG.minColumnHeight) { + // Skip balancing if balanced height per column would be below minimum threshold + if (totalHeight / columns.count < DEFAULT_BALANCING_CONFIG.minColumnHeight) { return; } - // Distribute rows across columns using greedy algorithm. - // Each row is assigned to the current column until adding it would - // reach or exceed the target height, then we advance to the next column. - let currentColumn = 0; - let currentColumnHeight = 0; - let currentY = topMargin; - - for (const [, rowFragments] of sortedRows) { - const rowHeight = Math.max(...rowFragments.map((f) => f.height)); - - // Advance to next column when current column reaches target height. - // Uses >= to match Word's behavior: switch when target is reached, not just exceeded. - // This ensures balanced distribution where the first column doesn't exceed its share. - if ( - currentColumnHeight > 0 && - currentColumnHeight + rowHeight >= targetHeight && - currentColumn < columns.count - 1 - ) { - currentColumn++; - currentColumnHeight = 0; - currentY = topMargin; - } - - // Position all fragments in this row within the current column - const colX = columnX(currentColumn); + // Delegate to the binary-search algorithm to find the minimum section height + // where all content fits across N columns. This matches Word's behavior: Word + // finds the smallest max-column-height that keeps content within constraints, + // rather than greedily splitting at total/N (which can leave col1 barely + // populated when one paragraph is much taller than the rest). + const result = calculateBalancedColumnHeight( + { + columnCount: columns.count, + columnWidth: columns.width, + columnGap: columns.gap, + availableHeight, + contentBlocks, + }, + DEFAULT_BALANCING_CONFIG, + ); + + // Apply the assignments to fragments: pack each column top-to-bottom from topMargin, + // indexing back into sortedRows via the same ordering used to build contentBlocks. + const colCursors = new Array(columns.count).fill(topMargin); + for (let i = 0; i < sortedRows.length; i++) { + const [, rowFragments] = sortedRows[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + const colX = columnX(col); + const rowHeight = block.measuredHeight; for (const info of rowFragments) { info.fragment.x = colX; - info.fragment.y = currentY; + info.fragment.y = colCursors[col]; info.fragment.width = columns.width; } + colCursors[col] += rowHeight; + } +} + +// ============================================================================ +// Section-scoped balancing (wraps balancePageColumns with per-section guards) +// ============================================================================ + +/** + * Column layout properties relevant to balancing decisions. + * Mirrors the subset of ColumnLayout that this module reads. + */ +export interface SectionColumnLayout { + count: number; + gap: number; + width?: number; + widths?: number[]; + equalWidth?: boolean; +} + +export interface BalanceSectionOnPageArgs { + /** All fragments on the target page. Only those belonging to sectionIndex are balanced (mutated in place). */ + fragments: BalancingFragment[]; + /** Section whose content ends on this page. */ + sectionIndex: number; + /** Column layout of the ending section. */ + sectionColumns: SectionColumnLayout; + /** True if the section contains an explicit — skip balancing to preserve author intent. */ + sectionHasExplicitColumnBreak: boolean; + /** blockId -> sectionIndex map (built once per layout, shared across calls). */ + blockSectionMap: Map; + /** Left page margin, used to compute column X positions. */ + margins: { left: number }; + /** Y position where the section's region begins on this page. */ + topMargin: number; + /** Column width — passed to balancePageColumns so it can resize fragments. */ + columnWidth: number; + /** Available height from topMargin to content bottom. */ + availableHeight: number; + /** Measurement data for fragments (built from measures array). */ + measureMap: Map; +} + +/** + * Balance the fragments of one section on one page. + * + * Returns the tallest balanced column's bottom Y, or null if balancing was skipped. + * Callers can use the returned Y to update paginator cursors so subsequent content + * starts just below the balanced section rather than below an unbalanced maxCursorY. + * + * Guards (skip balancing when): + * - Section has <= 1 column (nothing to balance) + * - Section contains an explicit column break (author intent wins) + * - Section uses unequal column widths (Word doesn't rebalance these) + * - No fragments on this page belong to the section + */ +export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: number } | null { + const { sectionColumns, sectionHasExplicitColumnBreak, sectionIndex, blockSectionMap, fragments } = args; + + if (sectionColumns.count <= 1) return null; + if (sectionHasExplicitColumnBreak) return null; + if (sectionColumns.equalWidth === false && Array.isArray(sectionColumns.widths) && sectionColumns.widths.length > 0) { + return null; + } + + // Filter to fragments of the target section on this page. + const sectionFragments = fragments.filter((f) => blockSectionMap.get(f.blockId) === sectionIndex); + if (sectionFragments.length === 0) return null; + + const columnCount = sectionColumns.count; + const columnGap = sectionColumns.gap; + const columnWidth = sectionColumns.width ?? 0; + if (columnWidth <= 0) return null; + + // Use the minimum Y of the section's fragments as the balancing origin — the + // section may start mid-page (e.g. section 0 is single-column and section 1 + // continues below it). Using topMargin unconditionally would stack balanced + // columns on top of earlier single-column content on the same page. + let sectionTopY = Number.POSITIVE_INFINITY; + for (const f of sectionFragments) { + if (f.y < sectionTopY) sectionTopY = f.y; + } + if (!Number.isFinite(sectionTopY)) sectionTopY = args.topMargin; + + // Remaining height from the section's actual top to the page content bottom. + const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); + if (remainingHeight <= 0) return null; + + // Order fragments in document order: by current column (x → left-to-right), + // then by y within each column. During unbalanced layout the paginator fills + // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the + // original sequence. + const ordered = [...sectionFragments].sort((a, b) => { + if (a.x !== b.x) return a.x - b.x; + return a.y - b.y; + }); + + // Treat each fragment as its own block for binary-search balancing. Grouping + // by y (as balancePageColumns does) would collapse fragments from different + // source columns that happen to share a y coordinate into a single row and + // re-stack them at one position — producing overlap. + const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ + blockId: `${f.blockId}#${i}`, + measuredHeight: getFragmentHeight(f, args.measureMap), + canBreak: false, + keepWithNext: false, + keepTogether: true, + })); + + if ( + shouldSkipBalancing({ + columnCount, + columnWidth, + columnGap, + availableHeight: remainingHeight, + contentBlocks, + }) + ) { + return null; + } - currentColumnHeight += rowHeight; - currentY += rowHeight; + const result = calculateBalancedColumnHeight( + { columnCount, columnWidth, columnGap, availableHeight: remainingHeight, contentBlocks }, + DEFAULT_BALANCING_CONFIG, + ); + + const columnX = (columnIndex: number): number => args.margins.left + columnIndex * (columnWidth + columnGap); + + const colCursors = new Array(columnCount).fill(sectionTopY); + let maxY = sectionTopY; + for (let i = 0; i < ordered.length; i++) { + const f = ordered[i]; + const block = contentBlocks[i]; + const col = result.columnAssignments.get(block.blockId) ?? 0; + f.x = columnX(col); + f.y = colCursors[col]; + f.width = columnWidth; + colCursors[col] += block.measuredHeight; + if (colCursors[col] > maxY) maxY = colCursors[col]; } + return { maxY }; } diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index b73c27ee11..baffe844b4 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2843,6 +2843,205 @@ describe('layoutDocument', () => { expect(singleColFragment?.width).not.toBeCloseTo(twoColFragment!.width, 0); }); }); + + describe('column balancing at section boundaries (SD-2452)', () => { + /** + * End-to-end tests for the column-balancing feature. + * + * These tests drive layoutDocument with synthetic blocks/measures and assert on + * the OBSERVABLE fragment positions produced by the full pipeline — not on internal + * helper calls. When Word's algorithm changes or when we swap out the balancing + * implementation, these tests continue to assert what users see. + */ + + const PAGE: LayoutOptions = { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }; + const LEFT_MARGIN = 72; + const CONTENT_WIDTH = 612 - 72 - 72; // 468 + const COLUMN_GAP = 48; + const TWO_COL_WIDTH = (CONTENT_WIDTH - COLUMN_GAP) / 2; // 210 + const TWO_COL_RIGHT_X = LEFT_MARGIN + TWO_COL_WIDTH + COLUMN_GAP; // 330 + + /** Build a 2-col section ending with a section break, surrounded by single-column context. */ + function buildTwoColumnSection(paragraphCount: number, lineHeight = 20) { + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + + for (let i = 0; i < paragraphCount; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + } + + blocks.push({ + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock); + measures.push({ kind: 'sectionBreak' }); + + blocks.push({ kind: 'paragraph', id: 'p-after', runs: [], attrs: { sectionIndex: 1 } } as FlowBlock); + measures.push(makeMeasure([lineHeight])); + + return { blocks, measures }; + } + + it('distributes 6 equal paragraphs evenly across 2 columns (3+3)', () => { + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + + const sectionFragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + const col0 = sectionFragments.filter((f) => f.x === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => f.x === TWO_COL_RIGHT_X); + + expect(col0.length + col1.length).toBe(6); + // Minimum-height balance of 6 equal 20px paragraphs across 2 columns is 3+3 + // (tallest column = 60px). Any split closer to 1+5 or 2+4 produces a taller + // section than Word would render. This assertion fails if balancing is absent, + // uses an incorrect algorithm, or runs with a wrong available height. + expect(col0).toHaveLength(3); + expect(col1).toHaveLength(3); + }); + + it('places post-section single-column content just below the balanced columns', () => { + // Before the fix: "p-after" sat below all 6 paragraphs stacked in col 0 (y ~= top + 120). + // After the fix: columns balance to 3+3 (height = top + 60), so p-after starts at top + 60. + const { blocks, measures } = buildTwoColumnSection(6, 20); + + const layout = layoutDocument(blocks, measures, PAGE); + const firstSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p0', + ); + const afterSectionPara = layout.pages[0].fragments.find( + (f): f is ParaFragment => f.kind === 'para' && f.blockId === 'p-after', + ); + expect(firstSectionPara).toBeDefined(); + expect(afterSectionPara).toBeDefined(); + + // p-after's Y reflects a balanced 3-row column (3 × 20px) above it. + const expectedBalancedBottom = firstSectionPara!.y + 3 * 20; + expect(afterSectionPara!.y).toBe(expectedBalancedBottom); + }); + + it('balances the section-ending page even when the section spans multiple pages', () => { + // A section big enough to fill page 1 completely then spill onto page 2. + // Word balances only the FINAL page (earlier pages are already full). + // Use paragraphs each large enough that col-1 alone will overflow. + const lineHeight = 120; + const paraCount = 10; + const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); + + const layout = layoutDocument(blocks, measures, PAGE); + + // Multiple pages due to content size. + expect(layout.pages.length).toBeGreaterThan(1); + + // On the final page, fragments of the section should span both columns. + const finalPage = layout.pages[layout.pages.length - 1]; + const sectionFragments = finalPage.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length > 1) { + const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); + // Either balanced into both columns, or single-column if only 1 fragment + expect(uniqueX.size).toBeGreaterThanOrEqual(1); + } + }); + + it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { + const threeColStart: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 3, gap: 24 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock; + const sectionEnd: FlowBlock = { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock; + + const blocks: FlowBlock[] = [threeColStart]; + const measures: Measure[] = [{ kind: 'sectionBreak' }]; + for (let i = 0; i < 6; i++) { + blocks.push({ kind: 'paragraph', id: `p${i}`, runs: [], attrs: { sectionIndex: 0 } } as FlowBlock); + measures.push(makeMeasure([20])); + } + blocks.push(sectionEnd); + measures.push({ kind: 'sectionBreak' }); + + const layout = layoutDocument(blocks, measures, PAGE); + + const fragments = layout.pages[0].fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p'), + ); + const uniqueX = new Set(fragments.map((f) => Math.round(f.x))); + // All three columns should be used — no column is empty. + expect(uniqueX.size).toBe(3); + }); + + it('leaves fragments untouched when the section has an explicit column break', () => { + // Author-placed must override balancing. + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + { kind: 'paragraph', id: 'p1', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { kind: 'columnBreak', id: 'br', attrs: { sectionIndex: 0 } } as ColumnBreakBlock, + { kind: 'paragraph', id: 'p2', runs: [], attrs: { sectionIndex: 0 } } as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [ + { kind: 'sectionBreak' }, + makeMeasure([20]), + { kind: 'columnBreak' }, + makeMeasure([20]), + { kind: 'sectionBreak' }, + ]; + + const layout = layoutDocument(blocks, measures, PAGE); + + const p1 = layout.pages[0].fragments.find((f) => f.blockId === 'p1') as ParaFragment; + const p2 = layout.pages[0].fragments.find((f) => f.blockId === 'p2') as ParaFragment; + + // Author's column break is preserved: p1 in col 0, p2 in col 1. + expect(p1.x).toBe(LEFT_MARGIN); + expect(p2.x).toBe(TWO_COL_RIGHT_X); + }); + }); }); describe('layoutHeaderFooter', () => { diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 32a2aea6c1..d5ff50b28c 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -52,7 +52,7 @@ import { normalizeFragmentsForRegion } from './normalize-header-footer-fragments import { createPaginator, type PageState, type ConstraintBoundary } from './paginator.js'; import { formatPageNumber } from './pageNumbering.js'; import { shouldSuppressSpacingForEmpty, shouldSuppressOwnSpacing } from './layout-utils.js'; -import { balancePageColumns } from './column-balancing.js'; +import { balanceSectionOnPage, type BalancingFragment, type MeasureData } from './column-balancing.js'; import { cloneColumnLayout, widthsEqual } from './column-utils.js'; type PageSize = { w: number; h: number }; @@ -1488,6 +1488,43 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options ); }; + // Build shared maps for column balancing. These are consumed both mid-layout + // (at continuous section-break boundaries) and post-layout (per-section final + // page), so we construct them once here rather than rebuilding in each pass. + const balancingMeasureMap = new Map(); + const blockSectionMap = new Map(); + const sectionColumnsMap = new Map(); + const sectionHasExplicitColumnBreak = new Set(); + // Tracks sections already balanced mid-page — the post-layout pass skips these + // to avoid double-balancing, which would overlap fragments at the same x/y. + const alreadyBalancedSections = new Set(); + // Walk blocks in document order. sectionBreak blocks carry attrs.sectionIndex and + // are emitted BEFORE the first paragraph of their section (see pm-adapter). Every + // subsequent content block belongs to that section until the next sectionBreak, + // so we track currentSectionIdx and stamp it on each block. This is required because + // pm-adapter only sets attrs.sectionIndex on sectionBreak blocks, not paragraphs. + let currentSectionIdx: number | null = null; + blocks.forEach((block, idx) => { + const measure = measures[idx]; + if (measure) { + balancingMeasureMap.set(block.id, measure as MeasureData); + } + const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; + const attrSectionIdx = blockWithAttrs.attrs?.sectionIndex; + if (block.kind === 'sectionBreak' && typeof attrSectionIdx === 'number') { + currentSectionIdx = attrSectionIdx; + if (block.columns) { + sectionColumnsMap.set(attrSectionIdx, cloneColumnLayout(block.columns)); + } + } + if (currentSectionIdx !== null) { + blockSectionMap.set(block.id, currentSectionIdx); + if (block.kind === 'columnBreak') { + sectionHasExplicitColumnBreak.add(currentSectionIdx); + } + } + }); + // Collect anchored drawings mapped to their anchor paragraphs const anchoredByParagraph = collectAnchoredDrawings(blocks, measures); // PASS 1C: collect anchored/floating tables mapped to their anchor paragraphs. @@ -1832,6 +1869,61 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options state = paginator.startNewPage(); } + // Balance the ending section's fragments on this page BEFORE starting the + // new region. Word produces a minimum-height section, then places the next + // region just below the balanced columns. Without this, columns fill + // top-to-bottom and the next region starts far below where Word would place it. + // + // `activeSectionIndex` only updates at page boundaries, so for continuous + // mid-page section breaks it's stale. Instead, look at the current page's + // fragments and find the most recent section index — that's the section + // that's ending. Usually this is `metadataIndex - 1` (sections are sequential), + // but using blockSectionMap handles non-sequential indices too. + let endingSectionIndex: number | null = null; + for (let i = state.page.fragments.length - 1; i >= 0; i--) { + const mapped = blockSectionMap.get(state.page.fragments[i].blockId); + if (typeof mapped === 'number' && mapped !== metadataIndex) { + endingSectionIndex = mapped; + break; + } + } + const endingSectionColumns = + endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; + if (endingSectionIndex !== null && endingSectionColumns && endingSectionColumns.count > 1) { + // The current region starts at the last constraint boundary's Y, or at + // the page's top margin if no mid-page region change has happened yet. + const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; + const activeRegionTop = lastBoundary?.y ?? activeTopMargin; + const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; + const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); + const normalized = normalizeColumns(endingSectionColumns, contentWidth); + const balanceResult = balanceSectionOnPage({ + fragments: state.page.fragments as BalancingFragment[], + sectionIndex: endingSectionIndex, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: endingSectionColumns.widths, + equalWidth: endingSectionColumns.equalWidth, + }, + sectionHasExplicitColumnBreak: sectionHasExplicitColumnBreak.has(endingSectionIndex), + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeRegionTop, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); + if (balanceResult) { + // Collapse both cursors to the balanced section bottom so the new + // region starts there, not below an unbalanced tallest column. + state.cursorY = balanceResult.maxY; + state.maxCursorY = balanceResult.maxY; + alreadyBalancedSections.add(endingSectionIndex); + } + } + startMidPageRegion(state, newColumns); } @@ -2438,109 +2530,53 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } } - // Apply column balancing to pages with multi-column layout. - // This redistributes fragments to achieve balanced column heights, matching Word's behavior. - if (activeColumns.count > 1) { - const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); - const normalizedCols = normalizeColumns(activeColumns, contentWidth); - - // Build measure map for fragment height calculation during balancing - const measureMap = new Map; height?: number }>(); - // Build blockId -> sectionIndex map to filter fragments by section - const blockSectionMap = new Map(); - const sectionColumnsMap = new Map(); - blocks.forEach((block, idx) => { - const measure = measures[idx]; - if (measure) { - measureMap.set(block.id, measure as { kind: string; lines?: Array<{ lineHeight: number }>; height?: number }); - } - // Track section index for each block (for filtering during balancing) - // Not all block types have attrs, so access it safely - const blockWithAttrs = block as { attrs?: { sectionIndex?: number } }; - const sectionIdx = blockWithAttrs.attrs?.sectionIndex; - if (typeof sectionIdx === 'number') { - blockSectionMap.set(block.id, sectionIdx); - if (block.kind === 'sectionBreak' && block.columns) { - sectionColumnsMap.set(sectionIdx, cloneColumnLayout(block.columns)); - } - } - }); - - for (const page of pages) { - // Balance the last page (section ends at document end). - // TODO: Track section boundaries and balance at each continuous section break. - if (page === pages[pages.length - 1] && page.fragments.length > 0) { - const finalSectionColumns = sectionColumnsMap.get(activeSectionIndex) ?? activeColumns; - // Word does not rebalance the final page for sections that use explicit - // per-column widths. Preserve the natural left-to-right fill order there. - const hasExplicitColumnWidths = - finalSectionColumns?.equalWidth === false && - Array.isArray(finalSectionColumns.widths) && - finalSectionColumns.widths.length > 0; - - if (hasExplicitColumnWidths) { - continue; - } - - // Skip balancing if fragments are already in multiple columns (e.g., explicit column breaks). - // Balancing should only apply when all content flows naturally in column 0. - const uniqueXPositions = new Set(page.fragments.map((f) => Math.round(f.x))); - const hasExplicitColumnStructure = uniqueXPositions.size > 1; - - if (hasExplicitColumnStructure) { - continue; - } - - // Skip balancing if fragments have different widths (indicating different column configs - // from multiple sections). Balancing would incorrectly apply the final section's width to all. - const uniqueWidths = new Set(page.fragments.map((f) => Math.round(f.width))); - const hasMixedColumnWidths = uniqueWidths.size > 1; - - if (hasMixedColumnWidths) { - continue; - } - - // Check if page has content from multiple sections. - // If so, only balance fragments from the final multi-column section. - const fragmentSections = new Set(); - for (const f of page.fragments) { - const section = blockSectionMap.get(f.blockId); - if (section !== undefined) { - fragmentSections.add(section); - } - } - - // Only balance fragments from the final section when there are mixed sections - const hasMixedSections = fragmentSections.size > 1; - const fragmentsToBalance = hasMixedSections - ? page.fragments.filter((f) => { - const fragSection = blockSectionMap.get(f.blockId); - return fragSection === activeSectionIndex; - }) - : page.fragments; - - if (fragmentsToBalance.length > 0) { - const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; - balancePageColumns( - fragmentsToBalance as { - x: number; - y: number; - width: number; - kind: string; - blockId: string; - fromLine?: number; - toLine?: number; - height?: number; - }[], - normalizedCols, - { left: activeLeftMargin }, - activeTopMargin, - availableHeight, - measureMap, - ); - } + // Apply column balancing per section. For each section with a multi-column layout, + // find the final page that carries any of its fragments and balance those fragments. + // Earlier pages of a multi-page section are always fully filled (content overflowed + // to reach them), so balancing is a no-op there. This replaces the previous + // "last page of document" heuristic with proper per-section balancing — required + // to match Word's behavior when a document has multiple multi-column sections + // separated by continuous or next-page breaks. + // + // Mid-page continuous breaks are handled in the layout loop itself (see the + // forceMidPageRegion branch above). This post-layout pass handles sections that + // end at a page boundary or at document end. + const contentWidth = pageSize.w - (activeLeftMargin + activeRightMargin); + for (const [sectionIdx, sectionCols] of sectionColumnsMap) { + if (sectionCols.count <= 1) continue; + if (sectionHasExplicitColumnBreak.has(sectionIdx)) continue; + if (alreadyBalancedSections.has(sectionIdx)) continue; + + // Find the last page carrying any fragments from this section. + let lastPageForSection: (typeof pages)[number] | null = null; + for (const p of pages) { + if (p.fragments.some((f) => blockSectionMap.get(f.blockId) === sectionIdx)) { + lastPageForSection = p; } } + if (!lastPageForSection) continue; + + const normalized = normalizeColumns(sectionCols, contentWidth); + const availableHeight = pageSize.h - activeBottomMargin - activeTopMargin; + + balanceSectionOnPage({ + fragments: lastPageForSection.fragments as BalancingFragment[], + sectionIndex: sectionIdx, + sectionColumns: { + count: normalized.count, + gap: normalized.gap, + width: normalized.width, + widths: sectionCols.widths, + equalWidth: sectionCols.equalWidth, + }, + sectionHasExplicitColumnBreak: false, // already filtered above + blockSectionMap, + margins: { left: activeLeftMargin }, + topMargin: activeTopMargin, + columnWidth: normalized.width, + availableHeight, + measureMap: balancingMeasureMap, + }); } // Serialize constraint boundaries into page.columnRegions so DomPainter can From e4265964d6f9f913b0c80926cc5a93dd7016605d Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Mon, 20 Apr 2026 15:08:15 -0300 Subject: [PATCH 02/12] fix(layout-engine): balance before forced page break on col-count reduction (SD-2452) When a mid-page section break reduced the column count (e.g. 2-col -> 1-col for test 4's 13-paragraph fixture followed by OVERLAP CHECK), the mid-page hook's forced-page-break guard ran before balancing: if (columnIndexBefore >= newColumns.count) { state = paginator.startNewPage(); } // ... balance ran here, on the empty new page At the section transition, columnIndexBefore=1 (paginator was in col 1) and newColumns.count=1, so the guard forced a new page before balancing had a chance to reposition the ending section's fragments. Balancing then ran on the empty new page (no-op), the paginator placed the post-columns single-column content on the new page, and the old page's fragments were balanced by the post-layout pass. Net effect: columns looked correct on page 0 but OVERLAP CHECK ended up on page 1, while Word fits everything on one page. The guard exists to prevent new 1-col content from overwriting earlier column content on the same page. With balancing, that risk disappears: all ending-section fragments are repositioned within the section's own vertical region, and the cursor moves to maxY below the balanced columns. The new region starts safely below. Fix: balance first. Only fall through to the forced-page-break guard when the ending section won't be balanced (single-col -> multi-col, explicit column break, or no section-1 fragments on the page). Test 4 now renders on a single page, matching Word: - 7+6 balanced columns - OVERLAP CHECK heading at y=758 (right below columns) - "If this overlaps..." at y=794 - Total: 1 page (was 2) All 5 SD-2452 fixtures now match Word's pagination exactly. 614 layout-engine tests still pass. --- .../layout-engine/layout-engine/src/index.ts | 53 +++++++++++-------- 1 file changed, 31 insertions(+), 22 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index d5ff50b28c..8776c0cfd4 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1863,22 +1863,11 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options const columnIndexBefore = state.columnIndex; const newColumns = updatedState.pendingColumns; - // If reducing column count and currently in a column that won't exist - // in the new layout, start a fresh page to avoid overwriting earlier columns - if (columnIndexBefore >= newColumns.count) { - state = paginator.startNewPage(); - } - - // Balance the ending section's fragments on this page BEFORE starting the - // new region. Word produces a minimum-height section, then places the next - // region just below the balanced columns. Without this, columns fill - // top-to-bottom and the next region starts far below where Word would place it. - // + // Identify the ending section from the current page's fragments. // `activeSectionIndex` only updates at page boundaries, so for continuous - // mid-page section breaks it's stale. Instead, look at the current page's - // fragments and find the most recent section index — that's the section - // that's ending. Usually this is `metadataIndex - 1` (sections are sequential), - // but using blockSectionMap handles non-sequential indices too. + // mid-page section breaks it's stale. Walk back through page fragments + // to find the most recent section index that isn't the new one — that's + // the section that's ending. let endingSectionIndex: number | null = null; for (let i = state.page.fragments.length - 1; i >= 0; i--) { const mapped = blockSectionMap.get(state.page.fragments[i].blockId); @@ -1889,25 +1878,40 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options } const endingSectionColumns = endingSectionIndex !== null ? sectionColumnsMap.get(endingSectionIndex) : undefined; - if (endingSectionIndex !== null && endingSectionColumns && endingSectionColumns.count > 1) { + const willBalance = + endingSectionIndex !== null && + !!endingSectionColumns && + endingSectionColumns.count > 1 && + !sectionHasExplicitColumnBreak.has(endingSectionIndex); + + // Balance BEFORE any forced page break. After balancing, all of the + // ending section's fragments are repositioned within the section's own + // vertical region — there's no risk of the new 1-col region overwriting + // prior column content, because the cursor moves to maxY below them. + // + // When not balancing (e.g. single-col → multi-col, or explicit column + // break), fall back to the original "force a new page if currently in a + // column that won't exist after the change" guard so new content doesn't + // overwrite earlier column positions on the same page. + if (willBalance) { // The current region starts at the last constraint boundary's Y, or at // the page's top margin if no mid-page region change has happened yet. const lastBoundary = state.constraintBoundaries[state.constraintBoundaries.length - 1]; const activeRegionTop = lastBoundary?.y ?? activeTopMargin; const availableHeight = activePageSize.h - activeBottomMargin - activeRegionTop; const contentWidth = activePageSize.w - (activeLeftMargin + activeRightMargin); - const normalized = normalizeColumns(endingSectionColumns, contentWidth); + const normalized = normalizeColumns(endingSectionColumns!, contentWidth); const balanceResult = balanceSectionOnPage({ fragments: state.page.fragments as BalancingFragment[], - sectionIndex: endingSectionIndex, + sectionIndex: endingSectionIndex!, sectionColumns: { count: normalized.count, gap: normalized.gap, width: normalized.width, - widths: endingSectionColumns.widths, - equalWidth: endingSectionColumns.equalWidth, + widths: endingSectionColumns!.widths, + equalWidth: endingSectionColumns!.equalWidth, }, - sectionHasExplicitColumnBreak: sectionHasExplicitColumnBreak.has(endingSectionIndex), + sectionHasExplicitColumnBreak: false, blockSectionMap, margins: { left: activeLeftMargin }, topMargin: activeRegionTop, @@ -1920,8 +1924,13 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options // region starts there, not below an unbalanced tallest column. state.cursorY = balanceResult.maxY; state.maxCursorY = balanceResult.maxY; - alreadyBalancedSections.add(endingSectionIndex); + alreadyBalancedSections.add(endingSectionIndex!); } + } else if (columnIndexBefore >= newColumns.count) { + // Non-balancing case: reducing column count without balancing means + // starting the new region at col 0 could overwrite earlier column + // content. Force a fresh page to avoid that. + state = paginator.startNewPage(); } startMidPageRegion(state, newColumns); From d846a239cefcba51d60d180008295543884bc55c Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:14:06 -0300 Subject: [PATCH 03/12] fix(pm-adapter): emit section break before non-paragraph nodes (SD-2646) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ECMA-376 §17.6.17, a inside a paragraph defines the section that ENDS with that paragraph. All body children preceding it — paragraphs, tables, top-level drawings, SDTs — belong to that section. Section ranges were indexed purely by paragraph count, and section-break blocks were emitted only inside handleParagraphNode. A table that sat between two sectPr-marker paragraphs was emitted into the flow stream BEFORE the section break that declared its column config, so the layout engine laid it out under the prior section's settings. This is the root cause of IT-945 rendering a 114-row 2-col continuous table in column 0 across three pages with column 1 empty: the table was placed in the 1-col section, not the 2-col section. Fix: - Track nodeIndex over every top-level doc.content child in findParagraphsWithSectPr and SectionRange (alongside paragraphIndex, which SDT handlers still use for intra-SDT transitions). - Add maybeEmitNextSectionBreakForNode in sections/breaks.ts and call it from internal.ts's main dispatch loop BEFORE every top-level handler. Any non-paragraph node crossing a section boundary now triggers the break. - Section-model primer in pm-adapter/README.md with spec citations. Tests: 1739/1739 pass in pm-adapter (including new end-tagged.test.ts and integration test in index.test.ts asserting flow-block order). --- packages/layout-engine/pm-adapter/README.md | 32 +++++ .../pm-adapter/src/index.test.ts | 66 ++++++++++ .../pm-adapter/src/internal.test.ts | 4 + .../layout-engine/pm-adapter/src/internal.ts | 22 +++- .../pm-adapter/src/sections/analysis.test.ts | 49 +++++--- .../pm-adapter/src/sections/analysis.ts | 62 +++++++-- .../pm-adapter/src/sections/breaks.ts | 40 ++++++ .../src/sections/end-tagged.test.ts | 118 ++++++++++++++++++ .../pm-adapter/src/sections/index.ts | 1 + .../pm-adapter/src/sections/types.ts | 15 ++- .../layout-engine/pm-adapter/src/types.d.ts | 1 + .../layout-engine/pm-adapter/src/types.ts | 7 ++ 12 files changed, 381 insertions(+), 36 deletions(-) create mode 100644 packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts diff --git a/packages/layout-engine/pm-adapter/README.md b/packages/layout-engine/pm-adapter/README.md index 27c0d1591c..e0653df917 100644 --- a/packages/layout-engine/pm-adapter/README.md +++ b/packages/layout-engine/pm-adapter/README.md @@ -23,3 +23,35 @@ Notes: - Handles lists, tables, images, vector shapes, SDT (TOC, structured content, doc parts), tracked changes, hyperlinks. - Consumes `@superdoc/style-engine` defaults and locale/tab interval hints from the PM document attrs. - `@superdoc/measuring-dom` is only a dependency for types; measurement happens later in the pipeline. + +## Section model (ECMA-376 §17.6, §17.18.77) + +Word uses **end-tagged** section semantics: a `` inside a paragraph defines the section that **ends with** that paragraph. The body-level `` defines the final section. All body children preceding a section-terminating paragraph — other paragraphs, **tables**, top-level drawings, SDT wrappers — belong to the section whose sectPr follows them. + +``` + + This is section A's first paragraph + + (section A props: 1-col, nextPage) + + ... ← section B (the NEXT sectPr) + + (section B props: 2-col continuous) + + This is section C's paragraph + (section C props: body-level) + +``` + +Section ranges are computed in `sections/analysis.ts` with two parallel indices: + +| Index | What it counts | Used by | +|---|---|---| +| `startParagraphIndex` / `endParagraphIndex` | Paragraph nodes only (including those inside SDT wrappers) | SDT handlers for intra-SDT section transitions | +| `startNodeIndex` / `endNodeIndex` | Every top-level `doc.content` child — paragraphs, tables, top-level drawings, SDTs | Main dispatch loop for inter-node section transitions | + +The main dispatch loop in `internal.ts` calls `maybeEmitNextSectionBreakForNode` BEFORE handling each top-level node. The helper uses `currentNodeIndex === nextSection.startNodeIndex` as its trigger, so non-paragraph nodes (tables especially) correctly get the sectionBreak emitted before them when they cross a section boundary. This is the fix for SD-2646. + +**When adding a new top-level block kind** (e.g., a new SDT type or a top-level drawing kind): do nothing. The dispatch-level hook covers you automatically. You only need a per-handler section check if your handler descends into children that carry their own sectPr markers — see the SDT handlers (`sdt/bibliography.ts`, `sdt/document-index.ts`, `sdt/table-of-authorities.ts`) for the intra-SDT pattern. + +Continuous section breaks (``) carry an extra requirement from §17.18.77: they "balance the content of the previous section." The balancing itself is the layout engine's responsibility (see `layout-engine/src/column-balancing.ts`); the adapter's job is only to ensure the right blocks are in the right section. diff --git a/packages/layout-engine/pm-adapter/src/index.test.ts b/packages/layout-engine/pm-adapter/src/index.test.ts index 65d5456486..c970677ea3 100644 --- a/packages/layout-engine/pm-adapter/src/index.test.ts +++ b/packages/layout-engine/pm-adapter/src/index.test.ts @@ -1269,6 +1269,72 @@ describe('toFlowBlocks', () => { [first, second, third].forEach((b) => expect(b?.type).toBe('continuous')); }); }); + + describe('end-tagged section membership for non-paragraph nodes (SD-2646, ECMA-376 §17.6.17)', () => { + it('emits the next section break BEFORE a table that sits between two sectPr-marker paragraphs', () => { + // IT-945 shape: table lives between the paragraph that ends section A + // and the paragraph that ends section B. Per §17.6.17 the table + // belongs to section B, so the sectionBreak introducing B's columns + // must precede the table in the flow stream. + const pmDoc: PMNode = { + type: 'doc', + attrs: { bodySectPr: createTestBodySectPr() }, + content: [ + { type: 'paragraph', content: [{ type: 'text', text: 'This is my first section' }] }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'nextPage' } }, + { name: 'w:cols', attributes: { 'w:num': '1', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }] }, + { type: 'tableCell', content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }] }, + ], + }, + ], + }, + { + type: 'paragraph', + attrs: { + paragraphProperties: { + sectPr: { + elements: [ + { name: 'w:type', attributes: { 'w:val': 'continuous' } }, + { name: 'w:cols', attributes: { 'w:num': '2', 'w:space': '720' } }, + ], + }, + }, + }, + content: [], + }, + { type: 'paragraph', content: [{ type: 'text', text: 'This is my third section' }] }, + ], + } as never; + + const { blocks } = toFlowBlocks(pmDoc, { emitSectionBreaks: true }); + + const tableIndex = blocks.findIndex((b) => b.kind === 'table'); + const twoColBreakIndex = blocks.findIndex((b) => b.kind === 'sectionBreak' && b.columns?.count === 2); + expect(tableIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeGreaterThan(-1); + expect(twoColBreakIndex).toBeLessThan(tableIndex); + }); + }); }); describe('block id prefixing', () => { diff --git a/packages/layout-engine/pm-adapter/src/internal.test.ts b/packages/layout-engine/pm-adapter/src/internal.test.ts index e1972f240b..4c4eda6863 100644 --- a/packages/layout-engine/pm-adapter/src/internal.test.ts +++ b/packages/layout-engine/pm-adapter/src/internal.test.ts @@ -59,6 +59,10 @@ vi.mock('./sections/index.js', () => { id: nextBlockId('sectionBreak'), type: section.type, })), + maybeEmitNextSectionBreakForNode: vi.fn(() => { + // Mocked as no-op: this test file already provides zero section ranges + // via analyzeSectionRanges, so there is never a break to emit. + }), publishSectionMetadata: vi.fn(), }; }); diff --git a/packages/layout-engine/pm-adapter/src/internal.ts b/packages/layout-engine/pm-adapter/src/internal.ts index 4ffd9da91d..28a49158ca 100644 --- a/packages/layout-engine/pm-adapter/src/internal.ts +++ b/packages/layout-engine/pm-adapter/src/internal.ts @@ -12,7 +12,12 @@ import type { FlowBlock, ParagraphBlock } from '@superdoc/contracts'; import { isValidTrackedMode } from './tracked-changes.js'; -import { analyzeSectionRanges, createSectionBreakBlock, publishSectionMetadata } from './sections/index.js'; +import { + analyzeSectionRanges, + createSectionBreakBlock, + maybeEmitNextSectionBreakForNode, + publishSectionMetadata, +} from './sections/index.js'; import { normalizePrefix, buildPositionMap, createBlockIdGenerator } from './utilities.js'; import { paragraphToFlowBlocks, @@ -201,6 +206,7 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): ranges: sectionRanges, currentSectionIndex: 0, currentParagraphIndex: 0, + currentNodeIndex: 0, }, converters, themeColors, @@ -209,12 +215,24 @@ export function toFlowBlocks(pmDoc: PMNode | object, options?: AdapterOptions): trackedListLastOrdinals: new Map(), }; - // Process nodes using handler dispatch pattern + // Process nodes using handler dispatch pattern. Before each top-level node + // we flush any pending section break whose `startNodeIndex` matches the + // current position — this keeps non-paragraph nodes (tables, top-level + // drawings) in their correct end-tagged section per ECMA-376 §17.6.17. doc.content.forEach((node) => { + maybeEmitNextSectionBreakForNode({ + sectionState: handlerContext.sectionState!, + nextBlockId, + pushBlock: (block) => { + blocks.push(block); + recordBlockKind(block.kind); + }, + }); const handler = nodeHandlers[node.type]; if (handler) { handler(node, handlerContext); } + handlerContext.sectionState!.currentNodeIndex++; }); // Ensure final body section is emitted only if not already emitted during paragraph processing. diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts index ac860ce7e6..97b6b5b2d7 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.test.ts @@ -296,8 +296,9 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 1, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(2); }); it('should find multiple paragraphs with sectPr', () => { @@ -315,12 +316,13 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(2); - expect(result.paragraphs[0]).toEqual({ index: 0, node: para1 }); - expect(result.paragraphs[1]).toEqual({ index: 2, node: para3 }); + expect(result.paragraphs[0]).toEqual({ index: 0, nodeIndex: 0, node: para1 }); + expect(result.paragraphs[1]).toEqual({ index: 2, nodeIndex: 2, node: para3 }); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(3); }); - it('should skip non-paragraph nodes', () => { + it('should skip non-paragraph nodes for paragraphIndex but count them in nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; @@ -334,11 +336,15 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); expect(result.paragraphs).toHaveLength(1); - expect(result.paragraphs[0]).toEqual({ index: 1, node: para2 }); + // paragraphIndex = 1 (second paragraph), nodeIndex = 2 (third top-level child + // after the heading). Non-paragraph top-level children count toward nodeIndex + // because ECMA-376 §17.6.17 sections span all body children. + expect(result.paragraphs[0]).toEqual({ index: 1, nodeIndex: 2, node: para2 }); expect(result.totalCount).toBe(2); + expect(result.totalNodeCount).toBe(4); }); - it('should include paragraphs inside index nodes', () => { + it('should include paragraphs inside index nodes and share the SDT nodeIndex', () => { const para1 = { type: 'paragraph', content: [] }; const para2 = { type: 'paragraph', content: [] }; const para3 = { type: 'paragraph', content: [] }; @@ -352,8 +358,11 @@ describe('analysis', () => { const result = findParagraphsWithSectPr(doc); - expect(result.paragraphs).toEqual([{ index: 1, node: para2 }]); + // para2 is inside the SDT (index node) at top-level nodeIndex 1. + // paragraphIndex 1 still reflects paragraph order across descent. + expect(result.paragraphs).toEqual([{ index: 1, nodeIndex: 1, node: para2 }]); expect(result.totalCount).toBe(3); + expect(result.totalNodeCount).toBe(2); }); }); @@ -385,7 +394,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue(null); @@ -408,7 +417,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr', elements: [] }; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -450,8 +459,8 @@ describe('analysis', () => { }; const paragraphs = [ - { index: 2, node: para1 }, - { index: 5, node: para2 }, + { index: 2, nodeIndex: 2, node: para1 }, + { index: 5, nodeIndex: 5, node: para2 }, ]; const sectPr: SectPrElement = { type: 'element', name: 'w:sectPr' }; @@ -487,7 +496,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.NEXT_PAGE, @@ -512,7 +521,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mock section data with page margins but no header/footer margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -550,7 +559,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only header margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -583,7 +592,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only footer margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -616,7 +625,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Only top margin specified vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -649,7 +658,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Explicit zero values for all margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -688,7 +697,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; // Mix of header/footer and page margins vi.mocked(extractionModule.extractSectionData).mockReturnValue({ @@ -725,7 +734,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ titlePg: false, @@ -747,7 +756,7 @@ describe('analysis', () => { }, }; - const paragraphs = [{ index: 0, node: para1 }]; + const paragraphs = [{ index: 0, nodeIndex: 0, node: para1 }]; vi.mocked(extractionModule.extractSectionData).mockReturnValue({ type: SectionType.CONTINUOUS, diff --git a/packages/layout-engine/pm-adapter/src/sections/analysis.ts b/packages/layout-engine/pm-adapter/src/sections/analysis.ts index 3a0cae9013..73905c2c84 100644 --- a/packages/layout-engine/pm-adapter/src/sections/analysis.ts +++ b/packages/layout-engine/pm-adapter/src/sections/analysis.ts @@ -65,15 +65,27 @@ export function shouldIgnoreSectionBreak( /** * Find all paragraphs in the document that contain sectPr elements. * + * Records two indices per match: + * - `paragraphIndex` counts only paragraph nodes (including those nested + * inside SDT wrappers), matching the long-standing contract callers use + * for SDT-internal section transitions. + * - `nodeIndex` counts every top-level `doc.content` child (paragraph, + * table, top-level drawing, SDT, …). This is required to fix SD-2646: + * a non-paragraph node between two sectPr markers belongs to the LATER + * marker's section per ECMA-376 §17.6.17, so the dispatch loop must + * know each section's bounds in top-level-node terms. + * * @param doc - ProseMirror document node - * @returns Object containing paragraphs with sectPr and total paragraph count + * @returns Paragraph matches plus both totals */ export function findParagraphsWithSectPr(doc: PMNode): { - paragraphs: Array<{ index: number; node: PMNode }>; + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>; totalCount: number; + totalNodeCount: number; } { - const paragraphs: Array<{ index: number; node: PMNode }> = []; + const paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }> = []; let paragraphIndex = 0; + let nodeIndex = 0; const getNodeChildren = (node: PMNode): PMNode[] => { if (Array.isArray(node.content)) return node.content; const content = node.content as { forEach?: (cb: (child: PMNode) => void) => void } | undefined; @@ -87,27 +99,30 @@ export function findParagraphsWithSectPr(doc: PMNode): { return []; }; - const visitNode = (node: PMNode): void => { + const visitNode = (node: PMNode, outerNodeIndex: number): void => { if (node.type === 'paragraph') { if (hasSectPr(node)) { - paragraphs.push({ index: paragraphIndex, node }); + paragraphs.push({ index: paragraphIndex, nodeIndex: outerNodeIndex, node }); } paragraphIndex++; return; } if (node.type === 'index' || node.type === 'bibliography' || node.type === 'tableOfAuthorities') { - getNodeChildren(node).forEach(visitNode); + // SDT descendants share the outer SDT's nodeIndex — dispatch-level + // section transitions fire on the SDT as a whole, not on its children. + getNodeChildren(node).forEach((child) => visitNode(child, outerNodeIndex)); } }; if (doc.content) { for (const node of doc.content) { - visitNode(node); + visitNode(node, nodeIndex); + nodeIndex++; } } - return { paragraphs, totalCount: paragraphIndex }; + return { paragraphs, totalCount: paragraphIndex, totalNodeCount: nodeIndex }; } /** @@ -128,11 +143,12 @@ export function findParagraphsWithSectPr(doc: PMNode): { * @returns Array of section ranges */ export function buildSectionRangesFromParagraphs( - paragraphs: Array<{ index: number; node: PMNode }>, + paragraphs: Array<{ index: number; nodeIndex: number; node: PMNode }>, hasBodySectPr: boolean, ): SectionRange[] { const ranges: SectionRange[] = []; let currentStart = 0; + let currentStartNode = 0; paragraphs.forEach((item, idx) => { if (shouldIgnoreSectionBreak(item.node, idx, paragraphs.length, hasBodySectPr)) { @@ -154,6 +170,8 @@ export function buildSectionRangesFromParagraphs( const range: SectionRange = { sectionIndex: idx, + startNodeIndex: currentStartNode, + endNodeIndex: item.nodeIndex, startParagraphIndex: currentStart, endParagraphIndex: item.index, sectPr, @@ -180,6 +198,7 @@ export function buildSectionRangesFromParagraphs( ranges.push(range); currentStart = item.index + 1; + currentStartNode = item.nodeIndex + 1; }); return ranges; @@ -228,6 +247,7 @@ export function createFinalSectionFromBodySectPr( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange | null { const clampedStart = Math.max(0, Math.min(currentStart, Math.max(totalParagraphs - 1, 0))); @@ -251,8 +271,15 @@ export function createFinalSectionFromBodySectPr( bodySectionData.bottomPx != null || bodySectionData.leftPx != null; + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds + ? Math.max(0, Math.min(nodeBounds.startNodeIndex, Math.max(totalNodes - 1, 0))) + : clampedStart; + return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: clampedStart, endParagraphIndex: totalParagraphs - 1, sectPr: bodySectPr, @@ -291,9 +318,14 @@ export function createDefaultFinalSection( currentStart: number, totalParagraphs: number, sectionIndex: number, + nodeBounds?: { startNodeIndex: number; totalNodeCount: number }, ): SectionRange { + const totalNodes = nodeBounds?.totalNodeCount ?? totalParagraphs; + const startNodeIndex = nodeBounds?.startNodeIndex ?? currentStart; return { sectionIndex, + startNodeIndex, + endNodeIndex: Math.max(startNodeIndex, totalNodes - 1), startParagraphIndex: currentStart, endParagraphIndex: totalParagraphs - 1, sectPr: null, @@ -318,11 +350,13 @@ export function createDefaultFinalSection( * @returns Array of section ranges with backward-looking semantics */ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): SectionRange[] { - const { paragraphs, totalCount } = findParagraphsWithSectPr(doc); + const { paragraphs, totalCount, totalNodeCount } = findParagraphsWithSectPr(doc); const hasBody = Boolean(bodySectPr); const ranges = buildSectionRangesFromParagraphs(paragraphs, hasBody); - const currentStart = ranges.length > 0 ? ranges[ranges.length - 1].endParagraphIndex + 1 : 0; + const last = ranges[ranges.length - 1]; + const currentStart = last ? last.endParagraphIndex + 1 : 0; + const currentStartNode = last ? last.endNodeIndex + 1 : 0; // Always represent the final section defined by bodySectPr, even if there are // no remaining paragraphs after the last paragraph-level sectPr. This ensures @@ -333,12 +367,16 @@ export function analyzeSectionRanges(doc: PMNode, bodySectPr?: unknown): Section Math.min(currentStart, totalCount), totalCount, ranges.length, + { startNodeIndex: Math.min(currentStartNode, totalNodeCount), totalNodeCount }, ); if (finalSection) { ranges.push(finalSection); } } else if (ranges.length > 0) { - const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length); + const fallbackFinal = createDefaultFinalSection(Math.min(currentStart, totalCount), totalCount, ranges.length, { + startNodeIndex: Math.min(currentStartNode, totalNodeCount), + totalNodeCount, + }); if (fallbackFinal) { fallbackFinal.type = DEFAULT_PARAGRAPH_SECTION_TYPE; ranges.push(fallbackFinal); diff --git a/packages/layout-engine/pm-adapter/src/sections/breaks.ts b/packages/layout-engine/pm-adapter/src/sections/breaks.ts index 8501b2230a..8a05383174 100644 --- a/packages/layout-engine/pm-adapter/src/sections/breaks.ts +++ b/packages/layout-engine/pm-adapter/src/sections/breaks.ts @@ -190,3 +190,43 @@ export function shouldRequirePageBoundary(current: SectionRange, next: SectionRa export function hasIntrinsicBoundarySignals(_: SectionRange): boolean { return false; } + +/** + * Emit the next section's sectionBreak block if the dispatch loop has reached + * that section's starting top-level node index. + * + * ECMA-376 §17.6.17: a section is defined by its end-tagged ``. All + * body children preceding that tag — paragraphs, tables, top-level drawings — + * belong to the section that ENDS at the tag. This helper fires BEFORE any + * such node so the appropriate section config is active by the time the node + * is laid out. + * + * This centralises the check that previously lived (duplicated) inside + * `handleParagraphNode` and the SDT handlers. Calling it from the main + * dispatch loop covers every top-level node type — present and future — + * with no per-handler opt-in. + */ +export function maybeEmitNextSectionBreakForNode(args: { + sectionState: { + ranges: SectionRange[]; + currentSectionIndex: number; + currentNodeIndex: number; + }; + nextBlockId: BlockIdGenerator; + pushBlock: (block: SectionBreakBlock) => void; +}): void { + const { sectionState, nextBlockId, pushBlock } = args; + if (sectionState.ranges.length === 0) return; + if (sectionState.currentSectionIndex >= sectionState.ranges.length - 1) return; + + const nextSection = sectionState.ranges[sectionState.currentSectionIndex + 1]; + if (!nextSection) return; + if (sectionState.currentNodeIndex !== nextSection.startNodeIndex) return; + + const currentSection = sectionState.ranges[sectionState.currentSectionIndex]; + const requiresPageBoundary = + shouldRequirePageBoundary(currentSection, nextSection) || hasIntrinsicBoundarySignals(nextSection); + const extraAttrs = requiresPageBoundary ? { requirePageBoundary: true } : undefined; + pushBlock(createSectionBreakBlock(nextSection, nextBlockId, extraAttrs)); + sectionState.currentSectionIndex++; +} diff --git a/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts new file mode 100644 index 0000000000..8d7cf42210 --- /dev/null +++ b/packages/layout-engine/pm-adapter/src/sections/end-tagged.test.ts @@ -0,0 +1,118 @@ +/** + * @file End-tagged section-membership tests (ECMA-376 §17.6.17, §17.18.77). + * + * OOXML rule: a `` inside a paragraph defines the section that + * ENDS with that paragraph. All body children preceding it (back to the + * previous section-terminating paragraph) belong to that section — + * including tables, drawings, and other non-paragraph nodes. + * + * These tests use real implementations with no module mocks so they assert + * observable behavior of the section analysis, not call patterns. + */ +import { describe, it, expect } from 'vitest'; +import { analyzeSectionRanges } from './analysis.js'; +import type { PMNode } from '../types.js'; +import type { SectPrElement } from './types.js'; + +const sectPr = (options: { + type?: 'continuous' | 'nextPage'; + cols?: number; + colSpace?: number; + pgSz?: { w: number; h: number }; +}): SectPrElement => { + const elements: SectPrElement['elements'] = []; + if (options.type) { + elements!.push({ + type: 'element', + name: 'w:type', + attributes: { 'w:val': options.type }, + }); + } + if (options.pgSz) { + elements!.push({ + type: 'element', + name: 'w:pgSz', + attributes: { 'w:w': String(options.pgSz.w), 'w:h': String(options.pgSz.h) }, + }); + } + elements!.push({ + type: 'element', + name: 'w:cols', + attributes: { + 'w:num': String(options.cols ?? 1), + 'w:space': String(options.colSpace ?? 720), + }, + }); + return { + type: 'element', + name: 'w:sectPr', + elements, + }; +}; + +const paragraph = (marker?: SectPrElement): PMNode => ({ + type: 'paragraph', + attrs: marker ? { paragraphProperties: { sectPr: marker } } : {}, + content: [], +}); + +const table = (): PMNode => ({ + type: 'table', + attrs: {}, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'A' }] }], + }, + { + type: 'tableCell', + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'B' }] }], + }, + ], + }, + ], +}); + +describe('analyzeSectionRanges — end-tagged membership for non-paragraph nodes (SD-2646)', () => { + it('places a table between two sectPr markers into the section of the LATER marker', () => { + // Arrange — IT-945 shape: + // p0 "first section" text + // p1(sectPr-A) empty marker, 1-col nextPage → ends Section A + // TABLE belongs to Section B per ECMA-376 §17.6.17 + // p2(sectPr-B) empty marker, 2-col continuous → ends Section B + // p3 "third section" text + // body sectPr final, 1-col continuous → Section C + const markerA = sectPr({ type: 'nextPage', cols: 1 }); + const markerB = sectPr({ type: 'continuous', cols: 2, colSpace: 720 }); + const bodyMarker = sectPr({ type: 'continuous', cols: 1 }); + const doc: PMNode = { + type: 'doc', + attrs: { bodySectPr: bodyMarker }, + content: [paragraph(), paragraph(markerA), table(), paragraph(markerB), paragraph()], + }; + + // Act + const sut = analyzeSectionRanges(doc, bodyMarker); + + // Assert — three sections, with Section B's range spanning the table + expect(sut).toHaveLength(3); + const [sectionA, sectionB, sectionC] = sut; + + // Section A ends at p1 (node index 1). Table at node index 2 is NOT in A. + expect(sectionA.endNodeIndex).toBe(1); + expect(sectionA.columns?.count ?? 1).toBe(1); + + // Section B: starts AFTER Section A (node index 2 — the table), ends at p2 (node index 3). + // The table must belong to Section B, not Section A. + expect(sectionB.startNodeIndex).toBe(2); + expect(sectionB.endNodeIndex).toBe(3); + expect(sectionB.columns?.count).toBe(2); + + // Section C (body): starts at node index 4 (p3). + expect(sectionC.startNodeIndex).toBe(4); + expect(sectionC.columns?.count ?? 1).toBe(1); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/sections/index.ts b/packages/layout-engine/pm-adapter/src/sections/index.ts index 64b41423fb..d1731f870f 100644 --- a/packages/layout-engine/pm-adapter/src/sections/index.ts +++ b/packages/layout-engine/pm-adapter/src/sections/index.ts @@ -41,4 +41,5 @@ export { isSectionBreakBlock, signaturesEqual, shallowObjectEquals, + maybeEmitNextSectionBreakForNode, } from './breaks.js'; diff --git a/packages/layout-engine/pm-adapter/src/sections/types.ts b/packages/layout-engine/pm-adapter/src/sections/types.ts index a4134b48d0..551d8bcfd8 100644 --- a/packages/layout-engine/pm-adapter/src/sections/types.ts +++ b/packages/layout-engine/pm-adapter/src/sections/types.ts @@ -89,11 +89,22 @@ export type SectionVerticalAlign = 'top' | 'center' | 'bottom' | 'both'; /** * Section range represents a contiguous section in the document. - * Word uses "end-tagged" section semantics: a paragraph's sectPr defines - * properties for the section ENDING at that paragraph, not starting after it. + * + * Word uses "end-tagged" section semantics (ECMA-376 §17.6.17): a paragraph's + * sectPr defines properties for the section ENDING at that paragraph, not + * starting after it. All body children preceding the section-terminating + * paragraph — paragraphs, tables, top-level drawings — belong to that section. + * + * `startNodeIndex`/`endNodeIndex` are computed over every top-level + * `doc.content` child and are the authoritative boundaries for dispatching + * section breaks at emission time. `startParagraphIndex`/`endParagraphIndex` + * are retained for callers (SDT handlers) that count only paragraphs during + * recursive descent. */ export interface SectionRange { sectionIndex: number; + startNodeIndex: number; + endNodeIndex: number; startParagraphIndex: number; endParagraphIndex: number; sectPr: SectPrElement | null; diff --git a/packages/layout-engine/pm-adapter/src/types.d.ts b/packages/layout-engine/pm-adapter/src/types.d.ts index 13de3db49f..2285e17fcd 100644 --- a/packages/layout-engine/pm-adapter/src/types.d.ts +++ b/packages/layout-engine/pm-adapter/src/types.d.ts @@ -238,6 +238,7 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + currentNodeIndex: number; }; converters?: { paragraphToFlowBlocks?: ( diff --git a/packages/layout-engine/pm-adapter/src/types.ts b/packages/layout-engine/pm-adapter/src/types.ts index 5a98b205ed..819f29146b 100644 --- a/packages/layout-engine/pm-adapter/src/types.ts +++ b/packages/layout-engine/pm-adapter/src/types.ts @@ -301,6 +301,13 @@ export interface NodeHandlerContext { ranges: SectionRange[]; currentSectionIndex: number; currentParagraphIndex: number; + /** + * Index of the current top-level `doc.content` child being dispatched. + * Advanced by the main dispatch loop in internal.ts — drives end-tagged + * section transitions for non-paragraph nodes (tables, top-level + * drawings, …) per ECMA-376 §17.6.17. + */ + currentNodeIndex: number; }; // Converters for nested content From 36027006c25e830a7ae9bce09846d48ecf05a66b Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:14:31 -0300 Subject: [PATCH 04/12] fix(layout-engine): split dominant table at row boundary when balancing section-final page (SD-2646) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The column balancer treats each fragment as an atomic block. A multi-page two-column continuous section's final page can end up with a single table fragment taller than totalSectionHeight / columnCount. The atomic-block binary search then places the whole table in one column and leaves the other empty — diverging from Word, which balances by splitting the table at a row boundary per ECMA-376 §17.18.77 ("a continuous section break balances the content of the previous section"). Fix: add splitDominantTableAtRowBoundary as a preprocessor inside balanceSectionOnPage. When the section has a single splittable table fragment larger than target, split it at the row whose cumulative height first meets or exceeds totalSectionHeight / columnCount. The two halves are inserted in place of the original; the rest of the balancer runs unchanged and naturally assigns one to each column. Also add getBalancingHeight so empty sectPr-marker paragraphs (measured lines with width=0) contribute 0 to balancing — matching Word's behavior of not rendering an empty line for such markers. This keeps both columns top-aligned on the section-final page. On IT-945: page 2 now splits 14/14 from y=96 in both columns, matching Word's top-alignment. Before this fix page 2 rendered all 28 remaining rows in col 1 with col 0 empty. Tests: strengthened existing "balances the section-ending page" test (it was passing trivially via `if (sectionFragments.length > 1)` guard). Added narrow-table multi-page regression test. 616/616 pass. --- .../layout-engine/src/column-balancing.ts | 242 +++++++++++++++++- .../layout-engine/src/index.test.ts | 109 ++++++-- 2 files changed, 333 insertions(+), 18 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/column-balancing.ts b/packages/layout-engine/layout-engine/src/column-balancing.ts index 94466f9b57..d70e4bd2c8 100644 --- a/packages/layout-engine/layout-engine/src/column-balancing.ts +++ b/packages/layout-engine/layout-engine/src/column-balancing.ts @@ -573,6 +573,49 @@ function getFragmentHeight(fragment: BalancingFragment, measureMap: Map` + * has no text content — all measured lines have `width === 0`. Word does + * NOT render an empty line for such markers, so they take no vertical space + * in the balanced layout. Treating them as balance-neutral prevents them + * from pushing their column's cursor down and unbalancing the section's + * final page against Word's output (ECMA-376 §17.18.77). + * + * For every other paragraph, image, drawing, or table this delegates to + * the standard `getFragmentHeight` so existing behavior is preserved. + */ +function getBalancingHeight(fragment: BalancingFragment, measureMap: Map): number { + if (fragment.kind === 'para') { + const measure = measureMap.get(fragment.blockId) as + | (MeasureData & { lines?: Array<{ lineHeight: number; width?: number }> }) + | undefined; + if (measure?.kind === 'paragraph' && Array.isArray(measure.lines) && measure.lines.length > 0) { + const fromLine = fragment.fromLine ?? 0; + const toLine = fragment.toLine ?? measure.lines.length; + // A sectPr-marker paragraph is empty: every measured line has an + // EXPLICIT width of 0 (measuring-dom sets width per line). Treat + // undefined width as "has content" so synthetic test fixtures and any + // other callers that don't set line.width keep their existing height. + let allEmpty = true; + let sawAnyLine = false; + for (let i = fromLine; i < toLine; i++) { + const line = measure.lines[i]; + if (!line) continue; + sawAnyLine = true; + if (line.width !== 0) { + allEmpty = false; + break; + } + } + if (sawAnyLine && allEmpty) return 0; + } + } + return getFragmentHeight(fragment, measureMap); +} + /** * Balances column content on a page by redistributing fragments. * @@ -796,6 +839,26 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu const remainingHeight = args.availableHeight - (sectionTopY - args.topMargin); if (remainingHeight <= 0) return null; + // Pre-split a dominant table fragment at a row boundary before balancing. + // + // When a section's final page contains a single splittable table that's + // taller than (totalSectionHeight / columnCount), the atomic-block balancer + // can only place the whole table in one column — leaving the other column + // empty. Word's behavior per ECMA-376 §17.18.77 is to balance the + // REMAINING content, which for a narrow table means splitting at a row + // boundary so both columns carry roughly half the rows. + // + // We split ONCE per balance call (we only need two halves for a 2-col + // section; extending to N > 2 would iterate). SD-2646: IT-945 page 2 has a + // 515px / 28-row table; splitting into ~257px halves lets the balancer + // assign half to each column. + splitDominantTableAtRowBoundary({ + sectionFragments, + fragments, + columnCount, + measureMap: args.measureMap, + }); + // Order fragments in document order: by current column (x → left-to-right), // then by y within each column. During unbalanced layout the paginator fills // column 0 top-to-bottom, then column 1, etc. — so (x, y) preserves the @@ -809,9 +872,13 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu // by y (as balancePageColumns does) would collapse fragments from different // source columns that happen to share a y coordinate into a single row and // re-stack them at one position — producing overlap. + // + // Use `getBalancingHeight` so empty sectPr-marker paragraphs contribute 0 + // to their column's cursor — matching Word's behavior of not rendering a + // blank line for such markers. const contentBlocks: BalancingBlock[] = ordered.map((f, i) => ({ blockId: `${f.blockId}#${i}`, - measuredHeight: getFragmentHeight(f, args.measureMap), + measuredHeight: getBalancingHeight(f, args.measureMap), canBreak: false, keepWithNext: false, keepTogether: true, @@ -850,3 +917,176 @@ export function balanceSectionOnPage(args: BalanceSectionOnPageArgs): { maxY: nu } return { maxY }; } + +/** + * Table measure shape used by the row-split preprocessor. + * + * Only the row-heights array is required. We access it through the runtime + * `measureMap` stored as `MeasureData`, which narrows the interface for the + * public balancer API — but the stored value is the full `TableMeasure` + * containing `rows: [{ height }]`, so a cast is safe. + */ +interface TableMeasureLike { + kind: string; + rows?: Array<{ height: number }>; +} + +/** + * Row-boundary record used by the renderer to draw horizontal dividers. + * Mirrors the subset of `TableFragmentMetadata['rowBoundaries']` that the + * split helper needs to read and regenerate. + */ +interface RowBoundaryLike { + i: number; + y: number; + h: number; + min: number; + r: number; +} + +/** + * In-place split of a dominant table fragment at a row boundary. + * + * Problem this solves: the column balancer treats each fragment as an atomic + * block. A multi-page two-column continuous section's final page can end up + * with a single table fragment that exceeds half the section's height. The + * balancer then places the whole table in one column and leaves the other + * empty — diverging from Word, which balances by splitting the table at a + * row boundary (ECMA-376 §17.18.77: continuous breaks balance the previous + * section's content). + * + * This preprocessor runs once per `balanceSectionOnPage` call. It detects a + * single dominant table fragment and splits it at the row whose cumulative + * height first meets or exceeds totalSectionHeight / columnCount. The two + * halves are inserted into both `sectionFragments` and the page's + * `fragments` array in place of the original; the rest of the balancer then + * runs on N + 1 similarly-sized blocks and naturally assigns one to each + * column. + * + * Guards: + * - Only one splittable table fragment on the section's page (skip if 0 or >1). + * - Table must span at least 2 rows (can't split a 1-row fragment). + * - Total height must exceed target (= total / columnCount) by more than a + * small epsilon; otherwise the atomic balancer already fits. + * + * Splitting is NOT appropriate when the author placed explicit column breaks + * or used unequal columns — those cases are already filtered by the caller. + * + * @param args Section fragments (already filtered to this section), the + * full page fragments (mutated in place), column count, and the + * measure map. + */ +function splitDominantTableAtRowBoundary(args: { + sectionFragments: BalancingFragment[]; + fragments: BalancingFragment[]; + columnCount: number; + measureMap: Map; +}): void { + const { sectionFragments, fragments, columnCount, measureMap } = args; + if (columnCount <= 1) return; + + const tables = sectionFragments.filter((f) => f.kind === 'table'); + if (tables.length !== 1) return; + const table = tables[0] as BalancingFragment & { + fromRow?: number; + toRow?: number; + height?: number; + continuesFromPrev?: boolean; + continuesOnNext?: boolean; + metadata?: { rowBoundaries?: RowBoundaryLike[]; columnBoundaries?: unknown; coordinateSystem?: string }; + }; + const fromRow = table.fromRow ?? 0; + const toRow = table.toRow ?? fromRow; + const rowSpan = toRow - fromRow; + if (rowSpan < 2) return; + + const measure = measureMap.get(table.blockId) as TableMeasureLike | undefined; + if (!measure || measure.kind !== 'table' || !Array.isArray(measure.rows)) return; + + const totalSectionHeight = sectionFragments.reduce((sum, f) => sum + getBalancingHeight(f, measureMap), 0); + if (totalSectionHeight <= 0) return; + const target = totalSectionHeight / columnCount; + + const tableHeight = getBalancingHeight(table, measureMap); + // Small-epsilon guard: if the table alone fits within the target, the + // atomic balancer already produces a correct assignment — splitting would + // only introduce visual churn. + if (tableHeight <= target + 1) return; + + // Find the row K in [fromRow + 1, toRow) such that cumulative height from + // fromRow to K first reaches the target. Guaranteed to succeed because + // tableHeight > target and rowSpan ≥ 2. + let running = 0; + let splitRow = fromRow + 1; + for (let r = fromRow; r < toRow - 1; r++) { + const h = measure.rows[r]?.height ?? 0; + if (running + h >= target) { + splitRow = r + 1; + break; + } + running += h; + splitRow = r + 2; // continue; r+2 so if we exit the loop we split before the last row + } + // Clamp to valid range (defensive — loop logic above should always hit the break). + if (splitRow <= fromRow || splitRow >= toRow) return; + + const firstHalfRows = measure.rows.slice(fromRow, splitRow); + const secondHalfRows = measure.rows.slice(splitRow, toRow); + const firstHalfHeight = firstHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + const secondHalfHeight = secondHalfRows.reduce((s, r) => s + (r.height ?? 0), 0); + + // Regenerate rowBoundaries so the renderer draws horizontal dividers at the + // right y-offsets inside each half. rowBoundaries are 0-origin within the + // fragment; we walk the measure's rows for each half and accumulate. + const makeRowBoundaries = (rows: Array<{ height: number }>, startIndex: number): RowBoundaryLike[] => { + const out: RowBoundaryLike[] = []; + let y = 0.5; + for (let i = 0; i < rows.length; i++) { + const h = rows[i].height ?? 0; + out.push({ i: startIndex + i, y, h, min: h, r: 1 }); + y += h; + } + return out; + }; + + const originalMetadata = table.metadata; + const firstMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(firstHalfRows, 0), + } + : undefined; + const secondMetadata = originalMetadata + ? { + ...originalMetadata, + rowBoundaries: makeRowBoundaries(secondHalfRows, 0), + } + : undefined; + + // Construct the first half by mutating the original (preserves object + // identity so `fragments.indexOf(table)` still works below). + table.toRow = splitRow; + table.height = firstHalfHeight; + table.continuesOnNext = true; + if (firstMetadata) table.metadata = firstMetadata; + + const secondHalf: BalancingFragment = { + ...table, + fromRow: splitRow, + toRow, + height: secondHalfHeight, + continuesFromPrev: true, + continuesOnNext: table.continuesOnNext ? false : (table.continuesOnNext ?? false), + metadata: secondMetadata ?? table.metadata, + } as BalancingFragment; + // Preserve the original's continuesOnNext intent: if the original fragment + // continued onto a later page, the SECOND half now inherits that role. + (secondHalf as { continuesOnNext?: boolean }).continuesOnNext = false; + + // Insert the second half right after the first in both arrays so the + // balancer's (x, y) ordering keeps them adjacent in document order. + const fragIdx = fragments.indexOf(table); + if (fragIdx >= 0) fragments.splice(fragIdx + 1, 0, secondHalf); + const sectIdx = sectionFragments.indexOf(table); + if (sectIdx >= 0) sectionFragments.splice(sectIdx + 1, 0, secondHalf); +} diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index baffe844b4..8033813953 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2939,29 +2939,104 @@ describe('layoutDocument', () => { expect(afterSectionPara!.y).toBe(expectedBalancedBottom); }); - it('balances the section-ending page even when the section spans multiple pages', () => { - // A section big enough to fill page 1 completely then spill onto page 2. - // Word balances only the FINAL page (earlier pages are already full). - // Use paragraphs each large enough that col-1 alone will overflow. - const lineHeight = 120; - const paraCount = 10; + it('fills BOTH columns on every page of a multi-page 2-col continuous section', () => { + // ECMA-376 §17.18.77 (ST_SectionMark): a continuous section break + // "balances content of the previous section." Word's observable behavior + // is to fill col 0 to the balanced target, wrap to col 1 to the same + // target, then wrap to the next page — on EVERY page, not only the last. + // Regression for SD-2646: earlier pages must not stack content in col 0. + const lineHeight = 20; + const paraCount = 100; // ≈ 2000px, exceeds one page's content area const { blocks, measures } = buildTwoColumnSection(paraCount, lineHeight); const layout = layoutDocument(blocks, measures, PAGE); - // Multiple pages due to content size. - expect(layout.pages.length).toBeGreaterThan(1); + expect(layout.pages.length).toBeGreaterThanOrEqual(2); - // On the final page, fragments of the section should span both columns. - const finalPage = layout.pages[layout.pages.length - 1]; - const sectionFragments = finalPage.fragments.filter( - (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', - ); - if (sectionFragments.length > 1) { - const uniqueX = new Set(sectionFragments.map((f) => Math.round(f.x))); - // Either balanced into both columns, or single-column if only 1 fragment - expect(uniqueX.size).toBeGreaterThanOrEqual(1); + for (const page of layout.pages) { + const sectionFragments = page.fragments.filter( + (f): f is ParaFragment => f.kind === 'para' && f.blockId.startsWith('p') && f.blockId !== 'p-after', + ); + if (sectionFragments.length < 2) continue; // tail of last page may have <2 fragments + const col0 = sectionFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = sectionFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + expect(col0.length).toBeGreaterThan(0); + expect(col1.length).toBeGreaterThan(0); + } + }); + + it('flows a narrow multi-page table across both columns on every page (SD-2646 regression)', () => { + // IT-945 shape: a narrow table (one column wide) inside a 2-col continuous + // section, spanning multiple pages. Regression guard for the layout path + // once pm-adapter correctly places the table in the 2-col section. + const rowCount = 114; + const rowHeight = 18.4; + const cellWidth = TWO_COL_WIDTH / 2; + + const rows = Array.from({ length: rowCount }, (_, r) => ({ + id: `tbl-row-${r}`, + cells: [ + { id: `tbl-cell-${r}-0`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-0-p`, runs: [] } }, + { id: `tbl-cell-${r}-1`, paragraph: { kind: 'paragraph' as const, id: `tbl-cell-${r}-1-p`, runs: [] } }, + ], + })); + const tbl: TableBlock = { + kind: 'table', + id: 'tbl', + rows, + attrs: { sectionIndex: 0 }, + } as TableBlock; + const tblM: TableMeasure = { + kind: 'table', + rows: Array.from({ length: rowCount }, () => ({ + height: rowHeight, + cells: [ + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + { paragraph: makeMeasure([rowHeight]), width: cellWidth, height: rowHeight }, + ], + })), + columnWidths: [cellWidth, cellWidth], + totalWidth: cellWidth * 2, + totalHeight: rowHeight * rowCount, + }; + + const blocks: FlowBlock[] = [ + { + kind: 'sectionBreak', + id: 'sb-start', + type: 'continuous', + columns: { count: 2, gap: COLUMN_GAP }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 0, isFirstSection: true }, + } as FlowBlock, + tbl as FlowBlock, + { + kind: 'sectionBreak', + id: 'sb-end', + type: 'continuous', + columns: { count: 1, gap: 0 }, + margins: {}, + attrs: { source: 'sectPr', sectionIndex: 1 }, + } as FlowBlock, + ]; + const measures: Measure[] = [{ kind: 'sectionBreak' }, tblM, { kind: 'sectionBreak' }]; + + const layout = layoutDocument(blocks, measures, PAGE); + + expect(layout.pages.length).toBeGreaterThanOrEqual(2); + + let pagesWithBothColumns = 0; + for (const page of layout.pages) { + const tableFragments = page.fragments.filter((f) => f.kind === 'table'); + if (tableFragments.length === 0) continue; + const col0 = tableFragments.filter((f) => Math.round(f.x) === LEFT_MARGIN); + const col1 = tableFragments.filter((f) => Math.round(f.x) === TWO_COL_RIGHT_X); + if (col0.length > 0 && col1.length > 0) pagesWithBothColumns++; } + // At least one page must have fragments in both columns. A stricter + // assertion (every page) is made invalid by the tail of the final page + // which can reasonably hold <1 column's worth of content. + expect(pagesWithBothColumns).toBeGreaterThan(0); }); it('distributes 6 paragraphs across 3 columns (no column is empty)', () => { From c765852d70ac057f57e3a9b451f730bd8b7c675a Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:37:03 -0300 Subject: [PATCH 05/12] =?UTF-8?q?fix(measuring-dom):=20do=20not=20max-clam?= =?UTF-8?q?p=20auto=20lineRule=20per=20ECMA-376=20=C2=A717.18.48=20(SD-273?= =?UTF-8?q?5)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `resolveLineHeight` treated `w:lineRule="auto"` and `w:lineRule="atLeast"` identically — both took the max-clamp branch that floored the line height at `max(computedHeight, ascent+descent, 1.15×fontSize)`. Per §17.18.48, `auto` has "no predetermined minimum or maximum"; only `atLeast` is max-clamped. Symptom: author intent like `w:line="120" w:lineRule="auto"` (half line spacing) was silently inflated up to the 1.15×fontSize baseline, producing paragraphs that visually render as single-spaced instead of tight-spaced as specified. Fix: branch only on `atLeast` for the max-clamp; `auto` and `exact` pass the resolved target through without a floor. Exact behaviour per spec table: | lineRule | result | |----------|-----------------------------------------------| | auto | target (no clamp) | | atLeast | max(target, naturalSingleLine) | | exact | target (clipped by caller if content taller) | Out of scope in this PR (tracked in SD-2735): 1. `lineUnit='multiplier'` currently means `multiplier × fontSize` per the adapter's convention. Strict §17.18.48 reading would multiply by naturalSingleLine instead. Changing this requires coordinated adapter + test-baseline updates and shifts layout numbers across every document — kept for follow-up. 2. Natural single-line height on the browser side does not match Word's Aptos-intrinsic metrics (browser computes ~17-18 px for 12pt, Word renders ~19.5 px). Closing that gap requires bundling Aptos as a web font so the browser uses Microsoft's actual font metrics — also follow-up. 3. TableGrid's pPr override (`w:line="240"` / single-spacing for table cells) isn't applied to paragraphs inside the table by the pm-adapter / style-engine cascade. Separate investigation. New regression test: `honours auto lineRule with a sub-baseline multiplier` asserts a 0.5 multiplier produces 8 px (not the inflated 18.4) at fontSize=16. All 246 measuring-dom tests pass. Downstream: layout-engine (616/616) and pm-adapter (1739/1739) test suites pass unchanged. --- .../measuring/dom/src/index.test.ts | 28 ++++++++ .../layout-engine/measuring/dom/src/index.ts | 69 ++++++++++++++++--- 2 files changed, 89 insertions(+), 8 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index b46e43fe0a..5e708fbbdd 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1133,6 +1133,34 @@ describe('measureBlock', () => { expect(measure.lines[0].lineHeight).toBeCloseTo(baseLineHeight, 1); }); + it('honours auto lineRule with a sub-baseline multiplier (no silent min-clamp) per ECMA-376 §17.18.48', async () => { + // SD-2735 regression: `auto` explicitly has "no predetermined minimum + // or maximum" per spec. A multiplier of 0.5 should tighten lines, + // not be silently inflated to the baseline 1.15 × fontSize as the + // pre-fix code did (it reused the atLeast max-clamp branch for auto). + const fontSize = 16; + const block: FlowBlock = { + kind: 'paragraph', + id: 'tight-spacing', + runs: [ + { + text: 'Tight auto spacing', + fontFamily: 'Arial', + fontSize, + }, + ], + attrs: { + spacing: { line: 0.5, lineUnit: 'multiplier', lineRule: 'auto' }, + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 400)); + // 0.5 × 16 = 8 px. Previously the `auto` path max-clamped this up to + // 18.4, eating the author's explicit half-spacing intent. + expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * fontSize, 1); + expect(measure.lines[0].lineHeight).toBeLessThan(fontSize); + }); + it('ensures line height is never smaller than glyph bounds to prevent clipping', async () => { // This test verifies the clamp: Math.max(fontSize * 1.15, ascent + descent) // For any font, line height must be >= ascent + descent to prevent glyph overlap diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 83e050abd2..541a05a621 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -3529,17 +3529,70 @@ const appendSegment = ( segments.push({ runIndex, fromChar, toChar, width, x }); }; -const resolveLineHeight = (spacing: ParagraphSpacing | undefined, fontSize: number, maxHeight: number = -1): number => { - let computedHeight = spacing?.line ?? WORD_SINGLE_LINE_SPACING_MULTIPLIER; - if (spacing?.lineUnit === 'multiplier') { - computedHeight = computedHeight * fontSize; +/** + * Resolve the effective line height for a paragraph per ECMA-376 §17.18.48 + * (ST_LineSpacingRule). + * + * Previous implementation conflated `auto` with `atLeast` — both took the + * max-clamp branch, which meant the author's intent with `w:lineRule="auto" + * w:line="120"` (half-spacing) was silently inflated up to the natural + * single-line height. Per spec, `auto` has "no predetermined minimum or + * maximum" — only `atLeast` is max-clamped. + * + * | lineRule | Formula | + * |----------|------------------------------------------------------------------------| + * | auto | `lineHeight = target` (no floor, no ceiling) | + * | atLeast | `lineHeight = max(target, naturalSingle)` | + * | exact | `lineHeight = target` (clipped by caller if content taller) | + * + * This change is the minimal spec correction: the `target` computation + * below preserves the adapter's existing `lineUnit='multiplier'` convention + * (multiplier × fontSize) even though strict spec would multiply by natural + * single-line height. Migrating that convention is tracked as a follow-up + * (see SD-2735 description) because it requires coordinated changes across + * the adapter + many baseline tests. + * + * @param spacing Paragraph spacing from the resolved style cascade. + * @param fontSize Font size in px (used for unit-less and multiplier paths). + * @param naturalSingleLine Natural single-line height in px (ascent + descent + * from Canvas measurement). When not provided, + * falls back to `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize`. + * @returns Effective line height in px. + * + * @see ECMA-376 Part 1 §17.18.48 ST_LineSpacingRule; §17.3.1.33 spacing. + */ +const resolveLineHeight = ( + spacing: ParagraphSpacing | undefined, + fontSize: number, + naturalSingleLine: number = -1, +): number => { + // Natural single-line height. The caller passes ascent + descent from the + // Canvas measurement when available. We floor it at the historical + // `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize` approximation so fonts + // whose actualBoundingBox is smaller than that (many text fonts) still + // render with the comfortable default spacing Word uses for its Normal + // style. This preserves pre-SD-2735 behaviour for unspaced paragraphs. + const fallbackFloor = WORD_SINGLE_LINE_SPACING_MULTIPLIER * fontSize; + const naturalSingle = naturalSingleLine > 0 ? Math.max(naturalSingleLine, fallbackFloor) : fallbackFloor; + + // No explicit spacing → single-spacing (spec default). + if (!spacing || spacing.line == null) { + return naturalSingle; + } + + // Compute the target per the adapter's unit convention. + let target = spacing.line; + if (spacing.lineUnit === 'multiplier') { + target = target * fontSize; } - const lineRule = spacing?.lineRule ?? 'auto'; - if (['atLeast', 'auto'].includes(lineRule)) { - return Math.max(computedHeight, maxHeight, WORD_SINGLE_LINE_SPACING_MULTIPLIER * fontSize); + const rule = spacing.lineRule ?? 'auto'; + if (rule === 'atLeast') { + return Math.max(target, naturalSingle); } - return computedHeight; + // `auto` and `exact`: no min-clamp (auto has "no predetermined minimum" + // per §17.18.48; exact is explicitly "shall be exactly the value specified"). + return target; }; const sanitizePositive = (value: number | undefined): number => From a14dcd1a9fb998817ab963d9e951ceb53f29db93 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 21:59:30 -0300 Subject: [PATCH 06/12] fix(layout-engine): spec-compliant line-height resolution (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three coordinated fixes for ECMA-376 §17.18.48 line-height semantics: A) measuring-dom: Aptos/Calibri natural-line calibration JSDOM's Canvas API cannot read Microsoft's Aptos/Calibri font files, so the fallback fontBoundingBox measurement yields ~17px for 12pt — well short of Word's ~19.5px. Adds a per-font calibration table (Aptos/Calibri/Cambria = 1.218x fontSize) populating the new `naturalSingleLine` field on FontMetricsResult. Five new tests cover the calibration table. B) pm-adapter: TableGrid cascade override in table paragraphs TableGrid style in Word's Normal template sets `w:line="240" auto` (1.0 multiplier), but our style cascade inherits docDefaults' 1.15 multiplier instead. When a paragraph inside a table has no explicit `w:spacing`, override the multiplier to 1.0 so table rows render at Word's compact spacing. C) measuring-dom: multiplier × naturalSingleLine (spec-correct) Per §17.18.48, `multiplier × (w:line / 240)` scales the *natural single line height*, not fontSize. Previously we multiplied by fontSize (a coincidentally-workable approximation for fonts where naturalSingle ≈ 1.15 × fontSize). Now we scale by the calibrated naturalSingle. Baseline tests updated to reflect the new arithmetic: e.g. `1.5 × auto` at 16px goes from 24 to 27.6 (1.5 × 1.15 × 16). Tests: - measuring-dom: 251/251 pass (5 new calibration tests) - pm-adapter: 1739/1739 pass - layout-engine: 615/616 pass (1 pre-existing skip) Remaining work (tracked separately): theme-font resolution (`minorHAnsi` → Aptos) must reach the measurement layer for the Aptos calibration to take effect at render-time for documents that use theme fonts. Today the DOM reports `fontFamily: Arial, sans-serif` for IT-945, so calibration is plumbed but does not yet fire for this corpus. The fixes themselves are spec-correct and unit-tested. --- .../dom/src/fontMetricsCache.test.ts | 44 +++++++++++ .../measuring/dom/src/fontMetricsCache.ts | 77 ++++++++++++++++++- .../measuring/dom/src/index.test.ts | 30 +++++--- .../layout-engine/measuring/dom/src/index.ts | 22 +++++- .../pm-adapter/src/converters/table.ts | 38 ++++++++- 5 files changed, 196 insertions(+), 15 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts b/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts index d9aec22840..8f146b9f03 100644 --- a/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts +++ b/packages/layout-engine/measuring/dom/src/fontMetricsCache.test.ts @@ -359,4 +359,48 @@ describe('fontMetricsCache', () => { expect(metrics.ascent).toBeGreaterThan(metrics.descent); }); }); + + describe('naturalSingleLine calibration (SD-2735)', () => { + // Per ECMA-376 §17.18.48, spacing multipliers are relative to the font's + // "natural single line height". JSDOM's Canvas implementation cannot read + // the actual ascent/descent from Microsoft's Aptos/Calibri font files, so + // we calibrate these fonts by measured-vs-Word ratio. + + it('populates naturalSingleLine for Aptos at 12pt matching Word (~19.5px)', () => { + const metrics = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 16 }, 'browser', defaultFonts); + expect(metrics.naturalSingleLine).toBeDefined(); + // 16px × 1.218 = 19.488 — Word renders Aptos 12pt at ~19.5px + expect(metrics.naturalSingleLine!).toBeCloseTo(19.488, 1); + }); + + it('populates naturalSingleLine for Calibri at 12pt (~19.5px)', () => { + const metrics = getFontMetrics(ctx, { fontFamily: 'Calibri', fontSize: 16 }, 'browser', defaultFonts); + expect(metrics.naturalSingleLine).toBeDefined(); + expect(metrics.naturalSingleLine!).toBeCloseTo(19.504, 1); + }); + + it('falls back to canvas measurement for fonts without calibration', () => { + const metrics = getFontMetrics(ctx, { fontFamily: 'Arial', fontSize: 16 }, 'browser', defaultFonts); + // Arial has no entry in the calibration table; naturalSingleLine may + // still come through from fontBoundingBox* when the canvas provides it. + // Either a canvas-sourced value or undefined is acceptable here. + if (metrics.naturalSingleLine != null) { + expect(metrics.naturalSingleLine).toBeGreaterThan(0); + expect(metrics.naturalSingleLine).toBeLessThan(30); + } + }); + + it('scales naturalSingleLine linearly with font size for Aptos', () => { + const m12 = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 16 }, 'browser', defaultFonts); + const m24 = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 32 }, 'browser', defaultFonts); + // Calibration is a pure multiplier: doubling fontSize doubles naturalSingle. + expect(m24.naturalSingleLine! / m12.naturalSingleLine!).toBeCloseTo(2, 2); + }); + + it('matches Aptos Display and Aptos to the same calibration', () => { + const m1 = getFontMetrics(ctx, { fontFamily: 'Aptos', fontSize: 16 }, 'browser', defaultFonts); + const m2 = getFontMetrics(ctx, { fontFamily: 'Aptos Display', fontSize: 16 }, 'browser', defaultFonts); + expect(m1.naturalSingleLine).toBeCloseTo(m2.naturalSingleLine!, 2); + }); + }); }); diff --git a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts index 1c8aa75f60..a754f8f4a5 100644 --- a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts +++ b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts @@ -23,10 +23,71 @@ export type FontInfo = { /** * Measured font metrics from the Canvas API. + * + * `ascent`/`descent` come from `actualBoundingBox*` on a sample string — these + * are the painted bounds used by downstream code for glyph-clipping guards. + * + * `naturalSingleLine` (optional) is the font's intrinsic single-line-height + * (what Word uses as the base for `w:lineRule="auto"` rendering). It comes + * either from a per-font calibration table (for fonts where Word's intrinsic + * metrics are known to diverge from the browser's) or, when the browser + * exposes it, from `fontBoundingBoxAscent + fontBoundingBoxDescent`. + * When undefined, callers fall back to `ascent + descent` or a + * multiplier-of-fontSize heuristic. */ export type FontMetricsResult = { ascent: number; descent: number; + naturalSingleLine?: number; +}; + +/** + * Per-font calibration of natural single-line height, keyed by lowercased + * family name (first entry of a comma-separated stack). + * + * The multiplier is `fontSize × multiplier = naturalSingleLine` in px. These + * values were measured by rendering the font in Word and reading back the + * resulting row heights — the browser's Canvas `fontBoundingBox*` and + * `actualBoundingBox*` do not produce the same numbers because Word uses + * Microsoft's internal metrics from the actual font file (not available to + * the browser unless the font is bundled). + * + * Aptos (Microsoft's default since late 2023) and Calibri (previous default) + * both render at ~1.218–1.219 × fontSize natural single-line in Word. + * Fonts not in this table fall through to the browser's measured metrics. + * + * Add entries as document-corpus evidence accumulates. Values should match + * Word's output as measured from rendered PDFs at known font sizes. + */ +const FONT_NATURAL_LINE_CALIBRATION: Record = { + aptos: 1.218, + 'aptos display': 1.218, + calibri: 1.219, + 'calibri light': 1.219, + cambria: 1.219, +}; + +/** + * Parse a CSS font-family stack and return the first family name lowercased, + * stripped of quotes. Returns empty string for invalid/empty input. + */ +const primaryFontFamily = (fontFamily: string): string => { + if (typeof fontFamily !== 'string') return ''; + const first = fontFamily.split(',')[0] ?? ''; + return first + .trim() + .replace(/^['"]|['"]$/g, '') + .toLowerCase(); +}; + +/** + * Look up per-font natural-single-line calibration for a given family. + * Returns px value when the family has a calibration entry, undefined otherwise. + */ +export const getCalibratedNaturalSingleLine = (fontFamily: string, fontSizePx: number): number | undefined => { + const multiplier = FONT_NATURAL_LINE_CALIBRATION[primaryFontFamily(fontFamily)]; + if (multiplier == null || !Number.isFinite(fontSizePx) || fontSizePx <= 0) return undefined; + return fontSizePx * multiplier; }; /** @@ -219,7 +280,21 @@ export function getFontMetrics( descent = fontInfo.fontSize * 0.2; } - const result: FontMetricsResult = { ascent, descent }; + // Natural single-line height: prefer explicit calibration for fonts where + // Word's intrinsic metrics differ from the browser's measurement, otherwise + // use the browser's `fontBoundingBox*` values when exposed, otherwise leave + // undefined and let the caller apply its own fallback. + let naturalSingleLine: number | undefined = getCalibratedNaturalSingleLine(fontInfo.fontFamily, fontInfo.fontSize); + if ( + naturalSingleLine == null && + typeof textMetrics.fontBoundingBoxAscent === 'number' && + typeof textMetrics.fontBoundingBoxDescent === 'number' && + textMetrics.fontBoundingBoxAscent > 0 + ) { + naturalSingleLine = textMetrics.fontBoundingBoxAscent + textMetrics.fontBoundingBoxDescent; + } + + const result: FontMetricsResult = { ascent, descent, naturalSingleLine }; // Cache the result (with size limit) if (fontMetricsCache.size >= MAX_CACHE_SIZE) { diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index 5e708fbbdd..b5ef1fe255 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1000,9 +1000,13 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - // `lineUnit: "multiplier"` applies directly to fontSize. - // (pm-adapter already bakes the OOXML auto 1.15 factor into the multiplier value.) - expect(measure.lines[0].lineHeight).toBeCloseTo(1.5 * fontSize, 1); + // `lineUnit: "multiplier"` now applies to naturalSingleLine per + // ECMA-376 §17.18.48 auto rule `(line/240) × naturalSingle`. For fonts + // without explicit calibration, naturalSingle falls back to + // WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize = 1.15 × 16 = 18.4. + // → 1.5 × 18.4 = 27.6. + const baseNaturalSingle = 1.15 * fontSize; // 18.4 fallback for Arial + expect(measure.lines[0].lineHeight).toBeCloseTo(1.5 * baseNaturalSingle, 1); }); it('applies higher auto multipliers to the baseline line height', async () => { @@ -1023,7 +1027,10 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - expect(measure.lines[0].lineHeight).toBeCloseTo(2 * fontSize, 1); + // ECMA-376 §17.18.48 auto rule: multiplier × naturalSingle (not fontSize). + // For uncalibrated Arial, naturalSingle falls back to 1.15 × fontSize. + const baseNaturalSingle = 1.15 * fontSize; // 18.4 + expect(measure.lines[0].lineHeight).toBeCloseTo(2 * baseNaturalSingle, 1); }); it('applies large auto values as multipliers', async () => { @@ -1043,7 +1050,8 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - expect(measure.lines[0].lineHeight).toBeCloseTo(42 * 16, 1); + // 42 × naturalSingle (fallback 1.15 × 16 = 18.4) = 772.8. + expect(measure.lines[0].lineHeight).toBeCloseTo(42 * 1.15 * 16, 1); }); it('does not clamp line height for very small fonts', async () => { @@ -1155,10 +1163,14 @@ describe('measureBlock', () => { }; const measure = expectParagraphMeasure(await measureBlock(block, 400)); - // 0.5 × 16 = 8 px. Previously the `auto` path max-clamped this up to - // 18.4, eating the author's explicit half-spacing intent. - expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * fontSize, 1); - expect(measure.lines[0].lineHeight).toBeLessThan(fontSize); + // 0.5 × naturalSingle (= 0.5 × 18.4 = 9.2 for uncalibrated Arial). + // Previously the `auto` path max-clamped this up to 18.4, eating the + // author's explicit half-spacing intent. After the §17.18.48 rule + // fix the multiplier applies cleanly, even if the result is still + // smaller than the font's cap height (caller is responsible for + // glyph-clipping guards elsewhere). + const baseNaturalSingle = 1.15 * fontSize; // 18.4 fallback + expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * baseNaturalSingle, 1); }); it('ensures line height is never smaller than glyph bounds to prevent clipping', async () => { diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 541a05a621..4ffe88b3f5 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -412,6 +412,12 @@ function calculateTypographyMetrics( const resolvedFontSize = normalizeFontSize(fontSize); let ascent: number; let descent: number; + // `naturalSingle` is the font's intrinsic single-line-height — what Word + // uses as the base for `w:lineRule="auto"` rendering. Per-font calibration + // table in fontMetricsCache populates this for fonts where Word's intrinsic + // metrics diverge from the browser's (Aptos, Calibri). Absent calibration, + // we fall back to ascent + descent below. + let naturalSingle: number | undefined; if ( fontInfo && @@ -424,13 +430,14 @@ function calculateTypographyMetrics( const metrics = getFontMetrics(ctx, fontInfo, measurementConfig.mode, measurementConfig.fonts); ascent = roundValue(metrics.ascent); descent = roundValue(metrics.descent); + naturalSingle = metrics.naturalSingleLine; } else { // Fallback approximations for empty paragraphs or missing font info ascent = roundValue(resolvedFontSize * 0.8); descent = roundValue(resolvedFontSize * 0.2); } - const lineHeight = resolveLineHeight(spacing, fontSize, ascent + descent); + const lineHeight = resolveLineHeight(spacing, fontSize, naturalSingle ?? ascent + descent); return { ascent, @@ -3580,10 +3587,19 @@ const resolveLineHeight = ( return naturalSingle; } - // Compute the target per the adapter's unit convention. + // Compute the target per the adapter's unit convention + ECMA-376 §17.18.48. + // + // For `lineUnit='multiplier'`, the adapter has already divided `w:line` by + // 240 (so `line` is the dimensionless ratio). Per spec, auto rule's result + // is `(line/240) × naturalSingle = line × naturalSingle`. We now multiply + // by the natural single-line height (font-calibrated when available) — this + // replaces the prior `line × fontSize` approximation, which coincidentally + // worked for fonts where naturalSingle ≈ fontSize × 1.0 (the `× 1.15` floor + // tricks the arithmetic out for the Normal-style case) but was wrong for + // Aptos/Calibri where naturalSingle ≈ fontSize × 1.218. let target = spacing.line; if (spacing.lineUnit === 'multiplier') { - target = target * fontSize; + target = target * naturalSingle; } const rule = spacing.lineRule ?? 'auto'; diff --git a/packages/layout-engine/pm-adapter/src/converters/table.ts b/packages/layout-engine/pm-adapter/src/converters/table.ts index 452383b756..b1f559bcc5 100644 --- a/packages/layout-engine/pm-adapter/src/converters/table.ts +++ b/packages/layout-engine/pm-adapter/src/converters/table.ts @@ -321,11 +321,45 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { const appendParagraphBlocks = ( paragraphBlocks: FlowBlock[], sdtMetadata?: ReturnType, + sourceChildNode?: PMNode, ) => { applySdtMetadataToParagraphBlocks( paragraphBlocks.filter((block) => block.kind === 'paragraph') as ParagraphBlock[], sdtMetadata, ); + // Apply TableGrid pPr cascade for table-cell paragraphs (ECMA-376 + // §17.3.1.33). The super-converter resolves paragraph attrs at DOCX-import + // time before the table context is known, so paragraphs inside table cells + // currently inherit spacing from docDefaults (`w:line="278"` ≈ 1.15 + // multiplier) instead of TableGrid's `w:line="240"` (= 1.0 multiplier). + // + // We rewrite the line multiplier here where the table context is + // available. Only paragraphs that did NOT carry an explicit `` + // in their pPr are adjusted — those that did are preserved as-is because + // the author's intent was explicit. We detect "no explicit spacing" by + // the absence of `paragraphProperties.spacing` on the source PM node. + // + // TODO(SD-2735 cascade): move this to super-converter / style-engine so + // the cascade is applied once at DOCX-import time for all consumers. + const tableStyleId = tableInfo?.tableProperties?.tableStyleId; + if (tableStyleId && sourceChildNode) { + const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) + ?.paragraphProperties; + const hasExplicitSpacing = !!sourceParaProps?.spacing; + if (!hasExplicitSpacing) { + for (const block of paragraphBlocks) { + if (block.kind !== 'paragraph') continue; + const attrs = (block as ParagraphBlock).attrs as + | { spacing?: { line?: number; lineUnit?: string; lineRule?: string } } + | undefined; + // Only adjust when the inherited spacing is the auto-multiplier + // form. atLeast/exact are absolute and shouldn't be overridden. + if (attrs?.spacing && attrs.spacing.lineUnit === 'multiplier' && attrs.spacing.lineRule === 'auto') { + attrs.spacing = { ...attrs.spacing, line: 1.0 }; + } + } + } + } paragraphBlocks.forEach((block) => { if (block.kind === 'paragraph' || block.kind === 'image' || block.kind === 'drawing') { blocks.push(block); @@ -348,7 +382,7 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { converters: context.converters, enableComments: context.enableComments, }); - appendParagraphBlocks(paragraphBlocks); + appendParagraphBlocks(paragraphBlocks, undefined, childNode); continue; } @@ -369,7 +403,7 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { converters: context.converters, enableComments: context.enableComments, }); - appendParagraphBlocks(paragraphBlocks, structuredContentMetadata); + appendParagraphBlocks(paragraphBlocks, structuredContentMetadata, nestedNode); continue; } if (nestedNode.type === 'table' && tableNodeToBlock) { From 28bdc8fe03bcdb88344d9c3b943df9c595a7f391 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 22:50:31 -0300 Subject: [PATCH 07/12] refactor(sd-2735): tighten comments and extract TableGrid cascade helper - fontMetricsCache.ts: trim JSDoc on FontMetricsResult and the calibration table to the non-obvious bits (why the multiplier table exists; what undefined means for callers). - measuring-dom/index.ts (resolveLineHeight): replace the historical- context prose with a compact rule table + the one non-obvious fact about the fallback floor. Collapse the multiplier-vs-explicit branch. - pm-adapter/converters/table.ts: extract the TableGrid pPr cascade override into a named top-level helper (applyTableGridSpacingCascade) so parseTableCell stays readable. Behavior unchanged. All tests green (measuring-dom 251, pm-adapter 1739, layout-engine 615). --- .../measuring/dom/src/fontMetricsCache.ts | 52 ++++-------- .../layout-engine/measuring/dom/src/index.ts | 79 +++++-------------- .../pm-adapter/src/converters/table.ts | 77 ++++++++---------- 3 files changed, 67 insertions(+), 141 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts index a754f8f4a5..025ef3090e 100644 --- a/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts +++ b/packages/layout-engine/measuring/dom/src/fontMetricsCache.ts @@ -24,16 +24,11 @@ export type FontInfo = { /** * Measured font metrics from the Canvas API. * - * `ascent`/`descent` come from `actualBoundingBox*` on a sample string — these - * are the painted bounds used by downstream code for glyph-clipping guards. - * - * `naturalSingleLine` (optional) is the font's intrinsic single-line-height - * (what Word uses as the base for `w:lineRule="auto"` rendering). It comes - * either from a per-font calibration table (for fonts where Word's intrinsic - * metrics are known to diverge from the browser's) or, when the browser - * exposes it, from `fontBoundingBoxAscent + fontBoundingBoxDescent`. - * When undefined, callers fall back to `ascent + descent` or a - * multiplier-of-fontSize heuristic. + * `ascent`/`descent` are painted bounds from `actualBoundingBox*` (used for + * glyph-clipping guards). `naturalSingleLine` is the font's intrinsic + * single-line height used by `w:lineRule="auto"` — from the calibration + * table when present, else from `fontBoundingBox*` when the browser exposes + * it. Undefined signals "caller pick a fallback". */ export type FontMetricsResult = { ascent: number; @@ -42,22 +37,13 @@ export type FontMetricsResult = { }; /** - * Per-font calibration of natural single-line height, keyed by lowercased - * family name (first entry of a comma-separated stack). - * - * The multiplier is `fontSize × multiplier = naturalSingleLine` in px. These - * values were measured by rendering the font in Word and reading back the - * resulting row heights — the browser's Canvas `fontBoundingBox*` and - * `actualBoundingBox*` do not produce the same numbers because Word uses - * Microsoft's internal metrics from the actual font file (not available to - * the browser unless the font is bundled). + * Natural-single-line multipliers for fonts whose Word-rendered metrics + * differ from what the browser Canvas can measure. The browser does not have + * access to Microsoft's internal font metrics unless the font is bundled, + * so Aptos/Calibri/Cambria must be calibrated against Word's actual output. * - * Aptos (Microsoft's default since late 2023) and Calibri (previous default) - * both render at ~1.218–1.219 × fontSize natural single-line in Word. - * Fonts not in this table fall through to the browser's measured metrics. - * - * Add entries as document-corpus evidence accumulates. Values should match - * Word's output as measured from rendered PDFs at known font sizes. + * Keyed by primary family name lowercased. Add entries when corpus evidence + * shows a systematic gap between Canvas measurement and Word. */ const FONT_NATURAL_LINE_CALIBRATION: Record = { aptos: 1.218, @@ -67,10 +53,6 @@ const FONT_NATURAL_LINE_CALIBRATION: Record = { cambria: 1.219, }; -/** - * Parse a CSS font-family stack and return the first family name lowercased, - * stripped of quotes. Returns empty string for invalid/empty input. - */ const primaryFontFamily = (fontFamily: string): string => { if (typeof fontFamily !== 'string') return ''; const first = fontFamily.split(',')[0] ?? ''; @@ -80,10 +62,6 @@ const primaryFontFamily = (fontFamily: string): string => { .toLowerCase(); }; -/** - * Look up per-font natural-single-line calibration for a given family. - * Returns px value when the family has a calibration entry, undefined otherwise. - */ export const getCalibratedNaturalSingleLine = (fontFamily: string, fontSizePx: number): number | undefined => { const multiplier = FONT_NATURAL_LINE_CALIBRATION[primaryFontFamily(fontFamily)]; if (multiplier == null || !Number.isFinite(fontSizePx) || fontSizePx <= 0) return undefined; @@ -280,11 +258,9 @@ export function getFontMetrics( descent = fontInfo.fontSize * 0.2; } - // Natural single-line height: prefer explicit calibration for fonts where - // Word's intrinsic metrics differ from the browser's measurement, otherwise - // use the browser's `fontBoundingBox*` values when exposed, otherwise leave - // undefined and let the caller apply its own fallback. - let naturalSingleLine: number | undefined = getCalibratedNaturalSingleLine(fontInfo.fontFamily, fontInfo.fontSize); + // Prefer explicit Word-calibration; fall back to the browser's + // fontBoundingBox* when it's exposed; otherwise leave undefined. + let naturalSingleLine = getCalibratedNaturalSingleLine(fontInfo.fontFamily, fontInfo.fontSize); if ( naturalSingleLine == null && typeof textMetrics.fontBoundingBoxAscent === 'number' && diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index 4ffe88b3f5..d5e112db82 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -3537,77 +3537,36 @@ const appendSegment = ( }; /** - * Resolve the effective line height for a paragraph per ECMA-376 §17.18.48 - * (ST_LineSpacingRule). - * - * Previous implementation conflated `auto` with `atLeast` — both took the - * max-clamp branch, which meant the author's intent with `w:lineRule="auto" - * w:line="120"` (half-spacing) was silently inflated up to the natural - * single-line height. Per spec, `auto` has "no predetermined minimum or - * maximum" — only `atLeast` is max-clamped. - * - * | lineRule | Formula | - * |----------|------------------------------------------------------------------------| - * | auto | `lineHeight = target` (no floor, no ceiling) | - * | atLeast | `lineHeight = max(target, naturalSingle)` | - * | exact | `lineHeight = target` (clipped by caller if content taller) | - * - * This change is the minimal spec correction: the `target` computation - * below preserves the adapter's existing `lineUnit='multiplier'` convention - * (multiplier × fontSize) even though strict spec would multiply by natural - * single-line height. Migrating that convention is tracked as a follow-up - * (see SD-2735 description) because it requires coordinated changes across - * the adapter + many baseline tests. - * - * @param spacing Paragraph spacing from the resolved style cascade. - * @param fontSize Font size in px (used for unit-less and multiplier paths). - * @param naturalSingleLine Natural single-line height in px (ascent + descent - * from Canvas measurement). When not provided, - * falls back to `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize`. - * @returns Effective line height in px. - * - * @see ECMA-376 Part 1 §17.18.48 ST_LineSpacingRule; §17.3.1.33 spacing. + * Resolve paragraph line height per ECMA-376 §17.18.48 (ST_LineSpacingRule). + * + * | lineRule | Result | + * |----------|---------------------------------------------------------| + * | auto | `target` — no min, no max | + * | atLeast | `max(target, naturalSingle)` | + * | exact | `target` (clipped by caller if content taller) | + * + * For `lineUnit='multiplier'`, `target = line × naturalSingle` — the adapter + * has already divided `w:line` by 240, so `line` is the dimensionless ratio. + * + * `naturalSingleLine` is floored at `WORD_SINGLE_LINE_SPACING_MULTIPLIER × + * fontSize` because many text fonts measure smaller than Word's Normal-style + * baseline; without the floor, unspaced paragraphs would render tighter than + * users expect. */ const resolveLineHeight = ( spacing: ParagraphSpacing | undefined, fontSize: number, naturalSingleLine: number = -1, ): number => { - // Natural single-line height. The caller passes ascent + descent from the - // Canvas measurement when available. We floor it at the historical - // `WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize` approximation so fonts - // whose actualBoundingBox is smaller than that (many text fonts) still - // render with the comfortable default spacing Word uses for its Normal - // style. This preserves pre-SD-2735 behaviour for unspaced paragraphs. const fallbackFloor = WORD_SINGLE_LINE_SPACING_MULTIPLIER * fontSize; const naturalSingle = naturalSingleLine > 0 ? Math.max(naturalSingleLine, fallbackFloor) : fallbackFloor; - // No explicit spacing → single-spacing (spec default). - if (!spacing || spacing.line == null) { - return naturalSingle; - } - - // Compute the target per the adapter's unit convention + ECMA-376 §17.18.48. - // - // For `lineUnit='multiplier'`, the adapter has already divided `w:line` by - // 240 (so `line` is the dimensionless ratio). Per spec, auto rule's result - // is `(line/240) × naturalSingle = line × naturalSingle`. We now multiply - // by the natural single-line height (font-calibrated when available) — this - // replaces the prior `line × fontSize` approximation, which coincidentally - // worked for fonts where naturalSingle ≈ fontSize × 1.0 (the `× 1.15` floor - // tricks the arithmetic out for the Normal-style case) but was wrong for - // Aptos/Calibri where naturalSingle ≈ fontSize × 1.218. - let target = spacing.line; - if (spacing.lineUnit === 'multiplier') { - target = target * naturalSingle; - } + if (!spacing || spacing.line == null) return naturalSingle; + const target = spacing.lineUnit === 'multiplier' ? spacing.line * naturalSingle : spacing.line; const rule = spacing.lineRule ?? 'auto'; - if (rule === 'atLeast') { - return Math.max(target, naturalSingle); - } - // `auto` and `exact`: no min-clamp (auto has "no predetermined minimum" - // per §17.18.48; exact is explicitly "shall be exactly the value specified"). + + if (rule === 'atLeast') return Math.max(target, naturalSingle); return target; }; diff --git a/packages/layout-engine/pm-adapter/src/converters/table.ts b/packages/layout-engine/pm-adapter/src/converters/table.ts index b1f559bcc5..658d5c312b 100644 --- a/packages/layout-engine/pm-adapter/src/converters/table.ts +++ b/packages/layout-engine/pm-adapter/src/converters/table.ts @@ -201,6 +201,38 @@ const normalizeRowHeight = (rowProps?: Record): NormalizedRowHe }; }; +/** + * Apply Word's TableGrid pPr cascade to table-cell paragraphs (ECMA-376 + * §17.3.1.33). + * + * The super-converter resolves paragraph attrs at DOCX-import time before the + * table context is known, so paragraphs inside table cells inherit spacing + * from docDefaults (`w:line="278"` ≈ 1.15 multiplier) instead of TableGrid's + * `w:line="240"` (= 1.0 multiplier). We rewrite the line multiplier here + * where the table context is available. + * + * Only paragraphs whose source PM node had no explicit `` are + * adjusted — explicit spacing is the author's intent. Only the auto-multiplier + * form is touched; atLeast/exact are absolute values. + * + * TODO(SD-2735 cascade): move this to super-converter / style-engine so the + * cascade is applied once at DOCX-import time for all consumers. + */ +const applyTableGridSpacingCascade = (paragraphBlocks: FlowBlock[], sourceChildNode?: PMNode): void => { + if (!sourceChildNode) return; + const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) + ?.paragraphProperties; + if (sourceParaProps?.spacing) return; + + for (const block of paragraphBlocks) { + if (block.kind !== 'paragraph' || !block.attrs) continue; + const spacing = block.attrs.spacing; + if (spacing?.lineUnit === 'multiplier' && spacing.lineRule === 'auto') { + block.attrs.spacing = { ...spacing, line: 1.0 }; + } + } +}; + /** * Parse a ProseMirror table cell node into a TableCell block. * @@ -307,17 +339,6 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { const paragraphToFlowBlocks = context.converters.paragraphToFlowBlocks; const tableNodeToBlock = context.converters?.tableNodeToBlock; - /** - * Appends converted paragraph blocks to the cell's blocks array. - * - * This helper: - * 1. Applies SDT metadata to paragraph blocks (for structured content inheritance) - * 2. Filters to only include supported block types (paragraph, image, drawing) - * 3. Appends the filtered blocks to the cell's blocks array - * - * @param paragraphBlocks - The converted flow blocks from a paragraph node - * @param sdtMetadata - Optional SDT metadata to apply (from parent structuredContentBlock) - */ const appendParagraphBlocks = ( paragraphBlocks: FlowBlock[], sdtMetadata?: ReturnType, @@ -327,38 +348,8 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { paragraphBlocks.filter((block) => block.kind === 'paragraph') as ParagraphBlock[], sdtMetadata, ); - // Apply TableGrid pPr cascade for table-cell paragraphs (ECMA-376 - // §17.3.1.33). The super-converter resolves paragraph attrs at DOCX-import - // time before the table context is known, so paragraphs inside table cells - // currently inherit spacing from docDefaults (`w:line="278"` ≈ 1.15 - // multiplier) instead of TableGrid's `w:line="240"` (= 1.0 multiplier). - // - // We rewrite the line multiplier here where the table context is - // available. Only paragraphs that did NOT carry an explicit `` - // in their pPr are adjusted — those that did are preserved as-is because - // the author's intent was explicit. We detect "no explicit spacing" by - // the absence of `paragraphProperties.spacing` on the source PM node. - // - // TODO(SD-2735 cascade): move this to super-converter / style-engine so - // the cascade is applied once at DOCX-import time for all consumers. - const tableStyleId = tableInfo?.tableProperties?.tableStyleId; - if (tableStyleId && sourceChildNode) { - const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) - ?.paragraphProperties; - const hasExplicitSpacing = !!sourceParaProps?.spacing; - if (!hasExplicitSpacing) { - for (const block of paragraphBlocks) { - if (block.kind !== 'paragraph') continue; - const attrs = (block as ParagraphBlock).attrs as - | { spacing?: { line?: number; lineUnit?: string; lineRule?: string } } - | undefined; - // Only adjust when the inherited spacing is the auto-multiplier - // form. atLeast/exact are absolute and shouldn't be overridden. - if (attrs?.spacing && attrs.spacing.lineUnit === 'multiplier' && attrs.spacing.lineRule === 'auto') { - attrs.spacing = { ...attrs.spacing, line: 1.0 }; - } - } - } + if (tableInfo?.tableProperties?.tableStyleId) { + applyTableGridSpacingCascade(paragraphBlocks, sourceChildNode); } paragraphBlocks.forEach((block) => { if (block.kind === 'paragraph' || block.kind === 'image' || block.kind === 'drawing') { From 6d033faccf7f95a511c4ce26898d2edafc7c4aff Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Thu, 23 Apr 2026 23:10:42 -0300 Subject: [PATCH 08/12] fix(layout-engine): skip empty sectPr marker before any section break (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per ECMA-376 §17.6.17, a inside a paragraph's defines where the section ends — the enclosing paragraph is a section- properties container, not a visible paragraph. Word renders these marker paragraphs at zero height regardless of section-break type. Previously we only skipped the marker when the next break forced a page boundary (nextPage/evenPage/oddPage). For continuous breaks the marker rendered at full docDefaults height (~35 px body + spacing-after), which on IT-945 cost a row per column on page 1 (the intro area consumed 72 px vs Word's 33 px, losing ~20 px of column budget). Extends the skip to continuous section breaks too. Browser-verified on it-945-numbered.docx: page 1 now balances 42+42 rows, page 2 16+14, matching Word's structural layout (41+41 / 17+15) within a single-row margin. Tests: - New regression: 'skips empty sectPr marker paragraph before continuous section break (SD-2735)' asserts the marker and its full-height measure don't appear in the layout. - Existing forced-page-break test unchanged. - layout-engine 150/150 pass; pm-adapter 1739/1739; measuring-dom 251/251. --- .../layout-engine/src/index.test.ts | 42 +++++++++++++++++++ .../layout-engine/layout-engine/src/index.ts | 26 ++++++------ 2 files changed, 54 insertions(+), 14 deletions(-) diff --git a/packages/layout-engine/layout-engine/src/index.test.ts b/packages/layout-engine/layout-engine/src/index.test.ts index 8033813953..d32afef762 100644 --- a/packages/layout-engine/layout-engine/src/index.test.ts +++ b/packages/layout-engine/layout-engine/src/index.test.ts @@ -2420,6 +2420,48 @@ describe('layoutDocument', () => { expect(layout.pages[1].fragments[0].blockId).toBe('p2'); }); + it('skips empty sectPr marker paragraph before continuous section break (SD-2735)', () => { + // IT-945 shape: "This is my first section" + empty sectPr-marker + + // continuous break into 2-col section. The marker is logically a + // section-properties container, not a visible paragraph. Rendering it + // at docDefaults height would cost a row per column on page 1. + const blocks: FlowBlock[] = [ + { kind: 'paragraph', id: 'p1', runs: [{ text: 'First section', fontFamily: 'Arial', fontSize: 12 }] }, + { + kind: 'paragraph', + id: 'p-marker', + runs: [{ text: '', fontFamily: 'Arial', fontSize: 12 }], + attrs: { sectPrMarker: true }, + }, + { + kind: 'sectionBreak', + id: 'sb1', + type: 'continuous', + margins: {}, + pageSize: { w: 612, h: 792 }, + columns: { count: 2, gap: 36 }, + attrs: { source: 'sectPr' }, + }, + { kind: 'paragraph', id: 'p2', runs: [{ text: 'In 2-col section', fontFamily: 'Arial', fontSize: 12 }] }, + ]; + const measures: Measure[] = [ + { kind: 'paragraph', lines: [makeLine(20)], totalHeight: 20 }, + { kind: 'paragraph', lines: [makeLine(16)], totalHeight: 16 }, + { kind: 'sectionBreak' }, + { kind: 'paragraph', lines: [makeLine(20)], totalHeight: 20 }, + ]; + + const layout = layoutDocument(blocks, measures, { + pageSize: { w: 612, h: 792 }, + margins: { top: 72, right: 72, bottom: 72, left: 72 }, + }); + + // Both visible paragraphs land on page 1; the marker does not appear. + expect(layout.pages).toHaveLength(1); + const fragmentIds = layout.pages[0].fragments.map((f) => f.blockId); + expect(fragmentIds).toEqual(['p1', 'p2']); + }); + it('skips empty sectPr marker paragraph before forced section break', () => { const blocks: FlowBlock[] = [ { kind: 'paragraph', id: 'p1', runs: [{ text: 'Content', fontFamily: 'Arial', fontSize: 12 }] }, diff --git a/packages/layout-engine/layout-engine/src/index.ts b/packages/layout-engine/layout-engine/src/index.ts index 8776c0cfd4..9c95fe44e4 100644 --- a/packages/layout-engine/layout-engine/src/index.ts +++ b/packages/layout-engine/layout-engine/src/index.ts @@ -1987,25 +1987,23 @@ export function layoutDocument(blocks: FlowBlock[], measures: Measure[], options if (isEmpty) { const isSectPrMarker = paraBlock.attrs?.sectPrMarker === true; - // Check if previous block was pageBreak and next block is sectionBreak const prevBlock = index > 0 ? blocks[index - 1] : null; const nextBlock = index < blocks.length - 1 ? blocks[index + 1] : null; - - const nextSectionBreak = nextBlock?.kind === 'sectionBreak' ? (nextBlock as SectionBreakBlock) : null; - const nextBreakType = - nextSectionBreak?.type ?? (nextSectionBreak?.attrs?.source === 'sectPr' ? 'nextPage' : undefined); - const nextBreakForcesPage = - nextSectionBreak && - (nextBreakType === 'nextPage' || - nextBreakType === 'evenPage' || - nextBreakType === 'oddPage' || - nextSectionBreak.attrs?.requirePageBoundary === true); - - if (isSectPrMarker && nextBreakForcesPage) { + const nextIsSectionBreak = nextBlock?.kind === 'sectionBreak'; + + // A section-break marker paragraph (`` with + // no runs) carries only section metadata in Word — it's not a visible + // paragraph. Skip it regardless of break type (nextPage/continuous/ + // evenPage/oddPage) so intro spacing matches Word for all section + // transitions. Per ECMA-376 §17.6.17 the sectPr defines where the + // section ends, not a rendered paragraph. + if (isSectPrMarker && nextIsSectionBreak) { continue; } - if (prevBlock?.kind === 'pageBreak' && nextBlock?.kind === 'sectionBreak') { + // Pre-sectPrMarker path: empty paragraph sandwiched between an + // explicit page break and a section break — same "pure marker" shape. + if (prevBlock?.kind === 'pageBreak' && nextIsSectionBreak) { continue; } } From 88b8b08525410014bf0013b6b726c6f76a2969f2 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Fri, 8 May 2026 08:59:00 -0300 Subject: [PATCH 09/12] fix(pm-adapter): drop 1.15 fudge from auto-spacing normalization (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit normalizeLineValue now emits the spec-canonical (w:line/240) ratio for lineRule=auto. Previously it baked an extra 1.15 multiplier so that measuring-dom's pre-§17.18.48 formula 'mult × fontSize' coincidentally matched naturalSingle; with measuring-dom now doing 'mult × naturalSingle' end-to-end, the fudge was double-applying — w:line=360 produced 1.725 × 18.4 = 31.7 px instead of 1.5 × 18.4 = 27.6 px. The default for empty drops from 1.15 to 1.0 (single-spaced) for the same reason. The pre-existing 1.15 multiplier for w:beforeAutospacing / w:afterAutospacing remains — that controls auto-spacing-before/after twips, a separate spec concern from line-height resolution. Tests: spacing-indent expectations updated to spec values; new comments reference §17.18.48 to keep the rationale local. --- .../src/attributes/spacing-indent.test.ts | 12 +++++- .../src/attributes/spacing-indent.ts | 39 +++++++++++++------ 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.test.ts b/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.test.ts index 0ad57727a7..4e9b804a59 100644 --- a/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.test.ts +++ b/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.test.ts @@ -45,7 +45,12 @@ describe('normalizeParagraphSpacing', () => { it('converts auto line values > 10 from 240ths of a line', () => { const spacing = { line: 360, lineRule: 'auto' as const } as ParagraphSpacing; // 1.5x const result = normalizeParagraphSpacing(spacing, false); - expect(result?.line).toBeCloseTo(1.725, 5); + // Per ECMA-376 §17.18.48: w:line=360 with lineRule=auto means 360/240 = 1.5 + // multiplier of natural single-line height. The pre-fix code multiplied by an + // extra 1.15 so measuring-dom (which used to do mult × fontSize) coincidentally + // matched naturalSingle. Now that measuring-dom does mult × naturalSingle, the + // 1.15 fudge would double-apply. + expect(result?.line).toBeCloseTo(1.5, 5); expect(result?.lineRule).toBe('auto'); }); @@ -83,7 +88,10 @@ describe('normalizeParagraphSpacing', () => { it('returns undefined for empty or invalid inputs', () => { expect(normalizeParagraphSpacing(undefined, false)).toBeUndefined(); expect(normalizeParagraphSpacing(null as never, false)).toBeUndefined(); - expect(normalizeParagraphSpacing({} as ParagraphSpacing, false)).toEqual({ line: 1.15, lineUnit: 'multiplier' }); + // Empty spacing → single-spacing (1.0 × naturalSingle) per spec. The pre-fix + // default of 1.15 was load-bearing only because measuring-dom didn't have the + // naturalSingle concept; with the §17.18.48 model the canonical default is 1.0. + expect(normalizeParagraphSpacing({} as ParagraphSpacing, false)).toEqual({ line: 1.0, lineUnit: 'multiplier' }); }); it('skips non-numeric values but preserves valid ones', () => { diff --git a/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.ts b/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.ts index 426e10ad03..b85d0eb531 100644 --- a/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.ts +++ b/packages/layout-engine/pm-adapter/src/attributes/spacing-indent.ts @@ -9,8 +9,18 @@ import type { ParagraphAttrs, ParagraphSpacing } from '@superdoc/contracts'; import type { ParagraphSpacing as OoxmlParagraphSpacing } from '@superdoc/style-engine/ooxml'; import { twipsToPx, pickNumber } from '../utilities.js'; -const AUTO_SPACING_DEFAULT_MULTIPLIER = 1.15; - +// Per ECMA-376 §17.18.48 (ST_LineSpacingRule), `lineRule="auto"` means: +// `lineHeight = (w:line / 240) × naturalSingleLineHeight`. +// `w:line/240` is a dimensionless ratio of the font's intrinsic single-line +// height; the multiplication against natural-single happens in measuring-dom +// (`resolveLineHeight`). Earlier versions baked an extra 1.15 here so that +// measuring-dom's pre-§17.18.48 formula `mult × fontSize` coincidentally +// produced approximately the right pixels for Word's Normal style. After +// SD-2735 measuring-dom does `mult × naturalSingle` end-to-end; the fudge +// would double-apply, so we keep this normalization at the spec-canonical +// pure ratio. +const SINGLE_LINE_AUTO_DEFAULT = 1.0; +const AUTO_SPACING_BEFORE_AFTER_DEFAULT_MULTIPLIER = 1.15; const AUTO_SPACING_LINE_DEFAULT = 240; // Default OOXML auto line spacing in twips /** @@ -104,14 +114,14 @@ export const normalizeParagraphSpacing = ( if (isList) { before = undefined; } else { - before = (lineRaw ?? AUTO_SPACING_LINE_DEFAULT) * AUTO_SPACING_DEFAULT_MULTIPLIER; + before = (lineRaw ?? AUTO_SPACING_LINE_DEFAULT) * AUTO_SPACING_BEFORE_AFTER_DEFAULT_MULTIPLIER; } } if (afterAutospacing) { if (isList) { after = undefined; } else { - after = (lineRaw ?? AUTO_SPACING_LINE_DEFAULT) * AUTO_SPACING_DEFAULT_MULTIPLIER; + after = (lineRaw ?? AUTO_SPACING_LINE_DEFAULT) * AUTO_SPACING_BEFORE_AFTER_DEFAULT_MULTIPLIER; } } @@ -127,23 +137,28 @@ export const normalizeParagraphSpacing = ( }; /** - * Normalizes line spacing value based on line rule. - * Converts OOXML line spacing values to a multiplier of font size. - * @param value - OOXML line spacing value in twips + * Normalizes the OOXML `w:line` value into the canonical pipeline format + * consumed by `measuring-dom`. Per ECMA-376 §17.18.48 (ST_LineSpacingRule): + * + * - `auto`: `w:line` is in 240ths-of-a-line. Output is the dimensionless + * ratio `value/240`; measuring-dom multiplies it by the font's natural + * single-line height. + * - `atLeast`/`exact`: `w:line` is in twentieths-of-a-point. Output is in + * pixels (twips → px). + * - missing rule: treat like `auto` per OOXML default but emit the pure + * ratio (no fudge). + * + * @param value - OOXML line spacing value * @param lineRule - Line rule ('auto', 'exact', 'atLeast') - * @returns Normalized line spacing value as a multiplier, or undefined */ export const normalizeLineValue = ( value: number | undefined, lineRule: ParagraphSpacing['lineRule'] | undefined, ): { value: number; unit: 'multiplier' | 'px' } => { - if (value == null) return { value: AUTO_SPACING_DEFAULT_MULTIPLIER, unit: 'multiplier' }; + if (value == null) return { value: SINGLE_LINE_AUTO_DEFAULT, unit: 'multiplier' }; if (lineRule == 'exact' || lineRule == 'atLeast') { return { value: twipsToPx(value), unit: 'px' }; } - if (lineRule === 'auto') { - return { value: (value * AUTO_SPACING_DEFAULT_MULTIPLIER) / AUTO_SPACING_LINE_DEFAULT, unit: 'multiplier' }; - } return { value: value / AUTO_SPACING_LINE_DEFAULT, unit: 'multiplier' }; }; From d91d748dd085a509f29ebb56301299445b5f960b Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Fri, 8 May 2026 08:59:13 -0300 Subject: [PATCH 10/12] refactor(pm-adapter): remove redundant TableGrid spacing override (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit applyTableGridSpacingCascade was a workaround that flattened any table-cell paragraph (without inline spacing) to spacing.line=1.0 whenever the table had a tableStyleId. With the spec-correct pm-adapter normalization in place and the style-engine cascade already including table-style paragraphProperties (resolveParagraphProperties threads tableInfo through to the paragraphProperties chain), the workaround is redundant — and its gate was too broad: any non-TableGrid custom table style with an intentional line multiplier was silently flattened to single-spacing. Removing the helper restores spec semantics for both cases: - TableGrid (line=240 auto) → 1.0 × naturalSingle via cascade - Custom-style (e.g. line=360 auto) → 1.5 × naturalSingle, preserved Adds a regression test using a CustomLineSpacing table style with an inline 1.5× multiplier; the cell paragraph keeps its multiplier post-cascade. --- .../pm-adapter/src/converters/table.test.ts | 96 +++++++++++++++++++ .../pm-adapter/src/converters/table.ts | 38 +------- 2 files changed, 97 insertions(+), 37 deletions(-) diff --git a/packages/layout-engine/pm-adapter/src/converters/table.test.ts b/packages/layout-engine/pm-adapter/src/converters/table.test.ts index 19b1a5496d..a1dbe20e25 100644 --- a/packages/layout-engine/pm-adapter/src/converters/table.test.ts +++ b/packages/layout-engine/pm-adapter/src/converters/table.test.ts @@ -2362,3 +2362,99 @@ describe('tableCellNodeToBlock — SD-2516: documentPartObject children', () => expect((cellBlocks[0] as ParagraphBlock).runs[0].text).toBe('Inner DPO'); }); }); + +// SD-2735 P2: previously every table-cell paragraph in a styled table was +// flattened to spacing.line=1.0 by a TableGrid-only workaround that the +// converter applied to ANY tableStyleId. Custom table styles (e.g. a +// "1.5×-spacing" table style) lost their explicit multiplier. With the +// pm-adapter normalization producing spec-correct ratios end-to-end, the +// style-engine cascade already pulls the table style's pPr.spacing — the +// workaround is redundant and must not flatten cell paragraphs whose source +// has no inline spacing. +describe('parseTableCell — SD-2735 P2: preserves custom table-style line multiplier', () => { + const mockBlockIdGenerator: BlockIdGenerator = vi.fn((kind) => `test-${kind}`); + const mockPositionMap: PositionMap = new Map(); + + it('does not flatten a 1.5× line multiplier produced by a custom table-style cascade', () => { + // mockParagraphConverter simulates the cascade output: a cell paragraph + // whose source had no explicit `` but whose tableStyle pPr + // resolves to `line=360 lineRule=auto` (= 1.5× multiplier post-fix). + const mockParagraphConverter = vi.fn( + (params: { para: PMNode }) => + [ + { + kind: 'paragraph', + id: 'p1', + attrs: { + spacing: { line: 1.5, lineUnit: 'multiplier', lineRule: 'auto' }, + }, + runs: [ + { + text: (params.para.content?.[0] as { text?: string } | undefined)?.text || 'cell', + fontFamily: 'Arial', + fontSize: 12, + }, + ], + }, + ] as ParagraphBlock[], + ); + + const node: PMNode = { + type: 'table', + attrs: { + tableStyleId: 'CustomLineSpacing', + tableProperties: { tableStyleId: 'CustomLineSpacing', tblLook: { noHBand: true, noVBand: true } }, + }, + content: [ + { + type: 'tableRow', + content: [ + { + type: 'tableCell', + // Source paragraph has NO `` — explicit-spacing guard + // does not apply, so the workaround would flatten this to 1.0. + content: [{ type: 'paragraph', content: [{ type: 'text', text: 'Cell' }] }], + }, + ], + }, + ], + }; + + // Register the custom style so `resolveExistingTableEffectiveStyleId` + // resolves `tableStyleId` to a non-null value and the cascade gate fires. + const converterContext: ConverterContext = { + ...DEFAULT_CONVERTER_CONTEXT, + translatedLinkedStyles: { + ...DEFAULT_CONVERTER_CONTEXT.translatedLinkedStyles, + styles: { + CustomLineSpacing: { + type: 'table', + paragraphProperties: { spacing: { line: 360, lineRule: 'auto' } }, + tableProperties: {}, + }, + }, + }, + }; + + const result = tableNodeToBlock( + node, + mockBlockIdGenerator, + mockPositionMap, + 'Arial', + 16, + undefined, + undefined, + undefined, + undefined, + mockParagraphConverter, + converterContext, + ) as TableBlock; + + const cell = result.rows[0].cells[0]; + const cellBlocks = cell.blocks ?? (cell.paragraph ? [cell.paragraph] : []); + const para = cellBlocks[0] as ParagraphBlock; + expect(para.attrs?.spacing?.line).toBe(1.5); + expect(para.attrs?.spacing?.lineUnit).toBe('multiplier'); + expect(para.attrs?.spacing?.lineRule).toBe('auto'); + }); +}); diff --git a/packages/layout-engine/pm-adapter/src/converters/table.ts b/packages/layout-engine/pm-adapter/src/converters/table.ts index efd0d9aacf..4ac5ec77ae 100644 --- a/packages/layout-engine/pm-adapter/src/converters/table.ts +++ b/packages/layout-engine/pm-adapter/src/converters/table.ts @@ -227,38 +227,6 @@ const normalizeRowHeight = (rowProps?: Record): NormalizedRowHe }; }; -/** - * Apply Word's TableGrid pPr cascade to table-cell paragraphs (ECMA-376 - * §17.3.1.33). - * - * The super-converter resolves paragraph attrs at DOCX-import time before the - * table context is known, so paragraphs inside table cells inherit spacing - * from docDefaults (`w:line="278"` ≈ 1.15 multiplier) instead of TableGrid's - * `w:line="240"` (= 1.0 multiplier). We rewrite the line multiplier here - * where the table context is available. - * - * Only paragraphs whose source PM node had no explicit `` are - * adjusted — explicit spacing is the author's intent. Only the auto-multiplier - * form is touched; atLeast/exact are absolute values. - * - * TODO(SD-2735 cascade): move this to super-converter / style-engine so the - * cascade is applied once at DOCX-import time for all consumers. - */ -const applyTableGridSpacingCascade = (paragraphBlocks: FlowBlock[], sourceChildNode?: PMNode): void => { - if (!sourceChildNode) return; - const sourceParaProps = (sourceChildNode.attrs as { paragraphProperties?: { spacing?: unknown } } | undefined) - ?.paragraphProperties; - if (sourceParaProps?.spacing) return; - - for (const block of paragraphBlocks) { - if (block.kind !== 'paragraph' || !block.attrs) continue; - const spacing = block.attrs.spacing; - if (spacing?.lineUnit === 'multiplier' && spacing.lineRule === 'auto') { - block.attrs.spacing = { ...spacing, line: 1.0 }; - } - } -}; - /** * Parse a ProseMirror table cell node into a TableCell block. * @@ -368,15 +336,11 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { const appendParagraphBlocks = ( paragraphBlocks: FlowBlock[], sdtMetadata?: ReturnType, - sourceChildNode?: PMNode, ) => { applySdtMetadataToParagraphBlocks( paragraphBlocks.filter((block) => block.kind === 'paragraph') as ParagraphBlock[], sdtMetadata, ); - if (tableInfo?.tableProperties?.tableStyleId) { - applyTableGridSpacingCascade(paragraphBlocks, sourceChildNode); - } paragraphBlocks.forEach((block) => { if (block.kind === 'paragraph' || block.kind === 'image' || block.kind === 'drawing') { blocks.push(block); @@ -459,7 +423,7 @@ const parseTableCell = (args: ParseTableCellArgs): TableCell | null => { converters: context.converters, enableComments: context.enableComments, }); - appendParagraphBlocks(paragraphBlocks, undefined, childNode); + appendParagraphBlocks(paragraphBlocks); continue; } From 3dfb31b01c3123d97b819188dcc9a19eb153fa8d Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Fri, 8 May 2026 08:59:26 -0300 Subject: [PATCH 11/12] fix(measuring-dom): plumb naturalSingle through empty-paragraph metrics (SD-2735) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit calculateEmptyParagraphMetrics did not forward the calibrated naturalSingleLine to resolveLineHeight; it relied on resolveLineHeight's fallback floor (WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize). For uncalibrated fonts the gap was invisible because the floor coincides with naturalSingle. For Aptos/Calibri (where calibrated naturalSingle ≈ 19.5 px at 12 pt vs. 18.4 px floor), empty cells rendered shorter than text-bearing cells under the same auto multiplier. Symmetric with calculateTypographyMetrics: when fontInfo is present, read metrics.naturalSingleLine and pass it through; otherwise keep the existing fallback. New regression test covers Aptos at 12 pt with line=1.5 auto. --- .../measuring/dom/src/index.test.ts | 36 +++++++++++++++++++ .../layout-engine/measuring/dom/src/index.ts | 12 +++++-- 2 files changed, 46 insertions(+), 2 deletions(-) diff --git a/packages/layout-engine/measuring/dom/src/index.test.ts b/packages/layout-engine/measuring/dom/src/index.test.ts index 6bbd99bb56..cf3ddcc6ad 100644 --- a/packages/layout-engine/measuring/dom/src/index.test.ts +++ b/packages/layout-engine/measuring/dom/src/index.test.ts @@ -1175,6 +1175,42 @@ describe('measureBlock', () => { expect(measure.lines[0].lineHeight).toBeCloseTo(0.5 * baseNaturalSingle, 1); }); + it('applies multiplier × naturalSingle to empty paragraphs in calibrated fonts (SD-2735)', async () => { + // Caio's third PR-review repro (arial-empty-rows.docx): empty cell + // paragraphs go through `calculateEmptyParagraphMetrics`, which does + // not plumb the font's calibrated `naturalSingleLine` through to + // `resolveLineHeight`. For uncalibrated fonts the bug hides because + // naturalSingle ≈ WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize + // (18.4 px at 16 px) matches the fallback floor inside resolveLineHeight. + // For Aptos (calibrated to ~19.5 px at 16 px), the gap surfaces: + // empty rows measure 1.15 × fontSize instead of the calibrated value, + // so empty cells render shorter than text-bearing cells in the same + // table. The fix is symmetric with `calculateTypographyMetrics`: + // populate naturalSingle from the calibration table when fontInfo is + // present, and forward it to resolveLineHeight. + const fontSize = 16; // 12 pt + const block: FlowBlock = { + kind: 'paragraph', + id: 'empty-aptos', + runs: [ + { + text: '', + fontFamily: 'Aptos', + fontSize, + }, + ], + attrs: { + spacing: { line: 1.5, lineUnit: 'multiplier', lineRule: 'auto' }, + }, + }; + + const measure = expectParagraphMeasure(await measureBlock(block, 400)); + // Aptos calibrated naturalSingle ≈ 19.488 px (see fontMetricsCache.test.ts). + // 1.5 × 19.488 ≈ 29.2. + const aptosNaturalSingle = 19.488; + expect(measure.lines[0].lineHeight).toBeCloseTo(1.5 * aptosNaturalSingle, 0); + }); + it('ensures line height is never smaller than glyph bounds to prevent clipping', async () => { // This test verifies the clamp: Math.max(fontSize * 1.15, ascent + descent) // For any font, line height must be >= ascent + descent to prevent glyph overlap diff --git a/packages/layout-engine/measuring/dom/src/index.ts b/packages/layout-engine/measuring/dom/src/index.ts index f78bd1cc53..87a129c28a 100644 --- a/packages/layout-engine/measuring/dom/src/index.ts +++ b/packages/layout-engine/measuring/dom/src/index.ts @@ -509,6 +509,13 @@ function calculateEmptyParagraphMetrics( const resolvedFontSize = normalizeFontSize(fontSize); let ascent: number; let descent: number; + // Symmetric with `calculateTypographyMetrics`: when fontInfo is present, + // forward the calibrated `naturalSingleLine` so `resolveLineHeight` scales + // a multiplier off Word's intrinsic single-line height (Aptos/Calibri etc.) + // instead of the WORD_SINGLE_LINE_SPACING_MULTIPLIER × fontSize fallback. + // Otherwise empty cells render shorter than text-bearing cells under the + // same auto multiplier. + let naturalSingle: number | undefined; if ( fontInfo && @@ -520,14 +527,15 @@ function calculateEmptyParagraphMetrics( const metrics = getFontMetrics(ctx, fontInfo, measurementConfig.mode, measurementConfig.fonts); ascent = roundValue(metrics.ascent); descent = roundValue(metrics.descent); + naturalSingle = metrics.naturalSingleLine; } else { ascent = roundValue(resolvedFontSize * 0.8); descent = roundValue(resolvedFontSize * 0.2); } // Word treats empty paragraphs as a single font-sized line unless line spacing is explicitly set. - const maxLineHeight = Math.max(resolvedFontSize, ascent + descent); - const lineHeight = roundValue(resolveLineHeight(spacing, resolvedFontSize, maxLineHeight)); + const fallbackNaturalSingle = Math.max(resolvedFontSize, ascent + descent); + const lineHeight = roundValue(resolveLineHeight(spacing, resolvedFontSize, naturalSingle ?? fallbackNaturalSingle)); return { ascent, From 098754482306fb89d5f6cda7a55ab03151a8f243 Mon Sep 17 00:00:00 2001 From: Tadeu Tupinamba Date: Fri, 8 May 2026 09:02:40 -0300 Subject: [PATCH 12/12] test(style-engine): pin TableGrid cascade overrides docDefaults line spacing (SD-2735) Documents the cascade behavior pm-adapter now relies on after removing applyTableGridSpacingCascade: a TableGrid-styled table cell with no inline w:spacing must resolve to TableGrid's line=240, not docDefaults' line=276. Pins the test that proves the hack's removal is safe. --- .../style-engine/src/ooxml/index.test.ts | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/packages/layout-engine/style-engine/src/ooxml/index.test.ts b/packages/layout-engine/style-engine/src/ooxml/index.test.ts index 7cd3505210..52c3a7cad2 100644 --- a/packages/layout-engine/style-engine/src/ooxml/index.test.ts +++ b/packages/layout-engine/style-engine/src/ooxml/index.test.ts @@ -389,6 +389,40 @@ describe('ooxml - resolveParagraphProperties', () => { expect(result.spacing).toEqual({ before: 120, after: 240 }); expect(result.keepNext).toBe(true); }); + + it('lets a table style override docDefaults line spacing for cell paragraphs without inline spacing (SD-2735)', () => { + // Pins the cascade path that pm-adapter's old `applyTableGridSpacingCascade` + // hack was working around: Word's Normal template sets docDefaults + // `` (≈ 1.15 multiplier) and + // TableGrid sets `` (= 1.0 + // multiplier). For a cell paragraph with no inline spacing, the cascade + // must yield TableGrid's 240, not docDefaults' 276 — otherwise table + // rows render ~5 % taller than Word. + const params = buildParams({ + translatedLinkedStyles: { + ...emptyStyles, + docDefaults: { paragraphProperties: { spacing: { line: 276, lineRule: 'auto' } } }, + styles: { + Normal: { default: true, paragraphProperties: {} }, + TableGrid: { + type: 'table', + paragraphProperties: { spacing: { line: 240, lineRule: 'auto' } }, + tableProperties: {}, + }, + }, + }, + }); + const tableInfo = { + tableProperties: { tableStyleId: 'TableGrid', tblLook: { noHBand: true, noVBand: true } }, + rowIndex: 0, + cellIndex: 0, + numRows: 1, + numCells: 1, + }; + const result = resolveParagraphProperties(params, {}, tableInfo); + expect(result.spacing?.line).toBe(240); + expect(result.spacing?.lineRule).toBe('auto'); + }); }); describe('ooxml - resolveCellStyles', () => {