diff --git a/packages/core/package.json b/packages/core/package.json index 682bb74b5a..fde4d8f08c 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -107,23 +107,12 @@ "@tiptap/pm": "^3.13.0", "emoji-mart": "^5.6.0", "fast-deep-equal": "^3.1.3", - "hast-util-from-dom": "^5.0.1", "prosemirror-highlight": "^0.15.1", "prosemirror-model": "^1.25.4", "prosemirror-state": "^1.4.4", "prosemirror-tables": "^1.8.3", "prosemirror-transform": "^1.11.0", "prosemirror-view": "^1.41.4", - "rehype-format": "^5.0.1", - "rehype-parse": "^9.0.1", - "rehype-remark": "^10.0.1", - "rehype-stringify": "^10.0.1", - "remark-gfm": "^4.0.1", - "remark-parse": "^11.0.0", - "remark-rehype": "^11.1.2", - "remark-stringify": "^11.0.0", - "unified": "^11.0.5", - "unist-util-visit": "^5.0.0", "uuid": "^8.3.2", "y-prosemirror": "^1.3.7", "y-protocols": "^1.0.6", @@ -131,7 +120,6 @@ }, "devDependencies": { "@types/emoji-mart": "^3.0.14", - "@types/hast": "^3.0.4", "@types/uuid": "^8.3.4", "eslint": "^8.57.1", "jsdom": "^25.0.1", diff --git a/packages/core/src/api/exporters/markdown/htmlToMarkdown.ts b/packages/core/src/api/exporters/markdown/htmlToMarkdown.ts new file mode 100644 index 0000000000..ca9266ef84 --- /dev/null +++ b/packages/core/src/api/exporters/markdown/htmlToMarkdown.ts @@ -0,0 +1,681 @@ +/** + * Custom HTML-to-Markdown serializer for BlockNote. + * Replaces the unified/rehype-remark pipeline with a direct DOM-based implementation. + * + * Input: HTML string from createExternalHTMLExporter + * Output: GFM-compatible markdown string + */ + +/** + * Convert an HTML string (from BlockNote's external HTML exporter) to markdown. + */ +export function htmlToMarkdown(html: string): string { + // Use a temporary element to parse HTML. This works in both browser and + // server (JSDOM) environments, unlike `new DOMParser()` which may not be + // globally available in Node.js. 
+ const container = document.createElement("div"); + container.innerHTML = html; + const result = serializeChildren(container, { indent: "", inList: false }); + return result.trim() + "\n"; +} + +interface SerializeContext { + indent: string; // current indentation prefix for list nesting + inList: boolean; // whether we're inside a list +} + +// ─── Main Serializer ───────────────────────────────────────────────────────── + +function serializeChildren(node: Node, ctx: SerializeContext): string { + let result = ""; + const children = Array.from(node.childNodes); + + for (let i = 0; i < children.length; i++) { + const child = children[i]; + result += serializeNode(child, ctx); + } + + return result; +} + +function serializeNode(node: Node, ctx: SerializeContext): string { + if (node.nodeType === 3 /* Node.TEXT_NODE */) { + return node.textContent || ""; + } + + if (node.nodeType !== 1 /* Node.ELEMENT_NODE */) { + return ""; + } + + const el = node as HTMLElement; + const tag = el.tagName.toLowerCase(); + + switch (tag) { + case "p": + return serializeParagraph(el, ctx); + case "h1": + case "h2": + case "h3": + case "h4": + case "h5": + case "h6": + return serializeHeading(el, ctx); + case "blockquote": + return serializeBlockquote(el, ctx); + case "pre": + return serializeCodeBlock(el, ctx); + case "ul": + return serializeUnorderedList(el, ctx); + case "ol": + return serializeOrderedList(el, ctx); + case "table": + return serializeTable(el, ctx); + case "hr": + return ctx.indent + "***\n\n"; + case "img": + return serializeImage(el, ctx); + case "video": + return serializeVideo(el, ctx); + case "audio": + return serializeAudio(el, ctx); + case "figure": + return serializeFigure(el, ctx); + case "a": + // Block-level link (file block) + return serializeBlockLink(el, ctx); + case "details": + return serializeDetails(el, ctx); + case "div": + // Page break or generic container — serialize children + return serializeChildren(el, ctx); + case "br": + return ""; + 
default: + return serializeChildren(el, ctx); + } +} + +// ─── Block Serializers ─────────────────────────────────────────────────────── + +function serializeParagraph(el: HTMLElement, ctx: SerializeContext): string { + const content = serializeInlineContent(el); + // Trim leading/trailing hard breaks (matching remark behavior) + const trimmed = trimHardBreaks(content); + if (ctx.inList) { + return trimmed; + } + return ctx.indent + trimmed + "\n\n"; +} + +function serializeHeading(el: HTMLElement, ctx: SerializeContext): string { + const level = parseInt(el.tagName[1], 10); + const prefix = "#".repeat(level) + " "; + const content = serializeInlineContent(el); + return ctx.indent + prefix + content + "\n\n"; +} + +function serializeBlockquote(el: HTMLElement, ctx: SerializeContext): string { + // Check if blockquote contains block-level elements (like

) + const blockChildren = Array.from(el.children).filter((child) => { + const tag = child.tagName.toLowerCase(); + return ["p", "ul", "ol", "pre", "blockquote", "table", "hr"].includes(tag); + }); + + let content: string; + if (blockChildren.length > 0) { + // Has block-level children — serialize each + const parts: string[] = []; + for (const child of blockChildren) { + const tag = child.tagName.toLowerCase(); + if (tag === "p") { + parts.push(serializeInlineContent(child as HTMLElement)); + } else { + const innerCtx: SerializeContext = { indent: "", inList: false }; + parts.push(serializeNode(child, innerCtx).trim()); + } + } + content = parts.join("\n\n"); + } else { + // No block-level children — treat entire content as inline + content = serializeInlineContent(el); + } + + const lines = content.split("\n"); + return lines.map((line) => ctx.indent + "> " + line).join("\n") + "\n\n"; +} + +function serializeCodeBlock(el: HTMLElement, ctx: SerializeContext): string { + const codeEl = el.querySelector("code"); + if (!codeEl) {return "";} + + const language = + codeEl.getAttribute("data-language") || + extractLanguageFromClass(codeEl.className) || + ""; + + // Extract code content, handling
elements as newlines + const code = extractCodeContent(codeEl); + + // For empty code blocks, don't add a newline between the fences + if (!code) { + return ctx.indent + "```" + language + "\n```\n\n"; + } + + return ( + ctx.indent + + "```" + + language + + "\n" + + code + + (code.endsWith("\n") ? "" : "\n") + + "```\n\n" + ); +} + +function extractCodeContent(el: Element): string { + let result = ""; + for (const child of Array.from(el.childNodes)) { + if (child.nodeType === 3 /* Node.TEXT_NODE */) { + result += child.textContent || ""; + } else if (child.nodeType === 1 /* Node.ELEMENT_NODE */) { + const tag = (child as HTMLElement).tagName.toLowerCase(); + if (tag === "br") { + result += "\n"; + } else { + result += extractCodeContent(child as Element); + } + } + } + return result; +} + +function extractLanguageFromClass(className: string): string { + const match = className.match(/language-(\S+)/); + return match ? match[1] : ""; +} + +function serializeUnorderedList( + el: HTMLElement, + ctx: SerializeContext +): string { + let result = ""; + const items = Array.from(el.children).filter( + (child) => child.tagName.toLowerCase() === "li" + ); + + for (const item of items) { + result += serializeListItem(item as HTMLElement, "bullet", ctx); + } + + return result; +} + +function serializeOrderedList(el: HTMLElement, ctx: SerializeContext): string { + let result = ""; + const items = Array.from(el.children).filter( + (child) => child.tagName.toLowerCase() === "li" + ); + const startNum = parseInt(el.getAttribute("start") || "1", 10); + + for (let i = 0; i < items.length; i++) { + const num = startNum + i; + result += serializeListItem(items[i] as HTMLElement, "ordered", ctx, num); + } + + return result; +} + +function serializeListItem( + el: HTMLElement, + listType: "bullet" | "ordered", + ctx: SerializeContext, + num?: number +): string { + // Check for checkbox (task list) - direct children only + let checkbox: HTMLInputElement | null = null; + let details: 
HTMLElement | null = null; + + for (const child of Array.from(el.children)) { + const tag = child.tagName.toLowerCase(); + if (tag === "input" && (child as HTMLInputElement).type === "checkbox") { + checkbox = child as HTMLInputElement; + } + if (tag === "details") { + details = child as HTMLElement; + } + } + + let marker: string; + let markerWidth: number; + + if (checkbox) { + const state = checkbox.checked ? "[x]" : "[ ]"; + marker = `* ${state} `; + // For child indentation, use bullet width (2), not full checkbox marker width + markerWidth = 2; + } else if (listType === "ordered") { + marker = `${num}. `; + markerWidth = marker.length; + } else { + marker = "* "; + markerWidth = 2; + } + + // Collect the item's inline content + let inlineContent: string; + let firstContentEl: Element | null; + + if (details) { + // Toggle item: get content from summary + const summary = details.querySelector("summary"); + const summaryP = summary?.querySelector("p"); + firstContentEl = details; + inlineContent = summaryP ? serializeInlineContent(summaryP) : ""; + } else { + firstContentEl = getFirstContentElement(el, checkbox); + inlineContent = firstContentEl ? serializeInlineContent(firstContentEl) : ""; + } + + let result = ctx.indent + marker + inlineContent + "\n\n"; + + // Serialize child content (nested lists, continuation paragraphs, etc.) 
+ const childIndent = ctx.indent + " ".repeat(markerWidth); + const childCtx: SerializeContext = { indent: childIndent, inList: true }; + + // For toggle items, also serialize children inside the details element + if (details) { + const summary = details.querySelector("summary"); + for (const child of Array.from(details.children)) { + if (child === summary) {continue;} + const childTag = child.tagName.toLowerCase(); + if (childTag === "p") { + const content = serializeInlineContent(child as HTMLElement); + result += childIndent + content + "\n\n"; + } else { + result += serializeNode(child, childCtx); + } + } + } + + const children = Array.from(el.children); + for (const child of children) { + const childTag = child.tagName.toLowerCase(); + + // Skip the first content element and checkbox + if (child === firstContentEl || (child as HTMLElement) === checkbox) {continue;} + if (childTag === "input") {continue;} + + // Nested lists and other block content + if (childTag === "ul" || childTag === "ol") { + result += serializeNode(child, childCtx); + } else if (childTag === "p") { + // Continuation paragraph within list item + const content = serializeInlineContent(child as HTMLElement); + result += childIndent + content + "\n\n"; + } else { + result += serializeNode(child, childCtx); + } + } + + return result; +} + +function getFirstContentElement( + li: HTMLElement, + checkbox: HTMLInputElement | null +): HTMLElement | null { + for (const child of Array.from(li.children)) { + if (child === checkbox) {continue;} + if (child.tagName.toLowerCase() === "input") {continue;} + const tag = child.tagName.toLowerCase(); + if (tag === "p" || tag === "span") {return child as HTMLElement;} + } + return null; +} + +// ─── Table Serializer ──────────────────────────────────────────────────────── + +function serializeTable(el: HTMLElement, ctx: SerializeContext): string { + // First, determine column count from colgroup or first row + const colgroup = el.querySelector("colgroup"); + 
let colCount = 0; + + if (colgroup) { + colCount = colgroup.querySelectorAll("col").length; + } + + const rows: string[][] = []; + let hasHeader = false; + + // Collect all rows, handling colspan/rowspan + const trElements = el.querySelectorAll("tr"); + // Build a grid to handle colspan/rowspan + const grid: (string | null)[][] = []; + + trElements.forEach((tr, rowIdx) => { + if (!grid[rowIdx]) {grid[rowIdx] = [];} + const cellElements = tr.querySelectorAll("th, td"); + let gridCol = 0; + + cellElements.forEach((cell) => { + // Find next empty column in this row + while (grid[rowIdx][gridCol] !== undefined) {gridCol++;} + + if (rowIdx === 0 && cell.tagName.toLowerCase() === "th") { + hasHeader = true; + } + + const content = serializeInlineContent(cell as HTMLElement).trim(); + const colspan = parseInt(cell.getAttribute("colspan") || "1", 10); + const rowspan = parseInt(cell.getAttribute("rowspan") || "1", 10); + + // Fill the grid + for (let r = 0; r < rowspan; r++) { + for (let c = 0; c < colspan; c++) { + const ri = rowIdx + r; + if (!grid[ri]) {grid[ri] = [];} + grid[ri][gridCol + c] = r === 0 && c === 0 ? content : ""; + } + } + + gridCol += colspan; + }); + + // Update colCount + if (grid[rowIdx]) { + colCount = Math.max(colCount, grid[rowIdx].length); + } + }); + + // Convert grid to rows + for (const gridRow of grid) { + const row: string[] = []; + for (let c = 0; c < colCount; c++) { + row.push(gridRow && gridRow[c] !== undefined ? (gridRow[c] ?? "") : ""); + } + rows.push(row); + } + + if (rows.length === 0) {return "";} + + // Determine column widths + const colWidths: number[] = []; + for (let c = 0; c < colCount; c++) { + let maxWidth = 3; // minimum width for separator "---" + for (const row of rows) { + const cellWidth = c < row.length ? 
row[c].length : 0; + maxWidth = Math.max(maxWidth, cellWidth); + } + // Use minimum of 10 to match remark output + colWidths.push(Math.max(maxWidth, 10)); + } + + let result = ""; + + if (hasHeader) { + result += ctx.indent + formatTableRow(rows[0], colWidths, colCount) + "\n"; + result += ctx.indent + formatSeparatorRow(colWidths, colCount) + "\n"; + for (let r = 1; r < rows.length; r++) { + result += + ctx.indent + formatTableRow(rows[r], colWidths, colCount) + "\n"; + } + } else { + // No header — emit empty header + separator + const emptyRow = new Array(colCount).fill(""); + result += ctx.indent + formatTableRow(emptyRow, colWidths, colCount) + "\n"; + result += ctx.indent + formatSeparatorRow(colWidths, colCount) + "\n"; + for (const row of rows) { + result += + ctx.indent + formatTableRow(row, colWidths, colCount) + "\n"; + } + } + + result += "\n"; + return result; +} + +function formatTableRow( + cells: string[], + colWidths: number[], + colCount: number +): string { + const parts: string[] = []; + for (let c = 0; c < colCount; c++) { + const cell = c < cells.length ? 
cells[c] : ""; + parts.push(" " + cell.padEnd(colWidths[c]) + " "); + } + return "|" + parts.join("|") + "|"; +} + +function formatSeparatorRow(colWidths: number[], colCount: number): string { + const parts: string[] = []; + for (let c = 0; c < colCount; c++) { + parts.push(" " + "-".repeat(colWidths[c]) + " "); + } + return "|" + parts.join("|") + "|"; +} + +// ─── Media Serializers ─────────────────────────────────────────────────────── + +function serializeImage(el: HTMLElement, ctx: SerializeContext): string { + const src = el.getAttribute("src") || ""; + const alt = el.getAttribute("alt") || ""; + if (!src) { + return ctx.indent + "Add image\n\n"; + } + return ctx.indent + `![${alt}](${src})\n\n`; +} + +function serializeVideo(el: HTMLElement, ctx: SerializeContext): string { + const src = + el.getAttribute("src") || el.getAttribute("data-url") || ""; + const name = el.getAttribute("data-name") || el.getAttribute("title") || ""; + if (!src) { + return ctx.indent + "Add video\n\n"; + } + return ctx.indent + `![${name}](${src})\n\n`; +} + +function serializeAudio(el: HTMLElement, ctx: SerializeContext): string { + const src = el.getAttribute("src") || ""; + if (!src) {return "";} + // Audio has no visible representation in markdown; output as link with empty text + return ctx.indent + `[](${src})\n\n`; +} + +function serializeFigure(el: HTMLElement, ctx: SerializeContext): string { + let result = ""; + + // Find the media element + const img = el.querySelector("img"); + const video = el.querySelector("video"); + const audio = el.querySelector("audio"); + const link = el.querySelector("a"); + + if (img) { + const src = img.getAttribute("src") || ""; + const alt = img.getAttribute("alt") || ""; + result += ctx.indent + `![${alt}](${src})\n\n`; + } else if (video) { + const src = + video.getAttribute("src") || video.getAttribute("data-url") || ""; + const name = + video.getAttribute("data-name") || video.getAttribute("title") || ""; + result += ctx.indent + 
`![${name}](${src})\n\n`; + } else if (audio) { + const src = audio.getAttribute("src") || ""; + result += ctx.indent + `[](${src})\n\n`; + } else if (link) { + result += serializeBlockLink(link as HTMLElement, ctx); + } + + // Caption + const figcaption = el.querySelector("figcaption"); + if (figcaption) { + const caption = figcaption.textContent?.trim() || ""; + if (caption) { + result += ctx.indent + caption + "\n\n"; + } + } + + return result; +} + +function serializeBlockLink(el: HTMLElement, ctx: SerializeContext): string { + const href = el.getAttribute("href") || ""; + const text = el.textContent?.trim() || ""; + if (!href) {return ctx.indent + text + "\n\n";} + return ctx.indent + `[${text}](${href})\n\n`; +} + +function serializeDetails(el: HTMLElement, ctx: SerializeContext): string { + // Toggle heading or toggle list item + const summary = el.querySelector("summary"); + if (!summary) {return serializeChildren(el, ctx);} + + // Check if summary contains a heading + const heading = summary.querySelector("h1, h2, h3, h4, h5, h6"); + if (heading) { + let result = serializeHeading(heading as HTMLElement, ctx); + // Also serialize non-summary children of details + for (const child of Array.from(el.children)) { + if (child !== summary) { + result += serializeNode(child, ctx); + } + } + return result; + } + + // Otherwise serialize the summary content + return serializeChildren(summary, ctx); +} + +// ─── Inline Content Serializer ─────────────────────────────────────────────── + +function serializeInlineContent(el: Element): string { + let result = ""; + + for (const child of Array.from(el.childNodes)) { + if (child.nodeType === 3 /* Node.TEXT_NODE */) { + result += child.textContent || ""; + } else if (child.nodeType === 1 /* Node.ELEMENT_NODE */) { + const childEl = child as HTMLElement; + const tag = childEl.tagName.toLowerCase(); + + switch (tag) { + case "strong": + case "b": { + const inner = serializeInlineContent(childEl); + const { content, trailing 
} = extractTrailingWhitespace(inner); + if (content) { + result += `**${content}**${trailing}`; + } else { + // All whitespace — just output it without emphasis + result += trailing; + } + break; + } + case "em": + case "i": { + const inner = serializeInlineContent(childEl); + const { content, trailing } = extractTrailingWhitespace(inner); + if (content) { + result += `*${content}*${trailing}`; + } else { + result += trailing; + } + break; + } + case "s": + case "del": + result += `~~${serializeInlineContent(childEl)}~~`; + break; + case "code": + result += "`" + (childEl.textContent || "") + "`"; + break; + case "u": + // No markdown equivalent — strip the tag, keep content + result += serializeInlineContent(childEl); + break; + case "a": { + const href = childEl.getAttribute("href") || ""; + const text = serializeInlineContent(childEl); + result += `[${text}](${href})`; + break; + } + case "br": + result += "\\\n"; + break; + case "span": + // Color spans, etc. — strip the tag, keep content + result += serializeInlineContent(childEl); + break; + case "img": { + const src = childEl.getAttribute("src") || ""; + const alt = childEl.getAttribute("alt") || ""; + result += `![${alt}](${src})`; + break; + } + case "video": { + const src = + childEl.getAttribute("src") || + childEl.getAttribute("data-url") || + ""; + const name = + childEl.getAttribute("data-name") || + childEl.getAttribute("title") || + ""; + result += `![${name}](${src})`; + break; + } + case "p": + // Paragraph inside inline context (e.g., table cell) + result += serializeInlineContent(childEl); + break; + case "input": + // Checkbox in task list — handled at block level + break; + default: + result += serializeInlineContent(childEl); + break; + } + } + } + + return result; +} + +/** + * Extract trailing whitespace from emphasis content. + * Moves trailing spaces outside the emphasis delimiters to produce valid markdown. + * E.g., `Bold ` → `**Bold** ` instead of `**Bold **`. 
+ * Also handles multi-line input: the inline serializer emits "\\\n" for a hard
+ * break, so `text` can contain line breaks before its final character.
+ *
+ * @param text - Serialized inline markdown about to be wrapped in delimiters.
+ * @returns `content` (`text` without its trailing whitespace) and `trailing`
+ * (the whitespace that was cut off); `content + trailing === text`.
+ */
+function extractTrailingWhitespace(text: string): {
+  content: string;
+  trailing: string;
+} {
+  // `trimEnd` strips exactly what a trailing `\s*` would match. A
+  // `/^(.*?)(\s*)$/` match fails when `text` holds an inner line break (`.`
+  // excludes line terminators), which left the whitespace inside `**…**`.
+  const content = text.trimEnd();
+  return { content, trailing: text.slice(content.length) };
+}
+
+/**
+ * Trim leading/trailing hard breaks from inline content.
+ * Matches remark behavior where
at start/end of paragraph is dropped. + */ +function trimHardBreaks(content: string): string { + // Remove leading hard breaks + let result = content.replace(/^(\\\n)+/, ""); + // Remove trailing hard breaks (including trailing backslash) + result = result.replace(/(\\\n)+$/, ""); + result = result.replace(/\\$/, ""); + return result; +} diff --git a/packages/core/src/api/exporters/markdown/markdownExporter.ts b/packages/core/src/api/exporters/markdown/markdownExporter.ts index 23aad8db7c..2f73616dc0 100644 --- a/packages/core/src/api/exporters/markdown/markdownExporter.ts +++ b/packages/core/src/api/exporters/markdown/markdownExporter.ts @@ -1,9 +1,4 @@ import { Schema } from "prosemirror-model"; -import rehypeParse from "rehype-parse"; -import rehypeRemark from "rehype-remark"; -import remarkGfm from "remark-gfm"; -import remarkStringify from "remark-stringify"; -import { unified } from "unified"; import { PartialBlock } from "../../../blocks/defaultBlocks.js"; import type { BlockNoteEditor } from "../../../editor/BlockNoteEditor.js"; @@ -13,25 +8,11 @@ import { StyleSchema, } from "../../../schema/index.js"; import { createExternalHTMLExporter } from "../html/externalHTMLExporter.js"; -import { removeUnderlines } from "./util/removeUnderlinesRehypePlugin.js"; -import { addSpacesToCheckboxes } from "./util/addSpacesToCheckboxesRehypePlugin.js"; -import { convertVideoToMarkdown } from "./util/convertVideoToMarkdownRehypePlugin.js"; +import { htmlToMarkdown } from "./htmlToMarkdown.js"; // Needs to be sync because it's used in drag handler event (SideMenuPlugin) export function cleanHTMLToMarkdown(cleanHTMLString: string) { - const markdownString = unified() - .use(rehypeParse, { fragment: true }) - .use(convertVideoToMarkdown) - .use(removeUnderlines) - .use(addSpacesToCheckboxes) - .use(rehypeRemark) - .use(remarkGfm) - .use(remarkStringify, { - handlers: { text: (node) => node.value }, - }) - .processSync(cleanHTMLString); - - return markdownString.value as 
string; + return htmlToMarkdown(cleanHTMLString); } export function blocksToMarkdown< diff --git a/packages/core/src/api/exporters/markdown/util/addSpacesToCheckboxesRehypePlugin.ts b/packages/core/src/api/exporters/markdown/util/addSpacesToCheckboxesRehypePlugin.ts deleted file mode 100644 index 7c03eb9a64..0000000000 --- a/packages/core/src/api/exporters/markdown/util/addSpacesToCheckboxesRehypePlugin.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { Element as HASTElement, Parent as HASTParent } from "hast"; -import { fromDom } from "hast-util-from-dom"; - -/** - * Rehype plugin which adds a space after each checkbox input element. This is - * because remark doesn't add any spaces between the checkbox input and the text - * itself, but these are needed for correct Markdown syntax. - */ -export function addSpacesToCheckboxes() { - const helper = (tree: HASTParent) => { - if (tree.children && "length" in tree.children && tree.children.length) { - for (let i = tree.children.length - 1; i >= 0; i--) { - const child = tree.children[i]; - const nextChild = - i + 1 < tree.children.length ? tree.children[i + 1] : undefined; - - // Checks for paragraph element after checkbox input element. - if ( - child.type === "element" && - child.tagName === "input" && - child.properties?.type === "checkbox" && - nextChild?.type === "element" && - nextChild.tagName === "p" - ) { - // Converts paragraph to span, otherwise remark will think it needs to - // be on a new line. - nextChild.tagName = "span"; - // Adds a space after the checkbox input element. 
- nextChild.children.splice( - 0, - 0, - fromDom(document.createTextNode(" ")) as HASTElement, - ); - } else { - helper(child as HASTParent); - } - } - } - }; - - return helper; -} diff --git a/packages/core/src/api/exporters/markdown/util/convertVideoToMarkdownRehypePlugin.ts b/packages/core/src/api/exporters/markdown/util/convertVideoToMarkdownRehypePlugin.ts deleted file mode 100644 index a7de2e3442..0000000000 --- a/packages/core/src/api/exporters/markdown/util/convertVideoToMarkdownRehypePlugin.ts +++ /dev/null @@ -1,19 +0,0 @@ -import { Parent as HASTParent } from "hast"; -import { visit } from "unist-util-visit"; - -// Originally, rehypeParse parses videos as links, which is incorrect. -export function convertVideoToMarkdown() { - return (tree: HASTParent) => { - visit(tree, "element", (node, index, parent) => { - if (parent && node.tagName === "video") { - const src = node.properties?.src || node.properties?.["data-url"] || ""; - const name = - node.properties?.title || node.properties?.["data-name"] || ""; - parent.children[index!] = { - type: "text", - value: `![${name}](${src})`, - }; - } - }); - }; -} diff --git a/packages/core/src/api/exporters/markdown/util/removeUnderlinesRehypePlugin.ts b/packages/core/src/api/exporters/markdown/util/removeUnderlinesRehypePlugin.ts deleted file mode 100644 index 5b455d1b53..0000000000 --- a/packages/core/src/api/exporters/markdown/util/removeUnderlinesRehypePlugin.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { Element as HASTElement, Parent as HASTParent } from "hast"; - -/** - * Rehype plugin which removes tags. Used to remove underlines before converting HTML to markdown, as Markdown - * doesn't support underlines. 
- */ -export function removeUnderlines() { - const removeUnderlinesHelper = (tree: HASTParent) => { - let numChildElements = tree.children.length; - - for (let i = 0; i < numChildElements; i++) { - const node = tree.children[i]; - - if (node.type === "element") { - // Recursively removes underlines from child elements. - removeUnderlinesHelper(node); - - if ((node as HASTElement).tagName === "u") { - // Lifts child nodes outside underline element, deletes the underline element, and updates current index & - // the number of child elements. - if (node.children.length > 0) { - tree.children.splice(i, 1, ...node.children); - - const numElementsAdded = node.children.length - 1; - numChildElements += numElementsAdded; - i += numElementsAdded; - } else { - tree.children.splice(i, 1); - - numChildElements--; - i--; - } - } - } - } - }; - - return removeUnderlinesHelper; -} diff --git a/packages/core/src/api/parsers/html/util/__snapshots__/nestedLists.test.ts.snap b/packages/core/src/api/parsers/html/util/__snapshots__/nestedLists.test.ts.snap index 68c0a1c817..1db488255b 100644 --- a/packages/core/src/api/parsers/html/util/__snapshots__/nestedLists.test.ts.snap +++ b/packages/core/src/api/parsers/html/util/__snapshots__/nestedLists.test.ts.snap @@ -1,129 +1,144 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[`Lift nested lists > Lifts multiple bullet lists 1`] = ` -" -

-" +"" `; exports[`Lift nested lists > Lifts multiple bullet lists with content in between 1`] = ` -" - -" +"" `; exports[`Lift nested lists > Lifts nested bullet lists 1`] = ` -" - -" +"" `; exports[`Lift nested lists > Lifts nested bullet lists with content after nested list 1`] = ` -" - -" +"" `; exports[`Lift nested lists > Lifts nested bullet lists without li 1`] = ` -" - -" +"" `; exports[`Lift nested lists > Lifts nested mixed lists 1`] = ` -" -
    -
    -
  1. Numbered List Item 1
  2. -
    -
      -
    • Bullet List Item 1
    • -
    • Bullet List Item 2
    • -
    -
    -
    -
  3. Numbered List Item 2
  4. -
-" +"
    +
  1. + Numbered List Item 1 +
    • +
    • + Bullet List Item 1 +
    • +
    • + Bullet List Item 2 +
    • +
    +
  2. + Numbered List Item 2 +
  3. +
" `; exports[`Lift nested lists > Lifts nested numbered lists 1`] = ` -" -
    -
    -
  1. Numbered List Item 1
  2. -
    -
      -
    1. Nested Numbered List Item 1
    2. -
    3. Nested Numbered List Item 2
    4. -
    -
    -
    -
  3. Numbered List Item 2
  4. -
-" +"
    +
  1. + Numbered List Item 1 +
    1. +
    2. + Nested Numbered List Item 1 +
    3. +
    4. + Nested Numbered List Item 2 +
    5. +
    +
  2. + Numbered List Item 2 +
  3. +
" `; diff --git a/packages/core/src/api/parsers/html/util/nestedLists.test.ts b/packages/core/src/api/parsers/html/util/nestedLists.test.ts index 03fadebefe..e695efa9c4 100644 --- a/packages/core/src/api/parsers/html/util/nestedLists.test.ts +++ b/packages/core/src/api/parsers/html/util/nestedLists.test.ts @@ -1,20 +1,9 @@ import { describe, expect, it } from "vitest"; import { nestedListsToBlockNoteStructure } from "./nestedLists.js"; -import { unified } from "unified"; -import rehypeParse from "rehype-parse"; -import rehypeFormat from "rehype-format"; -import rehypeStringify from "rehype-stringify"; async function testHTML(html: string) { const htmlNode = nestedListsToBlockNoteStructure(html); - - const pretty = await unified() - .use(rehypeParse, { fragment: true }) - .use(rehypeFormat) - .use(rehypeStringify) - .process(htmlNode.innerHTML); - - expect(pretty.value).toMatchSnapshot(); + expect(htmlNode.innerHTML).toMatchSnapshot(); } describe("Lift nested lists", () => { diff --git a/packages/core/src/api/parsers/markdown/markdownToHtml.ts b/packages/core/src/api/parsers/markdown/markdownToHtml.ts new file mode 100644 index 0000000000..54c346d764 --- /dev/null +++ b/packages/core/src/api/parsers/markdown/markdownToHtml.ts @@ -0,0 +1,969 @@ +import { isVideoUrl } from "../../../util/string.js"; + +/** + * Custom markdown-to-HTML converter for BlockNote. + * Replaces the unified/remark/rehype pipeline with a direct, minimal implementation + * that handles exactly the markdown features BlockNote needs. 
+ */ + +// ─── HTML Escaping ─────────────────────────────────────────────────────────── + +function escapeHtml(str: string): string { + return str + .replace(/&/g, "&") + .replace(//g, ">") + .replace(/"/g, """); +} + +// ─── Helpers ───────────────────────────────────────────────────────────────── + +function isAlphanumeric(char: string | undefined): boolean { + if (!char) { + return false; + } + return /\w/.test(char); +} + +/** + * Returns true when an underscore delimiter at position `i` is "intraword", + * meaning the characters on both sides are alphanumeric (e.g. `snake_case`). + * In that case the underscore should NOT be treated as emphasis per CommonMark. + */ +function isIntraword(text: string, i: number, delimLen: number): boolean { + const before = i > 0 ? text[i - 1] : undefined; + const after = + i + delimLen < text.length ? text[i + delimLen] : undefined; + return isAlphanumeric(before) && isAlphanumeric(after); +} + +// ─── Inline Parser ─────────────────────────────────────────────────────────── + +/** + * Parse inline markdown syntax and return HTML. + * Handles: bold, italic, bold+italic, strikethrough, inline code, + * links, images (with video detection), hard line breaks, backslash escapes. + */ +function parseInline(text: string): string { + let result = ""; + let i = 0; + + while (i < text.length) { + // Backslash escape + if (text[i] === "\\" && i + 1 < text.length) { + const next = text[i + 1]; + // Hard line break: backslash at end of line + if (next === "\n") { + result += "
\n"; + i += 2; + continue; + } + // Escapable characters + if ("\\`*_{}[]()#+-.!~|>".includes(next)) { + result += escapeHtml(next); + i += 2; + continue; + } + } + + // Inline code (highest priority for inline) + if (text[i] === "`") { + const codeResult = parseInlineCode(text, i); + if (codeResult) { + result += codeResult.html; + i = codeResult.end; + continue; + } + } + + // Images ![alt](url) + if (text[i] === "!" && text[i + 1] === "[") { + const imgResult = parseImage(text, i); + if (imgResult) { + result += imgResult.html; + i = imgResult.end; + continue; + } + } + + // Links [text](url) + if (text[i] === "[") { + const linkResult = parseLink(text, i); + if (linkResult) { + result += linkResult.html; + i = linkResult.end; + continue; + } + } + + // Strikethrough ~~text~~ + if (text[i] === "~" && text[i + 1] === "~") { + const strikeResult = parseDelimited(text, i, "~~", "", ""); + if (strikeResult) { + result += strikeResult.html; + i = strikeResult.end; + continue; + } + } + + // Bold+Italic ***text*** or ___text___ + if ( + (text[i] === "*" && text[i + 1] === "*" && text[i + 2] === "*") || + (text[i] === "_" && + text[i + 1] === "_" && + text[i + 2] === "_" && + !isIntraword(text, i, 3)) + ) { + const delimiter = text.substring(i, i + 3); + const tripleResult = parseDelimited( + text, + i, + delimiter, + "", + "" + ); + if (tripleResult) { + result += tripleResult.html; + i = tripleResult.end; + continue; + } + } + + // Bold **text** or __text__ + if ( + (text[i] === "*" && text[i + 1] === "*") || + (text[i] === "_" && text[i + 1] === "_" && !isIntraword(text, i, 2)) + ) { + const delimiter = text.substring(i, i + 2); + const boldResult = parseDelimited( + text, + i, + delimiter, + "", + "" + ); + if (boldResult) { + result += boldResult.html; + i = boldResult.end; + continue; + } + } + + // Italic *text* or _text_ + if (text[i] === "*" || (text[i] === "_" && !isIntraword(text, i, 1))) { + const delimiter = text[i]; + const italicResult = parseDelimited( 
/**
 * Parse a backtick-delimited inline code span that begins at `start`.
 *
 * The opening run of backticks is measured, then the text is scanned for the
 * first later run of exactly the same length; runs of any other length are
 * treated as part of the code content.
 *
 * @param text  The inline markdown being parsed.
 * @param start Index of the first backtick of the opening run.
 * @returns The emitted string and the index just past the closing run, or
 *          `null` when no closing run of matching length exists.
 */
function parseInlineCode(
  text: string,
  start: number
): { html: string; end: number } | null {
  // Measure the opening backtick run.
  let contentStart = start;
  while (contentStart < text.length && text[contentStart] === "`") {
    contentStart++;
  }
  const openCount = contentStart - start;

  // Look for a closing run with exactly the same number of backticks.
  let j = contentStart;
  while (j < text.length) {
    if (text[j] !== "`") {
      j++;
      continue;
    }

    const closeStart = j;
    while (j < text.length && text[j] === "`") {
      j++;
    }
    if (j - closeStart !== openCount) {
      // Different length: this run belongs to the code content, keep scanning.
      continue;
    }

    let code = text.substring(contentStart, closeStart);
    // Strip one leading and one trailing space when both exist, but only if
    // the content is not made up entirely of spaces. Without that last
    // condition a span holding just spaces would collapse to an empty span.
    if (
      code.length >= 2 &&
      code[0] === " " &&
      code[code.length - 1] === " " &&
      /[^ ]/.test(code)
    ) {
      code = code.substring(1, code.length - 1);
    }

    // NOTE(review): no wrapping markup is visible in this template in the
    // reviewed text; confirm the expected output against the emitter.
    return {
      html: `${escapeHtml(code)}`,
      end: j,
    };
  }

  return null;
}
/**
 * Shared scanner for balanced delimiter pairs.
 *
 * Starting on the opener at `openPos`, walks forward tracking nesting depth
 * and returns the index of the closer that brings the depth back to zero.
 * A backslash escapes the character that follows it, so escaped delimiters
 * never affect the depth.
 *
 * @param text    The text to scan.
 * @param openPos Index of the opening delimiter.
 * @param open    The opening delimiter character.
 * @param close   The closing delimiter character.
 * @returns Index of the matching closer, or -1 when `openPos` is not on an
 *          opener or the pair is never balanced.
 */
function findClosingDelimiter(
  text: string,
  openPos: number,
  open: string,
  close: string
): number {
  // Without an opener at openPos the depth would go negative on the first
  // closer and a later, unrelated closer could be reported as the match.
  if (text[openPos] !== open) {
    return -1;
  }

  let depth = 0;
  for (let i = openPos; i < text.length; i++) {
    const ch = text[i];
    if (ch === "\\" && i + 1 < text.length) {
      i++; // skip the escaped character
      continue;
    }
    if (ch === open) {
      depth++;
    } else if (ch === close) {
      depth--;
      if (depth === 0) {
        return i;
      }
    }
  }
  return -1;
}

/**
 * Find the `]` that closes the `[` located at `openPos`, honouring nested
 * brackets and backslash escapes.
 *
 * @returns Index of the matching `]`, or -1 if there is none.
 */
function findClosingBracket(text: string, openPos: number): number {
  return findClosingDelimiter(text, openPos, "[", "]");
}

/**
 * Find the `)` that closes the `(` located at `openPos`, honouring nested
 * parentheses and backslash escapes.
 *
 * @returns Index of the matching `)`, or -1 if there is none.
 */
function findClosingParen(text: string, openPos: number): number {
  return findClosingDelimiter(text, openPos, "(", ")");
}
// ─── Block-Level Types ───────────────────────────────────────────────────────

/** Base shape shared by every block token: a string discriminant. */
interface BlockToken {
  type: string;
}

/** ATX (`# …`) or setext (underlined) heading. `content` is raw inline markdown. */
interface HeadingToken extends BlockToken {
  type: "heading";
  level: number;
  content: string;
}

/** One or more consecutive text lines, joined with "\n". */
interface ParagraphToken extends BlockToken {
  type: "paragraph";
  content: string;
}

/** Fenced code block. `language` is the trimmed info string ("" when absent). */
interface CodeBlockToken extends BlockToken {
  type: "codeBlock";
  language: string;
  code: string;
}

/** Blockquote. `content` is the quoted markdown with the `>` prefixes removed. */
interface BlockquoteToken extends BlockToken {
  type: "blockquote";
  content: string;
}

/** Thematic break (`---`, `***`, `___`). */
interface HorizontalRuleToken extends BlockToken {
  type: "hr";
}

/** A single list item together with the raw text of everything nested in it. */
interface ListItemToken extends BlockToken {
  type: "listItem";
  listType: "bullet" | "ordered" | "task";
  indent: number; // count of whitespace characters before the marker
  content: string; // trimmed text of the item's first line
  start?: number; // for ordered lists: number parsed from the marker
  checked?: boolean; // for task lists: true unless the box is "[ ]"
  childContent?: string; // de-indented continuation lines, not yet tokenized
}

/** Pipe table: header cells, body rows and per-column alignment. */
interface TableToken extends BlockToken {
  type: "table";
  headers: string[];
  rows: string[][];
  alignments: ("left" | "center" | "right" | null)[];
}

type Token =
  | HeadingToken
  | ParagraphToken
  | CodeBlockToken
  | BlockquoteToken
  | HorizontalRuleToken
  | ListItemToken
  | TableToken;

// ─── Block-Level Tokenizer ───────────────────────────────────────────────────

/**
 * Recognise the opening line of a fenced code block.
 *
 * A backtick fence whose info string itself contains a backtick is rejected:
 * such a line (e.g. "```a``` b") is inline code inside a paragraph, and
 * treating it as a fence would swallow the rest of the document.
 *
 * @returns The fence character, its run length and the trimmed info string,
 *          or `null` when the line does not open a fence.
 */
function matchFenceOpen(
  line: string
): { char: string; length: number; info: string } | null {
  const m = /^(`{3,}|~{3,})(.*)$/.exec(line);
  if (!m) {
    return null;
  }
  const run = m[1];
  const info = m[2].trim();
  if (run[0] === "`" && info.includes("`")) {
    return null;
  }
  return { char: run[0], length: run.length, info };
}

/**
 * Build the token for the list item whose first line is `lines[index]` and
 * consume every following line that belongs to it.
 *
 * @param lines All lines of the document.
 * @param index Index of the item's first line.
 * @param match Result of the list-item pattern applied to that line:
 *              [1] indent, [2] marker, [3] spaces after the marker,
 *              [4] optional task checkbox, [5] rest of the line.
 * @returns The token and the index of the first line not consumed.
 */
function readListItem(
  lines: string[],
  index: number,
  match: RegExpMatchArray
): { token: ListItemToken; nextLine: number } {
  const indent = match[1].length;
  const marker = match[2];
  const markerSpaces = match[3];
  const checkbox: string | undefined = match[4];
  const firstLineContent = match[5];

  let listType: ListItemToken["listType"];
  let start: number | undefined;
  let checked: boolean | undefined;

  if (checkbox) {
    listType = "task";
    checked = checkbox.trim() !== "[ ]";
  } else if (/^\d+[.)]$/.test(marker)) {
    listType = "ordered";
    start = parseInt(marker, 10);
  } else {
    listType = "bullet";
  }

  // Column where the item's own text starts.
  const contentIndent =
    indent +
    marker.length +
    markerSpaces.length +
    (checkbox ? checkbox.length : 0);

  // Sub-lists may start anywhere past the marker column.
  const minChildIndent = indent + 1;

  const leadingWhitespace = (s: string): number => {
    const m = /^\s*/.exec(s);
    return m ? m[0].length : 0;
  };

  const belongsToItem = (lineStr: string): boolean => {
    if (lineStr.trim() === "") {
      return true; // blank lines are decided by the look-ahead below
    }
    const lineInd = leadingWhitespace(lineStr);
    // At or beyond the content column: continuation text.
    if (lineInd >= contentIndent) {
      return true;
    }
    // Between marker and content column: only if it starts a sub-list.
    return (
      lineInd >= minChildIndent && /^\s*([-*+]|\d+[.)])\s+/.test(lineStr)
    );
  };

  let i = index + 1;
  const subLines: string[] = [];
  while (i < lines.length) {
    const cur = lines[i];

    if (cur.trim() === "") {
      // Keep a blank line only when the next non-blank line is still ours.
      let lookAhead = i + 1;
      while (lookAhead < lines.length && lines[lookAhead].trim() === "") {
        lookAhead++;
      }
      if (lookAhead < lines.length && belongsToItem(lines[lookAhead])) {
        subLines.push("");
        i++;
        continue;
      }
      break;
    }

    if (!belongsToItem(cur)) {
      break;
    }

    // De-indent so that nested content keeps its relative indentation.
    const strip =
      leadingWhitespace(cur) >= contentIndent ? contentIndent : minChildIndent;
    subLines.push(cur.substring(strip));
    i++;
  }

  // Drop leading/trailing empty lines but do not trim inner indentation.
  const childContent = subLines.join("\n").replace(/^\n+|\n+$/g, "");

  return {
    token: {
      type: "listItem",
      listType,
      indent,
      content: firstLineContent.trim(),
      start,
      checked,
      childContent: childContent || undefined,
    },
    nextLine: i,
  };
}

/**
 * Split a markdown document into block-level tokens.
 *
 * Blocks are tried in this order for each non-blank line: fenced code, ATX
 * heading, thematic break (or setext H2 when directly under a paragraph),
 * setext H1, pipe table, blockquote, list item, and finally paragraph.
 *
 * @param markdown Markdown source; LF, CRLF and lone CR line endings are all
 *                 accepted.
 * @returns Tokens in document order. Inline content is left unparsed.
 */
function tokenize(markdown: string): Token[] {
  // Splitting on "\n" alone would leave a trailing "\r" on every line of a
  // CRLF document, and "." in the patterns below never matches "\r".
  const lines = markdown.split(/\r\n|\r|\n/);
  const tokens: Token[] = [];
  const thematicBreak = /^(\s{0,3})([-*_])\s*(\2\s*){2,}$/;
  let i = 0;
  let prevLineWasBlank = true; // treat start of document as after blank

  while (i < lines.length) {
    const line = lines[i];

    // Blank line — skip
    if (line.trim() === "") {
      prevLineWasBlank = true;
      i++;
      continue;
    }

    // Fenced code block
    const fence = matchFenceOpen(line);
    if (fence) {
      // Built once per fence rather than once per scanned line.
      const closing = new RegExp(`^${fence.char}{${fence.length},}\\s*$`);
      const codeLines: string[] = [];
      i++;
      while (i < lines.length) {
        if (closing.test(lines[i])) {
          i++;
          break;
        }
        codeLines.push(lines[i]);
        i++;
      }
      tokens.push({
        type: "codeBlock",
        language: fence.info,
        code: codeLines.join("\n"),
      });
      prevLineWasBlank = false;
      continue;
    }

    // ATX heading
    const headingMatch = line.match(/^(#{1,6})\s+(.+?)(?:\s+#+)?$/);
    if (headingMatch) {
      tokens.push({
        type: "heading",
        level: headingMatch[1].length,
        content: headingMatch[2],
      });
      prevLineWasBlank = false;
      i++;
      continue;
    }

    // Horizontal rule: ---, ***, ___ (3+ chars, optionally with spaces)
    if (thematicBreak.test(line)) {
      // Setext H2: dashes immediately after a paragraph (no blank between)
      const prevToken = tokens[tokens.length - 1];
      if (
        !prevLineWasBlank &&
        /^-+$/.test(line.trim()) &&
        prevToken &&
        prevToken.type === "paragraph"
      ) {
        tokens[tokens.length - 1] = {
          type: "heading",
          level: 2,
          content: prevToken.content,
        };
      } else {
        tokens.push({ type: "hr" });
      }
      prevLineWasBlank = false;
      i++;
      continue;
    }

    // Setext H1: current line underlined with "=" (H2 is handled above)
    if (i + 1 < lines.length && /^={1,}\s*$/.test(lines[i + 1])) {
      tokens.push({
        type: "heading",
        level: 1,
        content: line.trim(),
      });
      prevLineWasBlank = false;
      i += 2;
      continue;
    }

    // Table: detected by the separator row on the following line
    const tableResult = tryParseTable(lines, i);
    if (tableResult) {
      tokens.push(tableResult.token);
      i = tableResult.nextLine;
      prevLineWasBlank = false;
      continue;
    }

    // Blockquote
    if (/^\s{0,3}>/.test(line)) {
      const quoteLines: string[] = [];
      while (i < lines.length && /^\s{0,3}>/.test(lines[i])) {
        // Remove the > prefix and at most one following whitespace character
        quoteLines.push(lines[i].replace(/^\s{0,3}>\s?/, ""));
        i++;
      }
      tokens.push({
        type: "blockquote",
        content: quoteLines.join("\n"),
      });
      prevLineWasBlank = false;
      continue;
    }

    // List item (bullet, ordered, or task)
    const listItemMatch = line.match(
      /^(\s*)([-*+]|\d+[.)])(\s+)(\[[ xX]\] )?(.*)$/
    );
    if (listItemMatch) {
      const item = readListItem(lines, i, listItemMatch);
      tokens.push(item.token);
      i = item.nextLine;
      prevLineWasBlank = false;
      continue;
    }

    // Paragraph (default)
    const paraLines: string[] = [line];
    i++;
    while (i < lines.length) {
      const nextLine = lines[i];
      // Stop paragraph on blank line
      if (nextLine.trim() === "") {
        break;
      }
      // Stop on anything that starts another block
      if (/^(#{1,6})\s/.test(nextLine)) {
        break;
      }
      if (matchFenceOpen(nextLine)) {
        break;
      }
      if (/^\s{0,3}>/.test(nextLine)) {
        break;
      }
      if (thematicBreak.test(nextLine)) {
        break;
      }
      if (/^\s*([-*+]|\d+[.)])\s+/.test(nextLine)) {
        break;
      }
      if (/^\s*\|(.+\|)+\s*$/.test(nextLine)) {
        break;
      }
      // Leave a line alone if the line after it is a setext underline
      if (
        i + 1 < lines.length &&
        /^[=-]+\s*$/.test(lines[i + 1]) &&
        nextLine.trim().length > 0
      ) {
        break;
      }
      paraLines.push(nextLine);
      i++;
    }
    tokens.push({
      type: "paragraph",
      content: paraLines.join("\n"),
    });
    prevLineWasBlank = false;
  }

  return tokens;
}

/**
 * Try to read a pipe table whose header row is `lines[start]`.
 *
 * A table needs a header row followed by a separator row
 * (`| --- | :--: |`), and both rows must have the same number of cells.
 * Body rows are then consumed for as long as lines look like pipe rows.
 *
 * @returns The table token and the index of the first line after the table,
 *          or `null` when no table starts at `start`.
 */
function tryParseTable(
  lines: string[],
  start: number
): { token: TableToken; nextLine: number } | null {
  const pipeRow = /^\s*\|(.+\|)+\s*$/;

  // A table needs at least a header row and a separator row
  if (start + 1 >= lines.length) {
    return null;
  }

  const headerLine = lines[start];
  const separatorLine = lines[start + 1];

  // Separator row: | --- | --- | or | :--- | ---: |
  if (!/^\s*\|(\s*:?-+:?\s*\|)+\s*$/.test(separatorLine)) {
    return null;
  }
  if (!pipeRow.test(headerLine)) {
    return null;
  }

  const headers = parsePipeCells(headerLine);
  const alignments = parseAlignments(separatorLine);

  // Header and separator rows must agree on the column count; otherwise
  // this is not a table.
  if (headers.length !== alignments.length) {
    return null;
  }

  const rows: string[][] = [];
  let i = start + 2;
  while (i < lines.length && pipeRow.test(lines[i])) {
    rows.push(parsePipeCells(lines[i]));
    i++;
  }

  return {
    token: {
      type: "table",
      headers,
      rows,
      alignments,
    },
    nextLine: i,
  };
}

/**
 * Split one pipe-table row into trimmed cell strings.
 *
 * One leading and one trailing pipe are dropped first. `\|` is an escaped
 * pipe: it does not split the cell and is emitted as a literal `|`.
 */
function parsePipeCells(line: string): string[] {
  const trimmed = line.trim();
  const withoutOuterPipes = trimmed.startsWith("|")
    ? trimmed.substring(1)
    : trimmed;
  const content = withoutOuterPipes.endsWith("|")
    ? withoutOuterPipes.substring(0, withoutOuterPipes.length - 1)
    : withoutOuterPipes;

  const cells: string[] = [];
  let current = "";
  for (let i = 0; i < content.length; i++) {
    const ch = content[i];
    if (ch === "\\" && content[i + 1] === "|") {
      current += "|";
      i++; // consume the escaped pipe
    } else if (ch === "|") {
      cells.push(current.trim());
      current = "";
    } else {
      current += ch;
    }
  }
  cells.push(current.trim());

  return cells;
}

/**
 * Derive per-column alignment from a table separator row.
 *
 * `:---` is left, `---:` is right, `:---:` is center, and a cell without
 * colons yields `null`.
 */
function parseAlignments(
  separatorLine: string
): ("left" | "center" | "right" | null)[] {
  return parsePipeCells(separatorLine).map((cell) => {
    const left = cell.startsWith(":");
    const right = cell.endsWith(":");
    if (left && right) {
      return "center";
    }
    if (right) {
      return "right";
    }
    if (left) {
      return "left";
    }
    return null;
  });
}
+ i++; + break; + } + + case "paragraph": { + const t = token as ParagraphToken; + html += `

${parseInline(t.content)}

`; + i++; + break; + } + + case "codeBlock": { + const t = token as CodeBlockToken; + const langAttr = t.language + ? ` data-language="${escapeHtml(t.language)}"` + : ""; + html += `
${escapeHtml(t.code)}
`; + i++; + break; + } + + case "blockquote": { + const t = token as BlockquoteToken; + // Recursively parse blockquote content as markdown + const innerTokens = tokenize(t.content); + const innerHtml = tokensToHtml(innerTokens); + html += `
${innerHtml}
`; + i++; + break; + } + + case "hr": + html += `
`; + i++; + break; + + case "listItem": { + // Collect consecutive list items and build nested list structure + const listHtml = emitListItems(tokens, i); + html += listHtml.html; + i = listHtml.nextIndex; + break; + } + + case "table": { + const t = token as TableToken; + html += emitTable(t); + i++; + break; + } + + default: + i++; + } + } + + return html; +} + +function emitListItems( + tokens: Token[], + startIdx: number +): { html: string; nextIndex: number } { + let html = ""; + let i = startIdx; + let currentListType: "bullet" | "ordered" | null = null; + + while (i < tokens.length && tokens[i].type === "listItem") { + const item = tokens[i] as ListItemToken; + const effectiveType = getEffectiveListType(item.listType); + + // Check if we need to switch list type + if (currentListType !== null && currentListType !== effectiveType) { + // Close current list, open new one + html += ``; + currentListType = null; + } + + // Open list if needed + if (currentListType === null) { + if (effectiveType === "ordered") { + const startAttr = + item.start !== undefined && item.start !== 1 + ? ` start="${item.start}"` + : ""; + html += ``; + } else { + html += `