diff --git a/lib/__tests__/index.test.ts b/lib/__tests__/index.test.ts index b6291fa..e6656d9 100644 --- a/lib/__tests__/index.test.ts +++ b/lib/__tests__/index.test.ts @@ -3,11 +3,28 @@ import { toDocx } from "@m2d/core"; // Adjust path based on your setup import remarkMath from "remark-math"; import remarkParse from "remark-parse"; import { unified } from "unified"; -import { describe, it } from "vitest"; +import { describe, it, vi } from "vitest"; import { mathPlugin } from "../src"; const markdown = fs.readFileSync("../sample.md", "utf-8"); +const emptyOMathCount = async (md: string) => { + const mdast = unified().use(remarkParse).use(remarkMath).parse(md); + const buffer = (await toDocx( + mdast, + {}, + { plugins: [mathPlugin()] }, + "nodebuffer", + )) as Buffer; + const { execSync } = await import("node:child_process"); + const path = `/tmp/m2d-math-test-${Math.random()}.docx`; + fs.writeFileSync(path, buffer); + const xml = execSync(`unzip -p ${path} word/document.xml`, { + encoding: "utf8", + }); + return (xml.match(//g) ?? []).length; +}; + describe("toDocx", () => { it("should handle maths", async ({ expect }) => { const mdast = unified().use(remarkParse).use(remarkMath).parse(markdown); @@ -16,4 +33,15 @@ describe("toDocx", () => { expect(docxBlob).toBeInstanceOf(Blob); }); + + it("should not emit empty oMath for unrenderable inline math", async ({ + expect, + }) => { + const error = vi.spyOn(console, "error").mockImplementation(() => {}); + + expect(await emptyOMathCount("$x$ cm$^{2}$")).toBe(0); + expect(error).toHaveBeenCalled(); + + error.mockRestore(); + }); }); diff --git a/lib/package.json b/lib/package.json index 39de213..cd0b1c6 100644 --- a/lib/package.json +++ b/lib/package.json @@ -27,7 +27,8 @@ "clean": "rm -rf dist", "dev": "tsup --watch && tsc -p tsconfig-build.json -w", "typecheck": "tsc --noEmit", - "test": "vitest run --coverage" + "test": "vitest run --coverage", + "generate:katex": "node --experimental-strip-types scripts/generate-katex-data.ts" }, "devDependencies": { "@repo/typescript-config": "workspace:*", diff --git a/lib/scripts/benchmark-bundle-formats.ts b/lib/scripts/benchmark-bundle-formats.ts new file mode 100644 index 0000000..eb900a0 --- /dev/null +++ b/lib/scripts/benchmark-bundle-formats.ts @@ -0,0 +1,289 @@ +/** + * Benchmark KaTeX symbol table serialization formats. + * Run from lib/: pnpm exec node --experimental-strip-types scripts/benchmark-bundle-formats.ts + */ +import { execSync } from "node:child_process"; +import { readFileSync, rmSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { gzipSync } from "node:zlib"; +import { + KATEX_ACCENTS, + KATEX_ALIASES, + KATEX_FUNCTIONS, + KATEX_SYMBOL_OVERRIDES, +} from "../src/katexMeta.ts"; +import { KATEX_SYMBOLS as BASE_SYMBOLS } from "../src/katexSymbols.ts"; + +const ROOT = join(dirname(fileURLToPath(import.meta.url)), ".."); +const SRC = join(ROOT, "src"); +const INDEX = join(SRC, "index.ts"); + +type Format = { + name: string; + note: string; + write: () => void; + patchIndex: (src: string) => string; +}; + +const mergedLookup: Record = { + ...KATEX_ALIASES, + ...BASE_SYMBOLS, + ...KATEX_SYMBOL_OVERRIDES, +}; + +const sortedEntries = Object.entries(mergedLookup).sort(([a], [b]) => + a.localeCompare(b), +); +const accentsJson = JSON.stringify(KATEX_ACCENTS); +const functionsJson = JSON.stringify([...KATEX_FUNCTIONS].sort()); + +const metaTail = [ + `export const KATEX_ACCENTS = ${accentsJson} as Record;`, + `export const KATEX_FUNCTIONS = new Set(${functionsJson});`, + "", +].join("\n"); + +const objectLiteralBody = sortedEntries + .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) + .join("\n"); + +const tupleBody = sortedEntries + .map(([k, v]) => ` [${JSON.stringify(k)}, ${JSON.stringify(v)}],`) + .join("\n"); + +const parallelKeys = sortedEntries.map(([k]) => JSON.stringify(k)).join(","); +const parallelValues = JSON.stringify(sortedEntries.map(([, v]) => v)); +const gzipB64 = gzipSync( + Buffer.from(JSON.stringify(mergedLookup), "utf8"), +).toString("base64"); + +const baselineIndex = readFileSync(INDEX, "utf8"); + +const cleanupGenerated = () => { + for (const f of ["katexData.ts"]) { + try { + rmSync(join(SRC, f)); + } catch { + /* absent */ + } + } +}; + +const mergedPatchIndex = (src: string): string => + src + .replace( + `import { KATEX_ACCENTS, KATEX_ALIASES, KATEX_FUNCTIONS, KATEX_SYMBOL_OVERRIDES } from "./katexMeta";\nimport { KATEX_SYMBOLS } from "./katexSymbols";`, + `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData";`, + ) + .replace( + `const resolveLatexSymbol = (name: string): string | undefined =>\n KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? KATEX_ALIASES[name];`, + `const resolveLatexSymbol = (name: string): string | undefined => KATEX_SYMBOLS[name];`, + ); + +const formats: Format[] = [ + { + name: "1-baseline-multi", + note: "PR #7: katexSymbols + katexMeta, 3-table lookup chain", + write: () => cleanupGenerated(), + patchIndex: (src) => src, + }, + { + name: "2-merged-literal", + note: "Single katexData.ts object literal + direct lookup", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: merged object literal */`, + `export const KATEX_SYMBOLS: Record = {`, + objectLiteralBody, + `};`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "3-json-parse", + note: "Single JSON.parse blob", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: JSON.parse blob */`, + `export const KATEX_SYMBOLS = JSON.parse(${JSON.stringify(JSON.stringify(mergedLookup))}) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "4-tuple-fromEntries", + note: "Tuple array + Object.fromEntries at module init", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: tuple entries + Object.fromEntries */`, + `const ENTRIES: [string, string][] = [`, + tupleBody, + `];`, + `export const KATEX_SYMBOLS = Object.fromEntries(ENTRIES) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "5-parallel-arrays", + note: "Parallel keys/values arrays + Object.fromEntries", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: parallel arrays */`, + `const KEYS = [${parallelKeys}] as const;`, + `const VALS = ${parallelValues} as const;`, + `export const KATEX_SYMBOLS = Object.fromEntries(KEYS.map((k, i) => [k, VALS[i]])) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "6-gzip-base64-node", + note: "gzip+base64 blob, gunzipSync at module init (Node zlib)", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: gzip base64 (Node) */`, + `import { gunzipSync } from "node:zlib";`, + `const B64 = ${JSON.stringify(gzipB64)};`, + `export const KATEX_SYMBOLS = JSON.parse(`, + ` gunzipSync(Buffer.from(B64, "base64")).toString("utf8"),`, + `) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "7-literal-oneline", + note: "Merged object literal on one line via JSON.stringify", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: one-line object literal */`, + `export const KATEX_SYMBOLS: Record = ${JSON.stringify(mergedLookup)};`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "8-map-constructor", + note: "new Map(entries) then lookup via .get", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: Map constructor */`, + `const ENTRIES: [string, string][] = [`, + tupleBody, + `];`, + `export const KATEX_SYMBOL_MAP = new Map(ENTRIES);`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: (src) => + mergedPatchIndex(src) + .replace( + `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData";`, + `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOL_MAP } from "./katexData";`, + ) + .replace(`KATEX_SYMBOLS[name]`, `KATEX_SYMBOL_MAP.get(name)`), + }, +]; + +const measure = () => { + const cjs = readFileSync(join(ROOT, "dist/index.js")); + const esm = readFileSync(join(ROOT, "dist/index.mjs")); + const gzCjs = execSync("gzip -c dist/index.js", { + cwd: ROOT, + encoding: "buffer", + }); + const dataSrc = ["katexData.ts", "katexSymbols.ts", "katexMeta.ts"] + .map((f) => join(SRC, f)) + .filter((f) => { + try { + readFileSync(f); + return true; + } catch { + return false; + } + }) + .reduce((sum, f) => sum + readFileSync(f).length, 0); + + return { cjs: cjs.length, esm: esm.length, gzCjs: gzCjs.length, dataSrc }; +}; + +console.log("KaTeX symbol format benchmark\n"); +console.log(`Merged lookup entries: ${sortedEntries.length}`); +console.log(`Raw JSON size: ${JSON.stringify(mergedLookup).length} B`); +console.log( + `gzip(JSON) alone: ${gzipSync(Buffer.from(JSON.stringify(mergedLookup))).length} B`, +); +console.log(`gzip+base64 payload: ${gzipB64.length} chars\n`); + +const results: Array< + { name: string; note: string } & ReturnType +> = []; + +for (const format of formats) { + format.write(); + writeFileSync(INDEX, format.patchIndex(baselineIndex)); + execSync("pnpm build", { cwd: ROOT, stdio: "pipe" }); + const stats = measure(); + results.push({ name: format.name, note: format.note, ...stats }); + console.log( + `✓ ${format.name}: gzip ${stats.gzCjs} B, CJS ${stats.cjs} B, data src ${stats.dataSrc} B`, + ); +} + +writeFileSync(INDEX, baselineIndex); +cleanupGenerated(); + +console.log("\n| Format | gzip CJS | CJS | ESM | data src | vs baseline |"); +console.log("|--------|----------|-----|-----|----------|-------------|"); +const baseGz = results[0].gzCjs; +for (const r of results) { + const delta = r.gzCjs - baseGz; + const pct = ((delta / baseGz) * 100).toFixed(1); + const deltaStr = + delta === 0 ? "—" : `${delta >= 0 ? "+" : ""}${delta} B (${pct}%)`; + console.log( + `| ${r.name} | ${r.gzCjs} | ${r.cjs} | ${r.esm} | ${r.dataSrc} | ${deltaStr} |`, + ); +} + +console.log("\nNotes:"); +for (const r of results) { + console.log(`- ${r.name}: ${r.note}`); +} diff --git a/lib/scripts/generate-katex-data.ts b/lib/scripts/generate-katex-data.ts new file mode 100644 index 0000000..840601d --- /dev/null +++ b/lib/scripts/generate-katex-data.ts @@ -0,0 +1,221 @@ +/** + * Generates KaTeX-derived symbol data for @m2d/math. + * Fetches KaTeX v0.16.22 source at codegen time (MIT): + * https://github.com/KaTeX/KaTeX/tree/v0.16.22/src + * + * Run: pnpm generate:katex + */ +import { writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const KATEX_VERSION = "0.16.22"; +const KATEX_BASE = `https://raw.githubusercontent.com/KaTeX/KaTeX/v${KATEX_VERSION}/src`; +const REGENERATE_CMD = "pnpm generate:katex"; +const SIMPLE_MACRO = /^\\([a-zA-Z@][a-zA-Z0-9@]*)$/; + +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)); +const ROOT = join(SCRIPT_DIR, ".."); + +/** Fetch a KaTeX source file from the pinned GitHub release. */ +const fetchKatexSource = async (path: string): Promise => { + const url = `${KATEX_BASE}/${path}`; + const response = await fetch(url); + if (!response.ok) { + throw new Error( + `Failed to fetch ${url}: ${response.status} ${response.statusText}`, + ); + } + return response.text(); +}; + +const symbolMap: Record = {}; +const aliasMap: Record = {}; +const accentMap: Record = {}; +const fnSet = new Set(); +const overrideMap: Record = {}; + +/** Decode a KaTeX char literal or single-character string. */ +const decodeChar = (raw: string): string | undefined => { + if (/^\\u[0-9a-fA-F]{4}$/.test(raw)) { + return JSON.parse(`"${raw}"`) as string; + } + return raw.length === 1 ? raw : undefined; +}; + +// skipcq: JS-R1005 +/** Generate KaTeX symbol tables and write them to src/. */ +const generate = async (): Promise => { + console.log(`Fetching KaTeX v${KATEX_VERSION} from ${KATEX_BASE}`); + + const [symbolsSrc, macrosSrc, opSrc] = await Promise.all([ + fetchKatexSource("symbols.js"), + fetchKatexSource("macros.js"), + fetchKatexSource("functions/op.js"), + ]); + + for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) { + const strMatch = [ + ...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g), + ]; + if (strMatch.length < 2) continue; + const unicode = JSON.parse(`"${strMatch[0][1]}"`) as string; + const cmd = strMatch[1][1].replace(/^\\+/, ""); + symbolMap[cmd] = unicode; + } + + /** Resolve a macro name to a single Unicode character, following aliases. */ + const resolveToUnicode = ( + name: string, + seen = new Set(), + ): string | undefined => { + if (seen.has(name)) return undefined; + seen.add(name); + if (symbolMap[name]) return symbolMap[name]; + const bodyMatch = macrosSrc.match( + new RegExp( + `defineMacro\\("\\\\\\\\${name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}",\\s*"([^"]+)"\\)`, + ), + ); + if (!bodyMatch) return undefined; + const body = bodyMatch[1]; + if (body.startsWith("\\mathrm{") && body.endsWith("}")) { + return body.slice(9, -1); + } + if (body.length === 1 && !body.startsWith("\\")) { + return body; + } + const charMatch = body.match(/\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/); + if (charMatch) { + return decodeChar(charMatch[1]); + } + if (body.startsWith("\\") && !body.includes("{")) { + return resolveToUnicode(body.replace(/^\\+/, ""), seen); + } + return undefined; + }; + + for (const m of macrosSrc.matchAll( + /defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g, + )) { + const name = m[1]; + if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue; + const resolved = resolveToUnicode(name); + if (resolved && [...resolved].length === 1) { + aliasMap[name] = resolved; + } + } + + for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) { + if (!m[0].includes(", accent,")) continue; + const strMatch = [ + ...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g), + ]; + if (strMatch.length < 2) continue; + const chr = JSON.parse(`"${strMatch[0][1]}"`) as string; + const cmd = strMatch[1][1].replace(/^\\+/, ""); + accentMap[cmd] = chr; + } + + let blockIdx = 0; + let nextBlockIdx = opSrc.indexOf("defineFunction({", blockIdx); + while (nextBlockIdx !== -1) { + blockIdx = nextBlockIdx; + const blockEnd = opSrc.indexOf("});", blockIdx); + const block = opSrc.slice(blockIdx, blockEnd); + if (block.includes("symbol: false") && !block.includes("symbol: true")) { + const namesMatch = block.match(/names:\s*\[([\s\S]*?)\]/); + if (namesMatch) { + for (const nameMatch of namesMatch[1].matchAll(/"\\+([^"]+)"/g)) { + fnSet.add(nameMatch[1]); + } + } + } + blockIdx = blockEnd; + nextBlockIdx = opSrc.indexOf("defineFunction({", blockIdx); + } + for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(liminf|limsup)",/g)) { + fnSet.add(m[1]); + } + + for (const m of macrosSrc.matchAll( + /defineMacro\("\\\\([^"]+)",\s*"\\html@mathml\{[^}]+\}\{[^}]*\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/g, + )) { + const resolved = decodeChar(m[2]); + if (resolved && [...resolved].length === 1) { + overrideMap[m[1]] = resolved; + } + } + for (const m of macrosSrc.matchAll( + /defineMacro\("\\\\(q?quad)",\s*"\\\\hskip(\d+)em/g, + )) { + overrideMap[m[1]] = m[1] === "qquad" ? "\u2003\u2003" : "\u2003"; + } + for (const m of macrosSrc.matchAll( + /defineMacro\("(\\u[0-9a-fA-F]{4})",\s*"\\\\([^"]+)"\)/g, + )) { + const unicode = JSON.parse(`"${m[1]}"`) as string; + const target = `\\${m[2]}`; + if (!SIMPLE_MACRO.test(target) || unicode === "\uFE0F") continue; + const cmd = m[2]; + const resolved = resolveToUnicode(cmd) ?? unicode; + if ([...resolved].length === 1) { + overrideMap[cmd] = resolved; + } + } + for (const m of macrosSrc.matchAll( + /defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g, + )) { + const name = m[1]; + if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue; + if (symbolMap[name] || aliasMap[name] || overrideMap[name]) continue; + const resolved = resolveToUnicode(name); + if (resolved && [...resolved].length === 1) { + overrideMap[name] = resolved; + } + } + + if (overrideMap.neq) overrideMap.ne = overrideMap.neq; + if (symbolMap["@cdots"]) overrideMap.cdots = symbolMap["@cdots"]; + + const lookupMap: Record = { + ...aliasMap, + ...symbolMap, + ...overrideMap, + }; + + const lookupLines = Object.entries(lookupMap) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) + .join("\n"); + + const sourceNote = `KaTeX v${KATEX_VERSION} — regenerate via \`${REGENERATE_CMD}\` (fetches from ${KATEX_BASE}).`; + const functions = [...fnSet].sort(); + + writeFileSync( + join(ROOT, "src/katexData.ts"), + [ + `/** ${sourceNote} */`, + `export const KATEX_SYMBOLS: Record = {`, + lookupLines, + `};`, + ``, + `export const KATEX_ACCENTS = ${JSON.stringify(accentMap)} as Record;`, + ``, + `export const KATEX_FUNCTIONS = new Set(${JSON.stringify(functions)});`, + ``, + ].join("\n"), + ); + + console.log(`KATEX_SYMBOLS: ${Object.keys(lookupMap).length} (merged)`); + console.log(` base symbols: ${Object.keys(symbolMap).length}`); + console.log(` aliases: ${Object.keys(aliasMap).length}`); + console.log(` overrides: ${Object.keys(overrideMap).length}`); + console.log(`KATEX_ACCENTS: ${Object.keys(accentMap).length}`); + console.log(`KATEX_FUNCTIONS: ${fnSet.size}`); +}; + +generate().catch((error) => { + console.error(error); + throw error; +}); diff --git a/lib/src/index.ts b/lib/src/index.ts index ecae47b..73b1b70 100644 --- a/lib/src/index.ts +++ b/lib/src/index.ts @@ -4,6 +4,7 @@ import type * as latex from "@unified-latex/unified-latex-types"; // skipcq: JS-C1003 import type * as DOCX from "docx"; import { parseMath } from "latex-math"; +import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData"; /** * Checks if the argument has curly brackets. @@ -17,150 +18,140 @@ const hasCurlyBrackets = ( const mapString = (docx: typeof DOCX, s: string): DOCX.MathRun => new docx.MathRun(s); -const LATEX_SYMBOLS: Record = { - textasciitilde: "~", - textasciicircum: "^", - textbackslash: "∖", - textbar: "|", - textless: "<", - textgreater: ">", - neq: "≠", - sim: "∼", - simeq: "≃", - approx: "≈", - fallingdotseq: "≒", - risingdotseq: "≓", - equiv: "≡", - geq: "≥", - geqq: "≧", - leq: "≤", - leqq: "≦", - gg: "≫", - ll: "≪", - times: "×", - div: "÷", - pm: "±", - mp: "∓", - oplus: "⊕", - ominus: "⊖", - otimes: "⊗", - oslash: "⊘", - circ: "∘", - cdot: "⋅", - bullet: "∙", - ltimes: "⋉", - rtimes: "⋊", - in: "∈", - ni: "∋", - notin: "∉", - subset: "⊂", - supset: "⊃", - subseteq: "⊆", - supseteq: "⊇", - nsubseteq: "⊈", - nsupseteq: "⊉", - subsetneq: "⊊", - supsetneq: "⊋", - cap: "∩", - cup: "∪", - emptyset: "∅", - infty: "∞", - partial: "∂", - aleph: "ℵ", - hbar: "ℏ", - wp: "℘", - Re: "ℜ", - Im: "ℑ", - alpha: "α", - beta: "β", - gamma: "γ", - delta: "δ", - epsilon: "ϵ", - zeta: "ζ", - eta: "η", - theta: "θ", - iota: "ι", - kappa: "κ", - lambda: "λ", - mu: "μ", - nu: "ν", - xi: "ξ", - pi: "π", - rho: "ρ", - sigma: "σ", - tau: "τ", - upsilon: "υ", - phi: "ϕ", - chi: "χ", - psi: "ψ", - omega: "ω", - varepsilon: "ε", - vartheta: "ϑ", - varrho: "ϱ", - varsigma: "ς", - varphi: "φ", - Gamma: "Γ", - Delta: "Δ", - Theta: "Θ", - Lambda: "Λ", - Xi: "Ξ", - Pi: "Π", - Sigma: "Σ", - Upsilon: "Υ", - Phi: "Φ", - Psi: "Ψ", - Omega: "Ω", - int: "∫", - oint: "∮", - prod: "∏", - coprod: "∐", - sum: "∑", - log: "log", - exp: "exp", - lim: "lim", - inf: "∞", - perp: "⊥", - and: "∧", - or: "∨", - not: "¬", - to: "→", - gets: "⟹", - implies: "⟹", - impliedby: "⟸", - forall: "∀", - exists: "∃", - empty: "∅", - nabla: "∇", - top: "⊤", - bot: "⊥", - angle: "∠", - backslash: "∖", - neg: "¬", - lnot: "¬", - flat: "♭", - natural: "♮", - sharp: "♯", - clubsuit: "♣", - diamondsuit: "♦", - heartsuit: "♥", - spadesuit: "♠", - varnothing: "∅", - S: "∖", - P: "∏", - bigcap: "⋀", - bigcup: "⋁", - bigwedge: "⊓", - bigvee: "⊔", - bigsqcap: "⊓", - bigsqcup: "⊔", - biguplus: "⊕", - bigoplus: "⊕", - bigotimes: "⊗", - bigodot: "⊙", - biginterleave: "⊺", - bigtimes: "⨯", +const PLUGIN_ID = "@m2d/math"; + +/** Log and skip inline/block math that would emit empty OMML. */ +const logSkippedEmptyMath = (latex: string, scope: "inline" | "block") => { + console.error( + `[${PLUGIN_ID}] Skipping empty ${scope} math for ${JSON.stringify(latex)}; no renderable OMML was produced. Empty elements break Microsoft Word.`, + ); +}; + +/** Resolve a LaTeX command name to its Unicode symbol. */ +const resolveLatexSymbol = (name: string): string | undefined => + KATEX_SYMBOLS[name]; + +type NAryOptions = { + accent: string; + limitLocationVal?: string; + children?: DOCX.MathRun[]; + subScript?: DOCX.MathRun[]; + superScript?: DOCX.MathRun[]; +}; + +type PendingNAry = DOCX.MathRun & { + isNAry: 1; + naryAccent: string; + naryLimitLoc?: string; + sub?: DOCX.MathRun[]; + sup?: DOCX.MathRun[]; +}; + +const NARY_OPERATORS: Record< + string, + { accent: string; limitLocationVal?: string } +> = { + sum: { accent: "∑" }, + prod: { accent: "∏" }, + int: { accent: "∫", limitLocationVal: "subSup" }, + iint: { accent: "∬", limitLocationVal: "subSup" }, + iiint: { accent: "∭", limitLocationVal: "subSup" }, + oint: { accent: "∮", limitLocationVal: "subSup" }, + oiint: { accent: "∯", limitLocationVal: "subSup" }, + oiiint: { accent: "∰", limitLocationVal: "subSup" }, + bigcup: { accent: "⋃" }, + bigcap: { accent: "⋂" }, + bigoplus: { accent: "⊕" }, + bigotimes: { accent: "⊗" }, +}; + +/** Whether a MathRun is a pending n-ary operator awaiting limits or body. */ +const isPendingNAry = (node: DOCX.MathRun | undefined): node is PendingNAry => + Boolean(node && (node as PendingNAry).isNAry); + +/** Build an OMML n-ary operator element. */ +const buildNAry = (docx: typeof DOCX, options: NAryOptions): DOCX.MathRun => { + /** OMML wrapper for n-ary operators such as sum and integral. */ + class MathNAry extends docx.XmlComponent { + constructor() { + super("m:nary"); + this.root.push( + docx.createMathNAryProperties({ + accent: options.accent, + hasSuperScript: Boolean(options.superScript), + hasSubScript: Boolean(options.subScript), + limitLocationVal: options.limitLocationVal, + }), + ); + if (options.subScript) { + this.root.push( + docx.createMathSubScriptElement({ children: options.subScript }), + ); + } + if (options.superScript) { + this.root.push( + docx.createMathSuperScriptElement({ children: options.superScript }), + ); + } + this.root.push(docx.createMathBase({ children: options.children ?? [] })); + } + } + return new MathNAry() as unknown as DOCX.MathRun; +}; + +/** Create an n-ary operator placeholder that accepts limits and a body later. */ +const createPendingNAry = ( + docx: typeof DOCX, + accent: string, + limitLocationVal?: string, +): PendingNAry => { + const node = buildNAry(docx, { + accent, + limitLocationVal, + children: [], + }) as PendingNAry; + node.isNAry = 1; + node.naryAccent = accent; + node.naryLimitLoc = limitLocationVal; + return node; +}; + +/** Attach sub/superscript limits to a pending n-ary operator. */ +const attachNAryLimits = ( + docx: typeof DOCX, + prev: PendingNAry, + limits: { subScript?: DOCX.MathRun[]; superScript?: DOCX.MathRun[] }, +): PendingNAry => { + const sub = limits.subScript ?? prev.sub; + const sup = limits.superScript ?? prev.sup; + const node = buildNAry(docx, { + accent: prev.naryAccent, + limitLocationVal: prev.naryLimitLoc, + children: [], + subScript: sub, + superScript: sup, + }) as PendingNAry; + node.isNAry = 1; + node.naryAccent = prev.naryAccent; + node.naryLimitLoc = prev.naryLimitLoc; + node.sub = sub; + node.sup = sup; + return node; }; +const finalizeNAry = ( + docx: typeof DOCX, + prev: PendingNAry, + children: DOCX.MathRun[], +): DOCX.MathRun => + buildNAry(docx, { + accent: prev.naryAccent, + limitLocationVal: prev.naryLimitLoc, + children, + subScript: prev.sub, + superScript: prev.sup, + }); + /** convert group to Math */ const mapGroup = (docx: typeof DOCX, nodes: latex.Node[]): DOCX.MathRun[] => { const group: DOCX.MathRun[] = []; @@ -176,11 +167,13 @@ const mapGroup = (docx: typeof DOCX, nodes: latex.Node[]): DOCX.MathRun[] => { const mapMacro = ( docx: typeof DOCX, node: latex.Macro, - runs: DOCX.MathRun[], + runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] }, ): DOCX.MathRun[] | DOCX.MathRun | null => { let returnVal: DOCX.MathRun[] | DOCX.MathRun | null = null; switch (node.content) { case "newline": + returnVal = mapString(docx, " "); + break; case "\\": // line break return null; @@ -203,22 +196,8 @@ const mapMacro = ( const prev = runs.pop(); if (!prev) break; const superScript = mapGroup(docx, node.args?.[0]?.content ?? []); - // @ts-expect-error -- using extra vars - if (prev.isSum) { - const docNode = new docx.MathSum({ - children: [], - superScript, - // @ts-expect-error -- reading extra field - subScript: prev.sub, - }); - - // @ts-expect-error -- attaching extra field - docNode.sub = prev.sub; - // @ts-expect-error -- attaching extra field - docNode.sup = superScript; - // @ts-expect-error -- attaching extra field - docNode.isSum = 1; - return docNode; + if (isPendingNAry(prev)) { + return attachNAryLimits(docx, prev, { superScript }); // @ts-expect-error -- attaching extra field } else if (prev.sub) { return new docx.MathSubSuperScript({ @@ -243,21 +222,8 @@ const mapMacro = ( const prev = runs.pop(); if (!prev) break; const subScript = mapGroup(docx, node.args?.[0]?.content ?? []); - // @ts-expect-error -- attaching extra field - if (prev.isSum) { - const docNode = new docx.MathSum({ - children: [], - subScript, - // @ts-expect-error -- reading extra field - superScript: prev.sup, - }); - // @ts-expect-error -- attaching extra field - docNode.sup = prev.sup; - // @ts-expect-error -- attaching extra field - docNode.sub = subScript; - // @ts-expect-error -- attaching extra field - docNode.isSum = 1; - return docNode; + if (isPendingNAry(prev)) { + return attachNAryLimits(docx, prev, { subScript }); // @ts-expect-error -- attaching extra field } else if (prev.sup) { return new docx.MathSubSuperScript({ @@ -280,16 +246,27 @@ const mapMacro = ( } case "hat": case "widehat": - // returnVal = docx.MathAccentCharacter(n) - returnVal = docx.createMathAccentCharacter({ accent: "^" }); - break; - case "sum": { - const docNode = new docx.MathSum({ - children: [], + returnVal = docx.createMathAccentCharacter({ + accent: KATEX_ACCENTS[node.content] ?? "^", }); - // @ts-expect-error - extra var - docNode.isSum = 1; - return docNode; + break; + case "sum": + case "prod": + case "int": + case "iint": + case "iiint": + case "oint": + case "oiint": + case "oiiint": + case "bigcup": + case "bigcap": + case "bigoplus": + case "bigotimes": { + const nary = NARY_OPERATORS[node.content]; + if (nary) { + returnVal = createPendingNAry(docx, nary.accent, nary.limitLocationVal); + } + break; } case "frac": case "tfrac": @@ -307,6 +284,26 @@ const mapMacro = ( } break; } + case "stackrel": { + const args = node.args ?? []; + if ( + args.length === 2 && + hasCurlyBrackets(args[0]) && + hasCurlyBrackets(args[1]) + ) { + returnVal = [ + docx.createMathLimitLocation({ value: "undOvr" }), + new docx.MathLimitUpper({ + children: mapGroup(docx, args[1].content), + limit: mapGroup(docx, args[0].content), + }), + ]; + } + break; + } + case "binom": + runs.binomPending = 0; + return []; case "sqrt": { const args = node.args ?? []; if (args.length === 1) { @@ -328,23 +325,50 @@ const mapMacro = ( case "left": case "right": case "vec": + case "boxed": + case "boldsymbol": return []; case "mathbf": return mapGroup(docx, node.args?.[0]?.content ?? []); default: - returnVal = mapString(docx, LATEX_SYMBOLS[node.content] ?? node.content); + if (node.content === "overline" || node.content === "widetilde") { + returnVal = docx.createMathAccentCharacter({ + accent: node.content === "overline" ? "¯" : "~", + }); + } else if ( + node.content === "mathrm" || + node.content === "mathit" || + node.content === "textbf" || + node.content === "textit" || + node.content === "underline" || + node.content === "overbrace" || + node.content === "underbrace" + ) { + const args = node.args ?? []; + if (hasCurlyBrackets(args[0])) { + returnVal = mapGroup(docx, args[0].content); + } + } else if (KATEX_ACCENTS[node.content]) { + returnVal = docx.createMathAccentCharacter({ + accent: KATEX_ACCENTS[node.content], + }); + } else if (KATEX_FUNCTIONS.has(node.content)) { + returnVal = mapString(docx, node.content); + } else { + returnVal = mapString( + docx, + resolveLatexSymbol(node.content) ?? node.content, + ); + } } - // @ts-expect-error -- reading extra field - if (runs[runs.length - 1]?.isSum && returnVal) { - const prev = runs.pop(); + if (isPendingNAry(runs[runs.length - 1]) && returnVal) { + const prev = runs.pop() as PendingNAry; return [ - new docx.MathSum({ - children: Array.isArray(returnVal) ? returnVal : [returnVal], - // @ts-expect-error -- reading extra field - superScript: prev.sup, - // @ts-expect-error -- reading extra field - subScript: prev.sub, - }), + finalizeNAry( + docx, + prev, + Array.isArray(returnVal) ? returnVal : [returnVal], + ), ]; } return returnVal; @@ -354,8 +378,30 @@ const mapMacro = ( const mapNode = ( docx: typeof DOCX, node: latex.Node, - runs: DOCX.MathRun[], + runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] }, ): DOCX.MathRun[] | false => { + if (node.type === "group" && runs.binomPending !== undefined) { + const content = mapGroup(docx, node.content); + if (runs.binomPending === 0) { + runs.binomFirst = content; + runs.binomPending = 1; + return []; + } + delete runs.binomPending; + const numerator = runs.binomFirst ?? []; + delete runs.binomFirst; + return [ + new docx.MathRoundBrackets({ + children: [ + new docx.MathFraction({ + numerator, + denominator: content, + }), + ], + }), + ]; + } + let docxNodes: DOCX.MathRun[] = []; switch (node.type) { case "string": @@ -383,18 +429,9 @@ const mapNode = ( default: } - // @ts-expect-error -- reading extra field - if (node.type !== "macro" && runs[runs.length - 1]?.isSum) { - const prev = runs.pop(); - return [ - new docx.MathSum({ - children: docxNodes, - // @ts-expect-error -- reading extra field - superScript: prev.sup, - // @ts-expect-error -- reading extra field - subScript: prev.sub, - }), - ]; + if (node.type !== "macro" && isPendingNAry(runs[runs.length - 1])) { + const prev = runs.pop() as PendingNAry; + return [finalizeNAry(docx, prev, docxNodes)]; } return docxNodes; @@ -408,7 +445,10 @@ export const parseLatex = ( const latexNodes = parseMath(value); const paragraphs: DOCX.MathRun[][] = [[]]; - let runs: DOCX.MathRun[] = paragraphs[0]; + let runs: DOCX.MathRun[] & { + binomPending?: 0 | 1; + binomFirst?: DOCX.MathRun[]; + } = paragraphs[0]; for (const node of latexNodes) { const res = mapNode(docx, node, runs); @@ -435,17 +475,27 @@ export const mathPlugin: () => IPlugin<{ if (node.type !== "inlineMath" && node.type !== "math") return []; (node as unknown as EmptyNode)._type = node.type; node.type = ""; - return [ - new docx.Math({ children: parseLatex(docx, node.value ?? "").flat() }), - ]; + const latex = node.value ?? ""; + const children = parseLatex(docx, latex).flat(); + if (!children.length) { + logSkippedEmptyMath(latex, "inline"); + return []; + } + return [new docx.Math({ children })]; }, block: (docx, node) => { if (node.type !== "math" && node.type !== "inlineMath") return []; node.type = ""; - return parseLatex(docx, node.value ?? "").map( - (runs) => + const latex = node.value ?? ""; + return parseLatex(docx, latex).flatMap((runs) => { + if (!runs.length) { + logSkippedEmptyMath(latex, "block"); + return []; + } + return [ new docx.Paragraph({ children: [new docx.Math({ children: runs })] }), - ); + ]; + }); }, }; }; diff --git a/lib/src/katexData.ts b/lib/src/katexData.ts new file mode 100644 index 0000000..16fc96e --- /dev/null +++ b/lib/src/katexData.ts @@ -0,0 +1,716 @@ +/** KaTeX v0.16.22 — regenerate via `pnpm generate:katex` (fetches from https://raw.githubusercontent.com/KaTeX/KaTeX/v0.16.22/src). */ +export const KATEX_SYMBOLS: Record = { + " ": " ", + _: "_", + "-": "−", + "--": "–", + "---": "—", + ",": ",", + ";": ";", + ":": ":", + "!": "!", + "?": "?", + ".": "˙", + "'": "’", + "''": "”", + "{": "{", + "}": "}", + "@cdots": "⋯", + "@gvertneqq": "", + "@imath": "", + "@jmath": "", + "@llcorner": "└", + "@lrcorner": "┘", + "@lvertneqq": "", + "@ngeqq": "", + "@ngeqslant": "", + "@nleqq": "", + "@nleqslant": "", + "@not": "", + "@nshortmid": "", + "@nshortparallel": "", + "@nsubseteqq": "", + "@nsupseteqq": "", + "@ulcorner": "┌", + "@urcorner": "┐", + "@varsubsetneq": "", + "@varsubsetneqq": "", + "@varsupsetneq": "", + "@varsupsetneqq": "", + "*": "∗", + "&": "&", + "#": "#", + "%": "%", + "`": "‘", + "``": "“", + "^": "ˆ", + "+": "+", + "=": "ˉ", + "|": "∥", + "~": "˜", + $: "$", + acute: "ˊ", + ae: "æ", + AE: "Æ", + alef: "ℵ", + alefsym: "ℵ", + aleph: "ℵ", + alpha: "α", + amalg: "⨿", + And: "&", + angle: "∠", + approx: "≈", + approxeq: "≊", + ast: "∗", + asymp: "≍", + backepsilon: "∍", + backprime: "‵", + backsim: "∽", + backsimeq: "⋍", + backslash: "\\", + bar: "ˉ", + barwedge: "⊼", + because: "∵", + beta: "β", + beth: "ℶ", + between: "≬", + bgroup: "{", + bigcap: "⋂", + bigcirc: "◯", + bigcup: "⋃", + bigodot: "⨀", + bigoplus: "⨁", + bigotimes: "⨂", + bigsqcup: "⨆", + bigstar: "★", + bigtriangledown: "▽", + bigtriangleup: "△", + biguplus: "⨄", + bigvee: "⋁", + bigwedge: "⋀", + blacklozenge: "⧫", + blacksquare: "■", + blacktriangle: "▲", + blacktriangledown: "▼", + blacktriangleleft: "◀", + blacktriangleright: "▶", + bot: "⊥", + bowtie: "⋈", + Box: "□", + boxdot: "⊡", + boxminus: "⊟", + boxplus: "⊞", + boxtimes: "⊠", + breve: "˘", + bull: "∙", + bullet: "∙", + bumpeq: "≏", + Bumpeq: "≎", + c: "¸", + cap: "∩", + Cap: "⋒", + cdot: "⋅", + cdotp: "⋅", + cdots: "⋯", + centerdot: "⋅", + check: "ˇ", + checkmark: "✓", + chi: "χ", + circ: "∘", + circeq: "≗", + circlearrowleft: "↺", + circlearrowright: "↻", + circledast: "⊛", + circledcirc: "⊚", + circleddash: "⊝", + circledR: "®", + circledS: "Ⓢ", + clubs: "♣", + clubsuit: "♣", + coloneqq: "≔", + Coloneqq: "⩴", + complement: "∁", + cong: "≅", + coprod: "∐", + copyright: "©", + cup: "∪", + Cup: "⋓", + curlyeqprec: "⋞", + curlyeqsucc: "⋟", + curlyvee: "⋎", + curlywedge: "⋏", + curvearrowleft: "↶", + curvearrowright: "↷", + dag: "†", + dagger: "†", + Dagger: "‡", + daleth: "ℸ", + darr: "↓", + dArr: "⇓", + Darr: "⇓", + dashleftarrow: "⇠", + dashrightarrow: "⇢", + dashv: "⊣", + dblcolon: "∷", + ddag: "‡", + ddagger: "‡", + ddot: "¨", + ddots: "⋱", + degree: "°", + delta: "δ", + Delta: "Δ", + diagdown: "╲", + diagup: "╱", + diamond: "⋄", + Diamond: "◊", + diamonds: "♢", + diamondsuit: "♢", + digamma: "ϝ", + div: "÷", + divideontimes: "⋇", + dot: "˙", + doteq: "≐", + Doteq: "≑", + doteqdot: "≑", + dotplus: "∔", + doublebarwedge: "⩞", + doublecap: "⋒", + doublecup: "⋓", + downarrow: "↓", + Downarrow: "⇓", + downdownarrows: "⇊", + downharpoonleft: "⇃", + downharpoonright: "⇂", + egroup: "}", + ell: "ℓ", + empty: "∅", + emptyset: "∅", + epsilon: "ϵ", + eqcirc: "≖", + eqcolon: "∹", + eqqcolon: "≕", + eqsim: "≂", + eqslantgtr: "⪖", + eqslantless: "⪕", + equiv: "≡", + eta: "η", + eth: "ð", + exist: "∃", + exists: "∃", + fallingdotseq: "≒", + Finv: "Ⅎ", + flat: "♭", + forall: "∀", + frown: "⌢", + Game: "⅁", + gamma: "γ", + Gamma: "Γ", + ge: "≥", + geq: "≥", + geqq: "≧", + geqslant: "⩾", + gets: "←", + gg: "≫", + ggg: "⋙", + gggtr: "⋙", + gimel: "ℷ", + gnapprox: "⪊", + gneq: "⪈", + gneqq: "≩", + gnsim: "⋧", + grave: "ˋ", + gt: ">", + gtrapprox: "⪆", + gtrdot: "⋗", + gtreqless: "⋛", + gtreqqless: "⪌", + gtrless: "≷", + gtrsim: "≳", + H: "˝", + harr: "↔", + hArr: "⇔", + Harr: "⇔", + hat: "^", + hbar: "ℏ", + hearts: "♡", + heartsuit: "♡", + hookleftarrow: "↩", + hookrightarrow: "↪", + hslash: "ℏ", + i: "ı", + iiint: "∭", + iint: "∬", + Im: "ℑ", + image: "ℑ", + imageof: "⊷", + in: "∈", + infin: "∞", + infty: "∞", + int: "∫", + intercal: "⊺", + intop: "∫", + iota: "ι", + isin: "∈", + j: "ȷ", + Join: "⋈", + kappa: "κ", + lambda: "λ", + Lambda: "Λ", + land: "∧", + lang: "⟨", + langle: "⟨", + larr: "←", + lArr: "⇐", + Larr: "⇐", + lbrace: "{", + lBrace: "⦃", + lbrack: "[", + lceil: "⌈", + ldotp: ".", + ldots: "…", + le: "≤", + leadsto: "⇝", + leftarrow: "←", + Leftarrow: "⇐", + leftarrowtail: "↢", + leftharpoondown: "↽", + leftharpoonup: "↼", + leftleftarrows: "⇇", + leftrightarrow: "↔", + Leftrightarrow: "⇔", + leftrightarrows: "⇆", + leftrightharpoons: "⇋", + leftrightsquigarrow: "↭", + leftthreetimes: "⋋", + leq: "≤", + leqq: "≦", + leqslant: "⩽", + lessapprox: "⪅", + lessdot: "⋖", + lesseqgtr: "⋚", + lesseqqgtr: "⪋", + lessgtr: "≶", + lesssim: "≲", + lfloor: "⌊", + lgroup: "⟮", + lhd: "⊲", + ll: "≪", + llbracket: "⟦", + llcorner: "⌞", + Lleftarrow: "⇚", + lll: "⋘", + llless: "⋘", + lmoustache: "⎰", + lnapprox: "⪉", + lneq: "⪇", + lneqq: "≨", + lnot: "¬", + lnsim: "⋦", + longleftarrow: "⟵", + Longleftarrow: "⟸", + longleftrightarrow: "⟷", + Longleftrightarrow: "⟺", + longmapsto: "⟼", + longrightarrow: "⟶", + Longrightarrow: "⟹", + looparrowleft: "↫", + looparrowright: "↬", + lor: "∨", + lozenge: "◊", + lparen: "(", + lq: "`", + lrarr: "↔", + lrArr: "⇔", + Lrarr: "⇔", + lrcorner: "⌟", + Lsh: "↰", + lt: "<", + ltimes: "⋉", + lvert: "∣", + lVert: "∥", + maltese: "✠", + mapsto: "↦", + mathellipsis: "…", + mathring: "˚", + mathsterling: "£", + measuredangle: "∡", + medspace: ":", + mho: "℧", + mid: "∣", + models: "⊨", + mp: "∓", + mu: "μ", + multimap: "⊸", + nabla: "∇", + natural: "♮", + ncong: "≆", + ne: "≠", + nearrow: "↗", + neg: "¬", + negthinspace: "!", + neq: "≠", + nexists: "∄", + ngeq: "≱", + ngtr: "≯", + ni: "∋", + nleftarrow: "↚", + nLeftarrow: "⇍", + nleftrightarrow: "↮", + nLeftrightarrow: "⇎", + nleq: "≰", + nless: "≮", + nmid: "∤", + nobreakspace: " ", + notin: "∉", + notni: "∌", + nparallel: "∦", + nprec: "⊀", + npreceq: "⋠", + nrightarrow: "↛", + nRightarrow: "⇏", + nsim: "≁", + nsubseteq: "⊈", + nsucc: "⊁", + nsucceq: "⋡", + nsupseteq: "⊉", + ntriangleleft: "⋪", + ntrianglelefteq: "⋬", + ntriangleright: "⋫", + ntrianglerighteq: "⋭", + nu: "ν", + nvdash: "⊬", + nvDash: "⊭", + nVdash: "⊮", + nVDash: "⊯", + nwarrow: "↖", + o: "ø", + O: "Ø", + odot: "⊙", + oe: "œ", + OE: "Œ", + oiiint: "∰", + oiint: "∯", + oint: "∮", + omega: "ω", + Omega: "Ω", + omicron: "ο", + ominus: "⊖", + oplus: "⊕", + ordinarycolon: ":", + origof: "⊶", + oslash: "⊘", + otimes: "⊗", + owns: "∋", + P: "¶", + parallel: "∥", + partial: "∂", + perp: "⊥", + phi: "ϕ", + Phi: "Φ", + pi: "π", + Pi: "Π", + pitchfork: "⋔", + plusmn: "±", + pm: "±", + pounds: "£", + prec: "≺", + precapprox: "⪷", + preccurlyeq: "≼", + preceq: "⪯", + precnapprox: "⪹", + precneqq: "⪵", + precnsim: "⋨", + precsim: "≾", + prime: "′", + prod: "∏", + propto: "∝", + psi: "ψ", + Psi: "Ψ", + qquad: "  ", + quad: " ", + r: "˚", + rang: "⟩", + rangle: "⟩", + rarr: "→", + rArr: "⇒", + Rarr: "⇒", + rbrace: "}", + rBrace: "⦄", + rbrack: "]", + rceil: "⌉", + Re: "ℜ", + real: "ℜ", + restriction: "↾", + rfloor: "⌋", + rgroup: "⟯", + rhd: "⊳", + rho: "ρ", + rightarrow: "→", + Rightarrow: "⇒", + rightarrowtail: "↣", + rightharpoondown: "⇁", + rightharpoonup: "⇀", + rightleftarrows: "⇄", + rightleftharpoons: "⇌", + rightrightarrows: "⇉", + rightsquigarrow: "⇝", + rightthreetimes: "⋌", + risingdotseq: "≓", + rmoustache: "⎱", + rparen: ")", + rq: "'", + rrbracket: "⟧", + Rrightarrow: "⇛", + Rsh: "↱", + rtimes: "⋊", + rvert: "∣", + rVert: "∥", + S: "§", + sdot: "⋅", + searrow: "↘", + sect: "§", + setminus: "∖", + sharp: "♯", + shortmid: "∣", + shortparallel: "∥", + sigma: "σ", + Sigma: "Σ", + sim: "∼", + simeq: "≃", + smallfrown: "⌢", + smallint: "∫", + smallsetminus: "∖", + smallsmile: "⌣", + smile: "⌣", + space: " ", + spades: "♠", + spadesuit: "♠", + sphericalangle: "∢", + sqcap: "⊓", + sqcup: "⊔", + sqsubset: "⊏", + sqsubseteq: "⊑", + sqsupset: "⊐", + sqsupseteq: "⊒", + square: "□", + ss: "ß", + star: "⋆", + sub: "⊂", + sube: "⊆", + subset: "⊂", + Subset: "⋐", + subseteq: "⊆", + subseteqq: "⫅", + subsetneq: "⊊", + subsetneqq: "⫋", + succ: "≻", + succapprox: "⪸", + succcurlyeq: "≽", + succeq: "⪰", + succnapprox: "⪺", + succneqq: "⪶", + succnsim: "⋩", + succsim: "≿", + sum: "∑", + supe: "⊇", + supset: "⊃", + Supset: "⋑", + supseteq: "⊇", + supseteqq: "⫆", + supsetneq: "⊋", + supsetneqq: "⫌", + surd: "√", + swarrow: "↙", + tau: "τ", + textasciicircum: "^", + textasciitilde: "~", + textbackslash: "\\", + textbar: "|", + textbardbl: "∥", + textbraceleft: "{", + textbraceright: "}", + textcircled: "◯", + textdagger: "†", + textdaggerdbl: "‡", + textdegree: "°", + textdollar: "$", + textellipsis: "…", + textemdash: "—", + textendash: "–", + textgreater: ">", + textless: "<", + textquotedblleft: "“", + textquotedblright: "”", + textquoteleft: "‘", + textquoteright: "’", + textregistered: "®", + textsterling: "£", + textunderscore: "_", + therefore: "∴", + theta: "θ", + Theta: "Θ", + thetasym: "ϑ", + thickapprox: "≈", + thicksim: "∼", + thickspace: ";", + thinspace: ",", + tilde: "~", + times: "×", + to: "→", + top: "⊤", + triangle: "△", + triangledown: "▽", + triangleleft: "◃", + trianglelefteq: "⊴", + triangleq: "≜", + triangleright: "▹", + trianglerighteq: "⊵", + twoheadleftarrow: "↞", + twoheadrightarrow: "↠", + u: "˘", + u00f0: "ð", + u0131: "ı", + u0237: "ȷ", + u0391: "A", + u0392: "B", + u0395: "E", + u0396: "Z", + u0397: "H", + u0399: "I", + u039A: "K", + u039C: "M", + u039D: "N", + u039F: "O", + u03A1: "P", + u03A4: "T", + u03A7: "X", + u2102: "C", + u210D: "H", + u210E: "h", + u2115: "N", + u2119: "P", + u211A: "Q", + u211D: "R", + u2124: "Z", + uarr: "↑", + uArr: "⇑", + Uarr: "⇑", + ulcorner: "⌜", + unlhd: "⊴", + unrhd: "⊵", + uparrow: "↑", + Uparrow: "⇑", + updownarrow: "↕", + Updownarrow: "⇕", + upharpoonleft: "↿", + upharpoonright: "↾", + uplus: "⊎", + upsilon: "υ", + Upsilon: "Υ", + upuparrows: "⇈", + urcorner: "⌝", + v: "ˇ", + varepsilon: "ε", + varkappa: "ϰ", + varnothing: "∅", + varphi: "φ", + varpi: "ϖ", + varpropto: "∝", + varrho: "ϱ", + varsigma: "ς", + vartheta: "ϑ", + vartriangle: "△", + vartriangleleft: "⊲", + vartriangleright: "⊳", + varvdots: "⋮", + vdash: "⊢", + vDash: "⊨", + Vdash: "⊩", + vdots: "⋮", + vec: "⃗", + vee: "∨", + veebar: "⊻", + vert: "∣", + Vert: "∥", + Vvdash: "⊪", + wedge: "∧", + weierp: "℘", + wp: "℘", + wr: "≀", + xi: "ξ", + Xi: "Ξ", + yen: "¥", + zeta: "ζ", +}; + +export const KATEX_ACCENTS = { + acute: "ˊ", + grave: "ˋ", + ddot: "¨", + tilde: "~", + bar: "ˉ", + breve: "˘", + check: "ˇ", + hat: "^", + vec: "⃗", + dot: "˙", + mathring: "˚", + "'": "ˊ", + "`": "ˋ", + "^": "ˆ", + "~": "˜", + "=": "ˉ", + u: "˘", + ".": "˙", + c: "¸", + r: "˚", + v: "ˇ", + H: "˝", + textcircled: "◯", +} as Record; + +export const KATEX_FUNCTIONS = new Set([ + "Pr", + "arccos", + "arcctg", + "arcsin", + "arctan", + "arctg", + "arg", + "ch", + "cos", + "cosec", + "cosh", + "cot", + "cotg", + "coth", + "csc", + "ctg", + "cth", + "deg", + "det", + "dim", + "exp", + "gcd", + "hom", + "inf", + "ker", + "lg", + "lim", + "liminf", + "limsup", + "ln", + "log", + "mathop", + "max", + "min", + "sec", + "sh", + "sin", + "sinh", + "sup", + "tan", + "tanh", + "tg", + "th", +]); diff --git a/lib/tsup.config.ts b/lib/tsup.config.ts index aaa6752..0ca18a7 100644 --- a/lib/tsup.config.ts +++ b/lib/tsup.config.ts @@ -6,7 +6,7 @@ export default defineConfig( ({ format: ["cjs", "esm"], target: "es2019", - entry: ["./src/**"], + entry: ["./src/index.ts"], sourcemap: false, clean: !options.watch, bundle: true, diff --git a/package.json b/package.json index 35d95ee..a5781a4 100644 --- a/package.json +++ b/package.json @@ -35,4 +35,4 @@ "cross-spawn@<6.0.6": ">=6.0.6" } } -} \ No newline at end of file +} diff --git a/sample.md b/sample.md index a3cea26..2fd828a 100644 --- a/sample.md +++ b/sample.md @@ -48,13 +48,16 @@ Here are some common mathematical symbols: - Fractions: $\frac{1}{2}$, $\frac{x+y}{z}$ - Square roots: $\sqrt{x}$, $\sqrt[3]{y}$ - Summations and products: $\sum_{i=1}^n i$, $\prod_{j=1}^m j$ -- Integrals: $\int_a^b f(x) dx$, $\oint_C \vec{F} \cdot d\vec{r}$ -- Limits: $\lim_{x \to \infty} \frac{1}{x}$ +- Integrals: $\int_a^b f(x) dx$, $\int_0^1 f(x)\,dx$, $\oint_C \vec{F} \cdot d\vec{r}$ +- Limits: $\lim_{x \to \infty} \frac{1}{x}$, $\lim_{n \to \infty} a_n$ - Vectors: $\vec{v}$, $\mathbf{v}$ +- Accents: $\tilde{x}$, $\bar{x}$, $\overline{AB}$, $\hat{x}$ - Matrices: $\begin{pmatrix} a & b \\ c & d \end{pmatrix}$ - Partial derivatives: $\frac{\partial f}{\partial x}$ - Infinity: $\infty$ -- Logical symbols: $\forall$, $\exists$, $\in$, $\notin$, $\subseteq$, $\supseteq$, $\land$, $\lor$, $\neg$ +- Logical symbols: $\forall$, $\exists$, $\in$, $\notin$, $\subseteq$, $\supseteq$, $\land$, $\lor$, $\neg$, $\wedge$, $\ne$, $\triangle ABC$, $\cdots$ +- Binomial coefficients: $\binom{n}{k}$ +- Stackrel: $\stackrel{\mathrm{def}}{=}$ - Trigonometric functions: $\sin(x)$, $\cos(y)$, $\tan(z)$ - Exponential and logarithmic functions: $e^x$, $\ln(y)$, $\log_{10}(z)$