From 8785f75716ff2490999850365bdb7d2815c3fd54 Mon Sep 17 00:00:00 2001 From: chitwitgit Date: Tue, 9 Jun 2026 13:51:48 +0800 Subject: [PATCH 1/7] Expand LaTeX symbol coverage using codegen from vendored tables. Replace the hand-maintained symbol map with generated tables seeded from KaTeX v0.16.22 source snippets, add accent and function handling, and document regeneration via pnpm generate:katex. Fixes md2docx/math#6 --- lib/package.json | 3 +- lib/scripts/data/katex-macros.js | 1038 ++++++++++++++++++++++++++++ lib/scripts/data/katex-op.js | 339 +++++++++ lib/scripts/data/katex-symbols.js | 895 ++++++++++++++++++++++++ lib/scripts/generate-katex-data.ts | 151 ++++ lib/src/index.ts | 160 +---- lib/src/katexMeta.ts | 108 +++ lib/src/katexSymbols.ts | 570 +++++++++++++++ lib/tsup.config.ts | 2 +- 9 files changed, 3118 insertions(+), 148 deletions(-) create mode 100644 lib/scripts/data/katex-macros.js create mode 100644 lib/scripts/data/katex-op.js create mode 100644 lib/scripts/data/katex-symbols.js create mode 100644 lib/scripts/generate-katex-data.ts create mode 100644 lib/src/katexMeta.ts create mode 100644 lib/src/katexSymbols.ts diff --git a/lib/package.json b/lib/package.json index 0df7a85..cebcce3 100644 --- a/lib/package.json +++ b/lib/package.json @@ -29,7 +29,8 @@ "typecheck": "tsc --noEmit", "lint": "eslint src/", "lint:fix": "eslint src/ --fix", - "test": "vitest run --coverage" + "test": "vitest run --coverage", + "generate:katex": "node --experimental-strip-types scripts/generate-katex-data.ts" }, "devDependencies": { "@repo/eslint-config": "workspace:*", diff --git a/lib/scripts/data/katex-macros.js b/lib/scripts/data/katex-macros.js new file mode 100644 index 0000000..5564fbc --- /dev/null +++ b/lib/scripts/data/katex-macros.js @@ -0,0 +1,1038 @@ +/** + * Vendored from KaTeX v0.16.22 (https://github.com/KaTeX/KaTeX). 
+ * SPDX-License-Identifier: MIT + * Regenerate derived outputs: pnpm generate:katex + */ +// @flow +/** + * Predefined macros for KaTeX. + * This can be used to define some commands in terms of others. + */ + +// Export global macros object from defineMacro +import defineMacro, {_macros} from "./defineMacro"; +const macros = _macros; +export default macros; + +import fontMetricsData from "./fontMetricsData"; +import functions from "./functions"; +import symbols from "./symbols"; +import utils from "./utils"; +import {makeEm} from "./units"; +import ParseError from "./ParseError"; + + +////////////////////////////////////////////////////////////////////// +// macro tools + +defineMacro("\\noexpand", function(context) { + // The expansion is the token itself; but that token is interpreted + // as if its meaning were ‘\relax’ if it is a control sequence that + // would ordinarily be expanded by TeX’s expansion rules. + const t = context.popToken(); + if (context.isExpandable(t.text)) { + t.noexpand = true; + t.treatAsRelax = true; + } + return {tokens: [t], numArgs: 0}; +}); + +defineMacro("\\expandafter", function(context) { + // TeX first reads the token that comes immediately after \expandafter, + // without expanding it; let’s call this token t. Then TeX reads the + // token that comes after t (and possibly more tokens, if that token + // has an argument), replacing it by its expansion. Finally TeX puts + // t back in front of that expansion. 
+ const t = context.popToken(); + context.expandOnce(true); // expand only an expandable token + return {tokens: [t], numArgs: 0}; +}); + +// LaTeX's \@firstoftwo{#1}{#2} expands to #1, skipping #2 +// TeX source: \long\def\@firstoftwo#1#2{#1} +defineMacro("\\@firstoftwo", function(context) { + const args = context.consumeArgs(2); + return {tokens: args[0], numArgs: 0}; +}); + +// LaTeX's \@secondoftwo{#1}{#2} expands to #2, skipping #1 +// TeX source: \long\def\@secondoftwo#1#2{#2} +defineMacro("\\@secondoftwo", function(context) { + const args = context.consumeArgs(2); + return {tokens: args[1], numArgs: 0}; +}); + +// LaTeX's \@ifnextchar{#1}{#2}{#3} looks ahead to the next (unexpanded) +// symbol that isn't a space, consuming any spaces but not consuming the +// first nonspace character. If that nonspace character matches #1, then +// the macro expands to #2; otherwise, it expands to #3. +defineMacro("\\@ifnextchar", function(context) { + const args = context.consumeArgs(3); // symbol, if, else + context.consumeSpaces(); + const nextToken = context.future(); + if (args[0].length === 1 && args[0][0].text === nextToken.text) { + return {tokens: args[1], numArgs: 0}; + } else { + return {tokens: args[2], numArgs: 0}; + } +}); + +// LaTeX's \@ifstar{#1}{#2} looks ahead to the next (unexpanded) symbol. +// If it is `*`, then it consumes the symbol, and the macro expands to #1; +// otherwise, the macro expands to #2 (without consuming the symbol). 
+// TeX source: \def\@ifstar#1{\@ifnextchar *{\@firstoftwo{#1}}} +defineMacro("\\@ifstar", "\\@ifnextchar *{\\@firstoftwo{#1}}"); + +// LaTeX's \TextOrMath{#1}{#2} expands to #1 in text mode, #2 in math mode +defineMacro("\\TextOrMath", function(context) { + const args = context.consumeArgs(2); + if (context.mode === 'text') { + return {tokens: args[0], numArgs: 0}; + } else { + return {tokens: args[1], numArgs: 0}; + } +}); + +// Lookup table for parsing numbers in base 8 through 16 +const digitToNumber = { + "0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, + "9": 9, "a": 10, "A": 10, "b": 11, "B": 11, "c": 12, "C": 12, + "d": 13, "D": 13, "e": 14, "E": 14, "f": 15, "F": 15, +}; + +// TeX \char makes a literal character (catcode 12) using the following forms: +// (see The TeXBook, p. 43) +// \char123 -- decimal +// \char'123 -- octal +// \char"123 -- hex +// \char`x -- character that can be written (i.e. isn't active) +// \char`\x -- character that cannot be written (e.g. %) +// These all refer to characters from the font, so we turn them into special +// calls to a function \@char dealt with in the Parser. +defineMacro("\\char", function(context) { + let token = context.popToken(); + let base; + let number = ''; + if (token.text === "'") { + base = 8; + token = context.popToken(); + } else if (token.text === '"') { + base = 16; + token = context.popToken(); + } else if (token.text === "`") { + token = context.popToken(); + if (token.text[0] === "\\") { + number = token.text.charCodeAt(1); + } else if (token.text === "EOF") { + throw new ParseError("\\char` missing argument"); + } else { + number = token.text.charCodeAt(0); + } + } else { + base = 10; + } + if (base) { + // Parse a number in the given base, starting with first `token`. 
+ number = digitToNumber[token.text]; + if (number == null || number >= base) { + throw new ParseError(`Invalid base-${base} digit ${token.text}`); + } + let digit; + while ((digit = digitToNumber[context.future().text]) != null && + digit < base) { + number *= base; + number += digit; + context.popToken(); + } + } + return `\\@char{${number}}`; +}); + +// \newcommand{\macro}[args]{definition} +// \renewcommand{\macro}[args]{definition} +// TODO: Optional arguments: \newcommand{\macro}[args][default]{definition} +const newcommand = ( + context, existsOK: boolean, nonexistsOK: boolean, skipIfExists: boolean +) => { + let arg = context.consumeArg().tokens; + if (arg.length !== 1) { + throw new ParseError( + "\\newcommand's first argument must be a macro name"); + } + const name = arg[0].text; + + const exists = context.isDefined(name); + if (exists && !existsOK) { + throw new ParseError(`\\newcommand{${name}} attempting to redefine ` + + `${name}; use \\renewcommand`); + } + if (!exists && !nonexistsOK) { + throw new ParseError(`\\renewcommand{${name}} when command ${name} ` + + `does not yet exist; use \\newcommand`); + } + + let numArgs = 0; + arg = context.consumeArg().tokens; + if (arg.length === 1 && arg[0].text === "[") { + let argText = ''; + let token = context.expandNextToken(); + while (token.text !== "]" && token.text !== "EOF") { + // TODO: Should properly expand arg, e.g., ignore {}s + argText += token.text; + token = context.expandNextToken(); + } + if (!argText.match(/^\s*[0-9]+\s*$/)) { + throw new ParseError(`Invalid number of arguments: ${argText}`); + } + numArgs = parseInt(argText); + arg = context.consumeArg().tokens; + } + + if (!(exists && skipIfExists)) { + // Final arg is the expansion of the macro + context.macros.set(name, { + tokens: arg, + numArgs, + }); + } + return ''; +}; +defineMacro("\\newcommand", + (context) => newcommand(context, false, true, false)); +defineMacro("\\renewcommand", + (context) => newcommand(context, true, false, 
false)); +defineMacro("\\providecommand", + (context) => newcommand(context, true, true, true)); + +// terminal (console) tools +defineMacro("\\message", (context) => { + const arg = context.consumeArgs(1)[0]; + // eslint-disable-next-line no-console + console.log(arg.reverse().map(token => token.text).join("")); + return ''; +}); +defineMacro("\\errmessage", (context) => { + const arg = context.consumeArgs(1)[0]; + // eslint-disable-next-line no-console + console.error(arg.reverse().map(token => token.text).join("")); + return ''; +}); +defineMacro("\\show", (context) => { + const tok = context.popToken(); + const name = tok.text; + // eslint-disable-next-line no-console + console.log(tok, context.macros.get(name), functions[name], + symbols.math[name], symbols.text[name]); + return ''; +}); + +////////////////////////////////////////////////////////////////////// +// Grouping +// \let\bgroup={ \let\egroup=} +defineMacro("\\bgroup", "{"); +defineMacro("\\egroup", "}"); + +// Symbols from latex.ltx: +// \def~{\nobreakspace{}} +// \def\lq{`} +// \def\rq{'} +// \def \aa {\r a} +// \def \AA {\r A} +defineMacro("~", "\\nobreakspace"); +defineMacro("\\lq", "`"); +defineMacro("\\rq", "'"); +defineMacro("\\aa", "\\r a"); +defineMacro("\\AA", "\\r A"); + +// Copyright (C) and registered (R) symbols. Use raw symbol in MathML. 
// \DeclareTextCommandDefault{\textcopyright}{\textcircled{c}}
// \DeclareTextCommandDefault{\textregistered}{\textcircled{%
//      \check@mathfonts\fontsize\sf@size\z@\math@fontsfalse\selectfont R}}
// \DeclareRobustCommand{\copyright}{%
//    \ifmmode{\nfss@text{\textcopyright}}\else\textcopyright\fi}
defineMacro("\\textcopyright", "\\html@mathml{\\textcircled{c}}{\\char`©}");
defineMacro("\\copyright",
    "\\TextOrMath{\\textcopyright}{\\text{\\textcopyright}}");
defineMacro("\\textregistered",
    "\\html@mathml{\\textcircled{\\scriptsize R}}{\\char`®}");

// Characters omitted from Unicode range 1D400–1D7FF
defineMacro("\u212C", "\\mathscr{B}");  // script
defineMacro("\u2130", "\\mathscr{E}");
defineMacro("\u2131", "\\mathscr{F}");
defineMacro("\u210B", "\\mathscr{H}");
defineMacro("\u2110", "\\mathscr{I}");
defineMacro("\u2112", "\\mathscr{L}");
defineMacro("\u2133", "\\mathscr{M}");
defineMacro("\u211B", "\\mathscr{R}");
defineMacro("\u212D", "\\mathfrak{C}");  // Fraktur
defineMacro("\u210C", "\\mathfrak{H}");
defineMacro("\u2128", "\\mathfrak{Z}");

// Define \Bbbk with a macro that works in both HTML and MathML.
defineMacro("\\Bbbk", "\\Bbb{k}");

// Unicode middle dot
// The KaTeX fonts do not contain U+00B7. Instead, \cdotp displays
// the dot at U+22C5 and gives it punct spacing.
defineMacro("\u00b7", "\\cdotp");

// \llap and \rlap render their contents in text mode
defineMacro("\\llap", "\\mathllap{\\textrm{#1}}");
defineMacro("\\rlap", "\\mathrlap{\\textrm{#1}}");
defineMacro("\\clap", "\\mathclap{\\textrm{#1}}");

// \mathstrut from the TeXbook, p 360
defineMacro("\\mathstrut", "\\vphantom{(}");

// \underbar from TeXbook p 353
defineMacro("\\underbar", "\\underline{\\text{#1}}");

// \not is defined by base/fontmath.ltx via
// \DeclareMathSymbol{\not}{\mathrel}{symbols}{"36}
// It's thus treated like a \mathrel, but defined by a symbol that has zero
// width but extends to the right. We use \rlap to get that spacing.
// For MathML we write U+0338 here. buildMathML.js will then do the overlay.
defineMacro("\\not", '\\html@mathml{\\mathrel{\\mathrlap\\@not}}{\\char"338}');

// Negated symbols from base/fontmath.ltx:
// \def\neq{\not=} \let\ne=\neq
// \DeclareRobustCommand
//   \notin{\mathrel{\m@th\mathpalette\c@ncel\in}}
// \def\c@ncel#1#2{\m@th\ooalign{$\hfil#1\mkern1mu/\hfil$\crcr$#1#2$}}
defineMacro("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}");
defineMacro("\\ne", "\\neq");
defineMacro("\u2260", "\\neq");
defineMacro("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}" +
    "{\\mathrel{\\char`∉}}");
defineMacro("\u2209", "\\notin");

// Unicode stacked relations
// Each entry builds the glyph from parts in the first \html@mathml argument
// and names its own code point via \char in the second (MathML) argument, so
// the \char target must match the character the macro is registered under.
defineMacro("\u2258", "\\html@mathml{" +
    "\\mathrel{=\\kern{-1em}\\raisebox{0.4em}{$\\scriptsize\\frown$}}" +
    "}{\\mathrel{\\char`\u2258}}");
// U+2259 (wedge over equals) must emit U+2259 itself, not the U+2258 of the
// entry above.
defineMacro("\u2259",
    "\\html@mathml{\\stackrel{\\tiny\\wedge}{=}}{\\mathrel{\\char`\u2259}}");
defineMacro("\u225A",
    "\\html@mathml{\\stackrel{\\tiny\\vee}{=}}{\\mathrel{\\char`\u225A}}");
defineMacro("\u225B",
    "\\html@mathml{\\stackrel{\\scriptsize\\star}{=}}" +
    "{\\mathrel{\\char`\u225B}}");
defineMacro("\u225D",
    "\\html@mathml{\\stackrel{\\tiny\\mathrm{def}}{=}}" +
    "{\\mathrel{\\char`\u225D}}");
defineMacro("\u225E",
    "\\html@mathml{\\stackrel{\\tiny\\mathrm{m}}{=}}" +
    "{\\mathrel{\\char`\u225E}}");
defineMacro("\u225F",
    "\\html@mathml{\\stackrel{\\tiny?}{=}}{\\mathrel{\\char`\u225F}}");

// Misc Unicode
defineMacro("\u27C2", "\\perp");
defineMacro("\u203C", "\\mathclose{!\\mkern-0.8mu!}");
defineMacro("\u220C", "\\notni");
defineMacro("\u231C", "\\ulcorner");
defineMacro("\u231D", "\\urcorner");
defineMacro("\u231E", "\\llcorner");
defineMacro("\u231F", "\\lrcorner");
defineMacro("\u00A9", "\\copyright");
defineMacro("\u00AE", "\\textregistered");
// NOTE(review): U+FE0F is VARIATION SELECTOR-16, not a registered-sign code
// point; kept exactly as vendored -- confirm the intent upstream.
defineMacro("\uFE0F", "\\textregistered");

// The KaTeX fonts have corners at codepoints that don't match Unicode.
// For MathML purposes, use the Unicode code point.
defineMacro("\\ulcorner", "\\html@mathml{\\@ulcorner}{\\mathop{\\char\"231c}}");
defineMacro("\\urcorner", "\\html@mathml{\\@urcorner}{\\mathop{\\char\"231d}}");
defineMacro("\\llcorner", "\\html@mathml{\\@llcorner}{\\mathop{\\char\"231e}}");
defineMacro("\\lrcorner", "\\html@mathml{\\@lrcorner}{\\mathop{\\char\"231f}}");

//////////////////////////////////////////////////////////////////////
// LaTeX_2ε

// \vdots{\vbox{\baselineskip4\p@  \lineskiplimit\z@
//   \kern6\p@\hbox{.}\hbox{.}\hbox{.}}}
// We'll call \varvdots, which gets a glyph from symbols.js.
// The zero-width rule gets us an equivalent to the vertical 6pt kern.
defineMacro("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}");
defineMacro("\u22ee", "\\vdots");

//////////////////////////////////////////////////////////////////////
// amsmath.sty
// http://mirrors.concertpass.com/tex-archive/macros/latex/required/amsmath/amsmath.pdf

// Italic Greek capital letters. AMS defines these with \DeclareMathSymbol,
// but they are equivalent to \mathit{\Letter}.
+defineMacro("\\varGamma", "\\mathit{\\Gamma}"); +defineMacro("\\varDelta", "\\mathit{\\Delta}"); +defineMacro("\\varTheta", "\\mathit{\\Theta}"); +defineMacro("\\varLambda", "\\mathit{\\Lambda}"); +defineMacro("\\varXi", "\\mathit{\\Xi}"); +defineMacro("\\varPi", "\\mathit{\\Pi}"); +defineMacro("\\varSigma", "\\mathit{\\Sigma}"); +defineMacro("\\varUpsilon", "\\mathit{\\Upsilon}"); +defineMacro("\\varPhi", "\\mathit{\\Phi}"); +defineMacro("\\varPsi", "\\mathit{\\Psi}"); +defineMacro("\\varOmega", "\\mathit{\\Omega}"); + +//\newcommand{\substack}[1]{\subarray{c}#1\endsubarray} +defineMacro("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"); + +// \renewcommand{\colon}{\nobreak\mskip2mu\mathpunct{}\nonscript +// \mkern-\thinmuskip{:}\mskip6muplus1mu\relax} +defineMacro("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}" + + "\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"); + +// \newcommand{\boxed}[1]{\fbox{\m@th$\displaystyle#1$}} +defineMacro("\\boxed", "\\fbox{$\\displaystyle{#1}$}"); + +// \def\iff{\DOTSB\;\Longleftrightarrow\;} +// \def\implies{\DOTSB\;\Longrightarrow\;} +// \def\impliedby{\DOTSB\;\Longleftarrow\;} +defineMacro("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"); +defineMacro("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"); +defineMacro("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"); + +// \def\dddot#1{{\mathop{#1}\limits^{\vbox to-1.4\ex@{\kern-\tw@\ex@ +// \hbox{\normalfont ...}\vss}}}} +// We use \overset which avoids the vertical shift of \mathop. +defineMacro("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"); +defineMacro("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"); + +// AMSMath's automatic \dots, based on \mdots@@ macro. 
+const dotsByToken = { + ',': '\\dotsc', + '\\not': '\\dotsb', + // \keybin@ checks for the following: + '+': '\\dotsb', + '=': '\\dotsb', + '<': '\\dotsb', + '>': '\\dotsb', + '-': '\\dotsb', + '*': '\\dotsb', + ':': '\\dotsb', + // Symbols whose definition starts with \DOTSB: + '\\DOTSB': '\\dotsb', + '\\coprod': '\\dotsb', + '\\bigvee': '\\dotsb', + '\\bigwedge': '\\dotsb', + '\\biguplus': '\\dotsb', + '\\bigcap': '\\dotsb', + '\\bigcup': '\\dotsb', + '\\prod': '\\dotsb', + '\\sum': '\\dotsb', + '\\bigotimes': '\\dotsb', + '\\bigoplus': '\\dotsb', + '\\bigodot': '\\dotsb', + '\\bigsqcup': '\\dotsb', + '\\And': '\\dotsb', + '\\longrightarrow': '\\dotsb', + '\\Longrightarrow': '\\dotsb', + '\\longleftarrow': '\\dotsb', + '\\Longleftarrow': '\\dotsb', + '\\longleftrightarrow': '\\dotsb', + '\\Longleftrightarrow': '\\dotsb', + '\\mapsto': '\\dotsb', + '\\longmapsto': '\\dotsb', + '\\hookrightarrow': '\\dotsb', + '\\doteq': '\\dotsb', + // Symbols whose definition starts with \mathbin: + '\\mathbin': '\\dotsb', + // Symbols whose definition starts with \mathrel: + '\\mathrel': '\\dotsb', + '\\relbar': '\\dotsb', + '\\Relbar': '\\dotsb', + '\\xrightarrow': '\\dotsb', + '\\xleftarrow': '\\dotsb', + // Symbols whose definition starts with \DOTSI: + '\\DOTSI': '\\dotsi', + '\\int': '\\dotsi', + '\\oint': '\\dotsi', + '\\iint': '\\dotsi', + '\\iiint': '\\dotsi', + '\\iiiint': '\\dotsi', + '\\idotsint': '\\dotsi', + // Symbols whose definition starts with \DOTSX: + '\\DOTSX': '\\dotsx', +}; + +defineMacro("\\dots", function(context) { + // TODO: If used in text mode, should expand to \textellipsis. + // However, in KaTeX, \textellipsis and \ldots behave the same + // (in text mode), and it's unlikely we'd see any of the math commands + // that affect the behavior of \dots when in text mode. So fine for now + // (until we support \ifmmode ... \else ... \fi). 
+ let thedots = '\\dotso'; + const next = context.expandAfterFuture().text; + if (next in dotsByToken) { + thedots = dotsByToken[next]; + } else if (next.slice(0, 4) === '\\not') { + thedots = '\\dotsb'; + } else if (next in symbols.math) { + if (utils.contains(['bin', 'rel'], symbols.math[next].group)) { + thedots = '\\dotsb'; + } + } + return thedots; +}); + +const spaceAfterDots = { + // \rightdelim@ checks for the following: + ')': true, + ']': true, + '\\rbrack': true, + '\\}': true, + '\\rbrace': true, + '\\rangle': true, + '\\rceil': true, + '\\rfloor': true, + '\\rgroup': true, + '\\rmoustache': true, + '\\right': true, + '\\bigr': true, + '\\biggr': true, + '\\Bigr': true, + '\\Biggr': true, + // \extra@ also tests for the following: + '$': true, + // \extrap@ checks for the following: + ';': true, + '.': true, + ',': true, +}; + +defineMacro("\\dotso", function(context) { + const next = context.future().text; + if (next in spaceAfterDots) { + return "\\ldots\\,"; + } else { + return "\\ldots"; + } +}); + +defineMacro("\\dotsc", function(context) { + const next = context.future().text; + // \dotsc uses \extra@ but not \extrap@, instead specially checking for + // ';' and '.', but doesn't check for ','. + if (next in spaceAfterDots && next !== ',') { + return "\\ldots\\,"; + } else { + return "\\ldots"; + } +}); + +defineMacro("\\cdots", function(context) { + const next = context.future().text; + if (next in spaceAfterDots) { + return "\\@cdots\\,"; + } else { + return "\\@cdots"; + } +}); + +defineMacro("\\dotsb", "\\cdots"); +defineMacro("\\dotsm", "\\cdots"); +defineMacro("\\dotsi", "\\!\\cdots"); +// amsmath doesn't actually define \dotsx, but \dots followed by a macro +// starting with \DOTSX implies \dotso, and then \extra@ detects this case +// and forces the added `\,`. 
+defineMacro("\\dotsx", "\\ldots\\,"); + +// \let\DOTSI\relax +// \let\DOTSB\relax +// \let\DOTSX\relax +defineMacro("\\DOTSI", "\\relax"); +defineMacro("\\DOTSB", "\\relax"); +defineMacro("\\DOTSX", "\\relax"); + +// Spacing, based on amsmath.sty's override of LaTeX defaults +// \DeclareRobustCommand{\tmspace}[3]{% +// \ifmmode\mskip#1#2\else\kern#1#3\fi\relax} +defineMacro("\\tmspace", "\\TextOrMath{\\kern#1#3}{\\mskip#1#2}\\relax"); +// \renewcommand{\,}{\tmspace+\thinmuskip{.1667em}} +// TODO: math mode should use \thinmuskip +defineMacro("\\,", "\\tmspace+{3mu}{.1667em}"); +// \let\thinspace\, +defineMacro("\\thinspace", "\\,"); +// \def\>{\mskip\medmuskip} +// \renewcommand{\:}{\tmspace+\medmuskip{.2222em}} +// TODO: \> and math mode of \: should use \medmuskip = 4mu plus 2mu minus 4mu +defineMacro("\\>", "\\mskip{4mu}"); +defineMacro("\\:", "\\tmspace+{4mu}{.2222em}"); +// \let\medspace\: +defineMacro("\\medspace", "\\:"); +// \renewcommand{\;}{\tmspace+\thickmuskip{.2777em}} +// TODO: math mode should use \thickmuskip = 5mu plus 5mu +defineMacro("\\;", "\\tmspace+{5mu}{.2777em}"); +// \let\thickspace\; +defineMacro("\\thickspace", "\\;"); +// \renewcommand{\!}{\tmspace-\thinmuskip{.1667em}} +// TODO: math mode should use \thinmuskip +defineMacro("\\!", "\\tmspace-{3mu}{.1667em}"); +// \let\negthinspace\! 
defineMacro("\\negthinspace", "\\!");
// \newcommand{\negmedspace}{\tmspace-\medmuskip{.2222em}}
// TODO: math mode should use \medmuskip
defineMacro("\\negmedspace", "\\tmspace-{4mu}{.2222em}");
// \newcommand{\negthickspace}{\tmspace-\thickmuskip{.2777em}}
// TODO: math mode should use \thickmuskip
// The text-mode kern is .2777em, matching the amsmath definition quoted above
// and the positive thick space \; (the negative and positive thick spaces
// should cancel exactly).
defineMacro("\\negthickspace", "\\tmspace-{5mu}{.2777em}");
// \def\enspace{\kern.5em }
defineMacro("\\enspace", "\\kern.5em ");
// \def\enskip{\hskip.5em\relax}
defineMacro("\\enskip", "\\hskip.5em\\relax");
// \def\quad{\hskip1em\relax}
defineMacro("\\quad", "\\hskip1em\\relax");
// \def\qquad{\hskip2em\relax}
defineMacro("\\qquad", "\\hskip2em\\relax");

// \tag@in@display form of \tag
// \tag dispatches on a following `*`: the starred form goes to \tag@literal,
// the plain form to \tag@paren, which wraps its argument in parentheses.
defineMacro("\\tag", "\\@ifstar\\tag@literal\\tag@paren");
defineMacro("\\tag@paren", "\\tag@literal{({#1})}");
// Stores the tag by defining \df@tag globally; throws ParseError if \df@tag
// is already defined, i.e. a second \tag was used.
defineMacro("\\tag@literal", (context) => {
    if (context.macros.get("\\df@tag")) {
        throw new ParseError("Multiple \\tag");
    }
    return "\\gdef\\df@tag{\\text{#1}}";
});

// \renewcommand{\bmod}{\nonscript\mskip-\medmuskip\mkern5mu\mathbin
//   {\operator@font mod}\penalty900
//   \mkern5mu\nonscript\mskip-\medmuskip}
// \newcommand{\pod}[1]{\allowbreak
//   \if@display\mkern18mu\else\mkern8mu\fi(#1)}
// \renewcommand{\pmod}[1]{\pod{{\operator@font mod}\mkern6mu#1}}
// \newcommand{\mod}[1]{\allowbreak\if@display\mkern18mu
//   \else\mkern12mu\fi{\operator@font mod}\,\,#1}
// TODO: math mode should use \medmuskip = 4mu plus 2mu minus 4mu
defineMacro("\\bmod",
    "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}" +
    "\\mathbin{\\rm mod}" +
    "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}");
defineMacro("\\pod", "\\allowbreak" +
    "\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)");
defineMacro("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}");
defineMacro("\\mod", "\\allowbreak" +
    "\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}" +
    "{\\rm mod}\\,\\,#1");
+////////////////////////////////////////////////////////////////////// +// LaTeX source2e + +// \expandafter\let\expandafter\@normalcr +// \csname\expandafter\@gobble\string\\ \endcsname +// \DeclareRobustCommand\newline{\@normalcr\relax} +defineMacro("\\newline", "\\\\\\relax"); + +// \def\TeX{T\kern-.1667em\lower.5ex\hbox{E}\kern-.125emX\@} +// TODO: Doesn't normally work in math mode because \@ fails. KaTeX doesn't +// support \@ yet, so that's omitted, and we add \text so that the result +// doesn't look funny in math mode. +defineMacro("\\TeX", "\\textrm{\\html@mathml{" + + "T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX" + + "}{TeX}}"); + +// \DeclareRobustCommand{\LaTeX}{L\kern-.36em% +// {\sbox\z@ T% +// \vbox to\ht\z@{\hbox{\check@mathfonts +// \fontsize\sf@size\z@ +// \math@fontsfalse\selectfont +// A}% +// \vss}% +// }% +// \kern-.15em% +// \TeX} +// This code aligns the top of the A with the T (from the perspective of TeX's +// boxes, though visually the A appears to extend above slightly). +// We compute the corresponding \raisebox when A is rendered in \normalsize +// \scriptstyle, which has a scale factor of 0.7 (see Options.js). 
+const latexRaiseA = makeEm(fontMetricsData['Main-Regular']["T".charCodeAt(0)][1] - + 0.7 * fontMetricsData['Main-Regular']["A".charCodeAt(0)][1]); +defineMacro("\\LaTeX", "\\textrm{\\html@mathml{" + + `L\\kern-.36em\\raisebox{${latexRaiseA}}{\\scriptstyle A}` + + "\\kern-.15em\\TeX}{LaTeX}}"); + +// New KaTeX logo based on tweaking LaTeX logo +defineMacro("\\KaTeX", "\\textrm{\\html@mathml{" + + `K\\kern-.17em\\raisebox{${latexRaiseA}}{\\scriptstyle A}` + + "\\kern-.15em\\TeX}{KaTeX}}"); + +// \DeclareRobustCommand\hspace{\@ifstar\@hspacer\@hspace} +// \def\@hspace#1{\hskip #1\relax} +// \def\@hspacer#1{\vrule \@width\z@\nobreak +// \hskip #1\hskip \z@skip} +defineMacro("\\hspace", "\\@ifstar\\@hspacer\\@hspace"); +defineMacro("\\@hspace", "\\hskip #1\\relax"); +defineMacro("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"); + +////////////////////////////////////////////////////////////////////// +// mathtools.sty + +//\providecommand\ordinarycolon{:} +defineMacro("\\ordinarycolon", ":"); +//\def\vcentcolon{\mathrel{\mathop\ordinarycolon}} +//TODO(edemaine): Not yet centered. 
Fix via \raisebox or #726 +defineMacro("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"); +// \providecommand*\dblcolon{\vcentcolon\mathrel{\mkern-.9mu}\vcentcolon} +defineMacro("\\dblcolon", "\\html@mathml{" + + "\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}" + + "{\\mathop{\\char\"2237}}"); +// \providecommand*\coloneqq{\vcentcolon\mathrel{\mkern-1.2mu}=} +defineMacro("\\coloneqq", "\\html@mathml{" + + "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}" + + "{\\mathop{\\char\"2254}}"); // ≔ +// \providecommand*\Coloneqq{\dblcolon\mathrel{\mkern-1.2mu}=} +defineMacro("\\Coloneqq", "\\html@mathml{" + + "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}" + + "{\\mathop{\\char\"2237\\char\"3d}}"); +// \providecommand*\coloneq{\vcentcolon\mathrel{\mkern-1.2mu}\mathrel{-}} +defineMacro("\\coloneq", "\\html@mathml{" + + "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}" + + "{\\mathop{\\char\"3a\\char\"2212}}"); +// \providecommand*\Coloneq{\dblcolon\mathrel{\mkern-1.2mu}\mathrel{-}} +defineMacro("\\Coloneq", "\\html@mathml{" + + "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}" + + "{\\mathop{\\char\"2237\\char\"2212}}"); +// \providecommand*\eqqcolon{=\mathrel{\mkern-1.2mu}\vcentcolon} +defineMacro("\\eqqcolon", "\\html@mathml{" + + "\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}" + + "{\\mathop{\\char\"2255}}"); // ≕ +// \providecommand*\Eqqcolon{=\mathrel{\mkern-1.2mu}\dblcolon} +defineMacro("\\Eqqcolon", "\\html@mathml{" + + "\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}" + + "{\\mathop{\\char\"3d\\char\"2237}}"); +// \providecommand*\eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\vcentcolon} +defineMacro("\\eqcolon", "\\html@mathml{" + + "\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}" + + "{\\mathop{\\char\"2239}}"); +// \providecommand*\Eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\dblcolon} +defineMacro("\\Eqcolon", "\\html@mathml{" + + "\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}" + + 
"{\\mathop{\\char\"2212\\char\"2237}}"); +// \providecommand*\colonapprox{\vcentcolon\mathrel{\mkern-1.2mu}\approx} +defineMacro("\\colonapprox", "\\html@mathml{" + + "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}" + + "{\\mathop{\\char\"3a\\char\"2248}}"); +// \providecommand*\Colonapprox{\dblcolon\mathrel{\mkern-1.2mu}\approx} +defineMacro("\\Colonapprox", "\\html@mathml{" + + "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}" + + "{\\mathop{\\char\"2237\\char\"2248}}"); +// \providecommand*\colonsim{\vcentcolon\mathrel{\mkern-1.2mu}\sim} +defineMacro("\\colonsim", "\\html@mathml{" + + "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}" + + "{\\mathop{\\char\"3a\\char\"223c}}"); +// \providecommand*\Colonsim{\dblcolon\mathrel{\mkern-1.2mu}\sim} +defineMacro("\\Colonsim", "\\html@mathml{" + + "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}" + + "{\\mathop{\\char\"2237\\char\"223c}}"); + +// Some Unicode characters are implemented with macros to mathtools functions. +defineMacro("\u2237", "\\dblcolon"); // :: +defineMacro("\u2239", "\\eqcolon"); // -: +defineMacro("\u2254", "\\coloneqq"); // := +defineMacro("\u2255", "\\eqqcolon"); // =: +defineMacro("\u2A74", "\\Coloneqq"); // ::= + +////////////////////////////////////////////////////////////////////// +// colonequals.sty + +// Alternate names for mathtools's macros: +defineMacro("\\ratio", "\\vcentcolon"); +defineMacro("\\coloncolon", "\\dblcolon"); +defineMacro("\\colonequals", "\\coloneqq"); +defineMacro("\\coloncolonequals", "\\Coloneqq"); +defineMacro("\\equalscolon", "\\eqqcolon"); +defineMacro("\\equalscoloncolon", "\\Eqqcolon"); +defineMacro("\\colonminus", "\\coloneq"); +defineMacro("\\coloncolonminus", "\\Coloneq"); +defineMacro("\\minuscolon", "\\eqcolon"); +defineMacro("\\minuscoloncolon", "\\Eqcolon"); +// \colonapprox name is same in mathtools and colonequals. +defineMacro("\\coloncolonapprox", "\\Colonapprox"); +// \colonsim name is same in mathtools and colonequals. 
+defineMacro("\\coloncolonsim", "\\Colonsim"); + +// Additional macros, implemented by analogy with mathtools definitions: +defineMacro("\\simcolon", + "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"); +defineMacro("\\simcoloncolon", + "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"); +defineMacro("\\approxcolon", + "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"); +defineMacro("\\approxcoloncolon", + "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"); + +// Present in newtxmath, pxfonts and txfonts +defineMacro("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u220C}}"); +defineMacro("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"); +defineMacro("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"); + +////////////////////////////////////////////////////////////////////// +// From amsopn.sty +defineMacro("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"); +defineMacro("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"); +defineMacro("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{lim}}"); +defineMacro("\\varliminf", "\\DOTSB\\operatorname*{\\underline{lim}}"); +defineMacro("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{lim}}"); +defineMacro("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{lim}}"); + +////////////////////////////////////////////////////////////////////// +// MathML alternates for KaTeX glyphs in the Unicode private area +defineMacro("\\gvertneqq", "\\html@mathml{\\@gvertneqq}{\u2269}"); +defineMacro("\\lvertneqq", "\\html@mathml{\\@lvertneqq}{\u2268}"); +defineMacro("\\ngeqq", "\\html@mathml{\\@ngeqq}{\u2271}"); +defineMacro("\\ngeqslant", "\\html@mathml{\\@ngeqslant}{\u2271}"); +defineMacro("\\nleqq", "\\html@mathml{\\@nleqq}{\u2270}"); +defineMacro("\\nleqslant", "\\html@mathml{\\@nleqslant}{\u2270}"); +defineMacro("\\nshortmid", "\\html@mathml{\\@nshortmid}{∤}"); +defineMacro("\\nshortparallel", "\\html@mathml{\\@nshortparallel}{∦}"); +defineMacro("\\nsubseteqq", 
"\\html@mathml{\\@nsubseteqq}{\u2288}"); +defineMacro("\\nsupseteqq", "\\html@mathml{\\@nsupseteqq}{\u2289}"); +defineMacro("\\varsubsetneq", "\\html@mathml{\\@varsubsetneq}{⊊}"); +defineMacro("\\varsubsetneqq", "\\html@mathml{\\@varsubsetneqq}{⫋}"); +defineMacro("\\varsupsetneq", "\\html@mathml{\\@varsupsetneq}{⊋}"); +defineMacro("\\varsupsetneqq", "\\html@mathml{\\@varsupsetneqq}{⫌}"); +defineMacro("\\imath", "\\html@mathml{\\@imath}{\u0131}"); +defineMacro("\\jmath", "\\html@mathml{\\@jmath}{\u0237}"); + +////////////////////////////////////////////////////////////////////// +// stmaryrd and semantic + +// The stmaryrd and semantic packages render the next four items by calling a +// glyph. Those glyphs do not exist in the KaTeX fonts. Hence the macros. + +defineMacro("\\llbracket", "\\html@mathml{" + + "\\mathopen{[\\mkern-3.2mu[}}" + + "{\\mathopen{\\char`\u27e6}}"); +defineMacro("\\rrbracket", "\\html@mathml{" + + "\\mathclose{]\\mkern-3.2mu]}}" + + "{\\mathclose{\\char`\u27e7}}"); + +defineMacro("\u27e6", "\\llbracket"); // blackboard bold [ +defineMacro("\u27e7", "\\rrbracket"); // blackboard bold ] + +defineMacro("\\lBrace", "\\html@mathml{" + + "\\mathopen{\\{\\mkern-3.2mu[}}" + + "{\\mathopen{\\char`\u2983}}"); +defineMacro("\\rBrace", "\\html@mathml{" + + "\\mathclose{]\\mkern-3.2mu\\}}}" + + "{\\mathclose{\\char`\u2984}}"); + +defineMacro("\u2983", "\\lBrace"); // blackboard bold { +defineMacro("\u2984", "\\rBrace"); // blackboard bold } + +// TODO: Create variable sized versions of the last two items. I believe that +// will require new font glyphs. + +// The stmaryrd function `\minuso` provides a "Plimsoll" symbol that +// superimposes the characters \circ and \mathminus. Used in chemistry. 
+defineMacro("\\minuso", "\\mathbin{\\html@mathml{" + + "{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}" + + "{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}" + + "{\\char`⦵}}"); +defineMacro("⦵", "\\minuso"); + +////////////////////////////////////////////////////////////////////// +// texvc.sty + +// The texvc package contains macros available in mediawiki pages. +// We omit the functions deprecated at +// https://en.wikipedia.org/wiki/Help:Displaying_a_formula#Deprecated_syntax + +// We also omit texvc's \O, which conflicts with \text{\O} + +defineMacro("\\darr", "\\downarrow"); +defineMacro("\\dArr", "\\Downarrow"); +defineMacro("\\Darr", "\\Downarrow"); +defineMacro("\\lang", "\\langle"); +defineMacro("\\rang", "\\rangle"); +defineMacro("\\uarr", "\\uparrow"); +defineMacro("\\uArr", "\\Uparrow"); +defineMacro("\\Uarr", "\\Uparrow"); +defineMacro("\\N", "\\mathbb{N}"); +defineMacro("\\R", "\\mathbb{R}"); +defineMacro("\\Z", "\\mathbb{Z}"); +defineMacro("\\alef", "\\aleph"); +defineMacro("\\alefsym", "\\aleph"); +defineMacro("\\Alpha", "\\mathrm{A}"); +defineMacro("\\Beta", "\\mathrm{B}"); +defineMacro("\\bull", "\\bullet"); +defineMacro("\\Chi", "\\mathrm{X}"); +defineMacro("\\clubs", "\\clubsuit"); +defineMacro("\\cnums", "\\mathbb{C}"); +defineMacro("\\Complex", "\\mathbb{C}"); +defineMacro("\\Dagger", "\\ddagger"); +defineMacro("\\diamonds", "\\diamondsuit"); +defineMacro("\\empty", "\\emptyset"); +defineMacro("\\Epsilon", "\\mathrm{E}"); +defineMacro("\\Eta", "\\mathrm{H}"); +defineMacro("\\exist", "\\exists"); +defineMacro("\\harr", "\\leftrightarrow"); +defineMacro("\\hArr", "\\Leftrightarrow"); +defineMacro("\\Harr", "\\Leftrightarrow"); +defineMacro("\\hearts", "\\heartsuit"); +defineMacro("\\image", "\\Im"); +defineMacro("\\infin", "\\infty"); +defineMacro("\\Iota", "\\mathrm{I}"); +defineMacro("\\isin", "\\in"); +defineMacro("\\Kappa", "\\mathrm{K}"); +defineMacro("\\larr", "\\leftarrow"); +defineMacro("\\lArr", "\\Leftarrow"); 
+defineMacro("\\Larr", "\\Leftarrow"); +defineMacro("\\lrarr", "\\leftrightarrow"); +defineMacro("\\lrArr", "\\Leftrightarrow"); +defineMacro("\\Lrarr", "\\Leftrightarrow"); +defineMacro("\\Mu", "\\mathrm{M}"); +defineMacro("\\natnums", "\\mathbb{N}"); +defineMacro("\\Nu", "\\mathrm{N}"); +defineMacro("\\Omicron", "\\mathrm{O}"); +defineMacro("\\plusmn", "\\pm"); +defineMacro("\\rarr", "\\rightarrow"); +defineMacro("\\rArr", "\\Rightarrow"); +defineMacro("\\Rarr", "\\Rightarrow"); +defineMacro("\\real", "\\Re"); +defineMacro("\\reals", "\\mathbb{R}"); +defineMacro("\\Reals", "\\mathbb{R}"); +defineMacro("\\Rho", "\\mathrm{P}"); +defineMacro("\\sdot", "\\cdot"); +defineMacro("\\sect", "\\S"); +defineMacro("\\spades", "\\spadesuit"); +defineMacro("\\sub", "\\subset"); +defineMacro("\\sube", "\\subseteq"); +defineMacro("\\supe", "\\supseteq"); +defineMacro("\\Tau", "\\mathrm{T}"); +defineMacro("\\thetasym", "\\vartheta"); +// TODO: defineMacro("\\varcoppa", "\\\mbox{\\coppa}"); +defineMacro("\\weierp", "\\wp"); +defineMacro("\\Zeta", "\\mathrm{Z}"); + +////////////////////////////////////////////////////////////////////// +// statmath.sty +// https://ctan.math.illinois.edu/macros/latex/contrib/statmath/statmath.pdf + +defineMacro("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"); +defineMacro("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"); +defineMacro("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"); + +////////////////////////////////////////////////////////////////////// +// braket.sty +// http://ctan.math.washington.edu/tex-archive/macros/latex/contrib/braket/braket.pdf + +defineMacro("\\bra", "\\mathinner{\\langle{#1}|}"); +defineMacro("\\ket", "\\mathinner{|{#1}\\rangle}"); +defineMacro("\\braket", "\\mathinner{\\langle{#1}\\rangle}"); +defineMacro("\\Bra", "\\left\\langle#1\\right|"); +defineMacro("\\Ket", "\\left|#1\\right\\rangle"); +const braketHelper = (one) => (context) => { + const left = context.consumeArg().tokens; + const middle = 
context.consumeArg().tokens; + const middleDouble = context.consumeArg().tokens; + const right = context.consumeArg().tokens; + const oldMiddle = context.macros.get("|"); + const oldMiddleDouble = context.macros.get("\\|"); + context.macros.beginGroup(); + const midMacro = (double) => (context) => { + if (one) { + // Only modify the first instance of | or \| + context.macros.set("|", oldMiddle); + if (middleDouble.length) { + context.macros.set("\\|", oldMiddleDouble); + } + } + let doubled = double; + if (!double && middleDouble.length) { + // Mimic \@ifnextchar + const nextToken = context.future(); + if (nextToken.text === "|") { + context.popToken(); + doubled = true; + } + } + return { + tokens: doubled ? middleDouble : middle, + numArgs: 0, + }; + }; + context.macros.set("|", midMacro(false)); + if (middleDouble.length) { + context.macros.set("\\|", midMacro(true)); + } + const arg = context.consumeArg().tokens; + const expanded = context.expandTokens([ + ...right, ...arg, ...left, // reversed + ]); + context.macros.endGroup(); + return { + tokens: expanded.reverse(), + numArgs: 0, + }; +}; +defineMacro("\\bra@ket", braketHelper(false)); +defineMacro("\\bra@set", braketHelper(true)); +defineMacro("\\Braket", "\\bra@ket{\\left\\langle}" + + "{\\,\\middle\\vert\\,}{\\,\\middle\\vert\\,}{\\right\\rangle}"); +defineMacro("\\Set", "\\bra@set{\\left\\{\\:}" + + "{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"); +defineMacro("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"); + // has no support for special || or \| + +////////////////////////////////////////////////////////////////////// +// actuarialangle.dtx +defineMacro("\\angln", "{\\angl n}"); + +// Custom Khan Academy colors, should be moved to an optional package +defineMacro("\\blue", "\\textcolor{##6495ed}{#1}"); +defineMacro("\\orange", "\\textcolor{##ffa500}{#1}"); +defineMacro("\\pink", "\\textcolor{##ff00af}{#1}"); +defineMacro("\\red", "\\textcolor{##df0030}{#1}"); +defineMacro("\\green", 
"\\textcolor{##28ae7b}{#1}"); +defineMacro("\\gray", "\\textcolor{gray}{#1}"); +defineMacro("\\purple", "\\textcolor{##9d38bd}{#1}"); +defineMacro("\\blueA", "\\textcolor{##ccfaff}{#1}"); +defineMacro("\\blueB", "\\textcolor{##80f6ff}{#1}"); +defineMacro("\\blueC", "\\textcolor{##63d9ea}{#1}"); +defineMacro("\\blueD", "\\textcolor{##11accd}{#1}"); +defineMacro("\\blueE", "\\textcolor{##0c7f99}{#1}"); +defineMacro("\\tealA", "\\textcolor{##94fff5}{#1}"); +defineMacro("\\tealB", "\\textcolor{##26edd5}{#1}"); +defineMacro("\\tealC", "\\textcolor{##01d1c1}{#1}"); +defineMacro("\\tealD", "\\textcolor{##01a995}{#1}"); +defineMacro("\\tealE", "\\textcolor{##208170}{#1}"); +defineMacro("\\greenA", "\\textcolor{##b6ffb0}{#1}"); +defineMacro("\\greenB", "\\textcolor{##8af281}{#1}"); +defineMacro("\\greenC", "\\textcolor{##74cf70}{#1}"); +defineMacro("\\greenD", "\\textcolor{##1fab54}{#1}"); +defineMacro("\\greenE", "\\textcolor{##0d923f}{#1}"); +defineMacro("\\goldA", "\\textcolor{##ffd0a9}{#1}"); +defineMacro("\\goldB", "\\textcolor{##ffbb71}{#1}"); +defineMacro("\\goldC", "\\textcolor{##ff9c39}{#1}"); +defineMacro("\\goldD", "\\textcolor{##e07d10}{#1}"); +defineMacro("\\goldE", "\\textcolor{##a75a05}{#1}"); +defineMacro("\\redA", "\\textcolor{##fca9a9}{#1}"); +defineMacro("\\redB", "\\textcolor{##ff8482}{#1}"); +defineMacro("\\redC", "\\textcolor{##f9685d}{#1}"); +defineMacro("\\redD", "\\textcolor{##e84d39}{#1}"); +defineMacro("\\redE", "\\textcolor{##bc2612}{#1}"); +defineMacro("\\maroonA", "\\textcolor{##ffbde0}{#1}"); +defineMacro("\\maroonB", "\\textcolor{##ff92c6}{#1}"); +defineMacro("\\maroonC", "\\textcolor{##ed5fa6}{#1}"); +defineMacro("\\maroonD", "\\textcolor{##ca337c}{#1}"); +defineMacro("\\maroonE", "\\textcolor{##9e034e}{#1}"); +defineMacro("\\purpleA", "\\textcolor{##ddd7ff}{#1}"); +defineMacro("\\purpleB", "\\textcolor{##c6b9fc}{#1}"); +defineMacro("\\purpleC", "\\textcolor{##aa87ff}{#1}"); +defineMacro("\\purpleD", "\\textcolor{##7854ab}{#1}"); 
+defineMacro("\\purpleE", "\\textcolor{##543b78}{#1}"); +defineMacro("\\mintA", "\\textcolor{##f5f9e8}{#1}"); +defineMacro("\\mintB", "\\textcolor{##edf2df}{#1}"); +defineMacro("\\mintC", "\\textcolor{##e0e5cc}{#1}"); +defineMacro("\\grayA", "\\textcolor{##f6f7f7}{#1}"); +defineMacro("\\grayB", "\\textcolor{##f0f1f2}{#1}"); +defineMacro("\\grayC", "\\textcolor{##e3e5e6}{#1}"); +defineMacro("\\grayD", "\\textcolor{##d6d8da}{#1}"); +defineMacro("\\grayE", "\\textcolor{##babec2}{#1}"); +defineMacro("\\grayF", "\\textcolor{##888d93}{#1}"); +defineMacro("\\grayG", "\\textcolor{##626569}{#1}"); +defineMacro("\\grayH", "\\textcolor{##3b3e40}{#1}"); +defineMacro("\\grayI", "\\textcolor{##21242c}{#1}"); +defineMacro("\\kaBlue", "\\textcolor{##314453}{#1}"); +defineMacro("\\kaGreen", "\\textcolor{##71B307}{#1}"); diff --git a/lib/scripts/data/katex-op.js b/lib/scripts/data/katex-op.js new file mode 100644 index 0000000..3fbfac6 --- /dev/null +++ b/lib/scripts/data/katex-op.js @@ -0,0 +1,339 @@ +/** + * Vendored from KaTeX v0.16.22 (https://github.com/KaTeX/KaTeX). + * SPDX-License-Identifier: MIT + * Regenerate derived outputs: pnpm generate:katex + */ +// @flow +// Limits, symbols +import defineFunction, {ordargument} from "../defineFunction"; +import buildCommon from "../buildCommon"; +import {SymbolNode} from "../domTree"; +import * as mathMLTree from "../mathMLTree"; +import utils from "../utils"; +import Style from "../Style"; +import {assembleSupSub} from "./utils/assembleSupSub"; +import {assertNodeType} from "../parseNode"; +import {makeEm} from "../units"; + +import * as html from "../buildHTML"; +import * as mml from "../buildMathML"; + +import type {HtmlBuilderSupSub, MathMLBuilder} from "../defineFunction"; +import type {ParseNode} from "../parseNode"; + +// Most operators have a large successor symbol, but these don't. 
+const noSuccessor = [ + "\\smallint", +]; + +// NOTE: Unlike most `htmlBuilder`s, this one handles not only "op", but also +// "supsub" since some of them (like \int) can affect super/subscripting. +export const htmlBuilder: HtmlBuilderSupSub<"op"> = (grp, options) => { + // Operators are handled in the TeXbook pg. 443-444, rule 13(a). + let supGroup; + let subGroup; + let hasLimits = false; + let group: ParseNode<"op">; + if (grp.type === "supsub") { + // If we have limits, supsub will pass us its group to handle. Pull + // out the superscript and subscript and set the group to the op in + // its base. + supGroup = grp.sup; + subGroup = grp.sub; + group = assertNodeType(grp.base, "op"); + hasLimits = true; + } else { + group = assertNodeType(grp, "op"); + } + + const style = options.style; + + let large = false; + if (style.size === Style.DISPLAY.size && + group.symbol && + !utils.contains(noSuccessor, group.name)) { + + // Most symbol operators get larger in displaystyle (rule 13) + large = true; + } + + let base; + if (group.symbol) { + // If this is a symbol, create the symbol. + const fontName = large ? "Size2-Regular" : "Size1-Regular"; + + let stash = ""; + if (group.name === "\\oiint" || group.name === "\\oiiint") { + // No font glyphs yet, so use a glyph w/o the oval. + // TODO: When font glyphs are available, delete this code. + stash = group.name.slice(1); + group.name = stash === "oiint" ? "\\iint" : "\\iiint"; + } + + base = buildCommon.makeSymbol( + group.name, fontName, "math", options, + ["mop", "op-symbol", large ? "large-op" : "small-op"]); + + if (stash.length > 0) { + // We're in \oiint or \oiiint. Overlay the oval. + // TODO: When font glyphs are available, delete this code. + const italic = base.italic; + const oval = buildCommon.staticSvg(stash + "Size" + + (large ? 
"2" : "1"), options); + base = buildCommon.makeVList({ + positionType: "individualShift", + children: [ + {type: "elem", elem: base, shift: 0}, + {type: "elem", elem: oval, shift: large ? 0.08 : 0}, + ], + }, options); + group.name = "\\" + stash; + base.classes.unshift("mop"); + // $FlowFixMe + base.italic = italic; + } + } else if (group.body) { + // If this is a list, compose that list. + const inner = html.buildExpression(group.body, options, true); + if (inner.length === 1 && inner[0] instanceof SymbolNode) { + base = inner[0]; + base.classes[0] = "mop"; // replace old mclass + } else { + base = buildCommon.makeSpan(["mop"], inner, options); + } + } else { + // Otherwise, this is a text operator. Build the text from the + // operator's name. + const output = []; + for (let i = 1; i < group.name.length; i++) { + output.push(buildCommon.mathsym(group.name[i], group.mode, options)); + } + base = buildCommon.makeSpan(["mop"], output, options); + } + + // If content of op is a single symbol, shift it vertically. + let baseShift = 0; + let slant = 0; + if ((base instanceof SymbolNode + || group.name === "\\oiint" || group.name === "\\oiiint") + && !group.suppressBaseShift) { + // We suppress the shift of the base of \overset and \underset. Otherwise, + // shift the symbol so its center lies on the axis (rule 13). It + // appears that our fonts have the centers of the symbols already + // almost on the axis, so these numbers are very small. Note we + // don't actually apply this here, but instead it is used either in + // the vlist creation or separately when there are no limits. + baseShift = (base.height - base.depth) / 2 - + options.fontMetrics().axisHeight; + + // The slant of the symbol is just its italic correction. 
+ // $FlowFixMe + slant = base.italic; + } + + if (hasLimits) { + return assembleSupSub(base, supGroup, subGroup, options, + style, slant, baseShift); + + } else { + if (baseShift) { + base.style.position = "relative"; + base.style.top = makeEm(baseShift); + } + + return base; + } +}; + +const mathmlBuilder: MathMLBuilder<"op"> = (group, options) => { + let node; + + if (group.symbol) { + // This is a symbol. Just add the symbol. + node = new mathMLTree.MathNode( + "mo", [mml.makeText(group.name, group.mode)]); + if (utils.contains(noSuccessor, group.name)) { + node.setAttribute("largeop", "false"); + } + } else if (group.body) { + // This is an operator with children. Add them. + node = new mathMLTree.MathNode( + "mo", mml.buildExpression(group.body, options)); + } else { + // This is a text operator. Add all of the characters from the + // operator's name. + node = new mathMLTree.MathNode( + "mi", [new mathMLTree.TextNode(group.name.slice(1))]); + // Append an <mo>&ApplyFunction;</mo>. + // ref: https://www.w3.org/TR/REC-MathML/chap3_2.html#sec3.2.4 + const operator = new mathMLTree.MathNode("mo", + [mml.makeText("\u2061", "text")]); + if (group.parentIsSupSub) { + node = new mathMLTree.MathNode("mrow", [node, operator]); + } else { + node = mathMLTree.newDocumentFragment([node, operator]); + } + } + + return node; +}; + +const singleCharBigOps: {[string]: string} = { + "\u220F": "\\prod", + "\u2210": "\\coprod", + "\u2211": "\\sum", + "\u22c0": "\\bigwedge", + "\u22c1": "\\bigvee", + "\u22c2": "\\bigcap", + "\u22c3": "\\bigcup", + "\u2a00": "\\bigodot", + "\u2a01": "\\bigoplus", + "\u2a02": "\\bigotimes", + "\u2a04": "\\biguplus", + "\u2a06": "\\bigsqcup", +}; + +defineFunction({ + type: "op", + names: [ + "\\coprod", "\\bigvee", "\\bigwedge", "\\biguplus", "\\bigcap", + "\\bigcup", "\\intop", "\\prod", "\\sum", "\\bigotimes", + "\\bigoplus", "\\bigodot", "\\bigsqcup", "\\smallint", "\u220F", + "\u2210", "\u2211", "\u22c0", "\u22c1", "\u22c2", "\u22c3", "\u2a00", + "\u2a01", "\u2a02",
"\u2a04", "\u2a06", + ], + props: { + numArgs: 0, + }, + handler: ({parser, funcName}, args) => { + let fName = funcName; + if (fName.length === 1) { + fName = singleCharBigOps[fName]; + } + return { + type: "op", + mode: parser.mode, + limits: true, + parentIsSupSub: false, + symbol: true, + name: fName, + }; + }, + htmlBuilder, + mathmlBuilder, +}); + +// Note: calling defineFunction with a type that's already been defined only +// works because the same htmlBuilder and mathmlBuilder are being used. +defineFunction({ + type: "op", + names: ["\\mathop"], + props: { + numArgs: 1, + primitive: true, + }, + handler: ({parser}, args) => { + const body = args[0]; + return { + type: "op", + mode: parser.mode, + limits: false, + parentIsSupSub: false, + symbol: false, + body: ordargument(body), + }; + }, + htmlBuilder, + mathmlBuilder, +}); + +// There are 2 flags for operators; whether they produce limits in +// displaystyle, and whether they are symbols and should grow in +// displaystyle. These four groups cover the four possible choices. 
+ +const singleCharIntegrals: {[string]: string} = { + "\u222b": "\\int", + "\u222c": "\\iint", + "\u222d": "\\iiint", + "\u222e": "\\oint", + "\u222f": "\\oiint", + "\u2230": "\\oiiint", +}; + +// No limits, not symbols +defineFunction({ + type: "op", + names: [ + "\\arcsin", "\\arccos", "\\arctan", "\\arctg", "\\arcctg", + "\\arg", "\\ch", "\\cos", "\\cosec", "\\cosh", "\\cot", "\\cotg", + "\\coth", "\\csc", "\\ctg", "\\cth", "\\deg", "\\dim", "\\exp", + "\\hom", "\\ker", "\\lg", "\\ln", "\\log", "\\sec", "\\sin", + "\\sinh", "\\sh", "\\tan", "\\tanh", "\\tg", "\\th", + ], + props: { + numArgs: 0, + }, + handler({parser, funcName}) { + return { + type: "op", + mode: parser.mode, + limits: false, + parentIsSupSub: false, + symbol: false, + name: funcName, + }; + }, + htmlBuilder, + mathmlBuilder, +}); + +// Limits, not symbols +defineFunction({ + type: "op", + names: [ + "\\det", "\\gcd", "\\inf", "\\lim", "\\max", "\\min", "\\Pr", "\\sup", + ], + props: { + numArgs: 0, + }, + handler({parser, funcName}) { + return { + type: "op", + mode: parser.mode, + limits: true, + parentIsSupSub: false, + symbol: false, + name: funcName, + }; + }, + htmlBuilder, + mathmlBuilder, +}); + +// No limits, symbols +defineFunction({ + type: "op", + names: [ + "\\int", "\\iint", "\\iiint", "\\oint", "\\oiint", "\\oiiint", + "\u222b", "\u222c", "\u222d", "\u222e", "\u222f", "\u2230", + ], + props: { + numArgs: 0, + }, + handler({parser, funcName}) { + let fName = funcName; + if (fName.length === 1) { + fName = singleCharIntegrals[fName]; + } + return { + type: "op", + mode: parser.mode, + limits: false, + parentIsSupSub: false, + symbol: true, + name: fName, + }; + }, + htmlBuilder, + mathmlBuilder, +}); diff --git a/lib/scripts/data/katex-symbols.js b/lib/scripts/data/katex-symbols.js new file mode 100644 index 0000000..76d19e4 --- /dev/null +++ b/lib/scripts/data/katex-symbols.js @@ -0,0 +1,895 @@ +/** + * Vendored from KaTeX v0.16.22 (https://github.com/KaTeX/KaTeX). 
+ * SPDX-License-Identifier: MIT + * Regenerate derived outputs: pnpm generate:katex + */ +// @flow +/** + * This file holds a list of all no-argument functions and single-character + * symbols (like 'a' or ';'). + * + * For each of the symbols, there are three properties they can have: + * - font (required): the font to be used for this symbol. Either "main" (the + normal font), or "ams" (the ams fonts). + * - group (required): the ParseNode group type the symbol should have (i.e. + "textord", "mathord", etc). + See https://github.com/KaTeX/KaTeX/wiki/Examining-TeX#group-types + * - replace: the character that this symbol or function should be + * replaced with (i.e. "\phi" has a replace value of "\u03d5", the phi + * character in the main font). + * + * The outermost map in the table indicates what mode the symbols should be + * accepted in (e.g. "math" or "text"). + */ + +import type {Mode} from "./types"; + +type Font = "main" | "ams"; +// Some of these have a "-token" suffix since these are also used as `ParseNode` +// types for raw text tokens, and we want to avoid conflicts with higher-level +// `ParseNode` types. These `ParseNode`s are constructed within `Parser` by +// looking up the `symbols` map. +export const ATOMS = { + "bin": 1, + "close": 1, + "inner": 1, + "open": 1, + "punct": 1, + "rel": 1, +}; +export const NON_ATOMS = { + "accent-token": 1, + "mathord": 1, + "op-token": 1, + "spacing": 1, + "textord": 1, +}; + +export type Atom = $Keys<typeof ATOMS>; +export type NonAtom = $Keys<typeof NON_ATOMS> +export type Group = Atom | NonAtom; +type CharInfoMap = {[string]: {font: Font, group: Group, replace: ?string}}; + +const symbols: {[Mode]: CharInfoMap} = { + "math": {}, + "text": {}, +}; +export default symbols; + +/** `acceptUnicodeChar = true` is only applicable if `replace` is set.
*/ +export function defineSymbol( + mode: Mode, + font: Font, + group: Group, + replace: ?string, + name: string, + acceptUnicodeChar?: boolean, +) { + symbols[mode][name] = {font, group, replace}; + + if (acceptUnicodeChar && replace) { + symbols[mode][replace] = symbols[mode][name]; + } +} + +// Some abbreviations for commonly used strings. +// This helps minify the code, and also spotting typos using jshint. + +// modes: +const math = "math"; +const text = "text"; + +// fonts: +const main = "main"; +const ams = "ams"; + +// groups: +const accent = "accent-token"; +const bin = "bin"; +const close = "close"; +const inner = "inner"; +const mathord = "mathord"; +const op = "op-token"; +const open = "open"; +const punct = "punct"; +const rel = "rel"; +const spacing = "spacing"; +const textord = "textord"; + +// Now comes the symbol table + +// Relation Symbols +defineSymbol(math, main, rel, "\u2261", "\\equiv", true); +defineSymbol(math, main, rel, "\u227a", "\\prec", true); +defineSymbol(math, main, rel, "\u227b", "\\succ", true); +defineSymbol(math, main, rel, "\u223c", "\\sim", true); +defineSymbol(math, main, rel, "\u22a5", "\\perp"); +defineSymbol(math, main, rel, "\u2aaf", "\\preceq", true); +defineSymbol(math, main, rel, "\u2ab0", "\\succeq", true); +defineSymbol(math, main, rel, "\u2243", "\\simeq", true); +defineSymbol(math, main, rel, "\u2223", "\\mid", true); +defineSymbol(math, main, rel, "\u226a", "\\ll", true); +defineSymbol(math, main, rel, "\u226b", "\\gg", true); +defineSymbol(math, main, rel, "\u224d", "\\asymp", true); +defineSymbol(math, main, rel, "\u2225", "\\parallel"); +defineSymbol(math, main, rel, "\u22c8", "\\bowtie", true); +defineSymbol(math, main, rel, "\u2323", "\\smile", true); +defineSymbol(math, main, rel, "\u2291", "\\sqsubseteq", true); +defineSymbol(math, main, rel, "\u2292", "\\sqsupseteq", true); +defineSymbol(math, main, rel, "\u2250", "\\doteq", true); +defineSymbol(math, main, rel, "\u2322", "\\frown", true); 
+defineSymbol(math, main, rel, "\u220b", "\\ni", true); +defineSymbol(math, main, rel, "\u221d", "\\propto", true); +defineSymbol(math, main, rel, "\u22a2", "\\vdash", true); +defineSymbol(math, main, rel, "\u22a3", "\\dashv", true); +defineSymbol(math, main, rel, "\u220b", "\\owns"); + +// Punctuation +defineSymbol(math, main, punct, "\u002e", "\\ldotp"); +defineSymbol(math, main, punct, "\u22c5", "\\cdotp"); + +// Misc Symbols +defineSymbol(math, main, textord, "\u0023", "\\#"); +defineSymbol(text, main, textord, "\u0023", "\\#"); +defineSymbol(math, main, textord, "\u0026", "\\&"); +defineSymbol(text, main, textord, "\u0026", "\\&"); +defineSymbol(math, main, textord, "\u2135", "\\aleph", true); +defineSymbol(math, main, textord, "\u2200", "\\forall", true); +defineSymbol(math, main, textord, "\u210f", "\\hbar", true); +defineSymbol(math, main, textord, "\u2203", "\\exists", true); +defineSymbol(math, main, textord, "\u2207", "\\nabla", true); +defineSymbol(math, main, textord, "\u266d", "\\flat", true); +defineSymbol(math, main, textord, "\u2113", "\\ell", true); +defineSymbol(math, main, textord, "\u266e", "\\natural", true); +defineSymbol(math, main, textord, "\u2663", "\\clubsuit", true); +defineSymbol(math, main, textord, "\u2118", "\\wp", true); +defineSymbol(math, main, textord, "\u266f", "\\sharp", true); +defineSymbol(math, main, textord, "\u2662", "\\diamondsuit", true); +defineSymbol(math, main, textord, "\u211c", "\\Re", true); +defineSymbol(math, main, textord, "\u2661", "\\heartsuit", true); +defineSymbol(math, main, textord, "\u2111", "\\Im", true); +defineSymbol(math, main, textord, "\u2660", "\\spadesuit", true); +defineSymbol(math, main, textord, "\u00a7", "\\S", true); +defineSymbol(text, main, textord, "\u00a7", "\\S"); +defineSymbol(math, main, textord, "\u00b6", "\\P", true); +defineSymbol(text, main, textord, "\u00b6", "\\P"); + +// Math and Text +defineSymbol(math, main, textord, "\u2020", "\\dag"); +defineSymbol(text, main, textord, 
"\u2020", "\\dag"); +defineSymbol(text, main, textord, "\u2020", "\\textdagger"); +defineSymbol(math, main, textord, "\u2021", "\\ddag"); +defineSymbol(text, main, textord, "\u2021", "\\ddag"); +defineSymbol(text, main, textord, "\u2021", "\\textdaggerdbl"); + +// Large Delimiters +defineSymbol(math, main, close, "\u23b1", "\\rmoustache", true); +defineSymbol(math, main, open, "\u23b0", "\\lmoustache", true); +defineSymbol(math, main, close, "\u27ef", "\\rgroup", true); +defineSymbol(math, main, open, "\u27ee", "\\lgroup", true); + +// Binary Operators +defineSymbol(math, main, bin, "\u2213", "\\mp", true); +defineSymbol(math, main, bin, "\u2296", "\\ominus", true); +defineSymbol(math, main, bin, "\u228e", "\\uplus", true); +defineSymbol(math, main, bin, "\u2293", "\\sqcap", true); +defineSymbol(math, main, bin, "\u2217", "\\ast"); +defineSymbol(math, main, bin, "\u2294", "\\sqcup", true); +defineSymbol(math, main, bin, "\u25ef", "\\bigcirc", true); +defineSymbol(math, main, bin, "\u2219", "\\bullet", true); +defineSymbol(math, main, bin, "\u2021", "\\ddagger"); +defineSymbol(math, main, bin, "\u2240", "\\wr", true); +defineSymbol(math, main, bin, "\u2a3f", "\\amalg"); +defineSymbol(math, main, bin, "\u0026", "\\And"); // from amsmath + +// Arrow Symbols +defineSymbol(math, main, rel, "\u27f5", "\\longleftarrow", true); +defineSymbol(math, main, rel, "\u21d0", "\\Leftarrow", true); +defineSymbol(math, main, rel, "\u27f8", "\\Longleftarrow", true); +defineSymbol(math, main, rel, "\u27f6", "\\longrightarrow", true); +defineSymbol(math, main, rel, "\u21d2", "\\Rightarrow", true); +defineSymbol(math, main, rel, "\u27f9", "\\Longrightarrow", true); +defineSymbol(math, main, rel, "\u2194", "\\leftrightarrow", true); +defineSymbol(math, main, rel, "\u27f7", "\\longleftrightarrow", true); +defineSymbol(math, main, rel, "\u21d4", "\\Leftrightarrow", true); +defineSymbol(math, main, rel, "\u27fa", "\\Longleftrightarrow", true); +defineSymbol(math, main, rel, "\u21a6", 
"\\mapsto", true); +defineSymbol(math, main, rel, "\u27fc", "\\longmapsto", true); +defineSymbol(math, main, rel, "\u2197", "\\nearrow", true); +defineSymbol(math, main, rel, "\u21a9", "\\hookleftarrow", true); +defineSymbol(math, main, rel, "\u21aa", "\\hookrightarrow", true); +defineSymbol(math, main, rel, "\u2198", "\\searrow", true); +defineSymbol(math, main, rel, "\u21bc", "\\leftharpoonup", true); +defineSymbol(math, main, rel, "\u21c0", "\\rightharpoonup", true); +defineSymbol(math, main, rel, "\u2199", "\\swarrow", true); +defineSymbol(math, main, rel, "\u21bd", "\\leftharpoondown", true); +defineSymbol(math, main, rel, "\u21c1", "\\rightharpoondown", true); +defineSymbol(math, main, rel, "\u2196", "\\nwarrow", true); +defineSymbol(math, main, rel, "\u21cc", "\\rightleftharpoons", true); + +// AMS Negated Binary Relations +defineSymbol(math, ams, rel, "\u226e", "\\nless", true); +// Symbol names preceded by "@" each have a corresponding macro. +defineSymbol(math, ams, rel, "\ue010", "\\@nleqslant"); +defineSymbol(math, ams, rel, "\ue011", "\\@nleqq"); +defineSymbol(math, ams, rel, "\u2a87", "\\lneq", true); +defineSymbol(math, ams, rel, "\u2268", "\\lneqq", true); +defineSymbol(math, ams, rel, "\ue00c", "\\@lvertneqq"); +defineSymbol(math, ams, rel, "\u22e6", "\\lnsim", true); +defineSymbol(math, ams, rel, "\u2a89", "\\lnapprox", true); +defineSymbol(math, ams, rel, "\u2280", "\\nprec", true); +// unicode-math maps \u22e0 to \npreccurlyeq. We'll use the AMS synonym. 
+defineSymbol(math, ams, rel, "\u22e0", "\\npreceq", true); +defineSymbol(math, ams, rel, "\u22e8", "\\precnsim", true); +defineSymbol(math, ams, rel, "\u2ab9", "\\precnapprox", true); +defineSymbol(math, ams, rel, "\u2241", "\\nsim", true); +defineSymbol(math, ams, rel, "\ue006", "\\@nshortmid"); +defineSymbol(math, ams, rel, "\u2224", "\\nmid", true); +defineSymbol(math, ams, rel, "\u22ac", "\\nvdash", true); +defineSymbol(math, ams, rel, "\u22ad", "\\nvDash", true); +defineSymbol(math, ams, rel, "\u22ea", "\\ntriangleleft"); +defineSymbol(math, ams, rel, "\u22ec", "\\ntrianglelefteq", true); +defineSymbol(math, ams, rel, "\u228a", "\\subsetneq", true); +defineSymbol(math, ams, rel, "\ue01a", "\\@varsubsetneq"); +defineSymbol(math, ams, rel, "\u2acb", "\\subsetneqq", true); +defineSymbol(math, ams, rel, "\ue017", "\\@varsubsetneqq"); +defineSymbol(math, ams, rel, "\u226f", "\\ngtr", true); +defineSymbol(math, ams, rel, "\ue00f", "\\@ngeqslant"); +defineSymbol(math, ams, rel, "\ue00e", "\\@ngeqq"); +defineSymbol(math, ams, rel, "\u2a88", "\\gneq", true); +defineSymbol(math, ams, rel, "\u2269", "\\gneqq", true); +defineSymbol(math, ams, rel, "\ue00d", "\\@gvertneqq"); +defineSymbol(math, ams, rel, "\u22e7", "\\gnsim", true); +defineSymbol(math, ams, rel, "\u2a8a", "\\gnapprox", true); +defineSymbol(math, ams, rel, "\u2281", "\\nsucc", true); +// unicode-math maps \u22e1 to \nsucccurlyeq. We'll use the AMS synonym. +defineSymbol(math, ams, rel, "\u22e1", "\\nsucceq", true); +defineSymbol(math, ams, rel, "\u22e9", "\\succnsim", true); +defineSymbol(math, ams, rel, "\u2aba", "\\succnapprox", true); +// unicode-math maps \u2246 to \simneqq. We'll use the AMS synonym. 
+defineSymbol(math, ams, rel, "\u2246", "\\ncong", true); +defineSymbol(math, ams, rel, "\ue007", "\\@nshortparallel"); +defineSymbol(math, ams, rel, "\u2226", "\\nparallel", true); +defineSymbol(math, ams, rel, "\u22af", "\\nVDash", true); +defineSymbol(math, ams, rel, "\u22eb", "\\ntriangleright"); +defineSymbol(math, ams, rel, "\u22ed", "\\ntrianglerighteq", true); +defineSymbol(math, ams, rel, "\ue018", "\\@nsupseteqq"); +defineSymbol(math, ams, rel, "\u228b", "\\supsetneq", true); +defineSymbol(math, ams, rel, "\ue01b", "\\@varsupsetneq"); +defineSymbol(math, ams, rel, "\u2acc", "\\supsetneqq", true); +defineSymbol(math, ams, rel, "\ue019", "\\@varsupsetneqq"); +defineSymbol(math, ams, rel, "\u22ae", "\\nVdash", true); +defineSymbol(math, ams, rel, "\u2ab5", "\\precneqq", true); +defineSymbol(math, ams, rel, "\u2ab6", "\\succneqq", true); +defineSymbol(math, ams, rel, "\ue016", "\\@nsubseteqq"); +defineSymbol(math, ams, bin, "\u22b4", "\\unlhd"); +defineSymbol(math, ams, bin, "\u22b5", "\\unrhd"); + +// AMS Negated Arrows +defineSymbol(math, ams, rel, "\u219a", "\\nleftarrow", true); +defineSymbol(math, ams, rel, "\u219b", "\\nrightarrow", true); +defineSymbol(math, ams, rel, "\u21cd", "\\nLeftarrow", true); +defineSymbol(math, ams, rel, "\u21cf", "\\nRightarrow", true); +defineSymbol(math, ams, rel, "\u21ae", "\\nleftrightarrow", true); +defineSymbol(math, ams, rel, "\u21ce", "\\nLeftrightarrow", true); + +// AMS Misc +defineSymbol(math, ams, rel, "\u25b3", "\\vartriangle"); +defineSymbol(math, ams, textord, "\u210f", "\\hslash"); +defineSymbol(math, ams, textord, "\u25bd", "\\triangledown"); +defineSymbol(math, ams, textord, "\u25ca", "\\lozenge"); +defineSymbol(math, ams, textord, "\u24c8", "\\circledS"); +defineSymbol(math, ams, textord, "\u00ae", "\\circledR"); +defineSymbol(text, ams, textord, "\u00ae", "\\circledR"); +defineSymbol(math, ams, textord, "\u2221", "\\measuredangle", true); +defineSymbol(math, ams, textord, "\u2204", "\\nexists"); 
+defineSymbol(math, ams, textord, "\u2127", "\\mho"); +defineSymbol(math, ams, textord, "\u2132", "\\Finv", true); +defineSymbol(math, ams, textord, "\u2141", "\\Game", true); +defineSymbol(math, ams, textord, "\u2035", "\\backprime"); +defineSymbol(math, ams, textord, "\u25b2", "\\blacktriangle"); +defineSymbol(math, ams, textord, "\u25bc", "\\blacktriangledown"); +defineSymbol(math, ams, textord, "\u25a0", "\\blacksquare"); +defineSymbol(math, ams, textord, "\u29eb", "\\blacklozenge"); +defineSymbol(math, ams, textord, "\u2605", "\\bigstar"); +defineSymbol(math, ams, textord, "\u2222", "\\sphericalangle", true); +defineSymbol(math, ams, textord, "\u2201", "\\complement", true); +// unicode-math maps U+F0 to \matheth. We map to AMS function \eth +defineSymbol(math, ams, textord, "\u00f0", "\\eth", true); +defineSymbol(text, main, textord, "\u00f0", "\u00f0"); +defineSymbol(math, ams, textord, "\u2571", "\\diagup"); +defineSymbol(math, ams, textord, "\u2572", "\\diagdown"); +defineSymbol(math, ams, textord, "\u25a1", "\\square"); +defineSymbol(math, ams, textord, "\u25a1", "\\Box"); +defineSymbol(math, ams, textord, "\u25ca", "\\Diamond"); +// unicode-math maps U+A5 to \mathyen. 
We map to AMS function \yen +defineSymbol(math, ams, textord, "\u00a5", "\\yen", true); +defineSymbol(text, ams, textord, "\u00a5", "\\yen", true); +defineSymbol(math, ams, textord, "\u2713", "\\checkmark", true); +defineSymbol(text, ams, textord, "\u2713", "\\checkmark"); + +// AMS Hebrew +defineSymbol(math, ams, textord, "\u2136", "\\beth", true); +defineSymbol(math, ams, textord, "\u2138", "\\daleth", true); +defineSymbol(math, ams, textord, "\u2137", "\\gimel", true); + +// AMS Greek +defineSymbol(math, ams, textord, "\u03dd", "\\digamma", true); +defineSymbol(math, ams, textord, "\u03f0", "\\varkappa"); + +// AMS Delimiters +defineSymbol(math, ams, open, "\u250c", "\\@ulcorner", true); +defineSymbol(math, ams, close, "\u2510", "\\@urcorner", true); +defineSymbol(math, ams, open, "\u2514", "\\@llcorner", true); +defineSymbol(math, ams, close, "\u2518", "\\@lrcorner", true); + +// AMS Binary Relations +defineSymbol(math, ams, rel, "\u2266", "\\leqq", true); +defineSymbol(math, ams, rel, "\u2a7d", "\\leqslant", true); +defineSymbol(math, ams, rel, "\u2a95", "\\eqslantless", true); +defineSymbol(math, ams, rel, "\u2272", "\\lesssim", true); +defineSymbol(math, ams, rel, "\u2a85", "\\lessapprox", true); +defineSymbol(math, ams, rel, "\u224a", "\\approxeq", true); +defineSymbol(math, ams, bin, "\u22d6", "\\lessdot"); +defineSymbol(math, ams, rel, "\u22d8", "\\lll", true); +defineSymbol(math, ams, rel, "\u2276", "\\lessgtr", true); +defineSymbol(math, ams, rel, "\u22da", "\\lesseqgtr", true); +defineSymbol(math, ams, rel, "\u2a8b", "\\lesseqqgtr", true); +defineSymbol(math, ams, rel, "\u2251", "\\doteqdot"); +defineSymbol(math, ams, rel, "\u2253", "\\risingdotseq", true); +defineSymbol(math, ams, rel, "\u2252", "\\fallingdotseq", true); +defineSymbol(math, ams, rel, "\u223d", "\\backsim", true); +defineSymbol(math, ams, rel, "\u22cd", "\\backsimeq", true); +defineSymbol(math, ams, rel, "\u2ac5", "\\subseteqq", true); +defineSymbol(math, ams, rel, "\u22d0", 
"\\Subset", true); +defineSymbol(math, ams, rel, "\u228f", "\\sqsubset", true); +defineSymbol(math, ams, rel, "\u227c", "\\preccurlyeq", true); +defineSymbol(math, ams, rel, "\u22de", "\\curlyeqprec", true); +defineSymbol(math, ams, rel, "\u227e", "\\precsim", true); +defineSymbol(math, ams, rel, "\u2ab7", "\\precapprox", true); +defineSymbol(math, ams, rel, "\u22b2", "\\vartriangleleft"); +defineSymbol(math, ams, rel, "\u22b4", "\\trianglelefteq"); +defineSymbol(math, ams, rel, "\u22a8", "\\vDash", true); +defineSymbol(math, ams, rel, "\u22aa", "\\Vvdash", true); +defineSymbol(math, ams, rel, "\u2323", "\\smallsmile"); +defineSymbol(math, ams, rel, "\u2322", "\\smallfrown"); +defineSymbol(math, ams, rel, "\u224f", "\\bumpeq", true); +defineSymbol(math, ams, rel, "\u224e", "\\Bumpeq", true); +defineSymbol(math, ams, rel, "\u2267", "\\geqq", true); +defineSymbol(math, ams, rel, "\u2a7e", "\\geqslant", true); +defineSymbol(math, ams, rel, "\u2a96", "\\eqslantgtr", true); +defineSymbol(math, ams, rel, "\u2273", "\\gtrsim", true); +defineSymbol(math, ams, rel, "\u2a86", "\\gtrapprox", true); +defineSymbol(math, ams, bin, "\u22d7", "\\gtrdot"); +defineSymbol(math, ams, rel, "\u22d9", "\\ggg", true); +defineSymbol(math, ams, rel, "\u2277", "\\gtrless", true); +defineSymbol(math, ams, rel, "\u22db", "\\gtreqless", true); +defineSymbol(math, ams, rel, "\u2a8c", "\\gtreqqless", true); +defineSymbol(math, ams, rel, "\u2256", "\\eqcirc", true); +defineSymbol(math, ams, rel, "\u2257", "\\circeq", true); +defineSymbol(math, ams, rel, "\u225c", "\\triangleq", true); +defineSymbol(math, ams, rel, "\u223c", "\\thicksim"); +defineSymbol(math, ams, rel, "\u2248", "\\thickapprox"); +defineSymbol(math, ams, rel, "\u2ac6", "\\supseteqq", true); +defineSymbol(math, ams, rel, "\u22d1", "\\Supset", true); +defineSymbol(math, ams, rel, "\u2290", "\\sqsupset", true); +defineSymbol(math, ams, rel, "\u227d", "\\succcurlyeq", true); +defineSymbol(math, ams, rel, "\u22df", "\\curlyeqsucc", 
true); +defineSymbol(math, ams, rel, "\u227f", "\\succsim", true); +defineSymbol(math, ams, rel, "\u2ab8", "\\succapprox", true); +defineSymbol(math, ams, rel, "\u22b3", "\\vartriangleright"); +defineSymbol(math, ams, rel, "\u22b5", "\\trianglerighteq"); +defineSymbol(math, ams, rel, "\u22a9", "\\Vdash", true); +defineSymbol(math, ams, rel, "\u2223", "\\shortmid"); +defineSymbol(math, ams, rel, "\u2225", "\\shortparallel"); +defineSymbol(math, ams, rel, "\u226c", "\\between", true); +defineSymbol(math, ams, rel, "\u22d4", "\\pitchfork", true); +defineSymbol(math, ams, rel, "\u221d", "\\varpropto"); +defineSymbol(math, ams, rel, "\u25c0", "\\blacktriangleleft"); +// unicode-math says that \therefore is a mathord atom. +// We kept the amssymb atom type, which is rel. +defineSymbol(math, ams, rel, "\u2234", "\\therefore", true); +defineSymbol(math, ams, rel, "\u220d", "\\backepsilon"); +defineSymbol(math, ams, rel, "\u25b6", "\\blacktriangleright"); +// unicode-math says that \because is a mathord atom. +// We kept the amssymb atom type, which is rel. 
+defineSymbol(math, ams, rel, "\u2235", "\\because", true); +defineSymbol(math, ams, rel, "\u22d8", "\\llless"); +defineSymbol(math, ams, rel, "\u22d9", "\\gggtr"); +defineSymbol(math, ams, bin, "\u22b2", "\\lhd"); +defineSymbol(math, ams, bin, "\u22b3", "\\rhd"); +defineSymbol(math, ams, rel, "\u2242", "\\eqsim", true); +defineSymbol(math, main, rel, "\u22c8", "\\Join"); +defineSymbol(math, ams, rel, "\u2251", "\\Doteq", true); + +// AMS Binary Operators +defineSymbol(math, ams, bin, "\u2214", "\\dotplus", true); +defineSymbol(math, ams, bin, "\u2216", "\\smallsetminus"); +defineSymbol(math, ams, bin, "\u22d2", "\\Cap", true); +defineSymbol(math, ams, bin, "\u22d3", "\\Cup", true); +defineSymbol(math, ams, bin, "\u2a5e", "\\doublebarwedge", true); +defineSymbol(math, ams, bin, "\u229f", "\\boxminus", true); +defineSymbol(math, ams, bin, "\u229e", "\\boxplus", true); +defineSymbol(math, ams, bin, "\u22c7", "\\divideontimes", true); +defineSymbol(math, ams, bin, "\u22c9", "\\ltimes", true); +defineSymbol(math, ams, bin, "\u22ca", "\\rtimes", true); +defineSymbol(math, ams, bin, "\u22cb", "\\leftthreetimes", true); +defineSymbol(math, ams, bin, "\u22cc", "\\rightthreetimes", true); +defineSymbol(math, ams, bin, "\u22cf", "\\curlywedge", true); +defineSymbol(math, ams, bin, "\u22ce", "\\curlyvee", true); +defineSymbol(math, ams, bin, "\u229d", "\\circleddash", true); +defineSymbol(math, ams, bin, "\u229b", "\\circledast", true); +defineSymbol(math, ams, bin, "\u22c5", "\\centerdot"); +defineSymbol(math, ams, bin, "\u22ba", "\\intercal", true); +defineSymbol(math, ams, bin, "\u22d2", "\\doublecap"); +defineSymbol(math, ams, bin, "\u22d3", "\\doublecup"); +defineSymbol(math, ams, bin, "\u22a0", "\\boxtimes", true); + +// AMS Arrows +// Note: unicode-math maps \u21e2 to their own function \rightdasharrow. +// We'll map it to AMS function \dashrightarrow. It produces the same atom. 
+defineSymbol(math, ams, rel, "\u21e2", "\\dashrightarrow", true); +// unicode-math maps \u21e0 to \leftdasharrow. We'll use the AMS synonym. +defineSymbol(math, ams, rel, "\u21e0", "\\dashleftarrow", true); +defineSymbol(math, ams, rel, "\u21c7", "\\leftleftarrows", true); +defineSymbol(math, ams, rel, "\u21c6", "\\leftrightarrows", true); +defineSymbol(math, ams, rel, "\u21da", "\\Lleftarrow", true); +defineSymbol(math, ams, rel, "\u219e", "\\twoheadleftarrow", true); +defineSymbol(math, ams, rel, "\u21a2", "\\leftarrowtail", true); +defineSymbol(math, ams, rel, "\u21ab", "\\looparrowleft", true); +defineSymbol(math, ams, rel, "\u21cb", "\\leftrightharpoons", true); +defineSymbol(math, ams, rel, "\u21b6", "\\curvearrowleft", true); +// unicode-math maps \u21ba to \acwopencirclearrow. We'll use the AMS synonym. +defineSymbol(math, ams, rel, "\u21ba", "\\circlearrowleft", true); +defineSymbol(math, ams, rel, "\u21b0", "\\Lsh", true); +defineSymbol(math, ams, rel, "\u21c8", "\\upuparrows", true); +defineSymbol(math, ams, rel, "\u21bf", "\\upharpoonleft", true); +defineSymbol(math, ams, rel, "\u21c3", "\\downharpoonleft", true); +defineSymbol(math, main, rel, "\u22b6", "\\origof", true); // not in font +defineSymbol(math, main, rel, "\u22b7", "\\imageof", true); // not in font +defineSymbol(math, ams, rel, "\u22b8", "\\multimap", true); +defineSymbol(math, ams, rel, "\u21ad", "\\leftrightsquigarrow", true); +defineSymbol(math, ams, rel, "\u21c9", "\\rightrightarrows", true); +defineSymbol(math, ams, rel, "\u21c4", "\\rightleftarrows", true); +defineSymbol(math, ams, rel, "\u21a0", "\\twoheadrightarrow", true); +defineSymbol(math, ams, rel, "\u21a3", "\\rightarrowtail", true); +defineSymbol(math, ams, rel, "\u21ac", "\\looparrowright", true); +defineSymbol(math, ams, rel, "\u21b7", "\\curvearrowright", true); +// unicode-math maps \u21bb to \cwopencirclearrow. We'll use the AMS synonym. 
+defineSymbol(math, ams, rel, "\u21bb", "\\circlearrowright", true); +defineSymbol(math, ams, rel, "\u21b1", "\\Rsh", true); +defineSymbol(math, ams, rel, "\u21ca", "\\downdownarrows", true); +defineSymbol(math, ams, rel, "\u21be", "\\upharpoonright", true); +defineSymbol(math, ams, rel, "\u21c2", "\\downharpoonright", true); +defineSymbol(math, ams, rel, "\u21dd", "\\rightsquigarrow", true); +defineSymbol(math, ams, rel, "\u21dd", "\\leadsto"); +defineSymbol(math, ams, rel, "\u21db", "\\Rrightarrow", true); +defineSymbol(math, ams, rel, "\u21be", "\\restriction"); + +defineSymbol(math, main, textord, "\u2018", "`"); +defineSymbol(math, main, textord, "$", "\\$"); +defineSymbol(text, main, textord, "$", "\\$"); +defineSymbol(text, main, textord, "$", "\\textdollar"); +defineSymbol(math, main, textord, "%", "\\%"); +defineSymbol(text, main, textord, "%", "\\%"); +defineSymbol(math, main, textord, "_", "\\_"); +defineSymbol(text, main, textord, "_", "\\_"); +defineSymbol(text, main, textord, "_", "\\textunderscore"); +defineSymbol(math, main, textord, "\u2220", "\\angle", true); +defineSymbol(math, main, textord, "\u221e", "\\infty", true); +defineSymbol(math, main, textord, "\u2032", "\\prime"); +defineSymbol(math, main, textord, "\u25b3", "\\triangle"); +defineSymbol(math, main, textord, "\u0393", "\\Gamma", true); +defineSymbol(math, main, textord, "\u0394", "\\Delta", true); +defineSymbol(math, main, textord, "\u0398", "\\Theta", true); +defineSymbol(math, main, textord, "\u039b", "\\Lambda", true); +defineSymbol(math, main, textord, "\u039e", "\\Xi", true); +defineSymbol(math, main, textord, "\u03a0", "\\Pi", true); +defineSymbol(math, main, textord, "\u03a3", "\\Sigma", true); +defineSymbol(math, main, textord, "\u03a5", "\\Upsilon", true); +defineSymbol(math, main, textord, "\u03a6", "\\Phi", true); +defineSymbol(math, main, textord, "\u03a8", "\\Psi", true); +defineSymbol(math, main, textord, "\u03a9", "\\Omega", true); +defineSymbol(math, main, textord, "A", 
"\u0391"); +defineSymbol(math, main, textord, "B", "\u0392"); +defineSymbol(math, main, textord, "E", "\u0395"); +defineSymbol(math, main, textord, "Z", "\u0396"); +defineSymbol(math, main, textord, "H", "\u0397"); +defineSymbol(math, main, textord, "I", "\u0399"); +defineSymbol(math, main, textord, "K", "\u039A"); +defineSymbol(math, main, textord, "M", "\u039C"); +defineSymbol(math, main, textord, "N", "\u039D"); +defineSymbol(math, main, textord, "O", "\u039F"); +defineSymbol(math, main, textord, "P", "\u03A1"); +defineSymbol(math, main, textord, "T", "\u03A4"); +defineSymbol(math, main, textord, "X", "\u03A7"); +defineSymbol(math, main, textord, "\u00ac", "\\neg", true); +defineSymbol(math, main, textord, "\u00ac", "\\lnot"); +defineSymbol(math, main, textord, "\u22a4", "\\top"); +defineSymbol(math, main, textord, "\u22a5", "\\bot"); +defineSymbol(math, main, textord, "\u2205", "\\emptyset"); +defineSymbol(math, ams, textord, "\u2205", "\\varnothing"); +defineSymbol(math, main, mathord, "\u03b1", "\\alpha", true); +defineSymbol(math, main, mathord, "\u03b2", "\\beta", true); +defineSymbol(math, main, mathord, "\u03b3", "\\gamma", true); +defineSymbol(math, main, mathord, "\u03b4", "\\delta", true); +defineSymbol(math, main, mathord, "\u03f5", "\\epsilon", true); +defineSymbol(math, main, mathord, "\u03b6", "\\zeta", true); +defineSymbol(math, main, mathord, "\u03b7", "\\eta", true); +defineSymbol(math, main, mathord, "\u03b8", "\\theta", true); +defineSymbol(math, main, mathord, "\u03b9", "\\iota", true); +defineSymbol(math, main, mathord, "\u03ba", "\\kappa", true); +defineSymbol(math, main, mathord, "\u03bb", "\\lambda", true); +defineSymbol(math, main, mathord, "\u03bc", "\\mu", true); +defineSymbol(math, main, mathord, "\u03bd", "\\nu", true); +defineSymbol(math, main, mathord, "\u03be", "\\xi", true); +defineSymbol(math, main, mathord, "\u03bf", "\\omicron", true); +defineSymbol(math, main, mathord, "\u03c0", "\\pi", true); +defineSymbol(math, main, 
mathord, "\u03c1", "\\rho", true); +defineSymbol(math, main, mathord, "\u03c3", "\\sigma", true); +defineSymbol(math, main, mathord, "\u03c4", "\\tau", true); +defineSymbol(math, main, mathord, "\u03c5", "\\upsilon", true); +defineSymbol(math, main, mathord, "\u03d5", "\\phi", true); +defineSymbol(math, main, mathord, "\u03c7", "\\chi", true); +defineSymbol(math, main, mathord, "\u03c8", "\\psi", true); +defineSymbol(math, main, mathord, "\u03c9", "\\omega", true); +defineSymbol(math, main, mathord, "\u03b5", "\\varepsilon", true); +defineSymbol(math, main, mathord, "\u03d1", "\\vartheta", true); +defineSymbol(math, main, mathord, "\u03d6", "\\varpi", true); +defineSymbol(math, main, mathord, "\u03f1", "\\varrho", true); +defineSymbol(math, main, mathord, "\u03c2", "\\varsigma", true); +defineSymbol(math, main, mathord, "\u03c6", "\\varphi", true); +defineSymbol(math, main, bin, "\u2217", "*", true); +defineSymbol(math, main, bin, "+", "+"); +defineSymbol(math, main, bin, "\u2212", "-", true); +defineSymbol(math, main, bin, "\u22c5", "\\cdot", true); +defineSymbol(math, main, bin, "\u2218", "\\circ", true); +defineSymbol(math, main, bin, "\u00f7", "\\div", true); +defineSymbol(math, main, bin, "\u00b1", "\\pm", true); +defineSymbol(math, main, bin, "\u00d7", "\\times", true); +defineSymbol(math, main, bin, "\u2229", "\\cap", true); +defineSymbol(math, main, bin, "\u222a", "\\cup", true); +defineSymbol(math, main, bin, "\u2216", "\\setminus", true); +defineSymbol(math, main, bin, "\u2227", "\\land"); +defineSymbol(math, main, bin, "\u2228", "\\lor"); +defineSymbol(math, main, bin, "\u2227", "\\wedge", true); +defineSymbol(math, main, bin, "\u2228", "\\vee", true); +defineSymbol(math, main, textord, "\u221a", "\\surd"); +defineSymbol(math, main, open, "\u27e8", "\\langle", true); +defineSymbol(math, main, open, "\u2223", "\\lvert"); +defineSymbol(math, main, open, "\u2225", "\\lVert"); +defineSymbol(math, main, close, "?", "?"); +defineSymbol(math, main, close, "!", 
"!"); +defineSymbol(math, main, close, "\u27e9", "\\rangle", true); +defineSymbol(math, main, close, "\u2223", "\\rvert"); +defineSymbol(math, main, close, "\u2225", "\\rVert"); +defineSymbol(math, main, rel, "=", "="); +defineSymbol(math, main, rel, ":", ":"); +defineSymbol(math, main, rel, "\u2248", "\\approx", true); +defineSymbol(math, main, rel, "\u2245", "\\cong", true); +defineSymbol(math, main, rel, "\u2265", "\\ge"); +defineSymbol(math, main, rel, "\u2265", "\\geq", true); +defineSymbol(math, main, rel, "\u2190", "\\gets"); +defineSymbol(math, main, rel, ">", "\\gt", true); +defineSymbol(math, main, rel, "\u2208", "\\in", true); +defineSymbol(math, main, rel, "\ue020", "\\@not"); +defineSymbol(math, main, rel, "\u2282", "\\subset", true); +defineSymbol(math, main, rel, "\u2283", "\\supset", true); +defineSymbol(math, main, rel, "\u2286", "\\subseteq", true); +defineSymbol(math, main, rel, "\u2287", "\\supseteq", true); +defineSymbol(math, ams, rel, "\u2288", "\\nsubseteq", true); +defineSymbol(math, ams, rel, "\u2289", "\\nsupseteq", true); +defineSymbol(math, main, rel, "\u22a8", "\\models"); +defineSymbol(math, main, rel, "\u2190", "\\leftarrow", true); +defineSymbol(math, main, rel, "\u2264", "\\le"); +defineSymbol(math, main, rel, "\u2264", "\\leq", true); +defineSymbol(math, main, rel, "<", "\\lt", true); +defineSymbol(math, main, rel, "\u2192", "\\rightarrow", true); +defineSymbol(math, main, rel, "\u2192", "\\to"); +defineSymbol(math, ams, rel, "\u2271", "\\ngeq", true); +defineSymbol(math, ams, rel, "\u2270", "\\nleq", true); +defineSymbol(math, main, spacing, "\u00a0", "\\ "); +defineSymbol(math, main, spacing, "\u00a0", "\\space"); +// Ref: LaTeX Source 2e: \DeclareRobustCommand{\nobreakspace}{% +defineSymbol(math, main, spacing, "\u00a0", "\\nobreakspace"); +defineSymbol(text, main, spacing, "\u00a0", "\\ "); +defineSymbol(text, main, spacing, "\u00a0", " "); +defineSymbol(text, main, spacing, "\u00a0", "\\space"); +defineSymbol(text, main, 
spacing, "\u00a0", "\\nobreakspace"); +defineSymbol(math, main, spacing, null, "\\nobreak"); +defineSymbol(math, main, spacing, null, "\\allowbreak"); +defineSymbol(math, main, punct, ",", ","); +defineSymbol(math, main, punct, ";", ";"); +defineSymbol(math, ams, bin, "\u22bc", "\\barwedge", true); +defineSymbol(math, ams, bin, "\u22bb", "\\veebar", true); +defineSymbol(math, main, bin, "\u2299", "\\odot", true); +defineSymbol(math, main, bin, "\u2295", "\\oplus", true); +defineSymbol(math, main, bin, "\u2297", "\\otimes", true); +defineSymbol(math, main, textord, "\u2202", "\\partial", true); +defineSymbol(math, main, bin, "\u2298", "\\oslash", true); +defineSymbol(math, ams, bin, "\u229a", "\\circledcirc", true); +defineSymbol(math, ams, bin, "\u22a1", "\\boxdot", true); +defineSymbol(math, main, bin, "\u25b3", "\\bigtriangleup"); +defineSymbol(math, main, bin, "\u25bd", "\\bigtriangledown"); +defineSymbol(math, main, bin, "\u2020", "\\dagger"); +defineSymbol(math, main, bin, "\u22c4", "\\diamond"); +defineSymbol(math, main, bin, "\u22c6", "\\star"); +defineSymbol(math, main, bin, "\u25c3", "\\triangleleft"); +defineSymbol(math, main, bin, "\u25b9", "\\triangleright"); +defineSymbol(math, main, open, "{", "\\{"); +defineSymbol(text, main, textord, "{", "\\{"); +defineSymbol(text, main, textord, "{", "\\textbraceleft"); +defineSymbol(math, main, close, "}", "\\}"); +defineSymbol(text, main, textord, "}", "\\}"); +defineSymbol(text, main, textord, "}", "\\textbraceright"); +defineSymbol(math, main, open, "{", "\\lbrace"); +defineSymbol(math, main, close, "}", "\\rbrace"); +defineSymbol(math, main, open, "[", "\\lbrack", true); +defineSymbol(text, main, textord, "[", "\\lbrack", true); +defineSymbol(math, main, close, "]", "\\rbrack", true); +defineSymbol(text, main, textord, "]", "\\rbrack", true); +defineSymbol(math, main, open, "(", "\\lparen", true); +defineSymbol(math, main, close, ")", "\\rparen", true); +defineSymbol(text, main, textord, "<", "\\textless", 
true); // in T1 fontenc +defineSymbol(text, main, textord, ">", "\\textgreater", true); // in T1 fontenc +defineSymbol(math, main, open, "\u230a", "\\lfloor", true); +defineSymbol(math, main, close, "\u230b", "\\rfloor", true); +defineSymbol(math, main, open, "\u2308", "\\lceil", true); +defineSymbol(math, main, close, "\u2309", "\\rceil", true); +defineSymbol(math, main, textord, "\\", "\\backslash"); +defineSymbol(math, main, textord, "\u2223", "|"); +defineSymbol(math, main, textord, "\u2223", "\\vert"); +defineSymbol(text, main, textord, "|", "\\textbar", true); // in T1 fontenc +defineSymbol(math, main, textord, "\u2225", "\\|"); +defineSymbol(math, main, textord, "\u2225", "\\Vert"); +defineSymbol(text, main, textord, "\u2225", "\\textbardbl"); +defineSymbol(text, main, textord, "~", "\\textasciitilde"); +defineSymbol(text, main, textord, "\\", "\\textbackslash"); +defineSymbol(text, main, textord, "^", "\\textasciicircum"); +defineSymbol(math, main, rel, "\u2191", "\\uparrow", true); +defineSymbol(math, main, rel, "\u21d1", "\\Uparrow", true); +defineSymbol(math, main, rel, "\u2193", "\\downarrow", true); +defineSymbol(math, main, rel, "\u21d3", "\\Downarrow", true); +defineSymbol(math, main, rel, "\u2195", "\\updownarrow", true); +defineSymbol(math, main, rel, "\u21d5", "\\Updownarrow", true); +defineSymbol(math, main, op, "\u2210", "\\coprod"); +defineSymbol(math, main, op, "\u22c1", "\\bigvee"); +defineSymbol(math, main, op, "\u22c0", "\\bigwedge"); +defineSymbol(math, main, op, "\u2a04", "\\biguplus"); +defineSymbol(math, main, op, "\u22c2", "\\bigcap"); +defineSymbol(math, main, op, "\u22c3", "\\bigcup"); +defineSymbol(math, main, op, "\u222b", "\\int"); +defineSymbol(math, main, op, "\u222b", "\\intop"); +defineSymbol(math, main, op, "\u222c", "\\iint"); +defineSymbol(math, main, op, "\u222d", "\\iiint"); +defineSymbol(math, main, op, "\u220f", "\\prod"); +defineSymbol(math, main, op, "\u2211", "\\sum"); +defineSymbol(math, main, op, "\u2a02", 
"\\bigotimes"); +defineSymbol(math, main, op, "\u2a01", "\\bigoplus"); +defineSymbol(math, main, op, "\u2a00", "\\bigodot"); +defineSymbol(math, main, op, "\u222e", "\\oint"); +defineSymbol(math, main, op, "\u222f", "\\oiint"); +defineSymbol(math, main, op, "\u2230", "\\oiiint"); +defineSymbol(math, main, op, "\u2a06", "\\bigsqcup"); +defineSymbol(math, main, op, "\u222b", "\\smallint"); +defineSymbol(text, main, inner, "\u2026", "\\textellipsis"); +defineSymbol(math, main, inner, "\u2026", "\\mathellipsis"); +defineSymbol(text, main, inner, "\u2026", "\\ldots", true); +defineSymbol(math, main, inner, "\u2026", "\\ldots", true); +defineSymbol(math, main, inner, "\u22ef", "\\@cdots", true); +defineSymbol(math, main, inner, "\u22f1", "\\ddots", true); +// \vdots is a macro that uses one of these two symbols (with made-up names): +defineSymbol(math, main, textord, "\u22ee", "\\varvdots"); +defineSymbol(text, main, textord, "\u22ee", "\\varvdots"); +defineSymbol(math, main, accent, "\u02ca", "\\acute"); +defineSymbol(math, main, accent, "\u02cb", "\\grave"); +defineSymbol(math, main, accent, "\u00a8", "\\ddot"); +defineSymbol(math, main, accent, "\u007e", "\\tilde"); +defineSymbol(math, main, accent, "\u02c9", "\\bar"); +defineSymbol(math, main, accent, "\u02d8", "\\breve"); +defineSymbol(math, main, accent, "\u02c7", "\\check"); +defineSymbol(math, main, accent, "\u005e", "\\hat"); +defineSymbol(math, main, accent, "\u20d7", "\\vec"); +defineSymbol(math, main, accent, "\u02d9", "\\dot"); +defineSymbol(math, main, accent, "\u02da", "\\mathring"); +// \imath and \jmath should be invariant to \mathrm, \mathbf, etc., so use PUA +defineSymbol(math, main, mathord, "\ue131", "\\@imath"); +defineSymbol(math, main, mathord, "\ue237", "\\@jmath"); +defineSymbol(math, main, textord, "\u0131", "\u0131"); +defineSymbol(math, main, textord, "\u0237", "\u0237"); +defineSymbol(text, main, textord, "\u0131", "\\i", true); +defineSymbol(text, main, textord, "\u0237", "\\j", true); 
+defineSymbol(text, main, textord, "\u00df", "\\ss", true); +defineSymbol(text, main, textord, "\u00e6", "\\ae", true); +defineSymbol(text, main, textord, "\u0153", "\\oe", true); +defineSymbol(text, main, textord, "\u00f8", "\\o", true); +defineSymbol(text, main, textord, "\u00c6", "\\AE", true); +defineSymbol(text, main, textord, "\u0152", "\\OE", true); +defineSymbol(text, main, textord, "\u00d8", "\\O", true); +defineSymbol(text, main, accent, "\u02ca", "\\'"); // acute +defineSymbol(text, main, accent, "\u02cb", "\\`"); // grave +defineSymbol(text, main, accent, "\u02c6", "\\^"); // circumflex +defineSymbol(text, main, accent, "\u02dc", "\\~"); // tilde +defineSymbol(text, main, accent, "\u02c9", "\\="); // macron +defineSymbol(text, main, accent, "\u02d8", "\\u"); // breve +defineSymbol(text, main, accent, "\u02d9", "\\."); // dot above +defineSymbol(text, main, accent, "\u00b8", "\\c"); // cedilla +defineSymbol(text, main, accent, "\u02da", "\\r"); // ring above +defineSymbol(text, main, accent, "\u02c7", "\\v"); // caron +defineSymbol(text, main, accent, "\u00a8", '\\"'); // diaeresis +defineSymbol(text, main, accent, "\u02dd", "\\H"); // double acute +defineSymbol(text, main, accent, "\u25ef", "\\textcircled"); // \bigcirc glyph + +// These ligatures are detected and created in Parser.js's `formLigatures`. 
+export const ligatures = { + "--": true, + "---": true, + "``": true, + "''": true, +}; + +defineSymbol(text, main, textord, "\u2013", "--", true); +defineSymbol(text, main, textord, "\u2013", "\\textendash"); +defineSymbol(text, main, textord, "\u2014", "---", true); +defineSymbol(text, main, textord, "\u2014", "\\textemdash"); +defineSymbol(text, main, textord, "\u2018", "`", true); +defineSymbol(text, main, textord, "\u2018", "\\textquoteleft"); +defineSymbol(text, main, textord, "\u2019", "'", true); +defineSymbol(text, main, textord, "\u2019", "\\textquoteright"); +defineSymbol(text, main, textord, "\u201c", "``", true); +defineSymbol(text, main, textord, "\u201c", "\\textquotedblleft"); +defineSymbol(text, main, textord, "\u201d", "''", true); +defineSymbol(text, main, textord, "\u201d", "\\textquotedblright"); +// \degree from gensymb package +defineSymbol(math, main, textord, "\u00b0", "\\degree", true); +defineSymbol(text, main, textord, "\u00b0", "\\degree"); +// \textdegree from inputenc package +defineSymbol(text, main, textord, "\u00b0", "\\textdegree", true); +// TODO: In LaTeX, \pounds can generate a different character in text and math +// mode, but among our fonts, only Main-Regular defines this character "163". +defineSymbol(math, main, textord, "\u00a3", "\\pounds"); +defineSymbol(math, main, textord, "\u00a3", "\\mathsterling", true); +defineSymbol(text, main, textord, "\u00a3", "\\pounds"); +defineSymbol(text, main, textord, "\u00a3", "\\textsterling", true); +defineSymbol(math, ams, textord, "\u2720", "\\maltese"); +defineSymbol(text, ams, textord, "\u2720", "\\maltese"); + +// There are lots of symbols which are the same, so we add them in afterwards. 
+// All of these are textords in math mode +const mathTextSymbols = "0123456789/@.\""; +for (let i = 0; i < mathTextSymbols.length; i++) { + const ch = mathTextSymbols.charAt(i); + defineSymbol(math, main, textord, ch, ch); +} + +// All of these are textords in text mode +const textSymbols = "0123456789!@*()-=+\";:?/.,"; +for (let i = 0; i < textSymbols.length; i++) { + const ch = textSymbols.charAt(i); + defineSymbol(text, main, textord, ch, ch); +} + +// All of these are textords in text mode, and mathords in math mode +const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; +for (let i = 0; i < letters.length; i++) { + const ch = letters.charAt(i); + defineSymbol(math, main, mathord, ch, ch); + defineSymbol(text, main, textord, ch, ch); +} + +// Blackboard bold and script letters in Unicode range +defineSymbol(math, ams, textord, "C", "\u2102"); // blackboard bold +defineSymbol(text, ams, textord, "C", "\u2102"); +defineSymbol(math, ams, textord, "H", "\u210D"); +defineSymbol(text, ams, textord, "H", "\u210D"); +defineSymbol(math, ams, textord, "N", "\u2115"); +defineSymbol(text, ams, textord, "N", "\u2115"); +defineSymbol(math, ams, textord, "P", "\u2119"); +defineSymbol(text, ams, textord, "P", "\u2119"); +defineSymbol(math, ams, textord, "Q", "\u211A"); +defineSymbol(text, ams, textord, "Q", "\u211A"); +defineSymbol(math, ams, textord, "R", "\u211D"); +defineSymbol(text, ams, textord, "R", "\u211D"); +defineSymbol(math, ams, textord, "Z", "\u2124"); +defineSymbol(text, ams, textord, "Z", "\u2124"); +defineSymbol(math, main, mathord, "h", "\u210E"); // italic h, Planck constant +defineSymbol(text, main, mathord, "h", "\u210E"); + +// The next loop loads wide (surrogate pair) characters. +// We support some letters in the Unicode range U+1D400 to U+1D7FF, +// Mathematical Alphanumeric Symbols. +// Some editors do not deal well with wide characters. So don't write the +// string into this file. Instead, create the string from the surrogate pair. 
+let wideChar = ""; +for (let i = 0; i < letters.length; i++) { + const ch = letters.charAt(i); + + // The hex numbers in the next line are a surrogate pair. + // 0xD835 is the high surrogate for all letters in the range we support. + // 0xDC00 is the low surrogate for bold A. + wideChar = String.fromCharCode(0xD835, 0xDC00 + i); // A-Z a-z bold + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDC34 + i); // A-Z a-z italic + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDC68 + i); // A-Z a-z bold italic + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDD04 + i); // A-Z a-z Fraktur + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDD6C + i); // A-Z a-z bold Fraktur + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDDA0 + i); // A-Z a-z sans-serif + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDDD4 + i); // A-Z a-z sans bold + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDE08 + i); // A-Z a-z sans italic + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDE70 + i); // A-Z a-z monospace + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + if (i < 26) { + // KaTeX fonts have only capital letters for blackboard bold and script. 
+ // See exception for k below. + wideChar = String.fromCharCode(0xD835, 0xDD38 + i); // A-Z double struck + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDC9C + i); // A-Z script + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + } + + // TODO: Add bold script when it is supported by a KaTeX font. +} +// "k" is the only double struck lower case letter in the KaTeX fonts. +wideChar = String.fromCharCode(0xD835, 0xDD5C); // k double struck +defineSymbol(math, main, mathord, "k", wideChar); +defineSymbol(text, main, textord, "k", wideChar); + +// Next, some wide character numerals +for (let i = 0; i < 10; i++) { + const ch = i.toString(); + + wideChar = String.fromCharCode(0xD835, 0xDFCE + i); // 0-9 bold + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDFE2 + i); // 0-9 sans serif + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDFEC + i); // 0-9 bold sans + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); + + wideChar = String.fromCharCode(0xD835, 0xDFF6 + i); // 0-9 monospace + defineSymbol(math, main, mathord, ch, wideChar); + defineSymbol(text, main, textord, ch, wideChar); +} + +// We add these Latin-1 letters as symbols for backwards-compatibility, +// but they are not actually in the font, nor are they supported by the +// Unicode accent mechanism, so they fall back to Times font and look ugly. +// TODO(edemaine): Fix this. 
+export const extraLatin = "\u00d0\u00de\u00fe";
+for (let i = 0; i < extraLatin.length; i++) {
+    const ch = extraLatin.charAt(i);
+    defineSymbol(math, main, mathord, ch, ch);
+    defineSymbol(text, main, textord, ch, ch);
+}
diff --git a/lib/scripts/generate-katex-data.ts b/lib/scripts/generate-katex-data.ts
new file mode 100644
index 0000000..a78aaa2
--- /dev/null
+++ b/lib/scripts/generate-katex-data.ts
@@ -0,0 +1,183 @@
+/**
+ * Generates KaTeX-derived symbol data for @m2d/math.
+ * Source: KaTeX v0.16.22 src/symbols.js, src/macros.js, src/functions/op.js
+ *
+ * The vendored sources are scanned as text, never executed, so a backslash
+ * shows up exactly as typed inside their string literals: doubled ("\\alpha").
+ * Every pattern that inspects a macro name or body must match that form.
+ *
+ * Run: pnpm generate:katex
+ */
+import { readFileSync, writeFileSync } from "node:fs";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const REGENERATE_CMD = "pnpm generate:katex";
+const SIMPLE_MACRO = /^\\([a-zA-Z@][a-zA-Z0-9@]*)$/;
+
+const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url));
+const ROOT = join(SCRIPT_DIR, "..");
+
+const symbolsSrc = readFileSync(join(SCRIPT_DIR, "data/katex-symbols.js"), "utf8");
+const macrosSrc = readFileSync(join(SCRIPT_DIR, "data/katex-macros.js"), "utf8");
+const opSrc = readFileSync(join(SCRIPT_DIR, "data/katex-op.js"), "utf8");
+
+// Command name (leading backslashes stripped) -> replacement character, taken
+// from every single-line defineSymbol row with at least two double-quoted literals.
+// Rows that share a command name overwrite each other; the last one wins.
+// NOTE(review): a row whose name is a \uXXXX escape is keyed without its
+// backslash (e.g. "u0391"); confirm whether such keys are wanted downstream.
+const symbolMap: Record<string, string> = {};
+for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) {
+  const strMatch = [...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g)];
+  if (strMatch.length < 2) continue;
+  const unicode = JSON.parse(`"${strMatch[0][1]}"`) as string;
+  const cmd = strMatch[1][1].replace(/^\\+/, "");
+  symbolMap[cmd] = unicode;
+}
+
+/**
+ * Decodes the operand of a \char construct found in raw macro source.
+ * @param raw A six-character \uXXXX escape or a single UTF-16 code unit.
+ * @returns The decoded character, or undefined for any other input.
+ */
+const decodeChar = (raw: string): string | undefined => {
+  if (/^\\u[0-9a-fA-F]{4}$/.test(raw)) {
+    return JSON.parse(`"${raw}"`) as string;
+  }
+  return raw.length === 1 ? raw : undefined;
+};
+
+/**
+ * Resolves a command name to the text it stands for. The symbol table is
+ * consulted first; otherwise the string body of the matching defineMacro
+ * call is interpreted (\mathrm{X}, a lone character, \char, or another alias).
+ * @param name Command name without its leading backslash.
+ * @param seen Names already visited on this chain; stops alias cycles.
+ * @returns The resolved text, or undefined when nothing applies.
+ */
+const resolveToUnicode = (name: string, seen = new Set<string>()): string | undefined => {
+  if (seen.has(name)) return undefined;
+  seen.add(name);
+  if (symbolMap[name]) return symbolMap[name];
+  const bodyMatch = macrosSrc.match(
+    new RegExp(`defineMacro\\("\\\\\\\\${name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}",\\s*"([^"]+)"\\)`),
+  );
+  if (!bodyMatch) return undefined;
+  const body = bodyMatch[1];
+  // Raw source text: two backslashes + "mathrm{" is the 9-character prefix sliced off below.
+  if (body.startsWith("\\\\mathrm{") && body.endsWith("}")) {
+    return body.slice(9, -1);
+  }
+  if (body.length === 1 && !body.startsWith("\\")) {
+    return body;
+  }
+  const charMatch = body.match(/\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/);
+  if (charMatch) {
+    return decodeChar(charMatch[1]);
+  }
+  if (body.startsWith("\\") && !body.includes("{")) {
+    return resolveToUnicode(body.replace(/^\\+/, ""), seen);
+  }
+  return undefined;
+};
+
+// Macros with a plain string body that resolves to exactly one code point.
+const aliasMap: Record<string, string> = {};
+for (const m of macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g)) {
+  const name = m[1];
+  if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue;
+  const resolved = resolveToUnicode(name);
+  if (resolved && [...resolved].length === 1) {
+    aliasMap[name] = resolved;
+  }
+}
+
+// Accent command -> accent character, from defineSymbol rows in the accent group.
+const accentMap: Record<string, string> = {};
+for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) {
+  if (!m[0].includes(", accent,")) continue;
+  const strMatch = [...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g)];
+  if (strMatch.length < 2) continue;
+  const chr = JSON.parse(`"${strMatch[0][1]}"`) as string;
+  const cmd = strMatch[1][1].replace(/^\\+/, "");
+  accentMap[cmd] = chr;
+}
+
+// Operator names from op.js defineFunction blocks that say `symbol: false` and
+// never `symbol: true`, plus liminf/limsup, which live in the macro file.
+const fnSet = new Set<string>();
+let blockIdx = 0;
+while ((blockIdx = opSrc.indexOf("defineFunction({", blockIdx)) !== -1) {
+  const blockEnd = opSrc.indexOf("});", blockIdx);
+  // An unterminated block would store -1 in blockIdx; indexOf treats a negative
+  // start as 0, so the scan would restart from the top and never finish.
+  if (blockEnd === -1) break;
+  const block = opSrc.slice(blockIdx, blockEnd);
+  if (block.includes("symbol: false") && !block.includes("symbol: true")) {
+    const namesMatch = block.match(/names:\s*\[([\s\S]*?)\]/);
+    if (namesMatch) {
+      for (const nameMatch of namesMatch[1].matchAll(/"\\+([^"]+)"/g)) {
+        fnSet.add(nameMatch[1]);
+      }
+    }
+  }
+  blockIdx = blockEnd;
+}
+for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(liminf|limsup)",/g)) {
+  fnSet.add(m[1]);
+}
+
+// Overrides, in order: simple \html@mathml macros naming a \char, \quad and
+// \qquad spacing, Unicode-keyed macros, then any alias not yet recorded.
+const overrideMap: Record<string, string> = {};
+for (const m of macrosSrc.matchAll(
+  /defineMacro\("\\\\([^"]+)",\s*"\\\\html@mathml\{[^}]+\}\{[^}]*\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/g,
+)) {
+  const resolved = decodeChar(m[2]);
+  if (resolved && [...resolved].length === 1) {
+    overrideMap[m[1]] = resolved;
+  }
+}
+for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(q?quad)",\s*"\\\\hskip(\d+)em/g)) {
+  overrideMap[m[1]] = m[1] === "qquad" ? "\u2003\u2003" : "\u2003";
+}
+for (const m of macrosSrc.matchAll(/defineMacro\("(\\u[0-9a-fA-F]{4})",\s*"\\\\([^"]+)"\)/g)) {
+  const unicode = JSON.parse(`"${m[1]}"`) as string;
+  const target = `\\${m[2]}`;
+  if (!SIMPLE_MACRO.test(target) || unicode === "\uFE0F") continue;
+  const cmd = m[2];
+  const resolved = resolveToUnicode(cmd) ?? unicode;
+  if ([...resolved].length === 1) {
+    overrideMap[cmd] = resolved;
+  }
+}
+for (const m of macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g)) {
+  const name = m[1];
+  if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue;
+  if (symbolMap[name] || aliasMap[name] || overrideMap[name]) continue;
+  const resolved = resolveToUnicode(name);
+  if (resolved && [...resolved].length === 1) {
+    overrideMap[name] = resolved;
+  }
+}
+
+const symbolLines = Object.entries(symbolMap)
+  .sort(([a], [b]) => a.localeCompare(b))
+  .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`)
+  .join("\n");
+
+writeFileSync(
+  join(ROOT, "src/katexSymbols.ts"),
+  `/** KaTeX v0.16.22 \`defineSymbol\` entries — regenerate via \`${REGENERATE_CMD}\`. */\nexport const KATEX_SYMBOLS: Record<string, string> = {\n${symbolLines}\n};\n`,
+);
+
+writeFileSync(
+  join(ROOT, "src/katexMeta.ts"),
+  `/** KaTeX v0.16.22 derived metadata — regenerate via \`${REGENERATE_CMD}\`.
*/\nexport const KATEX_ALIASES: Record = ${JSON.stringify(aliasMap, null, 2)};\n\nexport const KATEX_ACCENTS: Record = ${JSON.stringify(accentMap, null, 2)};\n\nexport const KATEX_FUNCTIONS = new Set(${JSON.stringify([...fnSet].sort())});\n\n/** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. */\nexport const KATEX_SYMBOL_OVERRIDES: Record = ${JSON.stringify(overrideMap, null, 2)};\n`, +); + +console.log(`KATEX_SYMBOLS: ${Object.keys(symbolMap).length}`); +console.log(`KATEX_ALIASES: ${Object.keys(aliasMap).length}`); +console.log(`KATEX_FUNCTIONS: ${fnSet.size}`); +console.log(`KATEX_SYMBOL_OVERRIDES: ${Object.keys(overrideMap).length}`); diff --git a/lib/src/index.ts b/lib/src/index.ts index fb3896e..3432406 100644 --- a/lib/src/index.ts +++ b/lib/src/index.ts @@ -4,6 +4,8 @@ import { parseMath } from "latex-math"; import * as DOCX from "docx"; // skipcq: JS-C1003 import type * as latex from "@unified-latex/unified-latex-types"; +import { KATEX_ACCENTS, KATEX_ALIASES, KATEX_FUNCTIONS, KATEX_SYMBOL_OVERRIDES } from "./katexMeta"; +import { KATEX_SYMBOLS } from "./katexSymbols"; /** * Checks if the argument has curly brackets. 
@@ -14,149 +16,8 @@ const hasCurlyBrackets = (arg: latex.Argument | undefined): arg is latex.Argumen /** convert to MathRun */ const mapString = (docx: typeof DOCX, s: string): DOCX.MathRun => new docx.MathRun(s); -const LATEX_SYMBOLS: Record = { - textasciitilde: "~", - textasciicircum: "^", - textbackslash: "∖", - textbar: "|", - textless: "<", - textgreater: ">", - neq: "≠", - sim: "∼", - simeq: "≃", - approx: "≈", - fallingdotseq: "≒", - risingdotseq: "≓", - equiv: "≡", - geq: "≥", - geqq: "≧", - leq: "≤", - leqq: "≦", - gg: "≫", - ll: "≪", - times: "×", - div: "÷", - pm: "±", - mp: "∓", - oplus: "⊕", - ominus: "⊖", - otimes: "⊗", - oslash: "⊘", - circ: "∘", - cdot: "⋅", - bullet: "∙", - ltimes: "⋉", - rtimes: "⋊", - in: "∈", - ni: "∋", - notin: "∉", - subset: "⊂", - supset: "⊃", - subseteq: "⊆", - supseteq: "⊇", - nsubseteq: "⊈", - nsupseteq: "⊉", - subsetneq: "⊊", - supsetneq: "⊋", - cap: "∩", - cup: "∪", - emptyset: "∅", - infty: "∞", - partial: "∂", - aleph: "ℵ", - hbar: "ℏ", - wp: "℘", - Re: "ℜ", - Im: "ℑ", - alpha: "α", - beta: "β", - gamma: "γ", - delta: "δ", - epsilon: "ϵ", - zeta: "ζ", - eta: "η", - theta: "θ", - iota: "ι", - kappa: "κ", - lambda: "λ", - mu: "μ", - nu: "ν", - xi: "ξ", - pi: "π", - rho: "ρ", - sigma: "σ", - tau: "τ", - upsilon: "υ", - phi: "ϕ", - chi: "χ", - psi: "ψ", - omega: "ω", - varepsilon: "ε", - vartheta: "ϑ", - varrho: "ϱ", - varsigma: "ς", - varphi: "φ", - Gamma: "Γ", - Delta: "Δ", - Theta: "Θ", - Lambda: "Λ", - Xi: "Ξ", - Pi: "Π", - Sigma: "Σ", - Upsilon: "Υ", - Phi: "Φ", - Psi: "Ψ", - Omega: "Ω", - int: "∫", - oint: "∮", - prod: "∏", - coprod: "∐", - sum: "∑", - log: "log", - exp: "exp", - lim: "lim", - inf: "∞", - perp: "⊥", - and: "∧", - or: "∨", - not: "¬", - to: "→", - gets: "⟹", - implies: "⟹", - impliedby: "⟸", - forall: "∀", - exists: "∃", - empty: "∅", - nabla: "∇", - top: "⊤", - bot: "⊥", - angle: "∠", - backslash: "∖", - neg: "¬", - lnot: "¬", - flat: "♭", - natural: "♮", - sharp: "♯", - clubsuit: "♣", - 
diamondsuit: "♦", - heartsuit: "♥", - spadesuit: "♠", - varnothing: "∅", - S: "∖", - P: "∏", - bigcap: "⋀", - bigcup: "⋁", - bigwedge: "⊓", - bigvee: "⊔", - bigsqcap: "⊓", - bigsqcup: "⊔", - biguplus: "⊕", - bigoplus: "⊕", - bigotimes: "⊗", - bigodot: "⊙", - biginterleave: "⊺", - bigtimes: "⨯", -}; +const resolveLatexSymbol = (name: string): string | undefined => + KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? KATEX_ALIASES[name]; /** convert group to Math */ const mapGroup = (docx: typeof DOCX, nodes: latex.Node[]): DOCX.MathRun[] => { @@ -277,8 +138,9 @@ const mapMacro = ( } case "hat": case "widehat": - // returnVal = docx.MathAccentCharacter(n) - returnVal = docx.createMathAccentCharacter({ accent: "^" }); + returnVal = docx.createMathAccentCharacter({ + accent: KATEX_ACCENTS[node.content] ?? "^", + }); break; case "sum": { const docNode = new docx.MathSum({ @@ -325,7 +187,13 @@ const mapMacro = ( case "mathbf": return mapGroup(docx, node.args?.[0]?.content ?? []); default: - returnVal = mapString(docx, LATEX_SYMBOLS[node.content] ?? node.content); + if (KATEX_ACCENTS[node.content]) { + returnVal = docx.createMathAccentCharacter({ accent: KATEX_ACCENTS[node.content] }); + } else if (KATEX_FUNCTIONS.has(node.content)) { + returnVal = mapString(docx, node.content); + } else { + returnVal = mapString(docx, resolveLatexSymbol(node.content) ?? node.content); + } } // @ts-expect-error -- reading extra field if (runs[runs.length - 1]?.isSum && returnVal) { diff --git a/lib/src/katexMeta.ts b/lib/src/katexMeta.ts new file mode 100644 index 0000000..1c836e7 --- /dev/null +++ b/lib/src/katexMeta.ts @@ -0,0 +1,108 @@ +/** KaTeX v0.16.22 derived metadata — regenerate via `pnpm generate:katex`. 
*/ +export const KATEX_ALIASES: Record = { + "bgroup": "{", + "egroup": "}", + "lq": "`", + "rq": "'", + "thinspace": ",", + "medspace": ":", + "thickspace": ";", + "negthinspace": "!", + "ordinarycolon": ":", + "notni": "∌", + "darr": "↓", + "dArr": "⇓", + "Darr": "⇓", + "lang": "⟨", + "rang": "⟩", + "uarr": "↑", + "uArr": "⇑", + "Uarr": "⇑", + "alef": "ℵ", + "alefsym": "ℵ", + "bull": "∙", + "clubs": "♣", + "Dagger": "‡", + "diamonds": "♢", + "empty": "∅", + "exist": "∃", + "harr": "↔", + "hArr": "⇔", + "Harr": "⇔", + "hearts": "♡", + "image": "ℑ", + "infin": "∞", + "isin": "∈", + "larr": "←", + "lArr": "⇐", + "Larr": "⇐", + "lrarr": "↔", + "lrArr": "⇔", + "Lrarr": "⇔", + "plusmn": "±", + "rarr": "→", + "rArr": "⇒", + "Rarr": "⇒", + "real": "ℜ", + "sdot": "⋅", + "sect": "§", + "spades": "♠", + "sub": "⊂", + "sube": "⊆", + "supe": "⊇", + "thetasym": "ϑ", + "weierp": "℘" +}; + +export const KATEX_ACCENTS: Record = { + "acute": "ˊ", + "grave": "ˋ", + "ddot": "¨", + "tilde": "~", + "bar": "ˉ", + "breve": "˘", + "check": "ˇ", + "hat": "^", + "vec": "⃗", + "dot": "˙", + "mathring": "˚", + "'": "ˊ", + "`": "ˋ", + "^": "ˆ", + "~": "˜", + "=": "ˉ", + "u": "˘", + ".": "˙", + "c": "¸", + "r": "˚", + "v": "ˇ", + "H": "˝", + "textcircled": "◯" +}; + +export const KATEX_FUNCTIONS = new Set(["Pr","arccos","arcctg","arcsin","arctan","arctg","arg","ch","cos","cosec","cosh","cot","cotg","coth","csc","ctg","cth","deg","det","dim","exp","gcd","hom","inf","ker","lg","lim","liminf","limsup","ln","log","mathop","max","min","sec","sh","sin","sinh","sup","tan","tanh","tg","th"]); + +/** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. 
*/ +export const KATEX_SYMBOL_OVERRIDES: Record = { + "cdotp": "⋅", + "neq": "≠", + "notin": "∉", + "perp": "⊥", + "notni": "∌", + "ulcorner": "⌜", + "urcorner": "⌝", + "llcorner": "⌞", + "lrcorner": "⌟", + "copyright": "©", + "textregistered": "®", + "vdots": "⋮", + "dblcolon": "∷", + "eqcolon": "∹", + "coloneqq": "≔", + "eqqcolon": "≕", + "Coloneqq": "⩴", + "llbracket": "⟦", + "rrbracket": "⟧", + "lBrace": "⦃", + "rBrace": "⦄" +}; diff --git a/lib/src/katexSymbols.ts b/lib/src/katexSymbols.ts new file mode 100644 index 0000000..8eaef9d --- /dev/null +++ b/lib/src/katexSymbols.ts @@ -0,0 +1,570 @@ +/** KaTeX v0.16.22 `defineSymbol` entries — regenerate via `pnpm generate:katex`. */ +export const KATEX_SYMBOLS: Record = { + " ": " ", + "_": "_", + "-": "−", + "--": "–", + "---": "—", + ",": ",", + ";": ";", + ":": ":", + "!": "!", + "?": "?", + ".": "˙", + "'": "’", + "''": "”", + "{": "{", + "}": "}", + "@cdots": "⋯", + "@gvertneqq": "", + "@imath": "", + "@jmath": "", + "@llcorner": "└", + "@lrcorner": "┘", + "@lvertneqq": "", + "@ngeqq": "", + "@ngeqslant": "", + "@nleqq": "", + "@nleqslant": "", + "@not": "", + "@nshortmid": "", + "@nshortparallel": "", + "@nsubseteqq": "", + "@nsupseteqq": "", + "@ulcorner": "┌", + "@urcorner": "┐", + "@varsubsetneq": "", + "@varsubsetneqq": "", + "@varsupsetneq": "", + "@varsupsetneqq": "", + "*": "∗", + "&": "&", + "#": "#", + "%": "%", + "`": "‘", + "``": "“", + "^": "ˆ", + "+": "+", + "=": "ˉ", + "|": "∥", + "~": "˜", + "$": "$", + "acute": "ˊ", + "ae": "æ", + "AE": "Æ", + "aleph": "ℵ", + "alpha": "α", + "amalg": "⨿", + "And": "&", + "angle": "∠", + "approx": "≈", + "approxeq": "≊", + "ast": "∗", + "asymp": "≍", + "backepsilon": "∍", + "backprime": "‵", + "backsim": "∽", + "backsimeq": "⋍", + "backslash": "\\", + "bar": "ˉ", + "barwedge": "⊼", + "because": "∵", + "beta": "β", + "beth": "ℶ", + "between": "≬", + "bigcap": "⋂", + "bigcirc": "◯", + "bigcup": "⋃", + "bigodot": "⨀", + "bigoplus": "⨁", + 
"bigotimes": "⨂", + "bigsqcup": "⨆", + "bigstar": "★", + "bigtriangledown": "▽", + "bigtriangleup": "△", + "biguplus": "⨄", + "bigvee": "⋁", + "bigwedge": "⋀", + "blacklozenge": "⧫", + "blacksquare": "■", + "blacktriangle": "▲", + "blacktriangledown": "▼", + "blacktriangleleft": "◀", + "blacktriangleright": "▶", + "bot": "⊥", + "bowtie": "⋈", + "Box": "□", + "boxdot": "⊡", + "boxminus": "⊟", + "boxplus": "⊞", + "boxtimes": "⊠", + "breve": "˘", + "bullet": "∙", + "bumpeq": "≏", + "Bumpeq": "≎", + "c": "¸", + "cap": "∩", + "Cap": "⋒", + "cdot": "⋅", + "cdotp": "⋅", + "centerdot": "⋅", + "check": "ˇ", + "checkmark": "✓", + "chi": "χ", + "circ": "∘", + "circeq": "≗", + "circlearrowleft": "↺", + "circlearrowright": "↻", + "circledast": "⊛", + "circledcirc": "⊚", + "circleddash": "⊝", + "circledR": "®", + "circledS": "Ⓢ", + "clubsuit": "♣", + "complement": "∁", + "cong": "≅", + "coprod": "∐", + "cup": "∪", + "Cup": "⋓", + "curlyeqprec": "⋞", + "curlyeqsucc": "⋟", + "curlyvee": "⋎", + "curlywedge": "⋏", + "curvearrowleft": "↶", + "curvearrowright": "↷", + "dag": "†", + "dagger": "†", + "daleth": "ℸ", + "dashleftarrow": "⇠", + "dashrightarrow": "⇢", + "dashv": "⊣", + "ddag": "‡", + "ddagger": "‡", + "ddot": "¨", + "ddots": "⋱", + "degree": "°", + "delta": "δ", + "Delta": "Δ", + "diagdown": "╲", + "diagup": "╱", + "diamond": "⋄", + "Diamond": "◊", + "diamondsuit": "♢", + "digamma": "ϝ", + "div": "÷", + "divideontimes": "⋇", + "dot": "˙", + "doteq": "≐", + "Doteq": "≑", + "doteqdot": "≑", + "dotplus": "∔", + "doublebarwedge": "⩞", + "doublecap": "⋒", + "doublecup": "⋓", + "downarrow": "↓", + "Downarrow": "⇓", + "downdownarrows": "⇊", + "downharpoonleft": "⇃", + "downharpoonright": "⇂", + "ell": "ℓ", + "emptyset": "∅", + "epsilon": "ϵ", + "eqcirc": "≖", + "eqsim": "≂", + "eqslantgtr": "⪖", + "eqslantless": "⪕", + "equiv": "≡", + "eta": "η", + "eth": "ð", + "exists": "∃", + "fallingdotseq": "≒", + "Finv": "Ⅎ", + "flat": "♭", + "forall": "∀", + "frown": "⌢", + "Game": "⅁", + 
"gamma": "γ", + "Gamma": "Γ", + "ge": "≥", + "geq": "≥", + "geqq": "≧", + "geqslant": "⩾", + "gets": "←", + "gg": "≫", + "ggg": "⋙", + "gggtr": "⋙", + "gimel": "ℷ", + "gnapprox": "⪊", + "gneq": "⪈", + "gneqq": "≩", + "gnsim": "⋧", + "grave": "ˋ", + "gt": ">", + "gtrapprox": "⪆", + "gtrdot": "⋗", + "gtreqless": "⋛", + "gtreqqless": "⪌", + "gtrless": "≷", + "gtrsim": "≳", + "H": "˝", + "hat": "^", + "hbar": "ℏ", + "heartsuit": "♡", + "hookleftarrow": "↩", + "hookrightarrow": "↪", + "hslash": "ℏ", + "i": "ı", + "iiint": "∭", + "iint": "∬", + "Im": "ℑ", + "imageof": "⊷", + "in": "∈", + "infty": "∞", + "int": "∫", + "intercal": "⊺", + "intop": "∫", + "iota": "ι", + "j": "ȷ", + "Join": "⋈", + "kappa": "κ", + "lambda": "λ", + "Lambda": "Λ", + "land": "∧", + "langle": "⟨", + "lbrace": "{", + "lbrack": "[", + "lceil": "⌈", + "ldotp": ".", + "ldots": "…", + "le": "≤", + "leadsto": "⇝", + "leftarrow": "←", + "Leftarrow": "⇐", + "leftarrowtail": "↢", + "leftharpoondown": "↽", + "leftharpoonup": "↼", + "leftleftarrows": "⇇", + "leftrightarrow": "↔", + "Leftrightarrow": "⇔", + "leftrightarrows": "⇆", + "leftrightharpoons": "⇋", + "leftrightsquigarrow": "↭", + "leftthreetimes": "⋋", + "leq": "≤", + "leqq": "≦", + "leqslant": "⩽", + "lessapprox": "⪅", + "lessdot": "⋖", + "lesseqgtr": "⋚", + "lesseqqgtr": "⪋", + "lessgtr": "≶", + "lesssim": "≲", + "lfloor": "⌊", + "lgroup": "⟮", + "lhd": "⊲", + "ll": "≪", + "Lleftarrow": "⇚", + "lll": "⋘", + "llless": "⋘", + "lmoustache": "⎰", + "lnapprox": "⪉", + "lneq": "⪇", + "lneqq": "≨", + "lnot": "¬", + "lnsim": "⋦", + "longleftarrow": "⟵", + "Longleftarrow": "⟸", + "longleftrightarrow": "⟷", + "Longleftrightarrow": "⟺", + "longmapsto": "⟼", + "longrightarrow": "⟶", + "Longrightarrow": "⟹", + "looparrowleft": "↫", + "looparrowright": "↬", + "lor": "∨", + "lozenge": "◊", + "lparen": "(", + "Lsh": "↰", + "lt": "<", + "ltimes": "⋉", + "lvert": "∣", + "lVert": "∥", + "maltese": "✠", + "mapsto": "↦", + "mathellipsis": "…", + "mathring": "˚", + 
"mathsterling": "£", + "measuredangle": "∡", + "mho": "℧", + "mid": "∣", + "models": "⊨", + "mp": "∓", + "mu": "μ", + "multimap": "⊸", + "nabla": "∇", + "natural": "♮", + "ncong": "≆", + "nearrow": "↗", + "neg": "¬", + "nexists": "∄", + "ngeq": "≱", + "ngtr": "≯", + "ni": "∋", + "nleftarrow": "↚", + "nLeftarrow": "⇍", + "nleftrightarrow": "↮", + "nLeftrightarrow": "⇎", + "nleq": "≰", + "nless": "≮", + "nmid": "∤", + "nobreakspace": " ", + "nparallel": "∦", + "nprec": "⊀", + "npreceq": "⋠", + "nrightarrow": "↛", + "nRightarrow": "⇏", + "nsim": "≁", + "nsubseteq": "⊈", + "nsucc": "⊁", + "nsucceq": "⋡", + "nsupseteq": "⊉", + "ntriangleleft": "⋪", + "ntrianglelefteq": "⋬", + "ntriangleright": "⋫", + "ntrianglerighteq": "⋭", + "nu": "ν", + "nvdash": "⊬", + "nvDash": "⊭", + "nVdash": "⊮", + "nVDash": "⊯", + "nwarrow": "↖", + "o": "ø", + "O": "Ø", + "odot": "⊙", + "oe": "œ", + "OE": "Œ", + "oiiint": "∰", + "oiint": "∯", + "oint": "∮", + "omega": "ω", + "Omega": "Ω", + "omicron": "ο", + "ominus": "⊖", + "oplus": "⊕", + "origof": "⊶", + "oslash": "⊘", + "otimes": "⊗", + "owns": "∋", + "P": "¶", + "parallel": "∥", + "partial": "∂", + "perp": "⊥", + "phi": "ϕ", + "Phi": "Φ", + "pi": "π", + "Pi": "Π", + "pitchfork": "⋔", + "pm": "±", + "pounds": "£", + "prec": "≺", + "precapprox": "⪷", + "preccurlyeq": "≼", + "preceq": "⪯", + "precnapprox": "⪹", + "precneqq": "⪵", + "precnsim": "⋨", + "precsim": "≾", + "prime": "′", + "prod": "∏", + "propto": "∝", + "psi": "ψ", + "Psi": "Ψ", + "r": "˚", + "rangle": "⟩", + "rbrace": "}", + "rbrack": "]", + "rceil": "⌉", + "Re": "ℜ", + "restriction": "↾", + "rfloor": "⌋", + "rgroup": "⟯", + "rhd": "⊳", + "rho": "ρ", + "rightarrow": "→", + "Rightarrow": "⇒", + "rightarrowtail": "↣", + "rightharpoondown": "⇁", + "rightharpoonup": "⇀", + "rightleftarrows": "⇄", + "rightleftharpoons": "⇌", + "rightrightarrows": "⇉", + "rightsquigarrow": "⇝", + "rightthreetimes": "⋌", + "risingdotseq": "≓", + "rmoustache": "⎱", + "rparen": ")", + "Rrightarrow": "⇛", 
+ "Rsh": "↱", + "rtimes": "⋊", + "rvert": "∣", + "rVert": "∥", + "S": "§", + "searrow": "↘", + "setminus": "∖", + "sharp": "♯", + "shortmid": "∣", + "shortparallel": "∥", + "sigma": "σ", + "Sigma": "Σ", + "sim": "∼", + "simeq": "≃", + "smallfrown": "⌢", + "smallint": "∫", + "smallsetminus": "∖", + "smallsmile": "⌣", + "smile": "⌣", + "space": " ", + "spadesuit": "♠", + "sphericalangle": "∢", + "sqcap": "⊓", + "sqcup": "⊔", + "sqsubset": "⊏", + "sqsubseteq": "⊑", + "sqsupset": "⊐", + "sqsupseteq": "⊒", + "square": "□", + "ss": "ß", + "star": "⋆", + "subset": "⊂", + "Subset": "⋐", + "subseteq": "⊆", + "subseteqq": "⫅", + "subsetneq": "⊊", + "subsetneqq": "⫋", + "succ": "≻", + "succapprox": "⪸", + "succcurlyeq": "≽", + "succeq": "⪰", + "succnapprox": "⪺", + "succneqq": "⪶", + "succnsim": "⋩", + "succsim": "≿", + "sum": "∑", + "supset": "⊃", + "Supset": "⋑", + "supseteq": "⊇", + "supseteqq": "⫆", + "supsetneq": "⊋", + "supsetneqq": "⫌", + "surd": "√", + "swarrow": "↙", + "tau": "τ", + "textasciicircum": "^", + "textasciitilde": "~", + "textbackslash": "\\", + "textbar": "|", + "textbardbl": "∥", + "textbraceleft": "{", + "textbraceright": "}", + "textcircled": "◯", + "textdagger": "†", + "textdaggerdbl": "‡", + "textdegree": "°", + "textdollar": "$", + "textellipsis": "…", + "textemdash": "—", + "textendash": "–", + "textgreater": ">", + "textless": "<", + "textquotedblleft": "“", + "textquotedblright": "”", + "textquoteleft": "‘", + "textquoteright": "’", + "textsterling": "£", + "textunderscore": "_", + "therefore": "∴", + "theta": "θ", + "Theta": "Θ", + "thickapprox": "≈", + "thicksim": "∼", + "tilde": "~", + "times": "×", + "to": "→", + "top": "⊤", + "triangle": "△", + "triangledown": "▽", + "triangleleft": "◃", + "trianglelefteq": "⊴", + "triangleq": "≜", + "triangleright": "▹", + "trianglerighteq": "⊵", + "twoheadleftarrow": "↞", + "twoheadrightarrow": "↠", + "u": "˘", + "u00f0": "ð", + "u0131": "ı", + "u0237": "ȷ", + "u0391": "A", + "u0392": "B", + "u0395": "E", 
+ "u0396": "Z", + "u0397": "H", + "u0399": "I", + "u039A": "K", + "u039C": "M", + "u039D": "N", + "u039F": "O", + "u03A1": "P", + "u03A4": "T", + "u03A7": "X", + "u2102": "C", + "u210D": "H", + "u210E": "h", + "u2115": "N", + "u2119": "P", + "u211A": "Q", + "u211D": "R", + "u2124": "Z", + "unlhd": "⊴", + "unrhd": "⊵", + "uparrow": "↑", + "Uparrow": "⇑", + "updownarrow": "↕", + "Updownarrow": "⇕", + "upharpoonleft": "↿", + "upharpoonright": "↾", + "uplus": "⊎", + "upsilon": "υ", + "Upsilon": "Υ", + "upuparrows": "⇈", + "v": "ˇ", + "varepsilon": "ε", + "varkappa": "ϰ", + "varnothing": "∅", + "varphi": "φ", + "varpi": "ϖ", + "varpropto": "∝", + "varrho": "ϱ", + "varsigma": "ς", + "vartheta": "ϑ", + "vartriangle": "△", + "vartriangleleft": "⊲", + "vartriangleright": "⊳", + "varvdots": "⋮", + "vdash": "⊢", + "vDash": "⊨", + "Vdash": "⊩", + "vec": "⃗", + "vee": "∨", + "veebar": "⊻", + "vert": "∣", + "Vert": "∥", + "Vvdash": "⊪", + "wedge": "∧", + "wp": "℘", + "wr": "≀", + "xi": "ξ", + "Xi": "Ξ", + "yen": "¥", + "zeta": "ζ", +}; diff --git a/lib/tsup.config.ts b/lib/tsup.config.ts index 66838d0..ea966d3 100644 --- a/lib/tsup.config.ts +++ b/lib/tsup.config.ts @@ -6,7 +6,7 @@ export default defineConfig( ({ format: ["cjs", "esm"], target: "es2019", - entry: ["./src/**"], + entry: ["./src/index.ts"], sourcemap: false, clean: !options.watch, bundle: true, From f05e5b51daf516c8fcb8fc1aaa998b98147ee547 Mon Sep 17 00:00:00 2001 From: chitwitgit Date: Tue, 9 Jun 2026 14:23:29 +0800 Subject: [PATCH 2/7] Add structural OMML handlers for other LaTeX macros. Map n-ary operators, binom, stackrel, accents, and font wrappers to proper Word OMML instead of Unicode fallbacks; fix quad/ne/cdots codegen overrides. 
--- lib/scripts/generate-katex-data.ts | 5 +- lib/src/index.ts | 261 +++++++++++++++++++++-------- lib/src/katexMeta.ts | 6 +- 3 files changed, 204 insertions(+), 68 deletions(-) diff --git a/lib/scripts/generate-katex-data.ts b/lib/scripts/generate-katex-data.ts index a78aaa2..a002e37 100644 --- a/lib/scripts/generate-katex-data.ts +++ b/lib/scripts/generate-katex-data.ts @@ -107,7 +107,7 @@ for (const m of macrosSrc.matchAll( overrideMap[m[1]] = resolved; } } -for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(q?quad)",\s*"\\hskip(\d+)em/g)) { +for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(q?quad)",\s*"\\\\hskip(\d+)em/g)) { overrideMap[m[1]] = m[1] === "qquad" ? "\u2003\u2003" : "\u2003"; } for (const m of macrosSrc.matchAll(/defineMacro\("(\\u[0-9a-fA-F]{4})",\s*"\\\\([^"]+)"\)/g)) { @@ -130,6 +130,9 @@ for (const m of macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g) } } +if (overrideMap.neq) overrideMap.ne = overrideMap.neq; +if (symbolMap["@cdots"]) overrideMap.cdots = symbolMap["@cdots"]; + const symbolLines = Object.entries(symbolMap) .sort(([a], [b]) => a.localeCompare(b)) .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) diff --git a/lib/src/index.ts b/lib/src/index.ts index 3432406..ba12eaa 100644 --- a/lib/src/index.ts +++ b/lib/src/index.ts @@ -19,6 +19,111 @@ const mapString = (docx: typeof DOCX, s: string): DOCX.MathRun => new docx.MathR const resolveLatexSymbol = (name: string): string | undefined => KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? 
KATEX_ALIASES[name]; +type NAryOptions = { + accent: string; + limitLocationVal?: string; + children?: DOCX.MathRun[]; + subScript?: DOCX.MathRun[]; + superScript?: DOCX.MathRun[]; +}; + +type PendingNAry = DOCX.MathRun & { + isNAry: 1; + naryAccent: string; + naryLimitLoc?: string; + sub?: DOCX.MathRun[]; + sup?: DOCX.MathRun[]; +}; + +const NARY_OPERATORS: Record = { + sum: { accent: "∑" }, + prod: { accent: "∏" }, + int: { accent: "∫", limitLocationVal: "subSup" }, + iint: { accent: "∬", limitLocationVal: "subSup" }, + iiint: { accent: "∭", limitLocationVal: "subSup" }, + oint: { accent: "∮", limitLocationVal: "subSup" }, + oiint: { accent: "∯", limitLocationVal: "subSup" }, + oiiint: { accent: "∰", limitLocationVal: "subSup" }, + bigcup: { accent: "⋃" }, + bigcap: { accent: "⋂" }, + bigoplus: { accent: "⊕" }, + bigotimes: { accent: "⊗" }, +}; + +const isPendingNAry = (node: DOCX.MathRun | undefined): node is PendingNAry => + Boolean(node && (node as PendingNAry).isNAry); + +const buildNAry = (docx: typeof DOCX, options: NAryOptions): DOCX.MathRun => { + class MathNAry extends docx.XmlComponent { + constructor() { + super("m:nary"); + this.root.push( + docx.createMathNAryProperties({ + accent: options.accent, + hasSuperScript: !!options.superScript, + hasSubScript: !!options.subScript, + limitLocationVal: options.limitLocationVal, + }), + ); + if (options.subScript) { + this.root.push(docx.createMathSubScriptElement({ children: options.subScript })); + } + if (options.superScript) { + this.root.push(docx.createMathSuperScriptElement({ children: options.superScript })); + } + this.root.push(docx.createMathBase({ children: options.children ?? 
[] })); + } + } + return new MathNAry() as unknown as DOCX.MathRun; +}; + +const createPendingNAry = ( + docx: typeof DOCX, + accent: string, + limitLocationVal?: string, +): PendingNAry => { + const node = buildNAry(docx, { accent, limitLocationVal, children: [] }) as PendingNAry; + node.isNAry = 1; + node.naryAccent = accent; + node.naryLimitLoc = limitLocationVal; + return node; +}; + +const attachNAryLimits = ( + docx: typeof DOCX, + prev: PendingNAry, + limits: { subScript?: DOCX.MathRun[]; superScript?: DOCX.MathRun[] }, +): PendingNAry => { + const sub = limits.subScript ?? prev.sub; + const sup = limits.superScript ?? prev.sup; + const node = buildNAry(docx, { + accent: prev.naryAccent, + limitLocationVal: prev.naryLimitLoc, + children: [], + subScript: sub, + superScript: sup, + }) as PendingNAry; + node.isNAry = 1; + node.naryAccent = prev.naryAccent; + node.naryLimitLoc = prev.naryLimitLoc; + node.sub = sub; + node.sup = sup; + return node; +}; + +const finalizeNAry = ( + docx: typeof DOCX, + prev: PendingNAry, + children: DOCX.MathRun[], +): DOCX.MathRun => + buildNAry(docx, { + accent: prev.naryAccent, + limitLocationVal: prev.naryLimitLoc, + children, + subScript: prev.sub, + superScript: prev.sup, + }); + /** convert group to Math */ const mapGroup = (docx: typeof DOCX, nodes: latex.Node[]): DOCX.MathRun[] => { const group: DOCX.MathRun[] = []; @@ -34,11 +139,13 @@ const mapGroup = (docx: typeof DOCX, nodes: latex.Node[]): DOCX.MathRun[] => { const mapMacro = ( docx: typeof DOCX, node: latex.Macro, - runs: DOCX.MathRun[], + runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] }, ): DOCX.MathRun[] | DOCX.MathRun | null => { let returnVal: DOCX.MathRun[] | DOCX.MathRun | null = null; switch (node.content) { case "newline": + returnVal = mapString(docx, " "); + break; case "\\": // line break return null; @@ -61,22 +168,8 @@ const mapMacro = ( const prev = runs.pop(); if (!prev) break; const superScript = mapGroup(docx, 
node.args?.[0]?.content ?? []); - // @ts-expect-error -- using extra vars - if (prev.isSum) { - const docNode = new docx.MathSum({ - children: [], - superScript, - // @ts-expect-error -- reading extra field - subScript: prev.sub, - }); - - // @ts-expect-error -- attaching extra field - docNode.sub = prev.sub; - // @ts-expect-error -- attaching extra field - docNode.sup = superScript; - // @ts-expect-error -- attaching extra field - docNode.isSum = 1; - return docNode; + if (isPendingNAry(prev)) { + return attachNAryLimits(docx, prev, { superScript }); // @ts-expect-error -- attaching extra field } else if (prev.sub) { return new docx.MathSubSuperScript({ @@ -101,21 +194,8 @@ const mapMacro = ( const prev = runs.pop(); if (!prev) break; const subScript = mapGroup(docx, node.args?.[0]?.content ?? []); - // @ts-expect-error -- attaching extra field - if (prev.isSum) { - const docNode = new docx.MathSum({ - children: [], - subScript, - // @ts-expect-error -- reading extra field - superScript: prev.sup, - }); - // @ts-expect-error -- attaching extra field - docNode.sup = prev.sup; - // @ts-expect-error -- attaching extra field - docNode.sub = subScript; - // @ts-expect-error -- attaching extra field - docNode.isSum = 1; - return docNode; + if (isPendingNAry(prev)) { + return attachNAryLimits(docx, prev, { subScript }); // @ts-expect-error -- attaching extra field } else if (prev.sup) { return new docx.MathSubSuperScript({ @@ -142,13 +222,23 @@ const mapMacro = ( accent: KATEX_ACCENTS[node.content] ?? 
"^", }); break; - case "sum": { - const docNode = new docx.MathSum({ - children: [], - }); - // @ts-expect-error - extra var - docNode.isSum = 1; - return docNode; + case "sum": + case "prod": + case "int": + case "iint": + case "iiint": + case "oint": + case "oiint": + case "oiiint": + case "bigcup": + case "bigcap": + case "bigoplus": + case "bigotimes": { + const nary = NARY_OPERATORS[node.content]; + if (nary) { + returnVal = createPendingNAry(docx, nary.accent, nary.limitLocationVal); + } + break; } case "frac": case "tfrac": @@ -162,6 +252,22 @@ const mapMacro = ( } break; } + case "stackrel": { + const args = node.args ?? []; + if (args.length === 2 && hasCurlyBrackets(args[0]) && hasCurlyBrackets(args[1])) { + returnVal = [ + docx.createMathLimitLocation({ value: "undOvr" }), + new docx.MathLimitUpper({ + children: mapGroup(docx, args[1].content), + limit: mapGroup(docx, args[0].content), + }), + ]; + } + break; + } + case "binom": + runs.binomPending = 0; + return []; case "sqrt": { const args = node.args ?? []; if (args.length === 1) { @@ -183,11 +289,30 @@ const mapMacro = ( case "left": case "right": case "vec": + case "boxed": + case "boldsymbol": return []; case "mathbf": return mapGroup(docx, node.args?.[0]?.content ?? []); default: - if (KATEX_ACCENTS[node.content]) { + if (node.content === "overline" || node.content === "widetilde") { + returnVal = docx.createMathAccentCharacter({ + accent: node.content === "overline" ? "¯" : "~", + }); + } else if ( + node.content === "mathrm" || + node.content === "mathit" || + node.content === "textbf" || + node.content === "textit" || + node.content === "underline" || + node.content === "overbrace" || + node.content === "underbrace" + ) { + const args = node.args ?? 
[]; + if (hasCurlyBrackets(args[0])) { + returnVal = mapGroup(docx, args[0].content); + } + } else if (KATEX_ACCENTS[node.content]) { returnVal = docx.createMathAccentCharacter({ accent: KATEX_ACCENTS[node.content] }); } else if (KATEX_FUNCTIONS.has(node.content)) { returnVal = mapString(docx, node.content); @@ -195,18 +320,9 @@ const mapMacro = ( returnVal = mapString(docx, resolveLatexSymbol(node.content) ?? node.content); } } - // @ts-expect-error -- reading extra field - if (runs[runs.length - 1]?.isSum && returnVal) { - const prev = runs.pop(); - return [ - new docx.MathSum({ - children: Array.isArray(returnVal) ? returnVal : [returnVal], - // @ts-expect-error -- reading extra field - superScript: prev.sup, - // @ts-expect-error -- reading extra field - subScript: prev.sub, - }), - ]; + if (isPendingNAry(runs[runs.length - 1]) && returnVal) { + const prev = runs.pop() as PendingNAry; + return [finalizeNAry(docx, prev, Array.isArray(returnVal) ? returnVal : [returnVal])]; } return returnVal; }; @@ -215,8 +331,30 @@ const mapMacro = ( const mapNode = ( docx: typeof DOCX, node: latex.Node, - runs: DOCX.MathRun[], + runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] }, ): DOCX.MathRun[] | false => { + if (node.type === "group" && runs.binomPending !== undefined) { + const content = mapGroup(docx, node.content); + if (runs.binomPending === 0) { + runs.binomFirst = content; + runs.binomPending = 1; + return []; + } + delete runs.binomPending; + const numerator = runs.binomFirst ?? 
[]; + delete runs.binomFirst; + return [ + new docx.MathRoundBrackets({ + children: [ + new docx.MathFraction({ + numerator, + denominator: content, + }), + ], + }), + ]; + } + let docxNodes: DOCX.MathRun[] = []; switch (node.type) { case "string": @@ -244,18 +382,9 @@ const mapNode = ( default: } - // @ts-expect-error -- reading extra field - if (node.type !== "macro" && runs[runs.length - 1]?.isSum) { - const prev = runs.pop(); - return [ - new docx.MathSum({ - children: docxNodes, - // @ts-expect-error -- reading extra field - superScript: prev.sup, - // @ts-expect-error -- reading extra field - subScript: prev.sub, - }), - ]; + if (node.type !== "macro" && isPendingNAry(runs[runs.length - 1])) { + const prev = runs.pop() as PendingNAry; + return [finalizeNAry(docx, prev, docxNodes)]; } return docxNodes; @@ -266,7 +395,7 @@ export const parseLatex = (docx: typeof DOCX, value: string): DOCX.MathRun[][] = const latexNodes = parseMath(value); const paragraphs: DOCX.MathRun[][] = [[]]; - let runs: DOCX.MathRun[] = paragraphs[0]; + let runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] } = paragraphs[0]; for (const node of latexNodes) { const res = mapNode(docx, node, runs); diff --git a/lib/src/katexMeta.ts b/lib/src/katexMeta.ts index 1c836e7..a528a28 100644 --- a/lib/src/katexMeta.ts +++ b/lib/src/katexMeta.ts @@ -84,6 +84,8 @@ export const KATEX_FUNCTIONS = new Set(["Pr","arccos","arcctg","arcsin", /** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. 
*/ export const KATEX_SYMBOL_OVERRIDES: Record = { + "quad": " ", + "qquad": "  ", "cdotp": "⋅", "neq": "≠", "notin": "∉", @@ -104,5 +106,7 @@ export const KATEX_SYMBOL_OVERRIDES: Record = { "llbracket": "⟦", "rrbracket": "⟧", "lBrace": "⦃", - "rBrace": "⦄" + "rBrace": "⦄", + "ne": "≠", + "cdots": "⋯" }; From 6a09b9e530bd3b9223ceadb29c5e3462d7bf4ae3 Mon Sep 17 00:00:00 2001 From: chitwitgit Date: Wed, 10 Jun 2026 10:13:06 +0800 Subject: [PATCH 3/7] Skip empty inline math to prevent Word document corruption. Log console errors and omit unrenderable OMML instead of emitting empty elements that break Microsoft Word. --- lib/__tests__/index.test.ts | 21 ++++++++++++++++++++- lib/src/index.ts | 27 +++++++++++++++++++++++---- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/lib/__tests__/index.test.ts b/lib/__tests__/index.test.ts index ee6f188..4ecfed2 100644 --- a/lib/__tests__/index.test.ts +++ b/lib/__tests__/index.test.ts @@ -1,4 +1,4 @@ -import { describe, it } from "vitest"; +import { describe, it, vi } from "vitest"; import { toDocx } from "@m2d/core"; // Adjust path based on your setup import { unified } from "unified"; import remarkParse from "remark-parse"; @@ -8,6 +8,16 @@ import { mathPlugin } from "../src"; const markdown = fs.readFileSync("../sample.md", "utf-8"); +const emptyOMathCount = async (md: string) => { + const mdast = unified().use(remarkParse).use(remarkMath).parse(md); + const buffer = (await toDocx(mdast, {}, { plugins: [mathPlugin()] }, "nodebuffer")) as Buffer; + const { execSync } = await import("child_process"); + const path = `/tmp/m2d-math-test-${Math.random()}.docx`; + fs.writeFileSync(path, buffer); + const xml = execSync(`unzip -p ${path} word/document.xml`, { encoding: "utf8" }); + return (xml.match(//g) ?? 
[]).length; +}; + describe("toDocx", () => { it("should handle maths", async ({ expect }) => { const mdast = unified().use(remarkParse).use(remarkMath).parse(markdown); @@ -16,4 +26,13 @@ describe("toDocx", () => { expect(docxBlob).toBeInstanceOf(Blob); }); + + it("should not emit empty oMath for unrenderable inline math", async ({ expect }) => { + const error = vi.spyOn(console, "error").mockImplementation(() => {}); + + expect(await emptyOMathCount("$x$ cm$^{2}$")).toBe(0); + expect(error).toHaveBeenCalled(); + + error.mockRestore(); + }); }); diff --git a/lib/src/index.ts b/lib/src/index.ts index ba12eaa..0d0783b 100644 --- a/lib/src/index.ts +++ b/lib/src/index.ts @@ -16,6 +16,14 @@ const hasCurlyBrackets = (arg: latex.Argument | undefined): arg is latex.Argumen /** convert to MathRun */ const mapString = (docx: typeof DOCX, s: string): DOCX.MathRun => new docx.MathRun(s); +const PLUGIN_ID = "@chitwitgit/m2d-math"; + +const logSkippedEmptyMath = (latex: string, scope: "inline" | "block") => { + console.error( + `[${PLUGIN_ID}] Skipping empty ${scope} math for ${JSON.stringify(latex)}; no renderable OMML was produced. Empty elements break Microsoft Word.`, + ); +}; + const resolveLatexSymbol = (name: string): string | undefined => KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? KATEX_ALIASES[name]; @@ -421,14 +429,25 @@ export const mathPlugin: () => IPlugin<{ if (node.type !== "inlineMath" && node.type !== "math") return []; (node as unknown as EmptyNode)._type = node.type; node.type = ""; - return [new docx.Math({ children: parseLatex(docx, node.value ?? "").flat() })]; + const latex = node.value ?? ""; + const children = parseLatex(docx, latex).flat(); + if (!children.length) { + logSkippedEmptyMath(latex, "inline"); + return []; + } + return [new docx.Math({ children })]; }, block: (docx, node) => { if (node.type !== "math" && node.type !== "inlineMath") return []; node.type = ""; - return parseLatex(docx, node.value ?? 
"").map( - runs => new docx.Paragraph({ children: [new docx.Math({ children: runs })] }), - ); + const latex = node.value ?? ""; + return parseLatex(docx, latex).flatMap(runs => { + if (!runs.length) { + logSkippedEmptyMath(latex, "block"); + return []; + } + return [new docx.Paragraph({ children: [new docx.Math({ children: runs })] })]; + }); }, }; }; From 4768be3a5ec4124cf920770025bd856a6ae4917c Mon Sep 17 00:00:00 2001 From: Mayank Date: Sun, 14 Jun 2026 15:23:41 +0530 Subject: [PATCH 4/7] chore: update actions node version to 24 --- .github/workflows/manual-publish.yml | 2 +- .github/workflows/publish.yml | 2 +- .github/workflows/test.yml | 2 +- .github/workflows/upgrade.yml | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/manual-publish.yml b/.github/workflows/manual-publish.yml index 57c3153..d3340b9 100644 --- a/.github/workflows/manual-publish.yml +++ b/.github/workflows/manual-publish.yml @@ -19,7 +19,7 @@ jobs: - uses: actions/setup-node@v4 with: - node-version: 20 + node-version: 24 registry-url: https://registry.npmjs.org - name: Setup Git run: | diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 917e85f..3021ac1 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -23,7 +23,7 @@ jobs: - uses: actions/setup-node@v4 with: - node-version: 20 + node-version: 24 registry-url: https://registry.npmjs.org - name: Setup Git run: | diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8264642..21199a9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -15,7 +15,7 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-node@v4 with: - node-version: 20 + node-version: 24 - run: npm i -g pnpm && pnpm i name: Install dependencies - name: Run unit tests diff --git a/.github/workflows/upgrade.yml b/.github/workflows/upgrade.yml index ea182e3..38b5a6c 100644 --- a/.github/workflows/upgrade.yml +++ 
b/.github/workflows/upgrade.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/setup-node@v4 with: registry-url: https://registry.npmjs.org - node-version: 20 + node-version: 24 - name: Setup Git run: | git config --global user.name "mayank1513" From 1fe6f2717725108b28c8912605e8584a1de407ef Mon Sep 17 00:00:00 2001 From: chitwitgit <100676229+chitwitgit@users.noreply.github.com> Date: Sun, 14 Jun 2026 20:47:45 +0800 Subject: [PATCH 5/7] Address review: sample.md examples and fetch KaTeX at codegen. Add concrete newly-supported LaTeX examples to sample.md so DOCX benefits are easy to spot. Replace vendored KaTeX snippets with codegen that fetches KaTeX v0.16.22 from GitHub (symbols.js, macros.js, functions/op.js). --- lib/scripts/data/katex-macros.js | 1038 ---------------------------- lib/scripts/data/katex-op.js | 339 --------- lib/scripts/data/katex-symbols.js | 895 ------------------------ lib/scripts/generate-katex-data.ts | 262 +++---- lib/src/katexMeta.ts | 2 +- lib/src/katexSymbols.ts | 2 +- sample.md | 9 +- 7 files changed, 152 insertions(+), 2395 deletions(-) delete mode 100644 lib/scripts/data/katex-macros.js delete mode 100644 lib/scripts/data/katex-op.js delete mode 100644 lib/scripts/data/katex-symbols.js diff --git a/lib/scripts/data/katex-macros.js b/lib/scripts/data/katex-macros.js deleted file mode 100644 index 5564fbc..0000000 --- a/lib/scripts/data/katex-macros.js +++ /dev/null @@ -1,1038 +0,0 @@ -/** - * Vendored from KaTeX v0.16.22 (https://github.com/KaTeX/KaTeX). - * SPDX-License-Identifier: MIT - * Regenerate derived outputs: pnpm generate:katex - */ -// @flow -/** - * Predefined macros for KaTeX. - * This can be used to define some commands in terms of others. 
- */ - -// Export global macros object from defineMacro -import defineMacro, {_macros} from "./defineMacro"; -const macros = _macros; -export default macros; - -import fontMetricsData from "./fontMetricsData"; -import functions from "./functions"; -import symbols from "./symbols"; -import utils from "./utils"; -import {makeEm} from "./units"; -import ParseError from "./ParseError"; - - -////////////////////////////////////////////////////////////////////// -// macro tools - -defineMacro("\\noexpand", function(context) { - // The expansion is the token itself; but that token is interpreted - // as if its meaning were ‘\relax’ if it is a control sequence that - // would ordinarily be expanded by TeX’s expansion rules. - const t = context.popToken(); - if (context.isExpandable(t.text)) { - t.noexpand = true; - t.treatAsRelax = true; - } - return {tokens: [t], numArgs: 0}; -}); - -defineMacro("\\expandafter", function(context) { - // TeX first reads the token that comes immediately after \expandafter, - // without expanding it; let’s call this token t. Then TeX reads the - // token that comes after t (and possibly more tokens, if that token - // has an argument), replacing it by its expansion. Finally TeX puts - // t back in front of that expansion. 
- const t = context.popToken(); - context.expandOnce(true); // expand only an expandable token - return {tokens: [t], numArgs: 0}; -}); - -// LaTeX's \@firstoftwo{#1}{#2} expands to #1, skipping #2 -// TeX source: \long\def\@firstoftwo#1#2{#1} -defineMacro("\\@firstoftwo", function(context) { - const args = context.consumeArgs(2); - return {tokens: args[0], numArgs: 0}; -}); - -// LaTeX's \@secondoftwo{#1}{#2} expands to #2, skipping #1 -// TeX source: \long\def\@secondoftwo#1#2{#2} -defineMacro("\\@secondoftwo", function(context) { - const args = context.consumeArgs(2); - return {tokens: args[1], numArgs: 0}; -}); - -// LaTeX's \@ifnextchar{#1}{#2}{#3} looks ahead to the next (unexpanded) -// symbol that isn't a space, consuming any spaces but not consuming the -// first nonspace character. If that nonspace character matches #1, then -// the macro expands to #2; otherwise, it expands to #3. -defineMacro("\\@ifnextchar", function(context) { - const args = context.consumeArgs(3); // symbol, if, else - context.consumeSpaces(); - const nextToken = context.future(); - if (args[0].length === 1 && args[0][0].text === nextToken.text) { - return {tokens: args[1], numArgs: 0}; - } else { - return {tokens: args[2], numArgs: 0}; - } -}); - -// LaTeX's \@ifstar{#1}{#2} looks ahead to the next (unexpanded) symbol. -// If it is `*`, then it consumes the symbol, and the macro expands to #1; -// otherwise, the macro expands to #2 (without consuming the symbol). 
-// TeX source: \def\@ifstar#1{\@ifnextchar *{\@firstoftwo{#1}}} -defineMacro("\\@ifstar", "\\@ifnextchar *{\\@firstoftwo{#1}}"); - -// LaTeX's \TextOrMath{#1}{#2} expands to #1 in text mode, #2 in math mode -defineMacro("\\TextOrMath", function(context) { - const args = context.consumeArgs(2); - if (context.mode === 'text') { - return {tokens: args[0], numArgs: 0}; - } else { - return {tokens: args[1], numArgs: 0}; - } -}); - -// Lookup table for parsing numbers in base 8 through 16 -const digitToNumber = { - "0": 0, "1": 1, "2": 2, "3": 3, "4": 4, "5": 5, "6": 6, "7": 7, "8": 8, - "9": 9, "a": 10, "A": 10, "b": 11, "B": 11, "c": 12, "C": 12, - "d": 13, "D": 13, "e": 14, "E": 14, "f": 15, "F": 15, -}; - -// TeX \char makes a literal character (catcode 12) using the following forms: -// (see The TeXBook, p. 43) -// \char123 -- decimal -// \char'123 -- octal -// \char"123 -- hex -// \char`x -- character that can be written (i.e. isn't active) -// \char`\x -- character that cannot be written (e.g. %) -// These all refer to characters from the font, so we turn them into special -// calls to a function \@char dealt with in the Parser. -defineMacro("\\char", function(context) { - let token = context.popToken(); - let base; - let number = ''; - if (token.text === "'") { - base = 8; - token = context.popToken(); - } else if (token.text === '"') { - base = 16; - token = context.popToken(); - } else if (token.text === "`") { - token = context.popToken(); - if (token.text[0] === "\\") { - number = token.text.charCodeAt(1); - } else if (token.text === "EOF") { - throw new ParseError("\\char` missing argument"); - } else { - number = token.text.charCodeAt(0); - } - } else { - base = 10; - } - if (base) { - // Parse a number in the given base, starting with first `token`. 
- number = digitToNumber[token.text]; - if (number == null || number >= base) { - throw new ParseError(`Invalid base-${base} digit ${token.text}`); - } - let digit; - while ((digit = digitToNumber[context.future().text]) != null && - digit < base) { - number *= base; - number += digit; - context.popToken(); - } - } - return `\\@char{${number}}`; -}); - -// \newcommand{\macro}[args]{definition} -// \renewcommand{\macro}[args]{definition} -// TODO: Optional arguments: \newcommand{\macro}[args][default]{definition} -const newcommand = ( - context, existsOK: boolean, nonexistsOK: boolean, skipIfExists: boolean -) => { - let arg = context.consumeArg().tokens; - if (arg.length !== 1) { - throw new ParseError( - "\\newcommand's first argument must be a macro name"); - } - const name = arg[0].text; - - const exists = context.isDefined(name); - if (exists && !existsOK) { - throw new ParseError(`\\newcommand{${name}} attempting to redefine ` + - `${name}; use \\renewcommand`); - } - if (!exists && !nonexistsOK) { - throw new ParseError(`\\renewcommand{${name}} when command ${name} ` + - `does not yet exist; use \\newcommand`); - } - - let numArgs = 0; - arg = context.consumeArg().tokens; - if (arg.length === 1 && arg[0].text === "[") { - let argText = ''; - let token = context.expandNextToken(); - while (token.text !== "]" && token.text !== "EOF") { - // TODO: Should properly expand arg, e.g., ignore {}s - argText += token.text; - token = context.expandNextToken(); - } - if (!argText.match(/^\s*[0-9]+\s*$/)) { - throw new ParseError(`Invalid number of arguments: ${argText}`); - } - numArgs = parseInt(argText); - arg = context.consumeArg().tokens; - } - - if (!(exists && skipIfExists)) { - // Final arg is the expansion of the macro - context.macros.set(name, { - tokens: arg, - numArgs, - }); - } - return ''; -}; -defineMacro("\\newcommand", - (context) => newcommand(context, false, true, false)); -defineMacro("\\renewcommand", - (context) => newcommand(context, true, false, 
false)); -defineMacro("\\providecommand", - (context) => newcommand(context, true, true, true)); - -// terminal (console) tools -defineMacro("\\message", (context) => { - const arg = context.consumeArgs(1)[0]; - // eslint-disable-next-line no-console - console.log(arg.reverse().map(token => token.text).join("")); - return ''; -}); -defineMacro("\\errmessage", (context) => { - const arg = context.consumeArgs(1)[0]; - // eslint-disable-next-line no-console - console.error(arg.reverse().map(token => token.text).join("")); - return ''; -}); -defineMacro("\\show", (context) => { - const tok = context.popToken(); - const name = tok.text; - // eslint-disable-next-line no-console - console.log(tok, context.macros.get(name), functions[name], - symbols.math[name], symbols.text[name]); - return ''; -}); - -////////////////////////////////////////////////////////////////////// -// Grouping -// \let\bgroup={ \let\egroup=} -defineMacro("\\bgroup", "{"); -defineMacro("\\egroup", "}"); - -// Symbols from latex.ltx: -// \def~{\nobreakspace{}} -// \def\lq{`} -// \def\rq{'} -// \def \aa {\r a} -// \def \AA {\r A} -defineMacro("~", "\\nobreakspace"); -defineMacro("\\lq", "`"); -defineMacro("\\rq", "'"); -defineMacro("\\aa", "\\r a"); -defineMacro("\\AA", "\\r A"); - -// Copyright (C) and registered (R) symbols. Use raw symbol in MathML. 
-// \DeclareTextCommandDefault{\textcopyright}{\textcircled{c}} -// \DeclareTextCommandDefault{\textregistered}{\textcircled{% -// \check@mathfonts\fontsize\sf@size\z@\math@fontsfalse\selectfont R}} -// \DeclareRobustCommand{\copyright}{% -// \ifmmode{\nfss@text{\textcopyright}}\else\textcopyright\fi} -defineMacro("\\textcopyright", "\\html@mathml{\\textcircled{c}}{\\char`©}"); -defineMacro("\\copyright", - "\\TextOrMath{\\textcopyright}{\\text{\\textcopyright}}"); -defineMacro("\\textregistered", - "\\html@mathml{\\textcircled{\\scriptsize R}}{\\char`®}"); - -// Characters omitted from Unicode range 1D400–1D7FF -defineMacro("\u212C", "\\mathscr{B}"); // script -defineMacro("\u2130", "\\mathscr{E}"); -defineMacro("\u2131", "\\mathscr{F}"); -defineMacro("\u210B", "\\mathscr{H}"); -defineMacro("\u2110", "\\mathscr{I}"); -defineMacro("\u2112", "\\mathscr{L}"); -defineMacro("\u2133", "\\mathscr{M}"); -defineMacro("\u211B", "\\mathscr{R}"); -defineMacro("\u212D", "\\mathfrak{C}"); // Fraktur -defineMacro("\u210C", "\\mathfrak{H}"); -defineMacro("\u2128", "\\mathfrak{Z}"); - -// Define \Bbbk with a macro that works in both HTML and MathML. -defineMacro("\\Bbbk", "\\Bbb{k}"); - -// Unicode middle dot -// The KaTeX fonts do not contain U+00B7. Instead, \cdotp displays -// the dot at U+22C5 and gives it punct spacing. -defineMacro("\u00b7", "\\cdotp"); - -// \llap and \rlap render their contents in text mode -defineMacro("\\llap", "\\mathllap{\\textrm{#1}}"); -defineMacro("\\rlap", "\\mathrlap{\\textrm{#1}}"); -defineMacro("\\clap", "\\mathclap{\\textrm{#1}}"); - -// \mathstrut from the TeXbook, p 360 -defineMacro("\\mathstrut", "\\vphantom{(}"); - -// \underbar from TeXbook p 353 -defineMacro("\\underbar", "\\underline{\\text{#1}}"); - -// \not is defined by base/fontmath.ltx via -// \DeclareMathSymbol{\not}{\mathrel}{symbols}{"36} -// It's thus treated like a \mathrel, but defined by a symbol that has zero -// width but extends to the right. 
We use \rlap to get that spacing. -// For MathML we write U+0338 here. buildMathML.js will then do the overlay. -defineMacro("\\not", '\\html@mathml{\\mathrel{\\mathrlap\\@not}}{\\char"338}'); - -// Negated symbols from base/fontmath.ltx: -// \def\neq{\not=} \let\ne=\neq -// \DeclareRobustCommand -// \notin{\mathrel{\m@th\mathpalette\c@ncel\in}} -// \def\c@ncel#1#2{\m@th\ooalign{$\hfil#1\mkern1mu/\hfil$\crcr$#1#2$}} -defineMacro("\\neq", "\\html@mathml{\\mathrel{\\not=}}{\\mathrel{\\char`≠}}"); -defineMacro("\\ne", "\\neq"); -defineMacro("\u2260", "\\neq"); -defineMacro("\\notin", "\\html@mathml{\\mathrel{{\\in}\\mathllap{/\\mskip1mu}}}" - + "{\\mathrel{\\char`∉}}"); -defineMacro("\u2209", "\\notin"); - -// Unicode stacked relations -defineMacro("\u2258", "\\html@mathml{" + - "\\mathrel{=\\kern{-1em}\\raisebox{0.4em}{$\\scriptsize\\frown$}}" + - "}{\\mathrel{\\char`\u2258}}"); -defineMacro("\u2259", - "\\html@mathml{\\stackrel{\\tiny\\wedge}{=}}{\\mathrel{\\char`\u2258}}"); -defineMacro("\u225A", - "\\html@mathml{\\stackrel{\\tiny\\vee}{=}}{\\mathrel{\\char`\u225A}}"); -defineMacro("\u225B", - "\\html@mathml{\\stackrel{\\scriptsize\\star}{=}}" + - "{\\mathrel{\\char`\u225B}}"); -defineMacro("\u225D", - "\\html@mathml{\\stackrel{\\tiny\\mathrm{def}}{=}}" + - "{\\mathrel{\\char`\u225D}}"); -defineMacro("\u225E", - "\\html@mathml{\\stackrel{\\tiny\\mathrm{m}}{=}}" + - "{\\mathrel{\\char`\u225E}}"); -defineMacro("\u225F", - "\\html@mathml{\\stackrel{\\tiny?}{=}}{\\mathrel{\\char`\u225F}}"); - -// Misc Unicode -defineMacro("\u27C2", "\\perp"); -defineMacro("\u203C", "\\mathclose{!\\mkern-0.8mu!}"); -defineMacro("\u220C", "\\notni"); -defineMacro("\u231C", "\\ulcorner"); -defineMacro("\u231D", "\\urcorner"); -defineMacro("\u231E", "\\llcorner"); -defineMacro("\u231F", "\\lrcorner"); -defineMacro("\u00A9", "\\copyright"); -defineMacro("\u00AE", "\\textregistered"); -defineMacro("\uFE0F", "\\textregistered"); - -// The KaTeX fonts have corners at codepoints that don't 
match Unicode. -// For MathML purposes, use the Unicode code point. -defineMacro("\\ulcorner", "\\html@mathml{\\@ulcorner}{\\mathop{\\char\"231c}}"); -defineMacro("\\urcorner", "\\html@mathml{\\@urcorner}{\\mathop{\\char\"231d}}"); -defineMacro("\\llcorner", "\\html@mathml{\\@llcorner}{\\mathop{\\char\"231e}}"); -defineMacro("\\lrcorner", "\\html@mathml{\\@lrcorner}{\\mathop{\\char\"231f}}"); - -////////////////////////////////////////////////////////////////////// -// LaTeX_2ε - -// \vdots{\vbox{\baselineskip4\p@ \lineskiplimit\z@ -// \kern6\p@\hbox{.}\hbox{.}\hbox{.}}} -// We'll call \varvdots, which gets a glyph from symbols.js. -// The zero-width rule gets us an equivalent to the vertical 6pt kern. -defineMacro("\\vdots", "{\\varvdots\\rule{0pt}{15pt}}"); -defineMacro("\u22ee", "\\vdots"); - -////////////////////////////////////////////////////////////////////// -// amsmath.sty -// http://mirrors.concertpass.com/tex-archive/macros/latex/required/amsmath/amsmath.pdf - -// Italic Greek capital letters. AMS defines these with \DeclareMathSymbol, -// but they are equivalent to \mathit{\Letter}. 
-defineMacro("\\varGamma", "\\mathit{\\Gamma}"); -defineMacro("\\varDelta", "\\mathit{\\Delta}"); -defineMacro("\\varTheta", "\\mathit{\\Theta}"); -defineMacro("\\varLambda", "\\mathit{\\Lambda}"); -defineMacro("\\varXi", "\\mathit{\\Xi}"); -defineMacro("\\varPi", "\\mathit{\\Pi}"); -defineMacro("\\varSigma", "\\mathit{\\Sigma}"); -defineMacro("\\varUpsilon", "\\mathit{\\Upsilon}"); -defineMacro("\\varPhi", "\\mathit{\\Phi}"); -defineMacro("\\varPsi", "\\mathit{\\Psi}"); -defineMacro("\\varOmega", "\\mathit{\\Omega}"); - -//\newcommand{\substack}[1]{\subarray{c}#1\endsubarray} -defineMacro("\\substack", "\\begin{subarray}{c}#1\\end{subarray}"); - -// \renewcommand{\colon}{\nobreak\mskip2mu\mathpunct{}\nonscript -// \mkern-\thinmuskip{:}\mskip6muplus1mu\relax} -defineMacro("\\colon", "\\nobreak\\mskip2mu\\mathpunct{}" + - "\\mathchoice{\\mkern-3mu}{\\mkern-3mu}{}{}{:}\\mskip6mu\\relax"); - -// \newcommand{\boxed}[1]{\fbox{\m@th$\displaystyle#1$}} -defineMacro("\\boxed", "\\fbox{$\\displaystyle{#1}$}"); - -// \def\iff{\DOTSB\;\Longleftrightarrow\;} -// \def\implies{\DOTSB\;\Longrightarrow\;} -// \def\impliedby{\DOTSB\;\Longleftarrow\;} -defineMacro("\\iff", "\\DOTSB\\;\\Longleftrightarrow\\;"); -defineMacro("\\implies", "\\DOTSB\\;\\Longrightarrow\\;"); -defineMacro("\\impliedby", "\\DOTSB\\;\\Longleftarrow\\;"); - -// \def\dddot#1{{\mathop{#1}\limits^{\vbox to-1.4\ex@{\kern-\tw@\ex@ -// \hbox{\normalfont ...}\vss}}}} -// We use \overset which avoids the vertical shift of \mathop. -defineMacro("\\dddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ...}}{#1}}"); -defineMacro("\\ddddot", "{\\overset{\\raisebox{-0.1ex}{\\normalsize ....}}{#1}}"); - -// AMSMath's automatic \dots, based on \mdots@@ macro. 
-const dotsByToken = { - ',': '\\dotsc', - '\\not': '\\dotsb', - // \keybin@ checks for the following: - '+': '\\dotsb', - '=': '\\dotsb', - '<': '\\dotsb', - '>': '\\dotsb', - '-': '\\dotsb', - '*': '\\dotsb', - ':': '\\dotsb', - // Symbols whose definition starts with \DOTSB: - '\\DOTSB': '\\dotsb', - '\\coprod': '\\dotsb', - '\\bigvee': '\\dotsb', - '\\bigwedge': '\\dotsb', - '\\biguplus': '\\dotsb', - '\\bigcap': '\\dotsb', - '\\bigcup': '\\dotsb', - '\\prod': '\\dotsb', - '\\sum': '\\dotsb', - '\\bigotimes': '\\dotsb', - '\\bigoplus': '\\dotsb', - '\\bigodot': '\\dotsb', - '\\bigsqcup': '\\dotsb', - '\\And': '\\dotsb', - '\\longrightarrow': '\\dotsb', - '\\Longrightarrow': '\\dotsb', - '\\longleftarrow': '\\dotsb', - '\\Longleftarrow': '\\dotsb', - '\\longleftrightarrow': '\\dotsb', - '\\Longleftrightarrow': '\\dotsb', - '\\mapsto': '\\dotsb', - '\\longmapsto': '\\dotsb', - '\\hookrightarrow': '\\dotsb', - '\\doteq': '\\dotsb', - // Symbols whose definition starts with \mathbin: - '\\mathbin': '\\dotsb', - // Symbols whose definition starts with \mathrel: - '\\mathrel': '\\dotsb', - '\\relbar': '\\dotsb', - '\\Relbar': '\\dotsb', - '\\xrightarrow': '\\dotsb', - '\\xleftarrow': '\\dotsb', - // Symbols whose definition starts with \DOTSI: - '\\DOTSI': '\\dotsi', - '\\int': '\\dotsi', - '\\oint': '\\dotsi', - '\\iint': '\\dotsi', - '\\iiint': '\\dotsi', - '\\iiiint': '\\dotsi', - '\\idotsint': '\\dotsi', - // Symbols whose definition starts with \DOTSX: - '\\DOTSX': '\\dotsx', -}; - -defineMacro("\\dots", function(context) { - // TODO: If used in text mode, should expand to \textellipsis. - // However, in KaTeX, \textellipsis and \ldots behave the same - // (in text mode), and it's unlikely we'd see any of the math commands - // that affect the behavior of \dots when in text mode. So fine for now - // (until we support \ifmmode ... \else ... \fi). 
- let thedots = '\\dotso'; - const next = context.expandAfterFuture().text; - if (next in dotsByToken) { - thedots = dotsByToken[next]; - } else if (next.slice(0, 4) === '\\not') { - thedots = '\\dotsb'; - } else if (next in symbols.math) { - if (utils.contains(['bin', 'rel'], symbols.math[next].group)) { - thedots = '\\dotsb'; - } - } - return thedots; -}); - -const spaceAfterDots = { - // \rightdelim@ checks for the following: - ')': true, - ']': true, - '\\rbrack': true, - '\\}': true, - '\\rbrace': true, - '\\rangle': true, - '\\rceil': true, - '\\rfloor': true, - '\\rgroup': true, - '\\rmoustache': true, - '\\right': true, - '\\bigr': true, - '\\biggr': true, - '\\Bigr': true, - '\\Biggr': true, - // \extra@ also tests for the following: - '$': true, - // \extrap@ checks for the following: - ';': true, - '.': true, - ',': true, -}; - -defineMacro("\\dotso", function(context) { - const next = context.future().text; - if (next in spaceAfterDots) { - return "\\ldots\\,"; - } else { - return "\\ldots"; - } -}); - -defineMacro("\\dotsc", function(context) { - const next = context.future().text; - // \dotsc uses \extra@ but not \extrap@, instead specially checking for - // ';' and '.', but doesn't check for ','. - if (next in spaceAfterDots && next !== ',') { - return "\\ldots\\,"; - } else { - return "\\ldots"; - } -}); - -defineMacro("\\cdots", function(context) { - const next = context.future().text; - if (next in spaceAfterDots) { - return "\\@cdots\\,"; - } else { - return "\\@cdots"; - } -}); - -defineMacro("\\dotsb", "\\cdots"); -defineMacro("\\dotsm", "\\cdots"); -defineMacro("\\dotsi", "\\!\\cdots"); -// amsmath doesn't actually define \dotsx, but \dots followed by a macro -// starting with \DOTSX implies \dotso, and then \extra@ detects this case -// and forces the added `\,`. 
-defineMacro("\\dotsx", "\\ldots\\,"); - -// \let\DOTSI\relax -// \let\DOTSB\relax -// \let\DOTSX\relax -defineMacro("\\DOTSI", "\\relax"); -defineMacro("\\DOTSB", "\\relax"); -defineMacro("\\DOTSX", "\\relax"); - -// Spacing, based on amsmath.sty's override of LaTeX defaults -// \DeclareRobustCommand{\tmspace}[3]{% -// \ifmmode\mskip#1#2\else\kern#1#3\fi\relax} -defineMacro("\\tmspace", "\\TextOrMath{\\kern#1#3}{\\mskip#1#2}\\relax"); -// \renewcommand{\,}{\tmspace+\thinmuskip{.1667em}} -// TODO: math mode should use \thinmuskip -defineMacro("\\,", "\\tmspace+{3mu}{.1667em}"); -// \let\thinspace\, -defineMacro("\\thinspace", "\\,"); -// \def\>{\mskip\medmuskip} -// \renewcommand{\:}{\tmspace+\medmuskip{.2222em}} -// TODO: \> and math mode of \: should use \medmuskip = 4mu plus 2mu minus 4mu -defineMacro("\\>", "\\mskip{4mu}"); -defineMacro("\\:", "\\tmspace+{4mu}{.2222em}"); -// \let\medspace\: -defineMacro("\\medspace", "\\:"); -// \renewcommand{\;}{\tmspace+\thickmuskip{.2777em}} -// TODO: math mode should use \thickmuskip = 5mu plus 5mu -defineMacro("\\;", "\\tmspace+{5mu}{.2777em}"); -// \let\thickspace\; -defineMacro("\\thickspace", "\\;"); -// \renewcommand{\!}{\tmspace-\thinmuskip{.1667em}} -// TODO: math mode should use \thinmuskip -defineMacro("\\!", "\\tmspace-{3mu}{.1667em}"); -// \let\negthinspace\! 
-defineMacro("\\negthinspace", "\\!"); -// \newcommand{\negmedspace}{\tmspace-\medmuskip{.2222em}} -// TODO: math mode should use \medmuskip -defineMacro("\\negmedspace", "\\tmspace-{4mu}{.2222em}"); -// \newcommand{\negthickspace}{\tmspace-\thickmuskip{.2777em}} -// TODO: math mode should use \thickmuskip -defineMacro("\\negthickspace", "\\tmspace-{5mu}{.277em}"); -// \def\enspace{\kern.5em } -defineMacro("\\enspace", "\\kern.5em "); -// \def\enskip{\hskip.5em\relax} -defineMacro("\\enskip", "\\hskip.5em\\relax"); -// \def\quad{\hskip1em\relax} -defineMacro("\\quad", "\\hskip1em\\relax"); -// \def\qquad{\hskip2em\relax} -defineMacro("\\qquad", "\\hskip2em\\relax"); - -// \tag@in@display form of \tag -defineMacro("\\tag", "\\@ifstar\\tag@literal\\tag@paren"); -defineMacro("\\tag@paren", "\\tag@literal{({#1})}"); -defineMacro("\\tag@literal", (context) => { - if (context.macros.get("\\df@tag")) { - throw new ParseError("Multiple \\tag"); - } - return "\\gdef\\df@tag{\\text{#1}}"; -}); - -// \renewcommand{\bmod}{\nonscript\mskip-\medmuskip\mkern5mu\mathbin -// {\operator@font mod}\penalty900 -// \mkern5mu\nonscript\mskip-\medmuskip} -// \newcommand{\pod}[1]{\allowbreak -// \if@display\mkern18mu\else\mkern8mu\fi(#1)} -// \renewcommand{\pmod}[1]{\pod{{\operator@font mod}\mkern6mu#1}} -// \newcommand{\mod}[1]{\allowbreak\if@display\mkern18mu -// \else\mkern12mu\fi{\operator@font mod}\,\,#1} -// TODO: math mode should use \medmuskip = 4mu plus 2mu minus 4mu -defineMacro("\\bmod", - "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}" + - "\\mathbin{\\rm mod}" + - "\\mathchoice{\\mskip1mu}{\\mskip1mu}{\\mskip5mu}{\\mskip5mu}"); -defineMacro("\\pod", "\\allowbreak" + - "\\mathchoice{\\mkern18mu}{\\mkern8mu}{\\mkern8mu}{\\mkern8mu}(#1)"); -defineMacro("\\pmod", "\\pod{{\\rm mod}\\mkern6mu#1}"); -defineMacro("\\mod", "\\allowbreak" + - "\\mathchoice{\\mkern18mu}{\\mkern12mu}{\\mkern12mu}{\\mkern12mu}" + - "{\\rm mod}\\,\\,#1"); - 
-////////////////////////////////////////////////////////////////////// -// LaTeX source2e - -// \expandafter\let\expandafter\@normalcr -// \csname\expandafter\@gobble\string\\ \endcsname -// \DeclareRobustCommand\newline{\@normalcr\relax} -defineMacro("\\newline", "\\\\\\relax"); - -// \def\TeX{T\kern-.1667em\lower.5ex\hbox{E}\kern-.125emX\@} -// TODO: Doesn't normally work in math mode because \@ fails. KaTeX doesn't -// support \@ yet, so that's omitted, and we add \text so that the result -// doesn't look funny in math mode. -defineMacro("\\TeX", "\\textrm{\\html@mathml{" + - "T\\kern-.1667em\\raisebox{-.5ex}{E}\\kern-.125emX" + - "}{TeX}}"); - -// \DeclareRobustCommand{\LaTeX}{L\kern-.36em% -// {\sbox\z@ T% -// \vbox to\ht\z@{\hbox{\check@mathfonts -// \fontsize\sf@size\z@ -// \math@fontsfalse\selectfont -// A}% -// \vss}% -// }% -// \kern-.15em% -// \TeX} -// This code aligns the top of the A with the T (from the perspective of TeX's -// boxes, though visually the A appears to extend above slightly). -// We compute the corresponding \raisebox when A is rendered in \normalsize -// \scriptstyle, which has a scale factor of 0.7 (see Options.js). 
-const latexRaiseA = makeEm(fontMetricsData['Main-Regular']["T".charCodeAt(0)][1] - - 0.7 * fontMetricsData['Main-Regular']["A".charCodeAt(0)][1]); -defineMacro("\\LaTeX", "\\textrm{\\html@mathml{" + - `L\\kern-.36em\\raisebox{${latexRaiseA}}{\\scriptstyle A}` + - "\\kern-.15em\\TeX}{LaTeX}}"); - -// New KaTeX logo based on tweaking LaTeX logo -defineMacro("\\KaTeX", "\\textrm{\\html@mathml{" + - `K\\kern-.17em\\raisebox{${latexRaiseA}}{\\scriptstyle A}` + - "\\kern-.15em\\TeX}{KaTeX}}"); - -// \DeclareRobustCommand\hspace{\@ifstar\@hspacer\@hspace} -// \def\@hspace#1{\hskip #1\relax} -// \def\@hspacer#1{\vrule \@width\z@\nobreak -// \hskip #1\hskip \z@skip} -defineMacro("\\hspace", "\\@ifstar\\@hspacer\\@hspace"); -defineMacro("\\@hspace", "\\hskip #1\\relax"); -defineMacro("\\@hspacer", "\\rule{0pt}{0pt}\\hskip #1\\relax"); - -////////////////////////////////////////////////////////////////////// -// mathtools.sty - -//\providecommand\ordinarycolon{:} -defineMacro("\\ordinarycolon", ":"); -//\def\vcentcolon{\mathrel{\mathop\ordinarycolon}} -//TODO(edemaine): Not yet centered. 
Fix via \raisebox or #726 -defineMacro("\\vcentcolon", "\\mathrel{\\mathop\\ordinarycolon}"); -// \providecommand*\dblcolon{\vcentcolon\mathrel{\mkern-.9mu}\vcentcolon} -defineMacro("\\dblcolon", "\\html@mathml{" + - "\\mathrel{\\vcentcolon\\mathrel{\\mkern-.9mu}\\vcentcolon}}" + - "{\\mathop{\\char\"2237}}"); -// \providecommand*\coloneqq{\vcentcolon\mathrel{\mkern-1.2mu}=} -defineMacro("\\coloneqq", "\\html@mathml{" + - "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}=}}" + - "{\\mathop{\\char\"2254}}"); // ≔ -// \providecommand*\Coloneqq{\dblcolon\mathrel{\mkern-1.2mu}=} -defineMacro("\\Coloneqq", "\\html@mathml{" + - "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}=}}" + - "{\\mathop{\\char\"2237\\char\"3d}}"); -// \providecommand*\coloneq{\vcentcolon\mathrel{\mkern-1.2mu}\mathrel{-}} -defineMacro("\\coloneq", "\\html@mathml{" + - "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}" + - "{\\mathop{\\char\"3a\\char\"2212}}"); -// \providecommand*\Coloneq{\dblcolon\mathrel{\mkern-1.2mu}\mathrel{-}} -defineMacro("\\Coloneq", "\\html@mathml{" + - "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\mathrel{-}}}" + - "{\\mathop{\\char\"2237\\char\"2212}}"); -// \providecommand*\eqqcolon{=\mathrel{\mkern-1.2mu}\vcentcolon} -defineMacro("\\eqqcolon", "\\html@mathml{" + - "\\mathrel{=\\mathrel{\\mkern-1.2mu}\\vcentcolon}}" + - "{\\mathop{\\char\"2255}}"); // ≕ -// \providecommand*\Eqqcolon{=\mathrel{\mkern-1.2mu}\dblcolon} -defineMacro("\\Eqqcolon", "\\html@mathml{" + - "\\mathrel{=\\mathrel{\\mkern-1.2mu}\\dblcolon}}" + - "{\\mathop{\\char\"3d\\char\"2237}}"); -// \providecommand*\eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\vcentcolon} -defineMacro("\\eqcolon", "\\html@mathml{" + - "\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\vcentcolon}}" + - "{\\mathop{\\char\"2239}}"); -// \providecommand*\Eqcolon{\mathrel{-}\mathrel{\mkern-1.2mu}\dblcolon} -defineMacro("\\Eqcolon", "\\html@mathml{" + - "\\mathrel{\\mathrel{-}\\mathrel{\\mkern-1.2mu}\\dblcolon}}" + - 
"{\\mathop{\\char\"2212\\char\"2237}}"); -// \providecommand*\colonapprox{\vcentcolon\mathrel{\mkern-1.2mu}\approx} -defineMacro("\\colonapprox", "\\html@mathml{" + - "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\approx}}" + - "{\\mathop{\\char\"3a\\char\"2248}}"); -// \providecommand*\Colonapprox{\dblcolon\mathrel{\mkern-1.2mu}\approx} -defineMacro("\\Colonapprox", "\\html@mathml{" + - "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\approx}}" + - "{\\mathop{\\char\"2237\\char\"2248}}"); -// \providecommand*\colonsim{\vcentcolon\mathrel{\mkern-1.2mu}\sim} -defineMacro("\\colonsim", "\\html@mathml{" + - "\\mathrel{\\vcentcolon\\mathrel{\\mkern-1.2mu}\\sim}}" + - "{\\mathop{\\char\"3a\\char\"223c}}"); -// \providecommand*\Colonsim{\dblcolon\mathrel{\mkern-1.2mu}\sim} -defineMacro("\\Colonsim", "\\html@mathml{" + - "\\mathrel{\\dblcolon\\mathrel{\\mkern-1.2mu}\\sim}}" + - "{\\mathop{\\char\"2237\\char\"223c}}"); - -// Some Unicode characters are implemented with macros to mathtools functions. -defineMacro("\u2237", "\\dblcolon"); // :: -defineMacro("\u2239", "\\eqcolon"); // -: -defineMacro("\u2254", "\\coloneqq"); // := -defineMacro("\u2255", "\\eqqcolon"); // =: -defineMacro("\u2A74", "\\Coloneqq"); // ::= - -////////////////////////////////////////////////////////////////////// -// colonequals.sty - -// Alternate names for mathtools's macros: -defineMacro("\\ratio", "\\vcentcolon"); -defineMacro("\\coloncolon", "\\dblcolon"); -defineMacro("\\colonequals", "\\coloneqq"); -defineMacro("\\coloncolonequals", "\\Coloneqq"); -defineMacro("\\equalscolon", "\\eqqcolon"); -defineMacro("\\equalscoloncolon", "\\Eqqcolon"); -defineMacro("\\colonminus", "\\coloneq"); -defineMacro("\\coloncolonminus", "\\Coloneq"); -defineMacro("\\minuscolon", "\\eqcolon"); -defineMacro("\\minuscoloncolon", "\\Eqcolon"); -// \colonapprox name is same in mathtools and colonequals. -defineMacro("\\coloncolonapprox", "\\Colonapprox"); -// \colonsim name is same in mathtools and colonequals. 
-defineMacro("\\coloncolonsim", "\\Colonsim"); - -// Additional macros, implemented by analogy with mathtools definitions: -defineMacro("\\simcolon", - "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\vcentcolon}"); -defineMacro("\\simcoloncolon", - "\\mathrel{\\sim\\mathrel{\\mkern-1.2mu}\\dblcolon}"); -defineMacro("\\approxcolon", - "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\vcentcolon}"); -defineMacro("\\approxcoloncolon", - "\\mathrel{\\approx\\mathrel{\\mkern-1.2mu}\\dblcolon}"); - -// Present in newtxmath, pxfonts and txfonts -defineMacro("\\notni", "\\html@mathml{\\not\\ni}{\\mathrel{\\char`\u220C}}"); -defineMacro("\\limsup", "\\DOTSB\\operatorname*{lim\\,sup}"); -defineMacro("\\liminf", "\\DOTSB\\operatorname*{lim\\,inf}"); - -////////////////////////////////////////////////////////////////////// -// From amsopn.sty -defineMacro("\\injlim", "\\DOTSB\\operatorname*{inj\\,lim}"); -defineMacro("\\projlim", "\\DOTSB\\operatorname*{proj\\,lim}"); -defineMacro("\\varlimsup", "\\DOTSB\\operatorname*{\\overline{lim}}"); -defineMacro("\\varliminf", "\\DOTSB\\operatorname*{\\underline{lim}}"); -defineMacro("\\varinjlim", "\\DOTSB\\operatorname*{\\underrightarrow{lim}}"); -defineMacro("\\varprojlim", "\\DOTSB\\operatorname*{\\underleftarrow{lim}}"); - -////////////////////////////////////////////////////////////////////// -// MathML alternates for KaTeX glyphs in the Unicode private area -defineMacro("\\gvertneqq", "\\html@mathml{\\@gvertneqq}{\u2269}"); -defineMacro("\\lvertneqq", "\\html@mathml{\\@lvertneqq}{\u2268}"); -defineMacro("\\ngeqq", "\\html@mathml{\\@ngeqq}{\u2271}"); -defineMacro("\\ngeqslant", "\\html@mathml{\\@ngeqslant}{\u2271}"); -defineMacro("\\nleqq", "\\html@mathml{\\@nleqq}{\u2270}"); -defineMacro("\\nleqslant", "\\html@mathml{\\@nleqslant}{\u2270}"); -defineMacro("\\nshortmid", "\\html@mathml{\\@nshortmid}{∤}"); -defineMacro("\\nshortparallel", "\\html@mathml{\\@nshortparallel}{∦}"); -defineMacro("\\nsubseteqq", 
"\\html@mathml{\\@nsubseteqq}{\u2288}"); -defineMacro("\\nsupseteqq", "\\html@mathml{\\@nsupseteqq}{\u2289}"); -defineMacro("\\varsubsetneq", "\\html@mathml{\\@varsubsetneq}{⊊}"); -defineMacro("\\varsubsetneqq", "\\html@mathml{\\@varsubsetneqq}{⫋}"); -defineMacro("\\varsupsetneq", "\\html@mathml{\\@varsupsetneq}{⊋}"); -defineMacro("\\varsupsetneqq", "\\html@mathml{\\@varsupsetneqq}{⫌}"); -defineMacro("\\imath", "\\html@mathml{\\@imath}{\u0131}"); -defineMacro("\\jmath", "\\html@mathml{\\@jmath}{\u0237}"); - -////////////////////////////////////////////////////////////////////// -// stmaryrd and semantic - -// The stmaryrd and semantic packages render the next four items by calling a -// glyph. Those glyphs do not exist in the KaTeX fonts. Hence the macros. - -defineMacro("\\llbracket", "\\html@mathml{" + - "\\mathopen{[\\mkern-3.2mu[}}" + - "{\\mathopen{\\char`\u27e6}}"); -defineMacro("\\rrbracket", "\\html@mathml{" + - "\\mathclose{]\\mkern-3.2mu]}}" + - "{\\mathclose{\\char`\u27e7}}"); - -defineMacro("\u27e6", "\\llbracket"); // blackboard bold [ -defineMacro("\u27e7", "\\rrbracket"); // blackboard bold ] - -defineMacro("\\lBrace", "\\html@mathml{" + - "\\mathopen{\\{\\mkern-3.2mu[}}" + - "{\\mathopen{\\char`\u2983}}"); -defineMacro("\\rBrace", "\\html@mathml{" + - "\\mathclose{]\\mkern-3.2mu\\}}}" + - "{\\mathclose{\\char`\u2984}}"); - -defineMacro("\u2983", "\\lBrace"); // blackboard bold { -defineMacro("\u2984", "\\rBrace"); // blackboard bold } - -// TODO: Create variable sized versions of the last two items. I believe that -// will require new font glyphs. - -// The stmaryrd function `\minuso` provides a "Plimsoll" symbol that -// superimposes the characters \circ and \mathminus. Used in chemistry. 
-defineMacro("\\minuso", "\\mathbin{\\html@mathml{" + - "{\\mathrlap{\\mathchoice{\\kern{0.145em}}{\\kern{0.145em}}" + - "{\\kern{0.1015em}}{\\kern{0.0725em}}\\circ}{-}}}" + - "{\\char`⦵}}"); -defineMacro("⦵", "\\minuso"); - -////////////////////////////////////////////////////////////////////// -// texvc.sty - -// The texvc package contains macros available in mediawiki pages. -// We omit the functions deprecated at -// https://en.wikipedia.org/wiki/Help:Displaying_a_formula#Deprecated_syntax - -// We also omit texvc's \O, which conflicts with \text{\O} - -defineMacro("\\darr", "\\downarrow"); -defineMacro("\\dArr", "\\Downarrow"); -defineMacro("\\Darr", "\\Downarrow"); -defineMacro("\\lang", "\\langle"); -defineMacro("\\rang", "\\rangle"); -defineMacro("\\uarr", "\\uparrow"); -defineMacro("\\uArr", "\\Uparrow"); -defineMacro("\\Uarr", "\\Uparrow"); -defineMacro("\\N", "\\mathbb{N}"); -defineMacro("\\R", "\\mathbb{R}"); -defineMacro("\\Z", "\\mathbb{Z}"); -defineMacro("\\alef", "\\aleph"); -defineMacro("\\alefsym", "\\aleph"); -defineMacro("\\Alpha", "\\mathrm{A}"); -defineMacro("\\Beta", "\\mathrm{B}"); -defineMacro("\\bull", "\\bullet"); -defineMacro("\\Chi", "\\mathrm{X}"); -defineMacro("\\clubs", "\\clubsuit"); -defineMacro("\\cnums", "\\mathbb{C}"); -defineMacro("\\Complex", "\\mathbb{C}"); -defineMacro("\\Dagger", "\\ddagger"); -defineMacro("\\diamonds", "\\diamondsuit"); -defineMacro("\\empty", "\\emptyset"); -defineMacro("\\Epsilon", "\\mathrm{E}"); -defineMacro("\\Eta", "\\mathrm{H}"); -defineMacro("\\exist", "\\exists"); -defineMacro("\\harr", "\\leftrightarrow"); -defineMacro("\\hArr", "\\Leftrightarrow"); -defineMacro("\\Harr", "\\Leftrightarrow"); -defineMacro("\\hearts", "\\heartsuit"); -defineMacro("\\image", "\\Im"); -defineMacro("\\infin", "\\infty"); -defineMacro("\\Iota", "\\mathrm{I}"); -defineMacro("\\isin", "\\in"); -defineMacro("\\Kappa", "\\mathrm{K}"); -defineMacro("\\larr", "\\leftarrow"); -defineMacro("\\lArr", "\\Leftarrow"); 
-defineMacro("\\Larr", "\\Leftarrow"); -defineMacro("\\lrarr", "\\leftrightarrow"); -defineMacro("\\lrArr", "\\Leftrightarrow"); -defineMacro("\\Lrarr", "\\Leftrightarrow"); -defineMacro("\\Mu", "\\mathrm{M}"); -defineMacro("\\natnums", "\\mathbb{N}"); -defineMacro("\\Nu", "\\mathrm{N}"); -defineMacro("\\Omicron", "\\mathrm{O}"); -defineMacro("\\plusmn", "\\pm"); -defineMacro("\\rarr", "\\rightarrow"); -defineMacro("\\rArr", "\\Rightarrow"); -defineMacro("\\Rarr", "\\Rightarrow"); -defineMacro("\\real", "\\Re"); -defineMacro("\\reals", "\\mathbb{R}"); -defineMacro("\\Reals", "\\mathbb{R}"); -defineMacro("\\Rho", "\\mathrm{P}"); -defineMacro("\\sdot", "\\cdot"); -defineMacro("\\sect", "\\S"); -defineMacro("\\spades", "\\spadesuit"); -defineMacro("\\sub", "\\subset"); -defineMacro("\\sube", "\\subseteq"); -defineMacro("\\supe", "\\supseteq"); -defineMacro("\\Tau", "\\mathrm{T}"); -defineMacro("\\thetasym", "\\vartheta"); -// TODO: defineMacro("\\varcoppa", "\\\mbox{\\coppa}"); -defineMacro("\\weierp", "\\wp"); -defineMacro("\\Zeta", "\\mathrm{Z}"); - -////////////////////////////////////////////////////////////////////// -// statmath.sty -// https://ctan.math.illinois.edu/macros/latex/contrib/statmath/statmath.pdf - -defineMacro("\\argmin", "\\DOTSB\\operatorname*{arg\\,min}"); -defineMacro("\\argmax", "\\DOTSB\\operatorname*{arg\\,max}"); -defineMacro("\\plim", "\\DOTSB\\mathop{\\operatorname{plim}}\\limits"); - -////////////////////////////////////////////////////////////////////// -// braket.sty -// http://ctan.math.washington.edu/tex-archive/macros/latex/contrib/braket/braket.pdf - -defineMacro("\\bra", "\\mathinner{\\langle{#1}|}"); -defineMacro("\\ket", "\\mathinner{|{#1}\\rangle}"); -defineMacro("\\braket", "\\mathinner{\\langle{#1}\\rangle}"); -defineMacro("\\Bra", "\\left\\langle#1\\right|"); -defineMacro("\\Ket", "\\left|#1\\right\\rangle"); -const braketHelper = (one) => (context) => { - const left = context.consumeArg().tokens; - const middle = 
context.consumeArg().tokens; - const middleDouble = context.consumeArg().tokens; - const right = context.consumeArg().tokens; - const oldMiddle = context.macros.get("|"); - const oldMiddleDouble = context.macros.get("\\|"); - context.macros.beginGroup(); - const midMacro = (double) => (context) => { - if (one) { - // Only modify the first instance of | or \| - context.macros.set("|", oldMiddle); - if (middleDouble.length) { - context.macros.set("\\|", oldMiddleDouble); - } - } - let doubled = double; - if (!double && middleDouble.length) { - // Mimic \@ifnextchar - const nextToken = context.future(); - if (nextToken.text === "|") { - context.popToken(); - doubled = true; - } - } - return { - tokens: doubled ? middleDouble : middle, - numArgs: 0, - }; - }; - context.macros.set("|", midMacro(false)); - if (middleDouble.length) { - context.macros.set("\\|", midMacro(true)); - } - const arg = context.consumeArg().tokens; - const expanded = context.expandTokens([ - ...right, ...arg, ...left, // reversed - ]); - context.macros.endGroup(); - return { - tokens: expanded.reverse(), - numArgs: 0, - }; -}; -defineMacro("\\bra@ket", braketHelper(false)); -defineMacro("\\bra@set", braketHelper(true)); -defineMacro("\\Braket", "\\bra@ket{\\left\\langle}" + - "{\\,\\middle\\vert\\,}{\\,\\middle\\vert\\,}{\\right\\rangle}"); -defineMacro("\\Set", "\\bra@set{\\left\\{\\:}" + - "{\\;\\middle\\vert\\;}{\\;\\middle\\Vert\\;}{\\:\\right\\}}"); -defineMacro("\\set", "\\bra@set{\\{\\,}{\\mid}{}{\\,\\}}"); - // has no support for special || or \| - -////////////////////////////////////////////////////////////////////// -// actuarialangle.dtx -defineMacro("\\angln", "{\\angl n}"); - -// Custom Khan Academy colors, should be moved to an optional package -defineMacro("\\blue", "\\textcolor{##6495ed}{#1}"); -defineMacro("\\orange", "\\textcolor{##ffa500}{#1}"); -defineMacro("\\pink", "\\textcolor{##ff00af}{#1}"); -defineMacro("\\red", "\\textcolor{##df0030}{#1}"); -defineMacro("\\green", 
"\\textcolor{##28ae7b}{#1}"); -defineMacro("\\gray", "\\textcolor{gray}{#1}"); -defineMacro("\\purple", "\\textcolor{##9d38bd}{#1}"); -defineMacro("\\blueA", "\\textcolor{##ccfaff}{#1}"); -defineMacro("\\blueB", "\\textcolor{##80f6ff}{#1}"); -defineMacro("\\blueC", "\\textcolor{##63d9ea}{#1}"); -defineMacro("\\blueD", "\\textcolor{##11accd}{#1}"); -defineMacro("\\blueE", "\\textcolor{##0c7f99}{#1}"); -defineMacro("\\tealA", "\\textcolor{##94fff5}{#1}"); -defineMacro("\\tealB", "\\textcolor{##26edd5}{#1}"); -defineMacro("\\tealC", "\\textcolor{##01d1c1}{#1}"); -defineMacro("\\tealD", "\\textcolor{##01a995}{#1}"); -defineMacro("\\tealE", "\\textcolor{##208170}{#1}"); -defineMacro("\\greenA", "\\textcolor{##b6ffb0}{#1}"); -defineMacro("\\greenB", "\\textcolor{##8af281}{#1}"); -defineMacro("\\greenC", "\\textcolor{##74cf70}{#1}"); -defineMacro("\\greenD", "\\textcolor{##1fab54}{#1}"); -defineMacro("\\greenE", "\\textcolor{##0d923f}{#1}"); -defineMacro("\\goldA", "\\textcolor{##ffd0a9}{#1}"); -defineMacro("\\goldB", "\\textcolor{##ffbb71}{#1}"); -defineMacro("\\goldC", "\\textcolor{##ff9c39}{#1}"); -defineMacro("\\goldD", "\\textcolor{##e07d10}{#1}"); -defineMacro("\\goldE", "\\textcolor{##a75a05}{#1}"); -defineMacro("\\redA", "\\textcolor{##fca9a9}{#1}"); -defineMacro("\\redB", "\\textcolor{##ff8482}{#1}"); -defineMacro("\\redC", "\\textcolor{##f9685d}{#1}"); -defineMacro("\\redD", "\\textcolor{##e84d39}{#1}"); -defineMacro("\\redE", "\\textcolor{##bc2612}{#1}"); -defineMacro("\\maroonA", "\\textcolor{##ffbde0}{#1}"); -defineMacro("\\maroonB", "\\textcolor{##ff92c6}{#1}"); -defineMacro("\\maroonC", "\\textcolor{##ed5fa6}{#1}"); -defineMacro("\\maroonD", "\\textcolor{##ca337c}{#1}"); -defineMacro("\\maroonE", "\\textcolor{##9e034e}{#1}"); -defineMacro("\\purpleA", "\\textcolor{##ddd7ff}{#1}"); -defineMacro("\\purpleB", "\\textcolor{##c6b9fc}{#1}"); -defineMacro("\\purpleC", "\\textcolor{##aa87ff}{#1}"); -defineMacro("\\purpleD", "\\textcolor{##7854ab}{#1}"); 
-defineMacro("\\purpleE", "\\textcolor{##543b78}{#1}"); -defineMacro("\\mintA", "\\textcolor{##f5f9e8}{#1}"); -defineMacro("\\mintB", "\\textcolor{##edf2df}{#1}"); -defineMacro("\\mintC", "\\textcolor{##e0e5cc}{#1}"); -defineMacro("\\grayA", "\\textcolor{##f6f7f7}{#1}"); -defineMacro("\\grayB", "\\textcolor{##f0f1f2}{#1}"); -defineMacro("\\grayC", "\\textcolor{##e3e5e6}{#1}"); -defineMacro("\\grayD", "\\textcolor{##d6d8da}{#1}"); -defineMacro("\\grayE", "\\textcolor{##babec2}{#1}"); -defineMacro("\\grayF", "\\textcolor{##888d93}{#1}"); -defineMacro("\\grayG", "\\textcolor{##626569}{#1}"); -defineMacro("\\grayH", "\\textcolor{##3b3e40}{#1}"); -defineMacro("\\grayI", "\\textcolor{##21242c}{#1}"); -defineMacro("\\kaBlue", "\\textcolor{##314453}{#1}"); -defineMacro("\\kaGreen", "\\textcolor{##71B307}{#1}"); diff --git a/lib/scripts/data/katex-op.js b/lib/scripts/data/katex-op.js deleted file mode 100644 index 3fbfac6..0000000 --- a/lib/scripts/data/katex-op.js +++ /dev/null @@ -1,339 +0,0 @@ -/** - * Vendored from KaTeX v0.16.22 (https://github.com/KaTeX/KaTeX). - * SPDX-License-Identifier: MIT - * Regenerate derived outputs: pnpm generate:katex - */ -// @flow -// Limits, symbols -import defineFunction, {ordargument} from "../defineFunction"; -import buildCommon from "../buildCommon"; -import {SymbolNode} from "../domTree"; -import * as mathMLTree from "../mathMLTree"; -import utils from "../utils"; -import Style from "../Style"; -import {assembleSupSub} from "./utils/assembleSupSub"; -import {assertNodeType} from "../parseNode"; -import {makeEm} from "../units"; - -import * as html from "../buildHTML"; -import * as mml from "../buildMathML"; - -import type {HtmlBuilderSupSub, MathMLBuilder} from "../defineFunction"; -import type {ParseNode} from "../parseNode"; - -// Most operators have a large successor symbol, but these don't. 
-const noSuccessor = [ - "\\smallint", -]; - -// NOTE: Unlike most `htmlBuilder`s, this one handles not only "op", but also -// "supsub" since some of them (like \int) can affect super/subscripting. -export const htmlBuilder: HtmlBuilderSupSub<"op"> = (grp, options) => { - // Operators are handled in the TeXbook pg. 443-444, rule 13(a). - let supGroup; - let subGroup; - let hasLimits = false; - let group: ParseNode<"op">; - if (grp.type === "supsub") { - // If we have limits, supsub will pass us its group to handle. Pull - // out the superscript and subscript and set the group to the op in - // its base. - supGroup = grp.sup; - subGroup = grp.sub; - group = assertNodeType(grp.base, "op"); - hasLimits = true; - } else { - group = assertNodeType(grp, "op"); - } - - const style = options.style; - - let large = false; - if (style.size === Style.DISPLAY.size && - group.symbol && - !utils.contains(noSuccessor, group.name)) { - - // Most symbol operators get larger in displaystyle (rule 13) - large = true; - } - - let base; - if (group.symbol) { - // If this is a symbol, create the symbol. - const fontName = large ? "Size2-Regular" : "Size1-Regular"; - - let stash = ""; - if (group.name === "\\oiint" || group.name === "\\oiiint") { - // No font glyphs yet, so use a glyph w/o the oval. - // TODO: When font glyphs are available, delete this code. - stash = group.name.slice(1); - group.name = stash === "oiint" ? "\\iint" : "\\iiint"; - } - - base = buildCommon.makeSymbol( - group.name, fontName, "math", options, - ["mop", "op-symbol", large ? "large-op" : "small-op"]); - - if (stash.length > 0) { - // We're in \oiint or \oiiint. Overlay the oval. - // TODO: When font glyphs are available, delete this code. - const italic = base.italic; - const oval = buildCommon.staticSvg(stash + "Size" - + (large ? 
"2" : "1"), options); - base = buildCommon.makeVList({ - positionType: "individualShift", - children: [ - {type: "elem", elem: base, shift: 0}, - {type: "elem", elem: oval, shift: large ? 0.08 : 0}, - ], - }, options); - group.name = "\\" + stash; - base.classes.unshift("mop"); - // $FlowFixMe - base.italic = italic; - } - } else if (group.body) { - // If this is a list, compose that list. - const inner = html.buildExpression(group.body, options, true); - if (inner.length === 1 && inner[0] instanceof SymbolNode) { - base = inner[0]; - base.classes[0] = "mop"; // replace old mclass - } else { - base = buildCommon.makeSpan(["mop"], inner, options); - } - } else { - // Otherwise, this is a text operator. Build the text from the - // operator's name. - const output = []; - for (let i = 1; i < group.name.length; i++) { - output.push(buildCommon.mathsym(group.name[i], group.mode, options)); - } - base = buildCommon.makeSpan(["mop"], output, options); - } - - // If content of op is a single symbol, shift it vertically. - let baseShift = 0; - let slant = 0; - if ((base instanceof SymbolNode - || group.name === "\\oiint" || group.name === "\\oiiint") - && !group.suppressBaseShift) { - // We suppress the shift of the base of \overset and \underset. Otherwise, - // shift the symbol so its center lies on the axis (rule 13). It - // appears that our fonts have the centers of the symbols already - // almost on the axis, so these numbers are very small. Note we - // don't actually apply this here, but instead it is used either in - // the vlist creation or separately when there are no limits. - baseShift = (base.height - base.depth) / 2 - - options.fontMetrics().axisHeight; - - // The slant of the symbol is just its italic correction. 
- // $FlowFixMe - slant = base.italic; - } - - if (hasLimits) { - return assembleSupSub(base, supGroup, subGroup, options, - style, slant, baseShift); - - } else { - if (baseShift) { - base.style.position = "relative"; - base.style.top = makeEm(baseShift); - } - - return base; - } -}; - -const mathmlBuilder: MathMLBuilder<"op"> = (group, options) => { - let node; - - if (group.symbol) { - // This is a symbol. Just add the symbol. - node = new mathMLTree.MathNode( - "mo", [mml.makeText(group.name, group.mode)]); - if (utils.contains(noSuccessor, group.name)) { - node.setAttribute("largeop", "false"); - } - } else if (group.body) { - // This is an operator with children. Add them. - node = new mathMLTree.MathNode( - "mo", mml.buildExpression(group.body, options)); - } else { - // This is a text operator. Add all of the characters from the - // operator's name. - node = new mathMLTree.MathNode( - "mi", [new mathMLTree.TextNode(group.name.slice(1))]); - // Append an . - // ref: https://www.w3.org/TR/REC-MathML/chap3_2.html#sec3.2.4 - const operator = new mathMLTree.MathNode("mo", - [mml.makeText("\u2061", "text")]); - if (group.parentIsSupSub) { - node = new mathMLTree.MathNode("mrow", [node, operator]); - } else { - node = mathMLTree.newDocumentFragment([node, operator]); - } - } - - return node; -}; - -const singleCharBigOps: {[string]: string} = { - "\u220F": "\\prod", - "\u2210": "\\coprod", - "\u2211": "\\sum", - "\u22c0": "\\bigwedge", - "\u22c1": "\\bigvee", - "\u22c2": "\\bigcap", - "\u22c3": "\\bigcup", - "\u2a00": "\\bigodot", - "\u2a01": "\\bigoplus", - "\u2a02": "\\bigotimes", - "\u2a04": "\\biguplus", - "\u2a06": "\\bigsqcup", -}; - -defineFunction({ - type: "op", - names: [ - "\\coprod", "\\bigvee", "\\bigwedge", "\\biguplus", "\\bigcap", - "\\bigcup", "\\intop", "\\prod", "\\sum", "\\bigotimes", - "\\bigoplus", "\\bigodot", "\\bigsqcup", "\\smallint", "\u220F", - "\u2210", "\u2211", "\u22c0", "\u22c1", "\u22c2", "\u22c3", "\u2a00", - "\u2a01", "\u2a02", 
"\u2a04", "\u2a06", - ], - props: { - numArgs: 0, - }, - handler: ({parser, funcName}, args) => { - let fName = funcName; - if (fName.length === 1) { - fName = singleCharBigOps[fName]; - } - return { - type: "op", - mode: parser.mode, - limits: true, - parentIsSupSub: false, - symbol: true, - name: fName, - }; - }, - htmlBuilder, - mathmlBuilder, -}); - -// Note: calling defineFunction with a type that's already been defined only -// works because the same htmlBuilder and mathmlBuilder are being used. -defineFunction({ - type: "op", - names: ["\\mathop"], - props: { - numArgs: 1, - primitive: true, - }, - handler: ({parser}, args) => { - const body = args[0]; - return { - type: "op", - mode: parser.mode, - limits: false, - parentIsSupSub: false, - symbol: false, - body: ordargument(body), - }; - }, - htmlBuilder, - mathmlBuilder, -}); - -// There are 2 flags for operators; whether they produce limits in -// displaystyle, and whether they are symbols and should grow in -// displaystyle. These four groups cover the four possible choices. 
- -const singleCharIntegrals: {[string]: string} = { - "\u222b": "\\int", - "\u222c": "\\iint", - "\u222d": "\\iiint", - "\u222e": "\\oint", - "\u222f": "\\oiint", - "\u2230": "\\oiiint", -}; - -// No limits, not symbols -defineFunction({ - type: "op", - names: [ - "\\arcsin", "\\arccos", "\\arctan", "\\arctg", "\\arcctg", - "\\arg", "\\ch", "\\cos", "\\cosec", "\\cosh", "\\cot", "\\cotg", - "\\coth", "\\csc", "\\ctg", "\\cth", "\\deg", "\\dim", "\\exp", - "\\hom", "\\ker", "\\lg", "\\ln", "\\log", "\\sec", "\\sin", - "\\sinh", "\\sh", "\\tan", "\\tanh", "\\tg", "\\th", - ], - props: { - numArgs: 0, - }, - handler({parser, funcName}) { - return { - type: "op", - mode: parser.mode, - limits: false, - parentIsSupSub: false, - symbol: false, - name: funcName, - }; - }, - htmlBuilder, - mathmlBuilder, -}); - -// Limits, not symbols -defineFunction({ - type: "op", - names: [ - "\\det", "\\gcd", "\\inf", "\\lim", "\\max", "\\min", "\\Pr", "\\sup", - ], - props: { - numArgs: 0, - }, - handler({parser, funcName}) { - return { - type: "op", - mode: parser.mode, - limits: true, - parentIsSupSub: false, - symbol: false, - name: funcName, - }; - }, - htmlBuilder, - mathmlBuilder, -}); - -// No limits, symbols -defineFunction({ - type: "op", - names: [ - "\\int", "\\iint", "\\iiint", "\\oint", "\\oiint", "\\oiiint", - "\u222b", "\u222c", "\u222d", "\u222e", "\u222f", "\u2230", - ], - props: { - numArgs: 0, - }, - handler({parser, funcName}) { - let fName = funcName; - if (fName.length === 1) { - fName = singleCharIntegrals[fName]; - } - return { - type: "op", - mode: parser.mode, - limits: false, - parentIsSupSub: false, - symbol: true, - name: fName, - }; - }, - htmlBuilder, - mathmlBuilder, -}); diff --git a/lib/scripts/data/katex-symbols.js b/lib/scripts/data/katex-symbols.js deleted file mode 100644 index 76d19e4..0000000 --- a/lib/scripts/data/katex-symbols.js +++ /dev/null @@ -1,895 +0,0 @@ -/** - * Vendored from KaTeX v0.16.22 (https://github.com/KaTeX/KaTeX). 
- * SPDX-License-Identifier: MIT - * Regenerate derived outputs: pnpm generate:katex - */ -// @flow -/** - * This file holds a list of all no-argument functions and single-character - * symbols (like 'a' or ';'). - * - * For each of the symbols, there are three properties they can have: - * - font (required): the font to be used for this symbol. Either "main" (the - normal font), or "ams" (the ams fonts). - * - group (required): the ParseNode group type the symbol should have (i.e. - "textord", "mathord", etc). - See https://github.com/KaTeX/KaTeX/wiki/Examining-TeX#group-types - * - replace: the character that this symbol or function should be - * replaced with (i.e. "\phi" has a replace value of "\u03d5", the phi - * character in the main font). - * - * The outermost map in the table indicates what mode the symbols should be - * accepted in (e.g. "math" or "text"). - */ - -import type {Mode} from "./types"; - -type Font = "main" | "ams"; -// Some of these have a "-token" suffix since these are also used as `ParseNode` -// types for raw text tokens, and we want to avoid conflicts with higher-level -// `ParseNode` types. These `ParseNode`s are constructed within `Parser` by -// looking up the `symbols` map. -export const ATOMS = { - "bin": 1, - "close": 1, - "inner": 1, - "open": 1, - "punct": 1, - "rel": 1, -}; -export const NON_ATOMS = { - "accent-token": 1, - "mathord": 1, - "op-token": 1, - "spacing": 1, - "textord": 1, -}; - -export type Atom = $Keys; -export type NonAtom = $Keys -export type Group = Atom | NonAtom; -type CharInfoMap = {[string]: {font: Font, group: Group, replace: ?string}}; - -const symbols: {[Mode]: CharInfoMap} = { - "math": {}, - "text": {}, -}; -export default symbols; - -/** `acceptUnicodeChar = true` is only applicable if `replace` is set. 
*/ -export function defineSymbol( - mode: Mode, - font: Font, - group: Group, - replace: ?string, - name: string, - acceptUnicodeChar?: boolean, -) { - symbols[mode][name] = {font, group, replace}; - - if (acceptUnicodeChar && replace) { - symbols[mode][replace] = symbols[mode][name]; - } -} - -// Some abbreviations for commonly used strings. -// This helps minify the code, and also spotting typos using jshint. - -// modes: -const math = "math"; -const text = "text"; - -// fonts: -const main = "main"; -const ams = "ams"; - -// groups: -const accent = "accent-token"; -const bin = "bin"; -const close = "close"; -const inner = "inner"; -const mathord = "mathord"; -const op = "op-token"; -const open = "open"; -const punct = "punct"; -const rel = "rel"; -const spacing = "spacing"; -const textord = "textord"; - -// Now comes the symbol table - -// Relation Symbols -defineSymbol(math, main, rel, "\u2261", "\\equiv", true); -defineSymbol(math, main, rel, "\u227a", "\\prec", true); -defineSymbol(math, main, rel, "\u227b", "\\succ", true); -defineSymbol(math, main, rel, "\u223c", "\\sim", true); -defineSymbol(math, main, rel, "\u22a5", "\\perp"); -defineSymbol(math, main, rel, "\u2aaf", "\\preceq", true); -defineSymbol(math, main, rel, "\u2ab0", "\\succeq", true); -defineSymbol(math, main, rel, "\u2243", "\\simeq", true); -defineSymbol(math, main, rel, "\u2223", "\\mid", true); -defineSymbol(math, main, rel, "\u226a", "\\ll", true); -defineSymbol(math, main, rel, "\u226b", "\\gg", true); -defineSymbol(math, main, rel, "\u224d", "\\asymp", true); -defineSymbol(math, main, rel, "\u2225", "\\parallel"); -defineSymbol(math, main, rel, "\u22c8", "\\bowtie", true); -defineSymbol(math, main, rel, "\u2323", "\\smile", true); -defineSymbol(math, main, rel, "\u2291", "\\sqsubseteq", true); -defineSymbol(math, main, rel, "\u2292", "\\sqsupseteq", true); -defineSymbol(math, main, rel, "\u2250", "\\doteq", true); -defineSymbol(math, main, rel, "\u2322", "\\frown", true); 
-defineSymbol(math, main, rel, "\u220b", "\\ni", true); -defineSymbol(math, main, rel, "\u221d", "\\propto", true); -defineSymbol(math, main, rel, "\u22a2", "\\vdash", true); -defineSymbol(math, main, rel, "\u22a3", "\\dashv", true); -defineSymbol(math, main, rel, "\u220b", "\\owns"); - -// Punctuation -defineSymbol(math, main, punct, "\u002e", "\\ldotp"); -defineSymbol(math, main, punct, "\u22c5", "\\cdotp"); - -// Misc Symbols -defineSymbol(math, main, textord, "\u0023", "\\#"); -defineSymbol(text, main, textord, "\u0023", "\\#"); -defineSymbol(math, main, textord, "\u0026", "\\&"); -defineSymbol(text, main, textord, "\u0026", "\\&"); -defineSymbol(math, main, textord, "\u2135", "\\aleph", true); -defineSymbol(math, main, textord, "\u2200", "\\forall", true); -defineSymbol(math, main, textord, "\u210f", "\\hbar", true); -defineSymbol(math, main, textord, "\u2203", "\\exists", true); -defineSymbol(math, main, textord, "\u2207", "\\nabla", true); -defineSymbol(math, main, textord, "\u266d", "\\flat", true); -defineSymbol(math, main, textord, "\u2113", "\\ell", true); -defineSymbol(math, main, textord, "\u266e", "\\natural", true); -defineSymbol(math, main, textord, "\u2663", "\\clubsuit", true); -defineSymbol(math, main, textord, "\u2118", "\\wp", true); -defineSymbol(math, main, textord, "\u266f", "\\sharp", true); -defineSymbol(math, main, textord, "\u2662", "\\diamondsuit", true); -defineSymbol(math, main, textord, "\u211c", "\\Re", true); -defineSymbol(math, main, textord, "\u2661", "\\heartsuit", true); -defineSymbol(math, main, textord, "\u2111", "\\Im", true); -defineSymbol(math, main, textord, "\u2660", "\\spadesuit", true); -defineSymbol(math, main, textord, "\u00a7", "\\S", true); -defineSymbol(text, main, textord, "\u00a7", "\\S"); -defineSymbol(math, main, textord, "\u00b6", "\\P", true); -defineSymbol(text, main, textord, "\u00b6", "\\P"); - -// Math and Text -defineSymbol(math, main, textord, "\u2020", "\\dag"); -defineSymbol(text, main, textord, 
"\u2020", "\\dag"); -defineSymbol(text, main, textord, "\u2020", "\\textdagger"); -defineSymbol(math, main, textord, "\u2021", "\\ddag"); -defineSymbol(text, main, textord, "\u2021", "\\ddag"); -defineSymbol(text, main, textord, "\u2021", "\\textdaggerdbl"); - -// Large Delimiters -defineSymbol(math, main, close, "\u23b1", "\\rmoustache", true); -defineSymbol(math, main, open, "\u23b0", "\\lmoustache", true); -defineSymbol(math, main, close, "\u27ef", "\\rgroup", true); -defineSymbol(math, main, open, "\u27ee", "\\lgroup", true); - -// Binary Operators -defineSymbol(math, main, bin, "\u2213", "\\mp", true); -defineSymbol(math, main, bin, "\u2296", "\\ominus", true); -defineSymbol(math, main, bin, "\u228e", "\\uplus", true); -defineSymbol(math, main, bin, "\u2293", "\\sqcap", true); -defineSymbol(math, main, bin, "\u2217", "\\ast"); -defineSymbol(math, main, bin, "\u2294", "\\sqcup", true); -defineSymbol(math, main, bin, "\u25ef", "\\bigcirc", true); -defineSymbol(math, main, bin, "\u2219", "\\bullet", true); -defineSymbol(math, main, bin, "\u2021", "\\ddagger"); -defineSymbol(math, main, bin, "\u2240", "\\wr", true); -defineSymbol(math, main, bin, "\u2a3f", "\\amalg"); -defineSymbol(math, main, bin, "\u0026", "\\And"); // from amsmath - -// Arrow Symbols -defineSymbol(math, main, rel, "\u27f5", "\\longleftarrow", true); -defineSymbol(math, main, rel, "\u21d0", "\\Leftarrow", true); -defineSymbol(math, main, rel, "\u27f8", "\\Longleftarrow", true); -defineSymbol(math, main, rel, "\u27f6", "\\longrightarrow", true); -defineSymbol(math, main, rel, "\u21d2", "\\Rightarrow", true); -defineSymbol(math, main, rel, "\u27f9", "\\Longrightarrow", true); -defineSymbol(math, main, rel, "\u2194", "\\leftrightarrow", true); -defineSymbol(math, main, rel, "\u27f7", "\\longleftrightarrow", true); -defineSymbol(math, main, rel, "\u21d4", "\\Leftrightarrow", true); -defineSymbol(math, main, rel, "\u27fa", "\\Longleftrightarrow", true); -defineSymbol(math, main, rel, "\u21a6", 
"\\mapsto", true); -defineSymbol(math, main, rel, "\u27fc", "\\longmapsto", true); -defineSymbol(math, main, rel, "\u2197", "\\nearrow", true); -defineSymbol(math, main, rel, "\u21a9", "\\hookleftarrow", true); -defineSymbol(math, main, rel, "\u21aa", "\\hookrightarrow", true); -defineSymbol(math, main, rel, "\u2198", "\\searrow", true); -defineSymbol(math, main, rel, "\u21bc", "\\leftharpoonup", true); -defineSymbol(math, main, rel, "\u21c0", "\\rightharpoonup", true); -defineSymbol(math, main, rel, "\u2199", "\\swarrow", true); -defineSymbol(math, main, rel, "\u21bd", "\\leftharpoondown", true); -defineSymbol(math, main, rel, "\u21c1", "\\rightharpoondown", true); -defineSymbol(math, main, rel, "\u2196", "\\nwarrow", true); -defineSymbol(math, main, rel, "\u21cc", "\\rightleftharpoons", true); - -// AMS Negated Binary Relations -defineSymbol(math, ams, rel, "\u226e", "\\nless", true); -// Symbol names preceded by "@" each have a corresponding macro. -defineSymbol(math, ams, rel, "\ue010", "\\@nleqslant"); -defineSymbol(math, ams, rel, "\ue011", "\\@nleqq"); -defineSymbol(math, ams, rel, "\u2a87", "\\lneq", true); -defineSymbol(math, ams, rel, "\u2268", "\\lneqq", true); -defineSymbol(math, ams, rel, "\ue00c", "\\@lvertneqq"); -defineSymbol(math, ams, rel, "\u22e6", "\\lnsim", true); -defineSymbol(math, ams, rel, "\u2a89", "\\lnapprox", true); -defineSymbol(math, ams, rel, "\u2280", "\\nprec", true); -// unicode-math maps \u22e0 to \npreccurlyeq. We'll use the AMS synonym. 
-defineSymbol(math, ams, rel, "\u22e0", "\\npreceq", true); -defineSymbol(math, ams, rel, "\u22e8", "\\precnsim", true); -defineSymbol(math, ams, rel, "\u2ab9", "\\precnapprox", true); -defineSymbol(math, ams, rel, "\u2241", "\\nsim", true); -defineSymbol(math, ams, rel, "\ue006", "\\@nshortmid"); -defineSymbol(math, ams, rel, "\u2224", "\\nmid", true); -defineSymbol(math, ams, rel, "\u22ac", "\\nvdash", true); -defineSymbol(math, ams, rel, "\u22ad", "\\nvDash", true); -defineSymbol(math, ams, rel, "\u22ea", "\\ntriangleleft"); -defineSymbol(math, ams, rel, "\u22ec", "\\ntrianglelefteq", true); -defineSymbol(math, ams, rel, "\u228a", "\\subsetneq", true); -defineSymbol(math, ams, rel, "\ue01a", "\\@varsubsetneq"); -defineSymbol(math, ams, rel, "\u2acb", "\\subsetneqq", true); -defineSymbol(math, ams, rel, "\ue017", "\\@varsubsetneqq"); -defineSymbol(math, ams, rel, "\u226f", "\\ngtr", true); -defineSymbol(math, ams, rel, "\ue00f", "\\@ngeqslant"); -defineSymbol(math, ams, rel, "\ue00e", "\\@ngeqq"); -defineSymbol(math, ams, rel, "\u2a88", "\\gneq", true); -defineSymbol(math, ams, rel, "\u2269", "\\gneqq", true); -defineSymbol(math, ams, rel, "\ue00d", "\\@gvertneqq"); -defineSymbol(math, ams, rel, "\u22e7", "\\gnsim", true); -defineSymbol(math, ams, rel, "\u2a8a", "\\gnapprox", true); -defineSymbol(math, ams, rel, "\u2281", "\\nsucc", true); -// unicode-math maps \u22e1 to \nsucccurlyeq. We'll use the AMS synonym. -defineSymbol(math, ams, rel, "\u22e1", "\\nsucceq", true); -defineSymbol(math, ams, rel, "\u22e9", "\\succnsim", true); -defineSymbol(math, ams, rel, "\u2aba", "\\succnapprox", true); -// unicode-math maps \u2246 to \simneqq. We'll use the AMS synonym. 
-defineSymbol(math, ams, rel, "\u2246", "\\ncong", true); -defineSymbol(math, ams, rel, "\ue007", "\\@nshortparallel"); -defineSymbol(math, ams, rel, "\u2226", "\\nparallel", true); -defineSymbol(math, ams, rel, "\u22af", "\\nVDash", true); -defineSymbol(math, ams, rel, "\u22eb", "\\ntriangleright"); -defineSymbol(math, ams, rel, "\u22ed", "\\ntrianglerighteq", true); -defineSymbol(math, ams, rel, "\ue018", "\\@nsupseteqq"); -defineSymbol(math, ams, rel, "\u228b", "\\supsetneq", true); -defineSymbol(math, ams, rel, "\ue01b", "\\@varsupsetneq"); -defineSymbol(math, ams, rel, "\u2acc", "\\supsetneqq", true); -defineSymbol(math, ams, rel, "\ue019", "\\@varsupsetneqq"); -defineSymbol(math, ams, rel, "\u22ae", "\\nVdash", true); -defineSymbol(math, ams, rel, "\u2ab5", "\\precneqq", true); -defineSymbol(math, ams, rel, "\u2ab6", "\\succneqq", true); -defineSymbol(math, ams, rel, "\ue016", "\\@nsubseteqq"); -defineSymbol(math, ams, bin, "\u22b4", "\\unlhd"); -defineSymbol(math, ams, bin, "\u22b5", "\\unrhd"); - -// AMS Negated Arrows -defineSymbol(math, ams, rel, "\u219a", "\\nleftarrow", true); -defineSymbol(math, ams, rel, "\u219b", "\\nrightarrow", true); -defineSymbol(math, ams, rel, "\u21cd", "\\nLeftarrow", true); -defineSymbol(math, ams, rel, "\u21cf", "\\nRightarrow", true); -defineSymbol(math, ams, rel, "\u21ae", "\\nleftrightarrow", true); -defineSymbol(math, ams, rel, "\u21ce", "\\nLeftrightarrow", true); - -// AMS Misc -defineSymbol(math, ams, rel, "\u25b3", "\\vartriangle"); -defineSymbol(math, ams, textord, "\u210f", "\\hslash"); -defineSymbol(math, ams, textord, "\u25bd", "\\triangledown"); -defineSymbol(math, ams, textord, "\u25ca", "\\lozenge"); -defineSymbol(math, ams, textord, "\u24c8", "\\circledS"); -defineSymbol(math, ams, textord, "\u00ae", "\\circledR"); -defineSymbol(text, ams, textord, "\u00ae", "\\circledR"); -defineSymbol(math, ams, textord, "\u2221", "\\measuredangle", true); -defineSymbol(math, ams, textord, "\u2204", "\\nexists"); 
-defineSymbol(math, ams, textord, "\u2127", "\\mho"); -defineSymbol(math, ams, textord, "\u2132", "\\Finv", true); -defineSymbol(math, ams, textord, "\u2141", "\\Game", true); -defineSymbol(math, ams, textord, "\u2035", "\\backprime"); -defineSymbol(math, ams, textord, "\u25b2", "\\blacktriangle"); -defineSymbol(math, ams, textord, "\u25bc", "\\blacktriangledown"); -defineSymbol(math, ams, textord, "\u25a0", "\\blacksquare"); -defineSymbol(math, ams, textord, "\u29eb", "\\blacklozenge"); -defineSymbol(math, ams, textord, "\u2605", "\\bigstar"); -defineSymbol(math, ams, textord, "\u2222", "\\sphericalangle", true); -defineSymbol(math, ams, textord, "\u2201", "\\complement", true); -// unicode-math maps U+F0 to \matheth. We map to AMS function \eth -defineSymbol(math, ams, textord, "\u00f0", "\\eth", true); -defineSymbol(text, main, textord, "\u00f0", "\u00f0"); -defineSymbol(math, ams, textord, "\u2571", "\\diagup"); -defineSymbol(math, ams, textord, "\u2572", "\\diagdown"); -defineSymbol(math, ams, textord, "\u25a1", "\\square"); -defineSymbol(math, ams, textord, "\u25a1", "\\Box"); -defineSymbol(math, ams, textord, "\u25ca", "\\Diamond"); -// unicode-math maps U+A5 to \mathyen. 
We map to AMS function \yen -defineSymbol(math, ams, textord, "\u00a5", "\\yen", true); -defineSymbol(text, ams, textord, "\u00a5", "\\yen", true); -defineSymbol(math, ams, textord, "\u2713", "\\checkmark", true); -defineSymbol(text, ams, textord, "\u2713", "\\checkmark"); - -// AMS Hebrew -defineSymbol(math, ams, textord, "\u2136", "\\beth", true); -defineSymbol(math, ams, textord, "\u2138", "\\daleth", true); -defineSymbol(math, ams, textord, "\u2137", "\\gimel", true); - -// AMS Greek -defineSymbol(math, ams, textord, "\u03dd", "\\digamma", true); -defineSymbol(math, ams, textord, "\u03f0", "\\varkappa"); - -// AMS Delimiters -defineSymbol(math, ams, open, "\u250c", "\\@ulcorner", true); -defineSymbol(math, ams, close, "\u2510", "\\@urcorner", true); -defineSymbol(math, ams, open, "\u2514", "\\@llcorner", true); -defineSymbol(math, ams, close, "\u2518", "\\@lrcorner", true); - -// AMS Binary Relations -defineSymbol(math, ams, rel, "\u2266", "\\leqq", true); -defineSymbol(math, ams, rel, "\u2a7d", "\\leqslant", true); -defineSymbol(math, ams, rel, "\u2a95", "\\eqslantless", true); -defineSymbol(math, ams, rel, "\u2272", "\\lesssim", true); -defineSymbol(math, ams, rel, "\u2a85", "\\lessapprox", true); -defineSymbol(math, ams, rel, "\u224a", "\\approxeq", true); -defineSymbol(math, ams, bin, "\u22d6", "\\lessdot"); -defineSymbol(math, ams, rel, "\u22d8", "\\lll", true); -defineSymbol(math, ams, rel, "\u2276", "\\lessgtr", true); -defineSymbol(math, ams, rel, "\u22da", "\\lesseqgtr", true); -defineSymbol(math, ams, rel, "\u2a8b", "\\lesseqqgtr", true); -defineSymbol(math, ams, rel, "\u2251", "\\doteqdot"); -defineSymbol(math, ams, rel, "\u2253", "\\risingdotseq", true); -defineSymbol(math, ams, rel, "\u2252", "\\fallingdotseq", true); -defineSymbol(math, ams, rel, "\u223d", "\\backsim", true); -defineSymbol(math, ams, rel, "\u22cd", "\\backsimeq", true); -defineSymbol(math, ams, rel, "\u2ac5", "\\subseteqq", true); -defineSymbol(math, ams, rel, "\u22d0", 
"\\Subset", true); -defineSymbol(math, ams, rel, "\u228f", "\\sqsubset", true); -defineSymbol(math, ams, rel, "\u227c", "\\preccurlyeq", true); -defineSymbol(math, ams, rel, "\u22de", "\\curlyeqprec", true); -defineSymbol(math, ams, rel, "\u227e", "\\precsim", true); -defineSymbol(math, ams, rel, "\u2ab7", "\\precapprox", true); -defineSymbol(math, ams, rel, "\u22b2", "\\vartriangleleft"); -defineSymbol(math, ams, rel, "\u22b4", "\\trianglelefteq"); -defineSymbol(math, ams, rel, "\u22a8", "\\vDash", true); -defineSymbol(math, ams, rel, "\u22aa", "\\Vvdash", true); -defineSymbol(math, ams, rel, "\u2323", "\\smallsmile"); -defineSymbol(math, ams, rel, "\u2322", "\\smallfrown"); -defineSymbol(math, ams, rel, "\u224f", "\\bumpeq", true); -defineSymbol(math, ams, rel, "\u224e", "\\Bumpeq", true); -defineSymbol(math, ams, rel, "\u2267", "\\geqq", true); -defineSymbol(math, ams, rel, "\u2a7e", "\\geqslant", true); -defineSymbol(math, ams, rel, "\u2a96", "\\eqslantgtr", true); -defineSymbol(math, ams, rel, "\u2273", "\\gtrsim", true); -defineSymbol(math, ams, rel, "\u2a86", "\\gtrapprox", true); -defineSymbol(math, ams, bin, "\u22d7", "\\gtrdot"); -defineSymbol(math, ams, rel, "\u22d9", "\\ggg", true); -defineSymbol(math, ams, rel, "\u2277", "\\gtrless", true); -defineSymbol(math, ams, rel, "\u22db", "\\gtreqless", true); -defineSymbol(math, ams, rel, "\u2a8c", "\\gtreqqless", true); -defineSymbol(math, ams, rel, "\u2256", "\\eqcirc", true); -defineSymbol(math, ams, rel, "\u2257", "\\circeq", true); -defineSymbol(math, ams, rel, "\u225c", "\\triangleq", true); -defineSymbol(math, ams, rel, "\u223c", "\\thicksim"); -defineSymbol(math, ams, rel, "\u2248", "\\thickapprox"); -defineSymbol(math, ams, rel, "\u2ac6", "\\supseteqq", true); -defineSymbol(math, ams, rel, "\u22d1", "\\Supset", true); -defineSymbol(math, ams, rel, "\u2290", "\\sqsupset", true); -defineSymbol(math, ams, rel, "\u227d", "\\succcurlyeq", true); -defineSymbol(math, ams, rel, "\u22df", "\\curlyeqsucc", 
true); -defineSymbol(math, ams, rel, "\u227f", "\\succsim", true); -defineSymbol(math, ams, rel, "\u2ab8", "\\succapprox", true); -defineSymbol(math, ams, rel, "\u22b3", "\\vartriangleright"); -defineSymbol(math, ams, rel, "\u22b5", "\\trianglerighteq"); -defineSymbol(math, ams, rel, "\u22a9", "\\Vdash", true); -defineSymbol(math, ams, rel, "\u2223", "\\shortmid"); -defineSymbol(math, ams, rel, "\u2225", "\\shortparallel"); -defineSymbol(math, ams, rel, "\u226c", "\\between", true); -defineSymbol(math, ams, rel, "\u22d4", "\\pitchfork", true); -defineSymbol(math, ams, rel, "\u221d", "\\varpropto"); -defineSymbol(math, ams, rel, "\u25c0", "\\blacktriangleleft"); -// unicode-math says that \therefore is a mathord atom. -// We kept the amssymb atom type, which is rel. -defineSymbol(math, ams, rel, "\u2234", "\\therefore", true); -defineSymbol(math, ams, rel, "\u220d", "\\backepsilon"); -defineSymbol(math, ams, rel, "\u25b6", "\\blacktriangleright"); -// unicode-math says that \because is a mathord atom. -// We kept the amssymb atom type, which is rel. 
-defineSymbol(math, ams, rel, "\u2235", "\\because", true); -defineSymbol(math, ams, rel, "\u22d8", "\\llless"); -defineSymbol(math, ams, rel, "\u22d9", "\\gggtr"); -defineSymbol(math, ams, bin, "\u22b2", "\\lhd"); -defineSymbol(math, ams, bin, "\u22b3", "\\rhd"); -defineSymbol(math, ams, rel, "\u2242", "\\eqsim", true); -defineSymbol(math, main, rel, "\u22c8", "\\Join"); -defineSymbol(math, ams, rel, "\u2251", "\\Doteq", true); - -// AMS Binary Operators -defineSymbol(math, ams, bin, "\u2214", "\\dotplus", true); -defineSymbol(math, ams, bin, "\u2216", "\\smallsetminus"); -defineSymbol(math, ams, bin, "\u22d2", "\\Cap", true); -defineSymbol(math, ams, bin, "\u22d3", "\\Cup", true); -defineSymbol(math, ams, bin, "\u2a5e", "\\doublebarwedge", true); -defineSymbol(math, ams, bin, "\u229f", "\\boxminus", true); -defineSymbol(math, ams, bin, "\u229e", "\\boxplus", true); -defineSymbol(math, ams, bin, "\u22c7", "\\divideontimes", true); -defineSymbol(math, ams, bin, "\u22c9", "\\ltimes", true); -defineSymbol(math, ams, bin, "\u22ca", "\\rtimes", true); -defineSymbol(math, ams, bin, "\u22cb", "\\leftthreetimes", true); -defineSymbol(math, ams, bin, "\u22cc", "\\rightthreetimes", true); -defineSymbol(math, ams, bin, "\u22cf", "\\curlywedge", true); -defineSymbol(math, ams, bin, "\u22ce", "\\curlyvee", true); -defineSymbol(math, ams, bin, "\u229d", "\\circleddash", true); -defineSymbol(math, ams, bin, "\u229b", "\\circledast", true); -defineSymbol(math, ams, bin, "\u22c5", "\\centerdot"); -defineSymbol(math, ams, bin, "\u22ba", "\\intercal", true); -defineSymbol(math, ams, bin, "\u22d2", "\\doublecap"); -defineSymbol(math, ams, bin, "\u22d3", "\\doublecup"); -defineSymbol(math, ams, bin, "\u22a0", "\\boxtimes", true); - -// AMS Arrows -// Note: unicode-math maps \u21e2 to their own function \rightdasharrow. -// We'll map it to AMS function \dashrightarrow. It produces the same atom. 
-defineSymbol(math, ams, rel, "\u21e2", "\\dashrightarrow", true); -// unicode-math maps \u21e0 to \leftdasharrow. We'll use the AMS synonym. -defineSymbol(math, ams, rel, "\u21e0", "\\dashleftarrow", true); -defineSymbol(math, ams, rel, "\u21c7", "\\leftleftarrows", true); -defineSymbol(math, ams, rel, "\u21c6", "\\leftrightarrows", true); -defineSymbol(math, ams, rel, "\u21da", "\\Lleftarrow", true); -defineSymbol(math, ams, rel, "\u219e", "\\twoheadleftarrow", true); -defineSymbol(math, ams, rel, "\u21a2", "\\leftarrowtail", true); -defineSymbol(math, ams, rel, "\u21ab", "\\looparrowleft", true); -defineSymbol(math, ams, rel, "\u21cb", "\\leftrightharpoons", true); -defineSymbol(math, ams, rel, "\u21b6", "\\curvearrowleft", true); -// unicode-math maps \u21ba to \acwopencirclearrow. We'll use the AMS synonym. -defineSymbol(math, ams, rel, "\u21ba", "\\circlearrowleft", true); -defineSymbol(math, ams, rel, "\u21b0", "\\Lsh", true); -defineSymbol(math, ams, rel, "\u21c8", "\\upuparrows", true); -defineSymbol(math, ams, rel, "\u21bf", "\\upharpoonleft", true); -defineSymbol(math, ams, rel, "\u21c3", "\\downharpoonleft", true); -defineSymbol(math, main, rel, "\u22b6", "\\origof", true); // not in font -defineSymbol(math, main, rel, "\u22b7", "\\imageof", true); // not in font -defineSymbol(math, ams, rel, "\u22b8", "\\multimap", true); -defineSymbol(math, ams, rel, "\u21ad", "\\leftrightsquigarrow", true); -defineSymbol(math, ams, rel, "\u21c9", "\\rightrightarrows", true); -defineSymbol(math, ams, rel, "\u21c4", "\\rightleftarrows", true); -defineSymbol(math, ams, rel, "\u21a0", "\\twoheadrightarrow", true); -defineSymbol(math, ams, rel, "\u21a3", "\\rightarrowtail", true); -defineSymbol(math, ams, rel, "\u21ac", "\\looparrowright", true); -defineSymbol(math, ams, rel, "\u21b7", "\\curvearrowright", true); -// unicode-math maps \u21bb to \cwopencirclearrow. We'll use the AMS synonym. 
-defineSymbol(math, ams, rel, "\u21bb", "\\circlearrowright", true); -defineSymbol(math, ams, rel, "\u21b1", "\\Rsh", true); -defineSymbol(math, ams, rel, "\u21ca", "\\downdownarrows", true); -defineSymbol(math, ams, rel, "\u21be", "\\upharpoonright", true); -defineSymbol(math, ams, rel, "\u21c2", "\\downharpoonright", true); -defineSymbol(math, ams, rel, "\u21dd", "\\rightsquigarrow", true); -defineSymbol(math, ams, rel, "\u21dd", "\\leadsto"); -defineSymbol(math, ams, rel, "\u21db", "\\Rrightarrow", true); -defineSymbol(math, ams, rel, "\u21be", "\\restriction"); - -defineSymbol(math, main, textord, "\u2018", "`"); -defineSymbol(math, main, textord, "$", "\\$"); -defineSymbol(text, main, textord, "$", "\\$"); -defineSymbol(text, main, textord, "$", "\\textdollar"); -defineSymbol(math, main, textord, "%", "\\%"); -defineSymbol(text, main, textord, "%", "\\%"); -defineSymbol(math, main, textord, "_", "\\_"); -defineSymbol(text, main, textord, "_", "\\_"); -defineSymbol(text, main, textord, "_", "\\textunderscore"); -defineSymbol(math, main, textord, "\u2220", "\\angle", true); -defineSymbol(math, main, textord, "\u221e", "\\infty", true); -defineSymbol(math, main, textord, "\u2032", "\\prime"); -defineSymbol(math, main, textord, "\u25b3", "\\triangle"); -defineSymbol(math, main, textord, "\u0393", "\\Gamma", true); -defineSymbol(math, main, textord, "\u0394", "\\Delta", true); -defineSymbol(math, main, textord, "\u0398", "\\Theta", true); -defineSymbol(math, main, textord, "\u039b", "\\Lambda", true); -defineSymbol(math, main, textord, "\u039e", "\\Xi", true); -defineSymbol(math, main, textord, "\u03a0", "\\Pi", true); -defineSymbol(math, main, textord, "\u03a3", "\\Sigma", true); -defineSymbol(math, main, textord, "\u03a5", "\\Upsilon", true); -defineSymbol(math, main, textord, "\u03a6", "\\Phi", true); -defineSymbol(math, main, textord, "\u03a8", "\\Psi", true); -defineSymbol(math, main, textord, "\u03a9", "\\Omega", true); -defineSymbol(math, main, textord, "A", 
"\u0391"); -defineSymbol(math, main, textord, "B", "\u0392"); -defineSymbol(math, main, textord, "E", "\u0395"); -defineSymbol(math, main, textord, "Z", "\u0396"); -defineSymbol(math, main, textord, "H", "\u0397"); -defineSymbol(math, main, textord, "I", "\u0399"); -defineSymbol(math, main, textord, "K", "\u039A"); -defineSymbol(math, main, textord, "M", "\u039C"); -defineSymbol(math, main, textord, "N", "\u039D"); -defineSymbol(math, main, textord, "O", "\u039F"); -defineSymbol(math, main, textord, "P", "\u03A1"); -defineSymbol(math, main, textord, "T", "\u03A4"); -defineSymbol(math, main, textord, "X", "\u03A7"); -defineSymbol(math, main, textord, "\u00ac", "\\neg", true); -defineSymbol(math, main, textord, "\u00ac", "\\lnot"); -defineSymbol(math, main, textord, "\u22a4", "\\top"); -defineSymbol(math, main, textord, "\u22a5", "\\bot"); -defineSymbol(math, main, textord, "\u2205", "\\emptyset"); -defineSymbol(math, ams, textord, "\u2205", "\\varnothing"); -defineSymbol(math, main, mathord, "\u03b1", "\\alpha", true); -defineSymbol(math, main, mathord, "\u03b2", "\\beta", true); -defineSymbol(math, main, mathord, "\u03b3", "\\gamma", true); -defineSymbol(math, main, mathord, "\u03b4", "\\delta", true); -defineSymbol(math, main, mathord, "\u03f5", "\\epsilon", true); -defineSymbol(math, main, mathord, "\u03b6", "\\zeta", true); -defineSymbol(math, main, mathord, "\u03b7", "\\eta", true); -defineSymbol(math, main, mathord, "\u03b8", "\\theta", true); -defineSymbol(math, main, mathord, "\u03b9", "\\iota", true); -defineSymbol(math, main, mathord, "\u03ba", "\\kappa", true); -defineSymbol(math, main, mathord, "\u03bb", "\\lambda", true); -defineSymbol(math, main, mathord, "\u03bc", "\\mu", true); -defineSymbol(math, main, mathord, "\u03bd", "\\nu", true); -defineSymbol(math, main, mathord, "\u03be", "\\xi", true); -defineSymbol(math, main, mathord, "\u03bf", "\\omicron", true); -defineSymbol(math, main, mathord, "\u03c0", "\\pi", true); -defineSymbol(math, main, 
mathord, "\u03c1", "\\rho", true); -defineSymbol(math, main, mathord, "\u03c3", "\\sigma", true); -defineSymbol(math, main, mathord, "\u03c4", "\\tau", true); -defineSymbol(math, main, mathord, "\u03c5", "\\upsilon", true); -defineSymbol(math, main, mathord, "\u03d5", "\\phi", true); -defineSymbol(math, main, mathord, "\u03c7", "\\chi", true); -defineSymbol(math, main, mathord, "\u03c8", "\\psi", true); -defineSymbol(math, main, mathord, "\u03c9", "\\omega", true); -defineSymbol(math, main, mathord, "\u03b5", "\\varepsilon", true); -defineSymbol(math, main, mathord, "\u03d1", "\\vartheta", true); -defineSymbol(math, main, mathord, "\u03d6", "\\varpi", true); -defineSymbol(math, main, mathord, "\u03f1", "\\varrho", true); -defineSymbol(math, main, mathord, "\u03c2", "\\varsigma", true); -defineSymbol(math, main, mathord, "\u03c6", "\\varphi", true); -defineSymbol(math, main, bin, "\u2217", "*", true); -defineSymbol(math, main, bin, "+", "+"); -defineSymbol(math, main, bin, "\u2212", "-", true); -defineSymbol(math, main, bin, "\u22c5", "\\cdot", true); -defineSymbol(math, main, bin, "\u2218", "\\circ", true); -defineSymbol(math, main, bin, "\u00f7", "\\div", true); -defineSymbol(math, main, bin, "\u00b1", "\\pm", true); -defineSymbol(math, main, bin, "\u00d7", "\\times", true); -defineSymbol(math, main, bin, "\u2229", "\\cap", true); -defineSymbol(math, main, bin, "\u222a", "\\cup", true); -defineSymbol(math, main, bin, "\u2216", "\\setminus", true); -defineSymbol(math, main, bin, "\u2227", "\\land"); -defineSymbol(math, main, bin, "\u2228", "\\lor"); -defineSymbol(math, main, bin, "\u2227", "\\wedge", true); -defineSymbol(math, main, bin, "\u2228", "\\vee", true); -defineSymbol(math, main, textord, "\u221a", "\\surd"); -defineSymbol(math, main, open, "\u27e8", "\\langle", true); -defineSymbol(math, main, open, "\u2223", "\\lvert"); -defineSymbol(math, main, open, "\u2225", "\\lVert"); -defineSymbol(math, main, close, "?", "?"); -defineSymbol(math, main, close, "!", 
"!"); -defineSymbol(math, main, close, "\u27e9", "\\rangle", true); -defineSymbol(math, main, close, "\u2223", "\\rvert"); -defineSymbol(math, main, close, "\u2225", "\\rVert"); -defineSymbol(math, main, rel, "=", "="); -defineSymbol(math, main, rel, ":", ":"); -defineSymbol(math, main, rel, "\u2248", "\\approx", true); -defineSymbol(math, main, rel, "\u2245", "\\cong", true); -defineSymbol(math, main, rel, "\u2265", "\\ge"); -defineSymbol(math, main, rel, "\u2265", "\\geq", true); -defineSymbol(math, main, rel, "\u2190", "\\gets"); -defineSymbol(math, main, rel, ">", "\\gt", true); -defineSymbol(math, main, rel, "\u2208", "\\in", true); -defineSymbol(math, main, rel, "\ue020", "\\@not"); -defineSymbol(math, main, rel, "\u2282", "\\subset", true); -defineSymbol(math, main, rel, "\u2283", "\\supset", true); -defineSymbol(math, main, rel, "\u2286", "\\subseteq", true); -defineSymbol(math, main, rel, "\u2287", "\\supseteq", true); -defineSymbol(math, ams, rel, "\u2288", "\\nsubseteq", true); -defineSymbol(math, ams, rel, "\u2289", "\\nsupseteq", true); -defineSymbol(math, main, rel, "\u22a8", "\\models"); -defineSymbol(math, main, rel, "\u2190", "\\leftarrow", true); -defineSymbol(math, main, rel, "\u2264", "\\le"); -defineSymbol(math, main, rel, "\u2264", "\\leq", true); -defineSymbol(math, main, rel, "<", "\\lt", true); -defineSymbol(math, main, rel, "\u2192", "\\rightarrow", true); -defineSymbol(math, main, rel, "\u2192", "\\to"); -defineSymbol(math, ams, rel, "\u2271", "\\ngeq", true); -defineSymbol(math, ams, rel, "\u2270", "\\nleq", true); -defineSymbol(math, main, spacing, "\u00a0", "\\ "); -defineSymbol(math, main, spacing, "\u00a0", "\\space"); -// Ref: LaTeX Source 2e: \DeclareRobustCommand{\nobreakspace}{% -defineSymbol(math, main, spacing, "\u00a0", "\\nobreakspace"); -defineSymbol(text, main, spacing, "\u00a0", "\\ "); -defineSymbol(text, main, spacing, "\u00a0", " "); -defineSymbol(text, main, spacing, "\u00a0", "\\space"); -defineSymbol(text, main, 
spacing, "\u00a0", "\\nobreakspace"); -defineSymbol(math, main, spacing, null, "\\nobreak"); -defineSymbol(math, main, spacing, null, "\\allowbreak"); -defineSymbol(math, main, punct, ",", ","); -defineSymbol(math, main, punct, ";", ";"); -defineSymbol(math, ams, bin, "\u22bc", "\\barwedge", true); -defineSymbol(math, ams, bin, "\u22bb", "\\veebar", true); -defineSymbol(math, main, bin, "\u2299", "\\odot", true); -defineSymbol(math, main, bin, "\u2295", "\\oplus", true); -defineSymbol(math, main, bin, "\u2297", "\\otimes", true); -defineSymbol(math, main, textord, "\u2202", "\\partial", true); -defineSymbol(math, main, bin, "\u2298", "\\oslash", true); -defineSymbol(math, ams, bin, "\u229a", "\\circledcirc", true); -defineSymbol(math, ams, bin, "\u22a1", "\\boxdot", true); -defineSymbol(math, main, bin, "\u25b3", "\\bigtriangleup"); -defineSymbol(math, main, bin, "\u25bd", "\\bigtriangledown"); -defineSymbol(math, main, bin, "\u2020", "\\dagger"); -defineSymbol(math, main, bin, "\u22c4", "\\diamond"); -defineSymbol(math, main, bin, "\u22c6", "\\star"); -defineSymbol(math, main, bin, "\u25c3", "\\triangleleft"); -defineSymbol(math, main, bin, "\u25b9", "\\triangleright"); -defineSymbol(math, main, open, "{", "\\{"); -defineSymbol(text, main, textord, "{", "\\{"); -defineSymbol(text, main, textord, "{", "\\textbraceleft"); -defineSymbol(math, main, close, "}", "\\}"); -defineSymbol(text, main, textord, "}", "\\}"); -defineSymbol(text, main, textord, "}", "\\textbraceright"); -defineSymbol(math, main, open, "{", "\\lbrace"); -defineSymbol(math, main, close, "}", "\\rbrace"); -defineSymbol(math, main, open, "[", "\\lbrack", true); -defineSymbol(text, main, textord, "[", "\\lbrack", true); -defineSymbol(math, main, close, "]", "\\rbrack", true); -defineSymbol(text, main, textord, "]", "\\rbrack", true); -defineSymbol(math, main, open, "(", "\\lparen", true); -defineSymbol(math, main, close, ")", "\\rparen", true); -defineSymbol(text, main, textord, "<", "\\textless", 
true); // in T1 fontenc -defineSymbol(text, main, textord, ">", "\\textgreater", true); // in T1 fontenc -defineSymbol(math, main, open, "\u230a", "\\lfloor", true); -defineSymbol(math, main, close, "\u230b", "\\rfloor", true); -defineSymbol(math, main, open, "\u2308", "\\lceil", true); -defineSymbol(math, main, close, "\u2309", "\\rceil", true); -defineSymbol(math, main, textord, "\\", "\\backslash"); -defineSymbol(math, main, textord, "\u2223", "|"); -defineSymbol(math, main, textord, "\u2223", "\\vert"); -defineSymbol(text, main, textord, "|", "\\textbar", true); // in T1 fontenc -defineSymbol(math, main, textord, "\u2225", "\\|"); -defineSymbol(math, main, textord, "\u2225", "\\Vert"); -defineSymbol(text, main, textord, "\u2225", "\\textbardbl"); -defineSymbol(text, main, textord, "~", "\\textasciitilde"); -defineSymbol(text, main, textord, "\\", "\\textbackslash"); -defineSymbol(text, main, textord, "^", "\\textasciicircum"); -defineSymbol(math, main, rel, "\u2191", "\\uparrow", true); -defineSymbol(math, main, rel, "\u21d1", "\\Uparrow", true); -defineSymbol(math, main, rel, "\u2193", "\\downarrow", true); -defineSymbol(math, main, rel, "\u21d3", "\\Downarrow", true); -defineSymbol(math, main, rel, "\u2195", "\\updownarrow", true); -defineSymbol(math, main, rel, "\u21d5", "\\Updownarrow", true); -defineSymbol(math, main, op, "\u2210", "\\coprod"); -defineSymbol(math, main, op, "\u22c1", "\\bigvee"); -defineSymbol(math, main, op, "\u22c0", "\\bigwedge"); -defineSymbol(math, main, op, "\u2a04", "\\biguplus"); -defineSymbol(math, main, op, "\u22c2", "\\bigcap"); -defineSymbol(math, main, op, "\u22c3", "\\bigcup"); -defineSymbol(math, main, op, "\u222b", "\\int"); -defineSymbol(math, main, op, "\u222b", "\\intop"); -defineSymbol(math, main, op, "\u222c", "\\iint"); -defineSymbol(math, main, op, "\u222d", "\\iiint"); -defineSymbol(math, main, op, "\u220f", "\\prod"); -defineSymbol(math, main, op, "\u2211", "\\sum"); -defineSymbol(math, main, op, "\u2a02", 
"\\bigotimes"); -defineSymbol(math, main, op, "\u2a01", "\\bigoplus"); -defineSymbol(math, main, op, "\u2a00", "\\bigodot"); -defineSymbol(math, main, op, "\u222e", "\\oint"); -defineSymbol(math, main, op, "\u222f", "\\oiint"); -defineSymbol(math, main, op, "\u2230", "\\oiiint"); -defineSymbol(math, main, op, "\u2a06", "\\bigsqcup"); -defineSymbol(math, main, op, "\u222b", "\\smallint"); -defineSymbol(text, main, inner, "\u2026", "\\textellipsis"); -defineSymbol(math, main, inner, "\u2026", "\\mathellipsis"); -defineSymbol(text, main, inner, "\u2026", "\\ldots", true); -defineSymbol(math, main, inner, "\u2026", "\\ldots", true); -defineSymbol(math, main, inner, "\u22ef", "\\@cdots", true); -defineSymbol(math, main, inner, "\u22f1", "\\ddots", true); -// \vdots is a macro that uses one of these two symbols (with made-up names): -defineSymbol(math, main, textord, "\u22ee", "\\varvdots"); -defineSymbol(text, main, textord, "\u22ee", "\\varvdots"); -defineSymbol(math, main, accent, "\u02ca", "\\acute"); -defineSymbol(math, main, accent, "\u02cb", "\\grave"); -defineSymbol(math, main, accent, "\u00a8", "\\ddot"); -defineSymbol(math, main, accent, "\u007e", "\\tilde"); -defineSymbol(math, main, accent, "\u02c9", "\\bar"); -defineSymbol(math, main, accent, "\u02d8", "\\breve"); -defineSymbol(math, main, accent, "\u02c7", "\\check"); -defineSymbol(math, main, accent, "\u005e", "\\hat"); -defineSymbol(math, main, accent, "\u20d7", "\\vec"); -defineSymbol(math, main, accent, "\u02d9", "\\dot"); -defineSymbol(math, main, accent, "\u02da", "\\mathring"); -// \imath and \jmath should be invariant to \mathrm, \mathbf, etc., so use PUA -defineSymbol(math, main, mathord, "\ue131", "\\@imath"); -defineSymbol(math, main, mathord, "\ue237", "\\@jmath"); -defineSymbol(math, main, textord, "\u0131", "\u0131"); -defineSymbol(math, main, textord, "\u0237", "\u0237"); -defineSymbol(text, main, textord, "\u0131", "\\i", true); -defineSymbol(text, main, textord, "\u0237", "\\j", true); 
-defineSymbol(text, main, textord, "\u00df", "\\ss", true); -defineSymbol(text, main, textord, "\u00e6", "\\ae", true); -defineSymbol(text, main, textord, "\u0153", "\\oe", true); -defineSymbol(text, main, textord, "\u00f8", "\\o", true); -defineSymbol(text, main, textord, "\u00c6", "\\AE", true); -defineSymbol(text, main, textord, "\u0152", "\\OE", true); -defineSymbol(text, main, textord, "\u00d8", "\\O", true); -defineSymbol(text, main, accent, "\u02ca", "\\'"); // acute -defineSymbol(text, main, accent, "\u02cb", "\\`"); // grave -defineSymbol(text, main, accent, "\u02c6", "\\^"); // circumflex -defineSymbol(text, main, accent, "\u02dc", "\\~"); // tilde -defineSymbol(text, main, accent, "\u02c9", "\\="); // macron -defineSymbol(text, main, accent, "\u02d8", "\\u"); // breve -defineSymbol(text, main, accent, "\u02d9", "\\."); // dot above -defineSymbol(text, main, accent, "\u00b8", "\\c"); // cedilla -defineSymbol(text, main, accent, "\u02da", "\\r"); // ring above -defineSymbol(text, main, accent, "\u02c7", "\\v"); // caron -defineSymbol(text, main, accent, "\u00a8", '\\"'); // diaeresis -defineSymbol(text, main, accent, "\u02dd", "\\H"); // double acute -defineSymbol(text, main, accent, "\u25ef", "\\textcircled"); // \bigcirc glyph - -// These ligatures are detected and created in Parser.js's `formLigatures`. 
-export const ligatures = { - "--": true, - "---": true, - "``": true, - "''": true, -}; - -defineSymbol(text, main, textord, "\u2013", "--", true); -defineSymbol(text, main, textord, "\u2013", "\\textendash"); -defineSymbol(text, main, textord, "\u2014", "---", true); -defineSymbol(text, main, textord, "\u2014", "\\textemdash"); -defineSymbol(text, main, textord, "\u2018", "`", true); -defineSymbol(text, main, textord, "\u2018", "\\textquoteleft"); -defineSymbol(text, main, textord, "\u2019", "'", true); -defineSymbol(text, main, textord, "\u2019", "\\textquoteright"); -defineSymbol(text, main, textord, "\u201c", "``", true); -defineSymbol(text, main, textord, "\u201c", "\\textquotedblleft"); -defineSymbol(text, main, textord, "\u201d", "''", true); -defineSymbol(text, main, textord, "\u201d", "\\textquotedblright"); -// \degree from gensymb package -defineSymbol(math, main, textord, "\u00b0", "\\degree", true); -defineSymbol(text, main, textord, "\u00b0", "\\degree"); -// \textdegree from inputenc package -defineSymbol(text, main, textord, "\u00b0", "\\textdegree", true); -// TODO: In LaTeX, \pounds can generate a different character in text and math -// mode, but among our fonts, only Main-Regular defines this character "163". -defineSymbol(math, main, textord, "\u00a3", "\\pounds"); -defineSymbol(math, main, textord, "\u00a3", "\\mathsterling", true); -defineSymbol(text, main, textord, "\u00a3", "\\pounds"); -defineSymbol(text, main, textord, "\u00a3", "\\textsterling", true); -defineSymbol(math, ams, textord, "\u2720", "\\maltese"); -defineSymbol(text, ams, textord, "\u2720", "\\maltese"); - -// There are lots of symbols which are the same, so we add them in afterwards. 
-// All of these are textords in math mode -const mathTextSymbols = "0123456789/@.\""; -for (let i = 0; i < mathTextSymbols.length; i++) { - const ch = mathTextSymbols.charAt(i); - defineSymbol(math, main, textord, ch, ch); -} - -// All of these are textords in text mode -const textSymbols = "0123456789!@*()-=+\";:?/.,"; -for (let i = 0; i < textSymbols.length; i++) { - const ch = textSymbols.charAt(i); - defineSymbol(text, main, textord, ch, ch); -} - -// All of these are textords in text mode, and mathords in math mode -const letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; -for (let i = 0; i < letters.length; i++) { - const ch = letters.charAt(i); - defineSymbol(math, main, mathord, ch, ch); - defineSymbol(text, main, textord, ch, ch); -} - -// Blackboard bold and script letters in Unicode range -defineSymbol(math, ams, textord, "C", "\u2102"); // blackboard bold -defineSymbol(text, ams, textord, "C", "\u2102"); -defineSymbol(math, ams, textord, "H", "\u210D"); -defineSymbol(text, ams, textord, "H", "\u210D"); -defineSymbol(math, ams, textord, "N", "\u2115"); -defineSymbol(text, ams, textord, "N", "\u2115"); -defineSymbol(math, ams, textord, "P", "\u2119"); -defineSymbol(text, ams, textord, "P", "\u2119"); -defineSymbol(math, ams, textord, "Q", "\u211A"); -defineSymbol(text, ams, textord, "Q", "\u211A"); -defineSymbol(math, ams, textord, "R", "\u211D"); -defineSymbol(text, ams, textord, "R", "\u211D"); -defineSymbol(math, ams, textord, "Z", "\u2124"); -defineSymbol(text, ams, textord, "Z", "\u2124"); -defineSymbol(math, main, mathord, "h", "\u210E"); // italic h, Planck constant -defineSymbol(text, main, mathord, "h", "\u210E"); - -// The next loop loads wide (surrogate pair) characters. -// We support some letters in the Unicode range U+1D400 to U+1D7FF, -// Mathematical Alphanumeric Symbols. -// Some editors do not deal well with wide characters. So don't write the -// string into this file. Instead, create the string from the surrogate pair. 
-let wideChar = ""; -for (let i = 0; i < letters.length; i++) { - const ch = letters.charAt(i); - - // The hex numbers in the next line are a surrogate pair. - // 0xD835 is the high surrogate for all letters in the range we support. - // 0xDC00 is the low surrogate for bold A. - wideChar = String.fromCharCode(0xD835, 0xDC00 + i); // A-Z a-z bold - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDC34 + i); // A-Z a-z italic - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDC68 + i); // A-Z a-z bold italic - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDD04 + i); // A-Z a-z Fraktur - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDD6C + i); // A-Z a-z bold Fraktur - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDDA0 + i); // A-Z a-z sans-serif - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDDD4 + i); // A-Z a-z sans bold - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDE08 + i); // A-Z a-z sans italic - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDE70 + i); // A-Z a-z monospace - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - if (i < 26) { - // KaTeX fonts have only capital letters for blackboard bold and script. 
- // See exception for k below. - wideChar = String.fromCharCode(0xD835, 0xDD38 + i); // A-Z double struck - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDC9C + i); // A-Z script - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - } - - // TODO: Add bold script when it is supported by a KaTeX font. -} -// "k" is the only double struck lower case letter in the KaTeX fonts. -wideChar = String.fromCharCode(0xD835, 0xDD5C); // k double struck -defineSymbol(math, main, mathord, "k", wideChar); -defineSymbol(text, main, textord, "k", wideChar); - -// Next, some wide character numerals -for (let i = 0; i < 10; i++) { - const ch = i.toString(); - - wideChar = String.fromCharCode(0xD835, 0xDFCE + i); // 0-9 bold - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDFE2 + i); // 0-9 sans serif - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDFEC + i); // 0-9 bold sans - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); - - wideChar = String.fromCharCode(0xD835, 0xDFF6 + i); // 0-9 monospace - defineSymbol(math, main, mathord, ch, wideChar); - defineSymbol(text, main, textord, ch, wideChar); -} - -// We add these Latin-1 letters as symbols for backwards-compatibility, -// but they are not actually in the font, nor are they supported by the -// Unicode accent mechanism, so they fall back to Times font and look ugly. -// TODO(edemaine): Fix this. 
-export const extraLatin = "\u00d0\u00de\u00fe"; -for (let i = 0; i < extraLatin.length; i++) { - const ch = extraLatin.charAt(i); - defineSymbol(math, main, mathord, ch, ch); - defineSymbol(text, main, textord, ch, ch); -} diff --git a/lib/scripts/generate-katex-data.ts b/lib/scripts/generate-katex-data.ts index a002e37..0a252c4 100644 --- a/lib/scripts/generate-katex-data.ts +++ b/lib/scripts/generate-katex-data.ts @@ -1,31 +1,36 @@ /** * Generates KaTeX-derived symbol data for @m2d/math. - * Source: KaTeX v0.16.22 src/symbols.js, src/macros.js, src/functions/op.js + * Fetches KaTeX v0.16.22 source at codegen time (MIT): + * https://github.com/KaTeX/KaTeX/tree/v0.16.22/src * * Run: pnpm generate:katex */ -import { readFileSync, writeFileSync } from "node:fs"; +import { writeFileSync } from "node:fs"; import { dirname, join } from "node:path"; import { fileURLToPath } from "node:url"; +const KATEX_VERSION = "0.16.22"; +const KATEX_BASE = `https://raw.githubusercontent.com/KaTeX/KaTeX/v${KATEX_VERSION}/src`; const REGENERATE_CMD = "pnpm generate:katex"; const SIMPLE_MACRO = /^\\([a-zA-Z@][a-zA-Z0-9@]*)$/; const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)); const ROOT = join(SCRIPT_DIR, ".."); -const symbolsSrc = readFileSync(join(SCRIPT_DIR, "data/katex-symbols.js"), "utf8"); -const macrosSrc = readFileSync(join(SCRIPT_DIR, "data/katex-macros.js"), "utf8"); -const opSrc = readFileSync(join(SCRIPT_DIR, "data/katex-op.js"), "utf8"); +const fetchKatexSource = async (path: string): Promise => { + const url = `${KATEX_BASE}/${path}`; + const response = await fetch(url); + if (!response.ok) { + throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`); + } + return response.text(); +}; const symbolMap: Record = {}; -for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) { - const strMatch = [...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g)]; - if (strMatch.length < 2) continue; - const unicode = 
JSON.parse(`"${strMatch[0][1]}"`) as string; - const cmd = strMatch[1][1].replace(/^\\+/, ""); - symbolMap[cmd] = unicode; -} +const aliasMap: Record = {}; +const accentMap: Record = {}; +const fnSet = new Set(); +const overrideMap: Record = {}; const decodeChar = (raw: string): string | undefined => { if (/^\\u[0-9a-fA-F]{4}$/.test(raw)) { @@ -34,121 +39,142 @@ const decodeChar = (raw: string): string | undefined => { return raw.length === 1 ? raw : undefined; }; -const resolveToUnicode = (name: string, seen = new Set()): string | undefined => { - if (seen.has(name)) return undefined; - seen.add(name); - if (symbolMap[name]) return symbolMap[name]; - const bodyMatch = macrosSrc.match( - new RegExp(`defineMacro\\("\\\\\\\\${name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}",\\s*"([^"]+)"\\)`), - ); - if (!bodyMatch) return undefined; - const body = bodyMatch[1]; - if (body.startsWith("\\mathrm{") && body.endsWith("}")) { - return body.slice(9, -1); - } - if (body.length === 1 && !body.startsWith("\\")) { - return body; - } - const charMatch = body.match(/\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/); - if (charMatch) { - return decodeChar(charMatch[1]); - } - if (body.startsWith("\\") && !body.includes("{")) { - return resolveToUnicode(body.replace(/^\\+/, ""), seen); +const generate = async (): Promise => { + console.log(`Fetching KaTeX v${KATEX_VERSION} from ${KATEX_BASE}`); + + const [symbolsSrc, macrosSrc, opSrc] = await Promise.all([ + fetchKatexSource("symbols.js"), + fetchKatexSource("macros.js"), + fetchKatexSource("functions/op.js"), + ]); + + for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) { + const strMatch = [...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g)]; + if (strMatch.length < 2) continue; + const unicode = JSON.parse(`"${strMatch[0][1]}"`) as string; + const cmd = strMatch[1][1].replace(/^\\+/, ""); + symbolMap[cmd] = unicode; } - return undefined; -}; -const aliasMap: Record = {}; -for (const m of 
macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g)) { - const name = m[1]; - if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue; - const resolved = resolveToUnicode(name); - if (resolved && [...resolved].length === 1) { - aliasMap[name] = resolved; + const resolveToUnicode = (name: string, seen = new Set()): string | undefined => { + if (seen.has(name)) return undefined; + seen.add(name); + if (symbolMap[name]) return symbolMap[name]; + const bodyMatch = macrosSrc.match( + new RegExp(`defineMacro\\("\\\\\\\\${name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")}",\\s*"([^"]+)"\\)`), + ); + if (!bodyMatch) return undefined; + const body = bodyMatch[1]; + if (body.startsWith("\\mathrm{") && body.endsWith("}")) { + return body.slice(9, -1); + } + if (body.length === 1 && !body.startsWith("\\")) { + return body; + } + const charMatch = body.match(/\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/); + if (charMatch) { + return decodeChar(charMatch[1]); + } + if (body.startsWith("\\") && !body.includes("{")) { + return resolveToUnicode(body.replace(/^\\+/, ""), seen); + } + return undefined; + }; + + for (const m of macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g)) { + const name = m[1]; + if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue; + const resolved = resolveToUnicode(name); + if (resolved && [...resolved].length === 1) { + aliasMap[name] = resolved; + } } -} -const accentMap: Record = {}; -for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) { - if (!m[0].includes(", accent,")) continue; - const strMatch = [...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g)]; - if (strMatch.length < 2) continue; - const chr = JSON.parse(`"${strMatch[0][1]}"`) as string; - const cmd = strMatch[1][1].replace(/^\\+/, ""); - accentMap[cmd] = chr; -} + for (const m of symbolsSrc.matchAll(/defineSymbol\([^\n]+\)/g)) { + if (!m[0].includes(", accent,")) continue; + const strMatch = [...m[0].matchAll(/"((?:\\u[0-9a-fA-F]{4}|\\[^"]|[^"])+)"/g)]; + 
if (strMatch.length < 2) continue; + const chr = JSON.parse(`"${strMatch[0][1]}"`) as string; + const cmd = strMatch[1][1].replace(/^\\+/, ""); + accentMap[cmd] = chr; + } -const fnSet = new Set(); -let blockIdx = 0; -while ((blockIdx = opSrc.indexOf("defineFunction({", blockIdx)) !== -1) { - const blockEnd = opSrc.indexOf("});", blockIdx); - const block = opSrc.slice(blockIdx, blockEnd); - if (block.includes("symbol: false") && !block.includes("symbol: true")) { - const namesMatch = block.match(/names:\s*\[([\s\S]*?)\]/); - if (namesMatch) { - for (const nameMatch of namesMatch[1].matchAll(/"\\+([^"]+)"/g)) { - fnSet.add(nameMatch[1]); + let blockIdx = 0; + while ((blockIdx = opSrc.indexOf("defineFunction({", blockIdx)) !== -1) { + const blockEnd = opSrc.indexOf("});", blockIdx); + const block = opSrc.slice(blockIdx, blockEnd); + if (block.includes("symbol: false") && !block.includes("symbol: true")) { + const namesMatch = block.match(/names:\s*\[([\s\S]*?)\]/); + if (namesMatch) { + for (const nameMatch of namesMatch[1].matchAll(/"\\+([^"]+)"/g)) { + fnSet.add(nameMatch[1]); + } } } + blockIdx = blockEnd; + } + for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(liminf|limsup)",/g)) { + fnSet.add(m[1]); } - blockIdx = blockEnd; -} -for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(liminf|limsup)",/g)) { - fnSet.add(m[1]); -} -const overrideMap: Record = {}; -for (const m of macrosSrc.matchAll( - /defineMacro\("\\\\([^"]+)",\s*"\\html@mathml\{[^}]+\}\{[^}]*\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/g, -)) { - const resolved = decodeChar(m[2]); - if (resolved && [...resolved].length === 1) { - overrideMap[m[1]] = resolved; + for (const m of macrosSrc.matchAll( + /defineMacro\("\\\\([^"]+)",\s*"\\html@mathml\{[^}]+\}\{[^}]*\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/g, + )) { + const resolved = decodeChar(m[2]); + if (resolved && [...resolved].length === 1) { + overrideMap[m[1]] = resolved; + } } -} -for (const m of 
macrosSrc.matchAll(/defineMacro\("\\\\(q?quad)",\s*"\\\\hskip(\d+)em/g)) { - overrideMap[m[1]] = m[1] === "qquad" ? "\u2003\u2003" : "\u2003"; -} -for (const m of macrosSrc.matchAll(/defineMacro\("(\\u[0-9a-fA-F]{4})",\s*"\\\\([^"]+)"\)/g)) { - const unicode = JSON.parse(`"${m[1]}"`) as string; - const target = `\\${m[2]}`; - if (!SIMPLE_MACRO.test(target) || unicode === "\uFE0F") continue; - const cmd = m[2]; - const resolved = resolveToUnicode(cmd) ?? unicode; - if ([...resolved].length === 1) { - overrideMap[cmd] = resolved; + for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(q?quad)",\s*"\\\\hskip(\d+)em/g)) { + overrideMap[m[1]] = m[1] === "qquad" ? "\u2003\u2003" : "\u2003"; } -} -for (const m of macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g)) { - const name = m[1]; - if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue; - if (symbolMap[name] || aliasMap[name] || overrideMap[name]) continue; - const resolved = resolveToUnicode(name); - if (resolved && [...resolved].length === 1) { - overrideMap[name] = resolved; + for (const m of macrosSrc.matchAll(/defineMacro\("(\\u[0-9a-fA-F]{4})",\s*"\\\\([^"]+)"\)/g)) { + const unicode = JSON.parse(`"${m[1]}"`) as string; + const target = `\\${m[2]}`; + if (!SIMPLE_MACRO.test(target) || unicode === "\uFE0F") continue; + const cmd = m[2]; + const resolved = resolveToUnicode(cmd) ?? unicode; + if ([...resolved].length === 1) { + overrideMap[cmd] = resolved; + } } -} - -if (overrideMap.neq) overrideMap.ne = overrideMap.neq; -if (symbolMap["@cdots"]) overrideMap.cdots = symbolMap["@cdots"]; - -const symbolLines = Object.entries(symbolMap) - .sort(([a], [b]) => a.localeCompare(b)) - .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) - .join("\n"); - -writeFileSync( - join(ROOT, "src/katexSymbols.ts"), - `/** KaTeX v0.16.22 \`defineSymbol\` entries — regenerate via \`${REGENERATE_CMD}\`. 
*/\nexport const KATEX_SYMBOLS: Record = {\n${symbolLines}\n};\n`, -); - -writeFileSync( - join(ROOT, "src/katexMeta.ts"), - `/** KaTeX v0.16.22 derived metadata — regenerate via \`${REGENERATE_CMD}\`. */\nexport const KATEX_ALIASES: Record = ${JSON.stringify(aliasMap, null, 2)};\n\nexport const KATEX_ACCENTS: Record = ${JSON.stringify(accentMap, null, 2)};\n\nexport const KATEX_FUNCTIONS = new Set(${JSON.stringify([...fnSet].sort())});\n\n/** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. */\nexport const KATEX_SYMBOL_OVERRIDES: Record = ${JSON.stringify(overrideMap, null, 2)};\n`, -); - -console.log(`KATEX_SYMBOLS: ${Object.keys(symbolMap).length}`); -console.log(`KATEX_ALIASES: ${Object.keys(aliasMap).length}`); -console.log(`KATEX_FUNCTIONS: ${fnSet.size}`); -console.log(`KATEX_SYMBOL_OVERRIDES: ${Object.keys(overrideMap).length}`); + for (const m of macrosSrc.matchAll(/defineMacro\("\\\\([^"]+)",\s*"([^"]+)"\)/g)) { + const name = m[1]; + if (!/^[a-zA-Z@][a-zA-Z0-9@]*$/.test(name)) continue; + if (symbolMap[name] || aliasMap[name] || overrideMap[name]) continue; + const resolved = resolveToUnicode(name); + if (resolved && [...resolved].length === 1) { + overrideMap[name] = resolved; + } + } + + if (overrideMap.neq) overrideMap.ne = overrideMap.neq; + if (symbolMap["@cdots"]) overrideMap.cdots = symbolMap["@cdots"]; + + const symbolLines = Object.entries(symbolMap) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) + .join("\n"); + + const sourceNote = `KaTeX v${KATEX_VERSION} — regenerate via \`${REGENERATE_CMD}\` (fetches from ${KATEX_BASE}).`; + + writeFileSync( + join(ROOT, "src/katexSymbols.ts"), + `/** ${sourceNote} */\nexport const KATEX_SYMBOLS: Record = {\n${symbolLines}\n};\n`, + ); + + writeFileSync( + join(ROOT, "src/katexMeta.ts"), + `/** ${sourceNote} */\nexport const KATEX_ALIASES: Record = ${JSON.stringify(aliasMap, null, 2)};\n\nexport const KATEX_ACCENTS: Record = 
${JSON.stringify(accentMap, null, 2)};\n\nexport const KATEX_FUNCTIONS = new Set(${JSON.stringify([...fnSet].sort())});\n\n/** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. */\nexport const KATEX_SYMBOL_OVERRIDES: Record = ${JSON.stringify(overrideMap, null, 2)};\n`, + ); + + console.log(`KATEX_SYMBOLS: ${Object.keys(symbolMap).length}`); + console.log(`KATEX_ALIASES: ${Object.keys(aliasMap).length}`); + console.log(`KATEX_FUNCTIONS: ${fnSet.size}`); + console.log(`KATEX_SYMBOL_OVERRIDES: ${Object.keys(overrideMap).length}`); +}; + +generate().catch(error => { + console.error(error); + process.exit(1); +}); diff --git a/lib/src/katexMeta.ts b/lib/src/katexMeta.ts index a528a28..9a09e8b 100644 --- a/lib/src/katexMeta.ts +++ b/lib/src/katexMeta.ts @@ -1,4 +1,4 @@ -/** KaTeX v0.16.22 derived metadata — regenerate via `pnpm generate:katex`. */ +/** KaTeX v0.16.22 — regenerate via `pnpm generate:katex` (fetches from https://raw.githubusercontent.com/KaTeX/KaTeX/v0.16.22/src). */ export const KATEX_ALIASES: Record = { "bgroup": "{", "egroup": "}", diff --git a/lib/src/katexSymbols.ts b/lib/src/katexSymbols.ts index 8eaef9d..7f92426 100644 --- a/lib/src/katexSymbols.ts +++ b/lib/src/katexSymbols.ts @@ -1,4 +1,4 @@ -/** KaTeX v0.16.22 `defineSymbol` entries — regenerate via `pnpm generate:katex`. */ +/** KaTeX v0.16.22 — regenerate via `pnpm generate:katex` (fetches from https://raw.githubusercontent.com/KaTeX/KaTeX/v0.16.22/src). 
*/ export const KATEX_SYMBOLS: Record = { " ": " ", "_": "_", diff --git a/sample.md b/sample.md index a3cea26..2fd828a 100644 --- a/sample.md +++ b/sample.md @@ -48,13 +48,16 @@ Here are some common mathematical symbols: - Fractions: $\frac{1}{2}$, $\frac{x+y}{z}$ - Square roots: $\sqrt{x}$, $\sqrt[3]{y}$ - Summations and products: $\sum_{i=1}^n i$, $\prod_{j=1}^m j$ -- Integrals: $\int_a^b f(x) dx$, $\oint_C \vec{F} \cdot d\vec{r}$ -- Limits: $\lim_{x \to \infty} \frac{1}{x}$ +- Integrals: $\int_a^b f(x) dx$, $\int_0^1 f(x)\,dx$, $\oint_C \vec{F} \cdot d\vec{r}$ +- Limits: $\lim_{x \to \infty} \frac{1}{x}$, $\lim_{n \to \infty} a_n$ - Vectors: $\vec{v}$, $\mathbf{v}$ +- Accents: $\tilde{x}$, $\bar{x}$, $\overline{AB}$, $\hat{x}$ - Matrices: $\begin{pmatrix} a & b \\ c & d \end{pmatrix}$ - Partial derivatives: $\frac{\partial f}{\partial x}$ - Infinity: $\infty$ -- Logical symbols: $\forall$, $\exists$, $\in$, $\notin$, $\subseteq$, $\supseteq$, $\land$, $\lor$, $\neg$ +- Logical symbols: $\forall$, $\exists$, $\in$, $\notin$, $\subseteq$, $\supseteq$, $\land$, $\lor$, $\neg$, $\wedge$, $\ne$, $\triangle ABC$, $\cdots$ +- Binomial coefficients: $\binom{n}{k}$ +- Stackrel: $\stackrel{\mathrm{def}}{=}$ - Trigonometric functions: $\sin(x)$, $\cos(y)$, $\tan(z)$ - Exponential and logarithmic functions: $e^x$, $\ln(y)$, $\log_{10}(z)$ From a2bab659391e1a4de47031b06faeec7c40f17e4c Mon Sep 17 00:00:00 2001 From: chitwitgit <100676229+chitwitgit@users.noreply.github.com> Date: Sun, 14 Jun 2026 21:12:44 +0800 Subject: [PATCH 6/7] Merge KaTeX symbol tables into a single lookup map. Flatten symbol, alias, and override entries into one generated katexData.ts object literal for a smaller minified bundle (~1.7% gzip savings vs three-table lookup). Add a benchmark script to compare serialization formats. 
--- lib/scripts/benchmark-bundle-formats.ts | 289 ++++++++++++++++++++++ lib/scripts/generate-katex-data.ts | 36 ++- lib/src/index.ts | 10 +- lib/src/{katexSymbols.ts => katexData.ts} | 146 +++++++++++ lib/src/katexMeta.ts | 156 ------------ 5 files changed, 462 insertions(+), 175 deletions(-) create mode 100644 lib/scripts/benchmark-bundle-formats.ts rename lib/src/{katexSymbols.ts => katexData.ts} (83%) delete mode 100644 lib/src/katexMeta.ts diff --git a/lib/scripts/benchmark-bundle-formats.ts b/lib/scripts/benchmark-bundle-formats.ts new file mode 100644 index 0000000..eb900a0 --- /dev/null +++ b/lib/scripts/benchmark-bundle-formats.ts @@ -0,0 +1,289 @@ +/** + * Benchmark KaTeX symbol table serialization formats. + * Run from lib/: pnpm exec node --experimental-strip-types scripts/benchmark-bundle-formats.ts + */ +import { execSync } from "node:child_process"; +import { readFileSync, rmSync, writeFileSync } from "node:fs"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; +import { gzipSync } from "node:zlib"; +import { + KATEX_ACCENTS, + KATEX_ALIASES, + KATEX_FUNCTIONS, + KATEX_SYMBOL_OVERRIDES, +} from "../src/katexMeta.ts"; +import { KATEX_SYMBOLS as BASE_SYMBOLS } from "../src/katexSymbols.ts"; + +const ROOT = join(dirname(fileURLToPath(import.meta.url)), ".."); +const SRC = join(ROOT, "src"); +const INDEX = join(SRC, "index.ts"); + +type Format = { + name: string; + note: string; + write: () => void; + patchIndex: (src: string) => string; +}; + +const mergedLookup: Record = { + ...KATEX_ALIASES, + ...BASE_SYMBOLS, + ...KATEX_SYMBOL_OVERRIDES, +}; + +const sortedEntries = Object.entries(mergedLookup).sort(([a], [b]) => + a.localeCompare(b), +); +const accentsJson = JSON.stringify(KATEX_ACCENTS); +const functionsJson = JSON.stringify([...KATEX_FUNCTIONS].sort()); + +const metaTail = [ + `export const KATEX_ACCENTS = ${accentsJson} as Record;`, + `export const KATEX_FUNCTIONS = new Set(${functionsJson});`, + "", 
+].join("\n"); + +const objectLiteralBody = sortedEntries + .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) + .join("\n"); + +const tupleBody = sortedEntries + .map(([k, v]) => ` [${JSON.stringify(k)}, ${JSON.stringify(v)}],`) + .join("\n"); + +const parallelKeys = sortedEntries.map(([k]) => JSON.stringify(k)).join(","); +const parallelValues = JSON.stringify(sortedEntries.map(([, v]) => v)); +const gzipB64 = gzipSync( + Buffer.from(JSON.stringify(mergedLookup), "utf8"), +).toString("base64"); + +const baselineIndex = readFileSync(INDEX, "utf8"); + +const cleanupGenerated = () => { + for (const f of ["katexData.ts"]) { + try { + rmSync(join(SRC, f)); + } catch { + /* absent */ + } + } +}; + +const mergedPatchIndex = (src: string): string => + src + .replace( + `import { KATEX_ACCENTS, KATEX_ALIASES, KATEX_FUNCTIONS, KATEX_SYMBOL_OVERRIDES } from "./katexMeta";\nimport { KATEX_SYMBOLS } from "./katexSymbols";`, + `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData";`, + ) + .replace( + `const resolveLatexSymbol = (name: string): string | undefined =>\n KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? 
KATEX_ALIASES[name];`, + `const resolveLatexSymbol = (name: string): string | undefined => KATEX_SYMBOLS[name];`, + ); + +const formats: Format[] = [ + { + name: "1-baseline-multi", + note: "PR #7: katexSymbols + katexMeta, 3-table lookup chain", + write: () => cleanupGenerated(), + patchIndex: (src) => src, + }, + { + name: "2-merged-literal", + note: "Single katexData.ts object literal + direct lookup", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: merged object literal */`, + `export const KATEX_SYMBOLS: Record = {`, + objectLiteralBody, + `};`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "3-json-parse", + note: "Single JSON.parse blob", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: JSON.parse blob */`, + `export const KATEX_SYMBOLS = JSON.parse(${JSON.stringify(JSON.stringify(mergedLookup))}) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "4-tuple-fromEntries", + note: "Tuple array + Object.fromEntries at module init", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: tuple entries + Object.fromEntries */`, + `const ENTRIES: [string, string][] = [`, + tupleBody, + `];`, + `export const KATEX_SYMBOLS = Object.fromEntries(ENTRIES) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "5-parallel-arrays", + note: "Parallel keys/values arrays + Object.fromEntries", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: parallel arrays */`, + `const KEYS = [${parallelKeys}] as const;`, + `const VALS = ${parallelValues} as const;`, + `export const KATEX_SYMBOLS = Object.fromEntries(KEYS.map((k, i) => [k, VALS[i]])) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, 
+ { + name: "6-gzip-base64-node", + note: "gzip+base64 blob, gunzipSync at module init (Node zlib)", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: gzip base64 (Node) */`, + `import { gunzipSync } from "node:zlib";`, + `const B64 = ${JSON.stringify(gzipB64)};`, + `export const KATEX_SYMBOLS = JSON.parse(`, + ` gunzipSync(Buffer.from(B64, "base64")).toString("utf8"),`, + `) as Record;`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "7-literal-oneline", + note: "Merged object literal on one line via JSON.stringify", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: one-line object literal */`, + `export const KATEX_SYMBOLS: Record = ${JSON.stringify(mergedLookup)};`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: mergedPatchIndex, + }, + { + name: "8-map-constructor", + note: "new Map(entries) then lookup via .get", + write: () => { + cleanupGenerated(); + writeFileSync( + join(SRC, "katexData.ts"), + [ + `/** benchmark: Map constructor */`, + `const ENTRIES: [string, string][] = [`, + tupleBody, + `];`, + `export const KATEX_SYMBOL_MAP = new Map(ENTRIES);`, + metaTail, + ].join("\n"), + ); + }, + patchIndex: (src) => + mergedPatchIndex(src) + .replace( + `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData";`, + `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOL_MAP } from "./katexData";`, + ) + .replace(`KATEX_SYMBOLS[name]`, `KATEX_SYMBOL_MAP.get(name)`), + }, +]; + +const measure = () => { + const cjs = readFileSync(join(ROOT, "dist/index.js")); + const esm = readFileSync(join(ROOT, "dist/index.mjs")); + const gzCjs = execSync("gzip -c dist/index.js", { + cwd: ROOT, + encoding: "buffer", + }); + const dataSrc = ["katexData.ts", "katexSymbols.ts", "katexMeta.ts"] + .map((f) => join(SRC, f)) + .filter((f) => { + try { + readFileSync(f); + return true; + } catch { + return false; 
+ } + }) + .reduce((sum, f) => sum + readFileSync(f).length, 0); + + return { cjs: cjs.length, esm: esm.length, gzCjs: gzCjs.length, dataSrc }; +}; + +console.log("KaTeX symbol format benchmark\n"); +console.log(`Merged lookup entries: ${sortedEntries.length}`); +console.log(`Raw JSON size: ${JSON.stringify(mergedLookup).length} B`); +console.log( + `gzip(JSON) alone: ${gzipSync(Buffer.from(JSON.stringify(mergedLookup))).length} B`, +); +console.log(`gzip+base64 payload: ${gzipB64.length} chars\n`); + +const results: Array< + { name: string; note: string } & ReturnType +> = []; + +for (const format of formats) { + format.write(); + writeFileSync(INDEX, format.patchIndex(baselineIndex)); + execSync("pnpm build", { cwd: ROOT, stdio: "pipe" }); + const stats = measure(); + results.push({ name: format.name, note: format.note, ...stats }); + console.log( + `✓ ${format.name}: gzip ${stats.gzCjs} B, CJS ${stats.cjs} B, data src ${stats.dataSrc} B`, + ); +} + +writeFileSync(INDEX, baselineIndex); +cleanupGenerated(); + +console.log("\n| Format | gzip CJS | CJS | ESM | data src | vs baseline |"); +console.log("|--------|----------|-----|-----|----------|-------------|"); +const baseGz = results[0].gzCjs; +for (const r of results) { + const delta = r.gzCjs - baseGz; + const pct = ((delta / baseGz) * 100).toFixed(1); + const deltaStr = + delta === 0 ? "—" : `${delta >= 0 ? 
"+" : ""}${delta} B (${pct}%)`; + console.log( + `| ${r.name} | ${r.gzCjs} | ${r.cjs} | ${r.esm} | ${r.dataSrc} | ${deltaStr} |`, + ); +} + +console.log("\nNotes:"); +for (const r of results) { + console.log(`- ${r.name}: ${r.note}`); +} diff --git a/lib/scripts/generate-katex-data.ts b/lib/scripts/generate-katex-data.ts index 196b469..840601d 100644 --- a/lib/scripts/generate-katex-data.ts +++ b/lib/scripts/generate-katex-data.ts @@ -178,27 +178,41 @@ const generate = async (): Promise => { if (overrideMap.neq) overrideMap.ne = overrideMap.neq; if (symbolMap["@cdots"]) overrideMap.cdots = symbolMap["@cdots"]; - const symbolLines = Object.entries(symbolMap) + const lookupMap: Record = { + ...aliasMap, + ...symbolMap, + ...overrideMap, + }; + + const lookupLines = Object.entries(lookupMap) .sort(([a], [b]) => a.localeCompare(b)) .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) .join("\n"); const sourceNote = `KaTeX v${KATEX_VERSION} — regenerate via \`${REGENERATE_CMD}\` (fetches from ${KATEX_BASE}).`; + const functions = [...fnSet].sort(); writeFileSync( - join(ROOT, "src/katexSymbols.ts"), - `/** ${sourceNote} */\nexport const KATEX_SYMBOLS: Record = {\n${symbolLines}\n};\n`, - ); - - writeFileSync( - join(ROOT, "src/katexMeta.ts"), - `/** ${sourceNote} */\nexport const KATEX_ALIASES: Record = ${JSON.stringify(aliasMap, null, 2)};\n\nexport const KATEX_ACCENTS: Record = ${JSON.stringify(accentMap, null, 2)};\n\nexport const KATEX_FUNCTIONS = new Set(${JSON.stringify([...fnSet].sort())});\n\n/** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. 
*/\nexport const KATEX_SYMBOL_OVERRIDES: Record = ${JSON.stringify(overrideMap, null, 2)};\n`, + join(ROOT, "src/katexData.ts"), + [ + `/** ${sourceNote} */`, + `export const KATEX_SYMBOLS: Record = {`, + lookupLines, + `};`, + ``, + `export const KATEX_ACCENTS = ${JSON.stringify(accentMap)} as Record;`, + ``, + `export const KATEX_FUNCTIONS = new Set(${JSON.stringify(functions)});`, + ``, + ].join("\n"), ); - console.log(`KATEX_SYMBOLS: ${Object.keys(symbolMap).length}`); - console.log(`KATEX_ALIASES: ${Object.keys(aliasMap).length}`); + console.log(`KATEX_SYMBOLS: ${Object.keys(lookupMap).length} (merged)`); + console.log(` base symbols: ${Object.keys(symbolMap).length}`); + console.log(` aliases: ${Object.keys(aliasMap).length}`); + console.log(` overrides: ${Object.keys(overrideMap).length}`); + console.log(`KATEX_ACCENTS: ${Object.keys(accentMap).length}`); console.log(`KATEX_FUNCTIONS: ${fnSet.size}`); - console.log(`KATEX_SYMBOL_OVERRIDES: ${Object.keys(overrideMap).length}`); }; generate().catch((error) => { diff --git a/lib/src/index.ts b/lib/src/index.ts index 8471730..73b1b70 100644 --- a/lib/src/index.ts +++ b/lib/src/index.ts @@ -4,13 +4,7 @@ import type * as latex from "@unified-latex/unified-latex-types"; // skipcq: JS-C1003 import type * as DOCX from "docx"; import { parseMath } from "latex-math"; -import { - KATEX_ACCENTS, - KATEX_ALIASES, - KATEX_FUNCTIONS, - KATEX_SYMBOL_OVERRIDES, -} from "./katexMeta"; -import { KATEX_SYMBOLS } from "./katexSymbols"; +import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData"; /** * Checks if the argument has curly brackets. @@ -35,7 +29,7 @@ const logSkippedEmptyMath = (latex: string, scope: "inline" | "block") => { /** Resolve a LaTeX command name to its Unicode symbol. */ const resolveLatexSymbol = (name: string): string | undefined => - KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? 
KATEX_ALIASES[name]; + KATEX_SYMBOLS[name]; type NAryOptions = { accent: string; diff --git a/lib/src/katexSymbols.ts b/lib/src/katexData.ts similarity index 83% rename from lib/src/katexSymbols.ts rename to lib/src/katexData.ts index aac7747..16fc96e 100644 --- a/lib/src/katexSymbols.ts +++ b/lib/src/katexData.ts @@ -52,6 +52,8 @@ export const KATEX_SYMBOLS: Record = { acute: "ˊ", ae: "æ", AE: "Æ", + alef: "ℵ", + alefsym: "ℵ", aleph: "ℵ", alpha: "α", amalg: "⨿", @@ -72,6 +74,7 @@ export const KATEX_SYMBOLS: Record = { beta: "β", beth: "ℶ", between: "≬", + bgroup: "{", bigcap: "⋂", bigcirc: "◯", bigcup: "⋃", @@ -99,6 +102,7 @@ export const KATEX_SYMBOLS: Record = { boxplus: "⊞", boxtimes: "⊠", breve: "˘", + bull: "∙", bullet: "∙", bumpeq: "≏", Bumpeq: "≎", @@ -107,6 +111,7 @@ export const KATEX_SYMBOLS: Record = { Cap: "⋒", cdot: "⋅", cdotp: "⋅", + cdots: "⋯", centerdot: "⋅", check: "ˇ", checkmark: "✓", @@ -120,10 +125,14 @@ export const KATEX_SYMBOLS: Record = { circleddash: "⊝", circledR: "®", circledS: "Ⓢ", + clubs: "♣", clubsuit: "♣", + coloneqq: "≔", + Coloneqq: "⩴", complement: "∁", cong: "≅", coprod: "∐", + copyright: "©", cup: "∪", Cup: "⋓", curlyeqprec: "⋞", @@ -134,10 +143,15 @@ export const KATEX_SYMBOLS: Record = { curvearrowright: "↷", dag: "†", dagger: "†", + Dagger: "‡", daleth: "ℸ", + darr: "↓", + dArr: "⇓", + Darr: "⇓", dashleftarrow: "⇠", dashrightarrow: "⇢", dashv: "⊣", + dblcolon: "∷", ddag: "‡", ddagger: "‡", ddot: "¨", @@ -149,6 +163,7 @@ export const KATEX_SYMBOLS: Record = { diagup: "╱", diamond: "⋄", Diamond: "◊", + diamonds: "♢", diamondsuit: "♢", digamma: "ϝ", div: "÷", @@ -166,16 +181,21 @@ export const KATEX_SYMBOLS: Record = { downdownarrows: "⇊", downharpoonleft: "⇃", downharpoonright: "⇂", + egroup: "}", ell: "ℓ", + empty: "∅", emptyset: "∅", epsilon: "ϵ", eqcirc: "≖", + eqcolon: "∹", + eqqcolon: "≕", eqsim: "≂", eqslantgtr: "⪖", eqslantless: "⪕", equiv: "≡", eta: "η", eth: "ð", + exist: "∃", exists: "∃", fallingdotseq: "≒", Finv: 
"Ⅎ", @@ -207,8 +227,12 @@ export const KATEX_SYMBOLS: Record = { gtrless: "≷", gtrsim: "≳", H: "˝", + harr: "↔", + hArr: "⇔", + Harr: "⇔", hat: "^", hbar: "ℏ", + hearts: "♡", heartsuit: "♡", hookleftarrow: "↩", hookrightarrow: "↪", @@ -217,21 +241,29 @@ export const KATEX_SYMBOLS: Record = { iiint: "∭", iint: "∬", Im: "ℑ", + image: "ℑ", imageof: "⊷", in: "∈", + infin: "∞", infty: "∞", int: "∫", intercal: "⊺", intop: "∫", iota: "ι", + isin: "∈", j: "ȷ", Join: "⋈", kappa: "κ", lambda: "λ", Lambda: "Λ", land: "∧", + lang: "⟨", langle: "⟨", + larr: "←", + lArr: "⇐", + Larr: "⇐", lbrace: "{", + lBrace: "⦃", lbrack: "[", lceil: "⌈", ldotp: ".", @@ -263,6 +295,8 @@ export const KATEX_SYMBOLS: Record = { lgroup: "⟮", lhd: "⊲", ll: "≪", + llbracket: "⟦", + llcorner: "⌞", Lleftarrow: "⇚", lll: "⋘", llless: "⋘", @@ -284,6 +318,11 @@ export const KATEX_SYMBOLS: Record = { lor: "∨", lozenge: "◊", lparen: "(", + lq: "`", + lrarr: "↔", + lrArr: "⇔", + Lrarr: "⇔", + lrcorner: "⌟", Lsh: "↰", lt: "<", ltimes: "⋉", @@ -295,6 +334,7 @@ export const KATEX_SYMBOLS: Record = { mathring: "˚", mathsterling: "£", measuredangle: "∡", + medspace: ":", mho: "℧", mid: "∣", models: "⊨", @@ -304,8 +344,11 @@ export const KATEX_SYMBOLS: Record = { nabla: "∇", natural: "♮", ncong: "≆", + ne: "≠", nearrow: "↗", neg: "¬", + negthinspace: "!", + neq: "≠", nexists: "∄", ngeq: "≱", ngtr: "≯", @@ -318,6 +361,8 @@ export const KATEX_SYMBOLS: Record = { nless: "≮", nmid: "∤", nobreakspace: " ", + notin: "∉", + notni: "∌", nparallel: "∦", nprec: "⊀", npreceq: "⋠", @@ -351,6 +396,7 @@ export const KATEX_SYMBOLS: Record = { omicron: "ο", ominus: "⊖", oplus: "⊕", + ordinarycolon: ":", origof: "⊶", oslash: "⊘", otimes: "⊗", @@ -364,6 +410,7 @@ export const KATEX_SYMBOLS: Record = { pi: "π", Pi: "Π", pitchfork: "⋔", + plusmn: "±", pm: "±", pounds: "£", prec: "≺", @@ -379,12 +426,20 @@ export const KATEX_SYMBOLS: Record = { propto: "∝", psi: "ψ", Psi: "Ψ", + qquad: "  ", + quad: " ", r: "˚", + rang: "⟩", rangle: 
"⟩", + rarr: "→", + rArr: "⇒", + Rarr: "⇒", rbrace: "}", + rBrace: "⦄", rbrack: "]", rceil: "⌉", Re: "ℜ", + real: "ℜ", restriction: "↾", rfloor: "⌋", rgroup: "⟯", @@ -403,13 +458,17 @@ export const KATEX_SYMBOLS: Record = { risingdotseq: "≓", rmoustache: "⎱", rparen: ")", + rq: "'", + rrbracket: "⟧", Rrightarrow: "⇛", Rsh: "↱", rtimes: "⋊", rvert: "∣", rVert: "∥", S: "§", + sdot: "⋅", searrow: "↘", + sect: "§", setminus: "∖", sharp: "♯", shortmid: "∣", @@ -424,6 +483,7 @@ export const KATEX_SYMBOLS: Record = { smallsmile: "⌣", smile: "⌣", space: " ", + spades: "♠", spadesuit: "♠", sphericalangle: "∢", sqcap: "⊓", @@ -435,6 +495,8 @@ export const KATEX_SYMBOLS: Record = { square: "□", ss: "ß", star: "⋆", + sub: "⊂", + sube: "⊆", subset: "⊂", Subset: "⋐", subseteq: "⊆", @@ -450,6 +512,7 @@ export const KATEX_SYMBOLS: Record = { succnsim: "⋩", succsim: "≿", sum: "∑", + supe: "⊇", supset: "⊃", Supset: "⋑", supseteq: "⊇", @@ -480,13 +543,17 @@ export const KATEX_SYMBOLS: Record = { textquotedblright: "”", textquoteleft: "‘", textquoteright: "’", + textregistered: "®", textsterling: "£", textunderscore: "_", therefore: "∴", theta: "θ", Theta: "Θ", + thetasym: "ϑ", thickapprox: "≈", thicksim: "∼", + thickspace: ";", + thinspace: ",", tilde: "~", times: "×", to: "→", @@ -525,6 +592,10 @@ export const KATEX_SYMBOLS: Record = { u211A: "Q", u211D: "R", u2124: "Z", + uarr: "↑", + uArr: "⇑", + Uarr: "⇑", + ulcorner: "⌜", unlhd: "⊴", unrhd: "⊵", uparrow: "↑", @@ -537,6 +608,7 @@ export const KATEX_SYMBOLS: Record = { upsilon: "υ", Upsilon: "Υ", upuparrows: "⇈", + urcorner: "⌝", v: "ˇ", varepsilon: "ε", varkappa: "ϰ", @@ -554,6 +626,7 @@ export const KATEX_SYMBOLS: Record = { vdash: "⊢", vDash: "⊨", Vdash: "⊩", + vdots: "⋮", vec: "⃗", vee: "∨", veebar: "⊻", @@ -561,6 +634,7 @@ export const KATEX_SYMBOLS: Record = { Vert: "∥", Vvdash: "⊪", wedge: "∧", + weierp: "℘", wp: "℘", wr: "≀", xi: "ξ", @@ -568,3 +642,75 @@ export const KATEX_SYMBOLS: Record = { yen: "¥", zeta: "ζ", }; + 
+export const KATEX_ACCENTS = { + acute: "ˊ", + grave: "ˋ", + ddot: "¨", + tilde: "~", + bar: "ˉ", + breve: "˘", + check: "ˇ", + hat: "^", + vec: "⃗", + dot: "˙", + mathring: "˚", + "'": "ˊ", + "`": "ˋ", + "^": "ˆ", + "~": "˜", + "=": "ˉ", + u: "˘", + ".": "˙", + c: "¸", + r: "˚", + v: "ˇ", + H: "˝", + textcircled: "◯", +} as Record; + +export const KATEX_FUNCTIONS = new Set([ + "Pr", + "arccos", + "arcctg", + "arcsin", + "arctan", + "arctg", + "arg", + "ch", + "cos", + "cosec", + "cosh", + "cot", + "cotg", + "coth", + "csc", + "ctg", + "cth", + "deg", + "det", + "dim", + "exp", + "gcd", + "hom", + "inf", + "ker", + "lg", + "lim", + "liminf", + "limsup", + "ln", + "log", + "mathop", + "max", + "min", + "sec", + "sh", + "sin", + "sinh", + "sup", + "tan", + "tanh", + "tg", + "th", +]); diff --git a/lib/src/katexMeta.ts b/lib/src/katexMeta.ts deleted file mode 100644 index 614fae6..0000000 --- a/lib/src/katexMeta.ts +++ /dev/null @@ -1,156 +0,0 @@ -/** KaTeX v0.16.22 — regenerate via `pnpm generate:katex` (fetches from https://raw.githubusercontent.com/KaTeX/KaTeX/v0.16.22/src). 
*/ -export const KATEX_ALIASES: Record = { - bgroup: "{", - egroup: "}", - lq: "`", - rq: "'", - thinspace: ",", - medspace: ":", - thickspace: ";", - negthinspace: "!", - ordinarycolon: ":", - notni: "∌", - darr: "↓", - dArr: "⇓", - Darr: "⇓", - lang: "⟨", - rang: "⟩", - uarr: "↑", - uArr: "⇑", - Uarr: "⇑", - alef: "ℵ", - alefsym: "ℵ", - bull: "∙", - clubs: "♣", - Dagger: "‡", - diamonds: "♢", - empty: "∅", - exist: "∃", - harr: "↔", - hArr: "⇔", - Harr: "⇔", - hearts: "♡", - image: "ℑ", - infin: "∞", - isin: "∈", - larr: "←", - lArr: "⇐", - Larr: "⇐", - lrarr: "↔", - lrArr: "⇔", - Lrarr: "⇔", - plusmn: "±", - rarr: "→", - rArr: "⇒", - Rarr: "⇒", - real: "ℜ", - sdot: "⋅", - sect: "§", - spades: "♠", - sub: "⊂", - sube: "⊆", - supe: "⊇", - thetasym: "ϑ", - weierp: "℘", -}; - -export const KATEX_ACCENTS: Record = { - acute: "ˊ", - grave: "ˋ", - ddot: "¨", - tilde: "~", - bar: "ˉ", - breve: "˘", - check: "ˇ", - hat: "^", - vec: "⃗", - dot: "˙", - mathring: "˚", - "'": "ˊ", - "`": "ˋ", - "^": "ˆ", - "~": "˜", - "=": "ˉ", - u: "˘", - ".": "˙", - c: "¸", - r: "˚", - v: "ˇ", - H: "˝", - textcircled: "◯", -}; - -export const KATEX_FUNCTIONS = new Set([ - "Pr", - "arccos", - "arcctg", - "arcsin", - "arctan", - "arctg", - "arg", - "ch", - "cos", - "cosec", - "cosh", - "cot", - "cotg", - "coth", - "csc", - "ctg", - "cth", - "deg", - "det", - "dim", - "exp", - "gcd", - "hom", - "inf", - "ker", - "lg", - "lim", - "liminf", - "limsup", - "ln", - "log", - "mathop", - "max", - "min", - "sec", - "sh", - "sin", - "sinh", - "sup", - "tan", - "tanh", - "tg", - "th", -]); - -/** KaTeX macro-only symbols mapped to Unicode for Word OMML text runs. 
*/ -export const KATEX_SYMBOL_OVERRIDES: Record = { - quad: " ", - qquad: "  ", - cdotp: "⋅", - neq: "≠", - notin: "∉", - perp: "⊥", - notni: "∌", - ulcorner: "⌜", - urcorner: "⌝", - llcorner: "⌞", - lrcorner: "⌟", - copyright: "©", - textregistered: "®", - vdots: "⋮", - dblcolon: "∷", - eqcolon: "∹", - coloneqq: "≔", - eqqcolon: "≕", - Coloneqq: "⩴", - llbracket: "⟦", - rrbracket: "⟧", - lBrace: "⦃", - rBrace: "⦄", - ne: "≠", - cdots: "⋯", -}; From 19cfdc0beba17673f46ab13ce59cea9999ce4d7c Mon Sep 17 00:00:00 2001 From: chitwitgit <100676229+chitwitgit@users.noreply.github.com> Date: Sun, 21 Jun 2026 22:34:14 +0800 Subject: [PATCH 7/7] Fix Word DOCX corruption and add OOXML schema validation tests. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refactor the LaTeX→OMML mapper so n-ary operators, accents, and scripts emit schema-valid OMML, generate operator tables from KaTeX at codegen time, and add fixture-based tests validated against the Microsoft 365 OOXML schema. 
--- .gitignore | 3 + .../fixtures/accents/hat-tilde-bar-vec.md | 5 + lib/__tests__/fixtures/accents/overline.md | 5 + .../fixtures/basic/block-equation.md | 5 + .../fixtures/basic/inline-equation.md | 1 + .../fixtures/basic/inline-variable.md | 1 + lib/__tests__/fixtures/basic/plain-text.md | 3 + .../display/multiple-block-equations.md | 13 + .../adjacent-unrenderable-inline.md | 5 + lib/__tests__/fixtures/inline/mixed-inline.md | 3 + .../fixtures/lists/common-symbols-list.md | 18 + .../fixtures/operators/contour-integral.md | 1 + lib/__tests__/fixtures/operators/fraction.md | 5 + .../fixtures/operators/integral-definite.md | 5 + .../fixtures/operators/integral-indefinite.md | 1 + lib/__tests__/fixtures/operators/nth-root.md | 1 + lib/__tests__/fixtures/operators/product.md | 1 + lib/__tests__/fixtures/operators/sqrt.md | 1 + lib/__tests__/fixtures/operators/summation.md | 5 + .../scripts/superscripts-subscripts.md | 1 + lib/__tests__/fixtures/structures/binomial.md | 5 + lib/__tests__/fixtures/structures/limit.md | 1 + .../structures/parentheses-fraction.md | 7 + lib/__tests__/fixtures/structures/stackrel.md | 5 + .../symbols/exponential-logarithmic.md | 1 + .../fixtures/symbols/greek-letters.md | 1 + .../fixtures/symbols/logical-symbols.md | 1 + .../fixtures/symbols/misc-symbols.md | 7 + .../fixtures/symbols/trigonometric.md | 1 + lib/__tests__/fixtures/text/text-in-math.md | 7 + lib/__tests__/fixtures/text/textcolor.md | 9 + .../unsupported/align-and-cases-skipped.md | 32 + .../fixtures/unsupported/matrices-skipped.md | 34 + lib/__tests__/helpers/assert-valid-docx.ts | 123 +++ lib/__tests__/index.test.ts | 57 +- lib/package.json | 3 +- lib/scripts/benchmark-bundle-formats.ts | 148 ++-- lib/scripts/generate-katex-data.ts | 146 ++- lib/src/index.ts | 834 +++++++++++++----- lib/src/katexData.ts | 52 +- lib/tsconfig-build.json | 2 +- pnpm-lock.yaml | 65 ++ sample.md | 12 - 43 files changed, 1264 insertions(+), 372 deletions(-) create mode 100644 
lib/__tests__/fixtures/accents/hat-tilde-bar-vec.md create mode 100644 lib/__tests__/fixtures/accents/overline.md create mode 100644 lib/__tests__/fixtures/basic/block-equation.md create mode 100644 lib/__tests__/fixtures/basic/inline-equation.md create mode 100644 lib/__tests__/fixtures/basic/inline-variable.md create mode 100644 lib/__tests__/fixtures/basic/plain-text.md create mode 100644 lib/__tests__/fixtures/display/multiple-block-equations.md create mode 100644 lib/__tests__/fixtures/edge-cases/adjacent-unrenderable-inline.md create mode 100644 lib/__tests__/fixtures/inline/mixed-inline.md create mode 100644 lib/__tests__/fixtures/lists/common-symbols-list.md create mode 100644 lib/__tests__/fixtures/operators/contour-integral.md create mode 100644 lib/__tests__/fixtures/operators/fraction.md create mode 100644 lib/__tests__/fixtures/operators/integral-definite.md create mode 100644 lib/__tests__/fixtures/operators/integral-indefinite.md create mode 100644 lib/__tests__/fixtures/operators/nth-root.md create mode 100644 lib/__tests__/fixtures/operators/product.md create mode 100644 lib/__tests__/fixtures/operators/sqrt.md create mode 100644 lib/__tests__/fixtures/operators/summation.md create mode 100644 lib/__tests__/fixtures/scripts/superscripts-subscripts.md create mode 100644 lib/__tests__/fixtures/structures/binomial.md create mode 100644 lib/__tests__/fixtures/structures/limit.md create mode 100644 lib/__tests__/fixtures/structures/parentheses-fraction.md create mode 100644 lib/__tests__/fixtures/structures/stackrel.md create mode 100644 lib/__tests__/fixtures/symbols/exponential-logarithmic.md create mode 100644 lib/__tests__/fixtures/symbols/greek-letters.md create mode 100644 lib/__tests__/fixtures/symbols/logical-symbols.md create mode 100644 lib/__tests__/fixtures/symbols/misc-symbols.md create mode 100644 lib/__tests__/fixtures/symbols/trigonometric.md create mode 100644 lib/__tests__/fixtures/text/text-in-math.md create mode 100644 
lib/__tests__/fixtures/text/textcolor.md create mode 100644 lib/__tests__/fixtures/unsupported/align-and-cases-skipped.md create mode 100644 lib/__tests__/fixtures/unsupported/matrices-skipped.md create mode 100644 lib/__tests__/helpers/assert-valid-docx.ts diff --git a/.gitignore b/.gitignore index d2e784d..9c4d0af 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,9 @@ dist-ssr # test coverage coverage +# generated docx artifacts for manual inspection (see lib/__tests__) +debug/fixtures/**/* + # temporary files tsup.config.bundled* diff --git a/lib/__tests__/fixtures/accents/hat-tilde-bar-vec.md b/lib/__tests__/fixtures/accents/hat-tilde-bar-vec.md new file mode 100644 index 0000000..d58dcaf --- /dev/null +++ b/lib/__tests__/fixtures/accents/hat-tilde-bar-vec.md @@ -0,0 +1,5 @@ +Accents: $\tilde{x}$, $\bar{x}$, $\hat{x}$, $\vec{v}$ + +$$ +\hat{x} +$$ diff --git a/lib/__tests__/fixtures/accents/overline.md b/lib/__tests__/fixtures/accents/overline.md new file mode 100644 index 0000000..513b587 --- /dev/null +++ b/lib/__tests__/fixtures/accents/overline.md @@ -0,0 +1,5 @@ +Overline: $\overline{AB}$ + +$$ +\overline{AB} +$$ diff --git a/lib/__tests__/fixtures/basic/block-equation.md b/lib/__tests__/fixtures/basic/block-equation.md new file mode 100644 index 0000000..9f4d4a8 --- /dev/null +++ b/lib/__tests__/fixtures/basic/block-equation.md @@ -0,0 +1,5 @@ +A simple block equation: + +$$ +x + y = z +$$ diff --git a/lib/__tests__/fixtures/basic/inline-equation.md b/lib/__tests__/fixtures/basic/inline-equation.md new file mode 100644 index 0000000..e422bb5 --- /dev/null +++ b/lib/__tests__/fixtures/basic/inline-equation.md @@ -0,0 +1 @@ +Einstein's mass-energy equivalence: $E = mc^2$ diff --git a/lib/__tests__/fixtures/basic/inline-variable.md b/lib/__tests__/fixtures/basic/inline-variable.md new file mode 100644 index 0000000..c53452a --- /dev/null +++ b/lib/__tests__/fixtures/basic/inline-variable.md @@ -0,0 +1 @@ +A single variable: $x$ diff --git 
a/lib/__tests__/fixtures/basic/plain-text.md b/lib/__tests__/fixtures/basic/plain-text.md new file mode 100644 index 0000000..1b310c4 --- /dev/null +++ b/lib/__tests__/fixtures/basic/plain-text.md @@ -0,0 +1,3 @@ +Hello world. + +This paragraph has no math. diff --git a/lib/__tests__/fixtures/display/multiple-block-equations.md b/lib/__tests__/fixtures/display/multiple-block-equations.md new file mode 100644 index 0000000..d2eef61 --- /dev/null +++ b/lib/__tests__/fixtures/display/multiple-block-equations.md @@ -0,0 +1,13 @@ +Multiple display equations in sequence: + +$$ +\sum_{n=1}^{\infty} \frac{1}{n^2} = \frac{\pi^2}{6} +$$ + +$$ +\int_0^\infty x^2 e^{-x^2} dx = \frac{\sqrt{\pi}}{4} +$$ + +$$ +\frac{d}{dx} \left( \frac{1}{x} \right) = -\frac{1}{x^2} +$$ diff --git a/lib/__tests__/fixtures/edge-cases/adjacent-unrenderable-inline.md b/lib/__tests__/fixtures/edge-cases/adjacent-unrenderable-inline.md new file mode 100644 index 0000000..1782256 --- /dev/null +++ b/lib/__tests__/fixtures/edge-cases/adjacent-unrenderable-inline.md @@ -0,0 +1,5 @@ +Adjacent inline math with unrenderable segment should skip only the bad part. + +The area is $x$ cm$^{2}$ in units. + +Only the variable renders; trailing superscript on text is skipped. diff --git a/lib/__tests__/fixtures/inline/mixed-inline.md b/lib/__tests__/fixtures/inline/mixed-inline.md new file mode 100644 index 0000000..fcae983 --- /dev/null +++ b/lib/__tests__/fixtures/inline/mixed-inline.md @@ -0,0 +1,3 @@ +Inline math expressions can be enclosed within single dollar signs: $E=mc^2$. You can also use `\(` and `\)` delimiters in prose: $\sum_{i=1}^{n} i^2$. + +Mixed on one line: $\alpha + \beta = \gamma$ and $\frac{1}{2}$. 
diff --git a/lib/__tests__/fixtures/lists/common-symbols-list.md b/lib/__tests__/fixtures/lists/common-symbols-list.md new file mode 100644 index 0000000..76e94ae --- /dev/null +++ b/lib/__tests__/fixtures/lists/common-symbols-list.md @@ -0,0 +1,18 @@ +Common mathematical symbols in a list: + +- Greek letters: $\alpha$, $\beta$, $\gamma$, $\Gamma$, $\Delta$, $\pi$, $\Pi$, $\Sigma$, $\omega$, $\Omega$ +- Superscripts and subscripts: $x^2$, $y_i$, $a^{b+c}$, $e^{-i\omega t}$ +- Fractions: $\frac{1}{2}$, $\frac{x+y}{z}$ +- Square roots: $\sqrt{x}$, $\sqrt[3]{y}$ +- Summations and products: $\sum_{i=1}^n i$, $\prod_{j=1}^m j$ +- Integrals: $\int_a^b f(x)\,dx$, $\int_0^1 f(x)\,dx$, $\oint_C \vec{F} \cdot d\vec{r}$ +- Limits: $\lim_{x \to \infty} \frac{1}{x}$, $\lim_{n \to \infty} a_n$ +- Vectors: $\vec{v}$, $\mathbf{v}$ +- Accents: $\tilde{x}$, $\bar{x}$, $\overline{AB}$, $\hat{x}$ +- Partial derivatives: $\frac{\partial f}{\partial x}$ +- Infinity: $\infty$ +- Logical symbols: $\forall$, $\exists$, $\in$, $\notin$, $\subseteq$, $\supseteq$, $\land$, $\lor$, $\neg$, $\wedge$, $\ne$, $\triangle ABC$, $\cdots$ +- Binomial coefficients: $\binom{n}{k}$ +- Stackrel: $\stackrel{\mathrm{def}}{=}$ +- Trigonometric functions: $\sin(x)$, $\cos(y)$, $\tan(z)$ +- Exponential and logarithmic functions: $e^x$, $\ln(y)$, $\log_{10}(z)$ diff --git a/lib/__tests__/fixtures/operators/contour-integral.md b/lib/__tests__/fixtures/operators/contour-integral.md new file mode 100644 index 0000000..5b1320d --- /dev/null +++ b/lib/__tests__/fixtures/operators/contour-integral.md @@ -0,0 +1 @@ +Contour integral: $\oint_C \vec{F} \cdot d\vec{r}$ diff --git a/lib/__tests__/fixtures/operators/fraction.md b/lib/__tests__/fixtures/operators/fraction.md new file mode 100644 index 0000000..30bbc13 --- /dev/null +++ b/lib/__tests__/fixtures/operators/fraction.md @@ -0,0 +1,5 @@ +Fractions: $\frac{1}{2}$, $\frac{x+y}{z}$, and a block form: + +$$ +\frac{a^2 + b^2}{c^2} = 1 +$$ diff --git 
a/lib/__tests__/fixtures/operators/integral-definite.md b/lib/__tests__/fixtures/operators/integral-definite.md new file mode 100644 index 0000000..1d1ce2e --- /dev/null +++ b/lib/__tests__/fixtures/operators/integral-definite.md @@ -0,0 +1,5 @@ +Definite integral: $\int_0^1 f(x)\,dx$ + +$$ +\int_0^\infty x^2 e^{-x^2} dx = \frac{\sqrt{\pi}}{4} +$$ diff --git a/lib/__tests__/fixtures/operators/integral-indefinite.md b/lib/__tests__/fixtures/operators/integral-indefinite.md new file mode 100644 index 0000000..e670110 --- /dev/null +++ b/lib/__tests__/fixtures/operators/integral-indefinite.md @@ -0,0 +1 @@ +Indefinite integral with bounds: $\int_a^b f(x)\,dx$ diff --git a/lib/__tests__/fixtures/operators/nth-root.md b/lib/__tests__/fixtures/operators/nth-root.md new file mode 100644 index 0000000..0205e68 --- /dev/null +++ b/lib/__tests__/fixtures/operators/nth-root.md @@ -0,0 +1 @@ +Nth root: $\sqrt[3]{y}$ diff --git a/lib/__tests__/fixtures/operators/product.md b/lib/__tests__/fixtures/operators/product.md new file mode 100644 index 0000000..db29376 --- /dev/null +++ b/lib/__tests__/fixtures/operators/product.md @@ -0,0 +1 @@ +Product notation: $\prod_{j=1}^{m} j$ diff --git a/lib/__tests__/fixtures/operators/sqrt.md b/lib/__tests__/fixtures/operators/sqrt.md new file mode 100644 index 0000000..ead1c9c --- /dev/null +++ b/lib/__tests__/fixtures/operators/sqrt.md @@ -0,0 +1 @@ +Square roots: $\sqrt{x}$ and nested $\sqrt{a^2 + b^2}$ diff --git a/lib/__tests__/fixtures/operators/summation.md b/lib/__tests__/fixtures/operators/summation.md new file mode 100644 index 0000000..1238afa --- /dev/null +++ b/lib/__tests__/fixtures/operators/summation.md @@ -0,0 +1,5 @@ +Summation with limits: $\sum_{i=1}^{n} x_i$ + +$$ +\sum_{n=1}^{\infty} \frac{1}{n^2} = \frac{\pi^2}{6} +$$ diff --git a/lib/__tests__/fixtures/scripts/superscripts-subscripts.md b/lib/__tests__/fixtures/scripts/superscripts-subscripts.md new file mode 100644 index 0000000..84e2abb --- /dev/null +++ 
b/lib/__tests__/fixtures/scripts/superscripts-subscripts.md @@ -0,0 +1 @@ +Superscripts and subscripts: $x^2$, $y_i$, $a^{b+c}$, $e^{-i\omega t}$ diff --git a/lib/__tests__/fixtures/structures/binomial.md b/lib/__tests__/fixtures/structures/binomial.md new file mode 100644 index 0000000..0b0239e --- /dev/null +++ b/lib/__tests__/fixtures/structures/binomial.md @@ -0,0 +1,5 @@ +Binomial coefficient: $\binom{n}{k}$ + +$$ +\binom{n}{k} +$$ diff --git a/lib/__tests__/fixtures/structures/limit.md b/lib/__tests__/fixtures/structures/limit.md new file mode 100644 index 0000000..169ddf2 --- /dev/null +++ b/lib/__tests__/fixtures/structures/limit.md @@ -0,0 +1 @@ +Limits: $\lim_{x \to \infty} \frac{1}{x}$, $\lim_{n \to \infty} a_n$ diff --git a/lib/__tests__/fixtures/structures/parentheses-fraction.md b/lib/__tests__/fixtures/structures/parentheses-fraction.md new file mode 100644 index 0000000..22ad9dd --- /dev/null +++ b/lib/__tests__/fixtures/structures/parentheses-fraction.md @@ -0,0 +1,7 @@ +Bold vector: $\mathbf{v}$ + +Parentheses with fraction: + +$$ +\frac{d}{dx} \left( \frac{1}{x} \right) = -\frac{1}{x^2} +$$ diff --git a/lib/__tests__/fixtures/structures/stackrel.md b/lib/__tests__/fixtures/structures/stackrel.md new file mode 100644 index 0000000..ad6e574 --- /dev/null +++ b/lib/__tests__/fixtures/structures/stackrel.md @@ -0,0 +1,5 @@ +Stackrel: $\stackrel{\mathrm{def}}{=}$ + +$$ +\stackrel{\mathrm{def}}{=} +$$ diff --git a/lib/__tests__/fixtures/symbols/exponential-logarithmic.md b/lib/__tests__/fixtures/symbols/exponential-logarithmic.md new file mode 100644 index 0000000..3665516 --- /dev/null +++ b/lib/__tests__/fixtures/symbols/exponential-logarithmic.md @@ -0,0 +1 @@ +Exponential and logarithmic functions: $e^x$, $\ln(y)$, $\log_{10}(z)$ diff --git a/lib/__tests__/fixtures/symbols/greek-letters.md b/lib/__tests__/fixtures/symbols/greek-letters.md new file mode 100644 index 0000000..bbe0498 --- /dev/null +++ b/lib/__tests__/fixtures/symbols/greek-letters.md 
@@ -0,0 +1 @@ +Greek letters: $\alpha$, $\beta$, $\gamma$, $\Gamma$, $\Delta$, $\pi$, $\Pi$, $\Sigma$, $\omega$, $\Omega$ diff --git a/lib/__tests__/fixtures/symbols/logical-symbols.md b/lib/__tests__/fixtures/symbols/logical-symbols.md new file mode 100644 index 0000000..734678d --- /dev/null +++ b/lib/__tests__/fixtures/symbols/logical-symbols.md @@ -0,0 +1 @@ +Logical symbols: $\forall$, $\exists$, $\in$, $\notin$, $\subseteq$, $\supseteq$, $\land$, $\lor$, $\neg$, $\wedge$, $\ne$ diff --git a/lib/__tests__/fixtures/symbols/misc-symbols.md b/lib/__tests__/fixtures/symbols/misc-symbols.md new file mode 100644 index 0000000..a7cf3c9 --- /dev/null +++ b/lib/__tests__/fixtures/symbols/misc-symbols.md @@ -0,0 +1,7 @@ +Partial derivative: $\frac{\partial f}{\partial x}$ + +Infinity: $\infty$ + +Triangle notation: $\triangle ABC$ + +Dots: $\cdots$ diff --git a/lib/__tests__/fixtures/symbols/trigonometric.md b/lib/__tests__/fixtures/symbols/trigonometric.md new file mode 100644 index 0000000..f238ff9 --- /dev/null +++ b/lib/__tests__/fixtures/symbols/trigonometric.md @@ -0,0 +1 @@ +Trigonometric functions: $\sin(x)$, $\cos(y)$, $\tan(z)$ diff --git a/lib/__tests__/fixtures/text/text-in-math.md b/lib/__tests__/fixtures/text/text-in-math.md new file mode 100644 index 0000000..18a19c3 --- /dev/null +++ b/lib/__tests__/fixtures/text/text-in-math.md @@ -0,0 +1,7 @@ +Text within math: + +$$ +\text{Let } x \text{ be a real number.} +$$ + +Inline with text macro: $\text{if } x > 0$ diff --git a/lib/__tests__/fixtures/text/textcolor.md b/lib/__tests__/fixtures/text/textcolor.md new file mode 100644 index 0000000..1c13a1e --- /dev/null +++ b/lib/__tests__/fixtures/text/textcolor.md @@ -0,0 +1,9 @@ +`\textcolor` is not styled; only the math body is rendered. 
+ +$$ +\textcolor{red}{E=mc^2} +$$ + +$$ +\textcolor{blue}{\sum_{i=1}^n i} +$$ diff --git a/lib/__tests__/fixtures/unsupported/align-and-cases-skipped.md b/lib/__tests__/fixtures/unsupported/align-and-cases-skipped.md new file mode 100644 index 0000000..5896e6b --- /dev/null +++ b/lib/__tests__/fixtures/unsupported/align-and-cases-skipped.md @@ -0,0 +1,32 @@ +Align and aligned environments are not yet supported; math is skipped but surrounding text remains. + +$$ +\begin{align} +y &= mx + b \\ +y' &= m +\end{align} +$$ + +$$ +\begin{aligned} +(a+b)^2 &= (a+b)(a+b) \\ +&= a^2 + ab + ba + b^2 \\ +&= a^2 + 2ab + b^2 +\end{aligned} +$$ + +$$ +f(x) = \begin{cases} +x^2, & \text{if } x \ge 0 \\ +-x^2, & \text{otherwise} +\end{cases} +$$ + +$$ +f(x) = +\begin{cases} +1, & \text{if } x > 0 \\ +0, & \text{if } x = 0 \\ +-1, & \text{if } x < 0 +\end{cases} +$$ diff --git a/lib/__tests__/fixtures/unsupported/matrices-skipped.md b/lib/__tests__/fixtures/unsupported/matrices-skipped.md new file mode 100644 index 0000000..b3a2476 --- /dev/null +++ b/lib/__tests__/fixtures/unsupported/matrices-skipped.md @@ -0,0 +1,34 @@ +Matrix environments are not yet supported; math is skipped but surrounding text remains. 
+ +Inline matrix attempt: $\begin{pmatrix} a & b \\ c & d \end{pmatrix}$ + +Block matrix attempt: + +$$ +\begin{pmatrix} +1 & 2 & 3 \\ +4 & 5 & 6 \\ +7 & 8 & 9 +\end{pmatrix} +$$ + +$$ +\begin{bmatrix} +1 & 2 \\ +3 & 4 +\end{bmatrix} +$$ + +$$ +\begin{vmatrix} +a & b \\ +c & d +\end{vmatrix} +$$ + +$$ +\begin{Vmatrix} +a & b \\ +c & d +\end{Vmatrix} +$$ diff --git a/lib/__tests__/helpers/assert-valid-docx.ts b/lib/__tests__/helpers/assert-valid-docx.ts new file mode 100644 index 0000000..bc86f90 --- /dev/null +++ b/lib/__tests__/helpers/assert-valid-docx.ts @@ -0,0 +1,123 @@ +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import { toDocx } from "@m2d/core"; +import { validateFile } from "@xarsh/ooxml-validator"; +import remarkMath from "remark-math"; +import remarkParse from "remark-parse"; +import { unified } from "unified"; +import { mathPlugin } from "../../src"; + +const markdownProcessor = unified().use(remarkParse).use(remarkMath); + +type MdastRoot = Parameters[0]; + +export type DocxValidationResult = Awaited>; + +/** Directory for generated DOCX files used in manual inspection. */ +export const DEBUG_DOCX_DIR = path.resolve( + import.meta.dirname, + "../../../debug", +); + +/** Write a debug artifact under {@link DEBUG_DOCX_DIR}. */ +export const saveDebugFile = ( + filename: string, + content: string | Buffer, +): string => { + const filePath = path.join(DEBUG_DOCX_DIR, filename); + fs.mkdirSync(path.dirname(filePath), { recursive: true }); + fs.writeFileSync(filePath, content); + return filePath; +}; + +/** Write a DOCX buffer under {@link DEBUG_DOCX_DIR} for manual testing. */ +export const saveDebugDocx = (filename: string, buffer: Buffer): string => + saveDebugFile(filename, buffer); + +/** Root directory for OOXML validation fixture markdown files. */ +export const FIXTURES_DIR = path.resolve(import.meta.dirname, "../fixtures"); + +/** Recursively list all `.md` fixture files under {@link FIXTURES_DIR}. 
*/ +export const listFixtureFiles = (): string[] => { + const fixtures: string[] = []; + + const walk = (directory: string) => { + for (const entry of fs.readdirSync(directory, { withFileTypes: true })) { + const filePath = path.join(directory, entry.name); + if (entry.isDirectory()) { + walk(filePath); + } else if (entry.isFile() && entry.name.endsWith(".md")) { + fixtures.push(filePath); + } + } + }; + + walk(FIXTURES_DIR); + return fixtures.sort(); +}; + +/** Recursively list individual `.md` fixture files (excludes `combined/`). */ +export const listIndividualFixtureFiles = (): string[] => + listFixtureFiles().filter( + (fixturePath) => !fixturePath.includes(`${path.sep}combined${path.sep}`), + ); + +/** Map a fixture markdown path to its debug DOCX output path. */ +export const fixtureDebugDocxPath = (fixturePath: string): string => + `${path.relative(FIXTURES_DIR, fixturePath).replace(/\.md$/, ".docx")}`; + +/** Build one markdown document containing every individual fixture. */ +export const buildCombinedFixtureMarkdown = (): string => + listIndividualFixtureFiles() + .map((fixturePath) => { + const label = path + .relative(FIXTURES_DIR, fixturePath) + .replace(/\.md$/, "") + .replace(/\//g, " / "); + const body = fs.readFileSync(fixturePath, "utf-8").trim(); + + return `**${label}**\n\n${body}`; + }) + .join("\n\n---\n\n"); + +/** Generate a DOCX buffer from markdown using the math plugin. */ +export const docxFromMarkdown = async (markdown: string): Promise => { + const tree = markdownProcessor.parse(markdown); + const normalized = markdownProcessor.runSync(tree); + + return (await toDocx( + normalized as MdastRoot, + {}, + { plugins: [mathPlugin()] }, + "nodebuffer", + )) as Buffer; +}; + +/** Validate a DOCX buffer against Microsoft's OOXML schema. 
*/ +export const validateDocxBuffer = async ( + buffer: Buffer | Uint8Array, +): Promise => { + const file = path.join( + os.tmpdir(), + `m2d-math-docx-${crypto.randomUUID()}.docx`, + ); + + try { + fs.writeFileSync(file, buffer); + return await validateFile(file, { officeVersion: "Microsoft365" }); + } finally { + fs.unlinkSync(file); + } +}; + +/** Format schema validation errors for test output. */ +export const formatDocxValidationErrors = ( + result: DocxValidationResult, +): string => + result.errors + .map( + (error) => + `[${error.errorType}] ${error.path}\n ${error.xPath}\n ${error.description}`, + ) + .join("\n\n"); diff --git a/lib/__tests__/index.test.ts b/lib/__tests__/index.test.ts index e6656d9..3edc786 100644 --- a/lib/__tests__/index.test.ts +++ b/lib/__tests__/index.test.ts @@ -1,25 +1,30 @@ import fs from "node:fs"; -import { toDocx } from "@m2d/core"; // Adjust path based on your setup +import path from "node:path"; +import { toDocx } from "@m2d/core"; import remarkMath from "remark-math"; import remarkParse from "remark-parse"; import { unified } from "unified"; -import { describe, it, vi } from "vitest"; +import { describe, expect, it, vi } from "vitest"; import { mathPlugin } from "../src"; +import { + buildCombinedFixtureMarkdown, + docxFromMarkdown, + fixtureDebugDocxPath, + formatDocxValidationErrors, + listIndividualFixtureFiles, + saveDebugDocx, + saveDebugFile, + validateDocxBuffer, +} from "./helpers/assert-valid-docx"; const markdown = fs.readFileSync("../sample.md", "utf-8"); const emptyOMathCount = async (md: string) => { - const mdast = unified().use(remarkParse).use(remarkMath).parse(md); - const buffer = (await toDocx( - mdast, - {}, - { plugins: [mathPlugin()] }, - "nodebuffer", - )) as Buffer; + const buffer = await docxFromMarkdown(md); const { execSync } = await import("node:child_process"); - const path = `/tmp/m2d-math-test-${Math.random()}.docx`; - fs.writeFileSync(path, buffer); - const xml = execSync(`unzip -p ${path} 
word/document.xml`, { + const tempPath = `/tmp/m2d-math-test-${Math.random()}.docx`; + fs.writeFileSync(tempPath, buffer); + const xml = execSync(`unzip -p ${tempPath} word/document.xml`, { encoding: "utf8", }); return (xml.match(//g) ?? []).length; @@ -45,3 +50,31 @@ describe("toDocx", () => { error.mockRestore(); }); }); + +describe("OOXML schema validation", () => { + it.each( + listIndividualFixtureFiles().map((fixturePath) => [fixturePath]), + )("passes for %s", async (fixturePath) => { + const markdown = fs.readFileSync(fixturePath, "utf-8"); + const buffer = await docxFromMarkdown(markdown); + saveDebugDocx( + path.join("fixtures", fixtureDebugDocxPath(fixturePath)), + buffer, + ); + const result = await validateDocxBuffer(buffer); + + expect(result.ok, formatDocxValidationErrors(result)).toBe(true); + }); + + it("passes for combined all-fixtures document", async () => { + const markdown = buildCombinedFixtureMarkdown(); + const buffer = await docxFromMarkdown(markdown); + + saveDebugFile("fixtures/combined/all-fixtures.md", markdown); + saveDebugDocx("fixtures/combined/all-fixtures.docx", buffer); + + const result = await validateDocxBuffer(buffer); + + expect(result.ok, formatDocxValidationErrors(result)).toBe(true); + }); +}); diff --git a/lib/package.json b/lib/package.json index 0e9eaf1..d6c024d 100644 --- a/lib/package.json +++ b/lib/package.json @@ -23,7 +23,7 @@ } }, "scripts": { - "build": "tsup && tsc -p tsconfig-build.json && gzip -c dist/index.js | wc -c", + "build": "tsup && tsc -p tsconfig-build.json && rm -f dist/katexData.d.ts && gzip -c dist/index.js | wc -c", "clean": "rm -rf dist", "dev": "tsup --watch && tsc -p tsconfig-build.json -w", "typecheck": "tsc --noEmit", @@ -31,6 +31,7 @@ "generate:katex": "node --experimental-strip-types scripts/generate-katex-data.ts" }, "devDependencies": { + "@xarsh/ooxml-validator": "^0.3.0", "@repo/typescript-config": "workspace:*", "@testing-library/react": "^16.3.2", "@types/node": "^26.0.0", diff --git 
a/lib/scripts/benchmark-bundle-formats.ts b/lib/scripts/benchmark-bundle-formats.ts index eb900a0..6321da7 100644 --- a/lib/scripts/benchmark-bundle-formats.ts +++ b/lib/scripts/benchmark-bundle-formats.ts @@ -3,21 +3,24 @@ * Run from lib/: pnpm exec node --experimental-strip-types scripts/benchmark-bundle-formats.ts */ import { execSync } from "node:child_process"; -import { readFileSync, rmSync, writeFileSync } from "node:fs"; +import { readFileSync, writeFileSync } from "node:fs"; import { dirname, join } from "node:path"; import { fileURLToPath } from "node:url"; import { gzipSync } from "node:zlib"; import { KATEX_ACCENTS, - KATEX_ALIASES, KATEX_FUNCTIONS, - KATEX_SYMBOL_OVERRIDES, -} from "../src/katexMeta.ts"; -import { KATEX_SYMBOLS as BASE_SYMBOLS } from "../src/katexSymbols.ts"; + KATEX_INTEGRAL_OPS, + KATEX_LIMITS_TEXT_OPS, + KATEX_NARY_OPS, + KATEX_SYMBOLS, + type KatexNAryOp, +} from "../src/katexData.ts"; const ROOT = join(dirname(fileURLToPath(import.meta.url)), ".."); const SRC = join(ROOT, "src"); const INDEX = join(SRC, "index.ts"); +const KATEX_DATA = join(SRC, "katexData.ts"); type Format = { name: string; @@ -26,22 +29,38 @@ type Format = { patchIndex: (src: string) => string; }; -const mergedLookup: Record = { - ...KATEX_ALIASES, - ...BASE_SYMBOLS, - ...KATEX_SYMBOL_OVERRIDES, -}; - -const sortedEntries = Object.entries(mergedLookup).sort(([a], [b]) => +const sortedEntries = Object.entries(KATEX_SYMBOLS).sort(([a], [b]) => a.localeCompare(b), ); -const accentsJson = JSON.stringify(KATEX_ACCENTS); -const functionsJson = JSON.stringify([...KATEX_FUNCTIONS].sort()); + +const formatNAryOps = (ops: Record): string => + Object.entries(ops) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([k, v]) => { + const loc = v.limitLocationVal + ? 
`, limitLocationVal: ${JSON.stringify(v.limitLocationVal)}` + : ""; + return ` ${JSON.stringify(k)}: { accent: ${JSON.stringify(v.accent)}${loc} },`; + }) + .join("\n"); const metaTail = [ - `export const KATEX_ACCENTS = ${accentsJson} as Record;`, - `export const KATEX_FUNCTIONS = new Set(${functionsJson});`, - "", + `export const KATEX_ACCENTS = ${JSON.stringify(KATEX_ACCENTS)} as Record;`, + ``, + `export const KATEX_FUNCTIONS = new Set(${JSON.stringify([...KATEX_FUNCTIONS].sort())});`, + ``, + `export type KatexNAryOp = { accent: string; limitLocationVal?: "subSup" };`, + ``, + `export const KATEX_NARY_OPS: Record = {`, + formatNAryOps(KATEX_NARY_OPS), + `};`, + ``, + `export const KATEX_INTEGRAL_OPS: Record = {`, + formatNAryOps(KATEX_INTEGRAL_OPS), + `};`, + ``, + `export const KATEX_LIMITS_TEXT_OPS = new Set(${JSON.stringify([...KATEX_LIMITS_TEXT_OPS].sort())});`, + ``, ].join("\n"); const objectLiteralBody = sortedEntries @@ -55,80 +74,69 @@ const tupleBody = sortedEntries const parallelKeys = sortedEntries.map(([k]) => JSON.stringify(k)).join(","); const parallelValues = JSON.stringify(sortedEntries.map(([, v]) => v)); const gzipB64 = gzipSync( - Buffer.from(JSON.stringify(mergedLookup), "utf8"), + Buffer.from(JSON.stringify(KATEX_SYMBOLS), "utf8"), ).toString("base64"); const baselineIndex = readFileSync(INDEX, "utf8"); +const baselineKatexData = readFileSync(KATEX_DATA, "utf8"); -const cleanupGenerated = () => { - for (const f of ["katexData.ts"]) { - try { - rmSync(join(SRC, f)); - } catch { - /* absent */ - } - } -}; - -const mergedPatchIndex = (src: string): string => +const mapPatchIndex = (src: string): string => src .replace( - `import { KATEX_ACCENTS, KATEX_ALIASES, KATEX_FUNCTIONS, KATEX_SYMBOL_OVERRIDES } from "./katexMeta";\nimport { KATEX_SYMBOLS } from "./katexSymbols";`, - `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData";`, + ` KATEX_SYMBOLS,\n type KatexNAryOp,\n} from "./katexData";`, + ` type KatexNAryOp,\n 
KATEX_SYMBOL_MAP,\n} from "./katexData";`, ) .replace( - `const resolveLatexSymbol = (name: string): string | undefined =>\n KATEX_SYMBOL_OVERRIDES[name] ?? KATEX_SYMBOLS[name] ?? KATEX_ALIASES[name];`, - `const resolveLatexSymbol = (name: string): string | undefined => KATEX_SYMBOLS[name];`, + `const resolveLatexSymbol = (name: string): string | undefined =>\n KATEX_SYMBOLS[name];`, + `const resolveLatexSymbol = (name: string): string | undefined =>\n KATEX_SYMBOL_MAP.get(name);`, ); const formats: Format[] = [ { - name: "1-baseline-multi", - note: "PR #7: katexSymbols + katexMeta, 3-table lookup chain", - write: () => cleanupGenerated(), + name: "1-baseline-literal", + note: "Current katexData.ts: merged object literal + direct lookup", + write: () => writeFileSync(KATEX_DATA, baselineKatexData), patchIndex: (src) => src, }, { name: "2-merged-literal", - note: "Single katexData.ts object literal + direct lookup", + note: "Regenerated object literal from imported KATEX_SYMBOLS", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: merged object literal */`, `export const KATEX_SYMBOLS: Record = {`, objectLiteralBody, `};`, + ``, metaTail, ].join("\n"), ); }, - patchIndex: mergedPatchIndex, + patchIndex: (src) => src, }, { name: "3-json-parse", note: "Single JSON.parse blob", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: JSON.parse blob */`, - `export const KATEX_SYMBOLS = JSON.parse(${JSON.stringify(JSON.stringify(mergedLookup))}) as Record;`, + `export const KATEX_SYMBOLS = JSON.parse(${JSON.stringify(JSON.stringify(KATEX_SYMBOLS))}) as Record;`, metaTail, ].join("\n"), ); }, - patchIndex: mergedPatchIndex, + patchIndex: (src) => src, }, { name: "4-tuple-fromEntries", note: "Tuple array + Object.fromEntries at module init", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: 
tuple entries + Object.fromEntries */`, `const ENTRIES: [string, string][] = [`, @@ -139,15 +147,14 @@ const formats: Format[] = [ ].join("\n"), ); }, - patchIndex: mergedPatchIndex, + patchIndex: (src) => src, }, { name: "5-parallel-arrays", note: "Parallel keys/values arrays + Object.fromEntries", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: parallel arrays */`, `const KEYS = [${parallelKeys}] as const;`, @@ -157,15 +164,14 @@ const formats: Format[] = [ ].join("\n"), ); }, - patchIndex: mergedPatchIndex, + patchIndex: (src) => src, }, { name: "6-gzip-base64-node", note: "gzip+base64 blob, gunzipSync at module init (Node zlib)", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: gzip base64 (Node) */`, `import { gunzipSync } from "node:zlib";`, @@ -177,31 +183,29 @@ const formats: Format[] = [ ].join("\n"), ); }, - patchIndex: mergedPatchIndex, + patchIndex: (src) => src, }, { name: "7-literal-oneline", note: "Merged object literal on one line via JSON.stringify", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: one-line object literal */`, - `export const KATEX_SYMBOLS: Record = ${JSON.stringify(mergedLookup)};`, + `export const KATEX_SYMBOLS: Record = ${JSON.stringify(KATEX_SYMBOLS)};`, metaTail, ].join("\n"), ); }, - patchIndex: mergedPatchIndex, + patchIndex: (src) => src, }, { name: "8-map-constructor", note: "new Map(entries) then lookup via .get", write: () => { - cleanupGenerated(); writeFileSync( - join(SRC, "katexData.ts"), + KATEX_DATA, [ `/** benchmark: Map constructor */`, `const ENTRIES: [string, string][] = [`, @@ -212,13 +216,7 @@ const formats: Format[] = [ ].join("\n"), ); }, - patchIndex: (src) => - mergedPatchIndex(src) - .replace( - `import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData";`, - `import { KATEX_ACCENTS, 
KATEX_FUNCTIONS, KATEX_SYMBOL_MAP } from "./katexData";`, - ) - .replace(`KATEX_SYMBOLS[name]`, `KATEX_SYMBOL_MAP.get(name)`), + patchIndex: mapPatchIndex, }, ]; @@ -229,26 +227,16 @@ const measure = () => { cwd: ROOT, encoding: "buffer", }); - const dataSrc = ["katexData.ts", "katexSymbols.ts", "katexMeta.ts"] - .map((f) => join(SRC, f)) - .filter((f) => { - try { - readFileSync(f); - return true; - } catch { - return false; - } - }) - .reduce((sum, f) => sum + readFileSync(f).length, 0); + const dataSrc = readFileSync(KATEX_DATA).length; return { cjs: cjs.length, esm: esm.length, gzCjs: gzCjs.length, dataSrc }; }; console.log("KaTeX symbol format benchmark\n"); console.log(`Merged lookup entries: ${sortedEntries.length}`); -console.log(`Raw JSON size: ${JSON.stringify(mergedLookup).length} B`); +console.log(`Raw JSON size: ${JSON.stringify(KATEX_SYMBOLS).length} B`); console.log( - `gzip(JSON) alone: ${gzipSync(Buffer.from(JSON.stringify(mergedLookup))).length} B`, + `gzip(JSON) alone: ${gzipSync(Buffer.from(JSON.stringify(KATEX_SYMBOLS))).length} B`, ); console.log(`gzip+base64 payload: ${gzipB64.length} chars\n`); @@ -268,7 +256,7 @@ for (const format of formats) { } writeFileSync(INDEX, baselineIndex); -cleanupGenerated(); +writeFileSync(KATEX_DATA, baselineKatexData); console.log("\n| Format | gzip CJS | CJS | ESM | data src | vs baseline |"); console.log("|--------|----------|-----|-----|----------|-------------|"); diff --git a/lib/scripts/generate-katex-data.ts b/lib/scripts/generate-katex-data.ts index 840601d..2a4238a 100644 --- a/lib/scripts/generate-katex-data.ts +++ b/lib/scripts/generate-katex-data.ts @@ -32,9 +32,38 @@ const fetchKatexSource = async (path: string): Promise => { const symbolMap: Record = {}; const aliasMap: Record = {}; const accentMap: Record = {}; -const fnSet = new Set(); const overrideMap: Record = {}; +type KatexNAryOp = { accent: string; limitLocationVal?: "subSup" }; + +/** Commands excluded from generated operator tables 
(with reason). */ +const EXCLUDED_OPS: Record = { + mathop: "takes a body argument, not a standalone operator name", +}; + +/** Parse a defineFunction block from op.js for limits/symbol flags and names. */ +const parseOpBlock = ( + block: string, +): { limits: boolean; symbol: boolean; names: string[] } | undefined => { + if (!block.includes('type: "op"')) return undefined; + const limitsMatch = block.match(/limits:\s*(true|false)/); + const symbolMatch = block.match(/symbol:\s*(true|false)/); + if (!limitsMatch || !symbolMatch) return undefined; + + const namesMatch = block.match(/names:\s*\[([\s\S]*?)\]/); + const names: string[] = []; + if (namesMatch) { + for (const nameMatch of namesMatch[1].matchAll(/"\\\\([^"]+)"/g)) { + names.push(nameMatch[1]); + } + } + return { + limits: limitsMatch[1] === "true", + symbol: symbolMatch[1] === "true", + names, + }; +}; + /** Decode a KaTeX char literal or single-character string. */ const decodeChar = (raw: string): string | undefined => { if (/^\\u[0-9a-fA-F]{4}$/.test(raw)) { @@ -117,27 +146,6 @@ const generate = async (): Promise => { accentMap[cmd] = chr; } - let blockIdx = 0; - let nextBlockIdx = opSrc.indexOf("defineFunction({", blockIdx); - while (nextBlockIdx !== -1) { - blockIdx = nextBlockIdx; - const blockEnd = opSrc.indexOf("});", blockIdx); - const block = opSrc.slice(blockIdx, blockEnd); - if (block.includes("symbol: false") && !block.includes("symbol: true")) { - const namesMatch = block.match(/names:\s*\[([\s\S]*?)\]/); - if (namesMatch) { - for (const nameMatch of namesMatch[1].matchAll(/"\\+([^"]+)"/g)) { - fnSet.add(nameMatch[1]); - } - } - } - blockIdx = blockEnd; - nextBlockIdx = opSrc.indexOf("defineFunction({", blockIdx); - } - for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(liminf|limsup)",/g)) { - fnSet.add(m[1]); - } - for (const m of macrosSrc.matchAll( /defineMacro\("\\\\([^"]+)",\s*"\\html@mathml\{[^}]+\}\{[^}]*\\char[`'"]((?:\\u[0-9a-fA-F]{4}|[^`'"]+))/g, )) { @@ -184,6 +192,67 @@ 
const generate = async (): Promise => { ...overrideMap, }; + const fnSet = new Set(); + const limitsTextSet = new Set(); + const naryOps: Record = {}; + const integralOps: Record = {}; + const excluded: Record = { ...EXCLUDED_OPS }; + + /** Resolve a command name to its n-ary accent character via lookupMap. */ + const resolveAccent = (cmd: string): string | undefined => lookupMap[cmd]; + + let blockIdx = 0; + let nextBlockIdx = opSrc.indexOf("defineFunction({", blockIdx); + while (nextBlockIdx !== -1) { + blockIdx = nextBlockIdx; + const blockEnd = opSrc.indexOf("});", blockIdx); + const block = opSrc.slice(blockIdx, blockEnd); + const parsed = parseOpBlock(block); + if (parsed) { + const { limits, symbol, names } = parsed; + for (const name of names) { + if (EXCLUDED_OPS[name]) continue; + + if (limits && symbol) { + const accent = resolveAccent(name); + if (accent) { + naryOps[name] = { accent }; + } else { + excluded[name] = "no resolvable accent in KATEX_SYMBOLS"; + } + } else if (!limits && symbol) { + const accent = resolveAccent(name); + if (accent) { + integralOps[name] = { accent, limitLocationVal: "subSup" }; + } else { + excluded[name] = "no resolvable accent in KATEX_SYMBOLS"; + } + } else if (limits && !symbol) { + limitsTextSet.add(name); + } else { + fnSet.add(name); + } + } + } + blockIdx = blockEnd; + nextBlockIdx = opSrc.indexOf("defineFunction({", blockIdx); + } + + for (const m of macrosSrc.matchAll(/defineMacro\("\\\\(liminf|limsup)",/g)) { + limitsTextSet.add(m[1]); + } + + for (const name of limitsTextSet) { + fnSet.delete(name); + } + for (const name of Object.keys(naryOps)) { + fnSet.delete(name); + } + for (const name of Object.keys(integralOps)) { + fnSet.delete(name); + } + fnSet.delete("mathop"); + const lookupLines = Object.entries(lookupMap) .sort(([a], [b]) => a.localeCompare(b)) .map(([k, v]) => ` ${JSON.stringify(k)}: ${JSON.stringify(v)},`) @@ -191,6 +260,16 @@ const generate = async (): Promise => { const sourceNote = `KaTeX 
v${KATEX_VERSION} — regenerate via \`${REGENERATE_CMD}\` (fetches from ${KATEX_BASE}).`; const functions = [...fnSet].sort(); + const formatNAryOps = (ops: Record): string => + Object.entries(ops) + .sort(([a], [b]) => a.localeCompare(b)) + .map(([k, v]) => { + const loc = v.limitLocationVal + ? `, limitLocationVal: ${JSON.stringify(v.limitLocationVal)}` + : ""; + return ` ${JSON.stringify(k)}: { accent: ${JSON.stringify(v.accent)}${loc} },`; + }) + .join("\n"); writeFileSync( join(ROOT, "src/katexData.ts"), @@ -204,6 +283,18 @@ const generate = async (): Promise => { ``, `export const KATEX_FUNCTIONS = new Set(${JSON.stringify(functions)});`, ``, + `export type KatexNAryOp = { accent: string; limitLocationVal?: "subSup" };`, + ``, + `export const KATEX_NARY_OPS: Record = {`, + formatNAryOps(naryOps), + `};`, + ``, + `export const KATEX_INTEGRAL_OPS: Record = {`, + formatNAryOps(integralOps), + `};`, + ``, + `export const KATEX_LIMITS_TEXT_OPS = new Set(${JSON.stringify([...limitsTextSet].sort())});`, + ``, ].join("\n"), ); @@ -213,6 +304,17 @@ const generate = async (): Promise => { console.log(` overrides: ${Object.keys(overrideMap).length}`); console.log(`KATEX_ACCENTS: ${Object.keys(accentMap).length}`); console.log(`KATEX_FUNCTIONS: ${fnSet.size}`); + console.log(`KATEX_NARY_OPS: ${Object.keys(naryOps).length}`); + console.log(`KATEX_INTEGRAL_OPS: ${Object.keys(integralOps).length}`); + console.log(`KATEX_LIMITS_TEXT_OPS: ${limitsTextSet.size}`); + if (Object.keys(excluded).length > 0) { + console.log("Excluded ops:"); + for (const [cmd, reason] of Object.entries(excluded).sort(([a], [b]) => + a.localeCompare(b), + )) { + console.log(` ${cmd}: ${reason}`); + } + } }; generate().catch((error) => { diff --git a/lib/src/index.ts b/lib/src/index.ts index 73b1b70..8336137 100644 --- a/lib/src/index.ts +++ b/lib/src/index.ts @@ -4,7 +4,17 @@ import type * as latex from "@unified-latex/unified-latex-types"; // skipcq: JS-C1003 import type * as DOCX from "docx"; 
import { parseMath } from "latex-math"; -import { KATEX_ACCENTS, KATEX_FUNCTIONS, KATEX_SYMBOLS } from "./katexData"; +import { + KATEX_ACCENTS, + KATEX_FUNCTIONS, + KATEX_INTEGRAL_OPS, + KATEX_LIMITS_TEXT_OPS, + KATEX_NARY_OPS, + KATEX_SYMBOLS, + type KatexNAryOp, +} from "./katexData"; + +type DocxApi = typeof DOCX; /** * Checks if the argument has curly brackets. @@ -14,9 +24,88 @@ const hasCurlyBrackets = ( ): arg is latex.Argument => Boolean(arg && arg.openMark === "{" && arg.closeMark === "}"); -/** convert to MathRun */ -const mapString = (docx: typeof DOCX, s: string): DOCX.MathRun => - new docx.MathRun(s); +/** Pending n-ary operator awaiting limits and/or integrand body. */ +type PendingNAry = { + kind: "nary"; + accent: string; + limitLocationVal?: string; + sub: DOCX.MathRun[]; + sup: DOCX.MathRun[]; + body: MathComponent[]; +}; + +/** Pending accent awaiting its base token. */ +type PendingAccent = { + kind: "accent"; + accentChar: string; +}; + +/** Pending limits-text operator awaiting a lower limit via subscript. */ +type PendingLimitsTextOp = { + kind: "limitsText"; + name: string; +}; + +/** Partial script node for chained sub/superscript attachment. */ +type PendingScript = + | { + kind: "script"; + variant: "sub"; + base: DOCX.MathRun; + sub: DOCX.MathRun[]; + } + | { + kind: "script"; + variant: "sup"; + base: DOCX.MathRun; + sup: DOCX.MathRun[]; + } + | { + kind: "script"; + variant: "both"; + base: DOCX.MathRun; + sub: DOCX.MathRun[]; + sup: DOCX.MathRun[]; + }; + +type PendingMarker = + | PendingNAry + | PendingAccent + | PendingLimitsTextOp + | PendingScript; + +type BinomState = + | { phase: "idle" } + | { phase: "needFirst" } + | { phase: "needSecond"; numerator: DOCX.MathRun[] }; + +/** Internal mapping state: OMML runs plus binomial context. 
*/ +type MapContext = { + runs: MathComponent[]; + binom: BinomState; +}; + +type MathComponent = DOCX.MathRun | PendingMarker; + +type MapNodeResult = + | { type: "continue"; components: MathComponent[] } + | { type: "break" }; + +type NAryBuild = { + accent: string; + limitLocationVal?: string; + children: DOCX.MathRun[]; + subScript: DOCX.MathRun[]; + superScript: DOCX.MathRun[]; +}; + +/** Cast custom OMML XmlComponents to MathRun for docx library interop. */ +const asMathRun = (component: DOCX.XmlComponent): DOCX.MathRun => + component as unknown as DOCX.MathRun; + +/** Build an OMML math run with plain text content. */ +const makeMathRun = (docx: DocxApi, text: string): DOCX.MathRun => + new docx.MathRun(text); const PLUGIN_ID = "@m2d/math"; @@ -31,160 +120,393 @@ const logSkippedEmptyMath = (latex: string, scope: "inline" | "block") => { const resolveLatexSymbol = (name: string): string | undefined => KATEX_SYMBOLS[name]; -type NAryOptions = { - accent: string; - limitLocationVal?: string; - children?: DOCX.MathRun[]; - subScript?: DOCX.MathRun[]; - superScript?: DOCX.MathRun[]; +const isMathRun = (node: MathComponent): node is DOCX.MathRun => + !("kind" in node); + +const isPendingNAry = (node: MathComponent | undefined): node is PendingNAry => + Boolean(node && "kind" in node && node.kind === "nary"); + +const isPendingAccent = ( + node: MathComponent | undefined, +): node is PendingAccent => + Boolean(node && "kind" in node && node.kind === "accent"); + +const isPendingLimitsTextOp = ( + node: MathComponent | undefined, +): node is PendingLimitsTextOp => + Boolean(node && "kind" in node && node.kind === "limitsText"); + +const isPendingScript = ( + node: MathComponent | undefined, +): node is PendingScript => + Boolean(node && "kind" in node && node.kind === "script"); + +/** OMML accent chars must be combining marks (U+0300–U+036F, U+20D0–U+20EF). 
*/ +const OMML_ACCENT_CHARS: Record = { + hat: "\u0302", + widehat: "\u0302", + tilde: "\u0303", + widetilde: "\u0303", + bar: "\u0304", + overline: "\u0305", + dot: "\u0307", + ddot: "\u0308", + vec: "\u20D7", + acute: "\u0301", + grave: "\u0300", + breve: "\u0306", + check: "\u030C", + mathring: "\u030A", }; -type PendingNAry = DOCX.MathRun & { - isNAry: 1; - naryAccent: string; - naryLimitLoc?: string; - sub?: DOCX.MathRun[]; - sup?: DOCX.MathRun[]; +/** Map KaTeX accent glyphs to OMML combining marks. */ +const KATEX_GLYPH_TO_OMML: Record = { + ˆ: "\u0302", + "^": "\u0302", + "˜": "\u0303", + "~": "\u0303", + ˉ: "\u0304", + "¯": "\u0305", + "˙": "\u0307", + "¨": "\u0308", + ˊ: "\u0301", + ˋ: "\u0300", + "⃗": "\u20D7", + "˘": "\u0306", + ˇ: "\u030C", + "˚": "\u030A", }; -const NARY_OPERATORS: Record< - string, - { accent: string; limitLocationVal?: string } -> = { - sum: { accent: "∑" }, - prod: { accent: "∏" }, - int: { accent: "∫", limitLocationVal: "subSup" }, - iint: { accent: "∬", limitLocationVal: "subSup" }, - iiint: { accent: "∭", limitLocationVal: "subSup" }, - oint: { accent: "∮", limitLocationVal: "subSup" }, - oiint: { accent: "∯", limitLocationVal: "subSup" }, - oiiint: { accent: "∰", limitLocationVal: "subSup" }, - bigcup: { accent: "⋃" }, - bigcap: { accent: "⋂" }, - bigoplus: { accent: "⊕" }, - bigotimes: { accent: "⊗" }, +/** Resolve accent character for a LaTeX accent command name. */ +const resolveAccentChar = (name: string): string | undefined => { + const omml = OMML_ACCENT_CHARS[name]; + if (omml) return omml; + const katexGlyph = KATEX_ACCENTS[name]; + if (katexGlyph) return KATEX_GLYPH_TO_OMML[katexGlyph]; + return undefined; }; -/** Whether a MathRun is a pending n-ary operator awaiting limits or body. */ -const isPendingNAry = (node: DOCX.MathRun | undefined): node is PendingNAry => - Boolean(node && (node as PendingNAry).isNAry); +const resolveNAryOp = (name: string): KatexNAryOp | undefined => + KATEX_INTEGRAL_OPS[name] ?? 
KATEX_NARY_OPS[name]; + +/** True when a macro name maps to an OMML accent combining mark. */ +const isAccentCommand = (name: string): boolean => + resolveAccentChar(name) !== undefined; + +/** String nodes may contain unparsed scripts when nested inside braced groups. */ +const UNPARSED_MATH_IN_STRING = /[\^_]|\\[a-zA-Z]/; + +const mapStringNode = (docx: DocxApi, content: string): DOCX.MathRun[] => + UNPARSED_MATH_IN_STRING.test(content) + ? mapGroup(docx, parseMath(content)) + : [makeMathRun(docx, content)]; /** Build an OMML n-ary operator element. */ -const buildNAry = (docx: typeof DOCX, options: NAryOptions): DOCX.MathRun => { - /** OMML wrapper for n-ary operators such as sum and integral. */ +const buildNAry = (docx: DocxApi, options: NAryBuild): DOCX.MathRun => { class MathNAry extends docx.XmlComponent { constructor() { super("m:nary"); + // OOXML requires m:sub, m:sup, and m:e in fixed order; all three must + // always be present. Always report both limits so docx does not emit + // subHide/supHide (which it orders incorrectly in naryPr). this.root.push( docx.createMathNAryProperties({ accent: options.accent, - hasSuperScript: Boolean(options.superScript), - hasSubScript: Boolean(options.subScript), + hasSuperScript: true, + hasSubScript: true, limitLocationVal: options.limitLocationVal, }), ); - if (options.subScript) { - this.root.push( - docx.createMathSubScriptElement({ children: options.subScript }), - ); - } - if (options.superScript) { - this.root.push( - docx.createMathSuperScriptElement({ children: options.superScript }), - ); - } - this.root.push(docx.createMathBase({ children: options.children ?? 
[] })); + this.root.push( + docx.createMathSubScriptElement({ + children: options.subScript, + }), + ); + this.root.push( + docx.createMathSuperScriptElement({ + children: options.superScript, + }), + ); + this.root.push(docx.createMathBase({ children: options.children })); } } - return new MathNAry() as unknown as DOCX.MathRun; + return asMathRun(new MathNAry()); +}; + +/** Build an OMML accent element (m:acc) wrapping base content. */ +const buildMathAccent = ( + docx: DocxApi, + accent: string, + children: DOCX.MathRun[], +): DOCX.MathRun => { + class MathAccent extends docx.XmlComponent { + constructor() { + super("m:acc"); + this.root.push( + new docx.BuilderElement({ + name: "m:accPr", + children: [docx.createMathAccentCharacter({ accent })], + }), + ); + this.root.push(docx.createMathBase({ children })); + } + } + return asMathRun(new MathAccent()); +}; + +/** Resolve accent base content from a macro's first braced argument. */ +const accentChildrenFromArgs = ( + docx: DocxApi, + args: latex.Argument[] | undefined, +): DOCX.MathRun[] => + hasCurlyBrackets(args?.[0]) ? mapGroup(docx, args[0].content) : []; + +/** Build an accent node, deferring base content when the parser omits braced args. */ +const mapAccentMacro = ( + docx: DocxApi, + name: string, + args: latex.Argument[] | undefined, +): MathComponent => { + const accentChar = resolveAccentChar(name); + if (!accentChar) { + return makeMathRun(docx, name); + } + const children = accentChildrenFromArgs(docx, args); + return children.length + ? buildMathAccent(docx, accentChar, children) + : { kind: "accent", accentChar }; }; /** Create an n-ary operator placeholder that accepts limits and a body later. */ const createPendingNAry = ( - docx: typeof DOCX, accent: string, limitLocationVal?: string, +): PendingNAry => ({ + kind: "nary", + accent, + limitLocationVal, + sub: [], + sup: [], + body: [], +}); + +/** Characters that end an n-ary integrand (e.g. `\int ... dx =`). 
*/ +const terminatesNAryBody = (content: string): boolean => + content === "=" || content === "," || content === ";"; + +const finalizeBodyRuns = ( + docx: DocxApi, + body: MathComponent[], +): DOCX.MathRun[] => + body.map((component) => + isMathRun(component) ? component : finalizeComponent(docx, component), + ); + +const finalizeTrailingPendingScriptInBody = ( + docx: DocxApi, + prev: PendingNAry, ): PendingNAry => { - const node = buildNAry(docx, { - accent, - limitLocationVal, - children: [], - }) as PendingNAry; - node.isNAry = 1; - node.naryAccent = accent; - node.naryLimitLoc = limitLocationVal; - return node; + const body = [...prev.body]; + const last = body[body.length - 1]; + if (last && isPendingScript(last)) { + body[body.length - 1] = finalizeScript(docx, last); + } + return { ...prev, body }; }; -/** Attach sub/superscript limits to a pending n-ary operator. */ -const attachNAryLimits = ( - docx: typeof DOCX, +const appendToNAryBody = ( + docx: DocxApi, prev: PendingNAry, - limits: { subScript?: DOCX.MathRun[]; superScript?: DOCX.MathRun[] }, + items: MathComponent[], ): PendingNAry => { - const sub = limits.subScript ?? prev.sub; - const sup = limits.superScript ?? 
prev.sup; - const node = buildNAry(docx, { - accent: prev.naryAccent, - limitLocationVal: prev.naryLimitLoc, - children: [], - subScript: sub, - superScript: sup, - }) as PendingNAry; - node.isNAry = 1; - node.naryAccent = prev.naryAccent; - node.naryLimitLoc = prev.naryLimitLoc; - node.sub = sub; - node.sup = sup; - return node; + const nary = finalizeTrailingPendingScriptInBody(docx, prev); + const body = [...nary.body]; + const lastBody = body[body.length - 1]; + const mathRuns = items.filter(isMathRun); + + if (isPendingAccent(lastBody) && mathRuns.length === items.length) { + body.pop(); + body.push(buildMathAccent(docx, lastBody.accentChar, mathRuns)); + return { ...nary, body }; + } + + return { ...nary, body: [...body, ...items] }; }; +const applyScriptToNAryBody = ( + docx: DocxApi, + prev: PendingNAry, + variant: "sub" | "sup", + script: DOCX.MathRun[], +): PendingNAry => { + const body = [...prev.body]; + const last = body.pop(); + if (!last) return prev; + + let updated: MathComponent; + if (isPendingScript(last)) { + if (variant === "sup") { + updated = + last.variant === "sub" + ? finalizeScript(docx, { + kind: "script", + variant: "both", + base: last.base, + sub: last.sub, + sup: script, + }) + : finalizeScript(docx, last); + } else { + updated = + last.variant === "sup" + ? finalizeScript(docx, { + kind: "script", + variant: "both", + base: last.base, + sub: script, + sup: last.sup, + }) + : finalizeScript(docx, last); + } + } else if (isMathRun(last)) { + updated = + variant === "sup" + ? 
{ kind: "script", variant: "sup", base: last, sup: script } + : { kind: "script", variant: "sub", base: last, sub: script }; + } else { + body.push(last); + return prev; + } + + body.push(updated); + return { ...prev, body }; +}; + +const finalizePendingNAry = (docx: DocxApi, prev: PendingNAry): DOCX.MathRun => + finalizeNAry(docx, prev, finalizeBodyRuns(docx, prev.body)); + +const attachNArySub = ( + prev: PendingNAry, + subScript: DOCX.MathRun[], +): PendingNAry => ({ ...prev, sub: subScript }); + +const attachNArySup = ( + prev: PendingNAry, + superScript: DOCX.MathRun[], +): PendingNAry => ({ ...prev, sup: superScript }); + const finalizeNAry = ( - docx: typeof DOCX, + docx: DocxApi, prev: PendingNAry, children: DOCX.MathRun[], ): DOCX.MathRun => buildNAry(docx, { - accent: prev.naryAccent, - limitLocationVal: prev.naryLimitLoc, + accent: prev.accent, + limitLocationVal: prev.limitLocationVal, children, subScript: prev.sub, superScript: prev.sup, }); +const isScriptMacro = (node: latex.Node): boolean => + node.type === "macro" && (node.content === "_" || node.content === "^"); + +/** Finalize a trailing script marker before processing the next non-script node. */ +const finalizeTrailingPendingScript = ( + docx: DocxApi, + ctx: MapContext, +): void => { + const last = ctx.runs[ctx.runs.length - 1]; + if (isPendingScript(last)) { + ctx.runs[ctx.runs.length - 1] = finalizeScript(docx, last); + } +}; + +/** Convert unfinalized internal markers to OMML for output. 
*/ +const finalizeComponent = ( + docx: DocxApi, + component: MathComponent, +): DOCX.MathRun => { + if (isMathRun(component)) return component; + switch (component.kind) { + case "nary": + return finalizePendingNAry(docx, component); + case "accent": + return buildMathAccent(docx, component.accentChar, []); + case "limitsText": + return makeMathRun(docx, component.name); + case "script": + return finalizeScript(docx, component); + } +}; + +const finalizeScript = ( + docx: DocxApi, + pending: PendingScript, +): DOCX.MathRun => { + switch (pending.variant) { + case "both": + return new docx.MathSubSuperScript({ + subScript: pending.sub, + superScript: pending.sup, + children: [pending.base], + }); + case "sub": + return new docx.MathSubScript({ + children: [pending.base], + subScript: pending.sub, + }); + case "sup": + return new docx.MathSuperScript({ + children: [pending.base], + superScript: pending.sup, + }); + } +}; + +const createMapContext = (): MapContext => ({ + runs: [], + binom: { phase: "idle" }, +}); + /** convert group to Math */ -const mapGroup = (docx: typeof DOCX, nodes: latex.Node[]): DOCX.MathRun[] => { - const group: DOCX.MathRun[] = []; +const mapGroup = (docx: DocxApi, nodes: latex.Node[]): DOCX.MathRun[] => { + const groupCtx = createMapContext(); for (const c of nodes) { - // skipcq: JS-0357 - group.push(...(mapNode(docx, c, group) || [])); + const result = mapNode(docx, c, groupCtx); + if (result.type === "continue") { + groupCtx.runs.push(...result.components); + } } - return group; + return groupCtx.runs.map((c) => + isMathRun(c) ? 
c : finalizeComponent(docx, c), + ); }; /** Handle Macros */ // skipcq: JS-R1005 const mapMacro = ( - docx: typeof DOCX, + docx: DocxApi, node: latex.Macro, - runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] }, -): DOCX.MathRun[] | DOCX.MathRun | null => { - let returnVal: DOCX.MathRun[] | DOCX.MathRun | null = null; + ctx: MapContext, +): MathComponent[] | MathComponent | null => { + let returnVal: MathComponent[] | MathComponent | null = null; + const { runs } = ctx; switch (node.content) { case "newline": - returnVal = mapString(docx, " "); + returnVal = makeMathRun(docx, " "); break; case "\\": - // line break return null; case "textcolor": { const args = node.args ?? []; - // const _color = (hasCurlyBrackets(args[1]) && args[1]?.content?.[0]?.content) || ""; if (hasCurlyBrackets(args[2])) { returnVal = mapGroup(docx, args[2].content); } break; } + case "color": + return []; case "text": { const args = node.args ?? []; if (hasCurlyBrackets(args[0])) { @@ -197,76 +519,66 @@ const mapMacro = ( if (!prev) break; const superScript = mapGroup(docx, node.args?.[0]?.content ?? 
[]); if (isPendingNAry(prev)) { - return attachNAryLimits(docx, prev, { superScript }); - // @ts-expect-error -- attaching extra field - } else if (prev.sub) { - return new docx.MathSubSuperScript({ - // @ts-expect-error -- attaching extra field - subScript: prev.sub, - superScript, - // @ts-expect-error -- attaching extra field - children: [prev.prev], - }); + if (prev.body.length === 0) { + return attachNArySup(prev, superScript); + } + return applyScriptToNAryBody(docx, prev, "sup", superScript); } - const docxNode = new docx.MathSuperScript({ - children: [prev], - superScript, - }); - // @ts-expect-error -- attaching extra field - docxNode.sup = superScript; - // @ts-expect-error -- attaching extra field - docxNode.prev = prev; - return docxNode; + if (isPendingScript(prev)) { + if (prev.variant === "sub") { + return finalizeScript(docx, { + kind: "script", + variant: "both", + base: prev.base, + sub: prev.sub, + sup: superScript, + }); + } + return finalizeScript(docx, prev); + } + if (!isMathRun(prev)) break; + return { + kind: "script", + variant: "sup", + base: prev, + sup: superScript, + }; } case "_": { const prev = runs.pop(); if (!prev) break; const subScript = mapGroup(docx, node.args?.[0]?.content ?? 
[]); - if (isPendingNAry(prev)) { - return attachNAryLimits(docx, prev, { subScript }); - // @ts-expect-error -- attaching extra field - } else if (prev.sup) { - return new docx.MathSubSuperScript({ - subScript, - // @ts-expect-error -- attaching extra field - superScript: prev.sup, - // @ts-expect-error -- attaching extra field - children: [prev.prev], + if (isPendingLimitsTextOp(prev)) { + return new docx.MathLimitLower({ + children: [makeMathRun(docx, prev.name)], + limit: subScript, }); } - const docxNode = new docx.MathSubScript({ - children: [prev], - subScript, - }); - // @ts-expect-error -- attaching extra field - docxNode.sub = subScript; - // @ts-expect-error -- attaching extra field - docxNode.prev = prev; - return docxNode; - } - case "hat": - case "widehat": - returnVal = docx.createMathAccentCharacter({ - accent: KATEX_ACCENTS[node.content] ?? "^", - }); - break; - case "sum": - case "prod": - case "int": - case "iint": - case "iiint": - case "oint": - case "oiint": - case "oiiint": - case "bigcup": - case "bigcap": - case "bigoplus": - case "bigotimes": { - const nary = NARY_OPERATORS[node.content]; - if (nary) { - returnVal = createPendingNAry(docx, nary.accent, nary.limitLocationVal); + if (isPendingNAry(prev)) { + if (prev.body.length === 0) { + return attachNArySub(prev, subScript); + } + return applyScriptToNAryBody(docx, prev, "sub", subScript); } - break; + if (isPendingScript(prev)) { + if (prev.variant === "sup") { + return finalizeScript(docx, { + kind: "script", + variant: "both", + base: prev.base, + sub: subScript, + sup: prev.sup, + }); + } + return finalizeScript(docx, prev); + } + if (!isMathRun(prev)) break; + return { + kind: "script", + variant: "sub", + base: prev, + sub: subScript, + }; } case "frac": case "tfrac": @@ -291,18 +603,15 @@ const mapMacro = ( hasCurlyBrackets(args[0]) && hasCurlyBrackets(args[1]) ) { - returnVal = [ - docx.createMathLimitLocation({ value: "undOvr" }), - new docx.MathLimitUpper({ - children: 
mapGroup(docx, args[1].content), - limit: mapGroup(docx, args[0].content), - }), - ]; + returnVal = new docx.MathLimitUpper({ + children: mapGroup(docx, args[1].content), + limit: mapGroup(docx, args[0].content), + }); } break; } case "binom": - runs.binomPending = 0; + ctx.binom = { phase: "needFirst" }; return []; case "sqrt": { const args = node.args ?? []; @@ -312,7 +621,7 @@ const mapMacro = ( }); } else if (args.length === 2) { returnVal = new docx.MathRadical( - args[0].content + args[0].content?.length ? { children: mapGroup(docx, args[1].content), degree: mapGroup(docx, args[0].content), @@ -324,17 +633,22 @@ const mapMacro = ( } case "left": case "right": - case "vec": case "boxed": case "boldsymbol": return []; case "mathbf": return mapGroup(docx, node.args?.[0]?.content ?? []); - default: - if (node.content === "overline" || node.content === "widetilde") { - returnVal = docx.createMathAccentCharacter({ - accent: node.content === "overline" ? "¯" : "~", - }); + default: { + const naryOp = resolveNAryOp(node.content); + if (naryOp) { + const pending = runs[runs.length - 1]; + if (isPendingNAry(pending)) { + runs.pop(); + runs.push(finalizePendingNAry(docx, pending)); + } + returnVal = createPendingNAry(naryOp.accent, naryOp.limitLocationVal); + } else if (KATEX_LIMITS_TEXT_OPS.has(node.content)) { + returnVal = { kind: "limitsText", name: node.content }; } else if ( node.content === "mathrm" || node.content === "mathit" || @@ -348,49 +662,45 @@ const mapMacro = ( if (hasCurlyBrackets(args[0])) { returnVal = mapGroup(docx, args[0].content); } - } else if (KATEX_ACCENTS[node.content]) { - returnVal = docx.createMathAccentCharacter({ - accent: KATEX_ACCENTS[node.content], - }); + } else if (isAccentCommand(node.content)) { + returnVal = mapAccentMacro(docx, node.content, node.args); } else if (KATEX_FUNCTIONS.has(node.content)) { - returnVal = mapString(docx, node.content); + returnVal = makeMathRun(docx, node.content); } else { - returnVal = mapString( + 
returnVal = makeMathRun( docx, resolveLatexSymbol(node.content) ?? node.content, ); } + } } - if (isPendingNAry(runs[runs.length - 1]) && returnVal) { - const prev = runs.pop() as PendingNAry; - return [ - finalizeNAry( - docx, - prev, - Array.isArray(returnVal) ? returnVal : [returnVal], - ), - ]; + const last = runs[runs.length - 1]; + if (isPendingNAry(last) && returnVal) { + runs.pop(); + const items = Array.isArray(returnVal) ? returnVal : [returnVal]; + return appendToNAryBody(docx, last, items); } return returnVal; }; -/** Process node */ -const mapNode = ( - docx: typeof DOCX, - node: latex.Node, - runs: DOCX.MathRun[] & { binomPending?: 0 | 1; binomFirst?: DOCX.MathRun[] }, -): DOCX.MathRun[] | false => { - if (node.type === "group" && runs.binomPending !== undefined) { - const content = mapGroup(docx, node.content); - if (runs.binomPending === 0) { - runs.binomFirst = content; - runs.binomPending = 1; - return []; - } - delete runs.binomPending; - const numerator = runs.binomFirst ?? 
[]; - delete runs.binomFirst; - return [ +const handleBinomialGroup = ( + docx: DocxApi, + node: latex.Group, + ctx: MapContext, +): MapNodeResult | null => { + if (ctx.binom.phase === "idle") return null; + + const content = mapGroup(docx, node.content); + if (ctx.binom.phase === "needFirst") { + ctx.binom = { phase: "needSecond", numerator: content }; + return { type: "continue", components: [] }; + } + + const { numerator } = ctx.binom; + ctx.binom = { phase: "idle" }; + return { + type: "continue", + components: [ new docx.MathRoundBrackets({ children: [ new docx.MathFraction({ @@ -399,68 +709,126 @@ const mapNode = ( }), ], }), - ]; + ], + }; +}; + +/** Process node */ +const mapNode = ( + docx: DocxApi, + node: latex.Node, + ctx: MapContext, +): MapNodeResult => { + if (!isScriptMacro(node)) { + finalizeTrailingPendingScript(docx, ctx); + } + + if (node.type === "group") { + const binomial = handleBinomialGroup(docx, node, ctx); + if (binomial) return binomial; } - let docxNodes: DOCX.MathRun[] = []; + let docxNodes: MathComponent[] = []; switch (node.type) { case "string": - docxNodes = [mapString(docx, node.content)]; + docxNodes = mapStringNode(docx, node.content); break; case "whitespace": - docxNodes = [mapString(docx, " ")]; + if (isPendingNAry(ctx.runs[ctx.runs.length - 1])) { + return { type: "continue", components: [] }; + } + docxNodes = [makeMathRun(docx, " ")]; break; case "macro": { - const run = mapMacro(docx, node, runs); + const run = mapMacro(docx, node, ctx); if (!run) { - // line break - return false; - } else { - docxNodes = Array.isArray(run) ? run : [run]; + return { type: "break" }; } + docxNodes = Array.isArray(run) ? 
run : [run]; break; } case "group": docxNodes = mapGroup(docx, node.content); break; case "environment": - // NOT SUPPORTED BY DOCX library break; default: + break; } - if (node.type !== "macro" && isPendingNAry(runs[runs.length - 1])) { - const prev = runs.pop() as PendingNAry; - return [finalizeNAry(docx, prev, docxNodes)]; + const last = ctx.runs[ctx.runs.length - 1]; + if ( + node.type === "string" && + isPendingNAry(last) && + terminatesNAryBody(node.content) + ) { + ctx.runs.pop(); + return { + type: "continue", + components: [ + finalizePendingNAry(docx, last), + ...mapStringNode(docx, node.content), + ], + }; } - return docxNodes; + if ( + node.type !== "macro" && + node.type !== "whitespace" && + isPendingNAry(last) + ) { + ctx.runs.pop(); + return { + type: "continue", + components: [appendToNAryBody(docx, last, docxNodes)], + }; + } + + const pendingAccent = ctx.runs[ctx.runs.length - 1]; + if ( + !isScriptMacro(node) && + node.type !== "whitespace" && + isPendingAccent(pendingAccent) + ) { + ctx.runs.pop(); + return { + type: "continue", + components: [ + buildMathAccent( + docx, + pendingAccent.accentChar, + docxNodes.filter(isMathRun), + ), + ], + }; + } + + return { type: "continue", components: docxNodes }; }; /** Parse latex and convert to DOCX MathRun nodes */ -export const parseLatex = ( - docx: typeof DOCX, - value: string, -): DOCX.MathRun[][] => { +export const parseLatex = (docx: DocxApi, value: string): DOCX.MathRun[][] => { const latexNodes = parseMath(value); - const paragraphs: DOCX.MathRun[][] = [[]]; - let runs: DOCX.MathRun[] & { - binomPending?: 0 | 1; - binomFirst?: DOCX.MathRun[]; - } = paragraphs[0]; + const paragraphs: MathComponent[][] = [[]]; + let ctx: MapContext = { runs: paragraphs[0], binom: { phase: "idle" } }; for (const node of latexNodes) { - const res = mapNode(docx, node, runs); - if (!res) { - // line break - runs = []; + const result = mapNode(docx, node, ctx); + if (result.type === "break") { + const runs: 
MathComponent[] = []; paragraphs.push(runs); + ctx = { runs, binom: { phase: "idle" } }; } else { - runs.push(...res); + ctx.runs.push(...result.components); } } - return paragraphs; + + return paragraphs.map((paragraph) => + paragraph.map((component) => + isMathRun(component) ? component : finalizeComponent(docx, component), + ), + ); }; /** @@ -473,7 +841,7 @@ export const mathPlugin: () => IPlugin<{ return { inline: (docx, node) => { if (node.type !== "inlineMath" && node.type !== "math") return []; - (node as unknown as EmptyNode)._type = node.type; + (node as EmptyNode)._type = node.type; node.type = ""; const latex = node.value ?? ""; const children = parseLatex(docx, latex).flat(); diff --git a/lib/src/katexData.ts b/lib/src/katexData.ts index 16fc96e..c85c44d 100644 --- a/lib/src/katexData.ts +++ b/lib/src/katexData.ts @@ -670,7 +670,6 @@ export const KATEX_ACCENTS = { } as Record; export const KATEX_FUNCTIONS = new Set([ - "Pr", "arccos", "arcctg", "arcsin", @@ -688,29 +687,60 @@ export const KATEX_FUNCTIONS = new Set([ "ctg", "cth", "deg", - "det", "dim", "exp", - "gcd", "hom", - "inf", "ker", "lg", - "lim", - "liminf", - "limsup", "ln", "log", - "mathop", - "max", - "min", "sec", "sh", "sin", "sinh", - "sup", "tan", "tanh", "tg", "th", ]); + +export type KatexNAryOp = { accent: string; limitLocationVal?: "subSup" }; + +export const KATEX_NARY_OPS: Record = { + bigcap: { accent: "⋂" }, + bigcup: { accent: "⋃" }, + bigodot: { accent: "⨀" }, + bigoplus: { accent: "⨁" }, + bigotimes: { accent: "⨂" }, + bigsqcup: { accent: "⨆" }, + biguplus: { accent: "⨄" }, + bigvee: { accent: "⋁" }, + bigwedge: { accent: "⋀" }, + coprod: { accent: "∐" }, + intop: { accent: "∫" }, + prod: { accent: "∏" }, + smallint: { accent: "∫" }, + sum: { accent: "∑" }, +}; + +export const KATEX_INTEGRAL_OPS: Record = { + iiint: { accent: "∭", limitLocationVal: "subSup" }, + iint: { accent: "∬", limitLocationVal: "subSup" }, + int: { accent: "∫", limitLocationVal: "subSup" }, + oiiint: { 
accent: "∰", limitLocationVal: "subSup" }, + oiint: { accent: "∯", limitLocationVal: "subSup" }, + oint: { accent: "∮", limitLocationVal: "subSup" }, +}; + +export const KATEX_LIMITS_TEXT_OPS = new Set([ + "Pr", + "det", + "gcd", + "inf", + "lim", + "liminf", + "limsup", + "max", + "min", + "sup", +]); diff --git a/lib/tsconfig-build.json b/lib/tsconfig-build.json index eb7efb1..3c6224a 100644 --- a/lib/tsconfig-build.json +++ b/lib/tsconfig-build.json @@ -7,6 +7,6 @@ "emitDeclarationOnly": true, "declarationMap": false }, - "include": ["src"], + "include": ["src/index.ts"], "exclude": ["dist", "node_modules", "**/*.test.*", "**/*.spec.*"] } diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 653339d..3d8e70b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -137,6 +137,9 @@ importers: '@vitest/coverage-v8': specifier: ^4.1.9 version: 4.1.9(vitest@4.1.9) + '@xarsh/ooxml-validator': + specifier: ^0.3.0 + version: 0.3.0 docx: specifier: ^9.7.1 version: 9.7.1 @@ -1621,6 +1624,41 @@ packages: '@vitest/utils@4.1.9': resolution: {integrity: sha512-A51o8ymO5PpqlWNnBP9ZHPXDIpuMtTLlGSjN7la4US+LJzoUMyhwjA5QXlm39JexgwHKW4Xjs8Z2d3dLCXOeuA==} + '@xarsh/ooxml-validator-darwin-arm64@0.3.0': + resolution: {integrity: sha512-hgxBf6YzMJCvgEKJN3wVmd4KhIkAx0eCl3ODMoi8a9m8o6N7xE0UKHJFjAku0WhAdbVrZN4w3K5oUVLCC6GnTA==} + cpu: [arm64] + os: [darwin] + + '@xarsh/ooxml-validator-darwin-x64@0.3.0': + resolution: {integrity: sha512-Mj0IEDx4lnDbz+cWqhbyQ3zrJTrtcOUhgU13qXgM4m1yS+SNhsojwfIw6NfPjeI6QTvh5rq9oocgewBe7W9LYw==} + cpu: [x64] + os: [darwin] + + '@xarsh/ooxml-validator-linux-arm64@0.3.0': + resolution: {integrity: sha512-5D8M0PF8J3eIrnUYNiHfOKyjiwnTkWRfcmUMrwMsqzSTBX0LmTRa1QjbIpAwTjKalkGq8nzutSO6eu/UmNothg==} + cpu: [arm64] + os: [linux] + + '@xarsh/ooxml-validator-linux-x64@0.3.0': + resolution: {integrity: sha512-GltV9YzcOwLdxhTvw9MIw1kgMbsjnpUvYYaKovDUwMcvwIOlb72Lealp4mZRugLDS1XRDqHbjoChL6Ko+awLpw==} + cpu: [x64] + os: [linux] + + '@xarsh/ooxml-validator-win32-arm64@0.3.0': + 
resolution: {integrity: sha512-mx7RqjopCZ3mNVlNyPar935FbMA61hOCyIA854L/3trLt4vmj62/spSfjkpgyKx6TKq5/HXTjMf+rNM/WKayaw==} + cpu: [arm64] + os: [win32] + + '@xarsh/ooxml-validator-win32-x64@0.3.0': + resolution: {integrity: sha512-VflQjYjMfq6Up3sclSiVL5p2dVz17RrubZF+QqXhoWCiOPlMlaMN07hx99J6Ngh6TPboAK6TjcuG8DlGIhcp0w==} + cpu: [x64] + os: [win32] + + '@xarsh/ooxml-validator@0.3.0': + resolution: {integrity: sha512-CaAu5dGQj8YKarMUxGMkxnG3ajNQ9IKEYQWsZ2/bZdoE85HVqmLTtfqMySMa0iLHdyRJ6cS7bhF+iMoBpuT6fw==} + engines: {node: '>=18'} + hasBin: true + acorn@8.17.0: resolution: {integrity: sha512-xRQbDb9BnwDafYNn6Vwl839DYVjqXYb1XVGtWAZ1kcDc6iwAL4hg3B1dZlRiuENFeO2H53gFG3in621AdERVAg==} engines: {node: '>=0.4.0'} @@ -4472,6 +4510,33 @@ snapshots: convert-source-map: 2.0.0 tinyrainbow: 3.1.0 + '@xarsh/ooxml-validator-darwin-arm64@0.3.0': + optional: true + + '@xarsh/ooxml-validator-darwin-x64@0.3.0': + optional: true + + '@xarsh/ooxml-validator-linux-arm64@0.3.0': + optional: true + + '@xarsh/ooxml-validator-linux-x64@0.3.0': + optional: true + + '@xarsh/ooxml-validator-win32-arm64@0.3.0': + optional: true + + '@xarsh/ooxml-validator-win32-x64@0.3.0': + optional: true + + '@xarsh/ooxml-validator@0.3.0': + optionalDependencies: + '@xarsh/ooxml-validator-darwin-arm64': 0.3.0 + '@xarsh/ooxml-validator-darwin-x64': 0.3.0 + '@xarsh/ooxml-validator-linux-arm64': 0.3.0 + '@xarsh/ooxml-validator-linux-x64': 0.3.0 + '@xarsh/ooxml-validator-win32-arm64': 0.3.0 + '@xarsh/ooxml-validator-win32-x64': 0.3.0 + acorn@8.17.0: {} ansi-colors@4.1.3: {} diff --git a/sample.md b/sample.md index 2fd828a..24693d7 100644 --- a/sample.md +++ b/sample.md @@ -143,18 +143,6 @@ $$ \text{Let } x \text{ be a real number.} $$ -### Colored Math - -You can color math expressions using the `\textcolor{color}{math}` command: - -$$ -\textcolor{red}{E=mc^2} -$$ - -$$ -\textcolor{blue}{\sum_{i=1}^n i} -$$ - ### Math Macros You can define custom macros: