From 5970ac45cb539c6abddbbd7f3eb7b019e2eb0fe5 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 20 Dec 2025 16:28:57 -0500 Subject: [PATCH 1/5] eng-1211 Refactor exportUtils, factor out utility functions and markdown. This is pure refactoring with no change of functionality. --- apps/roam/src/utils/exportUtils.ts | 153 ++++++++ apps/roam/src/utils/getExportTypes.ts | 529 +++----------------------- apps/roam/src/utils/pageToMarkdown.ts | 353 +++++++++++++++++ 3 files changed, 561 insertions(+), 474 deletions(-) create mode 100644 apps/roam/src/utils/exportUtils.ts create mode 100644 apps/roam/src/utils/pageToMarkdown.ts diff --git a/apps/roam/src/utils/exportUtils.ts b/apps/roam/src/utils/exportUtils.ts new file mode 100644 index 000000000..d9b2eedad --- /dev/null +++ b/apps/roam/src/utils/exportUtils.ts @@ -0,0 +1,153 @@ +import type { Result } from "roamjs-components/types/query-builder"; +import { PullBlock, TreeNode, ViewType } from "roamjs-components/types"; +import type { DiscourseNode } from "./getDiscourseNodes"; +import matchDiscourseNode from "./matchDiscourseNode"; + +type DiscourseExportResult = Result & { type: string }; + +export const uniqJsonArray = >(arr: T[]) => + Array.from( + new Set( + arr.map((r) => + JSON.stringify( + Object.entries(r).sort(([k], [k2]) => k.localeCompare(k2)), + ), + ), + ), + ).map((entries) => Object.fromEntries(JSON.parse(entries))) as T[]; + +export const getPageData = async ({ + results, + allNodes, + isExportDiscourseGraph, +}: { + results: Result[]; + allNodes: DiscourseNode[]; + isExportDiscourseGraph?: boolean; +}): Promise<(Result & { type: string })[]> => { + const allResults = results || []; + + if (isExportDiscourseGraph) return allResults as DiscourseExportResult[]; + + const matchedTexts = new Set(); + return allNodes.flatMap((n) => + (allResults + ? allResults.flatMap((r) => + Object.keys(r) + .filter((k) => k.endsWith(`-uid`) && k !== "text-uid") + .map((k) => ({ + ...r, + text: r[k.slice(0, -4)].toString(), + uid: r[k] as string, + })) + .concat({ + text: r.text, + uid: r.uid, + }), + ) + : ( + window.roamAlphaAPI.q( + "[:find (pull ?e [:block/uid :node/title]) :where [?e :node/title _]]", + ) as [Record][] + ).map(([{ title, uid }]) => ({ + text: title, + uid, + })) + ) + .filter(({ text }) => { + if (!text) return false; + if (matchedTexts.has(text)) return false; + const isMatch = matchDiscourseNode({ title: text, ...n }); + if (isMatch) matchedTexts.add(text); + + return isMatch; + }) + .map((node) => ({ ...node, type: n.text })), + ); +}; + +const getContentFromNodes = ({ + title, + allNodes, +}: { + title: string; + allNodes: DiscourseNode[]; +}) => { + const nodeFormat = allNodes.find((a) => + matchDiscourseNode({ title, ...a }), + )?.format; + if (!nodeFormat) return title; + const regex = new RegExp( + `^${nodeFormat + .replace(/\[/g, "\\[") + .replace(/]/g, "\\]") + .replace("{content}", "(.*?)") + .replace(/{[^}]+}/g, "(?:.*?)")}$`, + ); + return regex.exec(title)?.[1] || title; +}; + +export const getFilename = ({ + title = "", + maxFilenameLength, + simplifiedFilename, + allNodes, + removeSpecialCharacters, + extension = ".md", +}: { + title?: string; + maxFilenameLength: number; + simplifiedFilename: boolean; + allNodes: DiscourseNode[]; + removeSpecialCharacters: boolean; + extension?: string; +}) => { + const baseName = simplifiedFilename + ? getContentFromNodes({ title, allNodes }) + : title; + const name = `${ + removeSpecialCharacters + ? baseName.replace(/[<>:"/\\|\?*[\]]/g, "") + : baseName + }${extension}`; + + return name.length > maxFilenameLength + ? `${name.substring( + 0, + Math.ceil((maxFilenameLength - 3) / 2), + )}...${name.slice(-Math.floor((maxFilenameLength - 3) / 2))}` + : name; +}; + +export const toLink = (filename: string, uid: string, linkType: string) => { + const extensionRemoved = filename.replace(/\.\w+$/, ""); + if (linkType === "wikilinks") return `[[${extensionRemoved}]]`; + if (linkType === "alias") return `[${filename}](${filename})`; + if (linkType === "roam url") + return `[${extensionRemoved}](https://roamresearch.com/#/app/${window.roamAlphaAPI.graph.name}/page/${uid})`; + return filename; +}; + +export const pullBlockToTreeNode = ( + n: PullBlock, + v: `:${ViewType}`, +): TreeNode => ({ + text: n[":block/string"] || n[":node/title"] || "", + open: typeof n[":block/open"] === "undefined" ? true : n[":block/open"], + order: n[":block/order"] || 0, + uid: n[":block/uid"] || "", + heading: n[":block/heading"] || 0, + viewType: (n[":children/view-type"] || v).slice(1) as ViewType, + editTime: new Date(n[":edit/time"] || 0), + props: { imageResize: {}, iframe: {} }, + textAlign: n[":block/text-align"] || "left", + children: (n[":block/children"] || []) + .sort(({ [":block/order"]: a = 0 }, { [":block/order"]: b = 0 }) => a - b) + .map((r) => pullBlockToTreeNode(r, n[":children/view-type"] || v)), + parents: (n[":block/parents"] || []).map((p) => p[":db/id"] || 0), +}); + +export const collectUids = (t: TreeNode): string[] => [ + t.uid, + ...t.children.flatMap(collectUids), +]; diff --git a/apps/roam/src/utils/getExportTypes.ts b/apps/roam/src/utils/getExportTypes.ts index 3287ba820..8c023f30d 100644 --- a/apps/roam/src/utils/getExportTypes.ts +++ b/apps/roam/src/utils/getExportTypes.ts @@ -1,25 +1,25 @@ -import { BLOCK_REF_REGEX } from "roamjs-components/dom/constants"; -import getTextByBlockUid from "roamjs-components/queries/getTextByBlockUid"; import normalizePageTitle from "roamjs-components/queries/normalizePageTitle"; import getFullTreeByParentUid from "roamjs-components/queries/getFullTreeByParentUid"; -import getPageViewType from "roamjs-components/queries/getPageViewType"; -import { PullBlock, TreeNode, ViewType } from "roamjs-components/types"; -import { Result } from "roamjs-components/types/query-builder"; -import XRegExp from "xregexp"; +import { PullBlock, TreeNode } from "roamjs-components/types"; +import type { Result } from "roamjs-components/types/query-builder"; import getDiscourseNodes from "./getDiscourseNodes"; import isFlagEnabled from "./isFlagEnabled"; -import matchDiscourseNode from "./matchDiscourseNode"; import getDiscourseRelations from "./getDiscourseRelations"; import type { ExportDialogProps } from "~/components/Export"; import getPageMetadata from "./getPageMetadata"; import getDiscourseContextResults from "./getDiscourseContextResults"; import { getRelationDataUtil } from "./getRelationData"; import { ExportTypes } from "./types"; -import { - findReferencedNodeInText, - getReferencedNodeInFormat, -} from "./formatUtils"; import { getExportSettings } from "./getExportSettings"; +import { pageToMarkdown, toMarkdown } from "./pageToMarkdown"; +import { + uniqJsonArray, + getPageData, + getFilename, + toLink, + pullBlockToTreeNode, + collectUids, +} from "./exportUtils"; export const updateExportProgress = (detail: { progress: number; @@ -31,295 +31,12 @@ export const updateExportProgress = (detail: { }), ); -const pullBlockToTreeNode = (n: PullBlock, v: `:${ViewType}`): TreeNode => ({ - text: n[":block/string"] || n[":node/title"] || "", - open: typeof n[":block/open"] === "undefined" ? true : n[":block/open"], - order: n[":block/order"] || 0, - uid: n[":block/uid"] || "", - heading: n[":block/heading"] || 0, - viewType: (n[":children/view-type"] || v).slice(1) as ViewType, - editTime: new Date(n[":edit/time"] || 0), - props: { imageResize: {}, iframe: {} }, - textAlign: n[":block/text-align"] || "left", - children: (n[":block/children"] || []) - .sort(({ [":block/order"]: a = 0 }, { [":block/order"]: b = 0 }) => a - b) - .map((r) => pullBlockToTreeNode(r, n[":children/view-type"] || v)), - parents: (n[":block/parents"] || []).map((p) => p[":db/id"] || 0), -}); - -const getContentFromNodes = ({ - title, - allNodes, -}: { - title: string; - allNodes: ReturnType; -}) => { - const nodeFormat = allNodes.find((a) => - matchDiscourseNode({ title, ...a }), - )?.format; - if (!nodeFormat) return title; - const regex = new RegExp( - `^${nodeFormat - .replace(/\[/g, "\\[") - .replace(/]/g, "\\]") - .replace("{content}", "(.*?)") - .replace(/{[^}]+}/g, "(?:.*?)")}$`, - ); - return regex.exec(title)?.[1] || title; -}; - -const getFilename = ({ - title = "", - maxFilenameLength, - simplifiedFilename, - allNodes, - removeSpecialCharacters, - extension = ".md", -}: { - title?: string; - maxFilenameLength: number; - simplifiedFilename: boolean; - allNodes: ReturnType; - removeSpecialCharacters: boolean; - extension?: string; -}) => { - const baseName = simplifiedFilename - ? getContentFromNodes({ title, allNodes }) - : title; - const name = `${ - removeSpecialCharacters - ? baseName.replace(/[<>:"/\\|\?*[\]]/g, "") - : baseName - }${extension}`; - - return name.length > maxFilenameLength - ? `${name.substring( - 0, - Math.ceil((maxFilenameLength - 3) / 2), - )}...${name.slice(-Math.floor((maxFilenameLength - 3) / 2))}` - : name; -}; - -const uniqJsonArray = >(arr: T[]) => - Array.from( - new Set( - arr.map((r) => - JSON.stringify( - Object.entries(r).sort(([k], [k2]) => k.localeCompare(k2)), - ), - ), - ), - ).map((entries) => Object.fromEntries(JSON.parse(entries))) as T[]; -const viewTypeToPrefix = { - bullet: "- ", - document: "", - numbered: "1. ", -}; - -const collectUids = (t: TreeNode): string[] => [ - t.uid, - ...t.children.flatMap(collectUids), -]; - -const MATCHES_NONE = /$.+^/; - -// Roam embed syntax: {{[[embed]]: ((block-uid)) }} -// Roam embed syntax: {{[[embed-path]]: ((block-uid)) }} -// Also handles multiple parentheses: {{[[embed]]: ((((block-uid)))) }} -const EMBED_REGEX = - /{{\[\[(?:embed|embed-path)\]\]:\s*\(\(+\s*([\w\d-]{9,10})\s*\)\)+\s*}}/; - -// Roam embed-children syntax: {{[[embed-children]]: ((block-uid)) }} -const EMBED_CHILDREN_REGEX = - /{{\[\[embed-children\]\]:\s*\(\(+\s*([\w\d-]{9,10})\s*\)\)+\s*}}/; - -const toLink = (filename: string, uid: string, linkType: string) => { - const extensionRemoved = filename.replace(/\.\w+$/, ""); - if (linkType === "wikilinks") return `[[${extensionRemoved}]]`; - if (linkType === "alias") return `[${filename}](${filename})`; - if (linkType === "roam url") - return `[${extensionRemoved}](https://roamresearch.com/#/app/${window.roamAlphaAPI.graph.name}/page/${uid})`; - return filename; -}; - -const toMarkdown = ({ - c, - i = 0, - v = "bullet", - opts, -}: { - c: TreeNode; - i?: number; - v?: ViewType; - opts: { - refs: boolean; - embeds: boolean; - simplifiedFilename: boolean; - maxFilenameLength: number; - allNodes: ReturnType; - removeSpecialCharacters: boolean; - linkType: string; - flatten?: boolean; - }; -}): string => { - const { - refs, - embeds, - simplifiedFilename, - maxFilenameLength, - allNodes, - removeSpecialCharacters, - linkType, - flatten = false, - } = opts; - const processedText = c.text - .replace(embeds ? EMBED_REGEX : MATCHES_NONE, (_, blockUid) => { - const reference = getFullTreeByParentUid(blockUid); - return toMarkdown({ c: reference, i, v, opts }); - }) - .replace(embeds ? EMBED_CHILDREN_REGEX : MATCHES_NONE, (_, blockUid) => { - const reference = getFullTreeByParentUid(blockUid); - return reference.children - .map((child) => toMarkdown({ c: child, i, v, opts })) - .join("\n"); - }) - .replace(refs ? BLOCK_REF_REGEX : MATCHES_NONE, (_, blockUid) => { - const reference = getTextByBlockUid(blockUid); - return reference || blockUid; - }) - .replace(/{{\[\[TODO\]\]}}/g, v === "bullet" ? "[ ]" : "- [ ]") - .replace(/{{\[\[DONE\]\]}}/g, v === "bullet" ? "[x]" : "- [x]") - .replace(/\_\_(.+?)\_\_/g, "_$1_") // convert Roam italics __ to markdown italics _ - .replace(/(? { - if (s.name === "match") { - const name = getFilename({ - title: s.value, - allNodes, - maxFilenameLength, - simplifiedFilename, - removeSpecialCharacters, - }); - return toLink(name, c.uid, linkType); - } else if (s.name === "left" || s.name === "right") { - return ""; - } else { - return s.value; - } - }) - .join("") || processedText - : processedText; - const indentation = flatten ? "" : "".padStart(i * 4, " "); - // If this block contains an embed, treat it as document to avoid extra prefixes - const effectiveViewType = - embeds && (EMBED_REGEX.test(c.text) || EMBED_CHILDREN_REGEX.test(c.text)) - ? "document" - : v; - const viewTypePrefix = viewTypeToPrefix[effectiveViewType]; - const headingPrefix = c.heading ? `${"".padStart(c.heading, "#")} ` : ""; - const childrenMarkdown = (c.children || []) - .filter((nested) => !!nested.text || !!nested.children?.length) - .map((nested) => { - const childViewType = v !== "bullet" ? v : nested.viewType || "bullet"; - const childMarkdown = toMarkdown({ - c: nested, - i: i + 1, - v: childViewType, - opts, - }); - return `\n${childMarkdown}`; - }) - .join(""); - const lineBreak = v === "document" ? "\n" : ""; - - return `${indentation}${viewTypePrefix}${headingPrefix}${finalProcessedText}${lineBreak}${childrenMarkdown}`; -}; - -const handleDiscourseContext = async ({ - includeDiscourseContext, - uid, - pageTitle, - appendRefNodeContext, -}: { - includeDiscourseContext: boolean; - uid: string; - pageTitle: string; - appendRefNodeContext: boolean; -}) => { - if (!includeDiscourseContext) return []; - - const discourseResults = await getDiscourseContextResults({ - uid, - }); - if (!appendRefNodeContext) return discourseResults; - - const referencedDiscourseNode = getReferencedNodeInFormat({ uid }); - if (referencedDiscourseNode) { - const referencedResult = findReferencedNodeInText({ - text: pageTitle, - discourseNode: referencedDiscourseNode, - }); - if (!referencedResult) return discourseResults; - const appendedContext = { - label: referencedDiscourseNode.text, - results: { [referencedResult.uid]: referencedResult }, - }; - return [...discourseResults, appendedContext]; - } - - return discourseResults; -}; - -const handleFrontmatter = ({ - frontmatter, - rest, - result, -}: { - frontmatter: string[]; - rest: Record; - result: Result; -}) => { - const yaml = frontmatter.length - ? frontmatter - : [ - "title: {text}", - `url: https://roamresearch.com/#/app/${window.roamAlphaAPI.graph.name}/page/{uid}`, - `author: {author}`, - "date: {date}", - ]; - const resultCols = Object.keys(rest).filter((k) => !k.includes("uid")); - const yamlLines = yaml.concat(resultCols.map((k) => `${k}: {${k}}`)); - const content = yamlLines - .map((s) => - s.replace(/{([^}]+)}/g, (_, capt: string) => { - if (capt === "text") { - // Wrap title in quotes and escape additional quotes - const escapedText = result[capt].toString().replace(/"/g, '\\"'); - return `"${escapedText}"`; - } - return result[capt].toString(); - }), - ) - .join("\n"); - const output = `---\n${content}\n---`; - return output; -}; - type getExportTypesProps = { results?: ExportDialogProps["results"]; exportId: string; isExportDiscourseGraph: boolean; }; -export type DiscourseExportResult = Result & { type: string }; - const getExportTypes = ({ results, exportId, @@ -331,69 +48,29 @@ const getExportTypes = ({ allNodes.map((a) => [a.type, a.text]), ); nodeLabelByType["*"] = "Any"; - const getPageData = async ( - isExportDiscourseGraph?: boolean, - ): Promise<(Result & { type: string })[]> => { - const allResults = results || []; - - if (isExportDiscourseGraph) return allResults as DiscourseExportResult[]; - - const matchedTexts = new Set(); - return allNodes.flatMap((n) => - (allResults - ? allResults.flatMap((r) => - Object.keys(r) - .filter((k) => k.endsWith(`-uid`) && k !== "text-uid") - .map((k) => ({ - ...r, - text: r[k.slice(0, -4)].toString(), - uid: r[k] as string, - })) - .concat({ - text: r.text, - uid: r.uid, - }), - ) - : ( - window.roamAlphaAPI.q( - "[:find (pull ?e [:block/uid :node/title]) :where [?e :node/title _]]", - ) as [Record][] - ).map(([{ title, uid }]) => ({ - text: title, - uid, - })) - ) - .filter(({ text }) => { - if (!text) return false; - if (matchedTexts.has(text)) return false; - const isMatch = matchDiscourseNode({ title: text, ...n }); - if (isMatch) matchedTexts.add(text); - return isMatch; - }) - .map((node) => ({ ...node, type: n.text })), - ); - }; const getRelationData = () => getRelationDataUtil({ allRelations, nodeLabelByType, local: true }); - const getJsonData = async () => { + const getJsonData = async (results: Result[]) => { const grammar = allRelations.map(({ label, destination, source }) => ({ label, destination: nodeLabelByType[destination], source: nodeLabelByType[source], })); - const nodes = (await getPageData()).map(({ text, uid }) => { - const { date, displayName } = getPageMetadata(text); - const { children } = getFullTreeByParentUid(uid); - return { - uid, - title: text, - children, - date: date.toJSON(), - createdBy: displayName, - }; - }); + const nodes = (await getPageData({ results, allNodes })).map( + ({ text, uid }) => { + const { date, displayName } = getPageMetadata(text); + const { children } = getFullTreeByParentUid(uid); + return { + uid, + title: text, + children, + date: date.toJSON(), + createdBy: displayName, + }; + }, + ); const nodeSet = new Set(nodes.map((n) => n.uid)); return getRelationData().then((rels) => { const relations = uniqJsonArray( @@ -407,134 +84,31 @@ const getExportTypes = ({ { name: "Markdown", callback: async ({ includeDiscourseContext = false }) => { + if (!results) return []; + const settings = { + ...getExportSettings(), + includeDiscourseContext, + }; const { - frontmatter, - optsRefs, - optsEmbeds, simplifiedFilename, maxFilenameLength, removeSpecialCharacters, - linkType, - appendRefNodeContext, - } = getExportSettings(); - const allPages = await getPageData(isExportDiscourseGraph); - const gatherings = allPages.map( - ({ text, uid, context: _, type, ...rest }, i, all) => - async () => { - updateExportProgress({ progress: i / all.length, id: exportId }); - // skip a beat to let progress render - await new Promise((resolve) => setTimeout(resolve)); - const v = getPageViewType(text) || "bullet"; - const { date, displayName } = getPageMetadata(text); - const treeNode = getFullTreeByParentUid(uid); - - const discourseResults = await handleDiscourseContext({ - includeDiscourseContext, - pageTitle: text, - uid, - appendRefNodeContext, - }); - - const referenceResults = isFlagEnabled("render references") - ? ( - window.roamAlphaAPI.data.fast.q( - `[:find (pull ?pr [:node/title]) (pull ?r [:block/heading [:block/string :as "text"] [:children/view-type :as "viewType"] {:block/children ...}]) :where [?p :node/title "${normalizePageTitle( - text, - )}"] [?r :block/refs ?p] [?r :block/page ?pr]]`, - ) as [PullBlock, PullBlock][] - ).filter( - ([, { [":block/children"]: children }]) => - Array.isArray(children) && children.length, - ) - : []; - - const result: Result = { - ...rest, - date, - text, - uid, - author: displayName, - type, - }; - const yamlLines = handleFrontmatter({ - frontmatter, - rest, - result, - }); + } = settings; + const allPages = await getPageData({ + results, + allNodes, + isExportDiscourseGraph, + }); + const gatherings = allPages.map((result, i, all) => async () => { + updateExportProgress({ progress: i / all.length, id: exportId }); + // skip a beat to let progress render + await new Promise((resolve) => setTimeout(resolve)); + return pageToMarkdown(result, { + ...settings, + allNodes, + }); + }); - const content = `${yamlLines}\n\n${treeNode.children - .map((c) => - toMarkdown({ - c, - v, - i: 0, - opts: { - refs: optsRefs, - embeds: optsEmbeds, - simplifiedFilename, - allNodes, - maxFilenameLength, - removeSpecialCharacters, - linkType, - }, - }), - ) - .join("\n")}\n${ - discourseResults.length - ? `\n###### Discourse Context\n\n${discourseResults - .flatMap((r) => - Object.values(r.results).map( - (t) => - `- **${r.label}::** ${toLink( - getFilename({ - title: t.text, - maxFilenameLength, - simplifiedFilename, - allNodes, - removeSpecialCharacters, - }), - t.uid, - linkType, - )}`, - ), - ) - .join("\n")}\n` - : "" - }${ - referenceResults.length - ? `\n###### References\n\n${referenceResults - .map( - (r_1) => - `${toLink( - getFilename({ - title: r_1[0][":node/title"], - maxFilenameLength, - simplifiedFilename, - allNodes, - removeSpecialCharacters, - }), - r_1[0][":block/uid"] || "", - linkType, - )}\n\n${toMarkdown({ - c: pullBlockToTreeNode(r_1[1], ":bullet"), - opts: { - refs: optsRefs, - embeds: optsEmbeds, - simplifiedFilename, - allNodes, - maxFilenameLength, - removeSpecialCharacters, - linkType, - }, - })}`, - ) - .join("\n")}\n` - : "" - }`; - const uids = new Set(collectUids(treeNode)); - return { title: text, content, uids }; - }, - ); const pages = await gatherings.reduce( (p, c) => p.then((arr) => @@ -562,7 +136,8 @@ const getExportTypes = ({ { name: "JSON", callback: async ({ filename }) => { - const data = await getJsonData(); + if (!results) return []; + const data = await getJsonData(results); return [ { title: `${filename.replace(/\.json$/, "")}.json`, @@ -574,8 +149,9 @@ const getExportTypes = ({ { name: "Neo4j", callback: async ({ filename }) => { + if (!results) return []; const nodeHeader = "uid:ID,label:LABEL,title,author,date\n"; - const nodeData = (await getPageData()) + const nodeData = (await getPageData({ results, allNodes })) .map(({ text, uid, type }) => { const value = text.replace(new RegExp(`^\\[\\[\\w*\\]\\] - `), ""); const { displayName, date } = getPageMetadata(text); @@ -628,6 +204,7 @@ const getExportTypes = ({ { name: "PDF", callback: async ({ includeDiscourseContext = false }) => { + if (!results) return []; const { optsRefs, optsEmbeds, @@ -636,7 +213,11 @@ const getExportTypes = ({ removeSpecialCharacters, linkType, } = getExportSettings(); - const allPages = await getPageData(isExportDiscourseGraph); + const allPages = await getPageData({ + results, + allNodes, + isExportDiscourseGraph, + }); const gatherings = allPages.map(({ text, uid }, i, all) => async () => { updateExportProgress({ progress: i / all.length, id: exportId }); // skip a beat to let progress render diff --git a/apps/roam/src/utils/pageToMarkdown.ts b/apps/roam/src/utils/pageToMarkdown.ts new file mode 100644 index 000000000..3cd50cc44 --- /dev/null +++ b/apps/roam/src/utils/pageToMarkdown.ts @@ -0,0 +1,353 @@ +import { BLOCK_REF_REGEX } from "roamjs-components/dom/constants"; +import normalizePageTitle from "roamjs-components/queries/normalizePageTitle"; +import getPageUidByBlockUid from "roamjs-components/queries/getPageUidByBlockUid"; +import getTextByBlockUid from "roamjs-components/queries/getTextByBlockUid"; +import getPageMetadata from "./getPageMetadata"; +import getPageViewType from "roamjs-components/queries/getPageViewType"; +import type { Result } from "roamjs-components/types/query-builder"; +import type { DiscourseNode } from "./getDiscourseNodes"; +import type { PullBlock, TreeNode, ViewType } from "roamjs-components/types"; +import getFullTreeByParentUid from "roamjs-components/queries/getFullTreeByParentUid"; +import getDiscourseContextResults from "./getDiscourseContextResults"; +import isFlagEnabled from "./isFlagEnabled"; +import XRegExp from "xregexp"; +import { + findReferencedNodeInText, + getReferencedNodeInFormat, +} from "./formatUtils"; +import { + getFilename, + toLink, + pullBlockToTreeNode, + collectUids, +} from "./exportUtils"; +import getPageUidByPageTitle from "roamjs-components/queries/getPageUidByPageTitle"; + +const MATCHES_NONE = /$.+^/; + +// Roam embed syntax: {{[[embed]]: ((block-uid)) }} +// Roam embed syntax: {{[[embed-path]]: ((block-uid)) }} +// Also handles multiple parentheses: {{[[embed]]: ((((block-uid)))) }} +const EMBED_REGEX = + /{{\[\[(?:embed|embed-path)\]\]:\s*\(\(+\s*([\w\d-]{9,10})\s*\)\)+\s*}}/; + +// Roam embed-children syntax: {{[[embed-children]]: ((block-uid)) }} +const EMBED_CHILDREN_REGEX = + /{{\[\[embed-children\]\]:\s*\(\(+\s*([\w\d-]{9,10})\s*\)\)+\s*}}/; + +const viewTypeToPrefix = { + bullet: "- ", + document: "", + numbered: "1. ", +}; + +const handleDiscourseContext = async ({ + includeDiscourseContext, + uid, + pageTitle, + appendRefNodeContext, +}: { + includeDiscourseContext: boolean; + uid: string; + pageTitle: string; + appendRefNodeContext: boolean; +}) => { + if (!includeDiscourseContext) return []; + + const discourseResults = await getDiscourseContextResults({ + uid, + }); + if (!appendRefNodeContext) return discourseResults; + + const referencedDiscourseNode = getReferencedNodeInFormat({ uid }); + if (referencedDiscourseNode) { + const referencedResult = findReferencedNodeInText({ + text: pageTitle, + discourseNode: referencedDiscourseNode, + }); + if (!referencedResult) return discourseResults; + const appendedContext = { + label: referencedDiscourseNode.text, + results: { [referencedResult.uid]: referencedResult }, + }; + return [...discourseResults, appendedContext]; + } + + return discourseResults; +}; + +const handleFrontmatter = ({ + frontmatter, + rest, + result, +}: { + frontmatter: string[]; + rest: Record; + result: Result; +}) => { + const yaml = frontmatter.length + ? frontmatter + : [ + "title: {text}", + `url: https://roamresearch.com/#/app/${window.roamAlphaAPI.graph.name}/page/{uid}`, + `author: {author}`, + "date: {date}", + ]; + const resultCols = Object.keys(rest).filter((k) => !k.includes("uid")); + const yamlLines = yaml.concat(resultCols.map((k) => `${k}: {${k}}`)); + const content = yamlLines + .map((s) => + s.replace(/{([^}]+)}/g, (_, capt: string) => { + if (capt === "text") { + // Wrap title in quotes and escape additional quotes + const escapedText = result[capt].toString().replace(/"/g, '\\"'); + return `"${escapedText}"`; + } + return result[capt].toString(); + }), + ) + .join("\n"); + const output = `---\n${content}\n---`; + return output; +}; + +export const toMarkdown = ({ + c, + i = 0, + v = "bullet", + opts, +}: { + c: TreeNode; + i?: number; + v?: ViewType; + opts: { + refs: boolean; + embeds: boolean; + simplifiedFilename: boolean; + maxFilenameLength: number; + allNodes: DiscourseNode[]; + removeSpecialCharacters: boolean; + linkType: string; + flatten?: boolean; + }; +}): string => { + const { + refs, + embeds, + simplifiedFilename, + maxFilenameLength, + allNodes, + removeSpecialCharacters, + linkType, + flatten = false, + } = opts; + const processedText = c.text + .replace(embeds ? EMBED_REGEX : MATCHES_NONE, (_, blockUid) => { + const reference = getFullTreeByParentUid(blockUid); + return toMarkdown({ c: reference, i, v, opts }); + }) + .replace(embeds ? EMBED_CHILDREN_REGEX : MATCHES_NONE, (_, blockUid) => { + const reference = getFullTreeByParentUid(blockUid); + return reference.children + .map((child) => toMarkdown({ c: child, i, v, opts })) + .join("\n"); + }) + .replace(refs ? BLOCK_REF_REGEX : MATCHES_NONE, (_, blockUid) => { + const reference = getTextByBlockUid(blockUid); + return reference || blockUid; + }) + .replace(/{{\[\[TODO\]\]}}/g, v === "bullet" ? "[ ]" : "- [ ]") + .replace(/{{\[\[DONE\]\]}}/g, v === "bullet" ? "[x]" : "- [x]") + .replace(/\_\_(.+?)\_\_/g, "_$1_") // convert Roam italics __ to markdown italics _ + .replace(/(? { + if (s.name === "match") { + const name = getFilename({ + title: s.value, + allNodes, + maxFilenameLength, + simplifiedFilename, + removeSpecialCharacters, + }); + return toLink(name, c.uid, linkType); + } else if (s.name === "left" || s.name === "right") { + return ""; + } else { + return s.value; + } + }) + .join("") || processedText + : processedText; + const indentation = flatten ? "" : "".padStart(i * 4, " "); + // If this block contains an embed, treat it as document to avoid extra prefixes + const effectiveViewType = + embeds && (EMBED_REGEX.test(c.text) || EMBED_CHILDREN_REGEX.test(c.text)) + ? "document" + : v; + const viewTypePrefix = viewTypeToPrefix[effectiveViewType]; + const headingPrefix = c.heading ? `${"".padStart(c.heading, "#")} ` : ""; + const childrenMarkdown = (c.children || []) + .filter((nested) => !!nested.text || !!nested.children?.length) + .map((nested) => { + const childViewType = v !== "bullet" ? v : nested.viewType || "bullet"; + const childMarkdown = toMarkdown({ + c: nested, + i: i + 1, + v: childViewType, + opts, + }); + return `\n${childMarkdown}`; + }) + .join(""); + const lineBreak = v === "document" ? "\n" : ""; + + return `${indentation}${viewTypePrefix}${headingPrefix}${finalProcessedText}${lineBreak}${childrenMarkdown}`; +}; + +export const pageToMarkdown = async ( + { text, uid, context: _, type, ...rest }: Result, + { + includeDiscourseContext, + appendRefNodeContext, + frontmatter, + optsRefs, + optsEmbeds, + simplifiedFilename, + allNodes, + maxFilenameLength, + removeSpecialCharacters, + linkType, + blockRefsAsLinks = false, + blockAnchors = false, + }: { + includeDiscourseContext: boolean; + appendRefNodeContext: boolean; + frontmatter: string[]; + optsRefs: boolean; + optsEmbeds: boolean; + simplifiedFilename: boolean; + allNodes: DiscourseNode[]; + maxFilenameLength: number; + removeSpecialCharacters: boolean; + linkType: string; + blockRefsAsLinks?: boolean; + blockAnchors?: boolean; + }, +): Promise<{ title: string; content: string; uids: Set }> => { + const v = getPageViewType(text) || "bullet"; + const { date, displayName } = getPageMetadata(text); + const treeNode = getFullTreeByParentUid(uid); + + const discourseResults = await handleDiscourseContext({ + includeDiscourseContext, + pageTitle: text, + uid, + appendRefNodeContext, + }); + + const referenceResults = isFlagEnabled("render references") + ? ( + window.roamAlphaAPI.data.fast.q( + `[:find (pull ?pr [:node/title]) (pull ?r [:block/heading [:block/string :as "text"] [:children/view-type :as "viewType"] {:block/children ...}]) :where [?p :node/title "${normalizePageTitle( + text, + )}"] [?r :block/refs ?p] [?r :block/page ?pr]]`, + ) as [PullBlock, PullBlock][] + ).filter( + ([, { [":block/children"]: children }]) => + Array.isArray(children) && children.length, + ) + : []; + + const result: Result = { + ...rest, + date, + text, + uid, + author: displayName, + type, + }; + const yamlLines = handleFrontmatter({ + frontmatter, + rest, + result, + }); + + const content = `${yamlLines}\n\n${treeNode.children + .map((c) => + toMarkdown({ + c, + v, + i: 0, + opts: { + refs: optsRefs, + embeds: optsEmbeds, + simplifiedFilename, + allNodes, + maxFilenameLength, + removeSpecialCharacters, + linkType, + }, + }), + ) + .join("\n")}\n${ + discourseResults.length + ? `\n###### Discourse Context\n\n${discourseResults + .flatMap((r) => + Object.values(r.results).map( + (t) => + `- **${r.label}::** ${toLink( + getFilename({ + title: t.text, + maxFilenameLength, + simplifiedFilename, + allNodes, + removeSpecialCharacters, + }), + t.uid, + linkType, + )}`, + ), + ) + .join("\n")}\n` + : "" + }${ + referenceResults.length + ? `\n###### References\n\n${referenceResults + .map( + (r_1) => + `${toLink( + getFilename({ + title: r_1[0][":node/title"], + maxFilenameLength, + simplifiedFilename, + allNodes, + removeSpecialCharacters, + }), + r_1[0][":block/uid"] || "", + linkType, + )}\n\n${toMarkdown({ + c: pullBlockToTreeNode(r_1[1], ":bullet"), + opts: { + refs: optsRefs, + embeds: optsEmbeds, + simplifiedFilename, + allNodes, + maxFilenameLength, + removeSpecialCharacters, + linkType, + }, + })}`, + ) + .join("\n")}\n` + : "" + }`; + const uids = new Set(collectUids(treeNode)); + return { title: text, content, uids }; +}; From 4d811e46dc2588332450a4d66e20b78365b9b48b Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Thu, 25 Dec 2025 12:45:35 -0500 Subject: [PATCH 2/5] unnecessary await --- apps/roam/src/utils/exportUtils.ts | 4 ++-- apps/roam/src/utils/getExportTypes.ts | 30 +++++++++++++-------------- apps/roam/src/utils/pageToMarkdown.ts | 2 -- 3 files changed, 16 insertions(+), 20 deletions(-) diff --git a/apps/roam/src/utils/exportUtils.ts b/apps/roam/src/utils/exportUtils.ts index d9b2eedad..f94a17292 100644 --- a/apps/roam/src/utils/exportUtils.ts +++ b/apps/roam/src/utils/exportUtils.ts @@ -16,7 +16,7 @@ export const uniqJsonArray = >(arr: T[]) => ), ).map((entries) => Object.fromEntries(JSON.parse(entries))) as T[]; -export const getPageData = async ({ +export const getPageData = ({ results, allNodes, isExportDiscourseGraph, @@ -24,7 +24,7 @@ export const getPageData = async ({ results: Result[]; allNodes: DiscourseNode[]; isExportDiscourseGraph?: boolean; -}): Promise<(Result & { type: string })[]> => { +}): (Result & { type: string })[] => { const allResults = results || []; if (isExportDiscourseGraph) return allResults as DiscourseExportResult[]; diff --git a/apps/roam/src/utils/getExportTypes.ts b/apps/roam/src/utils/getExportTypes.ts index 8c023f30d..8670efc70 100644 --- a/apps/roam/src/utils/getExportTypes.ts +++ b/apps/roam/src/utils/getExportTypes.ts @@ -58,19 +58,17 @@ const getExportTypes = ({ destination: nodeLabelByType[destination], source: nodeLabelByType[source], })); - const nodes = (await getPageData({ results, allNodes })).map( - ({ text, uid }) => { - const { date, displayName } = getPageMetadata(text); - const { children } = getFullTreeByParentUid(uid); - return { - uid, - title: text, - children, - date: date.toJSON(), - createdBy: displayName, - }; - }, - ); + const nodes = getPageData({ results, allNodes }).map(({ text, uid }) => { + const { date, displayName } = getPageMetadata(text); + const { children } = getFullTreeByParentUid(uid); + return { + uid, + title: text, + children, + date: date.toJSON(), + createdBy: displayName, + }; + }); const nodeSet = new Set(nodes.map((n) => n.uid)); return getRelationData().then((rels) => { const relations = uniqJsonArray( @@ -94,7 +92,7 @@ const getExportTypes = ({ maxFilenameLength, removeSpecialCharacters, } = settings; - const allPages = await getPageData({ + const allPages = getPageData({ results, allNodes, isExportDiscourseGraph, @@ -151,7 +149,7 @@ const getExportTypes = ({ callback: async ({ filename }) => { if (!results) return []; const nodeHeader = "uid:ID,label:LABEL,title,author,date\n"; - const nodeData = (await getPageData({ results, allNodes })) + const nodeData = getPageData({ results, allNodes }) .map(({ text, uid, type }) => { const value = text.replace(new RegExp(`^\\[\\[\\w*\\]\\] - `), ""); const { displayName, date } = getPageMetadata(text); @@ -213,7 +211,7 @@ const getExportTypes = ({ removeSpecialCharacters, linkType, } = getExportSettings(); - const allPages = await getPageData({ + const allPages = getPageData({ results, allNodes, isExportDiscourseGraph, diff --git a/apps/roam/src/utils/pageToMarkdown.ts b/apps/roam/src/utils/pageToMarkdown.ts index 3cd50cc44..99b3c866b 100644 --- a/apps/roam/src/utils/pageToMarkdown.ts +++ b/apps/roam/src/utils/pageToMarkdown.ts @@ -1,6 +1,5 @@ import { BLOCK_REF_REGEX } from "roamjs-components/dom/constants"; import normalizePageTitle from "roamjs-components/queries/normalizePageTitle"; -import getPageUidByBlockUid from "roamjs-components/queries/getPageUidByBlockUid"; import getTextByBlockUid from "roamjs-components/queries/getTextByBlockUid"; import getPageMetadata from "./getPageMetadata"; import getPageViewType from "roamjs-components/queries/getPageViewType"; @@ -21,7 +20,6 @@ import { pullBlockToTreeNode, collectUids, } from "./exportUtils"; -import getPageUidByPageTitle from "roamjs-components/queries/getPageUidByPageTitle"; const MATCHES_NONE = /$.+^/; From 2b81cecacb18341512ef19f2891fb7de2586abb2 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 27 Dec 2025 09:09:10 -0500 Subject: [PATCH 3/5] coderabbit corrections --- apps/roam/src/utils/exportUtils.ts | 3 +-- apps/roam/src/utils/pageToMarkdown.ts | 2 -- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/apps/roam/src/utils/exportUtils.ts b/apps/roam/src/utils/exportUtils.ts index f94a17292..3d191ae71 100644 --- a/apps/roam/src/utils/exportUtils.ts +++ b/apps/roam/src/utils/exportUtils.ts @@ -79,8 +79,7 @@ const getContentFromNodes = ({ if (!nodeFormat) return title; const regex = new RegExp( `^${nodeFormat - .replace(/\[/g, "\\[") - .replace(/]/g, "\\]") + .replace(/[[\]\\^$.|?*+()]/g, "\\$&") .replace("{content}", "(.*?)") .replace(/{[^}]+}/g, "(?:.*?)")}$`, ); diff --git a/apps/roam/src/utils/pageToMarkdown.ts b/apps/roam/src/utils/pageToMarkdown.ts index 99b3c866b..47e06c093 100644 --- a/apps/roam/src/utils/pageToMarkdown.ts +++ b/apps/roam/src/utils/pageToMarkdown.ts @@ -222,8 +222,6 @@ export const pageToMarkdown = async ( maxFilenameLength, removeSpecialCharacters, linkType, - blockRefsAsLinks = false, - blockAnchors = false, }: { includeDiscourseContext: boolean; appendRefNodeContext: boolean; From a52ccd41625e3fe753d9570919db20933d2c49c0 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 27 Dec 2025 09:35:16 -0500 Subject: [PATCH 4/5] optimization --- apps/roam/src/utils/exportUtils.ts | 41 +++++++++++------------------- 1 file changed, 15 insertions(+), 26 deletions(-) diff --git a/apps/roam/src/utils/exportUtils.ts b/apps/roam/src/utils/exportUtils.ts index 3d191ae71..e5dc5a2b2 100644 --- a/apps/roam/src/utils/exportUtils.ts +++ b/apps/roam/src/utils/exportUtils.ts @@ -25,35 +25,24 @@ export const getPageData = ({ allNodes: DiscourseNode[]; isExportDiscourseGraph?: boolean; }): (Result & { type: string })[] => { - const allResults = results || []; - - if (isExportDiscourseGraph) return allResults as DiscourseExportResult[]; + if (isExportDiscourseGraph) return results as DiscourseExportResult[]; const matchedTexts = new Set(); + const mappedResults = results.flatMap((r) => + Object.keys(r) + .filter((k) => k.endsWith(`-uid`) && k !== "text-uid") + .map((k) => ({ + ...r, + text: r[k.slice(0, -4)].toString(), + uid: r[k] as string, + })) + .concat({ + text: r.text, + uid: r.uid, + }), + ); return allNodes.flatMap((n) => - (allResults - ? allResults.flatMap((r) => - Object.keys(r) - .filter((k) => k.endsWith(`-uid`) && k !== "text-uid") - .map((k) => ({ - ...r, - text: r[k.slice(0, -4)].toString(), - uid: r[k] as string, - })) - .concat({ - text: r.text, - uid: r.uid, - }), - ) - : ( - window.roamAlphaAPI.q( - "[:find (pull ?e [:block/uid :node/title]) :where [?e :node/title _]]", - ) as [Record][] - ).map(([{ title, uid }]) => ({ - text: title, - uid, - })) - ) + mappedResults .filter(({ text }) => { if (!text) return false; if (matchedTexts.has(text)) return false; From c2924c234ed97c203482f7273342855f270de2f3 Mon Sep 17 00:00:00 2001 From: Marc-Antoine Parent Date: Sat, 27 Dec 2025 10:05:11 -0500 Subject: [PATCH 5/5] more coderabbit corrections --- apps/roam/src/utils/pageToMarkdown.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/apps/roam/src/utils/pageToMarkdown.ts b/apps/roam/src/utils/pageToMarkdown.ts index 47e06c093..4dc760631 100644 --- a/apps/roam/src/utils/pageToMarkdown.ts +++ b/apps/roam/src/utils/pageToMarkdown.ts @@ -233,8 +233,6 @@ export const pageToMarkdown = async ( maxFilenameLength: number; removeSpecialCharacters: boolean; linkType: string; - blockRefsAsLinks?: boolean; - blockAnchors?: boolean; }, ): Promise<{ title: string; content: string; uids: Set }> => { const v = getPageViewType(text) || "bullet"; @@ -251,7 +249,7 @@ export const pageToMarkdown = async ( const referenceResults = isFlagEnabled("render references") ? ( window.roamAlphaAPI.data.fast.q( - `[:find (pull ?pr [:node/title]) (pull ?r [:block/heading [:block/string :as "text"] [:children/view-type :as "viewType"] {:block/children ...}]) :where [?p :node/title "${normalizePageTitle( + `[:find (pull ?pr [:node/title :block/uid]) (pull ?r [:block/heading [:block/string :as "text"] [:children/view-type :as "viewType"] {:block/children ...}]) :where [?p :node/title "${normalizePageTitle( text, )}"] [?r :block/refs ?p] [?r :block/page ?pr]]`, ) as [PullBlock, PullBlock][]