diff --git a/CHANGELOG.md b/CHANGELOG.md index af20b6b7..17e7bf66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p - `searchListPredicate` property: Allows to filter the complete list of search options at once. - Following optional BlueprintJs properties are forwarded now to override default behaviour: `noResults`, `createNewItemRenderer` and `itemRenderer` - `isValidNewOption` property: Checks if an input string is or can be turned into a valid new option. +- `<Markdown />` + - Added `cutOff` property to set maximum number of raw Markdown characters to render ### Fixed diff --git a/src/cmem/markdown/Markdown.stories.tsx b/src/cmem/markdown/Markdown.stories.tsx index ee7adef3..ee6ffc3f 100644 --- a/src/cmem/markdown/Markdown.stories.tsx +++ b/src/cmem/markdown/Markdown.stories.tsx @@ -67,3 +67,41 @@ A line with some HTML code inside. [^1]: This is the text related to the the footnote referrer. `, }; + +export const CutOff = Template.bind({}); + +CutOff.args = { + children: ` +This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists. + +You can: + * configure _link targets_ + * add custom __rehype__ plugins + * and filter content through an allowed elements list + +A third paragraph that will not appear once the cutOff limit is reached. + `, + cutOff: 300, +}; + +export const CutOffWithCodeFence = Template.bind({}); + +CutOffWithCodeFence.args = { + children: ` +A short paragraph before the code block. + +Here is an important code example: + +\`\`\`json +{ + "host": "localhost", + "port": 8080, + "debug": true +} +\`\`\` + +This paragraph comes after the code block and should not appear when the cutOff limit falls inside the fence above. 
+ `, + cutOff: 110, + cutOffSuffix: "...", +}; diff --git a/src/cmem/markdown/Markdown.tsx b/src/cmem/markdown/Markdown.tsx index 1821f1ef..451a3ae3 100644 --- a/src/cmem/markdown/Markdown.tsx +++ b/src/cmem/markdown/Markdown.tsx @@ -12,6 +12,10 @@ import { TestableComponent } from "../../components"; import { HtmlContentBlock, HtmlContentBlockProps } from "../../components/Typography"; import { CLASSPREFIX as eccgui } from "../../configuration/constants"; +import utils from "./markdown.utils"; + +const DEFAULT_CUTOFF_SUFFIX = "..."; + export interface MarkdownProps extends TestableComponent { children: string; /** @@ -47,6 +51,19 @@ export interface MarkdownProps extends TestableComponent { * Configure the `HtmlContentBlock` component that is automatically used as wrapper for the parsed Markdown content. */ htmlContentBlockProps?: Omit; + /** + * Maximum number of raw Markdown characters to render. + * Content exceeding this limit is truncated at the nearest safe paragraph + * boundary (or word boundary as fallback) to preserve Markdown structure. + * No truncation when absent or ≤ 0. + */ + cutOff?: number; + /** + * Text appended as a trailing paragraph when content is truncated by `cutOff`. + * Set to `""` to suppress the indicator entirely. + * Defaults to `"..."`. + */ + cutOffSuffix?: string; } const configDefault = { @@ -109,8 +126,13 @@ export const Markdown = ({ reHypePlugins, linkTargetName = "_mdref", htmlContentBlockProps, + cutOff, + cutOffSuffix = DEFAULT_CUTOFF_SUFFIX, ...otherProps }: MarkdownProps) => { + const renderContent = + cutOff !== undefined && cutOff > 0 ? 
/**
 * Finds the character ranges occupied by fenced code blocks.
 *
 * The content is scanned line by line. A fence opens on a line that starts with
 * at most three spaces followed by three or more backticks or tildes, and closes
 * on a later line that uses the same marker character, is at least as long as the
 * opener and carries no other text. A fence that is never closed extends to the
 * end of the content.
 *
 * @param content Raw Markdown text.
 * @returns Half-open `[start, end)` index pairs, one per fenced block, in document order.
 */
const collectFenceRanges = (content: string): [number, number][] => {
    const fenceLine = /^ {0,3}(`{3,}|~{3,})([\s\S]*)$/;
    const ranges: [number, number][] = [];
    let openFence: { start: number; marker: string; length: number } | undefined;
    let lineStart = 0;

    while (lineStart <= content.length) {
        const newlineIdx = content.indexOf("\n", lineStart);
        const lineEnd = newlineIdx === -1 ? content.length : newlineIdx;
        const match = fenceLine.exec(content.slice(lineStart, lineEnd));

        if (match) {
            const run = match[1] ?? "";
            const rest = match[2] ?? "";
            const marker = run.charAt(0);
            if (!openFence) {
                // A backtick run followed by more backticks on the same line is an
                // inline code span, not a fence opener.
                if (marker === "~" || !rest.includes("`")) {
                    openFence = { start: lineStart, marker, length: run.length };
                }
            } else if (marker === openFence.marker && run.length >= openFence.length && rest.trim() === "") {
                ranges.push([openFence.start, lineEnd]);
                openFence = undefined;
            }
        }

        if (newlineIdx === -1) {
            break;
        }
        lineStart = newlineIdx + 1;
    }

    if (openFence) {
        ranges.push([openFence.start, content.length]);
    }
    return ranges;
};

/**
 * Truncates a Markdown string at a safe position at or before the `cutOff` character limit.
 *
 * Cut positions are tried in this order:
 * 1. the last blank-line paragraph break (`\n\n`) that is not inside a fenced code block,
 * 2. the last space character at or before `cutOff`,
 * 3. a hard cut at `cutOff`, moved back one code unit if it would split a surrogate pair.
 *
 * A paragraph or word boundary is only accepted when non-whitespace text precedes it, so
 * leading blank lines or indentation never lead to a result that consists of the suffix alone.
 * Only the paragraph search respects code fences; the two fallbacks may cut inside a fence.
 *
 * @param content Raw Markdown text.
 * @param cutOff Maximum number of characters to keep. Content is returned unchanged when
 * this is falsy, not positive, or not smaller than the content length.
 * @param suffix Text appended as a separate trailing paragraph after truncation.
 * Nothing is appended when it is empty or omitted.
 * @returns The original content, or the truncated content with trailing whitespace removed
 * and the suffix appended.
 */
const truncateMarkdown = (content: string, cutOff: number, suffix?: string): string => {
    if (!cutOff || cutOff <= 0 || content.length <= cutOff) {
        return content;
    }

    const fenceRanges = collectFenceRanges(content);
    const isInsideFence = (pos: number): boolean => fenceRanges.some(([start, end]) => pos >= start && pos < end);
    const leavesVisibleContent = (pos: number): boolean => content.slice(0, pos).trim().length > 0;

    // Walk backward from cutOff to the last paragraph break outside a code fence.
    // Once a break has no visible text before it, no earlier break can have any either.
    let cutPoint = -1;
    let boundary = content.lastIndexOf("\n\n", cutOff);
    while (boundary > 0 && leavesVisibleContent(boundary)) {
        if (!isInsideFence(boundary)) {
            cutPoint = boundary;
            break;
        }
        boundary = content.lastIndexOf("\n\n", boundary - 1);
    }

    if (cutPoint === -1) {
        const lastSpace = content.lastIndexOf(" ", cutOff);
        if (lastSpace > 0 && leavesVisibleContent(lastSpace)) {
            // Fallback: last word boundary before cutOff.
            cutPoint = lastSpace;
        } else {
            // Last resort: hard cut, without leaving a lone high surrogate behind.
            cutPoint = cutOff;
            const lastUnit = content.charCodeAt(cutPoint - 1);
            if (cutPoint > 1 && lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
                cutPoint -= 1;
            }
        }
    }

    const truncated = content.slice(0, cutPoint).trimEnd();
    return suffix ? `${truncated}\n\n${suffix}` : truncated;
};
one.\n\nPara two.\n\n..."); + }); + + it("appends nothing when suffix is empty string", () => { + const content = "First paragraph.\n\nSecond paragraph that exceeds the limit."; + const result = truncateMarkdown(content, 30, ""); + expect(result).toBe("First paragraph."); + }); + + it("falls back to word boundary when no paragraph boundary exists", () => { + const content = "This is a single long line with no paragraph breaks anywhere."; + const result = truncateMarkdown(content, 25, "..."); + expect(result).toBe("This is a single long\n\n..."); + }); + + it("hard-cuts at cutOff when no word boundary exists", () => { + const content = "abcdefghijklmnopqrstuvwxyz"; + const result = truncateMarkdown(content, 10, "..."); + expect(result).toBe("abcdefghij\n\n..."); + }); + + it("skips \\n\\n inside a code fence and backs up to pre-fence boundary", () => { + const content = ["Safe paragraph.", "", "```", "line one", "", "line two", "```", "", "After fence."].join( + "\n" + ); + const fenceStart = content.indexOf("```"); + const cutOff = fenceStart + 15; // somewhere inside the fence + const result = truncateMarkdown(content, cutOff, "..."); + expect(result).toBe("Safe paragraph.\n\n..."); + }); + + it("backs up past the fence when cutOff falls on the closing fence marker", () => { + const content = ["Intro.", "", "```", "some code", "```", "", "Outro."].join("\n"); + const closingFenceIdx = content.lastIndexOf("```"); + const result = truncateMarkdown(content, closingFenceIdx, "..."); + expect(result).toBe("Intro.\n\n..."); + }); + + it("backs up past the fence when cutOff falls on the opening fence marker", () => { + const content = ["Before.", "", "```", "code here", "```"].join("\n"); + const openingFenceIdx = content.indexOf("```"); + const result = truncateMarkdown(content, openingFenceIdx, "..."); + expect(result).toBe("Before.\n\n..."); + }); + + it("falls back to word boundary when content is entirely one code fence", () => { + const content = "```\nsome code 
line here\n```"; + const result = truncateMarkdown(content, 15, "..."); + expect(result).toBe("```\nsome code\n\n..."); + }); +});