Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this p
- `searchListPredicate` property: Allows to filter the complete list of search options at once.
- Following optional BlueprintJs properties are forwarded now to override default behaviour: `noResults`, `createNewItemRenderer` and `itemRenderer`
- `isValidNewOption` property: Checks if an input string is or can be turned into a valid new option.
- `<Markdown />`
- `cutOff` and `cutOffSuffix` properties: limit the number of raw Markdown characters that are rendered and set the text appended after truncated content

### Fixed

Expand Down
38 changes: 38 additions & 0 deletions src/cmem/markdown/Markdown.stories.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,41 @@ A line with some <strong>HTML code</strong> inside.
[^1]: This is the text related to the the footnote referrer.
`,
};

// Story for the `cutOff` property: the Markdown source below is rendered with a
// limit of 300 raw characters. The source consists of an intro paragraph, a
// bullet list and a closing paragraph that is meant to be dropped by the limit.
export const CutOff = Template.bind({});

CutOff.args = {
children: `
This component renders Markdown content safely. It supports **GitHub Flavoured Markdown**, syntax highlighting for code blocks, and definition lists.

You can:
* configure _link targets_
* add custom __rehype__ plugins
* and filter content through an allowed elements list

A third paragraph that will not appear once the cutOff limit is reached.
`,
// Maximum number of raw Markdown characters to render.
cutOff: 300,
};

// Story for `cutOff` combined with a fenced code block: the limit of 110 raw
// characters is intended to fall inside the JSON fence, so the rendered output
// should stop before the fence instead of showing a broken code block.
export const CutOffWithCodeFence = Template.bind({});

CutOffWithCodeFence.args = {
children: `
A short paragraph before the code block.

Here is an important code example:

\`\`\`json
{
"host": "localhost",
"port": 8080,
"debug": true
}
\`\`\`

This paragraph comes after the code block and should not appear when the cutOff limit falls inside the fence above.
`,
cutOff: 110,
// Text appended as a trailing paragraph when the content was truncated.
cutOffSuffix: "...",
};
24 changes: 23 additions & 1 deletion src/cmem/markdown/Markdown.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ import { TestableComponent } from "../../components";
import { HtmlContentBlock, HtmlContentBlockProps } from "../../components/Typography";
import { CLASSPREFIX as eccgui } from "../../configuration/constants";

import utils from "./markdown.utils";

/** Truncation indicator used by `Markdown` when no `cutOffSuffix` property is given. */
const DEFAULT_CUTOFF_SUFFIX = "...";

export interface MarkdownProps extends TestableComponent {
children: string;
/**
Expand Down Expand Up @@ -47,6 +51,19 @@ export interface MarkdownProps extends TestableComponent {
* Configure the `HtmlContentBlock` component that is automatically used as wrapper for the parsed Markdown content.
*/
htmlContentBlockProps?: Omit<HtmlContentBlockProps, "children" | "className" | "data-test-id">;
/**
* Maximum number of raw Markdown characters to render.
* Content exceeding this limit is truncated at the nearest safe paragraph
* boundary (or word boundary as fallback) to preserve Markdown structure.
* No truncation when absent or ≤ 0.
*/
cutOff?: number;
/**
* Text appended as a trailing paragraph when content is truncated by `cutOff`.
* Set to `""` to suppress the indicator entirely.
* Defaults to `"..."`.
*/
cutOffSuffix?: string;
}

const configDefault = {
Expand Down Expand Up @@ -109,8 +126,13 @@ export const Markdown = ({
reHypePlugins,
linkTargetName = "_mdref",
htmlContentBlockProps,
cutOff,
cutOffSuffix = DEFAULT_CUTOFF_SUFFIX,
...otherProps
}: MarkdownProps) => {
const renderContent =
cutOff !== undefined && cutOff > 0 ? utils.truncateMarkdown(children, cutOff, cutOffSuffix) : children;

const configHtmlExternalLinks = {
rel: ["nofollow"],
target: linkTargetName,
Expand All @@ -136,7 +158,7 @@ export const Markdown = ({
: {};

const reactMarkdownProperties = {
children: children.trim(),
children: renderContent.trim(),
...configDefault,
...configHtml,
...configTextOnly,
Expand Down
64 changes: 64 additions & 0 deletions src/cmem/markdown/markdown.utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,72 @@ const extractNamedAnchors = (markdown: string): string[] => {
return namedAnchors;
};

/**
 * Truncates a Markdown string so that at most `cutOff` characters of the original content are kept.
 *
 * The cut position is chosen in this order:
 * 1. the last blank line (`\n\n`) at or before `cutOff` that is not inside a backtick code fence,
 * 2. the last space character at or before `cutOff`,
 * 3. a hard cut at `cutOff` (moved by one code unit if it would split a surrogate pair).
 *
 * Candidates from step 1 and 2 are only accepted when non-whitespace content precedes them,
 * so leading blank lines or indentation never produce an empty result while later content
 * could still be shown. Steps 2 and 3 do not take code fences into account, i.e. the result
 * may end inside a fence when no paragraph boundary is available.
 *
 * @param content Raw Markdown source.
 * @param cutOff Maximum number of characters to keep. Falsy values and values `<= 0` disable truncation.
 * @param suffix Optional text appended as its own paragraph when the content was truncated.
 * @returns `content` unchanged when it fits into `cutOff`; otherwise the truncated content without
 * trailing whitespace, followed by `\n\n` and `suffix` if a non-empty suffix is given.
 */
const truncateMarkdown = (content: string, cutOff: number, suffix?: string): string => {
    if (!cutOff || cutOff <= 0 || content.length <= cutOff) {
        return content;
    }

    // Collect [start, end) index pairs of all closed backtick code fences.
    // The closing marker is anchored with `^` (multiline) instead of a preceding `\n`, so an
    // empty fence (closing marker directly after the opening line) is recognised as well and
    // cannot swallow the prose up to the next fence.
    const codeFenceRegex = /^(`{3,})[^\n]*\n[\s\S]*?^\1/gm;
    const fenceRanges: [number, number][] = [];
    let fenceMatch: RegExpExecArray | null;
    while ((fenceMatch = codeFenceRegex.exec(content)) !== null) {
        fenceRanges.push([fenceMatch.index, fenceMatch.index + fenceMatch[0].length]);
    }

    const isInsideFence = (pos: number): boolean => fenceRanges.some(([start, end]) => pos >= start && pos < end);

    // An opening marker that is not part of any closed fence starts an unclosed fence,
    // which extends to the end of the content. Only the first one matters.
    const openMarkerRegex = /^`{3,}[^\n]*/gm;
    let markerMatch: RegExpExecArray | null;
    while ((markerMatch = openMarkerRegex.exec(content)) !== null) {
        if (!isInsideFence(markerMatch.index)) {
            fenceRanges.push([markerMatch.index, content.length]);
            break;
        }
    }

    // A cut position is only useful if at least one non-whitespace character precedes it.
    const firstVisible = content.search(/\S/);
    const keepsContent = (end: number): boolean => firstVisible !== -1 && end > firstVisible;

    // Walk backward from cutOff to find the last \n\n that is not inside a code fence.
    let cutPoint = -1;
    let searchFrom = cutOff;
    while (searchFrom > 0) {
        const idx = content.lastIndexOf("\n\n", searchFrom);
        // Earlier boundaries have even smaller indexes, so stop once nothing visible would remain.
        if (idx === -1 || !keepsContent(idx)) break;
        if (!isInsideFence(idx)) {
            cutPoint = idx;
            break;
        }
        searchFrom = idx - 1;
    }

    // Fallback: last word boundary before cutOff, otherwise a hard cut at cutOff.
    if (cutPoint === -1) {
        const lastSpace = content.lastIndexOf(" ", cutOff);
        cutPoint = keepsContent(lastSpace) ? lastSpace : cutOff;
    }

    // Never split a UTF-16 surrogate pair: that would leave a lone surrogate in the output.
    const before = content.charCodeAt(cutPoint - 1);
    const after = content.charCodeAt(cutPoint);
    if (before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff) {
        cutPoint += cutPoint > 1 ? -1 : 1;
    }

    const truncated = content.slice(0, cutPoint).trimEnd();
    return suffix ? `${truncated}\n\n${suffix}` : truncated;
};

// Helper functions of the Markdown component, bundled into one object that
// serves as the default export of this module.
const utils = {
extractNamedAnchors,
truncateMarkdown,
};

export default utils;
70 changes: 70 additions & 0 deletions src/cmem/markdown/markdownutils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,73 @@ describe("Markdown utils", () => {
expect(namedAnchors).toStrictEqual([]);
});
});

// Test suite for `utils.truncateMarkdown`: covers the pass-through case, paragraph
// boundary cuts, the word-boundary and hard-cut fallbacks, and code fence handling.
describe("truncateMarkdown", () => {
    const truncate = utils.truncateMarkdown;
    const FENCE = "```";
    // Builds a multi-line Markdown document from single lines.
    const fromLines = (...lines: string[]): string => lines.join("\n");

    it("returns content unchanged when length is less than cutOff", () => {
        const short = "Short content.";
        expect(truncate(short, 1000)).toBe(short);
    });

    it("cuts at the last paragraph boundary before the cutOff", () => {
        // Position 30 lies inside "Second paragraph", so only the first paragraph survives.
        const twoParagraphs = "First paragraph.\n\nSecond paragraph that is longer.";
        expect(truncate(twoParagraphs, 30, "...")).toBe("First paragraph.\n\n...");
    });

    it("cuts at the nearest paragraph boundary when multiple exist", () => {
        const threeParagraphs = "Para one.\n\nPara two.\n\nPara three that pushes past the limit.";
        expect(truncate(threeParagraphs, 35, "...")).toBe("Para one.\n\nPara two.\n\n...");
    });

    it("appends nothing when suffix is empty string", () => {
        const twoParagraphs = "First paragraph.\n\nSecond paragraph that exceeds the limit.";
        expect(truncate(twoParagraphs, 30, "")).toBe("First paragraph.");
    });

    it("falls back to word boundary when no paragraph boundary exists", () => {
        const singleLine = "This is a single long line with no paragraph breaks anywhere.";
        expect(truncate(singleLine, 25, "...")).toBe("This is a single long\n\n...");
    });

    it("hard-cuts at cutOff when no word boundary exists", () => {
        const alphabet = "abcdefghijklmnopqrstuvwxyz";
        expect(truncate(alphabet, 10, "...")).toBe("abcdefghij\n\n...");
    });

    it("skips \\n\\n inside a code fence and backs up to pre-fence boundary", () => {
        const doc = fromLines("Safe paragraph.", "", FENCE, "line one", "", "line two", FENCE, "", "After fence.");
        // 15 characters after the opening marker is a position inside the fence.
        const limitInsideFence = doc.indexOf(FENCE) + 15;
        expect(truncate(doc, limitInsideFence, "...")).toBe("Safe paragraph.\n\n...");
    });

    it("backs up past the fence when cutOff falls on the closing fence marker", () => {
        const doc = fromLines("Intro.", "", FENCE, "some code", FENCE, "", "Outro.");
        const limitOnClosingMarker = doc.lastIndexOf(FENCE);
        expect(truncate(doc, limitOnClosingMarker, "...")).toBe("Intro.\n\n...");
    });

    it("backs up past the fence when cutOff falls on the opening fence marker", () => {
        const doc = fromLines("Before.", "", FENCE, "code here", FENCE);
        const limitOnOpeningMarker = doc.indexOf(FENCE);
        expect(truncate(doc, limitOnOpeningMarker, "...")).toBe("Before.\n\n...");
    });

    it("falls back to word boundary when content is entirely one code fence", () => {
        const onlyFence = "```\nsome code line here\n```";
        expect(truncate(onlyFence, 15, "...")).toBe("```\nsome code\n\n...");
    });
});
Loading