Skip to content

Commit be81caf

Browse files
committed
fix(files): harden the markdown editor (CRLF chunking, href allowlist, image escaping)
Final-audit follow-ups: - splitMarkdownBlocks normalizes CRLF/CR first — a closing fence ending in \r no longer fails to match, which had collapsed Windows-authored files with fenced code into one block and defeated the linear chunker (perf regression). - normalizeLinkHref rejects file://, blob:, and other non-network schemes (script/data schemes already rejected); network scheme:// (http/ftp/…) and bare host:port still pass. - Image markdown serialization escapes alt/title delimiters and angle-brackets a src with spaces/parens, so they round-trip losslessly; linked-image anchors open in a new tab (target=_blank). - Markdown paste routes through the chunker so a large pasted blob can't freeze the main thread.
1 parent 34fad14 commit be81caf

6 files changed

Lines changed: 51 additions & 12 deletions

File tree

apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/rich-markdown-editor/image.tsx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -68,8 +68,11 @@ function imageMarkdown(node: JSONContent): string {
6868
if (height) parts.push(`height="${escapeAttr(String(height))}"`)
6969
image = `<img ${parts.join(' ')}>`
7070
} else {
71-
const titlePart = title ? ` "${title}"` : ''
72-
image = `![${alt}](${src}${titlePart})`
71+
// Escape so an alt with `]`/`[` or a title with `"` can't break out of the `![…](… "…")` syntax
72+
// and corrupt the round-trip; a src with spaces/parens goes in angle brackets (CommonMark).
73+
const titlePart = title ? ` "${title.replace(/["\\]/g, '\\$&')}"` : ''
74+
const safeSrc = /[\s()]/.test(src) ? `<${src}>` : src
75+
image = `![${alt.replace(/[\\[\]]/g, '\\$&')}](${safeSrc}${titlePart})`
7376
}
7477
if (!href) return image
7578
const hrefTitlePart = hrefTitle ? ` "${hrefTitle}"` : ''
@@ -245,7 +248,7 @@ function ResizableImageView({ node, updateAttributes, selected, editor }: ReactN
245248
return (
246249
<NodeViewWrapper className='relative my-4 inline-block leading-none'>
247250
{safeHref ? (
248-
<a href={safeHref} rel='noopener noreferrer' className='block'>
251+
<a href={safeHref} target='_blank' rel='noopener noreferrer' className='block'>
249252
{image}
250253
</a>
251254
) : (

apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/rich-markdown-editor/markdown-fidelity.ts

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,32 @@ export function applyFrontmatter(frontmatter: string, body: string): string {
3232
return frontmatter + body
3333
}
3434

35+
/** A leading `scheme://` URL (network protocol). */
36+
const SCHEME_URL = /^([a-z][a-z0-9+.-]*):\/\//i
37+
/** A leading `scheme:` token (per the URL grammar). */
38+
const HAS_SCHEME = /^[a-z][a-z0-9+.-]*:/i
39+
/** A bare `host:port` (digits after the colon) — looks scheme-like but is really a domain. */
40+
const HOST_PORT = /^[a-z0-9.-]+:\d+(?:[/?#]|$)/i
41+
3542
/**
3643
* Normalize a user-entered link target: prefix a bare domain with `https://` so it doesn't resolve
3744
* as an in-app relative URL, while leaving already-qualified, relative, and protocol-relative URLs
38-
* intact. Dangerous schemes (`javascript:`, `data:`, `vbscript:`, `file:`) are rejected outright
39-
* rather than mangled into a broken `https://javascript:…`.
45+
* intact. Dangerous schemes are rejected outright rather than trusted or mangled: any `scheme:`
46+
* without `//` other than `mailto:`/`tel:` (so `javascript:`, `data:`, `vbscript:`, `blob:`, …), and
47+
* `file://` (local file access). Other network `scheme://` URLs (`http(s)`, `ftp`, …) pass through.
4048
*/
4149
export function normalizeLinkHref(href: string): string {
4250
const trimmed = href.trim()
4351
if (!trimmed) return ''
44-
if (/^(?:javascript|data|vbscript|file):/i.test(trimmed)) return ''
45-
if (/^(?:https?:\/\/|mailto:|tel:|[#?])/i.test(trimmed)) return trimmed
52+
if (/^[#?]/.test(trimmed)) return trimmed
4653
if (trimmed.startsWith('//')) return `https:${trimmed}`
4754
if (trimmed.startsWith('/')) return trimmed
48-
if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) return trimmed
55+
if (/^(?:mailto|tel):/i.test(trimmed)) return trimmed
56+
const schemed = trimmed.match(SCHEME_URL)
57+
if (schemed) return /^file$/i.test(schemed[1]) ? '' : trimmed
58+
// A `scheme:` without `//` (and not mailto/tel) is a script/data scheme — reject it. A bare
59+
// host:port (digits after the colon) is a domain, not a scheme, so it falls through to https.
60+
if (HAS_SCHEME.test(trimmed) && !HOST_PORT.test(trimmed)) return ''
4961
return `https://${trimmed}`
5062
}
5163

apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/rich-markdown-editor/markdown-parse.test.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,12 @@ describe('parseMarkdownToDoc (chunked)', () => {
126126
it('a multi-paragraph list item (indented continuation) stays one block', () => {
127127
expect(splitMarkdownBlocks('1. first\n\n second para\n\n2. next')).toHaveLength(1)
128128
})
129+
it('CRLF line endings still split (a closing fence ending in \\r must close)', () => {
130+
// A Windows-authored file with fenced code must not collapse to one block (which would defeat
131+
// the chunker); the closer ending in `\r` has to match. Assert block COUNT, not just fidelity.
132+
const crlf = '```ts\r\nx\r\n```\r\n\r\npara1\r\n\r\npara2\r\n\r\npara3'
133+
expect(splitMarkdownBlocks(crlf)).toEqual(['```ts\nx\n```', 'para1', 'para2', 'para3'])
134+
})
129135
})
130136

131137
it('matches one-shot on a large mixed document (the case the chunker exists for)', () => {

apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/rich-markdown-editor/markdown-parse.ts

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,10 @@ const BLOCKQUOTE = /^[ ]{0,3}>/
5757
* would silently shatter nested fences.
5858
*/
5959
export function splitMarkdownBlocks(body: string): string[] {
60-
const lines = body.split('\n')
60+
// Normalize CRLF/CR first: the fence/list/blockquote line tests anchor on `$`, so a trailing `\r`
61+
// would stop a closing fence matching and swallow the rest of a Windows-authored file into one
62+
// block (defeating the chunker). The editor normalizes `\r` on parse anyway, so meaning is unchanged.
63+
const lines = body.replace(/\r\n?/g, '\n').split('\n')
6164
const groups: string[] = []
6265
let current: string[] = []
6366
let fence: string | null = null

apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/rich-markdown-editor/markdown-paste.ts

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import { Extension } from '@tiptap/core'
22
import { Plugin } from '@tiptap/pm/state'
3+
import { parseMarkdownToDoc } from './markdown-parse'
34

45
/**
56
* Markdown syntax hints. If pasted plain text matches any of these, it's parsed as markdown rather
@@ -41,9 +42,11 @@ export const MarkdownPaste = Extension.create({
4142
if (html) return false
4243
const text = event.clipboardData?.getData('text/plain')
4344
if (!text || !looksLikeMarkdown(text)) return false
44-
const json = editor.markdown?.parse(text)
45-
if (!json) return false
46-
return editor.commands.insertContent(json)
45+
// Parse through the chunker (linear) so pasting a large markdown blob can't freeze the
46+
// main thread the way the underlying superlinear parse would.
47+
const doc = parseMarkdownToDoc(text)
48+
if (!doc.content?.length) return false
49+
return editor.commands.insertContent(doc)
4750
},
4851
},
4952
}),

apps/sim/app/workspace/[workspaceId]/files/components/file-viewer/rich-markdown-editor/round-trip.test.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,11 @@ describe('markdown-fidelity utils', () => {
112112
expect(normalizeLinkHref('data:text/html,<script>')).toBe('')
113113
expect(normalizeLinkHref('//cdn.example.com/a.js')).toBe('https://cdn.example.com/a.js')
114114
expect(normalizeLinkHref('ftp://host/file')).toBe('ftp://host/file')
115+
// Dangerous schemes rejected; a bare host:port is still treated as a domain.
116+
expect(normalizeLinkHref('file:///etc/passwd')).toBe('')
117+
expect(normalizeLinkHref('blob:https://x.com/uuid')).toBe('')
118+
expect(normalizeLinkHref('vbscript:msgbox(1)')).toBe('')
119+
expect(normalizeLinkHref('localhost:3000/path')).toBe('https://localhost:3000/path')
115120
})
116121

117122
it('collapses trailing blank lines and preserves leading whitespace', () => {
@@ -171,6 +176,13 @@ describe('editor markdown round-trip', () => {
171176
expect(out).toContain('![alt](https://example.com/i.png)')
172177
})
173178

179+
it('round-trips an image whose alt/title contain delimiter characters (idempotent)', () => {
180+
const input = '![a [b] c](https://example.com/i.png "ti\\"tle")'
181+
const out = roundTrip(input)
182+
expect(roundTrip(out)).toBe(out)
183+
expect(out).toContain('https://example.com/i.png')
184+
})
185+
174186
it('round-trips a linked image / badge (keeps the wrapping link)', () => {
175187
const out = roundTrip(
176188
'[![build](https://img.shields.io/badge/x-green)](https://ci.example.com)'

0 commit comments

Comments
 (0)