From ba41f8b4f9677fb14c1ecbe15d73ebe12a0d3859 Mon Sep 17 00:00:00 2001 From: Reese Williams Date: Sun, 25 Jan 2026 15:05:23 +0000 Subject: [PATCH 1/2] Improve heredoc end detection for embedded languages --- vscode/grammars/ruby.cson.json | 210 ++++++++++++------------- vscode/src/test/suite/grammars.test.ts | 26 ++- 2 files changed, 126 insertions(+), 110 deletions(-) diff --git a/vscode/grammars/ruby.cson.json b/vscode/grammars/ruby.cson.json index 994ee7c4d..548dc9a42 100644 --- a/vscode/grammars/ruby.cson.json +++ b/vscode/grammars/ruby.cson.json @@ -1618,7 +1618,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)HTML)\\b\\1))", "comment": "Heredoc with embedded HTML", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)HTML)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.html", "patterns": [ { @@ -1629,12 +1634,7 @@ } }, "contentName": "text.html", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)HTML)$)", "patterns": [ { "include": "#heredoc" @@ -1655,7 +1655,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)HAML)\\b\\1))", "comment": "Heredoc with embedded HAML", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)HAML)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.haml", "patterns": [ { @@ -1666,12 +1671,7 @@ } }, "contentName": "text.haml", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)HAML)$)", "patterns": [ { "include": "#heredoc" @@ -1692,7 +1692,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)XML)\\b\\1))", "comment": "Heredoc with embedded XML", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)XML)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.xml", "patterns": [ { @@ 
-1703,12 +1708,7 @@ } }, "contentName": "text.xml", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)XML)$)", "patterns": [ { "include": "#heredoc" @@ -1729,7 +1729,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)SQL)\\b\\1))", "comment": "Heredoc with embedded SQL", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)SQL)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.sql", "patterns": [ { @@ -1740,12 +1745,7 @@ } }, "contentName": "source.sql", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)SQL)$)", "patterns": [ { "include": "#heredoc" @@ -1766,7 +1766,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)(?:GRAPHQL|GQL))\\b\\1))", "comment": "Heredoc with embedded GraphQL", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)(?:GRAPHQL|GQL))$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.graphql", "patterns": [ { @@ -1777,12 +1782,7 @@ } }, "contentName": "source.graphql", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)(?:GRAPHQL|GQL))$)", "patterns": [ { "include": "#heredoc" @@ -1803,7 +1803,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)CSS)\\b\\1))", "comment": "Heredoc with embedded CSS", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)CSS)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.css", "patterns": [ { @@ -1814,12 +1819,7 @@ } }, "contentName": "source.css", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)CSS)$)", "patterns": [ { "include": "#heredoc" @@ -1840,7 +1840,12 @@ { "begin": 
"(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)CPP)\\b\\1))", "comment": "Heredoc with embedded C++", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)CPP)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.cpp", "patterns": [ { @@ -1851,12 +1856,7 @@ } }, "contentName": "source.cpp", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)CPP)$)", "patterns": [ { "include": "#heredoc" @@ -1877,7 +1877,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)C)\\b\\1))", "comment": "Heredoc with embedded C", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)C)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.c", "patterns": [ { @@ -1888,12 +1893,7 @@ } }, "contentName": "source.c", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)C)$)", "patterns": [ { "include": "#heredoc" @@ -1914,7 +1914,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)(?:JS|JAVASCRIPT))\\b\\1))", "comment": "Heredoc with embedded Javascript", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)(?:JS|JAVASCRIPT))$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.js", "patterns": [ { @@ -1925,12 +1930,7 @@ } }, "contentName": "source.js", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)(?:JS|JAVASCRIPT))$)", "patterns": [ { "include": "#heredoc" @@ -1951,7 +1951,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)JQUERY)\\b\\1))", "comment": "Heredoc with embedded jQuery Javascript", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)JQUERY)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.js.jquery", 
"patterns": [ { @@ -1962,12 +1967,7 @@ } }, "contentName": "source.js.jquery", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)JQUERY)$)", "patterns": [ { "include": "#heredoc" @@ -1988,7 +1988,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)(?:SH|SHELL))\\b\\1))", "comment": "Heredoc with embedded Shell", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)(?:SH|SHELL))$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.shell", "patterns": [ { @@ -1999,12 +2004,7 @@ } }, "contentName": "source.shell", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)(?:SH|SHELL))$)", "patterns": [ { "include": "#heredoc" @@ -2025,7 +2025,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)LUA)\\b\\1))", "comment": "Heredoc with embedded Lua", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)LUA)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.lua", "patterns": [ { @@ -2036,12 +2041,7 @@ } }, "contentName": "source.lua", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)LUA)$)", "patterns": [ { "include": "#heredoc" @@ -2062,7 +2062,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)RUBY)\\b\\1))", "comment": "Heredoc with embedded Ruby", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)RUBY)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.ruby", "patterns": [ { @@ -2073,12 +2078,7 @@ } }, "contentName": "source.ruby", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)RUBY)$)", "patterns": [ { "include": "#heredoc" @@ -2099,7 
+2099,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)(?:YAML|YML))\\b\\1))", "comment": "Heredoc with embedded YAML", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)(?:YAML|YML))$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.yaml", "patterns": [ { @@ -2110,12 +2115,7 @@ } }, "contentName": "source.yaml", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)(?:YAML|YML))$)", "patterns": [ { "include": "#heredoc" @@ -2136,7 +2136,12 @@ { "begin": "(?=(?><<[-~]?([\"'`]?)((?:[_\\w]+_|)SLIM)\\b\\1))", "comment": "Heredoc with embedded Slim", - "end": "(?!\\G)", + "end": "^\\s*((?:[_\\w]+_|)SLIM)$\\n?", + "endCaptures": { + "0": { + "name": "string.definition.end.ruby" + } + }, "name": "meta.embedded.block.slim", "patterns": [ { @@ -2147,12 +2152,7 @@ } }, "contentName": "text.slim", - "end": "^\\s*\\2$\\n?", - "endCaptures": { - "0": { - "name": "string.definition.end.ruby" - } - }, + "while": "^(?!\\s*((?:[_\\w]+_|)SLIM)$)", "patterns": [ { "include": "#heredoc" diff --git a/vscode/src/test/suite/grammars.test.ts b/vscode/src/test/suite/grammars.test.ts index 496ccaa72..38e80d1a0 100644 --- a/vscode/src/test/suite/grammars.test.ts +++ b/vscode/src/test/suite/grammars.test.ts @@ -305,6 +305,22 @@ suite("Grammars", () => { const actualTokens = tokenizeRuby(ruby); assert.deepStrictEqual(actualTokens, expectedTokens); }); + + test("embedded HEREDOC with interpolation containing question mark methods terminates correctly", () => { + // This test verifies that heredocs with embedded language highlighting properly terminate + // even when the content contains characters that might start begin/end patterns in the + // embedded grammar (like ? which starts C's ternary operator pattern). 
+ const ruby = "< token[0] === "some_ruby"); + assert(someRubyToken, "Expected to find 'some_ruby' token"); + assert.deepStrictEqual( + someRubyToken[1], + ["source.ruby"], + "Code after heredoc terminator should be plain Ruby, not embedded C", + ); + }); }); suite("Backtick String Literals", () => { @@ -791,7 +807,10 @@ suite("Grammars", () => { return { begin: `(?=(?><<[-~]?(["'\`]?)((?:[_\\w]+_|)${delimiter})\\b\\1))`, comment: `Heredoc with embedded ${label}`, - end: "(?!\\G)", + end: `^\\s*((?:[_\\w]+_|)${delimiter})$\\n?`, + endCaptures: { + "0": { name: "string.definition.end.ruby" }, + }, name, patterns: [ { @@ -800,10 +819,7 @@ suite("Grammars", () => { "0": { name: "string.definition.begin.ruby" }, }, contentName, - end: "^\\s*\\2$\\n?", - endCaptures: { - "0": { name: "string.definition.end.ruby" }, - }, + while: `^(?!\\s*((?:[_\\w]+_|)${delimiter})$)`, patterns: [ { include: "#heredoc" }, { include: "#interpolated_ruby" }, From f9c885a247d496f49cb4b2b915f8953891a3081a Mon Sep 17 00:00:00 2001 From: Reese Williams Date: Sun, 25 Jan 2026 15:43:34 +0000 Subject: [PATCH 2/2] Add 'heredocs' as a word for cspell --- project-words | 1 + 1 file changed, 1 insertion(+) diff --git a/project-words b/project-words index 361959f25..f3de6515b 100644 --- a/project-words +++ b/project-words @@ -36,6 +36,7 @@ Floo fnmatch fooo hashkey +heredocs hostedtoolcache importmap indexables