From 2a8b7128132f4f3fd9f9db70cbef5f90ba79d7f5 Mon Sep 17 00:00:00 2001 From: David Bernheisel Date: Thu, 12 Mar 2026 15:51:39 -0400 Subject: [PATCH] Implement ~BASH sigil highlighting --- .github/workflows/main.yml | 7 + package-lock.json | 18 +- package.json | 8 +- src/test/grammar.test.ts | 289 +++++++++++++++++++++++++++++++ syntaxes/elixir.json | 342 ++++++++++++++++++++++++++++++++++++- tsconfig.json | 3 +- 6 files changed, 661 insertions(+), 6 deletions(-) create mode 100644 src/test/grammar.test.ts diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 00e13de..4dd10ef 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -60,6 +60,13 @@ jobs: # DISABLE_GPU: 1 ELS_LOCAL: 1 if: runner.os == 'Linux' + - name: Run grammar tests + run: | + npm run test:grammar + env: + # DISABLE_GPU: 1 + ELS_LOCAL: 1 + if: runner.os != 'Linux' - name: Run tests run: | npm test diff --git a/package-lock.json b/package-lock.json index c1ae0b2..c61cb0c 100644 --- a/package-lock.json +++ b/package-lock.json @@ -24,7 +24,9 @@ "mocha": "^11.7.5", "npm-run-all": "^4.1.5", "rimraf": "^6.1.0", - "typescript": "~5.9.3" + "typescript": "~5.9.3", + "vscode-oniguruma": "^2.0.1", + "vscode-textmate": "^9.3.2" }, "engines": { "vscode": "^1.99.0" @@ -4182,6 +4184,20 @@ "integrity": "sha512-Ld1VelNuX9pdF39h2Hgaeb5hEZM2Z3jUrrMgWQAu82jMtZp7p3vJT3BzToKtZI7NgQssZje5o0zryOrhQvzQAg==", "license": "MIT" }, + "node_modules/vscode-oniguruma": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/vscode-oniguruma/-/vscode-oniguruma-2.0.1.tgz", + "integrity": "sha512-poJU8iHIWnC3vgphJnrLZyI3YdqRlR27xzqDmpPXYzA93R4Gk8z7T6oqDzDoHjoikA2aS82crdXFkjELCdJsjQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/vscode-textmate": { + "version": "9.3.2", + "resolved": "https://registry.npmjs.org/vscode-textmate/-/vscode-textmate-9.3.2.tgz", + "integrity": "sha512-n2uGbUcrjhUEBH16uGA0TvUfhWwliFZ1e3+pTjrkim1Mt7ydB41lV08aUvsi70OlzDWp6X7Bx3w/x3fAXIsN0Q==", + 
"dev": true, + "license": "MIT" + }, "node_modules/which": { "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", diff --git a/package.json b/package.json index 67a189c..a60ea8d 100644 --- a/package.json +++ b/package.json @@ -313,7 +313,8 @@ "entity.name.function.call.dot.elixir" ], "embeddedLanguages": { - "comment.documentation.heredoc.elixir": "markdown" + "comment.documentation.heredoc.elixir": "markdown", + "source.shell": "shellscript" } }, { @@ -824,6 +825,7 @@ "update-vscode": "node ./node_modules/vscode/bin/install", "pretest": "npm-run-all clean compile", "test": "node ./out/test/runTest.js", + "test:grammar": "npm run compile && mocha --ui tdd out/test/grammar.test.js", "lint": "biome check", "fix-formatting": "biome format --write", "esbuild-release": "npm run esbuild-base -- --minify", @@ -843,7 +845,9 @@ "mocha": "^11.7.5", "npm-run-all": "^4.1.5", "rimraf": "^6.1.0", - "typescript": "~5.9.3" + "typescript": "~5.9.3", + "vscode-oniguruma": "^2.0.1", + "vscode-textmate": "^9.3.2" }, "dependencies": { "@vscode/extension-telemetry": "^1.2.0", diff --git a/src/test/grammar.test.ts b/src/test/grammar.test.ts new file mode 100644 index 0000000..4e9ddb3 --- /dev/null +++ b/src/test/grammar.test.ts @@ -0,0 +1,289 @@ +import * as assert from "node:assert"; +import * as fs from "node:fs"; +import * as path from "node:path"; +import { loadWASM, OnigScanner, OnigString } from "vscode-oniguruma"; +import { + type IGrammar, + INITIAL, + type IOnigLib, + type IRawGrammar, + parseRawGrammar, + Registry, +} from "vscode-textmate"; + +const REPO_ROOT = path.resolve(__dirname, "../../"); + +async function createOnigLib(): Promise<IOnigLib> { + const wasmPath = path.join( + REPO_ROOT, + "node_modules/vscode-oniguruma/release/onig.wasm", + ); + const fileBuffer = fs.readFileSync(wasmPath); + const wasmBin = fileBuffer.buffer.slice( + fileBuffer.byteOffset, + fileBuffer.byteOffset + fileBuffer.byteLength, + ); + await loadWASM(wasmBin); + return { +
createOnigScanner: (patterns: string[]) => new OnigScanner(patterns), + createOnigString: (s: string) => new OnigString(s), + }; +} + +function loadGrammarFile(name: string): IRawGrammar { + const grammarPath = path.join(REPO_ROOT, "syntaxes", name); + const content = fs.readFileSync(grammarPath, "utf8"); + return parseRawGrammar(content, grammarPath); +} + +function stubShellGrammar(): IRawGrammar { + return parseRawGrammar( + JSON.stringify({ + scopeName: "source.shell", + patterns: [ + { + match: + "\\b(echo|if|then|fi|for|do|done|while|case|esac|function|return|exit|set|export|local)\\b", + name: "keyword.control.shell", + }, + ], + }), + "shell.json", + ); +} + +async function createRegistry(): Promise<Registry> { + const onigLib = createOnigLib(); + const grammars: Record<string, string> = { + "source.elixir": "elixir.json", + }; + return new Registry({ + onigLib, + loadGrammar: async (scopeName: string) => { + const file = grammars[scopeName]; + if (file) { + return loadGrammarFile(file); + } + if (scopeName === "source.shell") { + return stubShellGrammar(); + } + return null; + }, + }); +} + +/** Tokenize a multi-line string and return all tokens with their scopes. */ +function tokenizeLines(grammar: IGrammar, text: string) { + const lines = text.split("\n"); + let ruleStack = INITIAL; + const result: Array<{ line: number; text: string; scopes: string[] }> = []; + for (let i = 0; i < lines.length; i++) { + const lineTokens = grammar.tokenizeLine(lines[i], ruleStack); + for (const token of lineTokens.tokens) { + result.push({ + line: i, + text: lines[i].substring(token.startIndex, token.endIndex), + scopes: token.scopes, + }); + } + ruleStack = lineTokens.ruleStack; + } + return result; +} + +/** Check that at least one token in the line has a scope matching the predicate.
*/ +function assertScopeOnLine( + tokens: ReturnType<typeof tokenizeLines>, + lineIndex: number, + scopeSubstring: string, + message?: string, +) { + const lineTokens = tokens.filter((t) => t.line === lineIndex); + const found = lineTokens.some((t) => + t.scopes.some((s) => s.includes(scopeSubstring)), + ); + assert.ok( + found, + message || + `Expected scope containing "${scopeSubstring}" on line ${lineIndex}, got scopes: ${JSON.stringify( + lineTokens.map((t) => ({ text: t.text, scopes: t.scopes })), + null, + 2, + )}`, + ); +} + +/** Check that no token in the line has a scope matching the predicate. */ +function assertNoScopeOnLine( + tokens: ReturnType<typeof tokenizeLines>, + lineIndex: number, + scopeSubstring: string, + message?: string, +) { + const lineTokens = tokens.filter((t) => t.line === lineIndex); + const found = lineTokens.some((t) => + t.scopes.some((s) => s.includes(scopeSubstring)), + ); + assert.ok( + !found, + message || + `Expected no scope containing "${scopeSubstring}" on line ${lineIndex}, but found: ${JSON.stringify( + lineTokens + .filter((t) => t.scopes.some((s) => s.includes(scopeSubstring))) + .map((t) => ({ text: t.text, scopes: t.scopes })), + null, + 2, + )}`, + ); +} + +suite("Bash sigil grammar tests", () => { + let grammar: IGrammar; + + suiteSetup(async () => { + const registry = await createRegistry(); + const g = await registry.loadGrammar("source.elixir"); + if (!g) { + throw new Error("Failed to load Elixir grammar"); + } + grammar = g; + }); + + suite("~BASH", () => { + test("heredoc with double quotes gets source.shell scope", () => { + const tokens = tokenizeLines(grammar, 'x = ~BASH"""\necho "hello"\n"""'); + assertScopeOnLine(tokens, 1, "source.shell"); + }); + + test("heredoc with double quotes and modifiers closes properly", () => { + const tokens = tokenizeLines( + grammar, + 'result = ~BASH"""\necho before\nfalse\necho after\n """eS', + ); + assertScopeOnLine(tokens, 1, "source.shell"); + // Line 4 ("""eS) should have the end punctuation, not source.shell
content + const endTokens = tokens.filter( + (t) => + t.line === 4 && + t.scopes.some((s) => + s.includes("punctuation.definition.string.end"), + ), + ); + assert.ok( + endTokens.length > 0, + `Expected closing delimiter on line 4, tokens: ${JSON.stringify( + tokens + .filter((t) => t.line === 4) + .map((t) => ({ text: t.text, scopes: t.scopes })), + null, + 2, + )}`, + ); + }); + + test("double quote sigil does not consume past closing quote", () => { + const tokens = tokenizeLines( + grammar, + '~BASH"false; echo printed"S == "printed\\n"', + ); + // The == should NOT be inside source.shell + const eqTokens = tokens.filter( + (t) => t.text === "==" || t.text === " == ", + ); + for (const t of eqTokens) { + assert.ok( + !t.scopes.some((s) => s === "source.shell"), + `== should not be in source.shell scope, got: ${JSON.stringify(t.scopes)}`, + ); + } + }); + + test("heredoc with single quotes gets source.shell scope", () => { + const tokens = tokenizeLines(grammar, "x = ~BASH'''\necho 'hello'\n'''"); + assertScopeOnLine(tokens, 1, "source.shell"); + }); + + test("curlies get source.shell scope", () => { + const tokens = tokenizeLines(grammar, '~BASH{echo "hello"}'); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("brackets get source.shell scope", () => { + const tokens = tokenizeLines(grammar, "~BASH[echo hello]"); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("parens get source.shell scope", () => { + const tokens = tokenizeLines(grammar, "~BASH(echo hello)"); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("angle brackets get source.shell scope", () => { + const tokens = tokenizeLines(grammar, "~BASH<echo hello>"); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("pipes get source.shell scope", () => { + const tokens = tokenizeLines(grammar, "~BASH|echo hello|"); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("slashes get source.shell scope", () => { + const tokens = tokenizeLines(grammar,
"~BASH/echo hello/"); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("double quotes get source.shell scope", () => { + const tokens = tokenizeLines(grammar, '~BASH"echo hello"'); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("single quotes get source.shell scope", () => { + const tokens = tokenizeLines(grammar, "~BASH'echo hello'"); + assertScopeOnLine(tokens, 0, "source.shell"); + }); + + test("supports Elixir interpolation", () => { + const tokens = tokenizeLines(grammar, "~BASH{echo #{name}}"); + const interpolationTokens = tokens.filter( + (t) => + t.line === 0 && + t.scopes.some((s) => s.includes("meta.embedded.line.elixir")), + ); + assert.ok( + interpolationTokens.length > 0, + "Expected interpolation tokens", + ); + }); + + test("accepts flags after closing delimiter", () => { + const tokens = tokenizeLines(grammar, "~BASH{echo hello}SEO"); + const endTokens = tokens.filter( + (t) => + t.line === 0 && + t.scopes.some((s) => s.includes("punctuation.definition.string.end")), + ); + assert.ok( + endTokens.length > 0, + "Expected closing delimiter to be recognized", + ); + }); + }); + + suite("does not interfere with other sigils", () => { + test("~r still gets regexp scope", () => { + const tokens = tokenizeLines(grammar, "~r{foo.*bar}"); + assertScopeOnLine(tokens, 0, "string.regexp"); + }); + + test("~s gets generic string scope, not shell", () => { + const tokens = tokenizeLines(grammar, "~s{hello world}"); + assertNoScopeOnLine(tokens, 0, "source.shell"); + }); + + test("~w gets generic string scope, not shell", () => { + const tokens = tokenizeLines(grammar, "~w{one two three}"); + assertNoScopeOnLine(tokens, 0, "source.shell"); + }); + }); +}); diff --git a/syntaxes/elixir.json b/syntaxes/elixir.json index eb5adb1..ccf1a90 100644 --- a/syntaxes/elixir.json +++ b/syntaxes/elixir.json @@ -1820,6 +1820,300 @@ } ] }, + { + "comment": "Bash sigil with double quoted heredoc - selective shell highlighting (excludes #string to 
avoid \" conflict)", + "begin": "\\s?(~BASH\"\"\")$", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + }, + "1": { + "name": "string.quoted.double.heredoc.elixir" + } + }, + "end": "^\\s*(\"\"\"[SEOevpu]*)$", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + }, + "1": { + "name": "string.quoted.double.heredoc.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "#safe_shell_tokens" + } + ] + }, + { + "comment": "Bash sigil with single quoted heredoc", + "begin": "\\s?(~BASH''')$", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + }, + "1": { + "name": "string.quoted.double.heredoc.elixir" + } + }, + "end": "^\\s*('''[SEOevpu]*)$", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + }, + "1": { + "name": "string.quoted.double.heredoc.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "source.shell" + } + ] + }, + { + "comment": "Bash sigil with curlies", + "begin": "~BASH\\{", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\}[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "source.shell" + }, + { + "include": "#nest_curly" + } + ] + }, + { + "comment": "Bash sigil with brackets", + "begin": "~BASH\\[", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\][SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": 
"#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "source.shell" + }, + { + "include": "#nest_brackets" + } + ] + }, + { + "comment": "Bash sigil with angle brackets", + "begin": "~BASH\\<", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\>[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "source.shell" + }, + { + "include": "#nest_ltgt" + } + ] + }, + { + "comment": "Bash sigil with parens", + "begin": "~BASH\\(", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\)[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "source.shell" + }, + { + "include": "#nest_parens" + } + ] + }, + { + "comment": "Bash sigil with slashes", + "begin": "~BASH\\/", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\/[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "source.shell" + } + ] + }, + { + "comment": "Bash sigil with pipes", + "begin": "~BASH\\|", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\|[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": 
"source.shell" + } + ] + }, + { + "comment": "Bash sigil with double quotes - selective shell highlighting (excludes #string to avoid \" conflict)", + "begin": "~BASH(?!\"\"\")\\\"", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\\"[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "#safe_shell_tokens" + } + ] + }, + { + "comment": "Bash sigil with single quotes - selective shell highlighting (excludes #string to avoid ' conflict)", + "begin": "~BASH(?!''')\\'", + "beginCaptures": { + "0": { + "name": "punctuation.definition.string.begin.elixir" + } + }, + "end": "\\'[SEOevpu]*", + "endCaptures": { + "0": { + "name": "punctuation.definition.string.end.elixir" + } + }, + "name": "source.shell", + "patterns": [ + { + "include": "#interpolated_elixir" + }, + { + "include": "#escaped_char" + }, + { + "include": "#safe_shell_tokens" + } + ] + }, { "begin": "~[a-z](?>\"\"\")", "beginCaptures": { @@ -2200,7 +2494,7 @@ "name": "string.quoted.double.literal.elixir" }, { - "begin": "~[A-Z][A-Z0-9]*\\'", + "begin": "~[A-Z][A-Z0-9]*(?!''')\\'", "beginCaptures": { "0": { "name": "punctuation.definition.string.begin.elixir" @@ -2216,7 +2510,7 @@ "name": "string.quoted.double.literal.elixir" }, { - "begin": "~[A-Z][A-Z0-9]*\\\"", + "begin": "~[A-Z][A-Z0-9]*(?!\"\"\")\\\"", "beginCaptures": { "0": { "name": "punctuation.definition.string.begin.elixir" @@ -2547,6 +2841,50 @@ } ], "repository": { + "safe_shell_tokens": { + "comment": "Shell tokens that do not transitively include #string — safe for use inside quote-delimited BASH sigils", + "patterns": [ + { + "include": "source.shell#comment" + }, + { + "include": "source.shell#floating_keyword" + }, + { + "include": "source.shell#keyword" + }, + { + "include": "source.shell#support" 
+ }, + { + "include": "source.shell#modifiers" + }, + { + "include": "source.shell#pipeline" + }, + { + "include": "source.shell#normal_statement_seperator" + }, + { + "include": "source.shell#numeric_literal" + }, + { + "include": "source.shell#boolean" + }, + { + "include": "source.shell#line_continuation" + }, + { + "include": "source.shell#redirect_fix" + }, + { + "include": "source.shell#redirect_number" + }, + { + "include": "source.shell#regex_comparison" + } + ] + }, "escaped_char": { "match": "\\\\(?:[0-7]{1,3}|x[\\da-fA-F]{1,2}|.)", "name": "constant.character.escape.elixir" diff --git a/tsconfig.json b/tsconfig.json index a86e6fd..935af25 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -5,7 +5,8 @@ "lib": ["ES2020"], "outDir": "out", "sourceMap": true, - "strict": true + "strict": true, + "skipLibCheck": true }, "exclude": ["node_modules", ".vscode-test", "elixir-ls", "elixir-ls-release"] }