diff --git a/build/Directory.Build.props b/build/Directory.Build.props index 7d9da74..627bad8 100644 --- a/build/Directory.Build.props +++ b/build/Directory.Build.props @@ -3,6 +3,6 @@ latest true 8.0.5 - 1.0.9 + 1.0.10 diff --git a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs index c77c547..6a0748d 100644 --- a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs +++ b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.IO; using BenchmarkDotNet.Attributes; @@ -11,7 +12,7 @@ namespace TextMateSharp.Benchmarks public class BigFileTokenizationBenchmark { private IGrammar _grammar = null!; - private string[] _lines = null!; + private string _content = null!; [GlobalSetup] public void Setup() @@ -40,8 +41,8 @@ public void Setup() // Load the file into memory - _lines = File.ReadAllLines(bigFilePath); - Console.WriteLine($"Loaded {_lines.Length} lines from bigfile.cs"); + _content = File.ReadAllText(bigFilePath); + Console.WriteLine($"Loaded bigfile.cs"); // Load the C# grammar RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus); @@ -60,14 +61,38 @@ public int TokenizeAllLines() int totalTokens = 0; IStateStack? ruleStack = null; - for (int i = 0; i < _lines.Length; i++) + ReadOnlyMemory contentMemory = _content.AsMemory(); + + foreach (var lineRange in GetLineRanges(_content)) { - ITokenizeLineResult result = _grammar.TokenizeLine(_lines[i], ruleStack, TimeSpan.MaxValue); + ReadOnlyMemory lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length); + ITokenizeLineResult result = _grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue); ruleStack = result.RuleStack; totalTokens += result.Tokens.Length; } return totalTokens; } + + static IEnumerable<(int Start, int Length)> GetLineRanges(string content) + { + int lineStart = 0; + + for (int i = 0; i < content.Length; i++) + { + if (content[i] == '\n') + { + int lineLength = i - lineStart + 1; // Include the \n + yield return (lineStart, lineLength); + lineStart = i + 1; + } + } + + // Handle last line without terminator + if (lineStart < content.Length) + { + yield return (lineStart, content.Length - lineStart); + } + } } } diff --git a/src/TextMateSharp.Demo/Program.cs b/src/TextMateSharp.Demo/Program.cs index eed263a..f0235f2 100644 --- a/src/TextMateSharp.Demo/Program.cs +++ b/src/TextMateSharp.Demo/Program.cs @@ -1,4 +1,5 @@ using System; +using System.Collections.Generic; using System.Globalization; using System.IO; @@ -53,45 +54,48 @@ static void Main(string[] args) IStateStack? ruleStack = null; - using (StreamReader sr = new StreamReader(fileToParse)) + string fileContent = File.ReadAllText(fileToParse); + ReadOnlyMemory contentMemory = fileContent.AsMemory(); + + foreach (var lineRange in GetLineRanges(fileContent)) { - string? line = sr.ReadLine(); + bool needsLineBreak = true; + + ReadOnlyMemory lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length); + ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue); + + ruleStack = result.RuleStack; - while (line != null) + foreach (IToken token in result.Tokens) { - ITokenizeLineResult result = grammar.TokenizeLine(line, ruleStack, TimeSpan.MaxValue); + int startIndex = Math.Min(token.StartIndex, lineRange.Length); + int endIndex = Math.Min(token.EndIndex, lineRange.Length); - ruleStack = result.RuleStack; + int foreground = -1; + int background = -1; + FontStyle fontStyle = FontStyle.NotSet; - foreach (IToken token in result.Tokens) + foreach (var themeRule in theme.Match(token.Scopes)) { - int startIndex = (token.StartIndex > line.Length) ? - line.Length : token.StartIndex; - int endIndex = (token.EndIndex > line.Length) ? - line.Length : token.EndIndex; + if (foreground == -1 && themeRule.foreground > 0) + foreground = themeRule.foreground; - int foreground = -1; - int background = -1; - FontStyle fontStyle = FontStyle.NotSet; + if (background == -1 && themeRule.background > 0) + background = themeRule.background; - foreach (var themeRule in theme.Match(token.Scopes)) - { - if (foreground == -1 && themeRule.foreground > 0) - foreground = themeRule.foreground; - - if (background == -1 && themeRule.background > 0) - background = themeRule.background; + if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0) + fontStyle = themeRule.fontStyle; + } - if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0) - fontStyle = themeRule.fontStyle; - } + ReadOnlySpan tokenSpan = lineMemory.Span.Slice(startIndex, endIndex - startIndex); + WriteToken(tokenSpan, foreground, background, fontStyle, theme); - WriteToken(line.SubstringAtIndexes(startIndex, endIndex), foreground, background, fontStyle, theme); - } + if (tokenSpan.IndexOf('\n') != -1) + needsLineBreak = false; + } + if (needsLineBreak) Console.WriteLine(); - line = sr.ReadLine(); - } } var colorDictionary = theme.GetGuiColorDictionary(); @@ -113,11 +117,12 @@ static void Main(string[] args) Console.WriteLine("ERROR: " + ex.Message); } } - static void WriteToken(string text, int foreground, int background, FontStyle fontStyle, Theme theme) + + static void WriteToken(ReadOnlySpan text, int foreground, int background, FontStyle fontStyle, Theme theme) { if (foreground == -1) { - Console.Write(text); + Console.Out.Write(text); return; } @@ -127,7 +132,8 @@ static void WriteToken(string text, int foreground, int background, FontStyle fo Color foregroundColor = GetColor(foreground, theme); Style style = new Style(foregroundColor, backgroundColor, decoration); - Markup markup = new Markup(text.Replace("[", "[[").Replace("]", "]]"), style); + string textStr = text.ToString(); + Markup markup = new Markup(textStr.Replace("[", "[[").Replace("]", "]]"), style); AnsiConsole.Write(markup); } @@ -173,13 +179,26 @@ static Color HexToColor(string hexString) return new Color(r, g, b); } - } - internal static class StringExtensions - { - internal static string SubstringAtIndexes(this string str, int startIndex, int endIndex) + static IEnumerable<(int Start, int Length)> GetLineRanges(string content) { - return str.Substring(startIndex, endIndex - startIndex); + int lineStart = 0; + + for (int i = 0; i < content.Length; i++) + { + if (content[i] == '\n') + { + int lineLength = i - lineStart + 1; // Include the \n + yield return (lineStart, lineLength); + lineStart = i + 1; + } + } + + // Handle last line without terminator + if (lineStart < content.Length) + { + yield return (lineStart, content.Length - lineStart); + } } } } diff --git a/src/TextMateSharp.Tests/Model/TMModelTests.cs b/src/TextMateSharp.Tests/Model/TMModelTests.cs index 5e6f901..bb743fa 100644 --- a/src/TextMateSharp.Tests/Model/TMModelTests.cs +++ b/src/TextMateSharp.Tests/Model/TMModelTests.cs @@ -1,4 +1,5 @@ -using Moq; +using System; +using Moq; using NUnit.Framework; @@ -125,9 +126,9 @@ public override int GetLineLength(int lineIndex) { return _lines[lineIndex].Length; } - public override LineText GetLineText(int lineIndex) + public override LineText GetLineTextIncludingTerminators(int lineIndex) { - return _lines[lineIndex]; + return _lines[lineIndex] + Environment.NewLine; } public override int GetNumberOfLines() { diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs index ff277ea..fe09793 100644 --- a/src/TextMateSharp/Internal/Grammars/Grammar.cs +++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs @@ -1,5 +1,4 @@ using System; -using System.Buffers; using System.Collections.Generic; using TextMateSharp.Grammars; @@ -255,46 +254,37 @@ private object Tokenize(ReadOnlyMemory lineText, StateStack prevState, boo } // Check if we need to append newline - char[] rentedBuffer = null; - ReadOnlyMemory effectiveLineText; - try + ReadOnlyMemory effectiveLineText; + if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') { - if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n') - { - // Only add \n if the passed lineText didn't have it. - // Use ArrayPool to avoid per-line allocation - int requiredLength = lineText.Length + 1; - rentedBuffer = ArrayPool.Shared.Rent(requiredLength); - lineText.Span.CopyTo(rentedBuffer); - rentedBuffer[lineText.Length] = '\n'; - effectiveLineText = rentedBuffer.AsMemory(0, requiredLength); - } - else - { - effectiveLineText = lineText; - } + // Only add \n if the passed lineText didn't have it. + // We need to allocate a new buffer with the newline + char[] buffer = new char[lineText.Length + 1]; + lineText.Span.CopyTo(buffer); + buffer[lineText.Length] = '\n'; + effectiveLineText = buffer.AsMemory(); + } + else + { + effectiveLineText = lineText; + } - int lineLength = effectiveLineText.Length; - LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors); - TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState, - lineTokens, true, timeLimit); + int lineLength = effectiveLineText.Length; + LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, + _balancedBracketSelectors); + TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, + prevState, + lineTokens, true, timeLimit); - if (emitBinaryTokens) - { - return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), - tokenizeResult.Stack, tokenizeResult.StoppedEarly); - } - finally + if (emitBinaryTokens) { - if (rentedBuffer != null) - { - ArrayPool.Shared.Return(rentedBuffer); - } + return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } + + return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength), + tokenizeResult.Stack, tokenizeResult.StoppedEarly); } private void GenerateRootId() diff --git a/src/TextMateSharp/Model/AbstractLineList.cs b/src/TextMateSharp/Model/AbstractLineList.cs index fb255da..2046d13 100644 --- a/src/TextMateSharp/Model/AbstractLineList.cs +++ b/src/TextMateSharp/Model/AbstractLineList.cs @@ -96,7 +96,7 @@ public int GetSize() public abstract int GetNumberOfLines(); - public abstract LineText GetLineText(int lineIndex); + public abstract LineText GetLineTextIncludingTerminators(int lineIndex); public abstract int GetLineLength(int lineIndex); diff --git a/src/TextMateSharp/Model/IModelLines.cs b/src/TextMateSharp/Model/IModelLines.cs index 80f7568..c38c62d 100644 --- a/src/TextMateSharp/Model/IModelLines.cs +++ b/src/TextMateSharp/Model/IModelLines.cs @@ -4,17 +4,73 @@ namespace TextMateSharp.Model { + /// + /// Represents a document model that provides line-based access for TextMate tokenization. + /// public interface IModelLines { + /// + /// Notifies that a new line has been added at the specified index. + /// + /// The zero-based index where the line was added. void AddLine(int lineIndex); + + /// + /// Notifies that a line has been removed at the specified index. + /// + /// The zero-based index of the removed line. void RemoveLine(int lineIndex); + + /// + /// Notifies that the content of a line has changed. + /// + /// The zero-based index of the updated line. void UpdateLine(int lineIndex); + + /// + /// Gets the number of model lines currently tracked. + /// int GetSize(); + + /// + /// Gets the model line at the specified index. + /// + /// The zero-based index of the line. ModelLine Get(int lineIndex); + + /// + /// Executes an action for each model line. + /// + /// The action to execute on each line. void ForEach(Action action); + + /// + /// Gets the total number of lines in the document. + /// int GetNumberOfLines(); - LineText GetLineText(int lineIndex); + + /// + /// Gets the text content of the line at the specified index. + /// + /// The zero-based index of the line. + /// The line text wrapped in a structure. + /// + /// For optimal performance, the returned text should include the line terminator + /// (e.g., '\n' or "\r\n") if one exists. When line terminators are not included, + /// the tokenization engine will allocate a new buffer to append a newline character, + /// which impacts performance and memory usage. + /// + LineText GetLineTextIncludingTerminators(int lineIndex); + + /// + /// Gets the length of the line at the specified index. + /// + /// The zero-based index of the line. int GetLineLength(int lineIndex); + + /// + /// Releases resources used by this model. + /// void Dispose(); } } \ No newline at end of file diff --git a/src/TextMateSharp/Model/ITMModel.cs b/src/TextMateSharp/Model/ITMModel.cs index a957d74..09f9e1d 100644 --- a/src/TextMateSharp/Model/ITMModel.cs +++ b/src/TextMateSharp/Model/ITMModel.cs @@ -4,15 +4,54 @@ namespace TextMateSharp.Model { + /// + /// Represents a TextMate model that manages tokenization of a document. + /// The model coordinates between the document content and the grammar to produce tokens. + /// public interface ITMModel { + /// + /// Gets the grammar currently used for tokenization. + /// + /// The current grammar, or null if no grammar is set. IGrammar GetGrammar(); + + /// + /// Sets the grammar to use for tokenization. + /// Changing the grammar will invalidate existing tokens and trigger re-tokenization. + /// + /// The grammar to use for tokenization. void SetGrammar(IGrammar grammar); + + /// + /// Registers a listener to be notified when tokens change in the model. + /// + /// The listener to receive token change notifications. void AddModelTokensChangedListener(IModelTokensChangedListener listener); + + /// + /// Removes a previously registered token change listener. + /// + /// The listener to remove. void RemoveModelTokensChangedListener(IModelTokensChangedListener listener); + + /// + /// Releases resources used by this model and stops background tokenization. + /// void Dispose(); + + /// + /// Gets the tokens for a specific line. + /// + /// The zero-based line index. + /// A list of tokens for the specified line, or null if the line has not been tokenized yet. List GetLineTokens(int line); - void ForceTokenization(int lineIndex); + /// + /// Forces immediate tokenization of a specific line, bypassing the background tokenization queue. + /// Use this when you need tokens for a line immediately (e.g., for visible lines in the viewport). + /// + /// The zero-based index of the line to tokenize. + void ForceTokenization(int lineIndex); } } \ No newline at end of file diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs index bafe0eb..f2d55ec 100644 --- a/src/TextMateSharp/Model/TMModel.cs +++ b/src/TextMateSharp/Model/TMModel.cs @@ -195,7 +195,7 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int ModelLine modeLine = model._lines.Get(lineIndex); try { - text = model._lines.GetLineText(lineIndex); + text = model._lines.GetLineTextIncludingTerminators(lineIndex); // Tokenize only the first X characters r = model._tokenizer.Tokenize(text, modeLine.State, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter); }