diff --git a/build/Directory.Build.props b/build/Directory.Build.props
index 7d9da74..627bad8 100644
--- a/build/Directory.Build.props
+++ b/build/Directory.Build.props
@@ -3,6 +3,6 @@
latest
true
8.0.5
- 1.0.9
+ 1.0.10
diff --git a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs
index c77c547..6a0748d 100644
--- a/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs
+++ b/src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs
@@ -1,4 +1,5 @@
using System;
+using System.Collections.Generic;
using System.IO;
using BenchmarkDotNet.Attributes;
@@ -11,7 +12,7 @@ namespace TextMateSharp.Benchmarks
public class BigFileTokenizationBenchmark
{
private IGrammar _grammar = null!;
- private string[] _lines = null!;
+ private string _content = null!;
[GlobalSetup]
public void Setup()
@@ -40,8 +41,8 @@ public void Setup()
// Load the file into memory
- _lines = File.ReadAllLines(bigFilePath);
- Console.WriteLine($"Loaded {_lines.Length} lines from bigfile.cs");
+ _content = File.ReadAllText(bigFilePath);
+ Console.WriteLine($"Loaded bigfile.cs");
// Load the C# grammar
RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus);
@@ -60,14 +61,42 @@ public int TokenizeAllLines()
int totalTokens = 0;
IStateStack? ruleStack = null;
- for (int i = 0; i < _lines.Length; i++)
+ ReadOnlyMemory<char> contentMemory = _content.AsMemory();
+
+ foreach (var lineRange in GetLineRanges(_content))
{
- ITokenizeLineResult result = _grammar.TokenizeLine(_lines[i], ruleStack, TimeSpan.MaxValue);
+ ReadOnlyMemory<char> lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length);
+ ITokenizeLineResult result = _grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue);
ruleStack = result.RuleStack;
totalTokens += result.Tokens.Length;
}
return totalTokens;
}
+
+ /// <summary>
+ /// Lazily yields the (start, length) range of each line in <paramref name="content"/>.
+ /// Every range keeps its terminating '\n', so a slice can be tokenized as-is.
+ /// </summary>
+ /// <param name="content">The text to split into lines.</param>
+ /// <returns>One range per line; a final line without '\n' is yielded unterminated.</returns>
+ private static IEnumerable<(int Start, int Length)> GetLineRanges(string content)
+ {
+ int lineStart = 0;
+ int newlineIndex;
+
+ // Let string.IndexOf locate each terminator instead of testing every char by hand.
+ while ((newlineIndex = content.IndexOf('\n', lineStart)) >= 0)
+ {
+ yield return (lineStart, newlineIndex - lineStart + 1); // +1 keeps the '\n'
+ lineStart = newlineIndex + 1;
+ }
+
+ // Handle last line without terminator
+ if (lineStart < content.Length)
+ {
+ yield return (lineStart, content.Length - lineStart);
+ }
+ }
}
}
diff --git a/src/TextMateSharp.Demo/Program.cs b/src/TextMateSharp.Demo/Program.cs
index eed263a..f0235f2 100644
--- a/src/TextMateSharp.Demo/Program.cs
+++ b/src/TextMateSharp.Demo/Program.cs
@@ -1,4 +1,5 @@
using System;
+using System.Collections.Generic;
using System.Globalization;
using System.IO;
@@ -53,45 +54,48 @@ static void Main(string[] args)
IStateStack? ruleStack = null;
- using (StreamReader sr = new StreamReader(fileToParse))
+ string fileContent = File.ReadAllText(fileToParse);
+ ReadOnlyMemory<char> contentMemory = fileContent.AsMemory();
+
+ foreach (var lineRange in GetLineRanges(fileContent))
{
- string? line = sr.ReadLine();
+ bool needsLineBreak = true;
+
+ ReadOnlyMemory<char> lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length);
+ ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue);
+
+ ruleStack = result.RuleStack;
- while (line != null)
+ foreach (IToken token in result.Tokens)
{
- ITokenizeLineResult result = grammar.TokenizeLine(line, ruleStack, TimeSpan.MaxValue);
+ int startIndex = Math.Min(token.StartIndex, lineRange.Length);
+ int endIndex = Math.Min(token.EndIndex, lineRange.Length);
- ruleStack = result.RuleStack;
+ int foreground = -1;
+ int background = -1;
+ FontStyle fontStyle = FontStyle.NotSet;
- foreach (IToken token in result.Tokens)
+ foreach (var themeRule in theme.Match(token.Scopes))
{
- int startIndex = (token.StartIndex > line.Length) ?
- line.Length : token.StartIndex;
- int endIndex = (token.EndIndex > line.Length) ?
- line.Length : token.EndIndex;
+ if (foreground == -1 && themeRule.foreground > 0)
+ foreground = themeRule.foreground;
- int foreground = -1;
- int background = -1;
- FontStyle fontStyle = FontStyle.NotSet;
+ if (background == -1 && themeRule.background > 0)
+ background = themeRule.background;
- foreach (var themeRule in theme.Match(token.Scopes))
- {
- if (foreground == -1 && themeRule.foreground > 0)
- foreground = themeRule.foreground;
-
- if (background == -1 && themeRule.background > 0)
- background = themeRule.background;
+ if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0)
+ fontStyle = themeRule.fontStyle;
+ }
- if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0)
- fontStyle = themeRule.fontStyle;
- }
+ ReadOnlySpan<char> tokenSpan = lineMemory.Span.Slice(startIndex, endIndex - startIndex);
+ WriteToken(tokenSpan, foreground, background, fontStyle, theme);
- WriteToken(line.SubstringAtIndexes(startIndex, endIndex), foreground, background, fontStyle, theme);
- }
+ if (tokenSpan.IndexOf('\n') != -1)
+ needsLineBreak = false;
+ }
+ if (needsLineBreak)
Console.WriteLine();
- line = sr.ReadLine();
- }
}
var colorDictionary = theme.GetGuiColorDictionary();
@@ -113,11 +117,12 @@ static void Main(string[] args)
Console.WriteLine("ERROR: " + ex.Message);
}
}
- static void WriteToken(string text, int foreground, int background, FontStyle fontStyle, Theme theme)
+
+ static void WriteToken(ReadOnlySpan<char> text, int foreground, int background, FontStyle fontStyle, Theme theme)
{
if (foreground == -1)
{
- Console.Write(text);
+ Console.Out.Write(text);
return;
}
@@ -127,7 +132,8 @@ static void WriteToken(string text, int foreground, int background, FontStyle fo
Color foregroundColor = GetColor(foreground, theme);
Style style = new Style(foregroundColor, backgroundColor, decoration);
- Markup markup = new Markup(text.Replace("[", "[[").Replace("]", "]]"), style);
+ string textStr = text.ToString();
+ Markup markup = new Markup(textStr.Replace("[", "[[").Replace("]", "]]"), style);
AnsiConsole.Write(markup);
}
@@ -173,13 +179,30 @@ static Color HexToColor(string hexString)
return new Color(r, g, b);
}
- }
- internal static class StringExtensions
- {
- internal static string SubstringAtIndexes(this string str, int startIndex, int endIndex)
+ /// <summary>
+ /// Lazily yields the (start, length) range of each line in <paramref name="content"/>.
+ /// Every range keeps its terminating '\n', so a slice can be tokenized as-is.
+ /// </summary>
+ /// <param name="content">The text to split into lines.</param>
+ /// <returns>One range per line; a final line without '\n' is yielded unterminated.</returns>
+ static IEnumerable<(int Start, int Length)> GetLineRanges(string content)
{
- return str.Substring(startIndex, endIndex - startIndex);
+ int lineStart = 0;
+ int newlineIndex;
+
+ // Let string.IndexOf locate each terminator instead of testing every char by hand.
+ while ((newlineIndex = content.IndexOf('\n', lineStart)) >= 0)
+ {
+ yield return (lineStart, newlineIndex - lineStart + 1); // +1 keeps the '\n'
+ lineStart = newlineIndex + 1;
+ }
+
+ // Handle last line without terminator
+ if (lineStart < content.Length)
+ {
+ yield return (lineStart, content.Length - lineStart);
+ }
}
}
}
diff --git a/src/TextMateSharp.Tests/Model/TMModelTests.cs b/src/TextMateSharp.Tests/Model/TMModelTests.cs
index 5e6f901..bb743fa 100644
--- a/src/TextMateSharp.Tests/Model/TMModelTests.cs
+++ b/src/TextMateSharp.Tests/Model/TMModelTests.cs
@@ -1,4 +1,5 @@
-using Moq;
+using System;
+using Moq;
using NUnit.Framework;
@@ -125,9 +126,9 @@ public override int GetLineLength(int lineIndex)
{
return _lines[lineIndex].Length;
}
- public override LineText GetLineText(int lineIndex)
+ public override LineText GetLineTextIncludingTerminators(int lineIndex)
{
- return _lines[lineIndex];
+ return _lines[lineIndex] + "\n"; // fixed '\n' (not Environment.NewLine) keeps the fixture identical on every OS
}
public override int GetNumberOfLines()
{
diff --git a/src/TextMateSharp/Internal/Grammars/Grammar.cs b/src/TextMateSharp/Internal/Grammars/Grammar.cs
index ff277ea..fe09793 100644
--- a/src/TextMateSharp/Internal/Grammars/Grammar.cs
+++ b/src/TextMateSharp/Internal/Grammars/Grammar.cs
@@ -1,5 +1,4 @@
using System;
-using System.Buffers;
using System.Collections.Generic;
using TextMateSharp.Grammars;
@@ -255,46 +254,37 @@ private object Tokenize(ReadOnlyMemory<char> lineText, StateStack prevState, boo
}
// Check if we need to append newline
- char[] rentedBuffer = null;
- ReadOnlyMemory<char> effectiveLineText;
- try
+ ReadOnlyMemory<char> effectiveLineText;
+ if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n')
{
- if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n')
- {
- // Only add \n if the passed lineText didn't have it.
- // Use ArrayPool to avoid per-line allocation
- int requiredLength = lineText.Length + 1;
- rentedBuffer = ArrayPool<char>.Shared.Rent(requiredLength);
- lineText.Span.CopyTo(rentedBuffer);
- rentedBuffer[lineText.Length] = '\n';
- effectiveLineText = rentedBuffer.AsMemory(0, requiredLength);
- }
- else
- {
- effectiveLineText = lineText;
- }
+ // Only add \n if the passed lineText didn't have it.
+ // We need to allocate a new buffer with the newline
+ char[] buffer = new char[lineText.Length + 1];
+ lineText.Span.CopyTo(buffer);
+ buffer[lineText.Length] = '\n';
+ effectiveLineText = buffer.AsMemory();
+ }
+ else
+ {
+ effectiveLineText = lineText;
+ }
- int lineLength = effectiveLineText.Length;
- LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors);
- TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState,
- lineTokens, true, timeLimit);
+ int lineLength = effectiveLineText.Length;
+ LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers,
+ _balancedBracketSelectors);
+ TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0,
+ prevState,
+ lineTokens, true, timeLimit);
- if (emitBinaryTokens)
- {
- return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength),
- tokenizeResult.Stack, tokenizeResult.StoppedEarly);
- }
- return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength),
- tokenizeResult.Stack, tokenizeResult.StoppedEarly);
- }
- finally
+ if (emitBinaryTokens)
{
- if (rentedBuffer != null)
- {
- ArrayPool<char>.Shared.Return(rentedBuffer);
- }
+ return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength),
+ tokenizeResult.Stack, tokenizeResult.StoppedEarly);
}
+
+ return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength),
+ tokenizeResult.Stack, tokenizeResult.StoppedEarly);
}
private void GenerateRootId()
diff --git a/src/TextMateSharp/Model/AbstractLineList.cs b/src/TextMateSharp/Model/AbstractLineList.cs
index fb255da..2046d13 100644
--- a/src/TextMateSharp/Model/AbstractLineList.cs
+++ b/src/TextMateSharp/Model/AbstractLineList.cs
@@ -96,7 +96,7 @@ public int GetSize()
public abstract int GetNumberOfLines();
- public abstract LineText GetLineText(int lineIndex);
+ public abstract LineText GetLineTextIncludingTerminators(int lineIndex);
public abstract int GetLineLength(int lineIndex);
diff --git a/src/TextMateSharp/Model/IModelLines.cs b/src/TextMateSharp/Model/IModelLines.cs
index 80f7568..c38c62d 100644
--- a/src/TextMateSharp/Model/IModelLines.cs
+++ b/src/TextMateSharp/Model/IModelLines.cs
@@ -4,17 +4,73 @@
namespace TextMateSharp.Model
{
+ /// <summary>
+ /// Represents a document model that provides line-based access for TextMate tokenization.
+ /// </summary>
public interface IModelLines
{
+ /// <summary>
+ /// Notifies that a new line has been added at the specified index.
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index where the line was added.</param>
void AddLine(int lineIndex);
+
+ /// <summary>
+ /// Notifies that a line has been removed at the specified index.
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index of the removed line.</param>
void RemoveLine(int lineIndex);
+
+ /// <summary>
+ /// Notifies that the content of a line has changed.
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index of the updated line.</param>
void UpdateLine(int lineIndex);
+
+ /// <summary>
+ /// Gets the number of model lines currently tracked.
+ /// </summary>
int GetSize();
+
+ /// <summary>
+ /// Gets the model line at the specified index.
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index of the line.</param>
ModelLine Get(int lineIndex);
+
+ /// <summary>
+ /// Executes an action for each model line.
+ /// </summary>
+ /// <param name="action">The action to execute on each line.</param>
void ForEach(Action<ModelLine> action);
+
+ /// <summary>
+ /// Gets the total number of lines in the document.
+ /// </summary>
int GetNumberOfLines();
- LineText GetLineText(int lineIndex);
+
+ /// <summary>
+ /// Gets the text content of the line at the specified index.
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index of the line.</param>
+ /// <returns>The line text wrapped in a <see cref="LineText"/> structure.</returns>
+ /// <remarks>
+ /// For optimal performance, the returned text should include the line terminator
+ /// (e.g., '\n' or "\r\n") if one exists. When line terminators are not included,
+ /// the tokenization engine will allocate a new buffer to append a newline character,
+ /// which impacts performance and memory usage.
+ /// </remarks>
+ LineText GetLineTextIncludingTerminators(int lineIndex);
+
+ /// <summary>
+ /// Gets the length of the line at the specified index.
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index of the line.</param>
int GetLineLength(int lineIndex);
+
+ /// <summary>
+ /// Releases resources used by this model.
+ /// </summary>
void Dispose();
}
}
\ No newline at end of file
diff --git a/src/TextMateSharp/Model/ITMModel.cs b/src/TextMateSharp/Model/ITMModel.cs
index a957d74..09f9e1d 100644
--- a/src/TextMateSharp/Model/ITMModel.cs
+++ b/src/TextMateSharp/Model/ITMModel.cs
@@ -4,15 +4,54 @@
namespace TextMateSharp.Model
{
+ /// <summary>
+ /// Represents a TextMate model that manages tokenization of a document.
+ /// The model coordinates between the document content and the grammar to produce tokens.
+ /// </summary>
public interface ITMModel
{
+ /// <summary>
+ /// Gets the grammar currently used for tokenization.
+ /// </summary>
+ /// <returns>The current grammar, or null if no grammar is set.</returns>
IGrammar GetGrammar();
+
+ /// <summary>
+ /// Sets the grammar to use for tokenization.
+ /// Changing the grammar will invalidate existing tokens and trigger re-tokenization.
+ /// </summary>
+ /// <param name="grammar">The grammar to use for tokenization.</param>
void SetGrammar(IGrammar grammar);
+
+ /// <summary>
+ /// Registers a listener to be notified when tokens change in the model.
+ /// </summary>
+ /// <param name="listener">The listener to receive token change notifications.</param>
void AddModelTokensChangedListener(IModelTokensChangedListener listener);
+
+ /// <summary>
+ /// Removes a previously registered token change listener.
+ /// </summary>
+ /// <param name="listener">The listener to remove.</param>
void RemoveModelTokensChangedListener(IModelTokensChangedListener listener);
+
+ /// <summary>
+ /// Releases resources used by this model and stops background tokenization.
+ /// </summary>
void Dispose();
+
+ /// <summary>
+ /// Gets the tokens for a specific line.
+ /// </summary>
+ /// <param name="line">The zero-based line index.</param>
+ /// <returns>A list of tokens for the specified line, or null if the line has not been tokenized yet.</returns>
List<TMToken> GetLineTokens(int line);
- void ForceTokenization(int lineIndex);
+ /// <summary>
+ /// Forces immediate tokenization of a specific line, bypassing the background tokenization queue.
+ /// Use this when you need tokens for a line immediately (e.g., for visible lines in the viewport).
+ /// </summary>
+ /// <param name="lineIndex">The zero-based index of the line to tokenize.</param>
+ void ForceTokenization(int lineIndex);
}
}
\ No newline at end of file
diff --git a/src/TextMateSharp/Model/TMModel.cs b/src/TextMateSharp/Model/TMModel.cs
index bafe0eb..f2d55ec 100644
--- a/src/TextMateSharp/Model/TMModel.cs
+++ b/src/TextMateSharp/Model/TMModel.cs
@@ -195,7 +195,7 @@ public int UpdateTokensInRange(ModelTokensChangedEventBuilder eventBuilder, int
ModelLine modeLine = model._lines.Get(lineIndex);
try
{
- text = model._lines.GetLineText(lineIndex);
+ text = model._lines.GetLineTextIncludingTerminators(lineIndex);
// Tokenize only the first X characters
r = model._tokenizer.Tokenize(text, modeLine.State, 0, MAX_LEN_TO_TOKENIZE, stopLineTokenizationAfter);
}