Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion build/Directory.Build.props
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,6 @@
<LangVersion>latest</LangVersion>
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<SystemTextJsonVersion>8.0.5</SystemTextJsonVersion>
<OnigwrapVersion>1.0.9</OnigwrapVersion>
<OnigwrapVersion>1.0.10</OnigwrapVersion>
</PropertyGroup>
</Project>
35 changes: 30 additions & 5 deletions src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.IO;

using BenchmarkDotNet.Attributes;
Expand All @@ -11,7 +12,7 @@ namespace TextMateSharp.Benchmarks
public class BigFileTokenizationBenchmark
{
private IGrammar _grammar = null!;
private string[] _lines = null!;
private string _content = null!;

[GlobalSetup]
public void Setup()
Expand Down Expand Up @@ -40,8 +41,8 @@ public void Setup()


// Load the file into memory
_lines = File.ReadAllLines(bigFilePath);
Console.WriteLine($"Loaded {_lines.Length} lines from bigfile.cs");
_content = File.ReadAllText(bigFilePath);
Console.WriteLine($"Loaded bigfile.cs");

// Load the C# grammar
RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus);
Expand All @@ -60,14 +61,38 @@ public int TokenizeAllLines()
int totalTokens = 0;
IStateStack? ruleStack = null;

for (int i = 0; i < _lines.Length; i++)
ReadOnlyMemory<char> contentMemory = _content.AsMemory();

foreach (var lineRange in GetLineRanges(_content))
{
ITokenizeLineResult result = _grammar.TokenizeLine(_lines[i], ruleStack, TimeSpan.MaxValue);
ReadOnlyMemory<char> lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length);
ITokenizeLineResult result = _grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue);
ruleStack = result.RuleStack;
totalTokens += result.Tokens.Length;
}

return totalTokens;
}

/// <summary>
/// Lazily enumerates the lines of <paramref name="content"/> as (Start, Length) ranges.
/// </summary>
/// <param name="content">The full text to split on '\n' characters.</param>
/// <returns>
/// One range per line. Each range includes its terminating '\n' when present;
/// a final line without a terminator is returned as-is. Empty content yields nothing.
/// </returns>
static IEnumerable<(int Start, int Length)> GetLineRanges(string content)
{
    int start = 0;
    int newlineIndex;

    // Jump from one '\n' to the next instead of inspecting every character.
    while ((newlineIndex = content.IndexOf('\n', start)) >= 0)
    {
        int nextStart = newlineIndex + 1;

        // The range covers the line text plus its '\n'.
        yield return (start, nextStart - start);
        start = nextStart;
    }

    // Trailing text after the last '\n' (or the whole content when it has none).
    if (start < content.Length)
    {
        yield return (start, content.Length - start);
    }
}
}
}
89 changes: 54 additions & 35 deletions src/TextMateSharp.Demo/Program.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;

Expand Down Expand Up @@ -53,45 +54,48 @@ static void Main(string[] args)

IStateStack? ruleStack = null;

using (StreamReader sr = new StreamReader(fileToParse))
string fileContent = File.ReadAllText(fileToParse);
ReadOnlyMemory<char> contentMemory = fileContent.AsMemory();

foreach (var lineRange in GetLineRanges(fileContent))
{
string? line = sr.ReadLine();
bool needsLineBreak = true;

ReadOnlyMemory<char> lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length);
ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue);

ruleStack = result.RuleStack;

while (line != null)
foreach (IToken token in result.Tokens)
{
ITokenizeLineResult result = grammar.TokenizeLine(line, ruleStack, TimeSpan.MaxValue);
int startIndex = Math.Min(token.StartIndex, lineRange.Length);
int endIndex = Math.Min(token.EndIndex, lineRange.Length);

ruleStack = result.RuleStack;
int foreground = -1;
int background = -1;
FontStyle fontStyle = FontStyle.NotSet;

foreach (IToken token in result.Tokens)
foreach (var themeRule in theme.Match(token.Scopes))
{
int startIndex = (token.StartIndex > line.Length) ?
line.Length : token.StartIndex;
int endIndex = (token.EndIndex > line.Length) ?
line.Length : token.EndIndex;
if (foreground == -1 && themeRule.foreground > 0)
foreground = themeRule.foreground;

int foreground = -1;
int background = -1;
FontStyle fontStyle = FontStyle.NotSet;
if (background == -1 && themeRule.background > 0)
background = themeRule.background;

foreach (var themeRule in theme.Match(token.Scopes))
{
if (foreground == -1 && themeRule.foreground > 0)
foreground = themeRule.foreground;

if (background == -1 && themeRule.background > 0)
background = themeRule.background;
if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0)
fontStyle = themeRule.fontStyle;
}

if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0)
fontStyle = themeRule.fontStyle;
}
ReadOnlySpan<char> tokenSpan = lineMemory.Span.Slice(startIndex, endIndex - startIndex);
WriteToken(tokenSpan, foreground, background, fontStyle, theme);

WriteToken(line.SubstringAtIndexes(startIndex, endIndex), foreground, background, fontStyle, theme);
}
if (tokenSpan.IndexOf('\n') != -1)
needsLineBreak = false;
}

if (needsLineBreak)
Console.WriteLine();
line = sr.ReadLine();
}
}

var colorDictionary = theme.GetGuiColorDictionary();
Expand All @@ -113,11 +117,12 @@ static void Main(string[] args)
Console.WriteLine("ERROR: " + ex.Message);
}
}
static void WriteToken(string text, int foreground, int background, FontStyle fontStyle, Theme theme)

static void WriteToken(ReadOnlySpan<char> text, int foreground, int background, FontStyle fontStyle, Theme theme)
{
if (foreground == -1)
{
Console.Write(text);
Console.Out.Write(text);
return;
}

Expand All @@ -127,7 +132,8 @@ static void WriteToken(string text, int foreground, int background, FontStyle fo
Color foregroundColor = GetColor(foreground, theme);

Style style = new Style(foregroundColor, backgroundColor, decoration);
Markup markup = new Markup(text.Replace("[", "[[").Replace("]", "]]"), style);
string textStr = text.ToString();
Markup markup = new Markup(textStr.Replace("[", "[[").Replace("]", "]]"), style);

AnsiConsole.Write(markup);
}
Expand Down Expand Up @@ -173,13 +179,26 @@ static Color HexToColor(string hexString)

return new Color(r, g, b);
}
}

internal static class StringExtensions
{
internal static string SubstringAtIndexes(this string str, int startIndex, int endIndex)
static IEnumerable<(int Start, int Length)> GetLineRanges(string content)
{
return str.Substring(startIndex, endIndex - startIndex);
int lineStart = 0;

for (int i = 0; i < content.Length; i++)
{
if (content[i] == '\n')
{
int lineLength = i - lineStart + 1; // Include the \n
yield return (lineStart, lineLength);
lineStart = i + 1;
}
}

// Handle last line without terminator
if (lineStart < content.Length)
{
yield return (lineStart, content.Length - lineStart);
}
}
}
}
7 changes: 4 additions & 3 deletions src/TextMateSharp.Tests/Model/TMModelTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Moq;
using System;
using Moq;

using NUnit.Framework;

Expand Down Expand Up @@ -125,9 +126,9 @@ public override int GetLineLength(int lineIndex)
{
return _lines[lineIndex].Length;
}
public override LineText GetLineText(int lineIndex)
public override LineText GetLineTextIncludingTerminators(int lineIndex)
{
return _lines[lineIndex];
return _lines[lineIndex] + Environment.NewLine;
}
public override int GetNumberOfLines()
{
Expand Down
60 changes: 25 additions & 35 deletions src/TextMateSharp/Internal/Grammars/Grammar.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
using System;
using System.Buffers;
using System.Collections.Generic;

using TextMateSharp.Grammars;
Expand Down Expand Up @@ -255,46 +254,37 @@ private object Tokenize(ReadOnlyMemory<char> lineText, StateStack prevState, boo
}

// Check if we need to append newline
char[] rentedBuffer = null;
ReadOnlyMemory<char> effectiveLineText;

try
ReadOnlyMemory<char> effectiveLineText;
if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n')
{
if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n')
{
// Only add \n if the passed lineText didn't have it.
// Use ArrayPool to avoid per-line allocation
int requiredLength = lineText.Length + 1;
rentedBuffer = ArrayPool<char>.Shared.Rent(requiredLength);
lineText.Span.CopyTo(rentedBuffer);
rentedBuffer[lineText.Length] = '\n';
effectiveLineText = rentedBuffer.AsMemory(0, requiredLength);
}
else
{
effectiveLineText = lineText;
}
// Only add \n if the passed lineText didn't have it.
// We need to allocate a new buffer with the newline
char[] buffer = new char[lineText.Length + 1];
lineText.Span.CopyTo(buffer);
buffer[lineText.Length] = '\n';
effectiveLineText = buffer.AsMemory();
}
else
{
effectiveLineText = lineText;
}

int lineLength = effectiveLineText.Length;
LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors);
TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState,
lineTokens, true, timeLimit);
int lineLength = effectiveLineText.Length;
LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers,
_balancedBracketSelectors);
TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0,
prevState,
lineTokens, true, timeLimit);

if (emitBinaryTokens)
{
return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength),
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
}
return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength),
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
}
finally
if (emitBinaryTokens)
{
if (rentedBuffer != null)
{
ArrayPool<char>.Shared.Return(rentedBuffer);
}
return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength),
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
}

return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength),
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
}

private void GenerateRootId()
Expand Down
2 changes: 1 addition & 1 deletion src/TextMateSharp/Model/AbstractLineList.cs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ public int GetSize()

public abstract int GetNumberOfLines();

public abstract LineText GetLineText(int lineIndex);
public abstract LineText GetLineTextIncludingTerminators(int lineIndex);

public abstract int GetLineLength(int lineIndex);

Expand Down
58 changes: 57 additions & 1 deletion src/TextMateSharp/Model/IModelLines.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,73 @@

namespace TextMateSharp.Model
{
/// <summary>
/// Represents a document model that provides line-based access for TextMate tokenization.
/// </summary>
public interface IModelLines
{
/// <summary>
/// Notifies that a new line has been added at the specified index.
/// </summary>
/// <param name="lineIndex">The zero-based index where the line was added.</param>
void AddLine(int lineIndex);

/// <summary>
/// Notifies that a line has been removed at the specified index.
/// </summary>
/// <param name="lineIndex">The zero-based index of the removed line.</param>
void RemoveLine(int lineIndex);

/// <summary>
/// Notifies that the content of a line has changed.
/// </summary>
/// <param name="lineIndex">The zero-based index of the updated line.</param>
void UpdateLine(int lineIndex);

/// <summary>
/// Gets the number of model lines currently tracked.
/// </summary>
int GetSize();

/// <summary>
/// Gets the model line at the specified index.
/// </summary>
/// <param name="lineIndex">The zero-based index of the line.</param>
ModelLine Get(int lineIndex);

/// <summary>
/// Executes an action for each model line.
/// </summary>
/// <param name="action">The action to execute on each line.</param>
void ForEach(Action<ModelLine> action);

/// <summary>
/// Gets the total number of lines in the document.
/// </summary>
int GetNumberOfLines();
LineText GetLineText(int lineIndex);

/// <summary>
/// Gets the text content of the line at the specified index.
/// </summary>
/// <param name="lineIndex">The zero-based index of the line.</param>
/// <returns>The line text wrapped in a <see cref="LineText"/> structure.</returns>
/// <remarks>
/// For optimal performance, the returned text should include the line terminator
/// (e.g., '\n' or "\r\n") if one exists. When line terminators are not included,
/// the tokenization engine will allocate a new buffer to append a newline character,
/// which impacts performance and memory usage.
/// </remarks>
LineText GetLineTextIncludingTerminators(int lineIndex);

/// <summary>
/// Gets the length of the line at the specified index.
/// </summary>
/// <param name="lineIndex">The zero-based index of the line.</param>
int GetLineLength(int lineIndex);

/// <summary>
/// Releases resources used by this model.
/// </summary>
void Dispose();
}
}
Loading
Loading