Skip to content

Commit a80eb82

Browse files
authored
Merge pull request #103 from danipen/fix-readonly-spans
Replace ArrayPool with direct allocation and improve line terminator handling
2 parents 69881cc + 27c5e2d commit a80eb82

9 files changed

Lines changed: 213 additions & 83 deletions

File tree

build/Directory.Build.props

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,6 @@
33
<LangVersion>latest</LangVersion>
44
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
55
<SystemTextJsonVersion>8.0.5</SystemTextJsonVersion>
6-
<OnigwrapVersion>1.0.9</OnigwrapVersion>
6+
<OnigwrapVersion>1.0.10</OnigwrapVersion>
77
</PropertyGroup>
88
</Project>

src/TextMateSharp.Benchmarks/BigFileTokenizationBenchmark.cs

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.IO;
34

45
using BenchmarkDotNet.Attributes;
@@ -11,7 +12,7 @@ namespace TextMateSharp.Benchmarks
1112
public class BigFileTokenizationBenchmark
1213
{
1314
private IGrammar _grammar = null!;
14-
private string[] _lines = null!;
15+
private string _content = null!;
1516

1617
[GlobalSetup]
1718
public void Setup()
@@ -40,8 +41,8 @@ public void Setup()
4041

4142

4243
// Load the file into memory
43-
_lines = File.ReadAllLines(bigFilePath);
44-
Console.WriteLine($"Loaded {_lines.Length} lines from bigfile.cs");
44+
_content = File.ReadAllText(bigFilePath);
45+
Console.WriteLine($"Loaded bigfile.cs");
4546

4647
// Load the C# grammar
4748
RegistryOptions options = new RegistryOptions(ThemeName.DarkPlus);
@@ -60,14 +61,38 @@ public int TokenizeAllLines()
6061
int totalTokens = 0;
6162
IStateStack? ruleStack = null;
6263

63-
for (int i = 0; i < _lines.Length; i++)
64+
ReadOnlyMemory<char> contentMemory = _content.AsMemory();
65+
66+
foreach (var lineRange in GetLineRanges(_content))
6467
{
65-
ITokenizeLineResult result = _grammar.TokenizeLine(_lines[i], ruleStack, TimeSpan.MaxValue);
68+
ReadOnlyMemory<char> lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length);
69+
ITokenizeLineResult result = _grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue);
6670
ruleStack = result.RuleStack;
6771
totalTokens += result.Tokens.Length;
6872
}
6973

7074
return totalTokens;
7175
}
76+
77+
static IEnumerable<(int Start, int Length)> GetLineRanges(string content)
78+
{
79+
int lineStart = 0;
80+
81+
for (int i = 0; i < content.Length; i++)
82+
{
83+
if (content[i] == '\n')
84+
{
85+
int lineLength = i - lineStart + 1; // Include the \n
86+
yield return (lineStart, lineLength);
87+
lineStart = i + 1;
88+
}
89+
}
90+
91+
// Handle last line without terminator
92+
if (lineStart < content.Length)
93+
{
94+
yield return (lineStart, content.Length - lineStart);
95+
}
96+
}
7297
}
7398
}

src/TextMateSharp.Demo/Program.cs

Lines changed: 54 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
using System;
2+
using System.Collections.Generic;
23
using System.Globalization;
34
using System.IO;
45

@@ -53,45 +54,48 @@ static void Main(string[] args)
5354

5455
IStateStack? ruleStack = null;
5556

56-
using (StreamReader sr = new StreamReader(fileToParse))
57+
string fileContent = File.ReadAllText(fileToParse);
58+
ReadOnlyMemory<char> contentMemory = fileContent.AsMemory();
59+
60+
foreach (var lineRange in GetLineRanges(fileContent))
5761
{
58-
string? line = sr.ReadLine();
62+
bool needsLineBreak = true;
63+
64+
ReadOnlyMemory<char> lineMemory = contentMemory.Slice(lineRange.Start, lineRange.Length);
65+
ITokenizeLineResult result = grammar.TokenizeLine(lineMemory, ruleStack, TimeSpan.MaxValue);
66+
67+
ruleStack = result.RuleStack;
5968

60-
while (line != null)
69+
foreach (IToken token in result.Tokens)
6170
{
62-
ITokenizeLineResult result = grammar.TokenizeLine(line, ruleStack, TimeSpan.MaxValue);
71+
int startIndex = Math.Min(token.StartIndex, lineRange.Length);
72+
int endIndex = Math.Min(token.EndIndex, lineRange.Length);
6373

64-
ruleStack = result.RuleStack;
74+
int foreground = -1;
75+
int background = -1;
76+
FontStyle fontStyle = FontStyle.NotSet;
6577

66-
foreach (IToken token in result.Tokens)
78+
foreach (var themeRule in theme.Match(token.Scopes))
6779
{
68-
int startIndex = (token.StartIndex > line.Length) ?
69-
line.Length : token.StartIndex;
70-
int endIndex = (token.EndIndex > line.Length) ?
71-
line.Length : token.EndIndex;
80+
if (foreground == -1 && themeRule.foreground > 0)
81+
foreground = themeRule.foreground;
7282

73-
int foreground = -1;
74-
int background = -1;
75-
FontStyle fontStyle = FontStyle.NotSet;
83+
if (background == -1 && themeRule.background > 0)
84+
background = themeRule.background;
7685

77-
foreach (var themeRule in theme.Match(token.Scopes))
78-
{
79-
if (foreground == -1 && themeRule.foreground > 0)
80-
foreground = themeRule.foreground;
81-
82-
if (background == -1 && themeRule.background > 0)
83-
background = themeRule.background;
86+
if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0)
87+
fontStyle = themeRule.fontStyle;
88+
}
8489

85-
if (fontStyle == FontStyle.NotSet && themeRule.fontStyle > 0)
86-
fontStyle = themeRule.fontStyle;
87-
}
90+
ReadOnlySpan<char> tokenSpan = lineMemory.Span.Slice(startIndex, endIndex - startIndex);
91+
WriteToken(tokenSpan, foreground, background, fontStyle, theme);
8892

89-
WriteToken(line.SubstringAtIndexes(startIndex, endIndex), foreground, background, fontStyle, theme);
90-
}
93+
if (tokenSpan.IndexOf('\n') != -1)
94+
needsLineBreak = false;
95+
}
9196

97+
if (needsLineBreak)
9298
Console.WriteLine();
93-
line = sr.ReadLine();
94-
}
9599
}
96100

97101
var colorDictionary = theme.GetGuiColorDictionary();
@@ -113,11 +117,12 @@ static void Main(string[] args)
113117
Console.WriteLine("ERROR: " + ex.Message);
114118
}
115119
}
116-
static void WriteToken(string text, int foreground, int background, FontStyle fontStyle, Theme theme)
120+
121+
static void WriteToken(ReadOnlySpan<char> text, int foreground, int background, FontStyle fontStyle, Theme theme)
117122
{
118123
if (foreground == -1)
119124
{
120-
Console.Write(text);
125+
Console.Out.Write(text);
121126
return;
122127
}
123128

@@ -127,7 +132,8 @@ static void WriteToken(string text, int foreground, int background, FontStyle fo
127132
Color foregroundColor = GetColor(foreground, theme);
128133

129134
Style style = new Style(foregroundColor, backgroundColor, decoration);
130-
Markup markup = new Markup(text.Replace("[", "[[").Replace("]", "]]"), style);
135+
string textStr = text.ToString();
136+
Markup markup = new Markup(textStr.Replace("[", "[[").Replace("]", "]]"), style);
131137

132138
AnsiConsole.Write(markup);
133139
}
@@ -173,13 +179,26 @@ static Color HexToColor(string hexString)
173179

174180
return new Color(r, g, b);
175181
}
176-
}
177182

178-
internal static class StringExtensions
179-
{
180-
internal static string SubstringAtIndexes(this string str, int startIndex, int endIndex)
183+
static IEnumerable<(int Start, int Length)> GetLineRanges(string content)
181184
{
182-
return str.Substring(startIndex, endIndex - startIndex);
185+
int lineStart = 0;
186+
187+
for (int i = 0; i < content.Length; i++)
188+
{
189+
if (content[i] == '\n')
190+
{
191+
int lineLength = i - lineStart + 1; // Include the \n
192+
yield return (lineStart, lineLength);
193+
lineStart = i + 1;
194+
}
195+
}
196+
197+
// Handle last line without terminator
198+
if (lineStart < content.Length)
199+
{
200+
yield return (lineStart, content.Length - lineStart);
201+
}
183202
}
184203
}
185204
}

src/TextMateSharp.Tests/Model/TMModelTests.cs

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using Moq;
1+
using System;
2+
using Moq;
23

34
using NUnit.Framework;
45

@@ -125,9 +126,9 @@ public override int GetLineLength(int lineIndex)
125126
{
126127
return _lines[lineIndex].Length;
127128
}
128-
public override LineText GetLineText(int lineIndex)
129+
public override LineText GetLineTextIncludingTerminators(int lineIndex)
129130
{
130-
return _lines[lineIndex];
131+
return _lines[lineIndex] + Environment.NewLine;
131132
}
132133
public override int GetNumberOfLines()
133134
{

src/TextMateSharp/Internal/Grammars/Grammar.cs

Lines changed: 25 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
using System;
2-
using System.Buffers;
32
using System.Collections.Generic;
43

54
using TextMateSharp.Grammars;
@@ -255,46 +254,37 @@ private object Tokenize(ReadOnlyMemory<char> lineText, StateStack prevState, boo
255254
}
256255

257256
// Check if we need to append newline
258-
char[] rentedBuffer = null;
259-
ReadOnlyMemory<char> effectiveLineText;
260257

261-
try
258+
ReadOnlyMemory<char> effectiveLineText;
259+
if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n')
262260
{
263-
if (lineText.Length == 0 || lineText.Span[lineText.Length - 1] != '\n')
264-
{
265-
// Only add \n if the passed lineText didn't have it.
266-
// Use ArrayPool to avoid per-line allocation
267-
int requiredLength = lineText.Length + 1;
268-
rentedBuffer = ArrayPool<char>.Shared.Rent(requiredLength);
269-
lineText.Span.CopyTo(rentedBuffer);
270-
rentedBuffer[lineText.Length] = '\n';
271-
effectiveLineText = rentedBuffer.AsMemory(0, requiredLength);
272-
}
273-
else
274-
{
275-
effectiveLineText = lineText;
276-
}
261+
// Only add \n if the passed lineText didn't have it.
262+
// We need to allocate a new buffer with the newline
263+
char[] buffer = new char[lineText.Length + 1];
264+
lineText.Span.CopyTo(buffer);
265+
buffer[lineText.Length] = '\n';
266+
effectiveLineText = buffer.AsMemory();
267+
}
268+
else
269+
{
270+
effectiveLineText = lineText;
271+
}
277272

278-
int lineLength = effectiveLineText.Length;
279-
LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers, _balancedBracketSelectors);
280-
TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0, prevState,
281-
lineTokens, true, timeLimit);
273+
int lineLength = effectiveLineText.Length;
274+
LineTokens lineTokens = new LineTokens(emitBinaryTokens, effectiveLineText, _tokenTypeMatchers,
275+
_balancedBracketSelectors);
276+
TokenizeStringResult tokenizeResult = LineTokenizer.TokenizeString(this, effectiveLineText, isFirstLine, 0,
277+
prevState,
278+
lineTokens, true, timeLimit);
282279

283-
if (emitBinaryTokens)
284-
{
285-
return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength),
286-
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
287-
}
288-
return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength),
289-
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
290-
}
291-
finally
280+
if (emitBinaryTokens)
292281
{
293-
if (rentedBuffer != null)
294-
{
295-
ArrayPool<char>.Shared.Return(rentedBuffer);
296-
}
282+
return new TokenizeLineResult2(lineTokens.GetBinaryResult(tokenizeResult.Stack, lineLength),
283+
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
297284
}
285+
286+
return new TokenizeLineResult(lineTokens.GetResult(tokenizeResult.Stack, lineLength),
287+
tokenizeResult.Stack, tokenizeResult.StoppedEarly);
298288
}
299289

300290
private void GenerateRootId()

src/TextMateSharp/Model/AbstractLineList.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ public int GetSize()
9696

9797
public abstract int GetNumberOfLines();
9898

99-
public abstract LineText GetLineText(int lineIndex);
99+
public abstract LineText GetLineTextIncludingTerminators(int lineIndex);
100100

101101
public abstract int GetLineLength(int lineIndex);
102102

src/TextMateSharp/Model/IModelLines.cs

Lines changed: 57 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,17 +4,73 @@
44

55
namespace TextMateSharp.Model
66
{
7+
/// <summary>
8+
/// Represents a document model that provides line-based access for TextMate tokenization.
9+
/// </summary>
710
public interface IModelLines
811
{
12+
/// <summary>
13+
/// Notifies that a new line has been added at the specified index.
14+
/// </summary>
15+
/// <param name="lineIndex">The zero-based index where the line was added.</param>
916
void AddLine(int lineIndex);
17+
18+
/// <summary>
19+
/// Notifies that a line has been removed at the specified index.
20+
/// </summary>
21+
/// <param name="lineIndex">The zero-based index of the removed line.</param>
1022
void RemoveLine(int lineIndex);
23+
24+
/// <summary>
25+
/// Notifies that the content of a line has changed.
26+
/// </summary>
27+
/// <param name="lineIndex">The zero-based index of the updated line.</param>
1128
void UpdateLine(int lineIndex);
29+
30+
/// <summary>
31+
/// Gets the number of model lines currently tracked.
32+
/// </summary>
1233
int GetSize();
34+
35+
/// <summary>
36+
/// Gets the model line at the specified index.
37+
/// </summary>
38+
/// <param name="lineIndex">The zero-based index of the line.</param>
1339
ModelLine Get(int lineIndex);
40+
41+
/// <summary>
42+
/// Executes an action for each model line.
43+
/// </summary>
44+
/// <param name="action">The action to execute on each line.</param>
1445
void ForEach(Action<ModelLine> action);
46+
47+
/// <summary>
48+
/// Gets the total number of lines in the document.
49+
/// </summary>
1550
int GetNumberOfLines();
16-
LineText GetLineText(int lineIndex);
51+
52+
/// <summary>
53+
/// Gets the text of the line at the specified index, including its line terminator when one exists.
54+
/// </summary>
55+
/// <param name="lineIndex">The zero-based index of the line.</param>
56+
/// <returns>The line text wrapped in a <see cref="LineText"/> structure.</returns>
57+
/// <remarks>
58+
/// For optimal performance, the returned text should include the line terminator
59+
/// (e.g., '\n' or "\r\n") if one exists. When line terminators are not included,
60+
/// the tokenization engine will allocate a new buffer to append a newline character,
61+
/// which impacts performance and memory usage.
62+
/// </remarks>
63+
LineText GetLineTextIncludingTerminators(int lineIndex);
64+
65+
/// <summary>
66+
/// Gets the length of the line at the specified index.
67+
/// </summary>
68+
/// <param name="lineIndex">The zero-based index of the line.</param>
1769
int GetLineLength(int lineIndex);
70+
71+
/// <summary>
72+
/// Releases resources used by this model.
73+
/// </summary>
1874
void Dispose();
1975
}
2076
}

0 commit comments

Comments
 (0)