Skip to content

Commit bb8d9c1

Browse files
authored
Merge pull request #28 from rameel/sse2-support
Add SSE2 support
2 parents 4a00eee + 1cc30a2 commit bb8d9c1

3 files changed

Lines changed: 93 additions & 3 deletions

File tree

.github/workflows/test.yml

Lines changed: 22 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,3 +37,25 @@ jobs:
3737

3838
- name: Test (Release)
3939
run: dotnet test -c Release --no-build
40+
41+
- name: Test (Debug, Avx2=Disabled)
42+
env:
43+
COMPlus_EnableAVX2: "0"
44+
run: dotnet test -c Debug --no-build
45+
46+
- name: Test (Release, Avx2=Disabled)
47+
env:
48+
COMPlus_EnableAVX2: "0"
49+
run: dotnet test -c Release --no-build
50+
51+
- name: Test (Debug, Avx2=Disabled, Sse2=Disabled)
52+
env:
53+
COMPlus_EnableAVX2: "0"
54+
COMPlus_EnableSSE2: "0"
55+
run: dotnet test -c Debug --no-build
56+
57+
- name: Test (Release, Avx2=Disabled, Sse2=Disabled)
58+
env:
59+
COMPlus_EnableAVX2: "0"
60+
COMPlus_EnableSSE2: "0"
61+
run: dotnet test -c Release --no-build
Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,17 @@
1+
using System.Runtime.Intrinsics.X86;
2+
3+
namespace Ramstack.Globbing;
4+
5+
/// <summary>
/// Guards the CI matrix: checks that the instruction sets reported by the runtime
/// agree with the <c>COMPlus_EnableAVX2</c> / <c>COMPlus_EnableSSE2</c> environment
/// variables the test process was started with.
/// </summary>
[TestFixture]
public class SimdConfigurationTests
{
    /// <summary>
    /// Asserts that <see cref="Avx2.IsSupported"/> and <see cref="Sse2.IsSupported"/>
    /// reflect the requested configuration, so a job that is meant to exercise a
    /// fallback code path cannot silently run the accelerated one instead.
    /// </summary>
    [Test]
    public void VerifySimdConfiguration()
    {
        // A value of "0" is what the workflow uses to switch an instruction set off;
        // an unset variable (null) means "leave it enabled".
        var isAvx2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableAVX2") == "0";
        var isSse2Disabled = Environment.GetEnvironmentVariable("COMPlus_EnableSSE2") == "0";

        // AVX2 builds on SSE2, so turning SSE2 off makes AVX2 unavailable as well,
        // even when COMPlus_EnableAVX2 itself was not set.
        var expectAvx2Unavailable = isAvx2Disabled || isSse2Disabled;

        // The runtime-reported state is the actual value; the environment is the expectation.
        Assert.That(
            !Avx2.IsSupported,
            Is.EqualTo(expectAvx2Unavailable),
            "Avx2.IsSupported does not match COMPlus_EnableAVX2/COMPlus_EnableSSE2");

        Assert.That(
            !Sse2.IsSupported,
            Is.EqualTo(isSse2Disabled),
            "Sse2.IsSupported does not match COMPlus_EnableSSE2");
    }
}

Ramstack.Globbing/Internal/PathHelper.cs

Lines changed: 54 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -305,18 +305,32 @@ public PathSegmentIterator(int length) =>
305305
[MethodImpl(MethodImplOptions.AggressiveInlining)]
306306
public (int start, int final) GetNext(ref char source, MatchFlags flags)
307307
{
308+
//
309+
// Bit mask (0b11) covering the two bits that each char (ushort) occupies in the byte-wise MoveMask output
310+
//
311+
const uint BitsPerChar = 0b11;
312+
308313
var start = _last + 1;
309314

310315
while (_position < _length)
311316
{
312-
if (Avx2.IsSupported && _mask != 0)
317+
if ((Avx2.IsSupported || Sse2.IsSupported) && _mask != 0)
313318
{
314319
var offset = BitOperations.TrailingZeroCount(_mask);
315320
_last = _position + (nint)((uint)offset >> 1);
316-
_mask &= ~(3u << offset);
317321

322+
//
323+
// Clear the bits for the current separator to process the next position in the mask
324+
//
325+
_mask &= ~(BitsPerChar << offset);
326+
327+
//
328+
// Advance position to the next chunk when no separators remain in the mask
329+
//
318330
if (_mask == 0)
319-
_position += Vector256<ushort>.Count;
331+
_position += Avx2.IsSupported
332+
? Vector256<ushort>.Count
333+
: Vector128<ushort>.Count;
320334

321335
return ((int)start, (int)_last);
322336
}
@@ -334,10 +348,47 @@ public PathSegmentIterator(int length) =>
334348
allowEscapingMask,
335349
Avx2.CompareEqual(chunk, backslash)));
336350

351+
//
352+
// Store the comparison bitmask and reuse it across iterations
353+
// as long as it contains non-zero bits.
354+
// This avoids reloading SIMD registers and repeating comparisons
355+
// on the same chunk of data.
356+
//
337357
_mask = (uint)Avx2.MoveMask(comparison.AsByte());
358+
359+
//
360+
// Advance position to the next chunk when no separators found
361+
//
338362
if (_mask == 0)
339363
_position += Vector256<ushort>.Count;
340364
}
365+
else if (Sse2.IsSupported && !Avx2.IsSupported && _position + Vector128<ushort>.Count <= _length)
366+
{
367+
var chunk = LoadVector128(ref source, _position);
368+
var allowEscapingMask = CreateAllowEscaping128Bitmask(flags);
369+
var slash = Vector128.Create((ushort)'/');
370+
var backslash = Vector128.Create((ushort)'\\');
371+
372+
var comparison = Sse2.Or(
373+
Sse2.CompareEqual(chunk, slash),
374+
Sse2.AndNot(
375+
allowEscapingMask,
376+
Sse2.CompareEqual(chunk, backslash)));
377+
378+
//
379+
// Store the comparison bitmask and reuse it across iterations
380+
// as long as it contains non-zero bits.
381+
// This avoids reloading SIMD registers and repeating comparisons
382+
// on the same chunk of data.
383+
//
384+
_mask = (uint)Sse2.MoveMask(comparison.AsByte());
385+
386+
//
387+
// Advance position to the next chunk when no separators found
388+
//
389+
if (_mask == 0)
390+
_position += Vector128<ushort>.Count;
391+
}
341392
else
342393
{
343394
for (; _position < _length; _position++)

0 commit comments

Comments
 (0)