Skip to content

Commit 9b12ce2

Browse files
committed
Shuffle scalar fallbacks added
1 parent d366d5b commit 9b12ce2

7 files changed

Lines changed: 211 additions & 103 deletions

File tree

source/TS.NET.Benchmarks/Program.cs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44

55
DefaultConfig.Instance.WithOptions(ConfigOptions.JoinSummary);
66
//_ = BenchmarkRunner.Run(typeof(Program).Assembly);
7-
//_ = BenchmarkRunner.Run<ShuffleI8Benchmark>();
7+
_ = BenchmarkRunner.Run<ShuffleI8Benchmark>();
88
//_ = BenchmarkRunner.Run<RisingEdgeTriggerBenchmark>();
99
//_ = BenchmarkRunner.Run<FallingEdgeTriggerBenchmark>();
1010
//_ = BenchmarkRunner.Run<AnyEdgeTriggerBenchmark>();
1111
//_ = BenchmarkRunner.Run<PipelineBenchmark>();
12-
_ = BenchmarkRunner.Run<BoxcarAverageI8Benchmark>();
12+
//_ = BenchmarkRunner.Run<BoxcarAverageI8Benchmark>();
1313
//_ = BenchmarkRunner.Run<SumU8toI32Benchmark>();
14-
_ = BenchmarkRunner.Run<DecimationI8Benchmark>();
14+
//_ = BenchmarkRunner.Run<DecimationI8Benchmark>();
1515
Console.ReadKey();

source/TS.NET.Benchmarks/README.md

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,19 @@
1+
## ShuffleI8
2+
3+
Scalar processing
4+
5+
| Method | Mean | Error | StdDev | Allocated |
6+
|----------------------------------- |---------:|--------:|--------:|----------:|
7+
| 'Four channel shuffle (125 x 8MS)' | 226.7 ms | 0.37 ms | 0.31 ms | 133 B |
8+
| 'Two channel shuffle (125 x 8MS)' | 238.2 ms | 0.12 ms | 0.10 ms | 21 B |
9+
10+
AVX2 processing
11+
12+
| Method | Mean | Error | StdDev | Allocated |
13+
|------------------------------------------------ |---------:|---------:|---------:|----------:|
14+
| 'Four channel shuffle (125 x 8MS)' | 34.85 ms | 0.052 ms | 0.047 ms | 7 B |
15+
| 'Two channel shuffle (125 x 8MS)' | 37.77 ms | 0.099 ms | 0.092 ms | 29 B |
16+
117
## RisingEdgeTriggerI8
218

319
Scalar processing

source/TS.NET.Benchmarks/ShuffleI8Benchmark.cs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,19 @@ public class ShuffleI8Benchmark
1212
private const int byteBufferSize = 8000000;
1313
private readonly Memory<sbyte> input = new sbyte[byteBufferSize];
1414
private readonly Memory<sbyte> output = new sbyte[byteBufferSize];
15+
private ShuffleI8 shuffle = new ShuffleI8(false);
1516

1617
[GlobalSetup]
1718
public void Setup()
1819
{
1920
Waveforms.FourChannelCountSignedByte(input.Span);
2021
}
2122

22-
[Benchmark(Description = "Four channel shuffle [production] (125 x 8MS)")]
23+
[Benchmark(Description = "Four channel shuffle (125 x 8MS)")]
2324
public void FourChannels()
2425
{
2526
for (int i = 0; i < 125; i++)
26-
ShuffleI8.FourChannels(input.Span, output.Span);
27+
shuffle.FourChannels(input.Span, output.Span);
2728
}
2829

2930
//[Benchmark(Description = "Four channel shuffle [run length 1, baseline] (125 x 8MS)")]
@@ -89,11 +90,11 @@ public void FourChannels()
8990
// Shuffle.FourChannelsRunLength32NoSimd(input.Span, output.Span);
9091
//}
9192

92-
[Benchmark(Description = "Two channel shuffle [production] (125 x 8MS)")]
93+
[Benchmark(Description = "Two channel shuffle (125 x 8MS)")]
9394
public void TwoChannels()
9495
{
9596
for (int i = 0; i < 125; i++)
96-
ShuffleI8.TwoChannels(input.Span, output.Span);
97+
shuffle.TwoChannels(input.Span, output.Span);
9798
}
9899

99100
//[Benchmark(Description = "Two channel shuffle [run length 1,variant A] (125 x 8MS)")]

source/TS.NET.Engine/Program.cs

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using Microsoft.Extensions.Logging;
33
using NReco.Logging.File;
44
using System.CommandLine;
5+
using System.Runtime.InteropServices;
56
using System.Runtime.Intrinsics.Arm;
67
using System.Runtime.Intrinsics.X86;
78
using TS.NET;
@@ -71,18 +72,22 @@ static void Start(int deviceIndex, string configurationFilePath)
7172
});
7273
var logger = loggerFactory.CreateLogger("TS.NET.Engine");
7374

74-
// Validation of CPU architecture
75-
if (!Avx2.IsSupported)
75+
if (RuntimeInformation.ProcessArchitecture == Architecture.X86 || RuntimeInformation.ProcessArchitecture == Architecture.X64)
7676
{
77-
if (AdvSimd.Arm64.IsSupported)
77+
if (!Avx2.IsSupported)
7878
{
79-
logger?.LogCritical("AArch64 not yet supported.");
80-
return;
79+
logger?.LogWarning("x86/x64 CPU without AVX2. CPU load will be high.");
80+
}
81+
}
82+
if (RuntimeInformation.ProcessArchitecture == Architecture.Arm64)
83+
{
84+
if (!AdvSimd.Arm64.IsSupported)
85+
{
86+
logger?.LogWarning("AArch64 CPU without Neon. CPU load will be high.");
8187
}
8288
else
8389
{
84-
logger?.LogCritical("CPU does not support AVX2.");
85-
return;
90+
logger?.LogWarning("AArch64 CPU with Neon. Neon hot paths not implemented. CPU load will be high.");
8691
}
8792
}
8893

source/TS.NET.Engine/Tasks/ProcessingThread.cs

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -103,15 +103,15 @@ private static void Loop(
103103
// Shuffle buffers. Only needed for 2/4 channel modes.
104104
Span<sbyte> shuffleBuffer = new sbyte[ThunderscopeMemory.Length];
105105
// --2 channel buffers
106-
int blockLength_2 = (int)ThunderscopeMemory.Length / 2;
107-
Span<sbyte> postShuffleCh1_2 = shuffleBuffer.Slice(0, blockLength_2);
108-
Span<sbyte> postShuffleCh2_2 = shuffleBuffer.Slice(blockLength_2, blockLength_2);
106+
int blockLength_2Ch = (int)ThunderscopeMemory.Length / 2;
107+
Span<sbyte> shuffleBuffer2Ch_1 = shuffleBuffer.Slice(0, blockLength_2Ch);
108+
Span<sbyte> shuffleBuffer2Ch_2 = shuffleBuffer.Slice(blockLength_2Ch, blockLength_2Ch);
109109
// --4 channel buffers
110-
int blockLength_4 = (int)ThunderscopeMemory.Length / 4;
111-
Span<sbyte> postShuffleCh1_4 = shuffleBuffer.Slice(0, blockLength_4);
112-
Span<sbyte> postShuffleCh2_4 = shuffleBuffer.Slice(blockLength_4, blockLength_4);
113-
Span<sbyte> postShuffleCh3_4 = shuffleBuffer.Slice(blockLength_4 * 2, blockLength_4);
114-
Span<sbyte> postShuffleCh4_4 = shuffleBuffer.Slice(blockLength_4 * 3, blockLength_4);
110+
int blockLength_4Ch = (int)ThunderscopeMemory.Length / 4;
111+
Span<sbyte> shuffleBuffer4Ch_1 = shuffleBuffer.Slice(0, blockLength_4Ch);
112+
Span<sbyte> shuffleBuffer4Ch_2 = shuffleBuffer.Slice(blockLength_4Ch, blockLength_4Ch);
113+
Span<sbyte> shuffleBuffer4Ch_3 = shuffleBuffer.Slice(blockLength_4Ch * 2, blockLength_4Ch);
114+
Span<sbyte> shuffleBuffer4Ch_4 = shuffleBuffer.Slice(blockLength_4Ch * 3, blockLength_4Ch);
115115
Span<uint> captureEndIndices = new uint[ThunderscopeMemory.Length / 1000]; // 1000 samples is the minimum window width
116116

117117
// Periodic debug display variables
@@ -140,6 +140,7 @@ private static void Loop(
140140

141141
AdcChannelMode cachedAdcChannelMode = AdcChannelMode.Quad;
142142
IEdgeTriggerI8 edgeTriggerI8 = new RisingEdgeTriggerI8();
143+
ShuffleI8 shuffle = new ShuffleI8();
143144
bool runMode = true;
144145
bool forceTriggerLatch = false; // "Latch" because it will reset state back to false. If the force is invoked and a trigger happens anyway, it will be reset (effectively ignoring it and only updating the bridge once).
145146
bool singleTriggerLatch = false; // "Latch" because it will reset state back to false. When reset, runTrigger will be set to false.
@@ -364,13 +365,13 @@ private static void Loop(
364365
break;
365366
case AdcChannelMode.Dual:
366367
// Shuffle
367-
ShuffleI8.TwoChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
368+
shuffle.TwoChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
368369
// Finished with the memory, return it
369370
inputChannel.Write(inputDataDto.Memory);
370371
// Write to circular buffer
371-
circularBuffer1.Write(postShuffleCh1_2);
372-
circularBuffer2.Write(postShuffleCh2_2);
373-
streamSampleCounter += postShuffleCh1_2.Length;
372+
circularBuffer1.Write(shuffleBuffer2Ch_1);
373+
circularBuffer2.Write(shuffleBuffer2Ch_2);
374+
streamSampleCounter += shuffleBuffer2Ch_1.Length;
374375
// Trigger
375376
if (runMode)
376377
{
@@ -381,9 +382,9 @@ private static void Loop(
381382
case TriggerMode.Auto:
382383
if (hardwareConfig.IsTriggerChannelAnEnabledChannel(processingConfig.TriggerChannel))
383384
{
384-
var triggerChannelBuffer = postShuffleCh2_2;
385+
var triggerChannelBuffer = shuffleBuffer2Ch_2;
385386
if (hardwareConfig.DualChannelModeIsTriggerChannelInFirstPosition(processingConfig.TriggerChannel))
386-
triggerChannelBuffer = postShuffleCh1_2;
387+
triggerChannelBuffer = shuffleBuffer2Ch_1;
387388

388389
uint captureEndCount = 0;
389390
edgeTriggerI8.Process(input: triggerChannelBuffer, captureEndIndices: captureEndIndices, out captureEndCount);
@@ -435,15 +436,15 @@ private static void Loop(
435436
break;
436437
case AdcChannelMode.Quad:
437438
// Shuffle
438-
ShuffleI8.FourChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
439+
shuffle.FourChannels(input: inputDataDto.Memory.SpanI8, output: shuffleBuffer);
439440
// Finished with the memory, return it
440441
inputChannel.Write(inputDataDto.Memory);
441442
// Write to circular buffer
442-
circularBuffer1.Write(postShuffleCh1_4);
443-
circularBuffer2.Write(postShuffleCh2_4);
444-
circularBuffer3.Write(postShuffleCh3_4);
445-
circularBuffer4.Write(postShuffleCh4_4);
446-
streamSampleCounter += postShuffleCh1_4.Length;
443+
circularBuffer1.Write(shuffleBuffer4Ch_1);
444+
circularBuffer2.Write(shuffleBuffer4Ch_2);
445+
circularBuffer3.Write(shuffleBuffer4Ch_3);
446+
circularBuffer4.Write(shuffleBuffer4Ch_4);
447+
streamSampleCounter += shuffleBuffer4Ch_1.Length;
447448
// Trigger
448449
if (runMode)
449450
{
@@ -456,10 +457,10 @@ private static void Loop(
456457
{
457458
var triggerChannelBuffer = processingConfig.TriggerChannel switch
458459
{
459-
TriggerChannel.Channel1 => postShuffleCh1_4,
460-
TriggerChannel.Channel2 => postShuffleCh2_4,
461-
TriggerChannel.Channel3 => postShuffleCh3_4,
462-
TriggerChannel.Channel4 => postShuffleCh4_4,
460+
TriggerChannel.Channel1 => shuffleBuffer4Ch_1,
461+
TriggerChannel.Channel2 => shuffleBuffer4Ch_2,
462+
TriggerChannel.Channel3 => shuffleBuffer4Ch_3,
463+
TriggerChannel.Channel4 => shuffleBuffer4Ch_4,
463464
_ => throw new ArgumentException("Invalid TriggerChannel value")
464465
};
465466

source/TS.NET.Tests/ShuffleI8Tests.cs

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,17 @@ namespace TS.NET.Tests
55
{
66
public class ShuffleI8Tests
77
{
8+
const bool forceScalar = false;
9+
810
[Fact]
911
public void ShuffleI8_FourChannels_Samples64()
1012
{
1113
const int length = 64;
1214
ReadOnlySpan<sbyte> input = [1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4];
1315
Span<sbyte> output = new sbyte[length];
1416

15-
ShuffleI8.FourChannels(input, output);
17+
var shuffle = new ShuffleI8(forceScalar);
18+
shuffle.FourChannels(input, output);
1619

1720
Span<sbyte> expectedOutput = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4];
1821

@@ -36,7 +39,8 @@ public void ShuffleI8_FourChannels_Samples128()
3639
}
3740
Span<sbyte> output = new sbyte[length];
3841

39-
ShuffleI8.FourChannels(input, output);
42+
var shuffle = new ShuffleI8(forceScalar);
43+
shuffle.FourChannels(input, output);
4044

4145
Span<sbyte> expectedOutput = new sbyte[length];
4246
var runLength = length / 4;
@@ -65,7 +69,8 @@ public void ShuffleI8_FourChannels_Samples8388608()
6569
}
6670
Span<sbyte> output = new sbyte[length];
6771

68-
ShuffleI8.FourChannels(input, output);
72+
var shuffle = new ShuffleI8(forceScalar);
73+
shuffle.FourChannels(input, output);
6974

7075
Span<sbyte> expectedOutput = new sbyte[length];
7176
var runLength = length / 4;
@@ -94,6 +99,7 @@ public void ShuffleI8_FourChannels_RunLength1_VariantA_Samples128()
9499
}
95100
Span<sbyte> output = new sbyte[length];
96101

102+
var shuffle = new ShuffleI8(forceScalar);
97103
ShuffleI8.FourChannelsRunLength1VariantA(input, output);
98104

99105
Span<sbyte> expectedOutput = new sbyte[length];
@@ -247,6 +253,7 @@ public void ShuffleI8_FourChannels_RunLength32_Samples1024()
247253
i += 32;
248254
}
249255
Span<sbyte> output = new sbyte[length];
256+
var shuffle = new ShuffleI8(forceScalar);
250257
ShuffleI8.FourChannelsRunLength32(input, output);
251258

252259
for (int i = 0; i < 256; i++)
@@ -279,7 +286,8 @@ public void ShuffleI8_TwoChannels_Samples64()
279286
}
280287
Span<sbyte> output = new sbyte[length];
281288

282-
ShuffleI8.TwoChannels(input, output);
289+
var shuffle = new ShuffleI8(forceScalar);
290+
shuffle.TwoChannels(input, output);
283291

284292
Span<sbyte> expectedOutput = new sbyte[length];
285293
var runLength = length / 2;
@@ -304,7 +312,8 @@ public void ShuffleI8_TwoChannels_Samples8388608()
304312
}
305313
Span<sbyte> output = new sbyte[length];
306314

307-
ShuffleI8.TwoChannels(input, output);
315+
var shuffle = new ShuffleI8(forceScalar);
316+
shuffle.TwoChannels(input, output);
308317

309318
Span<sbyte> expectedOutput = new sbyte[length];
310319
var runLength = length / 2;

0 commit comments

Comments
 (0)