From 737767fd8c965026dfddcac52dd521157e02841c Mon Sep 17 00:00:00 2001 From: Ugochukwu Mmaduekwe Date: Sat, 11 Apr 2026 02:18:12 +0100 Subject: [PATCH 1/4] initial cpu refactor features --- .../Delphi/PerformanceBenchmarkConsole.dpr | 3 + .../Delphi/PerformanceBenchmarkFMX.dpr | 3 + HashLib.Tests/Delphi.Tests/HashLib.Tests.dpr | 3 + HashLib/src/Checksum/HlpAdler32Dispatch.pas | 17 +- HashLib/src/Checksum/HlpCRCDispatch.pas | 15 +- HashLib/src/Crypto/HlpBlake2BDispatch.pas | 13 +- HashLib/src/Crypto/HlpBlake2SDispatch.pas | 13 +- HashLib/src/Crypto/HlpBlake3Dispatch.pas | 13 +- HashLib/src/Crypto/HlpSHA1Dispatch.pas | 19 +- HashLib/src/Crypto/HlpSHA2_256Dispatch.pas | 19 +- HashLib/src/Crypto/HlpSHA2_512Dispatch.pas | 17 +- HashLib/src/Crypto/HlpSHA3Dispatch.pas | 7 +- HashLib/src/Hash64/HlpXXHash3Dispatch.pas | 13 +- HashLib/src/Include/HashLib.inc | 60 ++++- HashLib/src/Include/HashLibFPC.inc | 44 ++++ HashLib/src/KDF/HlpArgon2Dispatch.pas | 13 +- HashLib/src/KDF/HlpScryptDispatch.pas | 13 +- .../Packages/Delphi/HashLib4PascalPackage.dpk | 7 +- .../Packages/FPC/HashLib4PascalPackage.lpk | 14 +- .../Packages/FPC/HashLib4PascalPackage.pas | 3 +- HashLib/src/Utils/HlpArmSimdFeatures.pas | 211 +++++++++++++++ HashLib/src/Utils/HlpCpuFeatures.pas | 234 ++--------------- HashLib/src/Utils/HlpSimdLevels.pas | 13 + HashLib/src/Utils/HlpX86SimdFeatures.pas | 247 ++++++++++++++++++ 24 files changed, 709 insertions(+), 305 deletions(-) create mode 100644 HashLib/src/Utils/HlpArmSimdFeatures.pas create mode 100644 HashLib/src/Utils/HlpSimdLevels.pas create mode 100644 HashLib/src/Utils/HlpX86SimdFeatures.pas diff --git a/HashLib.Benchmark/Delphi/PerformanceBenchmarkConsole.dpr b/HashLib.Benchmark/Delphi/PerformanceBenchmarkConsole.dpr index 46f4362..041f60d 100644 --- a/HashLib.Benchmark/Delphi/PerformanceBenchmarkConsole.dpr +++ b/HashLib.Benchmark/Delphi/PerformanceBenchmarkConsole.dpr @@ -125,6 +125,9 @@ uses HlpBitConverter in '..\..\HashLib\src\Utils\HlpBitConverter.pas', HlpBits in '..\..\HashLib\src\Utils\HlpBits.pas', HlpCpuFeatures in '..\..\HashLib\src\Utils\HlpCpuFeatures.pas', + HlpX86SimdFeatures in '..\..\HashLib\src\Utils\HlpX86SimdFeatures.pas', + HlpArmSimdFeatures in '..\..\HashLib\src\Utils\HlpArmSimdFeatures.pas', + HlpSimdLevels in '..\..\HashLib\src\Utils\HlpSimdLevels.pas', HlpHashLibTypes in '..\..\HashLib\src\Utils\HlpHashLibTypes.pas', HlpArrayUtils in '..\..\HashLib\src\Utils\HlpArrayUtils.pas'; diff --git a/HashLib.Benchmark/Delphi/PerformanceBenchmarkFMX.dpr b/HashLib.Benchmark/Delphi/PerformanceBenchmarkFMX.dpr index d970fb0..fbb9c13 100644 --- a/HashLib.Benchmark/Delphi/PerformanceBenchmarkFMX.dpr +++ b/HashLib.Benchmark/Delphi/PerformanceBenchmarkFMX.dpr @@ -124,6 +124,9 @@ uses HlpBitConverter in '..\..\HashLib\src\Utils\HlpBitConverter.pas', HlpBits in '..\..\HashLib\src\Utils\HlpBits.pas', HlpCpuFeatures in '..\..\HashLib\src\Utils\HlpCpuFeatures.pas', + HlpX86SimdFeatures in '..\..\HashLib\src\Utils\HlpX86SimdFeatures.pas', + HlpArmSimdFeatures in '..\..\HashLib\src\Utils\HlpArmSimdFeatures.pas', + HlpSimdLevels in '..\..\HashLib\src\Utils\HlpSimdLevels.pas', HlpHashLibTypes in '..\..\HashLib\src\Utils\HlpHashLibTypes.pas', HlpArrayUtils in '..\..\HashLib\src\Utils\HlpArrayUtils.pas'; diff --git a/HashLib.Tests/Delphi.Tests/HashLib.Tests.dpr b/HashLib.Tests/Delphi.Tests/HashLib.Tests.dpr index 96ae3f9..c62dc5a 100644 --- a/HashLib.Tests/Delphi.Tests/HashLib.Tests.dpr +++ b/HashLib.Tests/Delphi.Tests/HashLib.Tests.dpr @@ -143,6 +143,9 @@ uses HlpBitConverter in '..\..\HashLib\src\Utils\HlpBitConverter.pas', HlpBits in '..\..\HashLib\src\Utils\HlpBits.pas', HlpCpuFeatures in '..\..\HashLib\src\Utils\HlpCpuFeatures.pas', + HlpX86SimdFeatures in '..\..\HashLib\src\Utils\HlpX86SimdFeatures.pas', + HlpArmSimdFeatures in '..\..\HashLib\src\Utils\HlpArmSimdFeatures.pas', + HlpSimdLevels in '..\..\HashLib\src\Utils\HlpSimdLevels.pas', HlpHashLibTypes in '..\..\HashLib\src\Utils\HlpHashLibTypes.pas', HlpArrayUtils in '..\..\HashLib\src\Utils\HlpArrayUtils.pas', HashLibTestBase in '..\src\HashLibTestBase.pas', diff --git a/HashLib/src/Checksum/HlpAdler32Dispatch.pas b/HashLib/src/Checksum/HlpAdler32Dispatch.pas index 8044274..0ac6d7d 100644 --- a/HashLib/src/Checksum/HlpAdler32Dispatch.pas +++ b/HashLib/src/Checksum/HlpAdler32Dispatch.pas @@ -13,7 +13,8 @@ interface implementation uses - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; const ModAdler = UInt32(65521); @@ -188,28 +189,28 @@ procedure InitDispatch(); begin Adler32_Update := @Adler32_Update_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSSE3: begin Adler32_Update := @Adler32_Update_Ssse3; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin Adler32_Update := @Adler32_Update_Sse2; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin Adler32_Update := @Adler32_Update_Avx2; end; - TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSSE3: begin Adler32_Update := @Adler32_Update_Ssse3; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin Adler32_Update := @Adler32_Update_Sse2; end; diff --git a/HashLib/src/Checksum/HlpCRCDispatch.pas b/HashLib/src/Checksum/HlpCRCDispatch.pas index cd3d788..dad6cdd 100644 --- a/HashLib/src/Checksum/HlpCRCDispatch.pas +++ b/HashLib/src/Checksum/HlpCRCDispatch.pas @@ -78,7 +78,8 @@ implementation uses HlpConverters, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Scalar fallback implementation @@ -494,7 +495,7 @@ procedure InitDispatch(); CRC_Fold_UsesPclmul := False; {$IFDEF HASHLIB_X86_64_ASM} - if TCpuFeatures.HasVPCLMULQDQ() then + if TCpuFeatures.X86.HasVPCLMULQDQ() then begin CRC_Fold_Lsb := @CRC_Fold_Vpclmul; CRC_Fold_Msb := @CRC_Fold_Vpclmul_Msb; @@ -502,7 +503,7 @@ procedure InitDispatch(); CRC_Fold_UsesPclmul := True; Exit; end; - if TCpuFeatures.HasPCLMULQDQ() then + if TCpuFeatures.X86.HasPCLMULQDQ() then begin CRC_Fold_Lsb := @CRC_Fold_Pclmul; CRC_Fold_Msb := @CRC_Fold_Pclmul_Msb; @@ -514,14 +515,14 @@ procedure InitDispatch(); {$IFDEF HASHLIB_X86_SIMD} {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSSE3, TCpuSimdLevel.SSE2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSSE3, TX86SimdLevel.SSE2: BindSse2CrcFold; end; {$ENDIF HASHLIB_I386_ASM} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2, TCpuSimdLevel.SSSE3, TCpuSimdLevel.SSE2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2, TX86SimdLevel.SSSE3, TX86SimdLevel.SSE2: BindSse2CrcFold; end; {$ENDIF HASHLIB_X86_64_ASM} diff --git a/HashLib/src/Crypto/HlpBlake2BDispatch.pas b/HashLib/src/Crypto/HlpBlake2BDispatch.pas index 3edb0bf..a79f8e8 100644 --- a/HashLib/src/Crypto/HlpBlake2BDispatch.pas +++ b/HashLib/src/Crypto/HlpBlake2BDispatch.pas @@ -22,7 +22,8 @@ implementation uses HlpBits, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; const Blake2BSigma: array [0 .. 11, 0 .. 15] of Int32 = ( @@ -131,20 +132,20 @@ procedure InitDispatch(); begin Blake2B_Compress := @Blake2B_Compress_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Blake2B_Compress := @Blake2B_Compress_Sse2; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin Blake2B_Compress := @Blake2B_Compress_Avx2; end; - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Blake2B_Compress := @Blake2B_Compress_Sse2; end; diff --git a/HashLib/src/Crypto/HlpBlake2SDispatch.pas b/HashLib/src/Crypto/HlpBlake2SDispatch.pas index 1a17f07..04023d1 100644 --- a/HashLib/src/Crypto/HlpBlake2SDispatch.pas +++ b/HashLib/src/Crypto/HlpBlake2SDispatch.pas @@ -22,7 +22,8 @@ implementation uses HlpBits, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; const Blake2SSigma: array [0 .. 9, 0 .. 15] of Int32 = ( @@ -129,20 +130,20 @@ procedure InitDispatch(); begin Blake2S_Compress := @Blake2S_Compress_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Blake2S_Compress := @Blake2S_Compress_Sse2; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin Blake2S_Compress := @Blake2S_Compress_Avx2; end; - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Blake2S_Compress := @Blake2S_Compress_Sse2; end; diff --git a/HashLib/src/Crypto/HlpBlake3Dispatch.pas b/HashLib/src/Crypto/HlpBlake3Dispatch.pas index c716d5b..c577bbc 100644 --- a/HashLib/src/Crypto/HlpBlake3Dispatch.pas +++ b/HashLib/src/Crypto/HlpBlake3Dispatch.pas @@ -26,7 +26,8 @@ implementation uses HlpBits, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; const Blake3IV: array [0 .. 3] of UInt32 = ( @@ -712,8 +713,8 @@ procedure InitDispatch(); Blake3_HashMany := @Blake3_HashMany_Scalar; Blake3_ParallelDegree := 1; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Blake3_Compress := @Blake3_Compress_Sse2; Blake3_HashMany := @Blake3_HashMany_Sse2; @@ -722,14 +723,14 @@ procedure InitDispatch(); end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin Blake3_Compress := @Blake3_Compress_Avx2; Blake3_HashMany := @Blake3_HashMany_Avx2; Blake3_ParallelDegree := 8; end; - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Blake3_Compress := @Blake3_Compress_Sse2; Blake3_HashMany := @Blake3_HashMany_Sse2; diff --git a/HashLib/src/Crypto/HlpSHA1Dispatch.pas b/HashLib/src/Crypto/HlpSHA1Dispatch.pas index 510a048..57598f6 100644 --- a/HashLib/src/Crypto/HlpSHA1Dispatch.pas +++ b/HashLib/src/Crypto/HlpSHA1Dispatch.pas @@ -27,7 +27,8 @@ implementation uses HlpBits, HlpConverters, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Scalar fallback implementation @@ -175,33 +176,33 @@ procedure InitDispatch(); begin SHA1_Compress := @SHA1_Compress_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSSE3: begin SHA1_Compress := @SHA1_Compress_Ssse3_Wrap; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin SHA1_Compress := @SHA1_Compress_Sse2; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - if TCpuFeatures.HasSHANI() then + if TCpuFeatures.X86.HasSHANI() then begin SHA1_Compress := @SHA1_Compress_ShaNi_Wrap; Exit; end; - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin SHA1_Compress := @SHA1_Compress_Avx2_Wrap; end; - TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSSE3: begin SHA1_Compress := @SHA1_Compress_Ssse3_Wrap; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin SHA1_Compress := @SHA1_Compress_Sse2; end; diff --git a/HashLib/src/Crypto/HlpSHA2_256Dispatch.pas b/HashLib/src/Crypto/HlpSHA2_256Dispatch.pas index a30d1e2..2ba4832 100644 --- a/HashLib/src/Crypto/HlpSHA2_256Dispatch.pas +++ b/HashLib/src/Crypto/HlpSHA2_256Dispatch.pas @@ -39,7 +39,8 @@ implementation uses HlpBits, HlpConverters, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Scalar fallback implementation @@ -185,33 +186,33 @@ procedure InitDispatch(); begin SHA256_Compress := @SHA256_Compress_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSSE3: begin SHA256_Compress := @SHA256_Compress_Ssse3_Wrap; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin SHA256_Compress := @SHA256_Compress_Sse2_Wrap; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - if TCpuFeatures.HasSHANI() then + if TCpuFeatures.X86.HasSHANI() then begin SHA256_Compress := @SHA256_Compress_ShaNi_Wrap; Exit; end; - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin SHA256_Compress := @SHA256_Compress_Avx2_Wrap; end; - TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSSE3: begin SHA256_Compress := @SHA256_Compress_Ssse3_Wrap; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin SHA256_Compress := @SHA256_Compress_Sse2_Wrap; end; diff --git a/HashLib/src/Crypto/HlpSHA2_512Dispatch.pas b/HashLib/src/Crypto/HlpSHA2_512Dispatch.pas index f5f6e0a..e0c5b13 100644 --- a/HashLib/src/Crypto/HlpSHA2_512Dispatch.pas +++ b/HashLib/src/Crypto/HlpSHA2_512Dispatch.pas @@ -63,7 +63,8 @@ implementation uses HlpBits, HlpConverters, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Scalar fallback implementation @@ -194,28 +195,28 @@ procedure InitDispatch(); begin SHA512_Compress := @SHA512_Compress_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSSE3: begin SHA512_Compress := @SHA512_Compress_Ssse3_Wrap; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin SHA512_Compress := @SHA512_Compress_Sse2_Wrap; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin SHA512_Compress := @SHA512_Compress_Avx2_Wrap; end; - TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSSE3: begin SHA512_Compress := @SHA512_Compress_Ssse3_Wrap; end; - TCpuSimdLevel.SSE2: + TX86SimdLevel.SSE2: begin SHA512_Compress := @SHA512_Compress_Sse2_Wrap; end; diff --git a/HashLib/src/Crypto/HlpSHA3Dispatch.pas b/HashLib/src/Crypto/HlpSHA3Dispatch.pas index d10df62..8ab3eb9 100644 --- a/HashLib/src/Crypto/HlpSHA3Dispatch.pas +++ b/HashLib/src/Crypto/HlpSHA3Dispatch.pas @@ -18,7 +18,8 @@ implementation uses HlpBits, HlpConverters, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Round constants @@ -496,8 +497,8 @@ procedure InitDispatch(); KeccakF1600_Permute := @KeccakF1600_Scalar; KeccakF1600_Absorb := @KeccakF1600_Absorb_Scalar; {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin KeccakF1600_Permute := @KeccakF1600_Avx2_Wrap; KeccakF1600_Absorb := @KeccakF1600_Avx2_Absorb_Wrap; diff --git a/HashLib/src/Hash64/HlpXXHash3Dispatch.pas b/HashLib/src/Hash64/HlpXXHash3Dispatch.pas index 833e375..0fedd69 100644 --- a/HashLib/src/Hash64/HlpXXHash3Dispatch.pas +++ b/HashLib/src/Hash64/HlpXXHash3Dispatch.pas @@ -22,7 +22,8 @@ interface implementation uses - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; const XXH_STRIPE_LEN = 64; @@ -213,8 +214,8 @@ procedure InitDispatch(); XXH3_ScrambleAcc := @XXH3_ScrambleAcc_Scalar; XXH3_InitSecret := @XXH3_InitSecret_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin XXH3_Accumulate512 := @XXH3_Accumulate512_Sse2; XXH3_Accumulate := @XXH3_Accumulate_Sse2; @@ -224,15 +225,15 @@ procedure InitDispatch(); end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin XXH3_Accumulate512 := @XXH3_Accumulate512_Avx2; XXH3_Accumulate := @XXH3_Accumulate_Avx2; XXH3_ScrambleAcc := @XXH3_ScrambleAcc_Avx2; XXH3_InitSecret := @XXH3_InitSecret_Avx2; end; - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin XXH3_Accumulate512 := @XXH3_Accumulate512_Sse2; XXH3_Accumulate := @XXH3_Accumulate_Sse2; diff --git a/HashLib/src/Include/HashLib.inc b/HashLib/src/Include/HashLib.inc index 96fdf40..51c207d 100644 --- a/HashLib/src/Include/HashLib.inc +++ b/HashLib/src/Include/HashLib.inc @@ -59,6 +59,42 @@ {$ENDIF} {$IFEND} +{$IFDEF CPUARM32} + {$DEFINE HASHLIB_ARM} +{$ENDIF} + +{$IFDEF CPUARM64} + {$DEFINE HASHLIB_AARCH64} +{$ENDIF} + +{$ENDIF} + +{================================= Target OS ==================================} + +{$IFDEF MSWINDOWS} + {$DEFINE HASHLIB_MSWINDOWS} +{$ENDIF} + +{$IFDEF IOS} + {$DEFINE HASHLIB_IOS} +{$ENDIF} + +{$IFDEF MACOS} + {$IFNDEF IOS} + {$DEFINE HASHLIB_MACOS} + {$ENDIF} +{$ENDIF} + +{$IF DEFINED(HASHLIB_IOS) OR DEFINED(HASHLIB_MACOS)} + {$DEFINE HASHLIB_APPLE} +{$IFEND} + +{$IFDEF ANDROID} + {$DEFINE HASHLIB_ANDROID} +{$ENDIF} + +{$IFDEF LINUX} + {$DEFINE HASHLIB_LINUX} {$ENDIF} {========================== Common Compiler Settings ==========================} @@ -79,17 +115,33 @@ {$DEFINE HASHLIB_X86_SIMD} {$IFEND} +{$IF DEFINED(HASHLIB_ARM_ASM) OR DEFINED(HASHLIB_AARCH64_ASM)} + {$DEFINE HASHLIB_ARM_SIMD} +{$IFEND} + {$IFDEF HASHLIB_X86_SIMD} -// Uncomment ONE to force a specific x86/x86-64 SIMD dispatch level: +// Uncomment ONE to force a specific X86 SIMD dispatch level: // {$DEFINE HASHLIB_FORCE_SSE2} // {$DEFINE HASHLIB_FORCE_SSSE3} +{$ENDIF} + +{$IFDEF HASHLIB_ARM_SIMD} +// Uncomment ONE to force a specific Arm SIMD dispatch level: +// {$DEFINE HASHLIB_FORCE_NEON} +// {$DEFINE HASHLIB_FORCE_SVE} +{$ENDIF} {$IF (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SSE2)) OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SSSE3)) - OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_SSSE3))} + OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_SSSE3)) + OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_NEON)) + OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SVE)) + OR (DEFINED(HASHLIB_FORCE_NEON) AND DEFINED(HASHLIB_FORCE_SVE)) + OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_NEON)) + OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_SVE)) + OR (DEFINED(HASHLIB_FORCE_SSSE3) AND DEFINED(HASHLIB_FORCE_NEON)) + OR (DEFINED(HASHLIB_FORCE_SSSE3) AND DEFINED(HASHLIB_FORCE_SVE))} {$MESSAGE ERROR 'Only one HASHLIB_FORCE_* define may be enabled at a time.'} {$IFEND} -{$ENDIF} - (* &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& *) diff --git a/HashLib/src/Include/HashLibFPC.inc b/HashLib/src/Include/HashLibFPC.inc index 96a4c0a..a60a506 100644 --- a/HashLib/src/Include/HashLibFPC.inc +++ b/HashLib/src/Include/HashLibFPC.inc @@ -48,6 +48,50 @@ {$DEFINE HASHLIB_X86_64_ASM} {$IFEND} +{$IFDEF CPUARM} + {$DEFINE HASHLIB_ARM} + {$DEFINE HASHLIB_ARM_ASM} +{$ENDIF} + +{$IFDEF CPUAARCH64} + {$DEFINE HASHLIB_AARCH64} + {$DEFINE HASHLIB_AARCH64_ASM} +{$ENDIF} + +{================================= Target OS ==================================} + +{$IFDEF MSWINDOWS} + {$DEFINE HASHLIB_MSWINDOWS} +{$ENDIF} + +{$IFDEF IOS} + {$DEFINE HASHLIB_IOS} +{$ENDIF} + +{$IF DEFINED(DARWIN) AND NOT DEFINED(HASHLIB_IOS)} + {$DEFINE HASHLIB_MACOS} +{$IFEND} + +{$IF DEFINED(HASHLIB_IOS) OR DEFINED(HASHLIB_MACOS)} + {$DEFINE HASHLIB_APPLE} +{$IFEND} + +{$IFDEF ANDROID} + {$DEFINE HASHLIB_ANDROID} +{$ENDIF} + +{$IFDEF LINUX} + {$DEFINE HASHLIB_LINUX} +{$ENDIF} + +{$IF DEFINED(FREEBSD) OR DEFINED(NETBSD) OR DEFINED(OPENBSD) OR DEFINED(DRAGONFLY)} + {$DEFINE HASHLIB_BSD} +{$IFEND} + +{$IFDEF SOLARIS} + {$DEFINE HASHLIB_SOLARIS} +{$ENDIF} + {========================= Compiler Mode & Optimizations ======================} {$MODE DELPHI} diff --git a/HashLib/src/KDF/HlpArgon2Dispatch.pas b/HashLib/src/KDF/HlpArgon2Dispatch.pas index 0b7f8f5..207169f 100644 --- a/HashLib/src/KDF/HlpArgon2Dispatch.pas +++ b/HashLib/src/KDF/HlpArgon2Dispatch.pas @@ -14,7 +14,8 @@ implementation uses HlpBits, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Scalar fallback implementation @@ -136,20 +137,20 @@ procedure InitDispatch(); begin Argon2_FillBlock := @Argon2_FillBlock_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Argon2_FillBlock := @Argon2_FillBlock_Sse2; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin Argon2_FillBlock := @Argon2_FillBlock_Avx2; end; - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Argon2_FillBlock := @Argon2_FillBlock_Sse2; end; diff --git a/HashLib/src/KDF/HlpScryptDispatch.pas b/HashLib/src/KDF/HlpScryptDispatch.pas index b20ebe6..f9c399e 100644 --- a/HashLib/src/KDF/HlpScryptDispatch.pas +++ b/HashLib/src/KDF/HlpScryptDispatch.pas @@ -17,7 +17,8 @@ implementation uses HlpBits, - HlpCpuFeatures; + HlpCpuFeatures, + HlpSimdLevels; // ============================================================================= // Percival's (i*5 mod 16) permutation rearranges each 16-word Salsa20 state @@ -201,20 +202,20 @@ procedure InitDispatch(); begin Scrypt_SalsaXor := @Scrypt_SalsaXor_Scalar; {$IFDEF HASHLIB_I386_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Scrypt_SalsaXor := @Scrypt_SalsaXor_Sse2; end; end; {$ENDIF} {$IFDEF HASHLIB_X86_64_ASM} - case TCpuFeatures.GetActiveLevel() of - TCpuSimdLevel.AVX2: + case TCpuFeatures.X86.GetSimdLevel() of + TX86SimdLevel.AVX2: begin Scrypt_SalsaXor := @Scrypt_SalsaXor_Avx2; end; - TCpuSimdLevel.SSE2, TCpuSimdLevel.SSSE3: + TX86SimdLevel.SSE2, TX86SimdLevel.SSSE3: begin Scrypt_SalsaXor := @Scrypt_SalsaXor_Sse2; end; diff --git a/HashLib/src/Packages/Delphi/HashLib4PascalPackage.dpk b/HashLib/src/Packages/Delphi/HashLib4PascalPackage.dpk index cd36180..99a06f1 100644 --- a/HashLib/src/Packages/Delphi/HashLib4PascalPackage.dpk +++ b/HashLib/src/Packages/Delphi/HashLib4PascalPackage.dpk @@ -47,9 +47,9 @@ contains HlpMultipleTransformNonBlock in '..\..\Base\HlpMultipleTransformNonBlock.pas', HlpAdler32 in '..\..\Checksum\HlpAdler32.pas', HlpAdler32Dispatch in '..\..\Checksum\HlpAdler32Dispatch.pas', - HlpGF2 in '..\..\Checksum\HlpGF2.pas', - HlpCRCDispatch in '..\..\Checksum\HlpCRCDispatch.pas', HlpCRC in '..\..\Checksum\HlpCRC.pas', + HlpCRCDispatch in '..\..\Checksum\HlpCRCDispatch.pas', + HlpGF2 in '..\..\Checksum\HlpGF2.pas', HlpCRC16 in '..\..\Checksum\HlpCRC16.pas', HlpCRC32 in '..\..\Checksum\HlpCRC32.pas', HlpCRC32Fast in '..\..\Checksum\HlpCRC32Fast.pas', @@ -151,6 +151,9 @@ contains HlpBitConverter in '..\..\Utils\HlpBitConverter.pas', HlpBits in '..\..\Utils\HlpBits.pas', HlpCpuFeatures in '..\..\Utils\HlpCpuFeatures.pas', + HlpX86SimdFeatures in '..\..\Utils\HlpX86SimdFeatures.pas', + HlpArmSimdFeatures in '..\..\Utils\HlpArmSimdFeatures.pas', + HlpSimdLevels in '..\..\Utils\HlpSimdLevels.pas', HlpHashLibTypes in '..\..\Utils\HlpHashLibTypes.pas', HlpArrayUtils in '..\..\Utils\HlpArrayUtils.pas'; diff --git a/HashLib/src/Packages/FPC/HashLib4PascalPackage.lpk b/HashLib/src/Packages/FPC/HashLib4PascalPackage.lpk index f0aa57d..60ffecc 100644 --- a/HashLib/src/Packages/FPC/HashLib4PascalPackage.lpk +++ b/HashLib/src/Packages/FPC/HashLib4PascalPackage.lpk @@ -29,7 +29,7 @@ "/> - + @@ -510,6 +510,18 @@ + + + + + + + + + + + + diff --git a/HashLib/src/Packages/FPC/HashLib4PascalPackage.pas b/HashLib/src/Packages/FPC/HashLib4PascalPackage.pas index 03da7cc..95346dc 100644 --- a/HashLib/src/Packages/FPC/HashLib4PascalPackage.pas +++ b/HashLib/src/Packages/FPC/HashLib4PascalPackage.pas @@ -32,7 +32,8 @@ interface HlpXXHash3Dispatch, HlpBlake2BDispatch, HlpBlake2SDispatch, HlpArgon2Dispatch, HlpScryptDispatch, HlpBlake3Dispatch, HlpSHA2_256Dispatch, HlpSHA2_512Dispatch, HlpSHA1Dispatch, - HlpAdler32Dispatch, HlpGF2, HlpCRCDispatch, HlpSHA3Dispatch; + HlpAdler32Dispatch, HlpGF2, HlpCRCDispatch, HlpSHA3Dispatch, + HlpArmSimdFeatures, HlpSimdLevels, HlpX86SimdFeatures; implementation diff --git a/HashLib/src/Utils/HlpArmSimdFeatures.pas b/HashLib/src/Utils/HlpArmSimdFeatures.pas new file mode 100644 index 0000000..4dd61e1 --- /dev/null +++ b/HashLib/src/Utils/HlpArmSimdFeatures.pas @@ -0,0 +1,211 @@ +unit HlpArmSimdFeatures; + +{$I ..\Include\HashLib.inc} + +interface + +uses + HlpSimdLevels; + +type + TArmSimdFeatures = class sealed + strict private + class var + FSimdLevel: TArmSimdLevel; + FHasAES: Boolean; + FHasSHA1: Boolean; + FHasSHA256: Boolean; + FHasSHA512: Boolean; + FHasSHA3: Boolean; + FHasPMULL: Boolean; + + strict private + class function CPUHasNEON(): Boolean; static; + class function CPUHasSVE(): Boolean; static; + class function CPUHasSVE2(): Boolean; static; + class function CPUHasAES(): Boolean; static; + class function CPUHasSHA1(): Boolean; static; + class function CPUHasSHA256(): Boolean; static; + class function CPUHasSHA512(): Boolean; static; + class function CPUHasSHA3(): Boolean; static; + class function CPUHasPMULL(): Boolean; static; + + private + class procedure ProbeHardwareAndCache(); static; + class procedure ApplyBuildOverrides(); static; + + public + class function GetSimdLevel(): TArmSimdLevel; static; + class function HasNEON(): Boolean; static; + class function HasSVE(): Boolean; static; + class function HasSVE2(): Boolean; static; + class function HasAES(): Boolean; static; + class function HasSHA1(): Boolean; static; + class function HasSHA256(): Boolean; static; + class function HasSHA512(): Boolean; static; + class function HasSHA3(): Boolean; static; + class function HasPMULL(): Boolean; static; + end; + +implementation + +{ TArmSimdFeatures } + +class function TArmSimdFeatures.CPUHasNEON(): Boolean; +begin + // TODO: implement platform-specific NEON detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasSVE(): Boolean; +begin + // TODO: implement platform-specific SVE detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasSVE2(): Boolean; +begin + // TODO: implement platform-specific SVE2 detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasAES(): Boolean; +begin + // TODO: implement platform-specific AES extension detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasSHA1(): Boolean; +begin + // TODO: implement platform-specific SHA1 extension detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasSHA256(): Boolean; +begin + // TODO: implement platform-specific SHA256 extension detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasSHA512(): Boolean; +begin + // TODO: implement platform-specific SHA512 extension detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasSHA3(): Boolean; +begin + // TODO: implement platform-specific SHA3 extension detection + Result := False; +end; + +class function TArmSimdFeatures.CPUHasPMULL(): Boolean; +begin + // TODO: implement platform-specific PMULL extension detection + Result := False; +end; + +class procedure TArmSimdFeatures.ProbeHardwareAndCache(); +begin + FSimdLevel := TArmSimdLevel.Scalar; + FHasAES := False; + FHasSHA1 := False; + FHasSHA256 := False; + FHasSHA512 := False; + FHasSHA3 := False; + FHasPMULL := False; + + if CPUHasNEON() then + begin + FSimdLevel := TArmSimdLevel.NEON; + + FHasAES := CPUHasAES(); + FHasSHA1 := CPUHasSHA1(); + FHasSHA256 := CPUHasSHA256(); + FHasSHA512 := CPUHasSHA512(); + FHasSHA3 := CPUHasSHA3(); + FHasPMULL := CPUHasPMULL(); + + if CPUHasSVE() then + begin + FSimdLevel := TArmSimdLevel.SVE; + if CPUHasSVE2() then + FSimdLevel := TArmSimdLevel.SVE2; + end; + end; +end; + +class procedure TArmSimdFeatures.ApplyBuildOverrides(); +begin +{$IF DEFINED(HASHLIB_FORCE_SCALAR)} + FSimdLevel := TArmSimdLevel.Scalar; + FHasAES := False; + FHasSHA1 := False; + FHasSHA256 := False; + FHasSHA512 := False; + FHasSHA3 := False; + FHasPMULL := False; +{$ELSEIF DEFINED(HASHLIB_FORCE_NEON)} + if FSimdLevel > TArmSimdLevel.NEON then + FSimdLevel := TArmSimdLevel.NEON; +{$ELSEIF DEFINED(HASHLIB_FORCE_SVE)} + if FSimdLevel > TArmSimdLevel.SVE then + FSimdLevel := TArmSimdLevel.SVE; +{$IFEND} +end; + +class function TArmSimdFeatures.GetSimdLevel(): TArmSimdLevel; +begin + Result := FSimdLevel; +end; + +class function TArmSimdFeatures.HasNEON(): Boolean; +begin + Result := FSimdLevel >= TArmSimdLevel.NEON; +end; + +class function TArmSimdFeatures.HasSVE(): Boolean; +begin + Result := FSimdLevel >= TArmSimdLevel.SVE; +end; + +class function TArmSimdFeatures.HasSVE2(): Boolean; +begin + Result := FSimdLevel >= TArmSimdLevel.SVE2; +end; + +class function TArmSimdFeatures.HasAES(): Boolean; +begin + Result := FHasAES; +end; + +class function TArmSimdFeatures.HasSHA1(): Boolean; +begin + Result := FHasSHA1; +end; + +class function TArmSimdFeatures.HasSHA256(): Boolean; +begin + Result := FHasSHA256; +end; + +class function TArmSimdFeatures.HasSHA512(): Boolean; +begin + Result := FHasSHA512; +end; + +class function TArmSimdFeatures.HasSHA3(): Boolean; +begin + Result := FHasSHA3; +end; + +class function TArmSimdFeatures.HasPMULL(): Boolean; +begin + Result := FHasPMULL; +end; + +initialization + TArmSimdFeatures.ProbeHardwareAndCache(); + TArmSimdFeatures.ApplyBuildOverrides(); + +end. diff --git a/HashLib/src/Utils/HlpCpuFeatures.pas b/HashLib/src/Utils/HlpCpuFeatures.pas index 02725fb..4965630 100644 --- a/HashLib/src/Utils/HlpCpuFeatures.pas +++ b/HashLib/src/Utils/HlpCpuFeatures.pas @@ -4,237 +4,37 @@ interface +uses + HlpSimdLevels, + HlpX86SimdFeatures, + HlpArmSimdFeatures; + type - TCpuSimdLevel = (Scalar, SSE2, SSSE3, AVX2); + TCpuFeaturesX86 = class of TX86SimdFeatures; + TCpuFeaturesArm = class of TArmSimdFeatures; TCpuFeatures = class sealed - private - class var FDetectedLevel: TCpuSimdLevel; - class var FHasSHANI: Boolean; - class var FHasPCLMULQDQ: Boolean; - class var FHasVPCLMULQDQ: Boolean; - class function CPUHasSSE2(): Boolean; static; - class function CPUHasSSSE3(): Boolean; static; - class function CPUHasAVX2(): Boolean; static; - class function CPUHasSHANI(): Boolean; static; - class function CPUHasPCLMULQDQ(): Boolean; static; - class function CPUHasVPCLMULQDQ(): Boolean; static; - class procedure DetectFeatures(); static; + strict private + class function GetX86(): TCpuFeaturesX86; static; + class function GetArm(): TCpuFeaturesArm; static; + public - class function GetActiveLevel(): TCpuSimdLevel; static; - class function HasSHANI(): Boolean; static; - class function HasPCLMULQDQ(): Boolean; static; - class function HasVPCLMULQDQ(): Boolean; static; - class function HasSSE2(): Boolean; static; - class function HasSSSE3(): Boolean; static; - class function HasAVX2(): Boolean; static; + class property X86: TCpuFeaturesX86 read GetX86; + class property Arm: TCpuFeaturesArm read GetArm; end; implementation -{$IFDEF HASHLIB_X86_SIMD} - -type - TCpuIdResult = record - RegEAX, RegEBX, RegECX, RegEDX: UInt32; - end; - -procedure CpuIdQuery(ALeaf, ASubLeaf: UInt32; AResult: Pointer); - {$I ..\Include\Simd\CpuFeatures\CpuIdQuery.inc} -end; - -procedure XGetBvQuery(AResult: Pointer); - {$I ..\Include\Simd\CpuFeatures\XGetBvQuery.inc} -end; - -{$ENDIF} - { TCpuFeatures } -class function TCpuFeatures.CPUHasSSE2(): Boolean; -{$IFDEF HASHLIB_X86_SIMD} -var - LCpuId: TCpuIdResult; -{$ENDIF} -begin -{$IFDEF HASHLIB_X86_SIMD} - CpuIdQuery(1, 0, @LCpuId); - Result := (LCpuId.RegEDX and (1 shl 26)) <> 0; -{$ELSE} - Result := False; -{$ENDIF} -end; - -class function TCpuFeatures.CPUHasSSSE3(): Boolean; -{$IFDEF HASHLIB_X86_SIMD} -var - LCpuId: TCpuIdResult; -{$ENDIF} -begin -{$IFDEF HASHLIB_X86_SIMD} - CpuIdQuery(1, 0, @LCpuId); - // SSSE3: ECX bit 9 - Result := (LCpuId.RegECX and (1 shl 9)) <> 0; -{$ELSE} - Result := False; -{$ENDIF} -end; - -class function TCpuFeatures.CPUHasAVX2(): Boolean; -{$IFDEF HASHLIB_X86_SIMD} -var - LCpuId: TCpuIdResult; - LXcr0: UInt64; -{$ENDIF} -begin -{$IFDEF HASHLIB_X86_SIMD} - CpuIdQuery(1, 0, @LCpuId); - - // OSXSAVE: ECX bit 27 (required for OS AVX state saving) - if (LCpuId.RegECX and (1 shl 27)) = 0 then - Exit(False); - - // XCR0 bits 1 and 2 must be set for AVX state support - LXcr0 := 0; - XGetBvQuery(@LXcr0); - if (UInt32(LXcr0) and $06) <> $06 then - Exit(False); - - CpuIdQuery(7, 0, @LCpuId); - - // AVX2: EBX bit 5 - Result := (LCpuId.RegEBX and (1 shl 5)) <> 0; -{$ELSE} - Result := False; -{$ENDIF} -end; - -class function TCpuFeatures.CPUHasSHANI(): Boolean; -{$IFDEF HASHLIB_X86_SIMD} -var - LCpuId: TCpuIdResult; -{$ENDIF} -begin -{$IFDEF HASHLIB_X86_SIMD} - CpuIdQuery(7, 0, @LCpuId); - // SHA-NI: EBX bit 29 - Result := (LCpuId.RegEBX and (1 shl 29)) <> 0; -{$ELSE} - Result := False; -{$ENDIF} -end; - -class function TCpuFeatures.CPUHasPCLMULQDQ(): Boolean; -{$IFDEF HASHLIB_X86_SIMD} -var - LCpuId: TCpuIdResult; -{$ENDIF} -begin -{$IFDEF HASHLIB_X86_SIMD} - CpuIdQuery(1, 0, @LCpuId); - // PCLMULQDQ: ECX bit 1 - Result := (LCpuId.RegECX and (1 shl 1)) <> 0; -{$ELSE} - Result := False; -{$ENDIF} -end; - -class function TCpuFeatures.CPUHasVPCLMULQDQ(): Boolean; -{$IFDEF HASHLIB_X86_SIMD} -var - LCpuId: TCpuIdResult; -{$ENDIF} -begin -{$IFDEF HASHLIB_X86_SIMD} - CpuIdQuery(7, 0, @LCpuId); - // VPCLMULQDQ: ECX bit 10 - Result := (LCpuId.RegECX and (1 shl 10)) <> 0; -{$ELSE} - Result := False; -{$ENDIF} -end; - -class procedure TCpuFeatures.DetectFeatures(); -begin - FDetectedLevel := TCpuSimdLevel.Scalar; - FHasSHANI := False; - FHasPCLMULQDQ := False; - FHasVPCLMULQDQ := False; - - if CPUHasSSE2() then - begin - FDetectedLevel := TCpuSimdLevel.SSE2; - FHasPCLMULQDQ := CPUHasPCLMULQDQ(); - if CPUHasSSSE3() then - begin - FDetectedLevel := TCpuSimdLevel.SSSE3; - if CPUHasAVX2() then - begin - FDetectedLevel := TCpuSimdLevel.AVX2; - FHasVPCLMULQDQ := CPUHasVPCLMULQDQ(); - end; - end; - end; - - FHasSHANI := CPUHasSHANI(); - - // Cap based on user force defines -{$IF DEFINED(HASHLIB_FORCE_SCALAR)} - FDetectedLevel := TCpuSimdLevel.Scalar; - FHasSHANI := False; - FHasPCLMULQDQ := False; - FHasVPCLMULQDQ := False; -{$ELSEIF DEFINED(HASHLIB_FORCE_SSE2)} - if FDetectedLevel > TCpuSimdLevel.SSE2 then - FDetectedLevel := TCpuSimdLevel.SSE2; - FHasSHANI := False; - FHasPCLMULQDQ := False; - FHasVPCLMULQDQ := False; -{$ELSEIF DEFINED(HASHLIB_FORCE_SSSE3)} - if FDetectedLevel > TCpuSimdLevel.SSSE3 then - FDetectedLevel := TCpuSimdLevel.SSSE3; - FHasSHANI := False; - FHasPCLMULQDQ := False; - FHasVPCLMULQDQ := False; -{$IFEND} -end; - -class function TCpuFeatures.GetActiveLevel(): TCpuSimdLevel; -begin - Result := FDetectedLevel; -end; - -class function TCpuFeatures.HasSHANI(): Boolean; -begin - Result := FHasSHANI; -end; - -class function TCpuFeatures.HasPCLMULQDQ(): Boolean; -begin - Result := FHasPCLMULQDQ; -end; - -class function TCpuFeatures.HasVPCLMULQDQ(): Boolean; +class function TCpuFeatures.GetX86(): TCpuFeaturesX86; begin - Result := FHasVPCLMULQDQ; + Result := TX86SimdFeatures; end; -class function TCpuFeatures.HasSSE2(): Boolean; +class function TCpuFeatures.GetArm(): TCpuFeaturesArm; begin - Result := Ord(FDetectedLevel) >= Ord(TCpuSimdLevel.SSE2); + Result := TArmSimdFeatures; end; -class function TCpuFeatures.HasSSSE3(): Boolean; -begin - Result := Ord(FDetectedLevel) >= Ord(TCpuSimdLevel.SSSE3); -end; - -class function TCpuFeatures.HasAVX2(): Boolean; -begin - Result := FDetectedLevel = TCpuSimdLevel.AVX2; -end; - -initialization - TCpuFeatures.DetectFeatures(); - end. diff --git a/HashLib/src/Utils/HlpSimdLevels.pas b/HashLib/src/Utils/HlpSimdLevels.pas new file mode 100644 index 0000000..678e838 --- /dev/null +++ b/HashLib/src/Utils/HlpSimdLevels.pas @@ -0,0 +1,13 @@ +unit HlpSimdLevels; + +{$I ..\Include\HashLib.inc} + +interface + +type + TX86SimdLevel = (Scalar, SSE2, SSSE3, AVX2); + TArmSimdLevel = (Scalar, NEON, SVE, SVE2); + +implementation + +end. diff --git a/HashLib/src/Utils/HlpX86SimdFeatures.pas b/HashLib/src/Utils/HlpX86SimdFeatures.pas new file mode 100644 index 0000000..f7e3f40 --- /dev/null +++ b/HashLib/src/Utils/HlpX86SimdFeatures.pas @@ -0,0 +1,247 @@ +unit HlpX86SimdFeatures; + +{$I ..\Include\HashLib.inc} + +interface + +uses + HlpSimdLevels; + +type + TX86SimdFeatures = class sealed + strict private + type + TCpuIdResult = record + RegEAX, RegEBX, RegECX, RegEDX: UInt32; + end; + + strict private + class var + FSimdLevel: TX86SimdLevel; + FHasSHANI: Boolean; + FHasPCLMULQDQ: Boolean; + FHasVPCLMULQDQ: Boolean; + + strict private + class function CPUHasSSE2(): Boolean; static; + class function CPUHasSSSE3(): Boolean; static; + class function CPUHasAVX2(): Boolean; static; + class function CPUHasSHANI(): Boolean; static; + class function CPUHasPCLMULQDQ(): Boolean; static; + class function CPUHasVPCLMULQDQ(): Boolean; static; + + private + class procedure ProbeHardwareAndCache(); static; + class procedure ApplyBuildOverrides(); static; + + public + class function GetSimdLevel(): TX86SimdLevel; static; + class function HasSSE2(): Boolean; static; + class function HasSSSE3(): Boolean; static; + class function HasAVX2(): Boolean; static; + class function HasSHANI(): Boolean; static; + class function HasPCLMULQDQ(): Boolean; static; + class function HasVPCLMULQDQ(): Boolean; static; + end; + +implementation + +{$IFDEF HASHLIB_X86_SIMD} + +procedure CpuIdQuery(ALeaf, ASubLeaf: UInt32; AResult: Pointer); + {$I ..\Include\Simd\CpuFeatures\CpuIdQuery.inc} +end; + +procedure XGetBvQuery(AResult: Pointer); + {$I ..\Include\Simd\CpuFeatures\XGetBvQuery.inc} +end; + +{$ENDIF} + +{ TX86SimdFeatures } + +class function TX86SimdFeatures.CPUHasSSE2(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(1, 0, @LCpuId); + Result := (LCpuId.RegEDX and (1 shl 26)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + +class function TX86SimdFeatures.CPUHasSSSE3(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(1, 0, @LCpuId); + Result := (LCpuId.RegECX and (1 shl 9)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + +class function TX86SimdFeatures.CPUHasAVX2(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; + LXcr0: UInt64; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(1, 0, @LCpuId); + + // Check OSXSAVE bit -- OS must support XSAVE/XRSTOR + if (LCpuId.RegECX and (1 shl 27)) = 0 then + Exit(False); + + // Check XCR0 for SSE and AVX state saving + LXcr0 := 0; + XGetBvQuery(@LXcr0); + if (UInt32(LXcr0) and $06) <> $06 then + Exit(False); + + // Check AVX2 bit in structured extended feature flags + CpuIdQuery(7, 0, @LCpuId); + Result := (LCpuId.RegEBX and (1 shl 5)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + +class function TX86SimdFeatures.CPUHasSHANI(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(7, 0, @LCpuId); + Result := (LCpuId.RegEBX and (1 shl 29)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + +class function TX86SimdFeatures.CPUHasPCLMULQDQ(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(1, 0, @LCpuId); + Result := (LCpuId.RegECX and (1 shl 1)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + +class function TX86SimdFeatures.CPUHasVPCLMULQDQ(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(7, 0, @LCpuId); + Result := (LCpuId.RegECX and (1 shl 10)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + +class procedure TX86SimdFeatures.ProbeHardwareAndCache(); +begin + FSimdLevel := TX86SimdLevel.Scalar; + FHasSHANI := False; + FHasPCLMULQDQ := False; + FHasVPCLMULQDQ := False; + + if CPUHasSSE2() then + begin + FSimdLevel := TX86SimdLevel.SSE2; + FHasPCLMULQDQ := CPUHasPCLMULQDQ(); + if CPUHasSSSE3() then + begin + FSimdLevel := TX86SimdLevel.SSSE3; + if CPUHasAVX2() then + begin + FSimdLevel := TX86SimdLevel.AVX2; + FHasVPCLMULQDQ := CPUHasVPCLMULQDQ(); + end; + end; + end; + + FHasSHANI := CPUHasSHANI(); +end; + +class procedure TX86SimdFeatures.ApplyBuildOverrides(); +begin +{$IF DEFINED(HASHLIB_FORCE_SCALAR)} + FSimdLevel := TX86SimdLevel.Scalar; + FHasSHANI := False; + FHasPCLMULQDQ := False; + FHasVPCLMULQDQ := False; +{$ELSEIF DEFINED(HASHLIB_FORCE_SSE2)} + if FSimdLevel > TX86SimdLevel.SSE2 then + FSimdLevel := TX86SimdLevel.SSE2; + FHasSHANI := False; + FHasPCLMULQDQ := False; + FHasVPCLMULQDQ := False; +{$ELSEIF DEFINED(HASHLIB_FORCE_SSSE3)} + if FSimdLevel > TX86SimdLevel.SSSE3 then + FSimdLevel := TX86SimdLevel.SSSE3; + FHasSHANI := False; + FHasPCLMULQDQ := False; + FHasVPCLMULQDQ := False; +{$IFEND} +end; + +class function TX86SimdFeatures.GetSimdLevel(): TX86SimdLevel; +begin + Result := FSimdLevel; +end; + +class function TX86SimdFeatures.HasSSE2(): Boolean; +begin + Result := FSimdLevel >= TX86SimdLevel.SSE2; +end; + +class function TX86SimdFeatures.HasSSSE3(): Boolean; +begin + Result := FSimdLevel >= TX86SimdLevel.SSSE3; +end; + +class function TX86SimdFeatures.HasAVX2(): Boolean; +begin + Result := FSimdLevel >= TX86SimdLevel.AVX2; +end; + +class function TX86SimdFeatures.HasSHANI(): Boolean; +begin + Result := FHasSHANI; +end; + +class function TX86SimdFeatures.HasPCLMULQDQ(): Boolean; +begin + Result := FHasPCLMULQDQ; +end; + +class function TX86SimdFeatures.HasVPCLMULQDQ(): Boolean; +begin + Result := FHasVPCLMULQDQ; +end; + +initialization + TX86SimdFeatures.ProbeHardwareAndCache(); + TX86SimdFeatures.ApplyBuildOverrides(); + +end. From 55710cbd715ff1520108ce4a80f92c81a1eccb7f Mon Sep 17 00:00:00 2001 From: Ugochukwu Mmaduekwe Date: Sat, 11 Apr 2026 02:37:02 +0100 Subject: [PATCH 2/4] Add AesNI check for X86 --- HashLib/src/Utils/HlpX86SimdFeatures.pas | 38 ++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 3 deletions(-) diff --git a/HashLib/src/Utils/HlpX86SimdFeatures.pas b/HashLib/src/Utils/HlpX86SimdFeatures.pas index f7e3f40..2eb7793 100644 --- a/HashLib/src/Utils/HlpX86SimdFeatures.pas +++ b/HashLib/src/Utils/HlpX86SimdFeatures.pas @@ -21,6 +21,7 @@ TCpuIdResult = record FHasSHANI: Boolean; FHasPCLMULQDQ: Boolean; FHasVPCLMULQDQ: Boolean; + FHasAESNI: Boolean; strict private class function CPUHasSSE2(): Boolean; static; @@ -29,6 +30,7 @@ TCpuIdResult = record class function CPUHasSHANI(): Boolean; static; class function CPUHasPCLMULQDQ(): Boolean; static; class function CPUHasVPCLMULQDQ(): Boolean; static; + class function CPUHasAESNI(): Boolean; static; private class procedure ProbeHardwareAndCache(); static; @@ -42,6 +44,7 @@ TCpuIdResult = record class function HasSHANI(): Boolean; static; class function HasPCLMULQDQ(): Boolean; static; class function HasVPCLMULQDQ(): Boolean; static; + class function HasAESNI(): Boolean; static; end; implementation @@ -82,6 +85,7 @@ class function TX86SimdFeatures.CPUHasSSSE3(): Boolean; begin {$IFDEF HASHLIB_X86_SIMD} CpuIdQuery(1, 0, @LCpuId); + // SSSE3: ECX bit 9 Result := (LCpuId.RegECX and (1 shl 9)) <> 0; {$ELSE} Result := False; @@ -98,18 +102,18 @@ class function TX86SimdFeatures.CPUHasAVX2(): Boolean; {$IFDEF HASHLIB_X86_SIMD} CpuIdQuery(1, 0, @LCpuId); - // Check OSXSAVE bit -- OS must support XSAVE/XRSTOR + // OSXSAVE: ECX bit 27 (required for OS AVX state saving) if (LCpuId.RegECX and (1 shl 27)) = 0 then Exit(False); - // Check XCR0 for SSE and AVX state saving + // XCR0 bits 1 and 2 must be set for AVX state support LXcr0 := 0; XGetBvQuery(@LXcr0); if (UInt32(LXcr0) and $06) <> $06 then Exit(False); - // Check AVX2 bit in structured extended feature flags CpuIdQuery(7, 0, @LCpuId); + // AVX2: EBX bit 5 Result := (LCpuId.RegEBX and (1 shl 5)) <> 0; {$ELSE} Result := False; @@ -124,6 +128,7 @@ class function TX86SimdFeatures.CPUHasSHANI(): Boolean; begin {$IFDEF HASHLIB_X86_SIMD} CpuIdQuery(7, 0, @LCpuId); + // SHA-NI: EBX bit 29 Result := (LCpuId.RegEBX and (1 shl 29)) <> 0; {$ELSE} Result := False; @@ -138,6 +143,7 @@ class function TX86SimdFeatures.CPUHasPCLMULQDQ(): Boolean; begin {$IFDEF HASHLIB_X86_SIMD} CpuIdQuery(1, 0, @LCpuId); + // PCLMULQDQ: ECX bit 1 Result := (LCpuId.RegECX and (1 shl 1)) <> 0; {$ELSE} Result := False; @@ -152,18 +158,35 @@ class function TX86SimdFeatures.CPUHasVPCLMULQDQ(): Boolean; begin {$IFDEF HASHLIB_X86_SIMD} CpuIdQuery(7, 0, @LCpuId); + // VPCLMULQDQ: ECX bit 10 Result := (LCpuId.RegECX and (1 shl 10)) <> 0; {$ELSE} Result := False; {$ENDIF} end; +class function TX86SimdFeatures.CPUHasAESNI(): Boolean; +{$IFDEF HASHLIB_X86_SIMD} +var + LCpuId: TCpuIdResult; +{$ENDIF} +begin +{$IFDEF HASHLIB_X86_SIMD} + CpuIdQuery(1, 0, @LCpuId); + // AES-NI: ECX bit 25 + Result := (LCpuId.RegECX and (1 shl 25)) <> 0; +{$ELSE} + Result := False; +{$ENDIF} +end; + class procedure TX86SimdFeatures.ProbeHardwareAndCache(); begin FSimdLevel := TX86SimdLevel.Scalar; FHasSHANI := False; FHasPCLMULQDQ := False; FHasVPCLMULQDQ := False; + FHasAESNI := False; if CPUHasSSE2() then begin @@ -181,6 +204,7 @@ class procedure TX86SimdFeatures.ProbeHardwareAndCache(); end; FHasSHANI := CPUHasSHANI(); + FHasAESNI := CPUHasAESNI(); end; class procedure TX86SimdFeatures.ApplyBuildOverrides(); @@ -190,18 +214,21 @@ class procedure TX86SimdFeatures.ApplyBuildOverrides(); FHasSHANI := False; FHasPCLMULQDQ := False; FHasVPCLMULQDQ := False; + FHasAESNI := False; {$ELSEIF DEFINED(HASHLIB_FORCE_SSE2)} if FSimdLevel > TX86SimdLevel.SSE2 then FSimdLevel := TX86SimdLevel.SSE2; FHasSHANI := False; FHasPCLMULQDQ := False; FHasVPCLMULQDQ := False; + FHasAESNI := False; {$ELSEIF DEFINED(HASHLIB_FORCE_SSSE3)} if FSimdLevel > TX86SimdLevel.SSSE3 then FSimdLevel := TX86SimdLevel.SSSE3; FHasSHANI := False; FHasPCLMULQDQ := False; FHasVPCLMULQDQ := False; + FHasAESNI := False; {$IFEND} end; @@ -240,6 +267,11 @@ class function TX86SimdFeatures.HasVPCLMULQDQ(): Boolean; Result := FHasVPCLMULQDQ; end; +class function TX86SimdFeatures.HasAESNI(): Boolean; +begin + Result := FHasAESNI; +end; + initialization TX86SimdFeatures.ProbeHardwareAndCache(); TX86SimdFeatures.ApplyBuildOverrides(); From 8b59c301922ee7c91f7298641e6c9e6bbef7c762 Mon Sep 17 00:00:00 2001 From: Ugochukwu Mmaduekwe Date: Sat, 11 Apr 2026 23:19:13 +0100 Subject: [PATCH 3/4] update defines --- HashLib/src/Include/HashLib.inc | 4 ---- HashLib/src/Include/HashLibFPC.inc | 16 ++++++---------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/HashLib/src/Include/HashLib.inc b/HashLib/src/Include/HashLib.inc index 51c207d..964626f 100644 --- a/HashLib/src/Include/HashLib.inc +++ b/HashLib/src/Include/HashLib.inc @@ -85,10 +85,6 @@ {$ENDIF} {$ENDIF} -{$IF DEFINED(HASHLIB_IOS) OR DEFINED(HASHLIB_MACOS)} - {$DEFINE HASHLIB_APPLE} -{$IFEND} - {$IFDEF ANDROID} {$DEFINE HASHLIB_ANDROID} {$ENDIF} diff --git a/HashLib/src/Include/HashLibFPC.inc b/HashLib/src/Include/HashLibFPC.inc index a60a506..dbdd893 100644 --- a/HashLib/src/Include/HashLibFPC.inc +++ b/HashLib/src/Include/HashLibFPC.inc @@ -64,6 +64,10 @@ {$DEFINE HASHLIB_MSWINDOWS} {$ENDIF} +{$IFDEF ANDROID} + {$DEFINE HASHLIB_ANDROID} +{$ENDIF} + {$IFDEF IOS} {$DEFINE HASHLIB_IOS} {$ENDIF} @@ -72,22 +76,14 @@ {$DEFINE HASHLIB_MACOS} {$IFEND} -{$IF DEFINED(HASHLIB_IOS) OR DEFINED(HASHLIB_MACOS)} - {$DEFINE HASHLIB_APPLE} +{$IF DEFINED(FREEBSD) OR DEFINED(NETBSD) OR DEFINED(OPENBSD) OR DEFINED(DRAGONFLY)} + {$DEFINE HASHLIB_BSD} {$IFEND} -{$IFDEF ANDROID} - {$DEFINE HASHLIB_ANDROID} -{$ENDIF} - {$IFDEF LINUX} {$DEFINE HASHLIB_LINUX} {$ENDIF} -{$IF DEFINED(FREEBSD) OR DEFINED(NETBSD) OR DEFINED(OPENBSD) OR DEFINED(DRAGONFLY)} - {$DEFINE HASHLIB_BSD} -{$IFEND} - {$IFDEF SOLARIS} {$DEFINE HASHLIB_SOLARIS} {$ENDIF} From d99c752b05f6efb5fd7036cb4b35d68688420f22 Mon Sep 17 00:00:00 2001 From: Ugochukwu Mmaduekwe Date: Sat, 11 Apr 2026 23:39:23 +0100 Subject: [PATCH 4/4] update defines --- HashLib/src/Include/HashLib.inc | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/HashLib/src/Include/HashLib.inc b/HashLib/src/Include/HashLib.inc index 964626f..fd9bced 100644 --- a/HashLib/src/Include/HashLib.inc +++ b/HashLib/src/Include/HashLib.inc @@ -119,25 +119,26 @@ // Uncomment ONE to force a specific X86 SIMD dispatch level: // {$DEFINE HASHLIB_FORCE_SSE2} // {$DEFINE HASHLIB_FORCE_SSSE3} + +{$IF (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SSE2)) + OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SSSE3)) + OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_SSSE3))} + {$MESSAGE ERROR 'Only one HASHLIB_FORCE_* define may be enabled at a time.'} +{$IFEND} + {$ENDIF} {$IFDEF HASHLIB_ARM_SIMD} // Uncomment ONE to force a specific Arm SIMD dispatch level: // {$DEFINE HASHLIB_FORCE_NEON} // {$DEFINE HASHLIB_FORCE_SVE} -{$ENDIF} -{$IF (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SSE2)) - OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SSSE3)) - OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_SSSE3)) - OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_NEON)) +{$IF (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_NEON)) OR (DEFINED(HASHLIB_FORCE_SCALAR) AND DEFINED(HASHLIB_FORCE_SVE)) - OR (DEFINED(HASHLIB_FORCE_NEON) AND DEFINED(HASHLIB_FORCE_SVE)) - OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_NEON)) - OR (DEFINED(HASHLIB_FORCE_SSE2) AND DEFINED(HASHLIB_FORCE_SVE)) - OR (DEFINED(HASHLIB_FORCE_SSSE3) AND DEFINED(HASHLIB_FORCE_NEON)) - OR (DEFINED(HASHLIB_FORCE_SSSE3) AND DEFINED(HASHLIB_FORCE_SVE))} + OR (DEFINED(HASHLIB_FORCE_NEON) AND DEFINED(HASHLIB_FORCE_SVE))} {$MESSAGE ERROR 'Only one HASHLIB_FORCE_* define may be enabled at a time.'} {$IFEND} +{$ENDIF} + (* &&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&&& *)