From e80c786ca7a7f77f44263932a94d37ee9cbaedbc Mon Sep 17 00:00:00 2001 From: "Sergey \"Shnatsel\" Davidoff" Date: Wed, 27 May 2026 21:00:15 +0100 Subject: [PATCH] Replace remaining NEON and WASM unsafe loads with checked_transmute_copy --- fearless_simd/src/generated/neon.rs | 27 ++++++++++------ fearless_simd/src/transmute.rs | 2 ++ fearless_simd_gen/src/mk_neon.rs | 48 +++++++++++++++++++++-------- 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/fearless_simd/src/generated/neon.rs b/fearless_simd/src/generated/neon.rs index b848f69cd..f89f760f7 100644 --- a/fearless_simd/src/generated/neon.rs +++ b/fearless_simd/src/generated/neon.rs @@ -780,7 +780,9 @@ impl Simd for Neon { #[inline(always)] fn from_bitmask_mask8x16(self, bits: u64) -> mask8x16 { unsafe { - let shifts = vld1q_s16([15, 14, 13, 12, 11, 10, 9, 8].as_ptr()); + let shifts = crate::transmute::checked_transmute_copy::<[i16; 8], int16x8_t>(&[ + 15, 14, 13, 12, 11, 10, 9, 8, + ]); let lo = vshlq_u16(vdupq_n_u16(bits as u16), shifts); let hi = vshlq_u16(vdupq_n_u16((bits >> 8) as u16), shifts); let lo = vcltq_s16(vreinterpretq_s16_u16(lo), vdupq_n_s16(0)); @@ -795,8 +797,9 @@ impl Simd for Neon { #[inline(always)] fn to_bitmask_mask8x16(self, a: mask8x16) -> u64 { unsafe { - let weights = - vld1q_u8([1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128].as_ptr()); + let weights = crate::transmute::checked_transmute_copy::<[u8; 16], uint8x16_t>(&[ + 1, 2, 4, 8, 16, 32, 64, 128, 1, 2, 4, 8, 16, 32, 64, 128, + ]); let bits = vandq_u8(vreinterpretq_u8_s8(a.into()), weights); let lo = vaddv_u8(vget_low_u8(bits)) as u64; let hi = vaddv_u8(vget_high_u8(bits)) as u64; @@ -1284,7 +1287,9 @@ impl Simd for Neon { #[inline(always)] fn from_bitmask_mask16x8(self, bits: u64) -> mask16x8 { unsafe { - let shifts = vld1q_s16([15, 14, 13, 12, 11, 10, 9, 8].as_ptr()); + let shifts = crate::transmute::checked_transmute_copy::<[i16; 8], int16x8_t>(&[ + 15, 14, 13, 12, 11, 10, 9, 8, + ]); let shifted = vshlq_u16(vdupq_n_u16(bits as u16), shifts); let mask = vcltq_s16(vreinterpretq_s16_u16(shifted), vdupq_n_s16(0)); vreinterpretq_s16_u16(mask).simd_into(self) @@ -1293,7 +1298,9 @@ impl Simd for Neon { #[inline(always)] fn to_bitmask_mask16x8(self, a: mask16x8) -> u64 { unsafe { - let weights = vld1q_u16([1, 2, 4, 8, 16, 32, 64, 128].as_ptr()); + let weights = crate::transmute::checked_transmute_copy::<[u16; 8], uint16x8_t>(&[ + 1, 2, 4, 8, 16, 32, 64, 128, + ]); let bits = vandq_u16(vreinterpretq_u16_s16(a.into()), weights); vaddvq_u16(bits) as u64 } @@ -1783,7 +1790,8 @@ impl Simd for Neon { #[inline(always)] fn from_bitmask_mask32x4(self, bits: u64) -> mask32x4 { unsafe { - let shifts = vld1q_s32([31, 30, 29, 28].as_ptr()); + let shifts = + crate::transmute::checked_transmute_copy::<[i32; 4], int32x4_t>(&[31, 30, 29, 28]); let shifted = vshlq_u32(vdupq_n_u32(bits as u32), shifts); let mask = vcltq_s32(vreinterpretq_s32_u32(shifted), vdupq_n_s32(0)); vreinterpretq_s32_u32(mask).simd_into(self) @@ -1792,7 +1800,8 @@ impl Simd for Neon { #[inline(always)] fn to_bitmask_mask32x4(self, a: mask32x4) -> u64 { unsafe { - let weights = vld1q_u32([1, 2, 4, 8].as_ptr()); + let weights = + crate::transmute::checked_transmute_copy::<[u32; 4], uint32x4_t>(&[1, 2, 4, 8]); let bits = vandq_u32(vreinterpretq_u32_s32(a.into()), weights); vaddvq_u32(bits) as u64 } @@ -2103,7 +2112,7 @@ impl Simd for Neon { #[inline(always)] fn from_bitmask_mask64x2(self, bits: u64) -> mask64x2 { unsafe { - let shifts = vld1q_s64([63, 62].as_ptr()); + let shifts = crate::transmute::checked_transmute_copy::<[i64; 2], int64x2_t>(&[63, 62]); let shifted = vshlq_u64(vdupq_n_u64(bits), shifts); let mask = vcltq_s64(vreinterpretq_s64_u64(shifted), vdupq_n_s64(0)); vreinterpretq_s64_u64(mask).simd_into(self) @@ -2112,7 +2121,7 @@ impl Simd for Neon { #[inline(always)] fn to_bitmask_mask64x2(self, a: mask64x2) -> u64 { unsafe { - let weights = vld1q_u64([1, 2].as_ptr()); + let weights = crate::transmute::checked_transmute_copy::<[u64; 2], uint64x2_t>(&[1, 2]); let bits = vandq_u64(vreinterpretq_u64_s64(a.into()), weights); vaddvq_u64(bits) } diff --git a/fearless_simd/src/transmute.rs b/fearless_simd/src/transmute.rs index 8f8315283..894d672d7 100644 --- a/fearless_simd/src/transmute.rs +++ b/fearless_simd/src/transmute.rs @@ -16,6 +16,7 @@ use core::arch::aarch64::{ int8x16_t, int8x16x2_t, int8x16x4_t, int16x8_t, int16x8x2_t, int16x8x4_t, int32x4_t, int32x4x2_t, int32x4x4_t, int64x2_t, int64x2x2_t, int64x2x4_t, uint8x16_t, uint8x16x2_t, uint8x16x4_t, uint16x8_t, uint16x8x2_t, uint16x8x4_t, uint32x4_t, uint32x4x2_t, uint32x4x4_t, + uint64x2_t, }; #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] use core::arch::wasm32::v128; @@ -160,6 +161,7 @@ const _: () = { unsafe impl SimdPod for uint32x4_t {} unsafe impl SimdPod for uint32x4x2_t {} unsafe impl SimdPod for uint32x4x4_t {} + unsafe impl SimdPod for uint64x2_t {} }; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/fearless_simd_gen/src/mk_neon.rs b/fearless_simd_gen/src/mk_neon.rs index 9765c06df..129d4052f 100644 --- a/fearless_simd_gen/src/mk_neon.rs +++ b/fearless_simd_gen/src/mk_neon.rs @@ -580,7 +580,10 @@ impl Neon { 8 => quote! { #method_sig { unsafe { - let shifts = vld1q_s16([15, 14, 13, 12, 11, 10, 9, 8].as_ptr()); + let shifts = + crate::transmute::checked_transmute_copy::<[i16; 8], int16x8_t>( + &[15, 14, 13, 12, 11, 10, 9, 8], + ); let lo = vshlq_u16(vdupq_n_u16(bits as u16), shifts); let hi = vshlq_u16(vdupq_n_u16((bits >> 8) as u16), shifts); let lo = vcltq_s16(vreinterpretq_s16_u16(lo), vdupq_n_s16(0)); @@ -595,7 +598,10 @@ impl Neon { 16 => quote! { #method_sig { unsafe { - let shifts = vld1q_s16([15, 14, 13, 12, 11, 10, 9, 8].as_ptr()); + let shifts = + crate::transmute::checked_transmute_copy::<[i16; 8], int16x8_t>( + &[15, 14, 13, 12, 11, 10, 9, 8], + ); let shifted = vshlq_u16(vdupq_n_u16(bits as u16), shifts); let mask = vcltq_s16(vreinterpretq_s16_u16(shifted), vdupq_n_s16(0)); vreinterpretq_s16_u16(mask).simd_into(self) @@ -605,7 +611,10 @@ impl Neon { 32 => quote! { #method_sig { unsafe { - let shifts = vld1q_s32([31, 30, 29, 28].as_ptr()); + let shifts = + crate::transmute::checked_transmute_copy::<[i32; 4], int32x4_t>( + &[31, 30, 29, 28], + ); let shifted = vshlq_u32(vdupq_n_u32(bits as u32), shifts); let mask = vcltq_s32(vreinterpretq_s32_u32(shifted), vdupq_n_s32(0)); vreinterpretq_s32_u32(mask).simd_into(self) @@ -615,7 +624,10 @@ impl Neon { 64 => quote! { #method_sig { unsafe { - let shifts = vld1q_s64([63, 62].as_ptr()); + let shifts = + crate::transmute::checked_transmute_copy::<[i64; 2], int64x2_t>( + &[63, 62], + ); let shifted = vshlq_u64(vdupq_n_u64(bits), shifts); let mask = vcltq_s64(vreinterpretq_s64_u64(shifted), vdupq_n_s64(0)); vreinterpretq_s64_u64(mask).simd_into(self) @@ -642,10 +654,13 @@ impl Neon { 8 => quote! { #method_sig { unsafe { - let weights = vld1q_u8([ - 1, 2, 4, 8, 16, 32, 64, 128, - 1, 2, 4, 8, 16, 32, 64, 128, - ].as_ptr()); + let weights = + crate::transmute::checked_transmute_copy::<[u8; 16], uint8x16_t>( + &[ + 1, 2, 4, 8, 16, 32, 64, 128, + 1, 2, 4, 8, 16, 32, 64, 128, + ], + ); let bits = vandq_u8(vreinterpretq_u8_s8(a.into()), weights); let lo = vaddv_u8(vget_low_u8(bits)) as u64; let hi = vaddv_u8(vget_high_u8(bits)) as u64; @@ -656,9 +671,10 @@ impl Neon { 16 => quote! { #method_sig { unsafe { - let weights = vld1q_u16([ - 1, 2, 4, 8, 16, 32, 64, 128, - ].as_ptr()); + let weights = + crate::transmute::checked_transmute_copy::<[u16; 8], uint16x8_t>( + &[1, 2, 4, 8, 16, 32, 64, 128], + ); let bits = vandq_u16(vreinterpretq_u16_s16(a.into()), weights); vaddvq_u16(bits) as u64 } @@ -667,7 +683,10 @@ impl Neon { 32 => quote! { #method_sig { unsafe { - let weights = vld1q_u32([1, 2, 4, 8].as_ptr()); + let weights = + crate::transmute::checked_transmute_copy::<[u32; 4], uint32x4_t>( + &[1, 2, 4, 8], + ); let bits = vandq_u32(vreinterpretq_u32_s32(a.into()), weights); vaddvq_u32(bits) as u64 } @@ -676,7 +695,10 @@ impl Neon { 64 => quote! { #method_sig { unsafe { - let weights = vld1q_u64([1, 2].as_ptr()); + let weights = + crate::transmute::checked_transmute_copy::<[u64; 2], uint64x2_t>( + &[1, 2], + ); let bits = vandq_u64(vreinterpretq_u64_s64(a.into()), weights); vaddvq_u64(bits) }