@@ -141,8 +141,23 @@ sinm_composite(const uint32_t *in1, const uint32_t *in2, uint32_t *out, int32_t
/* Thin forwarding wrappers: each macro hands its arguments to the intrinsic
 * named on the right (andnot_ps, add_epi32, sub_epi32) by way of
 * simd_prefix_float. simd_prefix_float is defined outside this view;
 * presumably it pastes the ISA-specific prefix onto the intrinsic name --
 * TODO confirm against its definition. */
141141#define simd__andnot_ps (a , b ) simd_prefix_float(andnot_ps(a, b))
142142#define simd__add_epi32 (a , b ) simd_prefix_float(add_epi32(a, b))
143143#define simd__sub_epi32 (a , b ) simd_prefix_float(sub_epi32(a, b))
144+
/* When the compiler advertises AVX or SSE4.1, forward max/min of 32-bit
 * integers to the max_epi32 / min_epi32 intrinsics through simd_prefix_float.
 * NOTE(review): if simd_prefix_float selects a 256-bit prefix under __AVX__,
 * the integer max/min intrinsics would need AVX2 rather than AVX -- confirm.
 * NOTE(review): compilers that do not predefine __SSE4_1__ will take the
 * other branch of this conditional even when SSE4.1 is usable -- verify this
 * is acceptable for the supported toolchains. */
145+ #if defined(__AVX__ ) || defined(__SSE4_1__ )
144146#define simd__max_epi32 (a , b ) simd_prefix_float(max_epi32(a, b))
145147#define simd__min_epi32 (a , b ) simd_prefix_float(min_epi32(a, b))
148+ #else
149+ static sinm__inline __m128i sinm__sse2_max_epi32 (__m128i a , __m128i b ) {
150+ __m128i mask = _mm_cmpgt_epi32 (a , b );
151+ return _mm_or_si128 (_mm_and_si128 (mask , a ), _mm_andnot_si128 (mask , b ));
152+ }
153+ static sinm__inline __m128i sinm__sse2_min_epi32 (__m128i a , __m128i b ) {
154+ __m128i mask = _mm_cmpgt_epi32 (b , a );
155+ return _mm_or_si128 (_mm_and_si128 (mask , a ), _mm_andnot_si128 (mask , b ));
156+ }
/* Fallback bindings for the branch without AVX/SSE4.1: route the generic
 * max/min names to the SSE2 helper functions sinm__sse2_max_epi32 and
 * sinm__sse2_min_epi32 instead of going through simd_prefix_float. */
157+ #define simd__max_epi32 (a , b ) sinm__sse2_max_epi32(a, b)
158+ #define simd__min_epi32 (a , b ) sinm__sse2_min_epi32(a, b)
159+ #endif
160+
/* More forwarding wrappers: loadu_ps takes a single argument a, while
 * srli_epi32 / slli_epi32 take a value a and a second argument i, all passed
 * unchanged to the like-named intrinsic through simd_prefix_float (defined
 * outside this view). */
146161#define simd__loadu_ps (a ) simd_prefix_float(loadu_ps(a))
147162#define simd__srli_epi32 (a , i ) simd_prefix_float(srli_epi32(a, i))
148163#define simd__slli_epi32 (a , i ) simd_prefix_float(slli_epi32(a, i))
0 commit comments