From f05d6a70801f1fc9dab28a62c779d411e8a095cc Mon Sep 17 00:00:00 2001 From: zhyajie Date: Tue, 24 Feb 2026 07:39:46 +0000 Subject: [PATCH] Fix uninitialized tmp in float_to_bf16_rtn_asm() causing wrong results under -O3 Initialize the local `tmp` to 0 at its declaration, ahead of the inline assembly in `float_to_bf16_rtn_asm()`. Without initialization, the compiler under -O3 may assign the `tmp` operand (%1) and the ROUND_BIAS_FOR_BF16 input operand (%3) to the same VGPR, causing v_bfe_u32 to overwrite the 0x7fff bias before v_add3_u32 reads it. This produces incorrect BF16 rounding for ~50% of inputs. --- include/ck_tile/core/numeric/bfloat16.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ck_tile/core/numeric/bfloat16.hpp b/include/ck_tile/core/numeric/bfloat16.hpp index e193c58915f..32f45699d63 100644 --- a/include/ck_tile/core/numeric/bfloat16.hpp +++ b/include/ck_tile/core/numeric/bfloat16.hpp @@ -174,7 +174,7 @@ uint16_t float_to_bf16_rtn_asm(float f) #else uint32_t check_nan; #endif - uint32_t tmp; + uint32_t tmp = 0; asm volatile("\n \ v_cmp_u_f32 %0, %2, %2 \n \ v_bfe_u32 %1, %2, 16, 1 \n \