diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 6806acbc965..f87a188ebf8 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -8441,6 +8441,7 @@ static void GMULT(byte *x, byte m[256][WC_AES_BLOCK_SIZE]) XMEMCPY(x, Z, WC_AES_BLOCK_SIZE); #elif defined(WC_32BIT_CPU) +#ifndef WOLFSSL_USE_ALIGN byte Z[WC_AES_BLOCK_SIZE + WC_AES_BLOCK_SIZE]; byte a; word32* pZ; @@ -8472,6 +8473,24 @@ static void GMULT(byte *x, byte m[256][WC_AES_BLOCK_SIZE]) pm = (word32*)(m[x[0]]); px[0] = pZ[0] ^ pm[0]; px[1] = pZ[1] ^ pm[1]; px[2] = pZ[2] ^ pm[2]; px[3] = pZ[3] ^ pm[3]; +#else + byte Z[WC_AES_BLOCK_SIZE + WC_AES_BLOCK_SIZE]; + byte a; + int i; + + XMEMCPY(Z + 16, m[x[15]], WC_AES_BLOCK_SIZE); + a = Z[16 + 15]; + Z[15] = R[a][0]; + Z[16] ^= R[a][1]; + for (i = 14; i > 0; i--) { + xorbuf(Z + i + 1, m[x[i]], WC_AES_BLOCK_SIZE); + a = Z[16 + i]; + Z[i] = R[a][0]; + Z[i+1] ^= R[a][1]; + } + xorbuf(Z + 1, m[x[0]], WC_AES_BLOCK_SIZE); + XMEMCPY(x, Z + 1, WC_AES_BLOCK_SIZE); +#endif #else byte Z[WC_AES_BLOCK_SIZE + WC_AES_BLOCK_SIZE]; byte a; diff --git a/wolfcrypt/src/memory.c b/wolfcrypt/src/memory.c index 8d416e78470..93b0bd0d126 100644 --- a/wolfcrypt/src/memory.c +++ b/wolfcrypt/src/memory.c @@ -239,8 +239,8 @@ void wc_MemZero_Free(void) int i; fprintf(stderr, "[MEM_ZERO] Unseen: %d\n", nextIdx); for (i = 0; i < nextIdx; i++) { - fprintf(stderr, " %s - %p:%ld\n", memZero[i].name, memZero[i].addr, - memZero[i].len); + fprintf(stderr, " %s - %p:%lu\n", memZero[i].name, memZero[i].addr, + (unsigned long)memZero[i].len); } } /* Uninitialized value in next index. */ @@ -301,9 +301,10 @@ void wc_MemZero_Check(void* addr, size_t len) for (j = 0; j < memZero[i].len; j++) { if (((unsigned char*)memZero[i].addr)[j] != 0) { /* Byte not zero - abort! */ - fprintf(stderr, "\n[MEM_ZERO] %s:%p + %ld is not zero\n", - memZero[i].name, memZero[i].addr, j); - fprintf(stderr, "[MEM_ZERO] Checking %p:%ld\n", addr, len); + fprintf(stderr, "\n[MEM_ZERO] %s:%p + %lu is not zero\n", + memZero[i].name, memZero[i].addr, (unsigned long)j); + fprintf(stderr, "[MEM_ZERO] Checking %p:%lu\n", addr, + (unsigned long)len); #ifndef TEST_ALWAYS_RUN_TO_END abort(); #endif diff --git a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c index 726c02905f9..a758fbd3ba6 100644 --- a/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-32-curve25519_c.c @@ -59,9 +59,9 @@ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_init() +WC_OMIT_FRAME_POINTER void fe_init(void) #else -WC_OMIT_FRAME_POINTER void fe_init() +WC_OMIT_FRAME_POINTER void fe_init(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -81,9 +81,9 @@ WC_OMIT_FRAME_POINTER void fe_init() void fe_add_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_sub_op() +WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) #else -WC_OMIT_FRAME_POINTER void fe_add_sub_op() +WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -300,9 +300,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op() void fe_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sub_op() +WC_OMIT_FRAME_POINTER void fe_sub_op(void) #else -WC_OMIT_FRAME_POINTER void fe_sub_op() +WC_OMIT_FRAME_POINTER void fe_sub_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -379,9 +379,9 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b) void fe_add_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_op() +WC_OMIT_FRAME_POINTER void fe_add_op(void) #else -WC_OMIT_FRAME_POINTER void fe_add_op() +WC_OMIT_FRAME_POINTER void fe_add_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2510,9 +2510,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #else -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2905,9 +2905,9 @@ WC_OMIT_FRAME_POINTER void fe_mul_op() #else void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #else -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3086,9 +3086,9 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b) #if defined(WOLFSSL_ARM_ARCH) && (WOLFSSL_ARM_ARCH < 6) void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #else -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -3374,9 +3374,9 @@ WC_OMIT_FRAME_POINTER void fe_sq_op() #else void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #else -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG diff --git a/wolfcrypt/src/port/arm/armv8-aes-asm.S b/wolfcrypt/src/port/arm/armv8-aes-asm.S index fa48e67b178..7a3d2ef74bc 100644 --- a/wolfcrypt/src/port/arm/armv8-aes-asm.S +++ b/wolfcrypt/src/port/arm/armv8-aes-asm.S @@ -5736,7 +5736,7 @@ L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done: eor x14, x14, x14 - lsl x24, x4, #3 + ubfiz x24, x4, #3, #32 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -7099,10 +7099,10 @@ L_aes_gcm_encrypt_arm64_crypto_192_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_192_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -8637,10 +8637,10 @@ L_aes_gcm_encrypt_arm64_crypto_256_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_256_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -9944,10 +9944,10 @@ L_aes_gcm_encrypt_arm64_crypto_128_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_128_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -10537,7 +10537,7 @@ L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes: # Done GHASH L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done: eor x14, x14, x14 - lsl x24, x4, #3 + ubfiz x24, x4, #3, #32 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -11893,10 +11893,10 @@ L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_192_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -11945,6 +11945,7 @@ L_aes_gcm_decrypt_arm64_crypto_192_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded L_aes_gcm_decrypt_arm64_crypto_192_part_tag: + ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -13446,10 +13447,10 @@ L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_256_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -13506,6 +13507,7 @@ L_aes_gcm_decrypt_arm64_crypto_256_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded L_aes_gcm_decrypt_arm64_crypto_256_part_tag: + ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -14768,10 +14770,10 @@ L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_128_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -14816,6 +14818,7 @@ L_aes_gcm_decrypt_arm64_crypto_128_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded L_aes_gcm_decrypt_arm64_crypto_128_part_tag: + ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -15364,7 +15367,7 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done: eor x14, x14, x14 - lsl x24, x4, #3 + ubfiz x24, x4, #3, #32 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -16698,10 +16701,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -18207,10 +18210,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -19485,10 +19488,10 @@ L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero: # Done GHASH L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -20056,7 +20059,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes: # Done GHASH L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done: eor x14, x14, x14 - lsl x24, x4, #3 + ubfiz x24, x4, #3, #32 mov v28.d[0], x14 mov v28.d[1], x24 rev64 v28.16b, v28.16b @@ -21383,10 +21386,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -21434,6 +21437,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag: + ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -22907,10 +22911,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 aese v14.16b, v0.16b @@ -22966,6 +22970,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag: + ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -24200,10 +24205,10 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte: L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes: L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done: ld1 {v14.2d}, [x12] - lsl x8, x8, #3 + ubfiz x8, x8, #3, #32 rbit x8, x8 mov v28.d[0], x8 - lsl x2, x2, #3 + ubfiz x2, x2, #3, #32 rbit x2, x2 mov v28.d[1], x2 eor v26.16b, v26.16b, v28.16b @@ -24247,6 +24252,7 @@ L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done: ld1 {v28.16b}, [x5] b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag: + ubfiz x6, x6, #0, #32 eor v28.16b, v28.16b, v28.16b mov x17, x6 st1 {v28.2d}, [x11] @@ -24420,7 +24426,7 @@ L_aes_gcm_init_arm64_crypto_end_bytes: # Done GHASH L_aes_gcm_init_arm64_crypto_partial_done: eor x7, x7, x7 - lsl x13, x3, #3 + ubfiz x13, x3, #3, #32 mov v7.d[0], x7 mov v7.d[1], x13 rev64 v7.16b, v7.16b @@ -28782,10 +28788,10 @@ _AES_GCM_encrypt_final_AARCH64: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - lsl x4, x4, #3 + ubfiz x4, x4, #3, #32 rbit x4, x4 mov v0.d[0], x4 - lsl x3, x3, #3 + ubfiz x3, x3, #3, #32 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -32668,10 +32674,10 @@ _AES_GCM_decrypt_final_AARCH64: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - lsl x4, x4, #3 + ubfiz x4, x4, #3, #32 rbit x4, x4 mov v0.d[0], x4 - lsl x3, x3, #3 + ubfiz x3, x3, #3, #32 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -32696,6 +32702,7 @@ _AES_GCM_decrypt_final_AARCH64: ld1 {v0.16b}, [x1] b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded L_aes_gcm_decrypt_final_arm64_crypto_part_tag: + ubfiz x2, x2, #0, #32 eor v0.16b, v0.16b, v0.16b mov x10, x2 st1 {v0.2d}, [x0] @@ -32856,7 +32863,7 @@ L_aes_gcm_init_arm64_crypto_eor3_end_bytes: # Done GHASH L_aes_gcm_init_arm64_crypto_eor3_partial_done: eor x7, x7, x7 - lsl x13, x3, #3 + ubfiz x13, x3, #3, #32 mov v7.d[0], x7 mov v7.d[1], x13 rev64 v7.16b, v7.16b @@ -37114,10 +37121,10 @@ _AES_GCM_encrypt_final_AARCH64_EOR3: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - lsl x4, x4, #3 + ubfiz x4, x4, #3, #32 rbit x4, x4 mov v0.d[0], x4 - lsl x3, x3, #3 + ubfiz x3, x3, #3, #32 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -40915,10 +40922,10 @@ _AES_GCM_decrypt_final_AARCH64_EOR3: ld1 {v4.2d}, [x5] ushr v6.2d, v6.2d, #56 ld1 {v7.2d}, [x6] - lsl x4, x4, #3 + ubfiz x4, x4, #3, #32 rbit x4, x4 mov v0.d[0], x4 - lsl x3, x3, #3 + ubfiz x3, x3, #3, #32 rbit x3, x3 mov v0.d[1], x3 eor v5.16b, v5.16b, v0.16b @@ -40942,6 +40949,7 @@ _AES_GCM_decrypt_final_AARCH64_EOR3: ld1 {v0.16b}, [x1] b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag: + ubfiz x2, x2, #0, #32 eor v0.16b, v0.16b, v0.16b mov x10, x2 st1 {v0.2d}, [x0] diff --git a/wolfcrypt/src/port/arm/armv8-aes-asm_c.c b/wolfcrypt/src/port/arm/armv8-aes-asm_c.c index 7c6e43e9729..291d3d1214c 100644 --- a/wolfcrypt/src/port/arm/armv8-aes-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-aes-asm_c.c @@ -3540,8 +3540,6 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, byte* key, byte* tmp, word32* left, word32 nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" "ld1 {v15.2d}, [%x[reg]]\n\t" @@ -5191,7 +5189,6 @@ void AES_CTR_encrypt_AARCH64(const byte* in, byte* out, word32 sz, byte* reg, "rev x11, x10\n\t" "rev x12, x9\n\t" "stp x11, x12, [%x[reg]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [reg] "+r" (reg), [key] "+r" (key), [tmp] "+r" (tmp), [left] "+r" (left), [nr] "+r" (nr) : [in] "r" (in) @@ -5264,19 +5261,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-80]!\n\t" - "add x29, sp, #0\n\t" - "str %w[nr], [sp, #72]\n\t" - "str %x[reg], [sp, #64]\n\t" - "str %x[tmp], [sp, #56]\n\t" - "str %x[gcm_h], [sp, #48]\n\t" - "str %x[key], [sp, #40]\n\t" - "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [x10]\n\t" - "cmp w8, #0x40\n\t" + "ld1 {v22.2d}, [%x[gcm_h]]\n\t" + "cmp %w[aadSz], #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -5291,7 +5280,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp w8, #0x100\n\t" + "cmp %w[aadSz], #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -5322,7 +5311,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp w8, #0x400\n\t" + "cmp %w[aadSz], #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -5379,7 +5368,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_encrypt_arm64_crypto_h_done_%=:\n\t" - "lsr w14, w8, #4\n\t" + "lsr w14, %w[aadSz], #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -5617,41 +5606,41 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_done_%=:\n\t" - "and w14, w8, #15\n\t" + "and w14, %w[aadSz], #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_aad_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -5716,37 +5705,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" - "sub x11, x11, x24\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x24\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -5768,7 +5757,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "lsl x24, %x[nonceSz], #3\n\t" + "ubfiz x24, %x[nonceSz], #3, #32\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -5792,9 +5781,9 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_done_nonce_%=:\n\t" - "st1 {v13.2d}, [x12]\n\t" + "st1 {v13.2d}, [%x[reg]]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp w13, #12\n\t" + "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ @@ -5803,7 +5792,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -5836,7 +5825,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5853,7 +5842,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5870,7 +5859,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5887,7 +5876,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5904,7 +5893,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5921,7 +5910,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5938,7 +5927,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5956,7 +5945,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -5973,7 +5962,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -5994,7 +5983,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6015,7 +6004,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -6032,7 +6021,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6057,14 +6046,14 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -6105,7 +6094,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -6135,7 +6124,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -6163,7 +6152,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -6192,7 +6181,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -6220,7 +6209,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -6248,7 +6237,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -6278,7 +6267,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -6306,7 +6295,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" @@ -6331,7 +6320,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -6354,7 +6343,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6371,7 +6360,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -6388,7 +6377,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -6405,7 +6394,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -6517,10 +6506,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_192_start_1_%=\n\t" @@ -7031,37 +7020,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v16.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v16.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -7091,31 +7080,31 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_192_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_out_start_byte_%=\n\t" @@ -7126,10 +7115,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_start_zero_%=\n\t" - "sub x11, x11, #16\n\t" - "ld1 {v14.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], #16\n\t" + "ld1 {v14.2d}, [%x[tmp]]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -7150,11 +7139,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -7204,30 +7193,30 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_partial_%=:\n\t" - "st1 {v26.16b}, [x11]\n\t" + "st1 {v26.16b}, [%x[tmp]]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=\n\t" - "ldr x16, [x11], #8\n\t" + "ldr x16, [%x[tmp]], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=\n\t" - "ldr w16, [x11], #4\n\t" + "ldr w16, [%x[tmp]], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" - "ldrh w16, [x11], #2\n\t" + "ldrh w16, [%x[tmp]], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" - "ldrb w16, [x11], #1\n\t" + "ldrb w16, [%x[tmp]], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_192_tag_start_byte_%=\n\t" @@ -7243,7 +7232,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -7276,7 +7265,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7293,7 +7282,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7310,7 +7299,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7327,7 +7316,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7344,7 +7333,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7361,7 +7350,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7378,7 +7367,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7396,7 +7385,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7413,7 +7402,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7434,7 +7423,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7455,7 +7444,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7472,7 +7461,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7489,7 +7478,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7506,7 +7495,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7531,14 +7520,14 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -7579,7 +7568,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -7609,7 +7598,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -7637,7 +7626,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -7666,7 +7655,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -7694,7 +7683,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -7722,7 +7711,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -7752,7 +7741,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -7780,7 +7769,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" @@ -7805,7 +7794,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -7828,7 +7817,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7845,7 +7834,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7862,7 +7851,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7879,7 +7868,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -7896,7 +7885,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -7913,7 +7902,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -8025,10 +8014,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_256_start_1_%=\n\t" @@ -8151,7 +8140,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8342,7 +8331,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8483,7 +8472,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8556,10 +8545,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" @@ -8588,37 +8577,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v16.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v16.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -8647,38 +8636,38 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_256_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_out_start_byte_%=\n\t" @@ -8689,10 +8678,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_start_zero_%=\n\t" - "sub x11, x11, #16\n\t" - "ld1 {v14.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], #16\n\t" + "ld1 {v14.2d}, [%x[tmp]]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -8713,11 +8702,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -8753,19 +8742,19 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" - "ldr q11, [x9, #-32]\n\t" + "ldr q11, [%x[key], #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q12, [x9, #-16]\n\t" + "ldr q12, [%x[key], #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -8775,30 +8764,30 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_partial_%=:\n\t" - "st1 {v26.16b}, [x11]\n\t" + "st1 {v26.16b}, [%x[tmp]]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=\n\t" - "ldr x16, [x11], #8\n\t" + "ldr x16, [%x[tmp]], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=\n\t" - "ldr w16, [x11], #4\n\t" + "ldr w16, [%x[tmp]], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" - "ldrh w16, [x11], #2\n\t" + "ldrh w16, [%x[tmp]], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" - "ldrb w16, [x11], #1\n\t" + "ldrb w16, [%x[tmp]], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_256_tag_start_byte_%=\n\t" @@ -8814,7 +8803,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -8847,7 +8836,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8864,7 +8853,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8881,7 +8870,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8898,7 +8887,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8915,7 +8904,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8932,7 +8921,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8949,7 +8938,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -8967,7 +8956,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -8984,7 +8973,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -9005,7 +8994,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -9034,14 +9023,14 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -9082,7 +9071,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -9112,7 +9101,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -9140,7 +9129,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -9169,7 +9158,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -9197,7 +9186,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -9225,7 +9214,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -9255,7 +9244,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -9283,7 +9272,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v29.16b\n\t" @@ -9308,7 +9297,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -9331,7 +9320,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -9348,7 +9337,7 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -9460,10 +9449,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" - "ld1 {v10.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" + "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_128_start_1_%=\n\t" @@ -9930,37 +9919,37 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v16.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v16.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -9986,31 +9975,31 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_128_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_out_start_byte_%=\n\t" @@ -10021,10 +10010,10 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_start_zero_%=\n\t" - "sub x11, x11, #16\n\t" - "ld1 {v14.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], #16\n\t" + "ld1 {v14.2d}, [%x[tmp]]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -10045,11 +10034,11 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -10095,30 +10084,30 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_partial_%=:\n\t" - "st1 {v26.16b}, [x11]\n\t" + "st1 {v26.16b}, [%x[tmp]]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=\n\t" - "ldr x16, [x11], #8\n\t" + "ldr x16, [%x[tmp]], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=\n\t" - "ldr w16, [x11], #4\n\t" + "ldr w16, [%x[tmp]], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" - "ldrh w16, [x11], #2\n\t" + "ldrh w16, [%x[tmp]], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" - "ldrb w16, [x11], #1\n\t" + "ldrb w16, [%x[tmp]], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_128_tag_start_byte_%=\n\t" @@ -10127,7 +10116,6 @@ void AES_GCM_encrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_done_%=:\n\t" - "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), @@ -10148,19 +10136,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, int nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-80]!\n\t" - "add x29, sp, #0\n\t" - "str %w[nr], [sp, #72]\n\t" - "str %x[reg], [sp, #64]\n\t" - "str %x[tmp], [sp, #56]\n\t" - "str %x[gcm_h], [sp, #48]\n\t" - "str %x[key], [sp, #40]\n\t" - "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [x10]\n\t" - "cmp w8, #0x40\n\t" + "ld1 {v22.2d}, [%x[gcm_h]]\n\t" + "cmp %w[aadSz], #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -10175,7 +10155,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp w8, #0x100\n\t" + "cmp %w[aadSz], #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -10206,7 +10186,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp w8, #0x400\n\t" + "cmp %w[aadSz], #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -10263,7 +10243,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_decrypt_arm64_crypto_h_done_%=:\n\t" - "lsr w14, w8, #4\n\t" + "lsr w14, %w[aadSz], #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -10501,41 +10481,41 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_both_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_done_%=:\n\t" - "and w14, w8, #15\n\t" + "and w14, %w[aadSz], #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_aad_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -10600,37 +10580,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_end_bytes_%=:\n\t" - "sub x11, x11, x24\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x24\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -10652,7 +10632,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_decrypt_arm64_crypto_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "lsl x24, %x[nonceSz], #3\n\t" + "ubfiz x24, %x[nonceSz], #3, #32\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -10676,9 +10656,9 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_done_nonce_%=:\n\t" - "st1 {v13.2d}, [x12]\n\t" + "st1 {v13.2d}, [%x[reg]]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp w13, #12\n\t" + "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_start_256_%=\n\t" /* AES_GCM_192 */ @@ -10687,7 +10667,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -10720,7 +10700,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10737,7 +10717,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10754,7 +10734,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10771,7 +10751,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10788,7 +10768,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10805,7 +10785,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10822,7 +10802,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10840,7 +10820,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10857,7 +10837,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10878,7 +10858,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10899,7 +10879,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -10916,7 +10896,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -10941,14 +10921,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -10990,7 +10970,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -11020,7 +11000,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -11049,7 +11029,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -11077,7 +11057,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -11105,7 +11085,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -11135,7 +11115,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -11164,7 +11144,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -11191,7 +11171,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" @@ -11216,7 +11196,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -11238,7 +11218,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -11255,7 +11235,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -11272,7 +11252,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -11289,7 +11269,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -11401,10 +11381,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_192_start_1_%=\n\t" @@ -11916,37 +11896,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [x11]\n\t" + "st1 {v15.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v15.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v15.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -11995,30 +11975,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [x11]\n\t" + "st1 {v14.2d}, [%x[tmp]]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_out_start_byte_%=\n\t" @@ -12026,11 +12006,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_192_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -12080,50 +12060,51 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_part_tag_%=:\n\t" + "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [x11], #8\n\t" + "str x16, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [x11], #4\n\t" + "str w16, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [x11], #2\n\t" + "strh w16, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [x11], #1\n\t" + "strb w16, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_end_bytes_%=:\n\t" - "sub x11, x11, %x[tagSz]\n\t" - "ld1 {v28.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" + "ld1 {v28.2d}, [%x[tmp]]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [x11]\n\t" + "st1 {v26.2d}, [%x[tmp]]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add x11, x11, %x[tagSz]\n\t" + "add %x[tmp], %x[tmp], %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=:\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_192_calc_tag_byte_%=\n\t" - "subs x11, x11, #16\n\t" - "ld1 {v26.2d}, [x11]\n\t" + "subs %x[tmp], %x[tmp], #16\n\t" + "ld1 {v26.2d}, [%x[tmp]]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -12144,7 +12125,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -12177,7 +12158,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12194,7 +12175,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12211,7 +12192,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12228,7 +12209,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12245,7 +12226,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12262,7 +12243,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12279,7 +12260,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12297,7 +12278,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12314,7 +12295,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12335,7 +12316,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12356,7 +12337,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12373,7 +12354,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12390,7 +12371,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12407,7 +12388,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12432,14 +12413,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -12481,7 +12462,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -12511,7 +12492,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -12540,7 +12521,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -12568,7 +12549,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -12596,7 +12577,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -12626,7 +12607,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -12655,7 +12636,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -12682,7 +12663,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" @@ -12707,7 +12688,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -12729,7 +12710,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12746,7 +12727,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12763,7 +12744,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12780,7 +12761,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -12797,7 +12778,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -12814,7 +12795,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -12926,10 +12907,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_256_start_1_%=\n\t" @@ -13052,7 +13033,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13243,7 +13224,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13384,7 +13365,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13457,10 +13438,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" @@ -13489,37 +13470,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [x11]\n\t" + "st1 {v15.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v15.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v15.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -13564,40 +13545,40 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v28.d[1], v31.d[0]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" /* Done GHASH */ "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [x11]\n\t" + "st1 {v14.2d}, [%x[tmp]]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_out_start_byte_%=\n\t" @@ -13605,11 +13586,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_256_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -13645,19 +13626,19 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" - "ldr q11, [x9, #-32]\n\t" + "ldr q11, [%x[key], #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q12, [x9, #-16]\n\t" + "ldr q12, [%x[key], #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -13667,50 +13648,51 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_part_tag_%=:\n\t" + "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [x11], #8\n\t" + "str x16, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [x11], #4\n\t" + "str w16, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [x11], #2\n\t" + "strh w16, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [x11], #1\n\t" + "strb w16, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_end_bytes_%=:\n\t" - "sub x11, x11, %x[tagSz]\n\t" - "ld1 {v28.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" + "ld1 {v28.2d}, [%x[tmp]]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [x11]\n\t" + "st1 {v26.2d}, [%x[tmp]]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add x11, x11, %x[tagSz]\n\t" + "add %x[tmp], %x[tmp], %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=:\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_256_calc_tag_byte_%=\n\t" - "subs x11, x11, #16\n\t" - "ld1 {v26.2d}, [x11]\n\t" + "subs %x[tmp], %x[tmp], #16\n\t" + "ld1 {v26.2d}, [%x[tmp]]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -13731,7 +13713,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -13764,7 +13746,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13781,7 +13763,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13798,7 +13780,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13815,7 +13797,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13832,7 +13814,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13849,7 +13831,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13866,7 +13848,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13884,7 +13866,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13901,7 +13883,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -13922,7 +13904,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -13951,14 +13933,14 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -14000,7 +13982,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -14030,7 +14012,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -14059,7 +14041,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v26.16b, v31.16b\n\t" @@ -14087,7 +14069,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v26.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v30.16b, v30.16b, v26.16b\n\t" @@ -14115,7 +14097,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v30.16b, v30.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -14145,7 +14127,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -14174,7 +14156,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -14201,7 +14183,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v31.16b, v31.16b, v30.16b\n\t" @@ -14226,7 +14208,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -14248,7 +14230,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -14265,7 +14247,7 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -14377,10 +14359,10 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" - "ld1 {v10.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" + "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_128_start_1_%=\n\t" @@ -14847,37 +14829,37 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [x11]\n\t" + "st1 {v15.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v15.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v15.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -14922,30 +14904,30 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [x11]\n\t" + "st1 {v14.2d}, [%x[tmp]]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_out_start_byte_%=\n\t" @@ -14953,11 +14935,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_128_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -15003,50 +14985,51 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_part_tag_%=:\n\t" + "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [x11], #8\n\t" + "str x16, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [x11], #4\n\t" + "str w16, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [x11], #2\n\t" + "strh w16, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [x11], #1\n\t" + "strb w16, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_end_bytes_%=:\n\t" - "sub x11, x11, %x[tagSz]\n\t" - "ld1 {v28.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" + "ld1 {v28.2d}, [%x[tmp]]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [x11]\n\t" + "st1 {v26.2d}, [%x[tmp]]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add x11, x11, %x[tagSz]\n\t" + "add %x[tmp], %x[tmp], %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=:\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_128_calc_tag_byte_%=\n\t" - "subs x11, x11, #16\n\t" - "ld1 {v26.2d}, [x11]\n\t" + "subs %x[tmp], %x[tmp], #16\n\t" + "ld1 {v26.2d}, [%x[tmp]]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -15060,12 +15043,11 @@ int AES_GCM_decrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_done_%=:\n\t" - "ldp x29, x30, [sp], #0x50\n\t" - : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), - [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), - [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), - [nr] "+r" (nr) - : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) + : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), + [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), + [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), + [reg] "+r" (reg), [nr] "+r" (nr) + : [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", @@ -15082,19 +15064,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, word32 aadSz, byte* key, byte* gcm_h, byte* tmp, byte* reg, int nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-80]!\n\t" - "add x29, sp, #0\n\t" - "str %w[nr], [sp, #72]\n\t" - "str %x[reg], [sp, #64]\n\t" - "str %x[tmp], [sp, #56]\n\t" - "str %x[gcm_h], [sp, #48]\n\t" - "str %x[key], [sp, #40]\n\t" - "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [x10]\n\t" - "cmp w8, #0x40\n\t" + "ld1 {v22.2d}, [%x[gcm_h]]\n\t" + "cmp %w[aadSz], #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -15109,7 +15083,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp w8, #0x100\n\t" + "cmp %w[aadSz], #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -15139,7 +15113,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp w8, #0x400\n\t" + "cmp %w[aadSz], #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -15194,7 +15168,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_h_done_%=:\n\t" - "lsr w14, w8, #4\n\t" + "lsr w14, %w[aadSz], #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -15417,41 +15391,41 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_done_%=:\n\t" - "and w14, w8, #15\n\t" + "and w14, %w[aadSz], #15\n\t" "cbz w14, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -15514,37 +15488,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" - "sub x11, x11, x24\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x24\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -15565,7 +15539,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "lsl x24, %x[nonceSz], #3\n\t" + "ubfiz x24, %x[nonceSz], #3, #32\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -15588,9 +15562,9 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" - "st1 {v13.2d}, [x12]\n\t" + "st1 {v13.2d}, [%x[reg]]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp w13, #12\n\t" + "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_encrypt_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ @@ -15599,7 +15573,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -15632,7 +15606,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15649,7 +15623,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15666,7 +15640,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15683,7 +15657,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15700,7 +15674,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15717,7 +15691,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15734,7 +15708,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15752,7 +15726,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15769,7 +15743,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15790,7 +15764,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15811,7 +15785,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -15828,7 +15802,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -15853,14 +15827,14 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -15901,7 +15875,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -15931,7 +15905,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -15958,7 +15932,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -15986,7 +15960,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -16013,7 +15987,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -16039,7 +16013,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -16068,7 +16042,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -16095,7 +16069,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" @@ -16119,7 +16093,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -16142,7 +16116,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -16159,7 +16133,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -16176,7 +16150,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -16193,7 +16167,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -16297,10 +16271,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_1_%=\n\t" @@ -16800,37 +16774,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v16.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v16.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -16860,31 +16834,31 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v11.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" @@ -16895,10 +16869,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_start_zero_%=\n\t" - "sub x11, x11, #16\n\t" - "ld1 {v14.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], #16\n\t" + "ld1 {v14.2d}, [%x[tmp]]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -16918,11 +16892,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -16971,30 +16945,30 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_partial_%=:\n\t" - "st1 {v26.16b}, [x11]\n\t" + "st1 {v26.16b}, [%x[tmp]]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" - "ldr x16, [x11], #8\n\t" + "ldr x16, [%x[tmp]], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" - "ldr w16, [x11], #4\n\t" + "ldr w16, [%x[tmp]], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" - "ldrh w16, [x11], #2\n\t" + "ldrh w16, [%x[tmp]], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" - "ldrb w16, [x11], #1\n\t" + "ldrb w16, [%x[tmp]], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" @@ -17010,7 +16984,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -17043,7 +17017,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17060,7 +17034,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17077,7 +17051,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17094,7 +17068,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17111,7 +17085,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17128,7 +17102,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17145,7 +17119,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17163,7 +17137,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17180,7 +17154,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17201,7 +17175,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17222,7 +17196,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v3.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17239,7 +17213,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17256,7 +17230,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17273,7 +17247,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17298,14 +17272,14 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -17346,7 +17320,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -17376,7 +17350,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -17403,7 +17377,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -17431,7 +17405,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -17458,7 +17432,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17484,7 +17458,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -17513,7 +17487,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -17540,7 +17514,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" @@ -17564,7 +17538,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -17587,7 +17561,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17604,7 +17578,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17621,7 +17595,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17638,7 +17612,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -17655,7 +17629,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -17672,7 +17646,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -17776,10 +17750,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_1_%=\n\t" @@ -17902,7 +17876,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18089,7 +18063,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18226,7 +18200,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18297,10 +18271,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" @@ -18328,37 +18302,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v16.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v16.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -18387,38 +18361,38 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" @@ -18429,10 +18403,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_start_zero_%=\n\t" - "sub x11, x11, #16\n\t" - "ld1 {v14.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], #16\n\t" + "ld1 {v14.2d}, [%x[tmp]]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -18452,11 +18426,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -18492,18 +18466,18 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q11, [x9, #-32]\n\t" + "ldr q11, [%x[key], #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q12, [x9, #-16]\n\t" + "ldr q12, [%x[key], #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -18513,30 +18487,30 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_partial_%=:\n\t" - "st1 {v26.16b}, [x11]\n\t" + "st1 {v26.16b}, [%x[tmp]]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" - "ldr x16, [x11], #8\n\t" + "ldr x16, [%x[tmp]], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" - "ldr w16, [x11], #4\n\t" + "ldr w16, [%x[tmp]], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" - "ldrh w16, [x11], #2\n\t" + "ldrh w16, [%x[tmp]], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" - "ldrb w16, [x11], #1\n\t" + "ldrb w16, [%x[tmp]], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" @@ -18552,7 +18526,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -18585,7 +18559,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18602,7 +18576,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18619,7 +18593,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18636,7 +18610,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18653,7 +18627,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18670,7 +18644,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18687,7 +18661,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18705,7 +18679,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18722,7 +18696,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18743,7 +18717,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -18772,14 +18746,14 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v1.16b, v1.16b, v9.16b\n\t" "eor v2.16b, v2.16b, v10.16b\n\t" "eor v3.16b, v3.16b, v11.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v18.16b, v19.16b, v20.16b, v21.16b}, [%x[out]], #0x40\n\t" "st1 {v0.16b, v1.16b, v2.16b, v3.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -18820,7 +18794,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v9.s[3], w19\n\t" "mov v10.s[3], w17\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" @@ -18850,7 +18824,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v2.16b, v2.16b, #8\n\t" @@ -18877,7 +18851,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v1.16b, v1.16b, #8\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v24.1d\n\t" @@ -18905,7 +18879,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v25.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -18932,7 +18906,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -18958,7 +18932,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^7 */ @@ -18987,7 +18961,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v7.2d, v18.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -19014,7 +18988,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v31.16b, v31.16b, v29.16b, v30.16b\n\t" @@ -19038,7 +19012,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v19.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v20.16b}, [%x[in]], #16\n\t" @@ -19061,7 +19035,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -19078,7 +19052,7 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v18.16b, v18.16b, v14.16b\n\t" "eor v19.16b, v19.16b, v15.16b\n\t" "eor v20.16b, v20.16b, v16.16b\n\t" @@ -19182,10 +19156,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" - "ld1 {v10.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" + "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_1_%=\n\t" @@ -19641,37 +19615,37 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v16.16b, v16.16b, v16.16b\n\t" "mov w19, w14\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v16.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v16.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rev w16, w15\n\t" @@ -19697,31 +19671,31 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "eor v14.16b, v14.16b, v10.16b\n\t" "eor v16.16b, v16.16b, v14.16b\n\t" - "st1 {v16.2d}, [x11]\n\t" + "st1 {v16.2d}, [%x[tmp]]\n\t" "mov w19, w14\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub x19, x19, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub x19, x19, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub x19, x19, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs x19, x19, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" @@ -19732,10 +19706,10 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=:\n\t" "subs x17, x17, #1\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_start_zero_%=\n\t" - "sub x11, x11, #16\n\t" - "ld1 {v14.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], #16\n\t" + "ld1 {v14.2d}, [%x[tmp]]\n\t" "rbit v14.16b, v14.16b\n\t" "eor v15.16b, v26.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -19755,11 +19729,11 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -19804,30 +19778,30 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_partial_%=:\n\t" - "st1 {v26.16b}, [x11]\n\t" + "st1 {v26.16b}, [%x[tmp]]\n\t" "cmp %w[tagSz], #8\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" - "ldr x16, [x11], #8\n\t" + "ldr x16, [%x[tmp]], #8\n\t" "sub %w[tagSz], %w[tagSz], #8\n\t" "str x16, [%x[tag]], #8\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp %w[tagSz], #4\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" - "ldr w16, [x11], #4\n\t" + "ldr w16, [%x[tmp]], #4\n\t" "sub %w[tagSz], %w[tagSz], #4\n\t" "str w16, [%x[tag]], #4\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp %w[tagSz], #2\n\t" "b.lt L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" - "ldrh w16, [x11], #2\n\t" + "ldrh w16, [%x[tmp]], #2\n\t" "sub %w[tagSz], %w[tagSz], #2\n\t" "strh w16, [%x[tag]], #2\n\t" "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz %w[tagSz], L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" - "ldrb w16, [x11], #1\n\t" + "ldrb w16, [%x[tmp]], #1\n\t" "subs %w[tagSz], %w[tagSz], #1\n\t" "strb w16, [%x[tag]], #1\n\t" "b.ne L_aes_gcm_encrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" @@ -19836,7 +19810,6 @@ void AES_GCM_encrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_encrypt_arm64_crypto_eor3_done_%=:\n\t" - "ldp x29, x30, [sp], #0x50\n\t" : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), [tag] "+r" (tag), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), @@ -19857,19 +19830,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, int nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-80]!\n\t" - "add x29, sp, #0\n\t" - "str %w[nr], [sp, #72]\n\t" - "str %x[reg], [sp, #64]\n\t" - "str %x[tmp], [sp, #56]\n\t" - "str %x[gcm_h], [sp, #48]\n\t" - "str %x[key], [sp, #40]\n\t" - "str %w[aadSz], [sp, #32]\n\t" "movi v27.16b, #0x87\n\t" "eor v26.16b, v26.16b, v26.16b\n\t" "ushr v27.2d, v27.2d, #56\n\t" - "ld1 {v22.2d}, [x10]\n\t" - "cmp w8, #0x40\n\t" + "ld1 {v22.2d}, [%x[gcm_h]]\n\t" + "cmp %w[aadSz], #0x40\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #32\n\t" "csetm x17, lt\n\t" @@ -19884,7 +19849,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v31.1q, v29.2d, v27.2d\n\t" "mov v30.d[1], v29.d[0]\n\t" "eor v23.16b, v30.16b, v31.16b\n\t" - "cmp w8, #0x100\n\t" + "cmp %w[aadSz], #0x100\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x40\n\t" "csetm x17, lt\n\t" @@ -19914,7 +19879,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v30.d[1], v29.d[0]\n\t" "eor v25.16b, v30.16b, v31.16b\n\t" /* Done */ - "cmp w8, #0x400\n\t" + "cmp %w[aadSz], #0x400\n\t" "csetm x16, lt\n\t" "cmp %w[sz], #0x200\n\t" "csetm x17, lt\n\t" @@ -19969,7 +19934,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_h_done_%=:\n\t" - "lsr w14, w8, #4\n\t" + "lsr w14, %w[aadSz], #4\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_1_%=\n\t" "cmp w14, #16\n\t" @@ -20192,41 +20157,41 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_both_1_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_done_%=:\n\t" - "and w14, w8, #15\n\t" + "and w14, %w[aadSz], #15\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w14\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=\n\t" "ldr x19, [%x[aad]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=\n\t" "ldr w19, [%x[aad]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "ldrh w19, [%x[aad]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=\n\t" "ldrb w19, [%x[aad]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_aad_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_aad_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v26.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -20289,37 +20254,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "cbz x24, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov w20, w24\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp w20, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=\n\t" "ldr x19, [%x[nonce]], #8\n\t" "sub w20, w20, #8\n\t" - "str x19, [x11], #8\n\t" + "str x19, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_dw_%=:\n\t" "cmp w20, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=\n\t" "ldr w19, [%x[nonce]], #4\n\t" "sub w20, w20, #4\n\t" - "str w19, [x11], #4\n\t" + "str w19, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_sw_%=:\n\t" "cmp w20, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "ldrh w19, [%x[nonce]], #2\n\t" "sub w20, w20, #2\n\t" - "strh w19, [x11], #2\n\t" + "strh w19, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=:\n\t" "cbz w20, L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=\n\t" "ldrb w19, [%x[nonce]], #1\n\t" "subs w20, w20, #1\n\t" - "strb w19, [x11], #1\n\t" + "strb w19, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_end_bytes_%=:\n\t" - "sub x11, x11, x24\n\t" - "ld1 {v18.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x24\n\t" + "ld1 {v18.2d}, [%x[tmp]]\n\t" "rbit v18.16b, v18.16b\n\t" "eor v21.16b, v13.16b, v18.16b\n\t" /* X = C * H^1 */ @@ -20340,7 +20305,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_nonce_partial_done_%=:\n\t" "eor x14, x14, x14\n\t" - "lsl x24, %x[nonceSz], #3\n\t" + "ubfiz x24, %x[nonceSz], #3, #32\n\t" "mov v28.d[0], x14\n\t" "mov v28.d[1], x24\n\t" "rev64 v28.16b, v28.16b\n\t" @@ -20363,9 +20328,9 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w15, w15\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_done_nonce_%=:\n\t" - "st1 {v13.2d}, [x12]\n\t" + "st1 {v13.2d}, [%x[reg]]\n\t" "lsr w14, %w[sz], #4\n\t" - "cmp w13, #12\n\t" + "cmp %w[nr], #12\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_start_128_%=\n\t" "b.gt L_aes_gcm_decrypt_arm64_crypto_eor3_start_256_%=\n\t" /* AES_GCM_192 */ @@ -20374,7 +20339,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -20407,7 +20372,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20424,7 +20389,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20441,7 +20406,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20458,7 +20423,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20475,7 +20440,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20492,7 +20457,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20509,7 +20474,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20527,7 +20492,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20544,7 +20509,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20565,7 +20530,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20586,7 +20551,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20603,7 +20568,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20628,14 +20593,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -20677,7 +20642,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -20707,7 +20672,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -20735,7 +20700,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -20762,7 +20727,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20788,7 +20753,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -20817,7 +20782,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -20845,7 +20810,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -20871,7 +20836,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" @@ -20895,7 +20860,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -20917,7 +20882,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20934,7 +20899,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -20951,7 +20916,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -20968,7 +20933,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -21072,10 +21037,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_1_%=\n\t" @@ -21576,37 +21541,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [x11]\n\t" + "st1 {v15.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v15.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v15.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -21654,30 +21619,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [x11]\n\t" + "st1 {v14.2d}, [%x[tmp]]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_start_byte_%=\n\t" @@ -21685,11 +21650,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_eor3_192_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -21738,50 +21703,51 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_part_tag_%=:\n\t" + "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [x11], #8\n\t" + "str x16, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [x11], #4\n\t" + "str w16, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [x11], #2\n\t" + "strh w16, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [x11], #1\n\t" + "strb w16, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_end_bytes_%=:\n\t" - "sub x11, x11, %x[tagSz]\n\t" - "ld1 {v28.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" + "ld1 {v28.2d}, [%x[tmp]]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [x11]\n\t" + "st1 {v26.2d}, [%x[tmp]]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add x11, x11, %x[tagSz]\n\t" + "add %x[tmp], %x[tmp], %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=:\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_192_calc_tag_byte_%=\n\t" - "subs x11, x11, #16\n\t" - "ld1 {v26.2d}, [x11]\n\t" + "subs %x[tmp], %x[tmp], #16\n\t" + "ld1 {v26.2d}, [%x[tmp]]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_192_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -21802,7 +21768,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -21835,7 +21801,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21852,7 +21818,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21869,7 +21835,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21886,7 +21852,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21903,7 +21869,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21920,7 +21886,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21937,7 +21903,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21955,7 +21921,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -21972,7 +21938,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -21993,7 +21959,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22014,7 +21980,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22031,7 +21997,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22048,7 +22014,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22065,7 +22031,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22090,14 +22056,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -22139,7 +22105,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -22169,7 +22135,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -22197,7 +22163,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -22224,7 +22190,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22250,7 +22216,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -22279,7 +22245,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -22307,7 +22273,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -22333,7 +22299,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" @@ -22357,7 +22323,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -22379,7 +22345,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22396,7 +22362,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #176]\n\t" + "ldr q13, [%x[key], #176]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22413,7 +22379,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #192]\n\t" + "ldr q12, [%x[key], #192]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22430,7 +22396,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #208]\n\t" + "ldr q13, [%x[key], #208]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22447,7 +22413,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #224]\n\t" + "ldr q12, [%x[key], #224]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -22464,7 +22430,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -22568,10 +22534,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [x9], #0x40\n\t" - "ld1 {v12.2d}, [x9], #16\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d, v10.2d, v11.2d}, [%x[key]], #0x40\n\t" + "ld1 {v12.2d}, [%x[key]], #16\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_1_%=\n\t" @@ -22694,7 +22660,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -22881,7 +22847,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v16.16b, v16.16b\n\t" "aese v17.16b, v11.16b\n\t" "aesmc v17.16b, v17.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23018,7 +22984,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v11.16b\n\t" "aesmc v15.16b, v15.16b\n\t" - "ld1 {v29.2d, v30.2d}, [x9]\n\t" + "ld1 {v29.2d, v30.2d}, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23089,10 +23055,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v18.16b}, [%x[in]], #16\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" @@ -23120,37 +23086,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [x11]\n\t" + "st1 {v15.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v15.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v15.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -23195,39 +23161,39 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v26.16b, v28.16b, v30.16b\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" /* Done GHASH */ "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [x11]\n\t" + "st1 {v14.2d}, [%x[tmp]]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_start_byte_%=\n\t" @@ -23235,11 +23201,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_eor3_256_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "aese v14.16b, v0.16b\n\t" @@ -23275,18 +23241,18 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v14.16b, v9.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "mov v28.d[1], v31.d[0]\n\t" - "ldr q11, [x9, #-32]\n\t" + "ldr q11, [%x[key], #-32]\n\t" "aese v14.16b, v10.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v26.16b, v28.16b, v30.16b\n\t" - "ldr q12, [x9, #-16]\n\t" + "ldr q12, [%x[key], #-16]\n\t" "aese v14.16b, v11.16b\n\t" "aesmc v14.16b, v14.16b\n\t" - "ldr q29, [x9]\n\t" + "ldr q29, [%x[key]]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "rbit v26.16b, v26.16b\n\t" - "ldr q30, [x9, #16]\n\t" + "ldr q30, [%x[key], #16]\n\t" "aese v14.16b, v29.16b\n\t" "eor v14.16b, v14.16b, v30.16b\n\t" "eor v26.16b, v26.16b, v14.16b\n\t" @@ -23296,50 +23262,51 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_part_tag_%=:\n\t" + "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [x11], #8\n\t" + "str x16, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [x11], #4\n\t" + "str w16, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [x11], #2\n\t" + "strh w16, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [x11], #1\n\t" + "strb w16, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_end_bytes_%=:\n\t" - "sub x11, x11, %x[tagSz]\n\t" - "ld1 {v28.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" + "ld1 {v28.2d}, [%x[tmp]]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [x11]\n\t" + "st1 {v26.2d}, [%x[tmp]]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add x11, x11, %x[tagSz]\n\t" + "add %x[tmp], %x[tmp], %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=:\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_256_calc_tag_byte_%=\n\t" - "subs x11, x11, #16\n\t" - "ld1 {v26.2d}, [x11]\n\t" + "subs %x[tmp], %x[tmp], #16\n\t" + "ld1 {v26.2d}, [%x[tmp]]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_256_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -23360,7 +23327,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -23393,7 +23360,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "mov v10.s[3], w17\n\t" "rev w16, w15\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23410,7 +23377,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23427,7 +23394,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23444,7 +23411,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23461,7 +23428,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23478,7 +23445,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23495,7 +23462,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23513,7 +23480,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23530,7 +23497,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v12.16b\n\t" @@ -23551,7 +23518,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23580,14 +23547,14 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v9.16b, v9.16b, v1.16b\n\t" "eor v10.16b, v10.16b, v2.16b\n\t" "eor v11.16b, v11.16b, v3.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "st1 {v14.16b, v15.16b, v16.16b, v17.16b}, [%x[out]], #0x40\n\t" "st1 {v8.16b, v9.16b, v10.16b, v11.16b}, [%x[out]], #0x40\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_8_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_both_8_%=:\n\t" - "ldr q12, [x9]\n\t" + "ldr q12, [%x[key]]\n\t" "add w24, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "add w23, w15, #2\n\t" @@ -23629,7 +23596,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "rev w16, w15\n\t" "eor v18.16b, v18.16b, v26.16b\n\t" "mov v11.s[3], w16\n\t" - "ldr q13, [x9, #16]\n\t" + "ldr q13, [%x[key], #16]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X = C * H^1 */ @@ -23659,7 +23626,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ext v26.16b, v2.16b, v2.16b, #8\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #32]\n\t" + "ldr q12, [%x[key], #32]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull v31.1q, v26.1d, v23.1d\n\t" @@ -23687,7 +23654,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v26.2d, v24.2d\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #48]\n\t" + "ldr q13, [%x[key], #48]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" @@ -23714,7 +23681,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor3 v30.16b, v30.16b, v26.16b, v31.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #64]\n\t" + "ldr q12, [%x[key], #64]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23740,7 +23707,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #80]\n\t" + "ldr q13, [%x[key], #80]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" /* X += C * H^6 */ @@ -23769,7 +23736,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "pmull2 v26.1q, v6.2d, v19.2d\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #96]\n\t" + "ldr q12, [%x[key], #96]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "eor v28.16b, v28.16b, v31.16b\n\t" @@ -23797,7 +23764,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v29.16b, v29.16b, v26.16b\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #112]\n\t" + "ldr q13, [%x[key], #112]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ext v26.16b, v18.16b, v18.16b, #8\n\t" @@ -23823,7 +23790,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" "subs w14, w14, #8\n\t" - "ldr q12, [x9, #128]\n\t" + "ldr q12, [%x[key], #128]\n\t" "aese v14.16b, v13.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "pmull2 v30.1q, v31.2d, v27.2d\n\t" @@ -23847,7 +23814,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "ld1 {v20.16b}, [%x[in]], #16\n\t" "aese v11.16b, v13.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q13, [x9, #144]\n\t" + "ldr q13, [%x[key], #144]\n\t" "aese v14.16b, v12.16b\n\t" "aesmc v14.16b, v14.16b\n\t" "ld1 {v21.16b}, [%x[in]], #16\n\t" @@ -23869,7 +23836,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "aesmc v10.16b, v10.16b\n\t" "aese v11.16b, v12.16b\n\t" "aesmc v11.16b, v11.16b\n\t" - "ldr q12, [x9, #160]\n\t" + "ldr q12, [%x[key], #160]\n\t" "aese v14.16b, v13.16b\n\t" "eor v14.16b, v14.16b, v12.16b\n\t" "aese v15.16b, v13.16b\n\t" @@ -23886,7 +23853,7 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "eor v10.16b, v10.16b, v12.16b\n\t" "aese v11.16b, v13.16b\n\t" "eor v11.16b, v11.16b, v12.16b\n\t" - "ld1 {v13.2d}, [x12]\n\t" + "ld1 {v13.2d}, [%x[reg]]\n\t" "eor v14.16b, v14.16b, v18.16b\n\t" "eor v15.16b, v15.16b, v19.16b\n\t" "eor v16.16b, v16.16b, v20.16b\n\t" @@ -23990,10 +23957,10 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_4_%=:\n\t" - "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [x9], #0x40\n\t" - "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [x9], #0x40\n\t" - "ld1 {v8.2d, v9.2d}, [x9], #32\n\t" - "ld1 {v10.2d}, [x9]\n\t" + "ld1 {v0.2d, v1.2d, v2.2d, v3.2d}, [%x[key]], #0x40\n\t" + "ld1 {v4.2d, v5.2d, v6.2d, v7.2d}, [%x[key]], #0x40\n\t" + "ld1 {v8.2d, v9.2d}, [%x[key]], #32\n\t" + "ld1 {v10.2d}, [%x[key]]\n\t" "cmp w14, #1\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_done_%=\n\t" "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_1_%=\n\t" @@ -24449,37 +24416,37 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b.eq L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=\n\t" "eor v15.16b, v15.16b, v15.16b\n\t" "mov w19, w14\n\t" - "st1 {v15.2d}, [x11]\n\t" + "st1 {v15.2d}, [%x[tmp]]\n\t" "cmp x19, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=\n\t" "ldr x17, [%x[in]], #8\n\t" "sub x19, x19, #8\n\t" - "str x17, [x11], #8\n\t" + "str x17, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_dw_%=:\n\t" "cmp x19, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=\n\t" "ldr w17, [%x[in]], #4\n\t" "sub x19, x19, #4\n\t" - "str w17, [x11], #4\n\t" + "str w17, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_sw_%=:\n\t" "cmp x19, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "ldrh w17, [%x[in]], #2\n\t" "sub x19, x19, #2\n\t" - "strh w17, [x11], #2\n\t" + "strh w17, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=:\n\t" "cbz x19, L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=\n\t" "ldrb w17, [%x[in]], #1\n\t" "subs x19, x19, #1\n\t" - "strb w17, [x11], #1\n\t" + "strb w17, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_end_bytes_%=:\n\t" - "sub x11, x11, x14\n\t" - "ld1 {v15.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], x14\n\t" + "ld1 {v15.2d}, [%x[tmp]]\n\t" "add w15, w15, #1\n\t" "mov v14.16b, v13.16b\n\t" "rbit v15.16b, v15.16b\n\t" @@ -24523,30 +24490,30 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, /* Done GHASH */ "rbit v15.16b, v15.16b\n\t" "eor v14.16b, v14.16b, v15.16b\n\t" - "st1 {v14.2d}, [x11]\n\t" + "st1 {v14.2d}, [%x[tmp]]\n\t" "cmp w14, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=\n\t" - "ldr x17, [x11], #8\n\t" + "ldr x17, [%x[tmp]], #8\n\t" "sub w14, w14, #8\n\t" "str x17, [%x[out]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_dw_%=:\n\t" "cmp w14, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=\n\t" - "ldr w17, [x11], #4\n\t" + "ldr w17, [%x[tmp]], #4\n\t" "sub w14, w14, #4\n\t" "str w17, [%x[out]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_sw_%=:\n\t" "cmp w14, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" - "ldrh w17, [x11], #2\n\t" + "ldrh w17, [%x[tmp]], #2\n\t" "sub w14, w14, #2\n\t" "strh w17, [%x[out]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=:\n\t" "cbz w14, L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=\n\t" - "ldrb w17, [x11], #1\n\t" + "ldrb w17, [%x[tmp]], #1\n\t" "subs w14, w14, #1\n\t" "strb w17, [%x[out]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_start_byte_%=\n\t" @@ -24554,11 +24521,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "L_aes_gcm_decrypt_arm64_crypto_eor3_128_out_end_bytes_%=:\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_partial_done_%=:\n\t" - "ld1 {v14.2d}, [x12]\n\t" - "lsl x8, x8, #3\n\t" - "rbit x8, x8\n\t" - "mov v28.d[0], x8\n\t" - "lsl %x[sz], %x[sz], #3\n\t" + "ld1 {v14.2d}, [%x[reg]]\n\t" + "ubfiz %x[aadSz], %x[aadSz], #3, #32\n\t" + "rbit %x[aadSz], %x[aadSz]\n\t" + "mov v28.d[0], %x[aadSz]\n\t" + "ubfiz %x[sz], %x[sz], #3, #32\n\t" "rbit %x[sz], %x[sz]\n\t" "mov v28.d[1], %x[sz]\n\t" "eor v26.16b, v26.16b, v28.16b\n\t" @@ -24603,50 +24570,51 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, "b L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_part_tag_%=:\n\t" + "ubfiz %x[tagSz], %x[tagSz], #0, #32\n\t" "eor v28.16b, v28.16b, v28.16b\n\t" "mov x17, %x[tagSz]\n\t" - "st1 {v28.2d}, [x11]\n\t" + "st1 {v28.2d}, [%x[tmp]]\n\t" "cmp x17, #8\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=\n\t" "ldr x16, [%x[tag]], #8\n\t" "sub x17, x17, #8\n\t" - "str x16, [x11], #8\n\t" + "str x16, [%x[tmp]], #8\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_dw_%=:\n\t" "cmp x17, #4\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=\n\t" "ldr w16, [%x[tag]], #4\n\t" "sub x17, x17, #4\n\t" - "str w16, [x11], #4\n\t" + "str w16, [%x[tmp]], #4\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_sw_%=:\n\t" "cmp x17, #2\n\t" "b.lt L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "ldrh w16, [%x[tag]], #2\n\t" "sub x17, x17, #2\n\t" - "strh w16, [x11], #2\n\t" + "strh w16, [%x[tmp]], #2\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=:\n\t" "cbz x17, L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=\n\t" "ldrb w16, [%x[tag]], #1\n\t" "subs x17, x17, #1\n\t" - "strb w16, [x11], #1\n\t" + "strb w16, [%x[tmp]], #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_start_byte_%=\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_end_bytes_%=:\n\t" - "sub x11, x11, %x[tagSz]\n\t" - "ld1 {v28.2d}, [x11]\n\t" + "sub %x[tmp], %x[tmp], %x[tagSz]\n\t" + "ld1 {v28.2d}, [%x[tmp]]\n\t" "mov x17, #16\n\t" - "st1 {v26.2d}, [x11]\n\t" + "st1 {v26.2d}, [%x[tmp]]\n\t" "sub x17, x17, %x[tagSz]\n\t" - "add x11, x11, %x[tagSz]\n\t" + "add %x[tmp], %x[tmp], %x[tagSz]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=:\n\t" - "strb wzr, [x11], #1\n\t" + "strb wzr, [%x[tmp]], #1\n\t" "subs x17, x17, #1\n\t" "b.ne L_aes_gcm_decrypt_arm64_crypto_eor3_128_calc_tag_byte_%=\n\t" - "subs x11, x11, #16\n\t" - "ld1 {v26.2d}, [x11]\n\t" + "subs %x[tmp], %x[tmp], #16\n\t" + "ld1 {v26.2d}, [%x[tmp]]\n\t" "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_128_tag_loaded_%=:\n\t" "eor v28.16b, v28.16b, v26.16b\n\t" @@ -24660,12 +24628,11 @@ int AES_GCM_decrypt_AARCH64_EOR3(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_gcm_decrypt_arm64_crypto_eor3_done_%=:\n\t" - "ldp x29, x30, [sp], #0x50\n\t" - : [out] "+r" (out), [sz] "+r" (sz), [nonceSz] "+r" (nonceSz), - [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), [key] "+r" (key), - [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), [reg] "+r" (reg), - [nr] "+r" (nr) - : [in] "r" (in), [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) + : [in] "+r" (in), [out] "+r" (out), [sz] "+r" (sz), + [nonceSz] "+r" (nonceSz), [tagSz] "+r" (tagSz), [aadSz] "+r" (aadSz), + [key] "+r" (key), [gcm_h] "+r" (gcm_h), [tmp] "+r" (tmp), + [reg] "+r" (reg), [nr] "+r" (nr) + : [nonce] "r" (nonce), [tag] "r" (tag), [aad] "r" (aad) : "memory", "cc", "x14", "x15", "x16", "x17", "x19", "x20", "x21", "x22", "x23", "x24", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", @@ -24781,7 +24748,7 @@ void AES_GCM_init_AARCH64(byte* key, int nr, const byte* nonce, word32 nonceSz, "\n" "L_aes_gcm_init_arm64_crypto_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" - "lsl x13, %x[nonceSz], #3\n\t" + "ubfiz x13, %x[nonceSz], #3, #32\n\t" "mov v7.d[0], x7\n\t" "mov v7.d[1], x13\n\t" "rev64 v7.16b, v7.16b\n\t" @@ -25299,8 +25266,6 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -29113,7 +29078,6 @@ void AES_GCM_encrypt_update_AARCH64(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -29134,10 +29098,10 @@ void AES_GCM_encrypt_final_AARCH64(byte* tag, byte* authTag, word32 tbytes, "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "lsl %x[abytes], %x[abytes], #3\n\t" + "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "lsl %x[nbytes], %x[nbytes], #3\n\t" + "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -29207,8 +29171,6 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -33022,7 +32984,6 @@ void AES_GCM_decrypt_update_AARCH64(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -33039,17 +33000,15 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, int* res) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "lsl %x[abytes], %x[abytes], #3\n\t" + "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "lsl %x[nbytes], %x[nbytes], #3\n\t" + "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -33075,6 +33034,7 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "b L_aes_gcm_decrypt_final_arm64_crypto_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_part_tag_%=:\n\t" + "ubfiz %x[tbytes], %x[tbytes], #0, #32\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" @@ -33131,7 +33091,6 @@ void AES_GCM_decrypt_final_AARCH64(byte* tag, const byte* authTag, "and x8, x8, x11\n\t" "add w8, w8, #0xb4\n\t" "str w8, [%x[res]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [tag] "+r" (tag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr), [res] "+r" (res) @@ -33243,7 +33202,7 @@ void AES_GCM_init_AARCH64_EOR3(byte* key, int nr, const byte* nonce, "\n" "L_aes_gcm_init_arm64_crypto_eor3_partial_done_%=:\n\t" "eor x7, x7, x7\n\t" - "lsl x13, %x[nonceSz], #3\n\t" + "ubfiz x13, %x[nonceSz], #3, #32\n\t" "mov v7.d[0], x7\n\t" "mov v7.d[1], x13\n\t" "rev64 v7.16b, v7.16b\n\t" @@ -33741,8 +33700,6 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -37471,7 +37428,6 @@ void AES_GCM_encrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -37492,10 +37448,10 @@ void AES_GCM_encrypt_final_AARCH64_EOR3(byte* tag, byte* authTag, word32 tbytes, "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "lsl %x[abytes], %x[abytes], #3\n\t" + "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "lsl %x[nbytes], %x[nbytes], #3\n\t" + "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -37564,8 +37520,6 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, const byte* in, word32 nbytes, byte* tag, byte* h, byte* counter) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v13.2d}, [%x[counter]]\n\t" "movi v27.16b, #0x87\n\t" "ld1 {v26.2d}, [%x[tag]]\n\t" @@ -41295,7 +41249,6 @@ void AES_GCM_decrypt_update_AARCH64_EOR3(const byte* key, int nr, byte* out, "mov v13.s[3], w9\n\t" "st1 {v26.2d}, [%x[tag]]\n\t" "st1 {v13.2d}, [%x[counter]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [nr] "+r" (nr), [out] "+r" (out), [nbytes] "+r" (nbytes), [tag] "+r" (tag), [h] "+r" (h), [counter] "+r" (counter) : [key] "r" (key), [in] "r" (in) @@ -41312,17 +41265,15 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, int* res) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v5.2d}, [%x[tag]]\n\t" "movi v6.16b, #0x87\n\t" "ld1 {v4.2d}, [%x[h]]\n\t" "ushr v6.2d, v6.2d, #56\n\t" "ld1 {v7.2d}, [%x[initCtr]]\n\t" - "lsl %x[abytes], %x[abytes], #3\n\t" + "ubfiz %x[abytes], %x[abytes], #3, #32\n\t" "rbit %x[abytes], %x[abytes]\n\t" "mov v0.d[0], %x[abytes]\n\t" - "lsl %x[nbytes], %x[nbytes], #3\n\t" + "ubfiz %x[nbytes], %x[nbytes], #3, #32\n\t" "rbit %x[nbytes], %x[nbytes]\n\t" "mov v0.d[1], %x[nbytes]\n\t" "eor v5.16b, v5.16b, v0.16b\n\t" @@ -41347,6 +41298,7 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "b L_aes_gcm_decrypt_final_arm64_crypto_eor3_tag_loaded_%=\n\t" "\n" "L_aes_gcm_decrypt_final_arm64_crypto_eor3_part_tag_%=:\n\t" + "ubfiz %x[tbytes], %x[tbytes], #0, #32\n\t" "eor v0.16b, v0.16b, v0.16b\n\t" "mov x10, %x[tbytes]\n\t" "st1 {v0.2d}, [%x[tag]]\n\t" @@ -41403,7 +41355,6 @@ void AES_GCM_decrypt_final_AARCH64_EOR3(byte* tag, const byte* authTag, "and x8, x8, x11\n\t" "add w8, w8, #0xb4\n\t" "str w8, [%x[res]]\n\t" - "ldp x29, x30, [sp], #32\n\t" : [tag] "+r" (tag), [tbytes] "+r" (tbytes), [nbytes] "+r" (nbytes), [abytes] "+r" (abytes), [h] "+r" (h), [initCtr] "+r" (initCtr), [res] "+r" (res) @@ -41421,8 +41372,6 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key2]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key2]], #0x40\n\t" "ld1 {v4.16b}, [%x[i]]\n\t" @@ -42432,7 +42381,6 @@ void AES_XTS_encrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_xts_encrypt_arm64_crypto_done_%=:\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i) @@ -42448,8 +42396,6 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, const byte* i, byte* key, byte* key2, byte* tmp, int nr) { __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v16.2d, v17.2d, v18.2d, v19.2d}, [%x[key2]], #0x40\n\t" "ld1 {v20.2d, v21.2d, v22.2d, v23.2d}, [%x[key2]], #0x40\n\t" "ld1 {v4.16b}, [%x[i]]\n\t" @@ -43554,7 +43500,6 @@ void AES_XTS_decrypt_AARCH64(const byte* in, byte* out, word32 sz, #endif /* !NO_AES_128 */ "\n" "L_aes_xts_decrypt_arm64_crypto_done_%=:\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i) @@ -48906,8 +48851,6 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, const word8* te = L_AES_ARM64_NEON_te; const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" @@ -49953,7 +49896,6 @@ void AES_XTS_encrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_AES_XTS_encrypt_NEON_data_done_%=:\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te), [shuffle] "r" (shuffle) @@ -49974,8 +49916,6 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, const word8* shuffle = L_AES_ARM64_NEON_shift_rows_shuffle; const word8* invshuffle = L_AES_ARM64_NEON_shift_rows_invshuffle; __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ld1 {v16.16b, v17.16b, v18.16b, v19.16b}, [%[te]], #0x40\n\t" "ld1 {v20.16b, v21.16b, v22.16b, v23.16b}, [%[te]], #0x40\n\t" "ld1 {v24.16b, v25.16b, v26.16b, v27.16b}, [%[te]], #0x40\n\t" @@ -51485,7 +51425,6 @@ void AES_XTS_decrypt_NEON(const byte* in, byte* out, word32 sz, const byte* i, "st1 {v0.16b}, [%x[out]]\n\t" "\n" "L_AES_XTS_decrypt_NEON_data_done_%=:\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te), [td] "r" (td), @@ -54671,8 +54610,6 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, { const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "mov x9, #0x87\n\t" "mov x26, %x[key2]\n\t" "ldp x21, x22, [%x[i]]\n\t" @@ -55597,7 +55534,6 @@ void AES_XTS_encrypt(const byte* in, byte* out, word32 sz, const byte* i, "stp x10, x11, [%x[out]]\n\t" "\n" "L_AES_XTS_encrypt_done_data_%=:\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [te] "r" (te) @@ -55614,8 +55550,6 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, const word8* td4 = L_AES_ARM64_td4; const word32* te = L_AES_ARM64_te; __asm__ __volatile__ ( - "stp x29, x30, [sp, #-32]!\n\t" - "add x29, sp, #0\n\t" "ands w11, %w[sz], #15\n\t" "cset w11, ne\n\t" "lsl w11, w11, #4\n\t" @@ -56765,7 +56699,6 @@ void AES_XTS_decrypt(const byte* in, byte* out, word32 sz, const byte* i, "stp x12, x13, [%x[out]]\n\t" "\n" "L_AES_XTS_decrypt_done_data_%=:\n\t" - "ldp x29, x30, [sp], #32\n\t" : [out] "+r" (out), [sz] "+r" (sz), [key] "+r" (key), [key2] "+r" (key2), [tmp] "+r" (tmp), [nr] "+r" (nr) : [in] "r" (in), [i] "r" (i), [td] "r" (td), [td4] "r" (td4), diff --git a/wolfcrypt/src/port/arm/armv8-curve25519_c.c b/wolfcrypt/src/port/arm/armv8-curve25519_c.c index b8515b00549..7f1b14a4d62 100644 --- a/wolfcrypt/src/port/arm/armv8-curve25519_c.c +++ b/wolfcrypt/src/port/arm/armv8-curve25519_c.c @@ -38,7 +38,7 @@ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) #include -void fe_init() +void fe_init(void) { __asm__ __volatile__ ( "\n\t" @@ -229,8 +229,8 @@ int fe_isnonzero(const fe a) "orr %x[a], x1, x2\n\t" "orr x3, x3, x4\n\t" "orr %x[a], %x[a], x3\n\t" + : [a] "+r" (a) : - : [a] "r" (a) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6" ); return (word32)(size_t)a; @@ -248,8 +248,8 @@ int fe_isnegative(const fe a) "adc x5, x4, xzr\n\t" "and %x[a], x1, #1\n\t" "eor %x[a], %x[a], x5, lsr 63\n\t" + : [a] "+r" (a) : - : [a] "r" (a) : "memory", "cc", "x1", "x2", "x3", "x4", "x5", "x6" ); return (word32)(size_t)a; @@ -4362,7 +4362,7 @@ int curve25519_base(byte* r, const byte* n) /* Store */ "stp x14, x15, [%x[r]]\n\t" "stp x16, x17, [%x[r], #16]\n\t" - "mov x0, xzr\n\t" + "mov %x[r], xzr\n\t" "ldp x29, x30, [sp], #0xb0\n\t" : [r] "+r" (r) : [n] "r" (n), [x2] "r" (x2) @@ -6969,7 +6969,7 @@ int curve25519(byte* r, const byte* n, const byte* a) /* Store */ "stp x14, x15, [%x[r]]\n\t" "stp x16, x17, [%x[r], #16]\n\t" - "mov x0, xzr\n\t" + "mov %x[r], xzr\n\t" "ldp x29, x30, [sp], #0xc0\n\t" : [r] "+r" (r) : [n] "r" (n), [a] "r" (a) diff --git a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c index 54a92c47fc1..0c00ffbb48d 100644 --- a/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c +++ b/wolfcrypt/src/port/arm/armv8-mlkem-asm_c.c @@ -8406,11 +8406,11 @@ int mlkem_cmp_neon(const byte* a, const byte* b, int sz) "orr v8.16b, v8.16b, v10.16b\n\t" "ext v9.16b, v8.16b, v8.16b, #8\n\t" "orr v8.16b, v8.16b, v9.16b\n\t" - "mov x0, v8.d[0]\n\t" - "subs x0, x0, xzr\n\t" - "csetm w0, ne\n\t" - : [sz] "+r" (sz) - : [a] "r" (a), [b] "r" (b) + "mov %x[a], v8.d[0]\n\t" + "subs %x[a], %x[a], xzr\n\t" + "csetm %w[a], ne\n\t" + : [a] "+r" (a), [sz] "+r" (sz) + : [b] "r" (b) : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8", "v9", "v10", "v11" ); @@ -9089,7 +9089,7 @@ unsigned int mlkem_rej_uniform_neon(sword16* p, unsigned int len, const byte* r, "b L_mlkem_rej_uniform_loop_lt_4_%=\n\t" "\n" "L_mlkem_rej_uniform_done_%=:\n\t" - "mov x0, x12\n\t" + "mov %x[p], x12\n\t" : [p] "+r" (p), [len] "+r" (len), [rLen] "+r" (rLen) : [r] "r" (r), [mask] "r" (mask), [q] "r" (q), [bits] "r" (bits), [indices] "r" (indices) diff --git a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c index 3738854f551..2e4358f1878 100644 --- a/wolfcrypt/src/port/arm/thumb2-curve25519_c.c +++ b/wolfcrypt/src/port/arm/thumb2-curve25519_c.c @@ -59,9 +59,9 @@ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_init() +WC_OMIT_FRAME_POINTER void fe_init(void) #else -WC_OMIT_FRAME_POINTER void fe_init() +WC_OMIT_FRAME_POINTER void fe_init(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -81,9 +81,9 @@ WC_OMIT_FRAME_POINTER void fe_init() void fe_add_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_sub_op() +WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) #else -WC_OMIT_FRAME_POINTER void fe_add_sub_op() +WC_OMIT_FRAME_POINTER void fe_add_sub_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -189,9 +189,9 @@ WC_OMIT_FRAME_POINTER void fe_add_sub_op() void fe_sub_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sub_op() +WC_OMIT_FRAME_POINTER void fe_sub_op(void) #else -WC_OMIT_FRAME_POINTER void fe_sub_op() +WC_OMIT_FRAME_POINTER void fe_sub_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -261,9 +261,9 @@ WC_OMIT_FRAME_POINTER void fe_sub(fe r, const fe a, const fe b) void fe_add_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_add_op() +WC_OMIT_FRAME_POINTER void fe_add_op(void) #else -WC_OMIT_FRAME_POINTER void fe_add_op() +WC_OMIT_FRAME_POINTER void fe_add_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -1764,9 +1764,9 @@ WC_OMIT_FRAME_POINTER void fe_cmov_table(fe* r, const fe* base, signed char b) #ifdef WOLFSSL_ARM_ARCH_7M void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #else -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2155,9 +2155,9 @@ WC_OMIT_FRAME_POINTER void fe_mul_op() #else void fe_mul_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #else -WC_OMIT_FRAME_POINTER void fe_mul_op() +WC_OMIT_FRAME_POINTER void fe_mul_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2327,9 +2327,9 @@ WC_OMIT_FRAME_POINTER void fe_mul(fe r, const fe a, const fe b) #ifdef WOLFSSL_ARM_ARCH_7M void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #else -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG @@ -2611,9 +2611,9 @@ WC_OMIT_FRAME_POINTER void fe_sq_op() #else void fe_sq_op(void); #ifndef WOLFSSL_NO_VAR_ASSIGN_REG -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #else -WC_OMIT_FRAME_POINTER void fe_sq_op() +WC_OMIT_FRAME_POINTER void fe_sq_op(void) #endif /* !WOLFSSL_NO_VAR_ASSIGN_REG */ { #ifndef WOLFSSL_NO_VAR_ASSIGN_REG diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index e95bba5ae5e..c854bceb5d7 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -15616,6 +15616,18 @@ static wc_test_ret_t aes_cbc_large_msg_test(Aes* enc, Aes* dec) /* Iterate from one WC_AES_BLOCK_SIZE of bigMsg through the whole * message by WC_AES_BLOCK_SIZE for each size of AES key. */ for (keySz = 16; keySz <= 32; keySz += 8) { + #ifdef NO_AES_128 + if (keySz == 16) + continue; + #endif + #ifdef NO_AES_192 + if (keySz == 24) + continue; + #endif + #ifdef NO_AES_256 + if (keySz == 32) + continue; + #endif for (msgSz = WC_AES_BLOCK_SIZE; msgSz <= sizeof(bigMsg); msgSz += WC_AES_BLOCK_SIZE) { @@ -16377,7 +16389,9 @@ static wc_test_ret_t aes_xts_partial_test_common(XtsAes *aes, const unsigned char *c2, word32 c2Sz) { wc_test_ret_t ret = 0; +#if defined(WOLFSSL_AESXTS_STREAM) || defined(HAVE_AES_DECRYPT) byte buf[WC_AES_BLOCK_SIZE * 2 + 8]; +#endif byte cipher[WC_AES_BLOCK_SIZE * 2 + 8]; #ifdef WOLFSSL_AESXTS_STREAM struct XtsAesStreamData stream; @@ -56006,7 +56020,9 @@ static wc_test_ret_t test_mldsa_decode_level(const byte* rawKey, byte* der = NULL; wc_MlDsaKey *key = NULL; #else +#if !defined(WOLFSSL_MLDSA_NO_ASN1) && defined(WOLFSSL_ASN_TEMPLATE) byte der[MLDSA_MAX_PRV_KEY_DER_SIZE]; +#endif wc_MlDsaKey key[1]; #endif diff --git a/wolfssl/wolfcrypt/sha3.h b/wolfssl/wolfcrypt/sha3.h index cfa699146b0..2c93d5682d6 100644 --- a/wolfssl/wolfcrypt/sha3.h +++ b/wolfssl/wolfcrypt/sha3.h @@ -50,24 +50,43 @@ #endif /* in bytes */ +/* Digest and block sizes are macros (like the other hash headers, e.g. + * sha256.h) rather than enum values so they are visible to the preprocessor - + * e.g. the WC_MIN_DIGEST_SIZE selection in hash.h evaluates them in #if. */ +#define WC_SHA3_224_DIGEST_SIZE 28 +#define WC_SHA3_256_DIGEST_SIZE 32 +#define WC_SHA3_384_DIGEST_SIZE 48 +#define WC_SHA3_512_DIGEST_SIZE 64 + +#if !defined(HAVE_SELFTEST) || \ + defined(HAVE_SELFTEST_VERSION) && (HAVE_SELFTEST_VERSION >= 2) +/* These values are used for HMAC, not SHA-3 directly. + * They come from from FIPS PUB 202. */ +#define WC_SHA3_128_BLOCK_SIZE 168 +#define WC_SHA3_224_BLOCK_SIZE 144 +#define WC_SHA3_256_BLOCK_SIZE 136 +#define WC_SHA3_384_BLOCK_SIZE 104 +#define WC_SHA3_512_BLOCK_SIZE 72 +#else +/* For SELFTEST version < 2, define WC_SHA3_128_BLOCK_SIZE + * for Kyber/Dilithium */ +#define WC_SHA3_128_BLOCK_SIZE 168 +#endif + enum { /* SHAKE-128 */ WC_SHA3_128_COUNT = 21, WC_SHA3_224 = WC_HASH_TYPE_SHA3_224, - WC_SHA3_224_DIGEST_SIZE = 28, WC_SHA3_224_COUNT = 18, WC_SHA3_256 = WC_HASH_TYPE_SHA3_256, - WC_SHA3_256_DIGEST_SIZE = 32, WC_SHA3_256_COUNT = 17, WC_SHA3_384 = WC_HASH_TYPE_SHA3_384, - WC_SHA3_384_DIGEST_SIZE = 48, WC_SHA3_384_COUNT = 13, WC_SHA3_512 = WC_HASH_TYPE_SHA3_512, - WC_SHA3_512_DIGEST_SIZE = 64, WC_SHA3_512_COUNT = 9, #ifdef WOLFSSL_SHAKE128 @@ -77,20 +96,6 @@ enum { WC_SHAKE256 = WC_HASH_TYPE_SHAKE256, #endif -#if !defined(HAVE_SELFTEST) || \ - defined(HAVE_SELFTEST_VERSION) && (HAVE_SELFTEST_VERSION >= 2) - /* These values are used for HMAC, not SHA-3 directly. - * They come from from FIPS PUB 202. */ - WC_SHA3_128_BLOCK_SIZE = 168, - WC_SHA3_224_BLOCK_SIZE = 144, - WC_SHA3_256_BLOCK_SIZE = 136, - WC_SHA3_384_BLOCK_SIZE = 104, - WC_SHA3_512_BLOCK_SIZE = 72, -#else - /* For SELFTEST version < 2, define WC_SHA3_128_BLOCK_SIZE - * for Kyber/Dilithium */ - WC_SHA3_128_BLOCK_SIZE = 168, -#endif WOLF_ENUM_DUMMY_LAST_ELEMENT(WC_SHA3) }; diff --git a/wolfssl/wolfcrypt/sha512.h b/wolfssl/wolfcrypt/sha512.h index 4491700a2f3..791c153048a 100644 --- a/wolfssl/wolfcrypt/sha512.h +++ b/wolfssl/wolfcrypt/sha512.h @@ -224,7 +224,9 @@ struct wc_Sha512 { #endif /* HAVE_FIPS */ -#if defined(WOLFSSL_SHA512) +/* SHA-384 reuses the SHA-512 transform, so these internal functions are + * needed whenever either algorithm is enabled. */ +#if defined(WOLFSSL_SHA512) || defined(WOLFSSL_SHA384) #ifdef WOLFSSL_ARMASM #if !defined(WOLFSSL_ARMASM_NO_NEON) @@ -239,8 +241,10 @@ WOLFSSL_LOCAL void Transform_Sha512_Len_crypto(wc_Sha512* sha512, WOLFSSL_LOCAL void Transform_Sha512_Len_base(wc_Sha512* sha512, const byte* data, word32 len); #endif -#endif +#endif /* WOLFSSL_ARMASM */ +#endif /* WOLFSSL_SHA512 || WOLFSSL_SHA384 */ +#if defined(WOLFSSL_SHA512) WOLFSSL_API int wc_InitSha512(wc_Sha512* sha); WOLFSSL_API int wc_InitSha512_ex(wc_Sha512* sha, void* heap, int devId); WOLFSSL_API int wc_Sha512Update(wc_Sha512* sha, const byte* data, word32 len); diff --git a/wolfssl/wolfcrypt/sp_int.h b/wolfssl/wolfcrypt/sp_int.h index 31936d40753..97c2265934d 100644 --- a/wolfssl/wolfcrypt/sp_int.h +++ b/wolfssl/wolfcrypt/sp_int.h @@ -453,9 +453,10 @@ typedef struct sp_dh_ctx { #ifdef WOLFSSL_MYSQL_COMPATIBLE /* MySQL wants to be able to use 8192-bit numbers. */ #define SP_INT_BITS 8192 - #elif !defined(WOLFSSL_HAVE_SP_RSA) && !defined(WOLFSSL_HAVE_SP_DH) && \ - !defined(WOLFSSL_HAVE_SP_ECC) - /* Not using SP - must be SP math all. */ + #elif defined(WOLFSSL_SP_MATH_ALL) || \ + (!defined(WOLFSSL_HAVE_SP_RSA) && \ + !defined(WOLFSSL_HAVE_SP_DH) && !defined(WOLFSSL_HAVE_SP_ECC)) + /* Using multi-precision implementation. */ #if !defined(NO_RSA) || !defined(NO_DH) || !defined(NO_DSA) /* Support max size FFHDE parameters compiled in. */ #if !defined(NO_DH) && defined(HAVE_FFDHE_8192)