Skip to content

Commit 6acf4f8

Browse files
committed
Merge branch 'main' of github.com:jedisct1/ipcrypt2
* 'main' of github.com:jedisct1/ipcrypt2: Improve parallelism in ipcrypt_pfx_encrypt_ip16()
2 parents e998c02 + 8b4e136 commit 6acf4f8

1 file changed

Lines changed: 71 additions & 32 deletions

File tree

src/ipcrypt2.c

Lines changed: 71 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -973,62 +973,101 @@ void
973973
ipcrypt_pfx_encrypt_ip16(const IPCryptPFX *ipcrypt, uint8_t ip16[16])
974974
{
975975
PFXState st;
976-
BlockVec e1, e2, e;
976+
BlockVec e1_0, e2_0, e_0, e1_1, e2_1, e_1;
977977
uint8_t encrypted_ip[16];
978-
uint8_t padded_prefix[16];
979-
uint8_t t[16];
978+
uint8_t padded_prefix_0[16], padded_prefix_1[16];
979+
uint8_t t_0[16], t_1[16];
980980
size_t i;
981-
unsigned int bit_pos;
981+
unsigned int bit_pos_0, bit_pos_1;
982982
unsigned int prefix_start = 0;
983983
unsigned int prefix_len_bits;
984-
uint8_t cipher_bit;
985-
uint8_t original_bit;
984+
uint8_t cipher_bit_0, cipher_bit_1;
985+
uint8_t original_bit_0, original_bit_1;
986986

987987
memcpy(&st, ipcrypt->opaque, sizeof st);
988988
if (ipcrypt_is_mapped_ipv4(ip16)) {
989989
prefix_start = 96;
990990
}
991991

992-
ipcrypt_pfx_pad_prefix(padded_prefix, prefix_start);
992+
ipcrypt_pfx_pad_prefix(padded_prefix_0, prefix_start);
993993

994994
memset(encrypted_ip, 0, 16);
995995
if (prefix_start == 96) {
996996
encrypted_ip[10] = 0xff;
997997
encrypted_ip[11] = 0xff;
998998
}
999999

1000-
for (prefix_len_bits = prefix_start; prefix_len_bits < 128; prefix_len_bits++) {
1000+
// Process two bits per iteration for better parallelism
1001+
for (prefix_len_bits = prefix_start; prefix_len_bits < 128; prefix_len_bits += 2) {
1002+
// Prepare padded_prefix_1 for the second bit handled in this iteration
1003+
memcpy(padded_prefix_1, padded_prefix_0, 16);
1004+
bit_pos_0 = 127 - prefix_len_bits;
1005+
original_bit_0 = ipcrypt_pfx_get_bit(ip16, bit_pos_0);
1006+
ipcrypt_pfx_shift_left(padded_prefix_1);
1007+
ipcrypt_pfx_set_bit(padded_prefix_1, 0, original_bit_0);
1008+
10011009
#ifdef AES_XENCRYPT
1002-
// For AArch64 with AES_XENCRYPT macros.
1003-
e1 = AES_XENCRYPT(LOAD128(padded_prefix), st.k1keys[0]);
1004-
e2 = AES_XENCRYPT(LOAD128(padded_prefix), st.k2keys[0]);
1010+
// For AArch64 with AES_XENCRYPT macros - interleave the k1/k2 encryptions of both prefixes
1011+
e1_0 = AES_XENCRYPT(LOAD128(padded_prefix_0), st.k1keys[0]);
1012+
e2_0 = AES_XENCRYPT(LOAD128(padded_prefix_0), st.k2keys[0]);
1013+
e1_1 = AES_XENCRYPT(LOAD128(padded_prefix_1), st.k1keys[0]);
1014+
e2_1 = AES_XENCRYPT(LOAD128(padded_prefix_1), st.k2keys[0]);
1015+
10051016
for (i = 1; i < ROUNDS - 1; i++) {
1006-
e1 = AES_XENCRYPT(e1, st.k1keys[i]);
1007-
e2 = AES_XENCRYPT(e2, st.k2keys[i]);
1017+
e1_0 = AES_XENCRYPT(e1_0, st.k1keys[i]);
1018+
e2_0 = AES_XENCRYPT(e2_0, st.k2keys[i]);
1019+
e1_1 = AES_XENCRYPT(e1_1, st.k1keys[i]);
1020+
e2_1 = AES_XENCRYPT(e2_1, st.k2keys[i]);
10081021
}
1009-
e1 = AES_XENCRYPTLAST(e1, st.k1keys[i]);
1010-
e2 = AES_XENCRYPTLAST(e2, st.k2keys[i]);
1011-
e1 = XOR128(e1, st.k1keys[ROUNDS]);
1012-
e2 = XOR128(e2, st.k2keys[ROUNDS]);
1022+
1023+
e1_0 = AES_XENCRYPTLAST(e1_0, st.k1keys[i]);
1024+
e2_0 = AES_XENCRYPTLAST(e2_0, st.k2keys[i]);
1025+
e1_1 = AES_XENCRYPTLAST(e1_1, st.k1keys[i]);
1026+
e2_1 = AES_XENCRYPTLAST(e2_1, st.k2keys[i]);
1027+
1028+
e1_0 = XOR128(e1_0, st.k1keys[ROUNDS]);
1029+
e2_0 = XOR128(e2_0, st.k2keys[ROUNDS]);
1030+
e1_1 = XOR128(e1_1, st.k1keys[ROUNDS]);
1031+
e2_1 = XOR128(e2_1, st.k2keys[ROUNDS]);
10131032
#else
1014-
// For x86_64 or a fallback.
1015-
e1 = XOR128(LOAD128(padded_prefix), st.k1keys[0]);
1016-
e2 = XOR128(LOAD128(padded_prefix), st.k2keys[0]);
1033+
// For x86_64 or a fallback - interleave the k1/k2 encryptions of both prefixes
1034+
e1_0 = XOR128(LOAD128(padded_prefix_0), st.k1keys[0]);
1035+
e2_0 = XOR128(LOAD128(padded_prefix_0), st.k2keys[0]);
1036+
e1_1 = XOR128(LOAD128(padded_prefix_1), st.k1keys[0]);
1037+
e2_1 = XOR128(LOAD128(padded_prefix_1), st.k2keys[0]);
1038+
10171039
for (i = 1; i < ROUNDS; i++) {
1018-
e1 = AES_ENCRYPT(e1, st.k1keys[i]);
1019-
e2 = AES_ENCRYPT(e2, st.k2keys[i]);
1040+
e1_0 = AES_ENCRYPT(e1_0, st.k1keys[i]);
1041+
e2_0 = AES_ENCRYPT(e2_0, st.k2keys[i]);
1042+
e1_1 = AES_ENCRYPT(e1_1, st.k1keys[i]);
1043+
e2_1 = AES_ENCRYPT(e2_1, st.k2keys[i]);
10201044
}
1021-
e1 = AES_ENCRYPTLAST(e1, st.k1keys[ROUNDS]);
1022-
e2 = AES_ENCRYPTLAST(e2, st.k2keys[ROUNDS]);
1045+
1046+
e1_0 = AES_ENCRYPTLAST(e1_0, st.k1keys[ROUNDS]);
1047+
e2_0 = AES_ENCRYPTLAST(e2_0, st.k2keys[ROUNDS]);
1048+
e1_1 = AES_ENCRYPTLAST(e1_1, st.k1keys[ROUNDS]);
1049+
e2_1 = AES_ENCRYPTLAST(e2_1, st.k2keys[ROUNDS]);
10231050
#endif
1024-
e = XOR128(e1, e2);
1025-
STORE128(t, e);
1026-
cipher_bit = t[15] & 1;
1027-
bit_pos = 127 - prefix_len_bits;
1028-
original_bit = ipcrypt_pfx_get_bit(ip16, bit_pos);
1029-
ipcrypt_pfx_set_bit(encrypted_ip, bit_pos, original_bit ^ cipher_bit);
1030-
ipcrypt_pfx_shift_left(padded_prefix);
1031-
ipcrypt_pfx_set_bit(padded_prefix, 0, original_bit);
1051+
1052+
// Process results for first bit
1053+
e_0 = XOR128(e1_0, e2_0);
1054+
STORE128(t_0, e_0);
1055+
cipher_bit_0 = t_0[15] & 1;
1056+
1057+
// Process results for second bit
1058+
e_1 = XOR128(e1_1, e2_1);
1059+
STORE128(t_1, e_1);
1060+
cipher_bit_1 = t_1[15] & 1;
1061+
bit_pos_1 = bit_pos_0 - 1;
1062+
original_bit_1 = ipcrypt_pfx_get_bit(ip16, bit_pos_1);
1063+
1064+
ipcrypt_pfx_set_bit(encrypted_ip, bit_pos_0, original_bit_0 ^ cipher_bit_0);
1065+
ipcrypt_pfx_set_bit(encrypted_ip, bit_pos_1, original_bit_1 ^ cipher_bit_1);
1066+
1067+
// Advance the prefix past the second bit and store it in padded_prefix_0 for the next iteration
1068+
ipcrypt_pfx_shift_left(padded_prefix_1);
1069+
ipcrypt_pfx_set_bit(padded_prefix_1, 0, original_bit_1);
1070+
memcpy(padded_prefix_0, padded_prefix_1, 16);
10321071
}
10331072
memcpy(ip16, encrypted_ip, 16);
10341073
}

0 commit comments

Comments
 (0)