// NOTE: this span is a unified-diff rendering of ipcrypt_pfx_encrypt_ip16, not
// plain C. Lines marked '-' are the previous one-bit-per-iteration loop; lines
// marked '+' are the replacement that handles two bit positions per iteration.
//
// What the '+' version does, as visible here:
//   - copies ipcrypt->opaque into a local PFXState;
//   - uses prefix_start = 96 when ipcrypt_is_mapped_ipv4(ip16) is true, else 0;
//   - starts from a zeroed 16-byte output, with bytes 10 and 11 set to 0xff
//     when prefix_start is 96;
//   - for each pair of bit positions, encrypts two padded-prefix blocks under
//     both key schedules (st.k1keys and st.k2keys), XORs the two results per
//     block, and takes the low bit of byte 15 as that position's cipher bit;
//   - stores original_bit ^ cipher_bit into the output at the same position;
//   - finally overwrites ip16 in place with the 16 output bytes.
//
// The two lanes can be computed together because padded_prefix_1 is derived
// from padded_prefix_0 and the *input* bit original_bit_0 only; it never reads
// cipher_bit_0.
//
// NOTE(review): the bit ordering used by ipcrypt_pfx_get_bit / _set_bit /
// _shift_left and the value of ROUNDS are defined outside this span - confirm
// there before relying on them.
@@ -973,62 +973,101 @@ void
973973ipcrypt_pfx_encrypt_ip16 (const IPCryptPFX * ipcrypt , uint8_t ip16 [16 ])
974974{
975975 PFXState st ;
976- BlockVec e1 , e2 , e ;
976+ BlockVec e1_0 , e2_0 , e_0 , e1_1 , e2_1 , e_1 ;
977977 uint8_t encrypted_ip [16 ];
978- uint8_t padded_prefix [16 ];
979- uint8_t t [16 ];
978+ uint8_t padded_prefix_0 [ 16 ], padded_prefix_1 [16 ];
979+ uint8_t t_0 [ 16 ], t_1 [16 ];
980980 size_t i ;
981- unsigned int bit_pos ;
981+ unsigned int bit_pos_0 , bit_pos_1 ;
982982 unsigned int prefix_start = 0 ;
983983 unsigned int prefix_len_bits ;
984- uint8_t cipher_bit ;
985- uint8_t original_bit ;
984+ uint8_t cipher_bit_0 , cipher_bit_1 ;
985+ uint8_t original_bit_0 , original_bit_1 ;
986986
987987 memcpy (& st , ipcrypt -> opaque , sizeof st );
988988 if (ipcrypt_is_mapped_ipv4 (ip16 )) {
989989 prefix_start = 96 ;
990990 }
991991
992- ipcrypt_pfx_pad_prefix (padded_prefix , prefix_start );
992+ ipcrypt_pfx_pad_prefix (padded_prefix_0 , prefix_start );
993993
994994 memset (encrypted_ip , 0 , 16 );
995995 if (prefix_start == 96 ) {
996996 encrypted_ip [10 ] = 0xff ;
997997 encrypted_ip [11 ] = 0xff ;
998998 }
999999
1000- for (prefix_len_bits = prefix_start ; prefix_len_bits < 128 ; prefix_len_bits ++ ) {
// The step of 2 below relies on (128 - prefix_start) being even; the only
// values assigned to prefix_start in this function are 0 and 96, so the last
// pass has prefix_len_bits == 126 and bit_pos_1 == 0 (no unsigned underflow).
1000+ // Process two bits per iteration for better parallelism
1001+ for (prefix_len_bits = prefix_start ; prefix_len_bits < 128 ; prefix_len_bits += 2 ) {
1002+ // Prepare padded_prefix_1: the prefix input for the second bit handled in this same iteration
1003+ memcpy (padded_prefix_1 , padded_prefix_0 , 16 );
1004+ bit_pos_0 = 127 - prefix_len_bits ;
1005+ original_bit_0 = ipcrypt_pfx_get_bit (ip16 , bit_pos_0 );
1006+ ipcrypt_pfx_shift_left (padded_prefix_1 );
1007+ ipcrypt_pfx_set_bit (padded_prefix_1 , 0 , original_bit_0 );
1008+
// Round-key indexing differs between the two branches below:
//   AES_XENCRYPT branch: key 0 is passed to the first AES_XENCRYPT, the loop
//     uses keys 1 .. ROUNDS-2, AES_XENCRYPTLAST uses key ROUNDS-1 (the value
//     of i left by the loop), and key ROUNDS is applied with a final XOR128.
//   #else branch: key 0 is applied with XOR128 first, the loop uses keys
//     1 .. ROUNDS-1, and AES_ENCRYPTLAST uses key ROUNDS.
10011009#ifdef AES_XENCRYPT
1002- // For AArch64 with AES_XENCRYPT macros.
1003- e1 = AES_XENCRYPT (LOAD128 (padded_prefix ), st .k1keys [0 ]);
1004- e2 = AES_XENCRYPT (LOAD128 (padded_prefix ), st .k2keys [0 ]);
1010+ // For AArch64 with AES_XENCRYPT macros - process two encryptions in parallel
1011+ e1_0 = AES_XENCRYPT (LOAD128 (padded_prefix_0 ), st .k1keys [0 ]);
1012+ e2_0 = AES_XENCRYPT (LOAD128 (padded_prefix_0 ), st .k2keys [0 ]);
1013+ e1_1 = AES_XENCRYPT (LOAD128 (padded_prefix_1 ), st .k1keys [0 ]);
1014+ e2_1 = AES_XENCRYPT (LOAD128 (padded_prefix_1 ), st .k2keys [0 ]);
1015+
10051016 for (i = 1 ; i < ROUNDS - 1 ; i ++ ) {
1006- e1 = AES_XENCRYPT (e1 , st .k1keys [i ]);
1007- e2 = AES_XENCRYPT (e2 , st .k2keys [i ]);
1017+ e1_0 = AES_XENCRYPT (e1_0 , st .k1keys [i ]);
1018+ e2_0 = AES_XENCRYPT (e2_0 , st .k2keys [i ]);
1019+ e1_1 = AES_XENCRYPT (e1_1 , st .k1keys [i ]);
1020+ e2_1 = AES_XENCRYPT (e2_1 , st .k2keys [i ]);
10081021 }
1009- e1 = AES_XENCRYPTLAST (e1 , st .k1keys [i ]);
1010- e2 = AES_XENCRYPTLAST (e2 , st .k2keys [i ]);
1011- e1 = XOR128 (e1 , st .k1keys [ROUNDS ]);
1012- e2 = XOR128 (e2 , st .k2keys [ROUNDS ]);
1022+
1023+ e1_0 = AES_XENCRYPTLAST (e1_0 , st .k1keys [i ]);
1024+ e2_0 = AES_XENCRYPTLAST (e2_0 , st .k2keys [i ]);
1025+ e1_1 = AES_XENCRYPTLAST (e1_1 , st .k1keys [i ]);
1026+ e2_1 = AES_XENCRYPTLAST (e2_1 , st .k2keys [i ]);
1027+
1028+ e1_0 = XOR128 (e1_0 , st .k1keys [ROUNDS ]);
1029+ e2_0 = XOR128 (e2_0 , st .k2keys [ROUNDS ]);
1030+ e1_1 = XOR128 (e1_1 , st .k1keys [ROUNDS ]);
1031+ e2_1 = XOR128 (e2_1 , st .k2keys [ROUNDS ]);
10131032#else
1014- // For x86_64 or a fallback.
1015- e1 = XOR128 (LOAD128 (padded_prefix ), st .k1keys [0 ]);
1016- e2 = XOR128 (LOAD128 (padded_prefix ), st .k2keys [0 ]);
1033+ // For x86_64 or a fallback - process two encryptions in parallel
1034+ e1_0 = XOR128 (LOAD128 (padded_prefix_0 ), st .k1keys [0 ]);
1035+ e2_0 = XOR128 (LOAD128 (padded_prefix_0 ), st .k2keys [0 ]);
1036+ e1_1 = XOR128 (LOAD128 (padded_prefix_1 ), st .k1keys [0 ]);
1037+ e2_1 = XOR128 (LOAD128 (padded_prefix_1 ), st .k2keys [0 ]);
1038+
10171039 for (i = 1 ; i < ROUNDS ; i ++ ) {
1018- e1 = AES_ENCRYPT (e1 , st .k1keys [i ]);
1019- e2 = AES_ENCRYPT (e2 , st .k2keys [i ]);
1040+ e1_0 = AES_ENCRYPT (e1_0 , st .k1keys [i ]);
1041+ e2_0 = AES_ENCRYPT (e2_0 , st .k2keys [i ]);
1042+ e1_1 = AES_ENCRYPT (e1_1 , st .k1keys [i ]);
1043+ e2_1 = AES_ENCRYPT (e2_1 , st .k2keys [i ]);
10201044 }
1021- e1 = AES_ENCRYPTLAST (e1 , st .k1keys [ROUNDS ]);
1022- e2 = AES_ENCRYPTLAST (e2 , st .k2keys [ROUNDS ]);
1045+
1046+ e1_0 = AES_ENCRYPTLAST (e1_0 , st .k1keys [ROUNDS ]);
1047+ e2_0 = AES_ENCRYPTLAST (e2_0 , st .k2keys [ROUNDS ]);
1048+ e1_1 = AES_ENCRYPTLAST (e1_1 , st .k1keys [ROUNDS ]);
1049+ e2_1 = AES_ENCRYPTLAST (e2_1 , st .k2keys [ROUNDS ]);
10231050#endif
1024- e = XOR128 (e1 , e2 );
1025- STORE128 (t , e );
1026- cipher_bit = t [15 ] & 1 ;
1027- bit_pos = 127 - prefix_len_bits ;
1028- original_bit = ipcrypt_pfx_get_bit (ip16 , bit_pos );
1029- ipcrypt_pfx_set_bit (encrypted_ip , bit_pos , original_bit ^ cipher_bit );
1030- ipcrypt_pfx_shift_left (padded_prefix );
1031- ipcrypt_pfx_set_bit (padded_prefix , 0 , original_bit );
1051+
// Each cipher bit is the low bit of byte 15 of (k1 result XOR k2 result).
1052+ // Process results for first bit
1053+ e_0 = XOR128 (e1_0 , e2_0 );
1054+ STORE128 (t_0 , e_0 );
1055+ cipher_bit_0 = t_0 [15 ] & 1 ;
1056+
1057+ // Process results for second bit
1058+ e_1 = XOR128 (e1_1 , e2_1 );
1059+ STORE128 (t_1 , e_1 );
1060+ cipher_bit_1 = t_1 [15 ] & 1 ;
1061+ bit_pos_1 = bit_pos_0 - 1 ;
1062+ original_bit_1 = ipcrypt_pfx_get_bit (ip16 , bit_pos_1 );
1063+
1064+ ipcrypt_pfx_set_bit (encrypted_ip , bit_pos_0 , original_bit_0 ^ cipher_bit_0 );
1065+ ipcrypt_pfx_set_bit (encrypted_ip , bit_pos_1 , original_bit_1 ^ cipher_bit_1 );
1066+
1067+ // Update padded_prefix_0 for next iteration
1068+ ipcrypt_pfx_shift_left (padded_prefix_1 );
1069+ ipcrypt_pfx_set_bit (padded_prefix_1 , 0 , original_bit_1 );
1070+ memcpy (padded_prefix_0 , padded_prefix_1 , 16 );
10321071 }
10331072 memcpy (ip16 , encrypted_ip , 16 );
10341073}
0 commit comments