diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index bd15513f199d71..a67aa413906fac 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -637,6 +637,8 @@ class CodeGen final : public CodeGenInterface void genAmd64EmitterUnitTestsApx(); void genAmd64EmitterUnitTestsAvx10v2(); void genAmd64EmitterUnitTestsCCMP(); + void genAmd64EmitterUnitTestsCFCMOV(); + void genAmd64EmitterUnitTestsCTEST(); #endif #endif // defined(DEBUG) @@ -1652,9 +1654,11 @@ class CodeGen final : public CodeGenInterface static insOpts ShiftOpToInsOpts(genTreeOps op); #elif defined(TARGET_XARCH) static instruction JumpKindToCmov(emitJumpKind condition); +#ifdef TARGET_AMD64 static instruction JumpKindToCcmp(emitJumpKind condition); static insOpts OptsFromCFlags(insCflags flags); -#endif +#endif // TARGET_AMD64 +#endif // TARGET_XARCH void inst_JCC(GenCondition condition, BasicBlock* target); void inst_SETCC(GenCondition condition, var_types type, regNumber dstReg); diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index b93ca108bb29b4..9ab94d99448c94 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -2751,6 +2751,14 @@ void CodeGen::genEmitterUnitTests() { genAmd64EmitterUnitTestsCCMP(); } + if (unitTestSectionAll || (strstr(unitTestSection, "cfcmov") != nullptr)) + { + genAmd64EmitterUnitTestsCFCMOV(); + } + if (unitTestSectionAll || (strstr(unitTestSection, "ctest") != nullptr)) + { + genAmd64EmitterUnitTestsCTEST(); + } #elif defined(TARGET_ARM64) if (unitTestSectionAll || (strstr(unitTestSection, "general") != nullptr)) diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index b8d5ffb8f9452c..31b9d736ac63aa 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -1436,6 +1436,7 @@ instruction CodeGen::JumpKindToCmov(emitJumpKind condition) return s_table[condition]; } +#ifdef TARGET_AMD64 
//------------------------------------------------------------------------ // JumpKindToCcmp: // Convert an emitJumpKind to the corresponding ccmp instruction. @@ -1475,6 +1476,7 @@ instruction CodeGen::JumpKindToCcmp(emitJumpKind condition) assert((condition >= EJ_NONE) && (condition < EJ_COUNT)); return s_table[condition]; } +#endif // TARGET_AMD64 //------------------------------------------------------------------------ // genCodeForCompare: Produce code for a GT_SELECT/GT_SELECTCC node. @@ -8668,8 +8670,9 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, regSet.verifyRegistersUsed(killMask); } +#ifdef TARGET_AMD64 //----------------------------------------------------------------------------------------- -// OptsFromCFlags - Convert condition flags into approxpriate insOpts. +// OptsFromCFlags - Convert condition flags into appropriate insOpts. // // Arguments: // flags - The condition flags to be converted. @@ -8679,7 +8682,7 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize, // // Notes: // This function maps the condition flags (e.g., CF, ZF, SF, OF) to the appropriate -// instruction options used for setting the default flag values in extneded EVEX +// instruction options used for setting the default flag values in extended EVEX // encoding conditional instructions. // insOpts CodeGen::OptsFromCFlags(insCflags flags) @@ -8696,8 +8699,6 @@ insOpts CodeGen::OptsFromCFlags(insCflags flags) return (insOpts)opts; } -#ifdef TARGET_AMD64 - //----------------------------------------------------------------------------------------- // genCodeForCCMP - Generate code for a conditional compare (CCMP) node. 
// @@ -8736,7 +8737,17 @@ void CodeGen::genCodeForCCMP(GenTreeCCMP* ccmp) if (op2->isContainedIntOrIImmed()) { GenTreeIntConCommon* intConst = op2->AsIntConCommon(); - emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts); + if (intConst->IconValue() == 0) + { + // ctest reg, reg is 1-byte shorter encoding than ccmp reg, 0. + static_assert((FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION) == 32); + instruction ctestIns = (instruction)(ccmpIns + FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION); + emit->emitIns_R_R(ctestIns, cmpSize, srcReg1, srcReg1, opts); + } + else + { + emit->emitIns_R_I(ccmpIns, cmpSize, srcReg1, (int)intConst->IconValue(), opts); + } } else { @@ -9340,6 +9351,100 @@ void CodeGen::genAmd64EmitterUnitTestsAvx10v2() theEmitter->emitIns_R_R(INS_vmovw_simd, EA_16BYTE, REG_XMM0, REG_XMM1); } +/***************************************************************************** + * Unit tests for the CFCMOV instructions. + */ + +void CodeGen::genAmd64EmitterUnitTestsCFCMOV() +{ + emitter* theEmitter = GetEmitter(); + genDefineTempLabel(genCreateTempLabel()); + + GenTreePhysReg physReg(REG_EDX); + physReg.SetRegNum(REG_EDX); + GenTreeIndir load = indirForm(TYP_INT, &physReg); + + // Test all CC codes + for (uint32_t ins = FIRST_CFCMOV_INSTRUCTION; ins <= LAST_CFCMOV_INSTRUCTION; ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_NONE); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_NONE); + + theEmitter->emitIns_R_A((instruction)ins, EA_8BYTE, REG_EAX, &load, INS_OPTS_NONE); + theEmitter->emitIns_R_A((instruction)ins, EA_4BYTE, REG_EAX, &load, INS_OPTS_NONE); + + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_EAX, REG_ECX, 4); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_EAX, REG_ECX, 4); + + theEmitter->emitIns_R_ARX((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 1, 0); + theEmitter->emitIns_R_ARX((instruction)ins, EA_4BYTE, 
REG_R16, REG_R17, REG_R18, 1, 0); + theEmitter->emitIns_R_ARX((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, 4); + theEmitter->emitIns_R_ARX((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, 4); + + theEmitter->emitIns_AR_R((instruction)ins, EA_8BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_nf); + theEmitter->emitIns_AR_R((instruction)ins, EA_4BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_nf); + + theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, 4, INS_OPTS_EVEX_nf); + theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, 4, INS_OPTS_EVEX_nf); + + theEmitter->emitIns_ARX_R((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf); + theEmitter->emitIns_ARX_R((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_NA, 2, 0, INS_OPTS_EVEX_nf); + + theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R10, REG_R16, 0, 0, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R10, REG_R16, 0, 0, + (insOpts)(INS_OPTS_EVEX_nd | INS_OPTS_EVEX_nf)); + } + + // Test all CC codes + for (uint32_t ins = INS_cmovo; ins <= INS_cmovg; 
ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_8BYTE, REG_R16, REG_RCX); + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_R16, REG_RCX); + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_RAX, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_R10, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_R10, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_RCX, 2); + theEmitter->emitIns_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_RCX, 2); + theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_RAX, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_RAX, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_R10, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R10, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_8BYTE, REG_R16, 0, 0); + theEmitter->emitIns_R_S((instruction)ins, EA_4BYTE, REG_R16, 0, 0); + theEmitter->emitIns_R_R_R((instruction)ins, EA_8BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_R((instruction)ins, EA_4BYTE, REG_R10, REG_EAX, REG_ECX, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_8BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_AR((instruction)ins, EA_4BYTE, REG_R16, REG_R17, REG_R18, 2, + (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_8BYTE, REG_R16, REG_R17, &load, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_A((instruction)ins, EA_4BYTE, REG_R16, REG_R17, &load, 
(insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R10, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_8BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + theEmitter->emitIns_R_R_S((instruction)ins, EA_4BYTE, REG_R17, REG_R16, 0, 0, (insOpts)(INS_OPTS_EVEX_nd)); + } +} + /***************************************************************************** * Unit tests for the CCMP instructions. */ @@ -9368,7 +9473,7 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP() // Test all dfv for (int i = 0; i < 16; i++) { - theEmitter->emitIns_R_R(INS_ccmpe, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset)); + theEmitter->emitIns_R_R(INS_ccmpe, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_shift)); } // ============ @@ -9390,7 +9495,7 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP() // Test all dfv for (int i = 0; i < 16; i++) { - theEmitter->emitIns_R_S(INS_ccmpe, EA_4BYTE, REG_RAX, 0, 0, (insOpts)(i << INS_OPTS_EVEX_dfv_byte_offset)); + theEmitter->emitIns_R_S(INS_ccmpe, EA_4BYTE, REG_RAX, 0, 0, (insOpts)(i << INS_OPTS_EVEX_dfv_shift)); } // ============ @@ -9418,6 +9523,79 @@ void CodeGen::genAmd64EmitterUnitTestsCCMP() theEmitter->emitIns_R_C(INS_ccmpe, EA_4BYTE, REG_RAX, hnd, 4, INS_OPTS_EVEX_dfv_cf); } +/***************************************************************************** + * Unit tests for the CTEST instructions. 
+ */ +void CodeGen::genAmd64EmitterUnitTestsCTEST() +{ + static_assert(FIRST_CTEST_INSTRUCTION - FIRST_CCMP_INSTRUCTION == 32); + emitter* theEmitter = GetEmitter(); + genDefineTempLabel(genCreateTempLabel()); + GenTreePhysReg physReg(REG_EDX); + physReg.SetRegNum(REG_EDX); + GenTreeIndir load = indirForm(TYP_INT, &physReg); + + // ============ + // Test RR form + // ============ + + // Test all sizes + theEmitter->emitIns_R_R(INS_test, EA_4BYTE, REG_EAX, REG_ECX); + theEmitter->emitIns_R_R(INS_cteste, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_cteste, EA_8BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_cteste, EA_2BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_R(INS_cteste, EA_1BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + + // Test all CC codes + for (uint32_t ins = FIRST_CTEST_INSTRUCTION; ins <= LAST_CTEST_INSTRUCTION; ins++) + { + theEmitter->emitIns_R_R((instruction)ins, EA_4BYTE, REG_RAX, REG_RCX, INS_OPTS_EVEX_dfv_cf); + } + + // Test all dfv + for (int i = 0; i < 16; i++) + { + theEmitter->emitIns_R_R(INS_cteste, EA_4BYTE, REG_RAX, REG_RCX, (insOpts)(i << INS_OPTS_EVEX_dfv_shift)); + } + + // ============ + // Test RI form (test small and large sizes and constants) + // ============ + + theEmitter->emitIns_R_I(INS_test, EA_8BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_8BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_8BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_test, EA_4BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_4BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_4BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_test, EA_2BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_2BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_2BYTE, REG_RAX, 270, 
INS_OPTS_EVEX_dfv_cf); + + theEmitter->emitIns_R_I(INS_test, EA_1BYTE, REG_RAX, 123); + theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 123, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_R_I(INS_cteste, EA_1BYTE, REG_RAX, 270, INS_OPTS_EVEX_dfv_cf); + + // ============ + // Test MR form (test small and large sizes) + // ============ + + theEmitter->emitIns_AR_R(INS_cteste, EA_1BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_AR_R(INS_cteste, EA_2BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_AR_R(INS_cteste, EA_4BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_AR_R(INS_cteste, EA_8BYTE, REG_EAX, REG_ECX, 4, INS_OPTS_EVEX_dfv_cf); + + // ============ + // Test MI form + // ============ + + theEmitter->emitIns_I_AR(INS_cteste, EA_1BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_I_AR(INS_cteste, EA_2BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_I_AR(INS_cteste, EA_4BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); + theEmitter->emitIns_I_AR(INS_cteste, EA_8BYTE, 123, REG_R18, 2, INS_OPTS_EVEX_dfv_cf); +} #endif // defined(DEBUG) && defined(TARGET_AMD64) #ifdef PROFILING_SUPPORTED diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 06eec05ce90b60..8a5c26af3dc0ce 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -942,60 +942,139 @@ bool emitter::DoJitUseApxNDD(instruction ins) const #endif } +inline bool emitter::IsApxConditionalInstruction(instruction ins) +{ +#ifdef TARGET_AMD64 + return (IsCCMP(ins) || IsCFCMOV(ins) || IsCTEST(ins)); +#else + return false; +#endif +} + inline bool emitter::IsCCMP(instruction ins) { +#ifdef TARGET_AMD64 return (ins >= FIRST_CCMP_INSTRUCTION && ins <= LAST_CCMP_INSTRUCTION); +#else + return false; +#endif +} + +inline bool emitter::IsCTEST(instruction ins) +{ +#ifdef TARGET_AMD64 + return (ins >= FIRST_CTEST_INSTRUCTION && ins <= LAST_CTEST_INSTRUCTION); +#else + 
return false; +#endif +} + +inline bool emitter::IsCFCMOV(instruction ins) +{ +#ifdef TARGET_AMD64 + return (ins >= FIRST_CFCMOV_INSTRUCTION && ins <= LAST_CFCMOV_INSTRUCTION); +#else + return false; +#endif } //------------------------------------------------------------------------ -// GetCCFromCCMP: Get a condition code from a ccmp instruction +// ImmCanUseSByteEncoding: Returns true if `val` fits in a sign-extended byte +// AND `ins` supports the imm8s (sign-extended-byte) immediate encoding. +// +// Arguments: +// ins - The instruction being encoded. +// val - The immediate value. +// +// Returns: +// True when the x86 imm8s short-immediate form is legal: the value can be +// sign-extended from one byte and the instruction does not unconditionally +// require a full-width immediate. MOV, TEST, and CTEST never use imm8s +// regardless of the value. +// +// Notes: +// Callers must additionally suppress the result when the immediate carries +// a relocation (relocs cannot be placed in a single byte). +// +/* static */ bool emitter::ImmCanUseSByteEncoding(instruction ins, ssize_t val) +{ + target_ssize_t targetVal = (target_ssize_t)val; + + if (targetVal != val) + { + return false; + } + // MOV, TEST, and CTEST do not have SByte encoding form. + return ((signed char)targetVal == targetVal) && (ins != INS_mov) && (ins != INS_test) && !IsCTEST(ins); +} + +//------------------------------------------------------------------------ +// GetCCFromCCMPOrCTEST: Get a condition code from a conditional instruction // // Arguments: // ins - The instruction to check. // // Returns: -// `insCC` representing the condition code for a ccmp instruction. -// ccmpx instructions share the same instruction encoding unlike +// `insCC` representing the condition code for a ccmp / ctest instruction. +// ccmpcc / ctestcc instructions share the same instruction encoding unlike // other x86 status bit instructions and instead have a CC coded into // the EVEX prefix. 
// -inline insCC emitter::GetCCFromCCMP(instruction ins) +inline insCC emitter::GetCCFromCCMPOrCTEST(instruction ins) { - assert(IsCCMP(ins)); + assert(IsCTEST(ins) || IsCCMP(ins)); switch (ins) { +#ifdef TARGET_AMD64 case INS_ccmpo: + case INS_ctesto: return INS_CC_O; case INS_ccmpno: + case INS_ctestno: return INS_CC_NO; case INS_ccmpb: + case INS_ctestb: return INS_CC_B; case INS_ccmpae: + case INS_ctestae: return INS_CC_AE; case INS_ccmpe: + case INS_cteste: return INS_CC_E; case INS_ccmpne: + case INS_ctestne: return INS_CC_NE; case INS_ccmpbe: + case INS_ctestbe: return INS_CC_BE; case INS_ccmpa: + case INS_ctesta: return INS_CC_A; case INS_ccmps: + case INS_ctests: return INS_CC_S; case INS_ccmpns: + case INS_ctestns: return INS_CC_NS; case INS_ccmpt: + case INS_ctestt: return INS_CC_TRUE; case INS_ccmpf: + case INS_ctestf: return INS_CC_FALSE; case INS_ccmpl: + case INS_ctestl: return INS_CC_L; case INS_ccmpge: + case INS_ctestge: return INS_CC_GE; case INS_ccmple: + case INS_ctestle: return INS_CC_LE; case INS_ccmpg: + case INS_ctestg: return INS_CC_G; +#endif default: unreached(); } @@ -2043,7 +2122,8 @@ bool emitter::TakesApxExtendedEvexPrefix(const instrDesc* id) const #if defined(DEBUG) if (m_compiler->DoJitStressPromotedEvexEncoding()) { - return true; + // EVEX prefixed CMOV has different semantics from non-EVEX CMOV, so we should not promote CMOV in stress mode. + return !insIsCMOV(ins); } #endif // DEBUG if (IsApxOnlyInstruction(ins)) @@ -2137,7 +2217,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt { if (!IsEvexEncodableInstruction(ins)) { - // Legacy-promoted insutrcions are not labeled with Encoding_EVEX. + // Legacy-promoted instructions are not labeled with Encoding_EVEX. 
code |= MAP4_IN_BYTE_EVEX_PREFIX; } @@ -2165,18 +2245,29 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt } #ifdef TARGET_AMD64 - if (IsCCMP(ins)) + if (IsCCMP(ins) || IsCTEST(ins)) { + // CCMP and CTEST have 2 special fields in the EVEX prefix: + // 1. Source condition code (SCC): EVEX.P3[3:0] — the instruction executes only when this condition is + // satisfied. + // 2. Default flags value (DFV): EVEX.P2[6:3] — the value written to EFLAGS when the SCC condition is not + // met. + // + // In the code_t packing (see DEFAULT_BYTE_EVEX_PREFIX), the EVEX prefix occupies bits [63:32], + // with P3 at bits [39:32] and P2 at bits [47:40]: + // SCC (EVEX.P3[3:0]) -> code_t bits [35:32] -> shift left by 32 + // DFV (EVEX.P2[6:3]) -> code_t bits [46:43] -> shift left by 43 + // The mask below clears code_t bits [35:32] (SCC) and [46:43] (DFV) before they are repopulated. code &= 0xFFFF87F0FFFFFFFF; - code |= ((size_t)id->idGetEvexDFV()) << 43; - code |= ((size_t)GetCCFromCCMP(ins)) << 32; + code |= ((size_t)GetCCFromCCMPOrCTEST(ins)) << 32; // SCC: EVEX.P3[3:0] = code_t[35:32] + code |= ((size_t)id->idGetEvexDFV()) << 43; // DFV: EVEX.P2[6:3] = code_t[46:43] } #endif return code; } - // No APX-NDD instructions should reach code below. + // All APX-promoted-EVEX instructions should be handled above, no APX extended EVEX instruction should reach here. 
assert(!IsApxExtendedEvexInstruction(ins)); if (attr == EA_32BYTE) @@ -2281,7 +2372,7 @@ emitter::code_t emitter::AddEvexPrefix(const instrDesc* id, code_t code, emitAtt default: { #ifdef TARGET_AMD64 - if (IsCCMP(id->idIns())) // Special case for conditional ins such as CCMP, CCMOV + if (IsCCMP(id->idIns()) || IsCTEST(id->idIns())) { break; } @@ -5051,7 +5142,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code, int val { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); - bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); #ifdef TARGET_AMD64 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate @@ -5265,7 +5356,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); - bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); #ifdef TARGET_AMD64 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate @@ -5403,7 +5494,9 @@ UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code) // kmov instructions reach this path with EA_8BYTE size, even on x86 || IsKMOVInstruction(ins) // The prefetch instructions are always 3 bytes and have part of their modr/m byte hardcoded - || isPrefetch(ins)); + || isPrefetch(ins) + // cmov instructions reach this path with EA_2BYTE + || insIsCMOV(ins)); size = (attrSize == EA_2BYTE) && (ins == INS_cmpxchg) ? 
4 : 3; } @@ -5582,7 +5675,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeAM(instrDesc* id, code_t code, int val assert(id->idIns() != INS_invalid); instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); - bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); // We should never generate BT mem,reg because it has poor performance. BT mem,imm might be useful // but it requires special handling of the immediate value (it is always encoded in a byte). @@ -5646,7 +5739,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code, int val { instruction ins = id->idIns(); UNATIVE_OFFSET valSize = EA_SIZE_IN_BYTES(id->idOpSize()); - bool valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); #ifdef TARGET_AMD64 // mov reg, imm64 is the only opcode which takes a full 8 byte immediate @@ -7046,7 +7139,7 @@ void emitter::emitIns_R_I(instruction ins, UNATIVE_OFFSET sz; instrDesc* id; insFormat fmt = emitInsModeFormat(ins, IF_RRD_CNS); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); // BT reg,imm might be useful but it requires special handling of the immediate value // (it is always encoded in a byte). Let's not complicate things until this is needed. 
@@ -8268,8 +8361,8 @@ void emitter::emitIns_R_S_I( void emitter::emitIns_R_R_A( instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, insOpts instOptions) { - assert(IsSimdInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); + assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins)); ssize_t offs = indir->Offset(); instrDesc* id = emitNewInstrAmd(attr, offs); @@ -8280,8 +8373,13 @@ void emitter::emitIns_R_R_A( emitHandleMemOp(indir, id, (ins == INS_mulx) ? IF_RWR_RWR_ARD : emitInsModeFormat(ins, IF_RRD_RRD_ARD), ins); - SetEvexBroadcastIfNeeded(id, instOptions); - SetEvexEmbMaskIfNeeded(id, instOptions); + if (IsSimdInstruction(ins)) + { + SetEvexBroadcastIfNeeded(id, instOptions); + SetEvexEmbMaskIfNeeded(id, instOptions); + } + SetEvexNdIfNeeded(id, instOptions); + SetEvexNfIfNeeded(id, instOptions); UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -8290,10 +8388,11 @@ void emitter::emitIns_R_R_A( emitCurIGsize += sz; } -void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs) +void emitter::emitIns_R_R_AR( + instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs, insOpts instOptions) { - assert(IsSimdInstruction(ins)); - assert(IsThreeOperandAVXInstruction(ins)); + assert(IsSimdInstruction(ins) || IsApxExtendedEvexInstruction(ins)); + assert(IsThreeOperandAVXInstruction(ins) || IsApxExtendedEvexInstruction(ins)); instrDesc* id = emitNewInstrAmd(attr, offs); @@ -8305,6 +8404,9 @@ void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, reg id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = REG_NA; + SetEvexNdIfNeeded(id, instOptions); + SetEvexNfIfNeeded(id, instOptions); + UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)); id->idCodeSize(sz); @@ -8981,7 
+9083,8 @@ void emitter::emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE f * Add an instruction with a static member + constant. */ -void emitter::emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val) +void emitter::emitIns_C_I( + instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, int offs, int val, insOpts instOptions) { // Static always need relocs if (!jitStaticFldIsGlobAddr(fldHnd)) @@ -9250,6 +9353,9 @@ void emitter::emitIns_I_AR(instruction ins, emitAttr attr, int val, regNumber re assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly + SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); + sz = emitInsSizeAM(id, insCodeMI(ins), val); id->idCodeSize(sz); @@ -9671,6 +9777,9 @@ void emitter::emitIns_ARX_R(instruction ins, id->idIns(ins); id->idInsFmt(fmt); + SetEvexNfIfNeeded(id, instOptions); + SetEvexDFVIfNeeded(id, instOptions); + id->idAddr()->iiaAddrMode.amBaseReg = base; id->idAddr()->iiaAddrMode.amIndxReg = index; id->idAddr()->iiaAddrMode.amScale = emitEncodeScale(scale); @@ -12784,7 +12893,7 @@ void emitter::emitDispIns( printf(" %-9s", sstr); #ifdef TARGET_AMD64 - if (IsCCMP(id->idIns())) + if (IsCCMP(id->idIns()) || IsCTEST(id->idIns())) { // print finite set notation for DFV unsigned dfv = id->idGetEvexDFV(); @@ -14441,7 +14550,8 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Does the constant fit in a byte? 
// SSE/AVX do not need to modify opcode - if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) + + if (ImmCanUseSByteEncoding(ins, cval) && !addc->cnsReloc) { if (id->idInsFmt() != IF_ARW_SHF && !IsSimdInstruction(ins)) { @@ -14700,6 +14810,12 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { dst += emitOutputByte(dst, 0x66); } + + if (IsCFCMOV(ins)) + { + // The opcode for CFCMOV does not follow the rule of "|= 0x01" to handle 16b/32b/64b operands. + break; + } FALLTHROUGH; } @@ -15424,7 +15540,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // Does the constant fit in a byte? // SSE/AVX/AVX512 do not need to modify opcode - if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) + if (ImmCanUseSByteEncoding(ins, cval) && !addc->cnsReloc) { if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) && (id->idInsFmt() != IF_RWR_RRD_SRD_CNS) && !IsSimdInstruction(ins)) @@ -15974,7 +16090,7 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) { ssize_t cval = addc->cnsVal; // Does the constant fit in a byte? 
- if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test) + if (ImmCanUseSByteEncoding(ins, cval) && !addc->cnsReloc) { // SSE/AVX do not need to modify opcode if (id->idInsFmt() != IF_MRW_SHF && !IsSimdInstruction(ins)) @@ -16878,7 +16994,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) code = AddX86PrefixIfNeeded(id, code, size); code = insEncodeMRreg(id, code); - if (ins != INS_test && !IsShiftInstruction(ins)) + if (ins != INS_test && !IsShiftInstruction(ins) && !IsCFCMOV(ins) && !IsCTEST(ins)) { code |= 2; } @@ -16903,7 +17019,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) dst += emitOutputByte(dst, 0x66); } - code |= 0x1; + if (!IsCFCMOV(ins)) + code |= 0x1; break; case EA_4BYTE: @@ -16917,7 +17034,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) assert((code & EXTENDED_EVEX_PP_BITS) == 0); } #endif - code |= 0x1; + if (!IsCFCMOV(ins)) + code |= 0x1; break; #ifdef TARGET_AMD64 @@ -16934,7 +17052,8 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) } // Set the 'w' bit to get the large version - code |= 0x1; + if (!IsCFCMOV(ins)) + code |= 0x1; break; #endif // TARGET_AMD64 @@ -16975,11 +17094,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) regCode = insEncodeReg012(id, reg2, size, &code); } -#ifdef TARGET_AMD64 - if (TakesSimdPrefix(id) && !IsCCMP(ins)) -#else - if (TakesSimdPrefix(id)) -#endif + if (TakesSimdPrefix(id) && !IsApxConditionalInstruction(ins)) { // In case of AVX instructions that take 3 operands, we generally want to encode reg1 // as first source. In this case, reg1 is both a source and a destination. @@ -17130,7 +17245,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) case EA_2BYTE: case EA_4BYTE: // Set the 'w' bit to get the large version - code = insIsCMOV(ins) ? code : (code | (0x01)); + code = (insIsCMOV(ins) || IsCFCMOV(ins)) ? 
code : (code | (0x01)); break; #ifdef TARGET_AMD64 @@ -17140,7 +17255,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) code = AddRexWPrefix(id, code); // TODO-APX : Revisit. does xor or other cases need to be handled // differently? see emitOutputRR // Set the 'w' bit to get the large version - code = insIsCMOV(ins) ? code : (code | (0x01)); + code = (insIsCMOV(ins) || IsCFCMOV(ins)) ? code : (code | (0x01)); break; #endif // TARGET_AMD64 @@ -17242,7 +17357,7 @@ BYTE* emitter::emitOutputRI(BYTE* dst, instrDesc* id) instruction ins = id->idIns(); regNumber reg = id->idReg1(); ssize_t val = emitGetInsSC(id); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); assert(!id->idHasReg2()); @@ -18951,7 +19066,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) code = insEncodeMIreg(id, mReg, size, code); rReg = REG_NA; ssize_t val = emitGetInsSC(id); - bool valInByte = ((signed char)val == (target_ssize_t)val) && (ins != INS_mov) && (ins != INS_test); + bool valInByte = ImmCanUseSByteEncoding(ins, val); switch (size) { diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 38ade6a38f3930..adf76b712476aa 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -659,7 +659,7 @@ void SetEvexDFVIfNeeded(instrDesc* id, insOpts instOptions) if ((instOptions & INS_OPTS_EVEX_dfv_MASK) != 0) { assert(UsePromotedEVEXEncoding()); - assert(IsCCMP(id->idIns())); + assert(IsCCMP(id->idIns()) || IsCTEST(id->idIns())); id->idSetEvexDFV(instOptions); } #endif @@ -777,8 +777,12 @@ static bool IsRexW1Instruction(instruction ins); static bool IsRexWXInstruction(instruction ins); static bool IsRexW1EvexInstruction(instruction ins); +static bool IsApxConditionalInstruction(instruction ins); static bool IsCCMP(instruction ins); -static insCC GetCCFromCCMP(instruction ins); +static bool IsCTEST(instruction ins); 
+static bool IsCFCMOV(instruction ins); +static bool ImmCanUseSByteEncoding(instruction ins, ssize_t val); +static insCC GetCCFromCCMPOrCTEST(instruction ins); bool isAvx512Blendv(instruction ins) { @@ -967,7 +971,13 @@ void emitIns_R_R_A(instruction ins, GenTreeIndir* indir, insOpts instOptions = INS_OPTS_NONE); -void emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber base, int offs); +void emitIns_R_R_AR(instruction ins, + emitAttr attr, + regNumber reg1, + regNumber reg2, + regNumber base, + int offs, + insOpts instOptions = INS_OPTS_NONE); void emitIns_R_AR_R(instruction ins, emitAttr attr, @@ -1095,7 +1105,12 @@ void emitIns_R_C(instruction ins, void emitIns_C_R(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fldHnd, regNumber reg, int offs); -void emitIns_C_I(instruction ins, emitAttr attr, CORINFO_FIELD_HANDLE fdlHnd, int offs, int val); +void emitIns_C_I(instruction ins, + emitAttr attr, + CORINFO_FIELD_HANDLE fldHnd, + int offs, + int val, + insOpts instOptions = INS_OPTS_NONE); void emitIns_IJ(emitAttr attr, regNumber reg, unsigned base); diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 1c0fbb35371a41..e17233cc251abe 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -295,8 +295,8 @@ enum insOpts: unsigned // One-bit: 0b1000_0000 INS_OPTS_EVEX_nf_MASK = 0x80, // mask for APX-EVEX.nf related features - INS_OPTS_EVEX_nf = 1 << 7, // NDD form for legacy instructions - INS_OPTS_EVEX_dfv_byte_offset = 8, // save the bit offset for first dfv flag pos + INS_OPTS_EVEX_nf = 1 << 7, // No-Flag for legacy instructions + INS_OPTS_EVEX_dfv_shift = 8, // bit shift for the first dfv flag position INS_OPTS_EVEX_dfv_cf = 1 << 8, INS_OPTS_EVEX_dfv_zf = 1 << 9, diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 53e3e28d3adc7d..cceeea809d6c08 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -102,24 +102,24 @@ INST3(movsx, "movsx", 
IUM_WR, BAD_CODE, BAD_CODE, #ifdef TARGET_AMD64 INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, 0x000063, ZERO, 4X, INS_TT_NONE, REX_W1 | Encoding_REX2) #endif -INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, ZERO, 4X, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) - -INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2) -INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2) -INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2) -INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2) -INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2) -INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2) -INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2) -INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2) -INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2) -INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2) -INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2) -INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2) -INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2) -INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2) -INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2) 
-INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2) +INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, ZERO, 4X, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) + +INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, 1C, 2X, INS_TT_NONE, Reads_OF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, 1C, 2X, INS_TT_NONE, Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, 1C, 2X, INS_TT_NONE, Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, 1C, 2X, INS_TT_NONE, Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, 1C, 2X, INS_TT_NONE, Reads_PF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovge, "cmovge", IUM_WR, 
BAD_CODE, BAD_CODE, 0x0F004D, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) +INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | Encoding_REX2 | INS_Flags_Has_NDD) INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, ILLEGAL, ILLEGAL, INS_TT_NONE, INS_FLAGS_Has_Wbit | Encoding_REX2) INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, 3C, 1C, INS_TT_NONE, Writes_OF | Undefined_SF | Undefined_ZF | Undefined_AF | Undefined_PF | Writes_CF | INS_FLAGS_Has_Sbit | INS_Flags_Has_NDD | INS_Flags_Has_NF | Encoding_REX2) @@ -1141,6 +1141,7 @@ INST3(vucomxss, "vucomxss", IUM_RD, BAD_CODE, BAD_ // id nm um mr mi rm lat tp tt flags +#ifdef TARGET_AMD64 #define FIRST_APX_INSTRUCTION INS_ccmpo #define FIRST_CCMP_INSTRUCTION INS_ccmpo INST3(ccmpo, "ccmpo", IUM_RD, 0x000038, 0x0003880, 0x00003A, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) @@ -1160,6 +1161,42 @@ INST3(ccmpge, "ccmpge", IUM_RD, 0x000038, 0x0003880, 0x INST3(ccmple, "ccmple", IUM_RD, 0x000038, 0x0003880, 0x00003A, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) INST3(ccmpg, "ccmpg", IUM_RD, 0x000038, 0x0003880, 0x00003A, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) #define LAST_CCMP_INSTRUCTION INS_ccmpg +#define FIRST_CFCMOV_INSTRUCTION INS_cfcmovo +INST3(cfcmovo, "cfcmovo", IUM_WR, 0x000040, BAD_CODE, 0x000040, 1C, 2X, INS_TT_NONE, Reads_OF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovno, "cfcmovno", IUM_WR, 0x000041, BAD_CODE, 0x000041, 1C, 2X, INS_TT_NONE, Reads_OF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovb, "cfcmovb", IUM_WR, 0x000042, BAD_CODE, 0x000042, 1C, 2X, INS_TT_NONE, Reads_CF | INS_Flags_Has_NDD | 
INS_Flags_Has_NF) +INST3(cfcmovae, "cfcmovae", IUM_WR, 0x000043, BAD_CODE, 0x000043, 1C, 2X, INS_TT_NONE, Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmove, "cfcmove", IUM_WR, 0x000044, BAD_CODE, 0x000044, 1C, 2X, INS_TT_NONE, Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovne, "cfcmovne", IUM_WR, 0x000045, BAD_CODE, 0x000045, 1C, 2X, INS_TT_NONE, Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovbe, "cfcmovbe", IUM_WR, 0x000046, BAD_CODE, 0x000046, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmova, "cfcmova", IUM_WR, 0x000047, BAD_CODE, 0x000047, 1C, 2X, INS_TT_NONE, Reads_ZF | Reads_CF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovs, "cfcmovs", IUM_WR, 0x000048, BAD_CODE, 0x000048, 1C, 2X, INS_TT_NONE, Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovns, "cfcmovns", IUM_WR, 0x000049, BAD_CODE, 0x000049, 1C, 2X, INS_TT_NONE, Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovp, "cfcmovp", IUM_WR, 0x00004A, BAD_CODE, 0x00004A, 1C, 2X, INS_TT_NONE, Reads_PF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovnp, "cfcmovnp", IUM_WR, 0x00004B, BAD_CODE, 0x00004B, 1C, 2X, INS_TT_NONE, Reads_PF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovl, "cfcmovl", IUM_WR, 0x00004C, BAD_CODE, 0x00004C, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovge, "cfcmovge", IUM_WR, 0x00004D, BAD_CODE, 0x00004D, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovle, "cfcmovle", IUM_WR, 0x00004E, BAD_CODE, 0x00004E, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +INST3(cfcmovg, "cfcmovg", IUM_WR, 0x00004F, BAD_CODE, 0x00004F, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF | INS_Flags_Has_NDD | INS_Flags_Has_NF) +#define LAST_CFCMOV_INSTRUCTION INS_cfcmovg +#define FIRST_CTEST_INSTRUCTION INS_ctesto +INST3(ctesto, "ctesto", IUM_RD, 
0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestno, "ctestno", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestb, "ctestb", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestae, "ctestae", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(cteste, "cteste", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestne, "ctestne", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestbe, "ctestbe", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctesta, "ctesta", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctests, "ctests", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestns, "ctestns", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestt, "ctestt", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestf, "ctestf", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestl, "ctestl", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestge, "ctestge", 
IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestle, "ctestle", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +INST3(ctestg, "ctestg", IUM_RD, 0x000084, 0x00000F6, BAD_CODE, 1C, 2X, INS_TT_NONE, Writes_OF | Writes_SF | Writes_ZF | Writes_CF | INS_FLAGS_Has_Sbit) +#define LAST_CTEST_INSTRUCTION INS_ctestg INST3(crc32_apx, "crc32", IUM_RW, BAD_CODE, BAD_CODE, 0x0000F0, 3C, 1C, INS_TT_NONE, INS_FLAGS_None) INST3(movbe_apx, "movbe", IUM_WR, 0x000061, BAD_CODE, 0x000060, 1C, 2X, INS_TT_NONE, INS_FLAGS_None) @@ -1180,6 +1217,7 @@ INST3(setge_apx, "setzuge", IUM_WR, SSEDBLMAP(4, 0x4D), BAD_COD INST3(setle_apx, "setzule", IUM_WR, SSEDBLMAP(4, 0x4E), BAD_CODE, BAD_CODE, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) INST3(setg_apx, "setzug", IUM_WR, SSEDBLMAP(4, 0x4F), BAD_CODE, BAD_CODE, 1C, 2X, INS_TT_NONE, Reads_OF | Reads_SF | Reads_ZF) #define LAST_APX_INSTRUCTION INS_setg_apx +#endif // TARGET_AMD64 // Scalar instructions in SSE4.2 INST3(crc32, "crc32", IUM_RW, BAD_CODE, BAD_CODE, PSSE38(0xF2, 0xF0), 3C, 1C, INS_TT_NONE, INS_FLAGS_None)