From 06dda1571dcc1a9e93508061e83b19474c1ad143 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 21 May 2021 10:41:12 -0700 Subject: [PATCH 01/10] Correctly track how x86 instructions read/write flags --- src/coreclr/jit/instr.h | 35 +++- src/coreclr/jit/instrsxarch.h | 310 +++++++++++++++++----------------- 2 files changed, 183 insertions(+), 162 deletions(-) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index ed001fdc1bc722..9799d34b8260cb 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -86,14 +86,35 @@ enum GCtype : unsigned }; #if defined(TARGET_XARCH) -enum insFlags: uint8_t +enum insFlags : uint32_t { - INS_FLAGS_None = 0x00, - INS_FLAGS_ReadsFlags = 0x01, - INS_FLAGS_WritesFlags = 0x02, - INS_FLAGS_x87Instr = 0x04, - INS_Flags_IsDstDstSrcAVXInstruction = 0x08, - INS_Flags_IsDstSrcSrcAVXInstruction = 0x10, + INS_FLAGS_None = 0, + + INS_FLAGS_ReadsCF = 1 << 0, + INS_FLAGS_ReadsPF = 1 << 1, + INS_FLAGS_ReadsAF = 1 << 2, + INS_FLAGS_ReadsZF = 1 << 3, + INS_FLAGS_ReadsSF = 1 << 4, + INS_FLAGS_ReadsDF = 1 << 5, + INS_FLAGS_ReadsOF = 1 << 6, + INS_FLAGS_ReadsAllFlagsExceptAF = INS_FLAGS_ReadsCF | INS_FLAGS_ReadsPF | INS_FLAGS_ReadsZF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF, + INS_FLAGS_ReadsAllFlags = INS_FLAGS_ReadsAF | INS_FLAGS_ReadsAllFlagsExceptAF, + + INS_FLAGS_WritesCF = 1 << 7, + INS_FLAGS_WritesPF = 1 << 8, + INS_FLAGS_WritesAF = 1 << 9, + INS_FLAGS_WritesZF = 1 << 10, + INS_FLAGS_WritesSF = 1 << 11, + INS_FLAGS_WritesDF = 1 << 12, + INS_FLAGS_WritesOF = 1 << 13, + INS_FLAGS_WritesAllFlagsExceptCF = INS_FLAGS_WritesPF | INS_FLAGS_WritesAF | INS_FLAGS_WritesZF | INS_FLAGS_WritesSF | INS_FLAGS_WritesOF, + INS_FLAGS_WritesAllFlagsExceptOF = INS_FLAGS_WritesCF | INS_FLAGS_WritesPF | INS_FLAGS_WritesAF | INS_FLAGS_WritesZF | INS_FLAGS_WritesSF, + INS_FLAGS_WritesAllFlags = INS_FLAGS_WritesCF | INS_FLAGS_WritesAllFlagsExceptCF, + + INS_FLAGS_x87Instr = 1 << 14, + + INS_Flags_IsDstDstSrcAVXInstruction = 1 << 15, + 
INS_Flags_IsDstSrcSrcAVXInstruction = 1 << 16, // TODO-Cleanup: Remove this flag and its usage from TARGET_XARCH INS_FLAGS_DONT_CARE = 0x00, diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 25e5de73caf746..6b082c6ff149b2 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -57,25 +57,25 @@ INST5(pop, "pop", IUM_WR, 0x00008E, BAD_CODE, INST5(push_hide, "push", IUM_RD, 0x0030FE, 0x000068, BAD_CODE, BAD_CODE, 0x000050, INS_FLAGS_None) INST5(pop_hide, "pop", IUM_WR, 0x00008E, BAD_CODE, BAD_CODE, BAD_CODE, 0x000058, INS_FLAGS_None) -INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_FLAGS_WritesFlags) -INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_FLAGS_WritesFlags) -INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesFlags) -INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesFlags) +INST5(inc, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000040, INS_FLAGS_WritesAllFlagsExceptCF) +INST5(inc_l, "inc", IUM_RW, 0x0000FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C0FE, INS_FLAGS_WritesAllFlagsExceptCF) +INST5(dec, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x000048, INS_FLAGS_WritesAllFlagsExceptCF) +INST5(dec_l, "dec", IUM_RW, 0x0008FE, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C8FE, INS_FLAGS_WritesAllFlagsExceptCF) // Multi-byte opcodes without modrm are represented in mixed endian fashion. // See comment around quarter way through this file for more information. 
INST5(bswap, "bswap", IUM_RW, 0x0F00C8, BAD_CODE, BAD_CODE, BAD_CODE, 0x00C80F, INS_FLAGS_None) // id nm um mr mi rm a4 flags -INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesFlags) -INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesFlags) -INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesFlags) -INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesFlags) -INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesFlags) -INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesFlags) -INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesFlags) +INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesAllFlags) +INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsCF | INS_FLAGS_WritesAllFlags) +INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsCF | INS_FLAGS_WritesAllFlags) +INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesAllFlags) +INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesAllFlags) +INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 INST4(mov, "mov", IUM_WR, 0x000088, 
0x0000C6, 0x00008A, 0x0000B0, INS_FLAGS_None) INST4(lea, "lea", IUM_WR, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE, INS_FLAGS_None) @@ -84,10 +84,10 @@ INST4(lea, "lea", IUM_WR, BAD_CODE, BAD_CODE, // Note that emitter has only partial support for BT. It can only emit the reg,reg form // and the registers need to be reversed to get the correct encoding. -INST3(bt, "bt", IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesFlags) +INST3(bt, "bt", IUM_RD, 0x0F00A3, BAD_CODE, 0x0F00A3, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ?, OF = ? -INST3(bsf, "bsf", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BC, INS_FLAGS_WritesFlags) -INST3(bsr, "bsr", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BD, INS_FLAGS_WritesFlags) +INST3(bsf, "bsf", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BC, INS_FLAGS_WritesAllFlags) // CF = ?, PF = ?, AF = ?, SF = ?, OF = ? +INST3(bsr, "bsr", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BD, INS_FLAGS_WritesAllFlags) // CF = ?, PF = ?, AF = ?, SF = ?, OF = ? INST3(movsx, "movsx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00BE, INS_FLAGS_None) #ifdef TARGET_AMD64 @@ -95,25 +95,25 @@ INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, #endif INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_FLAGS_None) -INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsFlags) -INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsFlags) -INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsFlags) -INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsFlags) -INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsFlags) -INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsFlags) -INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsFlags) -INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsFlags) -INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsFlags) -INST3(cmovns, "cmovns", 
IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsFlags) -INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsFlags) -INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsFlags) -INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsFlags) -INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsFlags) -INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsFlags) -INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsFlags) +INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsAllFlagsExceptAF) 
+INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsAllFlagsExceptAF) INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None) -INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesFlags) // op1 *= op2 +INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? // id nm um mr mi rm flags @@ -121,25 +121,25 @@ INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, // as 2-operand instructions with the target register being implicit // implicit_reg = op1*op2_icon #define INSTMUL INST3 -INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_FLAGS_WritesFlags) +INSTMUL(imul_AX, "imul", IUM_RD, BAD_CODE, 0x000068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_CX, "imul", IUM_RD, BAD_CODE, 0x000868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_DX, "imul", IUM_RD, BAD_CODE, 0x001068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_BX, "imul", IUM_RD, BAD_CODE, 0x001868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_SP, "imul", IUM_RD, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? 
+INSTMUL(imul_BP, "imul", IUM_RD, BAD_CODE, 0x002868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_SI, "imul", IUM_RD, BAD_CODE, 0x003068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_DI, "imul", IUM_RD, BAD_CODE, 0x003868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? #ifdef TARGET_AMD64 -INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_FLAGS_WritesFlags) -INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_FLAGS_WritesFlags) +INSTMUL(imul_08, "imul", IUM_RD, BAD_CODE, 0x4400000068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_09, "imul", IUM_RD, BAD_CODE, 0x4400000868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_10, "imul", IUM_RD, BAD_CODE, 0x4400001068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_11, "imul", IUM_RD, BAD_CODE, 0x4400001868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_12, "imul", IUM_RD, BAD_CODE, 0x4400002068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_13, "imul", IUM_RD, BAD_CODE, 0x4400002868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? 
+INSTMUL(imul_14, "imul", IUM_RD, BAD_CODE, 0x4400003068, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INSTMUL(imul_15, "imul", IUM_RD, BAD_CODE, 0x4400003868, BAD_CODE, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? #endif // TARGET_AMD64 @@ -301,10 +301,10 @@ INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_FLAGS_None) // cvt packed DWORDs to doubles // SSE2 comparison instructions -INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_None) // ordered compare singles -INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_None) // ordered compare doubles -INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_None) // unordered compare singles -INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_None) // unordered compare doubles +INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_WritesAllFlags) // ordered compare singles: AF = 0, SF = 0, OF = 0 +INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_WritesAllFlags) // ordered compare doubles: AF = 0, SF = 0, OF = 0 +INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_WritesAllFlags) // unordered compare singles: AF = 0, SF = 0, OF = 0 +INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_WritesAllFlags) // unordered compare doubles: AF = 0, SF = 0, OF = 0 // SSE2 packed single/double comparison operations. // Note that these instructions not only compare but also overwrite the first source. 
@@ -593,10 +593,10 @@ INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, // BMI2 INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_FLAGS_None) -INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit -INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract -INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position -INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags +INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit +INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract +INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_WritesAllFlags | INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position: PF = ?, AF = ?, OF = 0 +INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) @@ -609,61 +609,61 @@ INST3(crc32, "crc32", IUM_WR, BAD_CODE, BAD_CODE, INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBC), INS_FLAGS_None) // Count the Number of Trailing Zero Bits // LZCNT -INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_None) +INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, SF = ? OF = ? 
// POPCNT -INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_None) +INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_WritesAllFlags) // CF = 0, PF = 0, AF = 0, SF = 0, OF = 0 // id nm um mr mi flags INST2(ret, "ret", IUM_RD, 0x0000C3, 0x0000C2, INS_FLAGS_None) INST2(loop, "loop", IUM_RD, BAD_CODE, 0x0000E2, INS_FLAGS_None) -INST2(call, "call", IUM_RD, 0x0010FF, 0x0000E8, INS_FLAGS_WritesFlags) - -INST2(rol, "rol", IUM_RW, 0x0000D2, BAD_CODE, INS_FLAGS_WritesFlags) -INST2(rol_1, "rol", IUM_RW, 0x0000D0, 0x0000D0, INS_FLAGS_WritesFlags) -INST2(rol_N, "rol", IUM_RW, 0x0000C0, 0x0000C0, INS_FLAGS_WritesFlags) -INST2(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, INS_FLAGS_WritesFlags) -INST2(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, INS_FLAGS_WritesFlags) -INST2(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, INS_FLAGS_WritesFlags) - -INST2(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST2(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST2(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST2(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, INS_FLAGS_ReadsFlags | INS_FLAGS_WritesFlags) -INST2(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, INS_FLAGS_WritesFlags) -INST2(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, INS_FLAGS_WritesFlags) -INST2(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, INS_FLAGS_WritesFlags) -INST2(shr, "shr", IUM_RW, 0x0028D2, BAD_CODE, INS_FLAGS_WritesFlags) -INST2(shr_1, "shr", IUM_RW, 0x0028D0, 0x0028D0, INS_FLAGS_WritesFlags) -INST2(shr_N, "shr", IUM_RW, 0x0028C0, 0x0028C0, INS_FLAGS_WritesFlags) -INST2(sar, "sar", IUM_RW, 0x0038D2, BAD_CODE, INS_FLAGS_WritesFlags) -INST2(sar_1, "sar", IUM_RW, 0x0038D0, 0x0038D0, 
INS_FLAGS_WritesFlags) -INST2(sar_N, "sar", IUM_RW, 0x0038C0, 0x0038C0, INS_FLAGS_WritesFlags) +INST2(call, "call", IUM_RD, 0x0010FF, 0x0000E8, INS_FLAGS_None) + +INST2(rol, "rol", IUM_RW, 0x0000D2, BAD_CODE, INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(rol_1, "rol", IUM_RW, 0x0000D0, 0x0000D0, INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) +INST2(rol_N, "rol", IUM_RW, 0x0000C0, 0x0000C0, INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(ror, "ror", IUM_RW, 0x0008D2, BAD_CODE, INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(ror_1, "ror", IUM_RW, 0x0008D0, 0x0008D0, INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) +INST2(ror_N, "ror", IUM_RW, 0x0008C0, 0x0008C0, INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? + +INST2(rcl, "rcl", IUM_RW, 0x0010D2, BAD_CODE, INS_FLAGS_ReadsCF | INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(rcl_1, "rcl", IUM_RW, 0x0010D0, 0x0010D0, INS_FLAGS_ReadsCF | INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) +INST2(rcl_N, "rcl", IUM_RW, 0x0010C0, 0x0010C0, INS_FLAGS_ReadsCF | INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(rcr, "rcr", IUM_RW, 0x0018D2, BAD_CODE, INS_FLAGS_ReadsCF | INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(rcr_1, "rcr", IUM_RW, 0x0018D0, 0x0018D0, INS_FLAGS_ReadsCF | INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) +INST2(rcr_N, "rcr", IUM_RW, 0x0018C0, 0x0018C0, INS_FLAGS_ReadsCF | INS_FLAGS_WritesCF | INS_FLAGS_WritesOF) // OF = ? +INST2(shl, "shl", IUM_RW, 0x0020D2, BAD_CODE, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? +INST2(shl_1, "shl", IUM_RW, 0x0020D0, 0x0020D0, INS_FLAGS_WritesAllFlags) // AF = ? +INST2(shl_N, "shl", IUM_RW, 0x0020C0, 0x0020C0, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? +INST2(shr, "shr", IUM_RW, 0x0028D2, BAD_CODE, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? +INST2(shr_1, "shr", IUM_RW, 0x0028D0, 0x0028D0, INS_FLAGS_WritesAllFlags) // AF = ? +INST2(shr_N, "shr", IUM_RW, 0x0028C0, 0x0028C0, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? 
+INST2(sar, "sar", IUM_RW, 0x0038D2, BAD_CODE, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? +INST2(sar_1, "sar", IUM_RW, 0x0038D0, 0x0038D0, INS_FLAGS_WritesAllFlags) // AF = ? +INST2(sar_N, "sar", IUM_RW, 0x0038C0, 0x0038C0, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? // id nm um mr flags -INST1(r_movsb, "rep movsb", IUM_RD, 0x00A4F3, INS_FLAGS_None) -INST1(r_movsd, "rep movsd", IUM_RD, 0x00A5F3, INS_FLAGS_None) +INST1(r_movsb, "rep movsb", IUM_RD, 0x00A4F3, INS_FLAGS_ReadsDF) +INST1(r_movsd, "rep movsd", IUM_RD, 0x00A5F3, INS_FLAGS_ReadsDF) #if defined(TARGET_AMD64) -INST1(r_movsq, "rep movsq", IUM_RD, 0xF3A548, INS_FLAGS_None) +INST1(r_movsq, "rep movsq", IUM_RD, 0xF3A548, INS_FLAGS_ReadsDF) #endif // defined(TARGET_AMD64) -INST1(movsb, "movsb", IUM_RD, 0x0000A4, INS_FLAGS_None) -INST1(movsd, "movsd", IUM_RD, 0x0000A5, INS_FLAGS_None) +INST1(movsb, "movsb", IUM_RD, 0x0000A4, INS_FLAGS_ReadsDF) +INST1(movsd, "movsd", IUM_RD, 0x0000A5, INS_FLAGS_ReadsDF) #if defined(TARGET_AMD64) -INST1(movsq, "movsq", IUM_RD, 0x00A548, INS_FLAGS_None) +INST1(movsq, "movsq", IUM_RD, 0x00A548, INS_FLAGS_ReadsDF) #endif // defined(TARGET_AMD64) -INST1(r_stosb, "rep stosb", IUM_RD, 0x00AAF3, INS_FLAGS_None) -INST1(r_stosd, "rep stosd", IUM_RD, 0x00ABF3, INS_FLAGS_None) +INST1(r_stosb, "rep stosb", IUM_RD, 0x00AAF3, INS_FLAGS_ReadsDF) +INST1(r_stosd, "rep stosd", IUM_RD, 0x00ABF3, INS_FLAGS_ReadsDF) #if defined(TARGET_AMD64) -INST1(r_stosq, "rep stosq", IUM_RD, 0xF3AB48, INS_FLAGS_None) +INST1(r_stosq, "rep stosq", IUM_RD, 0xF3AB48, INS_FLAGS_ReadsDF) #endif // defined(TARGET_AMD64) -INST1(stosb, "stosb", IUM_RD, 0x0000AA, INS_FLAGS_None) -INST1(stosd, "stosd", IUM_RD, 0x0000AB, INS_FLAGS_None) +INST1(stosb, "stosb", IUM_RD, 0x0000AA, INS_FLAGS_ReadsDF) +INST1(stosd, "stosd", IUM_RD, 0x0000AB, INS_FLAGS_ReadsDF) #if defined(TARGET_AMD64) -INST1(stosq, "stosq", IUM_RD, 0x00AB48, INS_FLAGS_None) +INST1(stosq, "stosq", IUM_RD, 0x00AB48, INS_FLAGS_ReadsDF) #endif // defined(TARGET_AMD64) 
INST1(int3, "int3", IUM_RD, 0x0000CC, INS_FLAGS_None) @@ -672,22 +672,22 @@ INST1(lock, "lock", IUM_RD, 0x0000F0, INST1(leave, "leave", IUM_RD, 0x0000C9, INS_FLAGS_None) -INST1(neg, "neg", IUM_RW, 0x0018F6, INS_FLAGS_WritesFlags) -INST1(not, "not", IUM_RW, 0x0010F6, INS_FLAGS_WritesFlags) +INST1(neg, "neg", IUM_RW, 0x0018F6, INS_FLAGS_WritesAllFlags) +INST1(not, "not", IUM_RW, 0x0010F6, INS_FLAGS_None) -INST1(cdq, "cdq", IUM_RD, 0x000099, INS_FLAGS_WritesFlags) -INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_FLAGS_WritesFlags) -INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_FLAGS_WritesFlags) // edx:eax = eax*op1 -INST1(div, "div", IUM_RD, 0x0030F6, INS_FLAGS_WritesFlags) -INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_FLAGS_WritesFlags) +INST1(cdq, "cdq", IUM_RD, 0x000099, INS_FLAGS_None) +INST1(idiv, "idiv", IUM_RD, 0x0038F6, INS_FLAGS_WritesAllFlags) // CF = ?, PF = ?, AF = ?, ZF = ?, SF = ?, OF = ? +INST1(imulEAX, "imul", IUM_RD, 0x0028F6, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? +INST1(div, "div", IUM_RD, 0x0030F6, INS_FLAGS_WritesAllFlags) // CF = ?, PF = ?, AF = ?, ZF = ?, SF = ?, OF = ? +INST1(mulEAX, "mul", IUM_RD, 0x0020F6, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? -INST1(sahf, "sahf", IUM_RD, 0x00009E, INS_FLAGS_WritesFlags) +INST1(sahf, "sahf", IUM_RD, 0x00009E, INS_FLAGS_WritesAllFlagsExceptOF) -INST1(xadd, "xadd", IUM_RW, 0x0F00C0, INS_FLAGS_WritesFlags) -INST1(cmpxchg, "cmpxchg", IUM_RW, 0x0F00B0, INS_FLAGS_WritesFlags) +INST1(xadd, "xadd", IUM_RW, 0x0F00C0, INS_FLAGS_WritesAllFlags) +INST1(cmpxchg, "cmpxchg", IUM_RW, 0x0F00B0, INS_FLAGS_WritesAllFlags) -INST1(shld, "shld", IUM_RW, 0x0F00A4, INS_FLAGS_WritesFlags) -INST1(shrd, "shrd", IUM_RW, 0x0F00AC, INS_FLAGS_WritesFlags) +INST1(shld, "shld", IUM_RW, 0x0F00A4, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? +INST1(shrd, "shrd", IUM_RW, 0x0F00AC, INS_FLAGS_WritesAllFlags) // AF = ?, OF = ? 
// For RyuJIT/x86, we follow the x86 calling convention that requires // us to return floating point value on the x87 FP stack, so we need @@ -697,22 +697,22 @@ INST1(fld, "fld", IUM_WR, 0x0000D9, INST1(fstp, "fstp", IUM_WR, 0x0018D9, INS_FLAGS_x87Instr) #endif // TARGET_X86 -INST1(seto, "seto", IUM_WR, 0x0F0090, INS_FLAGS_ReadsFlags) -INST1(setno, "setno", IUM_WR, 0x0F0091, INS_FLAGS_ReadsFlags) -INST1(setb, "setb", IUM_WR, 0x0F0092, INS_FLAGS_ReadsFlags) -INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsFlags) -INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsFlags) -INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsFlags) -INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsFlags) -INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsFlags) -INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsFlags) -INST1(setns, "setns", IUM_WR, 0x0F0099, INS_FLAGS_ReadsFlags) -INST1(setp, "setp", IUM_WR, 0x0F009A, INS_FLAGS_ReadsFlags) -INST1(setnp, "setnp", IUM_WR, 0x0F009B, INS_FLAGS_ReadsFlags) -INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsFlags) -INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsFlags) -INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsFlags) -INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsFlags) +INST1(seto, "seto", IUM_WR, 0x0F0090, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setno, "setno", IUM_WR, 0x0F0091, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setb, "setb", IUM_WR, 0x0F0092, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setns, "setns", IUM_WR, 0x0F0099, INS_FLAGS_ReadsAllFlagsExceptAF) 
+INST1(setp, "setp", IUM_WR, 0x0F009A, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setnp, "setnp", IUM_WR, 0x0F009B, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsAllFlagsExceptAF) #ifdef TARGET_AMD64 // A jump with rex prefix. This is used for register indirect @@ -723,40 +723,40 @@ INST1(rex_jmp, "rex.jmp", IUM_RD, 0x0020FE, INST1(i_jmp, "jmp", IUM_RD, 0x0020FE, INS_FLAGS_None) INST0(jmp, "jmp", IUM_RD, 0x0000EB, INS_FLAGS_None) -INST0(jo, "jo", IUM_RD, 0x000070, INS_FLAGS_ReadsFlags) -INST0(jno, "jno", IUM_RD, 0x000071, INS_FLAGS_ReadsFlags) -INST0(jb, "jb", IUM_RD, 0x000072, INS_FLAGS_ReadsFlags) -INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsFlags) -INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsFlags) -INST0(jne, "jne", IUM_RD, 0x000075, INS_FLAGS_ReadsFlags) -INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsFlags) -INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsFlags) -INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsFlags) -INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsFlags) -INST0(jp, "jp", IUM_RD, 0x00007A, INS_FLAGS_ReadsFlags) -INST0(jnp, "jnp", IUM_RD, 0x00007B, INS_FLAGS_ReadsFlags) -INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsFlags) -INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsFlags) -INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsFlags) -INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsFlags) +INST0(jo, "jo", IUM_RD, 0x000070, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jno, "jno", IUM_RD, 0x000071, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jb, "jb", IUM_RD, 0x000072, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jne, "jne", IUM_RD, 
0x000075, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jp, "jp", IUM_RD, 0x00007A, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jnp, "jnp", IUM_RD, 0x00007B, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsAllFlagsExceptAF) INST0(l_jmp, "jmp", IUM_RD, 0x0000E9, INS_FLAGS_None) -INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsFlags) -INST0(l_jno, "jno", IUM_RD, 0x00810F, INS_FLAGS_ReadsFlags) -INST0(l_jb, "jb", IUM_RD, 0x00820F, INS_FLAGS_ReadsFlags) -INST0(l_jae, "jae", IUM_RD, 0x00830F, INS_FLAGS_ReadsFlags) -INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsFlags) -INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsFlags) -INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsFlags) -INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsFlags) -INST0(l_js, "js", IUM_RD, 0x00880F, INS_FLAGS_ReadsFlags) -INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsFlags) -INST0(l_jp, "jp", IUM_RD, 0x008A0F, INS_FLAGS_ReadsFlags) -INST0(l_jnp, "jnp", IUM_RD, 0x008B0F, INS_FLAGS_ReadsFlags) -INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsFlags) -INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsFlags) -INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_ReadsFlags) -INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsFlags) +INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jno, "jno", IUM_RD, 0x00810F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jb, "jb", IUM_RD, 0x00820F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jae, "jae", IUM_RD, 0x00830F, 
INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_js, "js", IUM_RD, 0x00880F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jp, "jp", IUM_RD, 0x008A0F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jnp, "jnp", IUM_RD, 0x008B0F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsAllFlagsExceptAF) INST0(align, "align", IUM_RD, BAD_CODE, INS_FLAGS_None) From b6f80ea177673ae1a98feee9c5f79b40c4a02127 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Mon, 24 May 2021 14:54:16 -0700 Subject: [PATCH 02/10] For GT_EQ/GT_NE, reuse flag --- src/coreclr/jit/clrjit.natvis | 6 ++++ src/coreclr/jit/codegenxarch.cpp | 3 +- src/coreclr/jit/emitxarch.cpp | 49 +++++++++++++------------------- src/coreclr/jit/emitxarch.h | 3 +- 4 files changed, 29 insertions(+), 32 deletions(-) diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis index 07cfc192b5ae04..49399d3e23d71c 100644 --- a/src/coreclr/jit/clrjit.natvis +++ b/src/coreclr/jit/clrjit.natvis @@ -155,6 +155,12 @@ The .NET Foundation licenses this file to you under the MIT license. 
IG{igNum,d} + + {_idIns,en} {_idReg1,en} + {_idIns,en} {_idReg1,en}, {_idLargeCns,d} + {_idIns,en} + + Size={m_nSize} diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 9a8c00dc355381..8ec7a1d8d450d0 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6035,8 +6035,7 @@ void CodeGen::genCompareInt(GenTree* treeNode) // TYP_UINT and TYP_ULONG should not appear here, only small types can be unsigned assert(!varTypeIsUnsigned(type) || varTypeIsSmall(type)); - bool needsOCFlags = !tree->OperIs(GT_EQ, GT_NE); - if (canReuseFlags && emit->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(type), needsOCFlags)) + if (canReuseFlags && emit->AreFlagsSetToZeroCmp(op1->GetRegNum(), emitTypeSize(type), tree->OperGet())) { JITDUMP("Not emitting compare due to flags being already set\n"); } diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index c22d52bdc49ae3..7006df7381ce05 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -144,6 +144,11 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins); } +bool emitter::IsWriteZFFlags(instruction ins) +{ + return (CodeGenInterface::instInfo[ins] & INS_FLAGS_WritesZF) != 0; +} + //------------------------------------------------------------------------ // AreUpper32BitsZero: check if some previously emitted // instruction set the upper 32 bits of reg to zero. @@ -230,17 +235,19 @@ bool emitter::AreUpper32BitsZero(regNumber reg) // // Notes: // Currently only looks back one instruction. 
-bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, bool needsOCFlags) +bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps treeOps) { assert(reg != REG_NA); + // Don't look back across IG boundaries (possible control flow) if (emitCurIGinsCnt == 0 && ((emitCurIG->igFlags & IGF_EXTEND) == 0)) { return false; } - instrDesc* id = emitLastIns; - insFormat fmt = id->idInsFmt(); + instrDesc* id = emitLastIns; + instruction lastIns = id->idIns(); + insFormat fmt = id->idInsFmt(); // make sure op1 is a reg switch (fmt) @@ -269,34 +276,18 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, bool needsOCF return false; } - switch (id->idIns()) + // these always set OF and CF to 0 + if ((lastIns == INS_and) || (lastIns == INS_or) || (lastIns == INS_xor)) { - case INS_adc: - case INS_add: - case INS_dec: - case INS_dec_l: - case INS_inc: - case INS_inc_l: - case INS_neg: - case INS_shr_1: - case INS_shl_1: - case INS_sar_1: - case INS_sbb: - case INS_sub: - case INS_xadd: - if (needsOCFlags) - { - return false; - } - FALLTHROUGH; - // these always set OC to 0 - case INS_and: - case INS_or: - case INS_xor: - return id->idOpSize() == opSize; + return id->idOpSize() == opSize; + } - default: - break; + if ((treeOps == GT_EQ) || (treeOps == GT_NE)) + { + if (IsWriteZFFlags(lastIns)) + { + return id->idOpSize() == opSize; + } } return false; diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index d395a29ec9b138..fb300d1302cb30 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -98,7 +98,7 @@ static bool IsMovInstruction(instruction ins); bool AreUpper32BitsZero(regNumber reg); -bool AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, bool needsOCFlags); +bool AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps treeOps); bool hasRexPrefix(code_t code) { @@ -171,6 +171,7 @@ void SetContains256bitAVX(bool value) bool IsDstDstSrcAVXInstruction(instruction ins); bool 
IsDstSrcSrcAVXInstruction(instruction ins); +bool IsWriteZFFlags(instruction ins); bool IsThreeOperandAVXInstruction(instruction ins) { return (IsDstDstSrcAVXInstruction(ins) || IsDstSrcSrcAVXInstruction(ins)); From 88673740d5489bbf5531e8c63d76bc1c393da7d7 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 May 2021 10:54:38 -0700 Subject: [PATCH 03/10] Explicit flags for jcc, setcc, cmovcc --- src/coreclr/jit/instrsxarch.h | 128 +++++++++++++++++----------------- 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 6b082c6ff149b2..36d8fbfe0b1cf8 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -95,22 +95,22 @@ INST3(movsxd, "movsxd", IUM_WR, BAD_CODE, BAD_CODE, #endif INST3(movzx, "movzx", IUM_WR, BAD_CODE, BAD_CODE, 0x0F00B6, INS_FLAGS_None) -INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B,
INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsAllFlagsExceptAF) -INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST3(cmovo, "cmovo", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0040, INS_FLAGS_ReadsOF) +INST3(cmovno, "cmovno", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0041, INS_FLAGS_ReadsOF) +INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0042, INS_FLAGS_ReadsCF) +INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsCF) +INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsZF) +INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsZF) +INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsSF) +INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsSF) +INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsPF) +INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsPF) +INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None) INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 
0x0F00AF, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? @@ -697,22 +697,22 @@ INST1(fld, "fld", IUM_WR, 0x0000D9, INST1(fstp, "fstp", IUM_WR, 0x0018D9, INS_FLAGS_x87Instr) #endif // TARGET_X86 -INST1(seto, "seto", IUM_WR, 0x0F0090, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setno, "setno", IUM_WR, 0x0F0091, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setb, "setb", IUM_WR, 0x0F0092, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setns, "setns", IUM_WR, 0x0F0099, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setp, "setp", IUM_WR, 0x0F009A, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setnp, "setnp", IUM_WR, 0x0F009B, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsAllFlagsExceptAF) -INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST1(seto, "seto", IUM_WR, 0x0F0090, INS_FLAGS_ReadsOF) +INST1(setno, "setno", IUM_WR, 0x0F0091, INS_FLAGS_ReadsOF) +INST1(setb, "setb", IUM_WR, 0x0F0092, INS_FLAGS_ReadsCF) +INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsCF) +INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsZF) +INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsZF) +INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsZF | INS_FLAGS_ReadsCF) +INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsZF | INS_FLAGS_ReadsCF) +INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsSF) +INST1(setns, "setns", IUM_WR, 0x0F0099, 
INS_FLAGS_ReadsSF) +INST1(setp, "setp", IUM_WR, 0x0F009A, INS_FLAGS_ReadsPF) +INST1(setnp, "setnp", IUM_WR, 0x0F009B, INS_FLAGS_ReadsPF) +INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) #ifdef TARGET_AMD64 // A jump with rex prefix. This is used for register indirect @@ -723,40 +723,40 @@ INST1(rex_jmp, "rex.jmp", IUM_RD, 0x0020FE, INST1(i_jmp, "jmp", IUM_RD, 0x0020FE, INS_FLAGS_None) INST0(jmp, "jmp", IUM_RD, 0x0000EB, INS_FLAGS_None) -INST0(jo, "jo", IUM_RD, 0x000070, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jno, "jno", IUM_RD, 0x000071, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jb, "jb", IUM_RD, 0x000072, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jne, "jne", IUM_RD, 0x000075, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jp, "jp", IUM_RD, 0x00007A, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jnp, "jnp", IUM_RD, 0x00007B, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(jo, "jo", IUM_RD, 0x000070, INS_FLAGS_ReadsOF) +INST0(jno, "jno", IUM_RD, 0x000071, INS_FLAGS_ReadsOF) +INST0(jb, "jb", IUM_RD, 0x000072, 
INS_FLAGS_ReadsCF) +INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsCF) +INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsZF) +INST0(jne, "jne", IUM_RD, 0x000075, INS_FLAGS_ReadsZF) +INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsSF) +INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsSF) +INST0(jp, "jp", IUM_RD, 0x00007A, INS_FLAGS_ReadsPF) +INST0(jnp, "jnp", IUM_RD, 0x00007B, INS_FLAGS_ReadsPF) +INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) INST0(l_jmp, "jmp", IUM_RD, 0x0000E9, INS_FLAGS_None) -INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jno, "jno", IUM_RD, 0x00810F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jb, "jb", IUM_RD, 0x00820F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jae, "jae", IUM_RD, 0x00830F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_js, "js", IUM_RD, 0x00880F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jp, "jp", IUM_RD, 0x008A0F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jnp, "jnp", IUM_RD, 0x008B0F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jle, "jle", IUM_RD, 0x008E0F, 
INS_FLAGS_ReadsAllFlagsExceptAF) -INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsAllFlagsExceptAF) +INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsOF) +INST0(l_jno, "jno", IUM_RD, 0x00810F, INS_FLAGS_ReadsOF) +INST0(l_jb, "jb", IUM_RD, 0x00820F, INS_FLAGS_ReadsCF) +INST0(l_jae, "jae", IUM_RD, 0x00830F, INS_FLAGS_ReadsCF) +INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsZF) +INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsZF) +INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST0(l_js, "js", IUM_RD, 0x00880F, INS_FLAGS_ReadsSF) +INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsSF) +INST0(l_jp, "jp", IUM_RD, 0x008A0F, INS_FLAGS_ReadsPF) +INST0(l_jnp, "jnp", IUM_RD, 0x008B0F, INS_FLAGS_ReadsPF) +INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) +INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) INST0(align, "align", IUM_RD, BAD_CODE, INS_FLAGS_None) From 01fb6baab63d74f9f89f6775ec6b0d3d70ad7c6d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 25 May 2021 11:23:55 -0700 Subject: [PATCH 04/10] Add reset flags --- src/coreclr/jit/emitxarch.cpp | 10 ++++++ src/coreclr/jit/instr.h | 20 +++++++++-- src/coreclr/jit/instrsxarch.h | 68 +++++++++++++++++------------------ 3 files changed, 61 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 7006df7381ce05..fcd15bcc1f85a6 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -144,6 +144,16 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) return ((CodeGenInterface::instInfo[ins] & INS_Flags_IsDstSrcSrcAVXInstruction) != 0) && IsAVXInstruction(ins); } 
+//------------------------------------------------------------------------ +// IsWriteZFFlags: check if the instruction writes the +// ZF flag. +// +// Arguments: +// ins - instruction to test +// +// Return Value: +// true if instruction writes the ZF flag, false otherwise. +// bool emitter::IsWriteZFFlags(instruction ins) { return (CodeGenInterface::instInfo[ins] & INS_FLAGS_WritesZF) != 0; diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 9799d34b8260cb..78b8af099ee789 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -90,6 +90,7 @@ enum insFlags : uint32_t { INS_FLAGS_None = 0, + // Reads EFLAGS INS_FLAGS_ReadsCF = 1 << 0, INS_FLAGS_ReadsPF = 1 << 1, INS_FLAGS_ReadsAF = 1 << 2, @@ -98,8 +99,12 @@ enum insFlags : uint32_t INS_FLAGS_ReadsDF = 1 << 5, INS_FLAGS_ReadsOF = 1 << 6, INS_FLAGS_ReadsAllFlagsExceptAF = INS_FLAGS_ReadsCF | INS_FLAGS_ReadsPF | INS_FLAGS_ReadsZF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF, + INS_FLAGS_Reads_CF_ZF_Flags = INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF, + INS_FLAGS_Reads_OF_SF_Flags = INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF, + INS_FLAGS_Reads_OF_SF_ZF_Flags = INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF, INS_FLAGS_ReadsAllFlags = INS_FLAGS_ReadsAF | INS_FLAGS_ReadsAllFlagsExceptAF, + // Writes EFLAGS INS_FLAGS_WritesCF = 1 << 7, INS_FLAGS_WritesPF = 1 << 8, INS_FLAGS_WritesAF = 1 << 9, @@ -111,10 +116,19 @@ enum insFlags : uint32_t INS_FLAGS_WritesAllFlagsExceptOF = INS_FLAGS_WritesCF | INS_FLAGS_WritesPF | INS_FLAGS_WritesAF | INS_FLAGS_WritesZF | INS_FLAGS_WritesSF, INS_FLAGS_WritesAllFlags = INS_FLAGS_WritesCF | INS_FLAGS_WritesAllFlagsExceptCF, - INS_FLAGS_x87Instr = 1 << 14, + // Resets EFLAGS + INS_FLAGS_Resets_OF_Flags = 1 << 14, + INS_FLAGS_Resets_CF_OF_Flags = 1 << 15, + INS_FLAGS_Resets_OF_SF_PF_Flags = 1 << 16, + INS_FLAGS_Resets_OF_SF_PF_Flags = 1 << 17, + INS_FLAGS_ResetsAllFlagsExceptZF = 1 << 18, - INS_Flags_IsDstDstSrcAVXInstruction = 1 << 15, -
INS_Flags_IsDstSrcSrcAVXInstruction = 1 << 16, + // x87 instruction + INS_FLAGS_x87Instr = 1 << 19, + + // Avx + INS_Flags_IsDstDstSrcAVXInstruction = 1 << 20, + INS_Flags_IsDstSrcSrcAVXInstruction = 1 << 21, // TODO-Cleanup: Remove this flag and its usage from TARGET_XARCH INS_FLAGS_DONT_CARE = 0x00, diff --git a/src/coreclr/jit/instrsxarch.h b/src/coreclr/jit/instrsxarch.h index 36d8fbfe0b1cf8..cdbb65c9a59832 100644 --- a/src/coreclr/jit/instrsxarch.h +++ b/src/coreclr/jit/instrsxarch.h @@ -68,14 +68,14 @@ INST5(bswap, "bswap", IUM_RW, 0x0F00C8, BAD_CODE, // id nm um mr mi rm a4 flags INST4(add, "add", IUM_RW, 0x000000, 0x000080, 0x000002, 0x000004, INS_FLAGS_WritesAllFlags) -INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(or, "or", IUM_RW, 0x000008, 0x000880, 0x00000A, 0x00000C, INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_CF_OF_Flags) INST4(adc, "adc", IUM_RW, 0x000010, 0x001080, 0x000012, 0x000014, INS_FLAGS_ReadsCF | INS_FLAGS_WritesAllFlags) INST4(sbb, "sbb", IUM_RW, 0x000018, 0x001880, 0x00001A, 0x00001C, INS_FLAGS_ReadsCF | INS_FLAGS_WritesAllFlags) -INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(and, "and", IUM_RW, 0x000020, 0x002080, 0x000022, 0x000024, INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_CF_OF_Flags) // AF = ? INST4(sub, "sub", IUM_RW, 0x000028, 0x002880, 0x00002A, 0x00002C, INS_FLAGS_WritesAllFlags) -INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(xor, "xor", IUM_RW, 0x000030, 0x003080, 0x000032, 0x000034, INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_CF_OF_Flags) // AF = ? 
INST4(cmp, "cmp", IUM_RD, 0x000038, 0x003880, 0x00003A, 0x00003C, INS_FLAGS_WritesAllFlags) -INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesAllFlags) // CF = 0, AF = ?, OF = 0 +INST4(test, "test", IUM_RD, 0x000084, 0x0000F6, 0x000084, 0x0000A8, INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_CF_OF_Flags) // AF = ? INST4(mov, "mov", IUM_WR, 0x000088, 0x0000C6, 0x00008A, 0x0000B0, INS_FLAGS_None) INST4(lea, "lea", IUM_WR, BAD_CODE, BAD_CODE, 0x00008D, BAD_CODE, INS_FLAGS_None) @@ -101,16 +101,16 @@ INST3(cmovb, "cmovb", IUM_WR, BAD_CODE, BAD_CODE, INST3(cmovae, "cmovae", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0043, INS_FLAGS_ReadsCF) INST3(cmove, "cmove", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0044, INS_FLAGS_ReadsZF) INST3(cmovne, "cmovne", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0045, INS_FLAGS_ReadsZF) -INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) -INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST3(cmovbe, "cmovbe", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0046, INS_FLAGS_Reads_CF_ZF_Flags) +INST3(cmova, "cmova", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0047, INS_FLAGS_Reads_CF_ZF_Flags) INST3(cmovs, "cmovs", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0048, INS_FLAGS_ReadsSF) INST3(cmovns, "cmovns", IUM_WR, BAD_CODE, BAD_CODE, 0x0F0049, INS_FLAGS_ReadsSF) INST3(cmovp, "cmovp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004A, INS_FLAGS_ReadsPF) INST3(cmovnp, "cmovnp", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004B, INS_FLAGS_ReadsPF) -INST3(cmovl, "cmovl", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) -INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST3(cmovl, "cmovl", 
IUM_WR, BAD_CODE, BAD_CODE, 0x0F004C, INS_FLAGS_Reads_OF_SF_Flags) +INST3(cmovge, "cmovge", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004D, INS_FLAGS_Reads_OF_SF_Flags) +INST3(cmovle, "cmovle", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004E, INS_FLAGS_Reads_OF_SF_ZF_Flags) +INST3(cmovg, "cmovg", IUM_WR, BAD_CODE, BAD_CODE, 0x0F004F, INS_FLAGS_Reads_OF_SF_ZF_Flags) INST3(xchg, "xchg", IUM_RW, 0x000086, BAD_CODE, 0x000086, INS_FLAGS_None) INST3(imul, "imul", IUM_RW, 0x0F00AC, BAD_CODE, 0x0F00AF, INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, ZF = ?, SF = ? @@ -301,10 +301,10 @@ INST3(cvttpd2dq, "cvttpd2dq", IUM_WR, BAD_CODE, BAD_CODE, INST3(cvtdq2pd, "cvtdq2pd", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xE6), INS_FLAGS_None) // cvt packed DWORDs to doubles // SSE2 comparison instructions -INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_WritesAllFlags) // ordered compare singles: AF = 0, SF = 0, OF = 0 -INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_WritesAllFlags) // ordered compare doubles: AF = 0, SF = 0, OF = 0 -INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_WritesAllFlags) // unordered compare singles: AF = 0, SF = 0, OF = 0 -INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_WritesAllFlags) // unordered compare doubles: AF = 0, SF = 0, OF = 0 +INST3(comiss, "comiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2F), INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_OF_SF_PF_Flags) // ordered compare singles +INST3(comisd, "comisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2F), INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_OF_SF_PF_Flags) // ordered compare doubles +INST3(ucomiss, "ucomiss", IUM_RD, BAD_CODE, BAD_CODE, PCKFLT(0x2E), INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_OF_SF_PF_Flags) // unordered compare singles +INST3(ucomisd, "ucomisd", IUM_RD, BAD_CODE, BAD_CODE, PCKDBL(0x2E), INS_FLAGS_WritesAllFlags | INS_FLAGS_Resets_OF_SF_PF_Flags) // unordered compare doubles // SSE2 packed 
single/double comparison operations. // Note that these instructions not only compare but also overwrite the first source. @@ -595,7 +595,7 @@ INST3(bextr, "bextr", IUM_WR, BAD_CODE, BAD_CODE, INST3(rorx, "rorx", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xF0), INS_FLAGS_None) INST3(pdep, "pdep", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Deposit INST3(pext, "pext", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_Flags_IsDstDstSrcAVXInstruction) // Parallel Bits Extract -INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_WritesAllFlags | INS_Flags_IsDstDstSrcAVXInstruction) // Zero High Bits Starting with Specified Bit Position: PF = ?, AF = ?, OF = 0 +INST3(bzhi, "bzhi", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF5), INS_FLAGS_WritesAllFlags | INS_Flags_IsDstDstSrcAVXInstruction | INS_FLAGS_Resets_OF_Flags) // Zero High Bits Starting with Specified Bit Position: PF = ?, AF = ? INST3(mulx, "mulx", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF6), INS_Flags_IsDstDstSrcAVXInstruction) // Unsigned Multiply Without Affecting Flags INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None) @@ -612,7 +612,7 @@ INST3(tzcnt, "tzcnt", IUM_WR, BAD_CODE, BAD_CODE, INST3(lzcnt, "lzcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xBD), INS_FLAGS_WritesAllFlags) // PF = ?, AF = ?, SF = ? OF = ? 
// POPCNT -INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_WritesAllFlags) // CF = 0, PF = 0, AF = 0, SF = 0, OF = 0 +INST3(popcnt, "popcnt", IUM_WR, BAD_CODE, BAD_CODE, SSEFLT(0xB8), INS_FLAGS_WritesAllFlags | INS_FLAGS_ResetsAllFlagsExceptZF) // id nm um mr mi flags INST2(ret, "ret", IUM_RD, 0x0000C3, 0x0000C2, INS_FLAGS_None) @@ -703,16 +703,16 @@ INST1(setb, "setb", IUM_WR, 0x0F0092, INST1(setae, "setae", IUM_WR, 0x0F0093, INS_FLAGS_ReadsCF) INST1(sete, "sete", IUM_WR, 0x0F0094, INS_FLAGS_ReadsZF) INST1(setne, "setne", IUM_WR, 0x0F0095, INS_FLAGS_ReadsZF) -INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_ReadsZF | INS_FLAGS_ReadsCF) -INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_ReadsZF | INS_FLAGS_ReadsCF) +INST1(setbe, "setbe", IUM_WR, 0x0F0096, INS_FLAGS_Reads_CF_ZF_Flags) +INST1(seta, "seta", IUM_WR, 0x0F0097, INS_FLAGS_Reads_CF_ZF_Flags) INST1(sets, "sets", IUM_WR, 0x0F0098, INS_FLAGS_ReadsSF) INST1(setns, "setns", IUM_WR, 0x0F0099, INS_FLAGS_ReadsSF) INST1(setp, "setp", IUM_WR, 0x0F009A, INS_FLAGS_ReadsPF) INST1(setnp, "setnp", IUM_WR, 0x0F009B, INS_FLAGS_ReadsPF) -INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) -INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST1(setl, "setl", IUM_WR, 0x0F009C, INS_FLAGS_Reads_OF_SF_Flags) +INST1(setge, "setge", IUM_WR, 0x0F009D, INS_FLAGS_Reads_OF_SF_Flags) +INST1(setle, "setle", IUM_WR, 0x0F009E, INS_FLAGS_Reads_OF_SF_ZF_Flags) +INST1(setg, "setg", IUM_WR, 0x0F009F, INS_FLAGS_Reads_OF_SF_ZF_Flags) #ifdef TARGET_AMD64 // A jump with rex prefix. 
This is used for register indirect @@ -729,16 +729,16 @@ INST0(jb, "jb", IUM_RD, 0x000072, INST0(jae, "jae", IUM_RD, 0x000073, INS_FLAGS_ReadsCF) INST0(je, "je", IUM_RD, 0x000074, INS_FLAGS_ReadsZF) INST0(jne, "jne", IUM_RD, 0x000075, INS_FLAGS_ReadsZF) -INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) -INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST0(jbe, "jbe", IUM_RD, 0x000076, INS_FLAGS_Reads_CF_ZF_Flags) +INST0(ja, "ja", IUM_RD, 0x000077, INS_FLAGS_Reads_CF_ZF_Flags) INST0(js, "js", IUM_RD, 0x000078, INS_FLAGS_ReadsSF) INST0(jns, "jns", IUM_RD, 0x000079, INS_FLAGS_ReadsSF) INST0(jp, "jp", IUM_RD, 0x00007A, INS_FLAGS_ReadsPF) INST0(jnp, "jnp", IUM_RD, 0x00007B, INS_FLAGS_ReadsPF) -INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) -INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST0(jl, "jl", IUM_RD, 0x00007C, INS_FLAGS_Reads_OF_SF_Flags) +INST0(jge, "jge", IUM_RD, 0x00007D, INS_FLAGS_Reads_OF_SF_Flags) +INST0(jle, "jle", IUM_RD, 0x00007E, INS_FLAGS_Reads_OF_SF_ZF_Flags) +INST0(jg, "jg", IUM_RD, 0x00007F, INS_FLAGS_Reads_OF_SF_ZF_Flags) INST0(l_jmp, "jmp", IUM_RD, 0x0000E9, INS_FLAGS_None) INST0(l_jo, "jo", IUM_RD, 0x00800F, INS_FLAGS_ReadsOF) @@ -747,16 +747,16 @@ INST0(l_jb, "jb", IUM_RD, 0x00820F, INST0(l_jae, "jae", IUM_RD, 0x00830F, INS_FLAGS_ReadsCF) INST0(l_je, "je", IUM_RD, 0x00840F, INS_FLAGS_ReadsZF) INST0(l_jne, "jne", IUM_RD, 0x00850F, INS_FLAGS_ReadsZF) -INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) -INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_ReadsCF | INS_FLAGS_ReadsZF) +INST0(l_jbe, "jbe", IUM_RD, 0x00860F, INS_FLAGS_Reads_CF_ZF_Flags) +INST0(l_ja, "ja", IUM_RD, 0x00870F, INS_FLAGS_Reads_CF_ZF_Flags) INST0(l_js, "js", 
IUM_RD, 0x00880F, INS_FLAGS_ReadsSF) INST0(l_jns, "jns", IUM_RD, 0x00890F, INS_FLAGS_ReadsSF) INST0(l_jp, "jp", IUM_RD, 0x008A0F, INS_FLAGS_ReadsPF) INST0(l_jnp, "jnp", IUM_RD, 0x008B0F, INS_FLAGS_ReadsPF) -INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_ReadsSF | INS_FLAGS_ReadsOF) -INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) -INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_ReadsOF | INS_FLAGS_ReadsSF | INS_FLAGS_ReadsZF) +INST0(l_jl, "jl", IUM_RD, 0x008C0F, INS_FLAGS_Reads_OF_SF_Flags) +INST0(l_jge, "jge", IUM_RD, 0x008D0F, INS_FLAGS_Reads_OF_SF_Flags) +INST0(l_jle, "jle", IUM_RD, 0x008E0F, INS_FLAGS_Reads_OF_SF_ZF_Flags) +INST0(l_jg, "jg", IUM_RD, 0x008F0F, INS_FLAGS_Reads_OF_SF_ZF_Flags) INST0(align, "align", IUM_RD, BAD_CODE, INS_FLAGS_None) From 81eb9ebbb4d7b0612faa6c6538cec15656fde391 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Fri, 28 May 2021 16:10:43 -0700 Subject: [PATCH 05/10] remove duplicate enum --- src/coreclr/jit/instr.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/instr.h b/src/coreclr/jit/instr.h index 78b8af099ee789..01cd38bd8e6ce7 100644 --- a/src/coreclr/jit/instr.h +++ b/src/coreclr/jit/instr.h @@ -120,15 +120,14 @@ enum insFlags : uint32_t INS_FLAGS_Resets_OF_Flags = 1 << 14, INS_FLAGS_Resets_CF_OF_Flags = 1 << 15, INS_FLAGS_Resets_OF_SF_PF_Flags = 1 << 16, - INS_FLAGS_Resets_OF_SF_PF_Flags = 1 << 17, - INS_FLAGS_ResetsAllFlagsExceptZF = 1 << 18, + INS_FLAGS_ResetsAllFlagsExceptZF = 1 << 17, // x87 instruction - INS_FLAGS_x87Instr = 1 << 19, + INS_FLAGS_x87Instr = 1 << 18, // Avx - INS_Flags_IsDstDstSrcAVXInstruction = 1 << 20, - INS_Flags_IsDstSrcSrcAVXInstruction = 1 << 21, + INS_Flags_IsDstDstSrcAVXInstruction = 1 << 19, + INS_Flags_IsDstSrcSrcAVXInstruction = 1 << 20, // TODO-Cleanup: Remove this flag and its usage from TARGET_XARCH INS_FLAGS_DONT_CARE = 0x00, 
From 53c4325c8299b131d364e528c2ec450e59651f8f Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 1 Jun 2021 12:33:20 -0700 Subject: [PATCH 06/10] Handle cases where shift-amount is 0 --- src/coreclr/jit/clrjit.natvis | 4 ++- src/coreclr/jit/emitxarch.cpp | 66 +++++++++++++++++++++++++++++++++-- src/coreclr/jit/emitxarch.h | 2 ++ 3 files changed, 69 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/clrjit.natvis b/src/coreclr/jit/clrjit.natvis index 49399d3e23d71c..3f90e9a38149d4 100644 --- a/src/coreclr/jit/clrjit.natvis +++ b/src/coreclr/jit/clrjit.natvis @@ -157,7 +157,9 @@ The .NET Foundation licenses this file to you under the MIT license. {_idIns,en} {_idReg1,en} - {_idIns,en} {_idReg1,en}, {_idLargeCns,d} + {_idIns,en} {_idReg1,en}, {_idLargeCns,d} + {_idIns,en} {_idReg1,en}, {_idLargeCns,d} + {_idIns,en} {_idReg1,en}, {_idSmallCns,d} {_idIns,en} diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index fcd15bcc1f85a6..2089cefc10614d 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -159,6 +159,69 @@ bool emitter::IsWriteZFFlags(instruction ins) return (CodeGenInterface::instInfo[ins] & INS_FLAGS_WritesZF) != 0; } + +//------------------------------------------------------------------------ +// IsFlagsModified: check if the instruction modifies the flags. +// +// Arguments: +// id - instruction to test +// +// Return Value: +// true if instruction modified any flag, false otherwise. +// +bool emitter::IsFlagsModified(instrDesc* id) +{ + instruction ins = id->idIns(); + insFormat fmt = id->idInsFmt(); + + if (fmt == IF_RRW_SHF) + { + if (id->idIsLargeCns()) + { + return true; + } + else if (id->idSmallCns() == 0) + { + switch (ins) + { + // If shift-amount for below instructions is 0, then flags are unaffected. 
+ case INS_rcl_N: + case INS_rcr_N: + case INS_rol_N: + case INS_ror_N: + case INS_shl_N: + case INS_shr_N: + case INS_sar_N: + return false; + default: + return true; + + } + } + } + else if (fmt == IF_RRW) + { + switch (ins) + { + // If shift-amount for below instructions is 0, then flags are unaffected. + // So, to be conservative, do not optimize if the instruction has register + // as the shift-amount operand. + case INS_rcl: + case INS_rcr: + case INS_rol: + case INS_ror: + case INS_shl: + case INS_shr: + case INS_sar: + return false; + default: + return true; + } + } + + return true; +} + //------------------------------------------------------------------------ // AreUpper32BitsZero: check if some previously emitted // instruction set the upper 32 bits of reg to zero. @@ -276,7 +339,6 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr case IF_RRD: case IF_RRW: break; - default: return false; } @@ -294,7 +356,7 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr if ((treeOps == GT_EQ) || (treeOps == GT_NE)) { - if (IsWriteZFFlags(lastIns)) + if (IsWriteZFFlags(lastIns) && IsFlagsModified(id)) { return id->idOpSize() == opSize; } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index fb300d1302cb30..04781f3e8bfcb9 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -172,6 +172,8 @@ void SetContains256bitAVX(bool value) bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); bool IsWriteZFFlags(instruction ins); +bool IsFlagsModified(instrDesc* id); + bool IsThreeOperandAVXInstruction(instruction ins) { return (IsDstDstSrcAVXInstruction(ins) || IsDstSrcSrcAVXInstruction(ins)); From 7ded4699a16380f926e405686e77ac3cf9c2d76a Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Tue, 1 Jun 2021 13:45:03 -0700 Subject: [PATCH 07/10] Add helper method for Resets OF/CF flags --- src/coreclr/jit/emitxarch.cpp | 18 
+++++++++++++++--- src/coreclr/jit/emitxarch.h | 1 + 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 2089cefc10614d..2f418be55aeb09 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -159,6 +159,20 @@ bool emitter::IsWriteZFFlags(instruction ins) return (CodeGenInterface::instInfo[ins] & INS_FLAGS_WritesZF) != 0; } +//------------------------------------------------------------------------ +// IsResetsOCFlags: check if the instruction resets the +// OF and CF flag. +// +// Arguments: +// ins - instruction to test +// +// Return Value: +// true if instruction resets the OF and CF flag, false otherwise. +// +bool emitter::IsResetsOCFlags(instruction ins) +{ + return (CodeGenInterface::instInfo[ins] & INS_FLAGS_Resets_CF_OF_Flags) != 0; +} //------------------------------------------------------------------------ // IsFlagsModified: check if the instruction modifies the flags. @@ -195,7 +209,6 @@ bool emitter::IsFlagsModified(instrDesc* id) return false; default: return true; - } } } @@ -348,8 +361,7 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr return false; } - // these always set OF and CF to 0 - if ((lastIns == INS_and) || (lastIns == INS_or) || (lastIns == INS_xor)) + if (IsResetsOCFlags(lastIns)) { return id->idOpSize() == opSize; } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 04781f3e8bfcb9..0496145bc9b32c 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -172,6 +172,7 @@ void SetContains256bitAVX(bool value) bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); bool IsWriteZFFlags(instruction ins); +bool IsResetsOCFlags(instruction ins); bool IsFlagsModified(instrDesc* id); bool IsThreeOperandAVXInstruction(instruction ins) From e7e04b71a2bf1bce037531bb7d8f7b6aa2121e48 Mon Sep 17 00:00:00 2001 From: Kunal 
Pathak Date: Wed, 2 Jun 2021 11:06:49 -0700 Subject: [PATCH 08/10] Rename methods --- src/coreclr/jit/emitxarch.cpp | 12 ++++++------ src/coreclr/jit/emitxarch.h | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 2f418be55aeb09..bfd49daf24d916 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -145,7 +145,7 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) } //------------------------------------------------------------------------ -// IsWriteZFFlags: check if the instruction write the +// DoesWritesZeroFlag: check if the instruction write the // ZF flag. // // Arguments: @@ -154,13 +154,13 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) // Return Value: // true if instruction writes the ZF flag, false otherwise. // -bool emitter::IsWriteZFFlags(instruction ins) +bool emitter::DoesWritesZeroFlag(instruction ins) { return (CodeGenInterface::instInfo[ins] & INS_FLAGS_WritesZF) != 0; } //------------------------------------------------------------------------ -// IsResetsOCFlags: check if the instruction resets the +// DoesResetsOverflowAndCarryFlags: check if the instruction resets the // OF and CF flag. // // Arguments: @@ -169,7 +169,7 @@ bool emitter::IsWriteZFFlags(instruction ins) // Return Value: // true if instruction resets the OF and CF flag, false otherwise. 
// -bool emitter::IsResetsOCFlags(instruction ins) +bool emitter::DoesResetsOverflowAndCarryFlags(instruction ins) { return (CodeGenInterface::instInfo[ins] & INS_FLAGS_Resets_CF_OF_Flags) != 0; } @@ -361,14 +361,14 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr return false; } - if (IsResetsOCFlags(lastIns)) + if (DoesResetsOverflowAndCarryFlags(lastIns)) { return id->idOpSize() == opSize; } if ((treeOps == GT_EQ) || (treeOps == GT_NE)) { - if (IsWriteZFFlags(lastIns) && IsFlagsModified(id)) + if (DoesWritesZeroFlag(lastIns) && IsFlagsModified(id)) { return id->idOpSize() == opSize; } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 0496145bc9b32c..b04d38c08c816d 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -171,8 +171,8 @@ void SetContains256bitAVX(bool value) bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); -bool IsWriteZFFlags(instruction ins); -bool IsResetsOCFlags(instruction ins); +bool DoesWritesZeroFlag(instruction ins); +bool DoesResetsOverflowAndCarryFlags(instruction ins); bool IsFlagsModified(instrDesc* id); bool IsThreeOperandAVXInstruction(instruction ins) From b1e0fc3d199d7901743bfdd1d1fd48781b8083b2 Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Wed, 2 Jun 2021 12:14:56 -0700 Subject: [PATCH 09/10] one more rename --- src/coreclr/jit/emitxarch.cpp | 8 ++++---- src/coreclr/jit/emitxarch.h | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index bfd49daf24d916..a95a95cb4c840f 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -175,15 +175,15 @@ bool emitter::DoesResetsOverflowAndCarryFlags(instruction ins) } //------------------------------------------------------------------------ -// IsFlagsModified: check if the instruction modifies the flags. 
+// IsFlagsAlwaysModified: check if the instruction always modifies the flags. // // Arguments: // id - instruction to test // // Return Value: -// true if instruction modified any flag, false otherwise. +// true if instruction always modified any flag, false otherwise. // -bool emitter::IsFlagsModified(instrDesc* id) +bool emitter::IsFlagsAlwaysModified(instrDesc* id) { instruction ins = id->idIns(); insFormat fmt = id->idInsFmt(); @@ -368,7 +368,7 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr if ((treeOps == GT_EQ) || (treeOps == GT_NE)) { - if (DoesWritesZeroFlag(lastIns) && IsFlagsModified(id)) + if (DoesWritesZeroFlag(lastIns) && IsFlagsAlwaysModified(id)) { return id->idOpSize() == opSize; } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index b04d38c08c816d..a386b0f9b9554b 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -173,7 +173,7 @@ bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); bool DoesWritesZeroFlag(instruction ins); bool DoesResetsOverflowAndCarryFlags(instruction ins); -bool IsFlagsModified(instrDesc* id); +bool IsFlagsAlwaysModified(instrDesc* id); bool IsThreeOperandAVXInstruction(instruction ins) { From 068fb1588d1e4898fbe82504c5c49300a482943d Mon Sep 17 00:00:00 2001 From: Kunal Pathak Date: Thu, 3 Jun 2021 18:44:00 -0700 Subject: [PATCH 10/10] review feedback --- src/coreclr/jit/emitxarch.cpp | 28 ++++++++++++++++------------ src/coreclr/jit/emitxarch.h | 4 ++-- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index a95a95cb4c840f..83c3f1f197b280 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -145,7 +145,7 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) } //------------------------------------------------------------------------ -// DoesWritesZeroFlag: check if the instruction write 
the +// DoesWriteZeroFlag: check if the instruction write the // ZF flag. // // Arguments: @@ -154,14 +154,14 @@ bool emitter::IsDstSrcSrcAVXInstruction(instruction ins) // Return Value: // true if instruction writes the ZF flag, false otherwise. // -bool emitter::DoesWritesZeroFlag(instruction ins) +bool emitter::DoesWriteZeroFlag(instruction ins) { return (CodeGenInterface::instInfo[ins] & INS_FLAGS_WritesZF) != 0; } //------------------------------------------------------------------------ -// DoesResetsOverflowAndCarryFlags: check if the instruction resets the -// OF and CF flag. +// DoesResetOverflowAndCarryFlags: check if the instruction resets the +// OF and CF flag to 0. // // Arguments: // ins - instruction to test @@ -169,19 +169,20 @@ bool emitter::DoesWritesZeroFlag(instruction ins) // Return Value: // true if instruction resets the OF and CF flag, false otherwise. // -bool emitter::DoesResetsOverflowAndCarryFlags(instruction ins) +bool emitter::DoesResetOverflowAndCarryFlags(instruction ins) { return (CodeGenInterface::instInfo[ins] & INS_FLAGS_Resets_CF_OF_Flags) != 0; } //------------------------------------------------------------------------ -// IsFlagsAlwaysModified: check if the instruction always modifies the flags. +// IsFlagsAlwaysModified: check if the instruction guarantee to modify any flags. // // Arguments: // id - instruction to test // // Return Value: -// true if instruction always modified any flag, false otherwise. +// false, if instruction is guaranteed to not modify any flag. +// true, if instruction will modify some flag. 
// bool emitter::IsFlagsAlwaysModified(instrDesc* id) { @@ -311,9 +312,9 @@ bool emitter::AreUpper32BitsZero(regNumber reg) // the same values as if there were a compare to 0 // // Arguments: -// reg - register of interest -// opSize - size of register -// needsOCFlags - additionally check the overflow and carry flags +// reg - register of interest +// opSize - size of register +// treeOps - type of tree node operation // // Return Value: // true if the previous instruction set the flags for reg @@ -361,14 +362,17 @@ bool emitter::AreFlagsSetToZeroCmp(regNumber reg, emitAttr opSize, genTreeOps tr return false; } - if (DoesResetsOverflowAndCarryFlags(lastIns)) + // Certain instruction like and, or and xor modifies exactly same flags + // as "test" instruction. + // They reset OF and CF to 0 and modifies SF, ZF and PF. + if (DoesResetOverflowAndCarryFlags(lastIns)) { return id->idOpSize() == opSize; } if ((treeOps == GT_EQ) || (treeOps == GT_NE)) { - if (DoesWritesZeroFlag(lastIns) && IsFlagsAlwaysModified(id)) + if (DoesWriteZeroFlag(lastIns) && IsFlagsAlwaysModified(id)) { return id->idOpSize() == opSize; } diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index a386b0f9b9554b..cda15711c2c095 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -171,8 +171,8 @@ void SetContains256bitAVX(bool value) bool IsDstDstSrcAVXInstruction(instruction ins); bool IsDstSrcSrcAVXInstruction(instruction ins); -bool DoesWritesZeroFlag(instruction ins); -bool DoesResetsOverflowAndCarryFlags(instruction ins); +bool DoesWriteZeroFlag(instruction ins); +bool DoesResetOverflowAndCarryFlags(instruction ins); bool IsFlagsAlwaysModified(instrDesc* id); bool IsThreeOperandAVXInstruction(instruction ins)