Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 25 additions & 17 deletions src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14299,15 +14299,15 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
result.insThroughput = PERFSCORE_THROUGHPUT_2X; // one or two components
result.insLatency = PERFSCORE_LATENCY_1C;

if (id->idInsFmt() == IF_RWR_LABEL)
if (insFmt == IF_RWR_LABEL)
{
// RIP relative addressing
//
// - throughput is only 1 per cycle
//
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
}
else if (id->idInsFmt() != IF_RWR_SRD)
else if (insFmt != IF_RWR_SRD)
{
if (id->idAddr()->iiaAddrMode.amIndxReg != REG_NA)
{
Expand Down Expand Up @@ -14355,11 +14355,17 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_imul_14:
case INS_imul_15:
#endif // TARGET_AMD64
case INS_imul:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_3C;
break;

case INS_mulEAX:
case INS_imulEAX:
case INS_imul:
// uops.info: mul/imul rdx:rax,reg latency is 3 only if the low half of the result is needed, but in that
// case codegen uses imul reg,reg instruction form (except for unsigned overflow checks, which are rare)
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_3C;
result.insLatency += PERFSCORE_LATENCY_4C;
break;

case INS_div:
Expand All @@ -14373,12 +14379,11 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
{
assert(id->idOpSize() == EA_4BYTE);
result.insThroughput = PERFSCORE_THROUGHPUT_6C;
result.insThroughput = PERFSCORE_LATENCY_26C;
result.insLatency = PERFSCORE_LATENCY_26C;
}
break;

case INS_idiv:
result.insThroughput = PERFSCORE_THROUGHPUT_6C;
// The integer divide instructions have long latencies
if ((id->idOpSize() == EA_8BYTE))
{
Expand All @@ -14389,7 +14394,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
{
assert(id->idOpSize() == EA_4BYTE);
result.insThroughput = PERFSCORE_THROUGHPUT_6C;
result.insThroughput = PERFSCORE_LATENCY_26C;
result.insLatency = PERFSCORE_LATENCY_26C;
}
break;

Expand All @@ -14416,19 +14421,21 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case IF_ARW_CNS:
// ins [mem], cns
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency += PERFSCORE_LATENCY_1C;
break;

case IF_RRW: // probably should use INS_shl_N
case IF_RRW:
// ins reg, cl
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
result.insLatency = PERFSCORE_LATENCY_2C;
break;

case IF_MRW: // probably should use INS_shr_N
case IF_MRW:
case IF_SRW:
case IF_ARW:
// ins [mem], cl
result.insThroughput = PERFSCORE_THROUGHPUT_4C;
result.insLatency += PERFSCORE_LATENCY_2C;
break;

default:
Expand All @@ -14441,7 +14448,7 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_shl_1:
case INS_shr_1:
case INS_sar_1:
result.insLatency = PERFSCORE_LATENCY_1C;
result.insLatency += PERFSCORE_LATENCY_1C;
switch (insFmt)
{
case IF_RRW:
Expand All @@ -14466,27 +14473,27 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_ror_1:
case INS_rol_1:
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency = PERFSCORE_LATENCY_1C;
result.insLatency += PERFSCORE_LATENCY_1C;
break;

case INS_shl_N:
case INS_shr_N:
case INS_sar_N:
case INS_ror_N:
case INS_rol_N:
result.insLatency = PERFSCORE_LATENCY_2C;
result.insLatency += PERFSCORE_LATENCY_1C;
switch (insFmt)
{
case IF_RRW_SHF:
// ins reg, cl
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
// ins reg, cns
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
break;

case IF_MRW_SHF:
case IF_SRW_SHF:
case IF_ARW_SHF:
// ins [mem], cl
result.insThroughput = PERFSCORE_THROUGHPUT_4C;
// ins [mem], cns
result.insThroughput = PERFSCORE_THROUGHPUT_2C;
break;

default:
Expand All @@ -14499,13 +14506,14 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
case INS_rcr:
case INS_rcl:
result.insThroughput = PERFSCORE_THROUGHPUT_6C;
result.insLatency = PERFSCORE_LATENCY_6C;
result.insLatency += PERFSCORE_LATENCY_6C;
break;

case INS_rcr_1:
case INS_rcl_1:
// uops.info
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
result.insLatency += PERFSCORE_LATENCY_2C;
break;

case INS_shld:
Expand Down