diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h
index eb190f97f0e479..e00dff0f7bacd3 100644
--- a/src/coreclr/jit/codegen.h
+++ b/src/coreclr/jit/codegen.h
@@ -1235,6 +1235,10 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #else
     instruction genGetInsForOper(genTreeOps oper, var_types type);
 #endif
+    instruction genGetVolatileLdStIns(instruction   currentIns,
+                                      regNumber     targetReg,
+                                      GenTreeIndir* indir,
+                                      bool*         needsBarrier);
     bool genEmitOptimizedGCWriteBarrier(GCInfo::WriteBarrierForm writeBarrierForm, GenTree* addr, GenTree* data);
     GenTree*  getCallTarget(const GenTreeCall* call, CORINFO_METHOD_HANDLE* methHnd);
     regNumber getCallIndirectionCellReg(GenTreeCall* call);
diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp
index f99602d21d953b..c7dc32ddd559a0 100644
--- a/src/coreclr/jit/codegenarm64.cpp
+++ b/src/coreclr/jit/codegenarm64.cpp
@@ -4207,22 +4207,10 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
 
     if (tree->IsVolatile())
     {
-        bool addrIsInReg   = addr->isUsedFromReg();
-        bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
+        bool needsBarrier = true;
+        ins               = genGetVolatileLdStIns(ins, dataReg, tree, &needsBarrier);
 
-        if ((ins == INS_strb) && addrIsInReg)
-        {
-            ins = INS_stlrb;
-        }
-        else if ((ins == INS_strh) && addrIsInReg && addrIsAligned)
-        {
-            ins = INS_stlrh;
-        }
-        else if ((ins == INS_str) && genIsValidIntReg(dataReg) && addrIsInReg && addrIsAligned)
-        {
-            ins = INS_stlr;
-        }
-        else
+        if (needsBarrier)
        {
            // issue a full memory barrier before a volatile StInd
            // Note: We cannot issue store barrier ishst because it is a weaker barrier.
diff --git a/src/coreclr/jit/codegenarmarch.cpp b/src/coreclr/jit/codegenarmarch.cpp
index af7442c2958982..afeb4e1a439d54 100644
--- a/src/coreclr/jit/codegenarmarch.cpp
+++ b/src/coreclr/jit/codegenarmarch.cpp
@@ -1728,6 +1728,134 @@ void CodeGen::genCodeForIndexAddr(GenTreeIndexAddr* node)
     genProduceReg(node);
 }
 
+//------------------------------------------------------------------------
+// genGetVolatileLdStIns: Determine the most efficient instruction to perform a
+//    volatile load or store and whether an explicit barrier is required or not.
+//
+// Arguments:
+//    currentIns   - the current instruction to perform the load/store
+//    targetReg    - the target register
+//    indir        - the indirection node representing the volatile load/store
+//    needsBarrier - OUT parameter. Set to true if an explicit memory barrier is required.
+//
+// Return Value:
+//    instruction to perform the volatile load/store with.
+//
+instruction CodeGen::genGetVolatileLdStIns(instruction   currentIns,
+                                           regNumber     targetReg,
+                                           GenTreeIndir* indir,
+                                           bool*         needsBarrier)
+{
+    assert(indir->IsVolatile());
+
+    if (!genIsValidIntReg(targetReg))
+    {
+        // We don't have special instructions to perform volatile loads/stores for non-integer registers.
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    assert(!varTypeIsFloating(indir));
+    assert(!varTypeIsSIMD(indir));
+
+#ifdef TARGET_ARM64
+    *needsBarrier = false;
+
+    if (indir->IsUnaligned() && (currentIns != INS_ldrb) && (currentIns != INS_strb))
+    {
+        // We have to use a normal load/store + explicit memory barrier
+        // to avoid unaligned access exceptions.
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    const bool addrIsInReg = indir->Addr()->isUsedFromReg();
+
+    // With RCPC2 (arm64 v8.4+) we can work with simple addressing modes like [reg + simm9]
+    const bool shouldUseRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2) && !addrIsInReg &&
+                                indir->Addr()->OperIs(GT_LEA) && !indir->HasIndex() && (indir->Scale() == 1) &&
+                                emitter::emitIns_valid_imm_for_unscaled_ldst_offset(indir->Offset());
+
+    if (shouldUseRcpc2)
+    {
+        assert(!addrIsInReg);
+        switch (currentIns)
+        {
+            // Loads
+
+            case INS_ldrb:
+                return INS_ldapurb;
+
+            case INS_ldrh:
+                return INS_ldapurh;
+
+            case INS_ldr:
+                return INS_ldapur;
+
+            // Stores
+
+            case INS_strb:
+                return INS_stlurb;
+
+            case INS_strh:
+                return INS_stlurh;
+
+            case INS_str:
+                return INS_stlur;
+
+            default:
+                *needsBarrier = true;
+                return currentIns;
+        }
+    }
+
+    // Only RCPC2 (arm64 v8.4+) allows us to deal with contained addresses.
+    // In other cases we'll have to emit a normal load/store + explicit memory barrier.
+    // This means that lowering should generally avoid generating contained addresses for volatile loads/stores
+    // so that we can use the cheaper instructions.
+    if (!addrIsInReg)
+    {
+        *needsBarrier = true;
+        return currentIns;
+    }
+
+    // RCPC1 (arm64 v8.3+) offers slightly more relaxed memory ordering than ldar, which is sufficient for
+    // the .NET memory model's requirements, see https://github.com/dotnet/runtime/issues/67374
+    const bool hasRcpc1 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
+    switch (currentIns)
+    {
+        // Loads
+
+        case INS_ldrb:
+            return hasRcpc1 ? INS_ldaprb : INS_ldarb;
+
+        case INS_ldrh:
+            return hasRcpc1 ? INS_ldaprh : INS_ldarh;
+
+        case INS_ldr:
+            return hasRcpc1 ? INS_ldapr : INS_ldar;
+
+        // Stores
+
+        case INS_strb:
+            return INS_stlrb;
+
+        case INS_strh:
+            return INS_stlrh;
+
+        case INS_str:
+            return INS_stlr;
+
+        default:
+            *needsBarrier = true;
+            return currentIns;
+    }
+#else  // TARGET_ARM64
+    *needsBarrier = true;
+    return currentIns;
+#endif // !TARGET_ARM64
+}
+
 //------------------------------------------------------------------------
 // genCodeForIndir: Produce code for a GT_IND node.
 //
@@ -1755,58 +1883,9 @@ void CodeGen::genCodeForIndir(GenTreeIndir* tree)
 
     bool emitBarrier = false;
 
-    if ((tree->gtFlags & GTF_IND_VOLATILE) != 0)
+    if (tree->IsVolatile())
     {
-#ifdef TARGET_ARM64
-        bool addrIsInReg   = tree->Addr()->isUsedFromReg();
-        bool addrIsAligned = ((tree->gtFlags & GTF_IND_UNALIGNED) == 0);
-
-        // On arm64-v8.3+ we can use ldap* instructions with acquire/release semantics to avoid
-        // full memory barriers if mixed with STLR
-        bool hasRcpc = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc);
-
-        // On arm64-v8.4+ we can use ldapur* instructions with acquire/release semantics to
-        // avoid full memory barriers if address is contained and unscaled
-        bool hasRcpc2 = compiler->compOpportunisticallyDependsOn(InstructionSet_Rcpc2);
-
-        bool handledWithLdapur = false;
-        if (hasRcpc2 && !addrIsInReg && tree->Addr()->OperIs(GT_LEA) && !tree->HasIndex() && (tree->Scale() == 1) &&
-            emitter::emitIns_valid_imm_for_unscaled_ldst_offset(tree->Offset()))
-        {
-            if (ins == INS_ldrb)
-            {
-                ins               = INS_ldapurb;
-                handledWithLdapur = true;
-            }
-            else if ((ins == INS_ldrh) && addrIsAligned)
-            {
-                ins               = INS_ldapurh;
-                handledWithLdapur = true;
-            }
-            else if ((ins == INS_ldr) && addrIsAligned && genIsValidIntReg(targetReg))
-            {
-                ins               = INS_ldapur;
-                handledWithLdapur = true;
-            }
-        }
-
-        if ((ins == INS_ldrb) && addrIsInReg)
-        {
-            ins = hasRcpc ? INS_ldaprb : INS_ldarb;
-        }
-        else if ((ins == INS_ldrh) && addrIsInReg && addrIsAligned)
-        {
-            ins = hasRcpc ? INS_ldaprh : INS_ldarh;
-        }
-        else if ((ins == INS_ldr) && addrIsInReg && addrIsAligned && genIsValidIntReg(targetReg))
-        {
-            ins = hasRcpc ? INS_ldapr : INS_ldar;
-        }
-        else if (!handledWithLdapur)
-#endif // TARGET_ARM64
-        {
-            emitBarrier = true;
-        }
+        ins = genGetVolatileLdStIns(ins, targetReg, tree, &emitBarrier);
     }
 
     GetEmitter()->emitInsLoadStoreOp(ins, emitActualTypeSize(type), targetReg, tree);
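
Reviewer note: as a quick sanity check of the selection order the new helper implements, here is a minimal standalone sketch. The names `pickVolatileIns`, `VolatileAccess`, and the `Ins` enum are hypothetical stand-ins for illustration only (they are not JIT types), and the byte-sized, unaligned, and non-integer-register special cases handled by the real helper are omitted.

```cpp
#include <cstdio>

// Hypothetical stand-ins for illustration only; not the JIT's real types or API.
enum Ins
{
    INS_PLAIN,  // plain ldr/str (may need an explicit dmb)
    INS_ACQREL, // ldar/stlr     (acquire/release, register address only)
    INS_RCPC1,  // ldapr         (v8.3 load-acquire-pc, loads only)
    INS_RCPC2   // ldapur/stlur  (v8.4 unscaled-offset acquire/release)
};

struct VolatileAccess
{
    bool isLoad;        // load vs. store
    bool addrIsInReg;   // plain [reg] address (not a contained LEA)
    bool addrFitsRcpc2; // contained [reg + simm9] address, scale == 1, no index
    bool hasRcpc;       // arm64 v8.3 LRCPC available
    bool hasRcpc2;      // arm64 v8.4 LRCPC2 available
};

// Mirrors the selection order in genGetVolatileLdStIns for the common integer case:
// 1) RCPC2 unscaled forms for suitable contained addresses,
// 2) otherwise a contained address falls back to a plain access + full barrier,
// 3) register addresses use ldapr (with RCPC1) or ldar for loads, and stlr for stores.
static Ins pickVolatileIns(const VolatileAccess& acc, bool* needsBarrier)
{
    *needsBarrier = false;

    if (acc.hasRcpc2 && !acc.addrIsInReg && acc.addrFitsRcpc2)
    {
        return INS_RCPC2;
    }

    if (!acc.addrIsInReg)
    {
        *needsBarrier = true; // no cheap ordering for contained addresses without RCPC2
        return INS_PLAIN;
    }

    if (acc.isLoad)
    {
        return acc.hasRcpc ? INS_RCPC1 : INS_ACQREL;
    }
    return INS_ACQREL; // stores with a register address always get stlr
}

int main()
{
    bool           barrier = false;
    VolatileAccess acc{/*isLoad*/ true, /*addrIsInReg*/ false, /*addrFitsRcpc2*/ true,
                       /*hasRcpc*/ true, /*hasRcpc2*/ true};
    printf("ins=%d needsBarrier=%d\n", (int)pickVolatileIns(acc, &barrier), (int)barrier);
    return 0;
}
```

The practical consequence, also spelled out in the helper's comment, is that lowering should only mark addresses as contained for volatile indirections when RCPC2 is available; otherwise the fallback is a plain load/store plus a full memory barrier.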