Skip to content
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions src/coreclr/inc/corinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -3372,8 +3372,18 @@ class ICorDynamicInfo : public ICorStaticInfo
#define IMAGE_REL_BASED_REL32 0x10
#define IMAGE_REL_BASED_THUMB_BRANCH24 0x13
#define IMAGE_REL_SECREL 0x104

// Linux x64
// GD model
#define IMAGE_REL_TLSGD 0x105

// Linux arm64
// TLSDESC (dynamic)
// Relocation kinds recorded on the individual instructions of the TLS descriptor
// access sequence that the JIT emits for NativeAOT on linux/arm64:
//   adrp x0, :tlsdesc:tlsRoot  -> IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21
//   add  x0, x0, #0            -> IMAGE_REL_AARCH64_TLSDESC_ADD_LO12
//   ldr  x2, [x0]              -> IMAGE_REL_AARCH64_TLSDESC_LD64_LO12
//   blr  x2                    -> IMAGE_REL_AARCH64_TLSDESC_CALL
#define IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21 0x107
#define IMAGE_REL_AARCH64_TLSDESC_LD64_LO12 0x108
#define IMAGE_REL_AARCH64_TLSDESC_ADD_LO12 0x109
#define IMAGE_REL_AARCH64_TLSDESC_CALL 0x10A

// The identifier for ARM32-specific PC-relative address
// computation corresponds to the following instruction
// sequence:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* b8a05f18-503e-47e4-9193-931c50b151d1 */
0xb8a05f18,
0x503e,
0x47e4,
{0x91, 0x93, 0x93, 0x1c, 0x50, 0xb1, 0x51, 0xd1}
constexpr GUID JITEEVersionIdentifier = { /* 0fb71692-0ee6-4914-88a8-6446e45f23e8 */
0x0fb71692,
0x0ee6,
0x4914,
{0x88, 0xa8, 0x64, 0x46, 0xe4, 0x5f, 0x23, 0xe8}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
5 changes: 5 additions & 0 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2333,6 +2333,11 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
if (con->ImmedValNeedsReloc(compiler))
{
attr = EA_SET_FLG(attr, EA_CNS_RELOC_FLG);
if (tree->IsTlsIconHandle())
{
// no need to generate because we generate it as part of GT_CALL
break;
}
}

if (targetType == TYP_BYREF)
Expand Down
44 changes: 44 additions & 0 deletions src/coreclr/jit/codegenarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3623,6 +3623,44 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
//
assert(genIsValidIntReg(target->GetRegNum()));

bool isTlsHandleTarget = false;
#ifdef TARGET_ARM64
isTlsHandleTarget =
compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsUnix && target->IsTlsIconHandle();

if (isTlsHandleTarget)
{
assert(call->gtFlags & GTF_TLS_GET_ADDR);
emitter* emitter = GetEmitter();
emitAttr attr = (emitAttr)(EA_CNS_TLSGD_RELOC | EA_CNS_RELOC_FLG | retSize);
GenTreeIntCon* iconNode = target->AsIntCon();
methHnd = (CORINFO_METHOD_HANDLE)iconNode->gtIconVal;
retSize = EA_SET_FLG(retSize, EA_CNS_TLSGD_RELOC);

// For NativeAOT on linux/arm64, the linker expects the following pattern, so we generate
// it as part of the call. Emitting the instructions individually makes it hard to get
// the exact format that the linker needs. Also, we might end up spilling or
// reloading a register, which can break the pattern.
//
// adrp x0, :tlsdesc:tlsRoot ; R_AARCH64_TLSDESC_ADR_PAGE21
// add x0, x0, #0 ; R_AARCH64_TLSDESC_ADD_LO12
// mrs x1, tpidr_el0
// ldr x2, [x0] ; R_AARCH64_TLSDESC_LD64_LO12
// blr x2 ; R_AARCH64_TLSDESC_CALL
// add x0, x1, x0
// We guaranteed in LSRA that r0, r1 and r2 are assigned to this node.

// adrp/add
instGen_Set_Reg_To_Imm(attr, REG_R0, (ssize_t)methHnd,
INS_FLAGS_DONT_CARE DEBUGARG(iconNode->gtTargetHandle) DEBUGARG(iconNode->gtFlags));
// mrs
emitter->emitIns_R(INS_mrs_tpid0, attr, REG_R1);

// ldr
emitter->emitIns_R_R_I(INS_ldr, attr, target->GetRegNum(), REG_R0, (ssize_t)methHnd);
}
#endif

// clang-format off
genEmitCall(emitter::EC_INDIR_R,
methHnd,
Expand All @@ -3633,6 +3671,12 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
di,
target->GetRegNum(),
call->IsFastTailCall());

if (isTlsHandleTarget)
{
// add x0, x1, x0
GetEmitter()->emitIns_R_R_R(INS_add, EA_8BYTE, REG_R0, REG_R1, REG_R0);
}
// clang-format on
}
else
Expand Down
58 changes: 49 additions & 9 deletions src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ void emitter::emitInsSanityCheck(instrDesc* id)
assert(isIntegerRegister(id->idReg1()) || // ZR
isVectorRegister(id->idReg1()));
assert(isIntegerRegister(id->idReg2())); // SP
assert(emitGetInsSC(id) == 0);
assert((emitGetInsSC(id) == 0) || (id->idIsTlsGD()));
assert(insOptsNone(id->idInsOpt()));
break;

Expand Down Expand Up @@ -8521,7 +8521,7 @@ void emitter::emitIns_R_R_I(instruction ins,
reg2 = encodingSPtoZR(reg2);

ssize_t mask = (1 << scale) - 1; // the mask of low bits that must be zero to encode the immediate
if (imm == 0)
if (imm == 0 || EA_IS_CNS_TLSGD_RELOC(attr))
{
assert(insOptsNone(opt)); // PRE/POST Index doesn't make sense with an immediate of zero

Expand Down Expand Up @@ -8628,7 +8628,11 @@ void emitter::emitIns_R_R_I(instruction ins,

id->idReg1(reg1);
id->idReg2(reg2);

if (EA_IS_CNS_TLSGD_RELOC(attr))
{
assert(imm != 0);
id->idSetTlsGD();
}
dispIns(id);
appendToCurIG(id);
}
Expand Down Expand Up @@ -13032,6 +13036,10 @@ void emitter::emitIns_R_AI(instruction ins,
id->idAddr()->iiaAddr = (BYTE*)addr;
id->idReg1(ireg);
id->idSetIsDspReloc();
if (EA_IS_CNS_TLSGD_RELOC(attr))
{
id->idSetTlsGD();
}
#ifdef DEBUG
id->idDebugOnlyInfo()->idMemCookie = targetHandle;
id->idDebugOnlyInfo()->idFlags = gtFlags;
Expand All @@ -13055,6 +13063,10 @@ void emitter::emitIns_R_AI(instruction ins,
id->idAddr()->iiaAddr = (BYTE*)addr;
id->idReg1(ireg);
id->idReg2(ireg);
if (EA_IS_CNS_TLSGD_RELOC(attr))
{
id->idSetTlsGD();
}

dispIns(id);
appendToCurIG(id);
Expand Down Expand Up @@ -13513,8 +13525,21 @@ void emitter::emitIns_Call(EmitCallType callType,
id->idIns(ins);
id->idInsFmt(fmt);

id->idReg3(ireg);
assert(xreg == REG_NA);
if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && EA_IS_CNS_TLSGD_RELOC(retSize))
{
// For NativeAOT linux/arm64, we also need to record the relocation of methHnd.
// Since we do not have space to embed it in instrDesc, we store the register in
// reg1 and instead use `iiaAddr` to store the method handle. Likewise, during
// emitOutputInstr, we retrieve the register from reg1 for this specific case.
id->idSetTlsGD();
id->idReg1(ireg);
id->idAddr()->iiaAddr = (BYTE*)methHnd;
}
else
{
id->idReg3(ireg);
}
}
else
{
Expand Down Expand Up @@ -16820,10 +16845,19 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
assert(insOptsNone(id->idInsOpt()));
assert((ins == INS_br_tail) || (ins == INS_blr));
code = emitInsCode(ins, fmt);
code |= insEncodeReg_Rn(id->idReg3()); // nnnnn

sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
if (emitComp->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && id->idIsTlsGD())
{
emitRecordRelocation(odst, (CORINFO_METHOD_HANDLE)id->idAddr()->iiaAddr,
IMAGE_REL_AARCH64_TLSDESC_CALL);
code |= insEncodeReg_Rn(id->idReg1()); // nnnnn
}
else
{
code |= insEncodeReg_Rn(id->idReg3()); // nnnnn
}
dst += emitOutputCall(ig, dst, id, code);
sz = id->idIsLargeCall() ? sizeof(instrDescCGCA) : sizeof(instrDesc);
break;

case IF_LS_1A: // LS_1A XX...V..iiiiiiii iiiiiiiiiiittttt Rt PC imm(1MB)
Expand Down Expand Up @@ -16852,6 +16886,10 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
}
code |= insEncodeReg_Rn(id->idReg2()); // nnnnn
dst += emitOutput_Instr(dst, code);
if (id->idIsTlsGD())
{
emitRecordRelocation(odst, (void*)emitGetInsSC(id), IMAGE_REL_AARCH64_TLSDESC_LD64_LO12);
}
break;

case IF_LS_2B: // LS_2B .X.......Xiiiiii iiiiiinnnnnttttt Rt Rn imm(0-4095)
Expand Down Expand Up @@ -17117,7 +17155,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
code = emitInsCode(ins, fmt);
code |= insEncodeReg_Rd(id->idReg1()); // ddddd
dst += emitOutput_Instr(dst, code);
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEBASE_REL21);
emitRecordRelocation(odst, id->idAddr()->iiaAddr, id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADR_PAGE21
: IMAGE_REL_ARM64_PAGEBASE_REL21);
}
else
{
Expand Down Expand Up @@ -17160,7 +17199,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
{
assert(sz == sizeof(instrDesc));
assert(id->idAddr()->iiaAddr != nullptr);
emitRecordRelocation(odst, id->idAddr()->iiaAddr, IMAGE_REL_ARM64_PAGEOFFSET_12A);
emitRecordRelocation(odst, id->idAddr()->iiaAddr, id->idIsTlsGD() ? IMAGE_REL_AARCH64_TLSDESC_ADD_LO12
: IMAGE_REL_ARM64_PAGEOFFSET_12A);
}
break;

Expand Down Expand Up @@ -20119,7 +20159,7 @@ void emitter::emitDispInsHelp(

case IF_LS_2A: // LS_2A .X.......X...... ......nnnnnttttt Rt Rn
assert(insOptsNone(id->idInsOpt()));
assert(emitGetInsSC(id) == 0);
assert((emitGetInsSC(id) == 0) || id->idIsTlsGD());
emitDispReg(id->idReg1(), emitInsTargetRegSize(id), true);
emitDispAddrRI(id->idReg2(), id->idInsOpt(), 0);
break;
Expand Down
10 changes: 10 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -2234,6 +2234,16 @@ struct GenTree
return (gtOper == GT_CNS_INT) ? (gtFlags & GTF_ICON_HDL_MASK) : GTF_EMPTY;
}

bool IsTlsIconHandle()
{
if (IsIconHandle())
{
GenTreeFlags tlsFlags = (GTF_ICON_TLSGD_OFFSET | GTF_ICON_TLS_HDL);
return ((gtFlags & tlsFlags) == tlsFlags);
}
return false;
}

// Mark this node as no longer being a handle; clear its GTF_ICON_*_HDL bits.
void ClearIconHandleMask()
{
Expand Down
83 changes: 56 additions & 27 deletions src/coreclr/jit/helperexpansion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -591,8 +591,8 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
// use(tlsRoot);
// ...

GenTree* tlsRootAddr = nullptr;
CORINFO_CONST_LOOKUP tlsRootObject = threadStaticInfo.tlsRootObject;
GenTree* tlsRootAddr = nullptr;
CORINFO_GENERIC_HANDLE tlsRootObject = threadStaticInfo.tlsRootObject.handle;

if (TargetOS::IsWindows)
{
Expand All @@ -613,42 +613,71 @@ bool Compiler::fgExpandThreadLocalAccessForCallNativeAOT(BasicBlock** pBlock, St
tlsValue = gtNewIndir(TYP_I_IMPL, tlsValue, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);

// This resolves to an offset which is TYP_INT
GenTree* tlsRootOffset = gtNewIconNode((size_t)tlsRootObject.handle, TYP_INT);
GenTree* tlsRootOffset = gtNewIconNode((size_t)tlsRootObject, TYP_INT);
tlsRootOffset->gtFlags |= GTF_ICON_SECREL_OFFSET;

// Add the tlsValue and tlsRootOffset to produce tlsRootAddr.
tlsRootAddr = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsValue, tlsRootOffset);
}
else if (TargetOS::IsUnix)
{
// Code sequence to access thread local variable on linux/x64:
// data16
// lea rdi, 0x7FE5C418CD28 ; tlsRootObject
// data16 data16
// call _tls_get_addr
//
// This sequence along with `data16` prefix is expected by the linker so it
// will patch these with TLS access.
GenTree* tls_get_addr_val =
gtNewIconHandleNode((size_t)threadStaticInfo.tlsGetAddrFtnPtr.handle, GTF_ICON_FTN_ADDR);
tls_get_addr_val->SetContained();
if (TargetArchitecture::IsX64)
{
// Code sequence to access thread local variable on linux/x64:
// data16
// lea rdi, 0x7FE5C418CD28 ; tlsRootObject
// data16 data16
// call _tls_get_addr
//
// This sequence along with `data16` prefix is expected by the linker so it
// will patch these with TLS access.
GenTree* tls_get_addr_val =
gtNewIconHandleNode((size_t)threadStaticInfo.tlsGetAddrFtnPtr.handle, GTF_ICON_FTN_ADDR);
tls_get_addr_val->SetContained();

GenTreeCall* tlsRefCall = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL);
tlsRefCall->gtFlags |= GTF_TLS_GET_ADDR;

// This is an indirect call which takes an argument.
// Populate and set the ABI appropriately.
assert(tlsRootObject != 0);
GenTree* tlsArg = gtNewIconNode((size_t)tlsRootObject, TYP_I_IMPL);
tlsArg->gtFlags |= GTF_ICON_TLSGD_OFFSET;
tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));

fgMorphArgs(tlsRefCall);

tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
tlsRootAddr = tlsRefCall;
}
else if (TargetArchitecture::IsArm64)
{
/*
x0 = adrp :tlsdesc:tlsRoot ; 1st parameter
x0 += tlsdesc_lo12:tlsRoot ; update 1st parameter

// GenTreeCall* tlsRefCall = gtNewCallNode(CT_ tls_get_addr_val, TYP_I_IMPL);
GenTreeCall* tlsRefCall = gtNewIndCallNode(tls_get_addr_val, TYP_I_IMPL);
tlsRefCall->gtFlags |= GTF_TLS_GET_ADDR;
// //
x1 = tpidr_el0 ; 2nd parameter

// This is an indirect call which takes an argument.
// Populate and set the ABI appropriately.
assert(tlsRootObject.handle != 0);
GenTree* tlsArg = gtNewIconNode((size_t)tlsRootObject.handle, TYP_I_IMPL);
tlsArg->gtFlags |= GTF_ICON_TLSGD_OFFSET;
tlsRefCall->gtArgs.PushBack(this, NewCallArg::Primitive(tlsArg));
x2 = [x0] ; call
blr x2

fgMorphArgs(tlsRefCall);
*/

tlsRefCall->gtFlags |= GTF_EXCEPT | (tls_get_addr_val->gtFlags & GTF_GLOB_EFFECT);
tlsRootAddr = tlsRefCall;
GenTree* tlsRootOffset = gtNewIconHandleNode((size_t)tlsRootObject, GTF_ICON_TLS_HDL);
tlsRootOffset->gtFlags |= GTF_ICON_TLSGD_OFFSET;

GenTree* tlsCallIndir = gtCloneExpr(tlsRootOffset);
GenTreeCall* tlsRefCall = gtNewIndCallNode(tlsCallIndir, TYP_I_IMPL);
tlsRefCall->gtFlags |= GTF_TLS_GET_ADDR;
fgMorphArgs(tlsRefCall);

tlsRefCall->gtFlags |= GTF_EXCEPT | (tlsCallIndir->gtFlags & GTF_GLOB_EFFECT);
tlsRootAddr = tlsRefCall;
}
else
{
unreached();
}
}
else
{
Expand Down
15 changes: 15 additions & 0 deletions src/coreclr/jit/lsraarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -368,6 +368,21 @@ int LinearScan::BuildCall(GenTreeCall* call)

if (ctrlExpr != nullptr)
{
#ifdef TARGET_ARM64
if (compiler->IsTargetAbi(CORINFO_NATIVEAOT_ABI) && TargetOS::IsUnix && (call->gtArgs.CountArgs() == 0) &&
ctrlExpr->IsTlsIconHandle())
{
// For NativeAOT linux/arm64, we generate the needed code as part of
// call node because the generated code has to be in specific format
// that linker can patch. As such, the code needs specific registers
// that we will attach to this node to guarantee that they are available
// during generating this node.
assert(call->gtFlags & GTF_TLS_GET_ADDR);
newRefPosition(REG_R0, currentLoc, RefTypeFixedReg, nullptr, genRegMask(REG_R0));
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do these have to be R0/R1/R2 for the linker, or is this just a choice you've made to make things simpler?

Or is it the case that since the ultimate expansion has a call these choices don't add new constraints?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They are used by the linker to match the pattern, as seen here, and that's how we did it in the hand-written assembly code here.

newRefPosition(REG_R1, currentLoc, RefTypeFixedReg, nullptr, genRegMask(REG_R1));
ctrlExprCandidates = genRegMask(REG_R2);
}
#endif
BuildUse(ctrlExpr, ctrlExprCandidates);
srcCount++;
}
Expand Down
Loading