Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Enable fake hot/cold splitting on ARM64
This commit contains fixes for various bugs exposed by enabling fake
hot/cold splitting on ARM64:
- Branches between hot/cold sections are now always long.
- The pseudoinstruction for loading a constant from the cold section
did not support loading 16-byte data into vector registers, as it
temporarily loaded the constant into an 8-byte integer register. Now,
16-byte constants are loaded directly into vector registers via an
`ld1` instruction.
- Tests involving loading 16-byte constants exposed that the data section
is not always aligned to its largest constant. Now, the data section
is always aligned to `emitConsDsc.alignment` when calling `eeAllocMem`.
- Asserts/NYIs blocking hot/cold splitting on ARM64 have been removed.

Fake hot/cold splitting requires we fake unwind info by treating each
split function as one hot section. A more architecture-agnostic
approach for this has been applied.
  • Loading branch information
Aman Khalid authored and amanasifkhalid committed Jun 14, 2022
commit d2bbed8a05d7c14420b437a179330e5f405a6148
4 changes: 0 additions & 4 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8000,10 +8000,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
void unwindReserveFuncHelper(FuncInfoDsc* func, bool isHotCode);
void unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pColdCode, bool isHotCode);

#ifdef DEBUG
void fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode);
#endif // DEBUG

#endif // TARGET_AMD64 || (TARGET_X86 && FEATURE_EH_FUNCLETS)

UNATIVE_OFFSET unwindGetCurrentOffset(FuncInfoDsc* func);
Expand Down
9 changes: 2 additions & 7 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6045,13 +6045,8 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
// For arm64/LoongArch64, we want to allocate JIT data always adjacent to code similar to what native compiler does.
// This way allows us to use a single `ldr` to access such data like float constant/jmp table.

UNATIVE_OFFSET roDataAlignmentDelta = 0;
if (emitConsDsc.dsdOffs && (emitConsDsc.alignment == TARGET_POINTER_SIZE))
{
UNATIVE_OFFSET roDataAlignment = TARGET_POINTER_SIZE; // 8 Byte align by default.
roDataAlignmentDelta = (UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, roDataAlignment) - emitTotalHotCodeSize;
assert((roDataAlignmentDelta == 0) || (roDataAlignmentDelta == 4));
}
const UNATIVE_OFFSET roDataAlignmentDelta =
(UNATIVE_OFFSET)ALIGN_UP(emitTotalHotCodeSize, emitConsDsc.alignment) - emitTotalHotCodeSize;

args.hotCodeSize = emitTotalHotCodeSize + roDataAlignmentDelta + emitConsDsc.dsdOffs;
args.coldCodeSize = emitTotalColdCodeSize;
Expand Down
6 changes: 5 additions & 1 deletion src/coreclr/jit/emitarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8422,7 +8422,8 @@ void emitter::emitIns_J(instruction ins, BasicBlock* dst, int instrCount)
{
case INS_bl_local:
idjShort = true;
// Fall through.
fmt = IF_BI_0A;
break;
case INS_b:
// Unconditional jump is a single form.
// Assume is long in case we cross hot/cold sections.
Expand Down Expand Up @@ -9825,6 +9826,9 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
// Special case: emit add + ld1 instructions for loading 16-byte data into vector register.
if (isVectorRegister(dstReg) && (opSize == EA_16BYTE))
{
// Low 4 bits should be 0 -- 16-byte JIT data should be aligned on 16 bytes.
assert((imm12 & 15) == 0);

const emitAttr elemSize = EA_1BYTE;
const insOpts opt = optMakeArrangement(opSize, elemSize);

Expand Down
18 changes: 17 additions & 1 deletion src/coreclr/jit/unwind.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,16 @@ void Compiler::unwindGetFuncLocations(FuncInfoDsc* func,
// The hot section only goes up to the cold section
assert(fgFirstFuncletBB == nullptr);

*ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
*ppEndLoc = nullptr; // If fake-splitting, "trick" VM by pretending entire function is hot.
}
else
#endif // DEBUG
{
*ppEndLoc = new (this, CMK_UnwindInfo) emitLocation(ehEmitCookie(fgFirstColdBlock));
}
}
else
{
Expand Down Expand Up @@ -259,6 +268,13 @@ void Compiler::unwindEmitFuncCFI(FuncInfoDsc* func, void* pHotCode, void* pColdC
DWORD unwindCodeBytes = 0;
BYTE* pUnwindBlock = nullptr;

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
pColdCode = nullptr;
}
#endif // DEBUG

if (func->startLoc == nullptr)
{
startOffset = 0;
Expand Down
67 changes: 25 additions & 42 deletions src/coreclr/jit/unwindamd64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,18 +656,17 @@ void Compiler::unwindReserve()
//
void Compiler::unwindReserveFunc(FuncInfoDsc* func)
{
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.
unwindReserveFuncHelper(func, true);
}
else
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
}
else
#endif // DEBUG
{
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
unwindReserveFuncHelper(func, false);
}
Expand Down Expand Up @@ -859,7 +858,17 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo

if (isHotCode)
{
assert(endOffset <= info.compTotalHotCodeSize);
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
{
assert(endOffset <= info.compNativeCodeSize);
}
else
#endif // DEBUG
{
assert(endOffset <= info.compTotalHotCodeSize);
}

pColdCode = nullptr;
}
else
Expand Down Expand Up @@ -890,43 +899,17 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode
static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != nullptr))
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
fakeUnwindEmitFuncHelper(func, pHotCode);
}
else
#ifdef DEBUG
if (!JitConfig.JitFakeProcedureSplitting())
#endif // DEBUG
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
}
}
}

#ifdef DEBUG
//------------------------------------------------------------------------
// Compiler::fakeUnwindEmitFuncHelper: Report unwind info for a fake-split
// function as one range covering [0, info.compNativeCodeSize), passing no
// cold code pointer to eeAllocUnwindInfo.
//
// Arguments:
//    func     - Function whose unwind codes are reported; must be FUNC_ROOT.
//    pHotCode - Forwarded (cast to BYTE*) as the first argument of
//               eeAllocUnwindInfo.
//
// Notes:
//    Asserts that fgFirstColdBlock is non-null, i.e. this helper is only
//    expected to run when the method has a cold block.
//
void Compiler::fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode)
{
assert(fgFirstColdBlock != nullptr);
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.

// The reported range spans the method's entire native code size rather than
// stopping at the end of the hot section.
const UNATIVE_OFFSET startOffset = 0;
const UNATIVE_OFFSET endOffset = info.compNativeCodeSize;
// The unwind block is the portion of func->unwindCodes from unwindCodeSlot to
// the end of the array; its byte count is the array size minus that slot index.
const DWORD unwindCodeBytes = sizeof(func->unwindCodes) - func->unwindCodeSlot;
BYTE* pUnwindBlock = &func->unwindCodes[func->unwindCodeSlot];

// Dump the unwind block only when unwind display is enabled.
if (opts.dspUnwind)
{
DumpUnwindInfo(true, startOffset, endOffset, (const UNWIND_INFO* const)pUnwindBlock);
}

// Pass pColdCode = nullptr; VM allocs unwind info for combined hot/cold section
eeAllocUnwindInfo((BYTE*)pHotCode, nullptr, startOffset, endOffset, unwindCodeBytes, pUnwindBlock,
(CorJitFuncKind)func->funKind);
}
#endif // DEBUG

#endif // TARGET_AMD64
45 changes: 34 additions & 11 deletions src/coreclr/jit/unwindarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -563,13 +563,20 @@ void Compiler::unwindReserve()
void Compiler::unwindReserveFunc(FuncInfoDsc* func)
{
BOOL isFunclet = (func->funKind == FUNC_ROOT) ? FALSE : TRUE;
bool funcHasColdSection = false;
bool funcHasColdSection = (fgFirstColdBlock != nullptr);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && funcHasColdSection)
{
funcHasColdSection = false; // "Trick" the VM into thinking we don't have a cold section.
}
#endif // DEBUG

#if defined(FEATURE_CFI_SUPPORT)
if (generateCFIUnwindCodes())
{
DWORD unwindCodeBytes = 0;
if (fgFirstColdBlock != nullptr)
if (funcHasColdSection)
{
eeReserveUnwindInfo(isFunclet, true /*isColdCode*/, unwindCodeBytes);
}
Expand All @@ -584,7 +591,7 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func)
// cold section. This needs to be done before we split into fragments, as each
// of the hot and cold sections can have multiple fragments.

if (fgFirstColdBlock != NULL)
if (funcHasColdSection)
{
assert(!isFunclet); // TODO-CQ: support hot/cold splitting with EH

Expand All @@ -595,8 +602,6 @@ void Compiler::unwindReserveFunc(FuncInfoDsc* func)
func->uwiCold = new (this, CMK_UnwindInfo) UnwindInfo();
func->uwiCold->InitUnwindInfo(this, startLoc, endLoc);
func->uwiCold->HotColdSplitCodes(&func->uwi);

funcHasColdSection = true;
}

// First we need to split the function or funclet into fragments that are no larger
Expand Down Expand Up @@ -1604,11 +1609,19 @@ void UnwindFragmentInfo::Allocate(
UNATIVE_OFFSET endOffset;
UNATIVE_OFFSET codeSize;

// We don't support hot/cold splitting with EH, so if there is cold code, this
// better not be a funclet!
// TODO-CQ: support funclets in cold code

noway_assert(isHotCode || funKind == CORJIT_FUNC_ROOT);
// We don't support hot/cold splitting with EH, so if there is cold code, this
// better not be a funclet!
// TODO-CQ: support funclets in cold code
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != NULL))
{
noway_assert(isHotCode && (funKind == CORJIT_FUNC_ROOT));
}
else
#endif // DEBUG
{
noway_assert(isHotCode || (funKind == CORJIT_FUNC_ROOT));
}

// Compute the final size, and start and end offsets of the fragment

Expand Down Expand Up @@ -1656,7 +1669,17 @@ void UnwindFragmentInfo::Allocate(

if (isHotCode)
{
assert(endOffset <= uwiComp->info.compTotalHotCodeSize);
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != NULL))
{
assert(endOffset <= uwiComp->info.compNativeCodeSize);
}
else
#endif // DEBUG
{
assert(endOffset <= uwiComp->info.compTotalHotCodeSize);
}

pColdCode = NULL;
}
else
Expand Down
63 changes: 25 additions & 38 deletions src/coreclr/jit/unwindx86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -113,18 +113,17 @@ void Compiler::unwindEmit(void* pHotCode, void* pColdCode)
//
void Compiler::unwindReserveFunc(FuncInfoDsc* func)
{
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.
unwindReserveFuncHelper(func, true);
}
else
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting())
{
assert(func->funKind == FUNC_ROOT); // No splitting of funclets.
}
else
#endif // DEBUG
{
unwindReserveFuncHelper(func, true);

if (fgFirstColdBlock != nullptr)
{
unwindReserveFuncHelper(func, false);
}
Expand Down Expand Up @@ -164,17 +163,13 @@ void Compiler::unwindEmitFunc(FuncInfoDsc* func, void* pHotCode, void* pColdCode
static_assert_no_msg(FUNC_HANDLER == (FuncKind)CORJIT_FUNC_HANDLER);
static_assert_no_msg(FUNC_FILTER == (FuncKind)CORJIT_FUNC_FILTER);

#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (pColdCode != nullptr))
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
fakeUnwindEmitFuncHelper(func, pHotCode);
}
else
#ifdef DEBUG
if (!JitConfig.JitFakeProcedureSplitting())
#endif // DEBUG
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, true);

if (pColdCode != nullptr)
{
unwindEmitFuncHelper(func, pHotCode, pColdCode, false);
}
Expand Down Expand Up @@ -258,7 +253,17 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo

if (isHotCode)
{
assert(endOffset <= info.compTotalHotCodeSize);
#ifdef DEBUG
if (JitConfig.JitFakeProcedureSplitting() && (fgFirstColdBlock != nullptr))
{
assert(endOffset <= info.compNativeCodeSize);
}
else
#endif // DEBUG
{
assert(endOffset <= info.compTotalHotCodeSize);
}

pColdCode = nullptr;
}
else
Expand All @@ -276,22 +281,4 @@ void Compiler::unwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode, void* pCo
(BYTE*)&unwindInfo, (CorJitFuncKind)func->funKind);
}

#ifdef DEBUG
//------------------------------------------------------------------------
// Compiler::fakeUnwindEmitFuncHelper: Report unwind info for a fake-split
// function as one range covering [0, info.compNativeCodeSize), passing no
// cold code pointer to eeAllocUnwindInfo.
//
// Arguments:
//    func     - Function being reported; must be FUNC_ROOT.
//    pHotCode - Forwarded (cast to BYTE*) as the first argument of
//               eeAllocUnwindInfo.
//
// Notes:
//    Asserts that fgFirstColdBlock is non-null, i.e. this helper is only
//    expected to run when the method has a cold block.
//
void Compiler::fakeUnwindEmitFuncHelper(FuncInfoDsc* func, void* pHotCode)
{
assert(fgFirstColdBlock != nullptr);
assert(func->funKind == FUNC_ROOT); // No fake-splitting of funclets.

// The reported range spans the method's entire native code size rather than
// stopping at the end of the hot section.
const UNATIVE_OFFSET startOffset = 0;
const UNATIVE_OFFSET endOffset = info.compNativeCodeSize;

// Only FunctionLength is set here; it is the length of the whole range.
UNWIND_INFO unwindInfo;
unwindInfo.FunctionLength = (ULONG)(endOffset);

// Pass pColdCode = nullptr; VM allocs unwind info for combined hot/cold section
eeAllocUnwindInfo((BYTE*)pHotCode, nullptr, startOffset, endOffset, sizeof(UNWIND_INFO), (BYTE*)&unwindInfo,
(CorJitFuncKind)func->funKind);
}
#endif // DEBUG

#endif // FEATURE_EH_FUNCLETS
1 change: 0 additions & 1 deletion src/tests/Common/testenvironment.proj
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@
COMPlus_EnableSSE42;
COMPlus_EnableSSSE3;
COMPlus_ForceRelocs;
COMPlus_GCgen0size;
COMPlus_GCStress;
COMPlus_GCName;
COMPlus_gcServer;
Expand Down