Merged
Changes from 2 commits
1 change: 0 additions & 1 deletion src/coreclr/vm/CMakeLists.txt
@@ -621,7 +621,6 @@ if(CLR_CMAKE_TARGET_ARCH_AMD64)
${ARCH_SOURCES_DIR}/GenericCLRToCOMCallStubs.asm
${ARCH_SOURCES_DIR}/getstate.asm
${ARCH_SOURCES_DIR}/JitHelpers_Fast.asm
${ARCH_SOURCES_DIR}/JitHelpers_FastMP.asm
${ARCH_SOURCES_DIR}/JitHelpers_FastWriteBarriers.asm
${ARCH_SOURCES_DIR}/JitHelpers_SingleAppDomain.asm
${ARCH_SOURCES_DIR}/JitHelpers_Slow.asm
20 changes: 0 additions & 20 deletions src/coreclr/vm/amd64/AsmMacros.inc
@@ -206,26 +206,6 @@ INLINE_GETTHREAD macro Reg

endm

;
; Inlined macro to get the current thread's allocation context
; Trashes rax and r11
;

INLINE_GET_ALLOC_CONTEXT macro Reg

EXTERN _tls_index: DWORD
EXTERN t_thread_alloc_context: DWORD

mov r11d, [_tls_index]
mov rax, gs:[OFFSET__TEB__ThreadLocalStoragePointer]
mov rax, [rax + r11 * 8]
mov r11d, SECTIONREL t_thread_alloc_context
add rax, r11
mov Reg, rax

endm


; if you change this code there will be corresponding code in JITInterfaceGen.cpp which will need to be changed
;

75 changes: 0 additions & 75 deletions src/coreclr/vm/amd64/JitHelpers_FastMP.asm

This file was deleted.

1 change: 1 addition & 0 deletions src/coreclr/vm/arm/stubs.cpp
@@ -1776,6 +1776,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
1 change: 1 addition & 0 deletions src/coreclr/vm/arm64/stubs.cpp
@@ -895,6 +895,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
2 changes: 1 addition & 1 deletion src/coreclr/vm/comutilnative.cpp
@@ -925,7 +925,7 @@ FCIMPL0(INT64, GCInterface::GetAllocatedBytesForCurrentThread)

INT64 currentAllocated = 0;
Thread *pThread = GetThread();
gc_alloc_context* ac = &t_thread_alloc_context;
gc_alloc_context* ac = &t_gc_thread_locals.alloc_context;
currentAllocated = ac->alloc_bytes + ac->alloc_bytes_uoh - (ac->alloc_limit - ac->alloc_ptr);

return currentAllocated;
2 changes: 1 addition & 1 deletion src/coreclr/vm/gccover.cpp
@@ -1859,7 +1859,7 @@ void DoGcStress (PCONTEXT regs, NativeCodeVersion nativeCodeVersion)
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
assert(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_gc_thread_locals.alloc_context);

// StressHeap can exit early w/o forcing a SuspendEE to trigger the instruction update
// We can not rely on the return code to determine if the instruction update happened
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcenv.ee.cpp
@@ -443,7 +443,7 @@ gc_alloc_context * GCToEEInterface::GetAllocContext()
return nullptr;
}

return &t_thread_alloc_context;
return &t_gc_thread_locals.alloc_context;
}

void GCToEEInterface::GcEnumAllocContexts(enum_alloc_context_func* fn, void* param)
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcheaputilities.cpp
@@ -46,7 +46,7 @@ GVAL_IMPL_INIT(gc_alloc_context, g_global_alloc_context, {});
// on MP systems, each thread has its own allocation chunk so we can avoid
// lock prefixes and expensive MP cache snooping stuff
#ifndef _MSC_VER
__thread gc_alloc_context t_thread_alloc_context;
thread_local GCThreadLocals t_gc_thread_locals;
#endif

enum GC_LOAD_STATUS {
15 changes: 11 additions & 4 deletions src/coreclr/vm/gcheaputilities.h
@@ -26,13 +26,20 @@ GVAL_DECL(gc_alloc_context, g_global_alloc_context);
}
#endif // !DACCESS_COMPILE

// on MP systems, each thread has its own allocation chunk so we can avoid
// lock prefixes and expensive MP cache snooping stuff
struct GCThreadLocals
{
// on MP systems, each thread has its own allocation chunk so we can avoid
// lock prefixes and expensive MP cache snooping stuff
gc_alloc_context alloc_context;
};

#ifdef _MSC_VER
EXTERN_C __declspec(selectany) __declspec(thread) gc_alloc_context t_thread_alloc_context;
// use selectany to avoid initialization de-optimization issues in the compiler
__declspec(selectany)
#else
EXTERN_C __thread gc_alloc_context t_thread_alloc_context;
extern
#endif
thread_local GCThreadLocals t_gc_thread_locals;

extern "C" uint32_t* g_card_bundle_table;
extern "C" uint8_t* g_ephemeral_low;
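Taken together with the matching definition added in gcheaputilities.cpp above, the new declaration pattern can be condensed into a small self-contained sketch (the `gc_alloc_context` stand-in below is simplified; the real type comes from the GC interface headers):

```cpp
#include <cstdint>

// Simplified stand-in for the real gc_alloc_context from the GC interface.
struct gc_alloc_context
{
    uint8_t* alloc_ptr   = nullptr;
    uint8_t* alloc_limit = nullptr;
};

struct GCThreadLocals
{
    // on MP systems, each thread has its own allocation chunk so we can avoid
    // lock prefixes and expensive MP cache snooping stuff
    gc_alloc_context alloc_context;
};

// Header side: MSVC gets a selectany definition, other compilers only a declaration ...
#ifdef _MSC_VER
__declspec(selectany)
#else
extern
#endif
thread_local GCThreadLocals t_gc_thread_locals;

#ifndef _MSC_VER
// ... and exactly one translation unit (gcheaputilities.cpp in this change) defines it.
thread_local GCThreadLocals t_gc_thread_locals;
#endif

// Consumers throughout the diff take the member's address:
// gc_alloc_context* ac = &t_gc_thread_locals.alloc_context;
```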
2 changes: 1 addition & 1 deletion src/coreclr/vm/gchelpers.cpp
@@ -46,7 +46,7 @@ inline gc_alloc_context* GetThreadAllocContext()

assert(GCHeapUtilities::UseThreadAllocationContexts());

return &t_thread_alloc_context;
return &t_gc_thread_locals.alloc_context;
}

// When not using per-thread allocation contexts, we (the EE) need to take care that
2 changes: 1 addition & 1 deletion src/coreclr/vm/gcstress.h
@@ -289,7 +289,7 @@ namespace _GCStress
// BUG(github #10318) - when not using allocation contexts, the alloc lock
// must be acquired here. Until fixed, this assert prevents random heap corruption.
_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
GCHeapUtilities::GetGCHeap()->StressHeap(&t_thread_alloc_context);
GCHeapUtilities::GetGCHeap()->StressHeap(&t_gc_thread_locals.alloc_context);
}

FORCEINLINE
6 changes: 3 additions & 3 deletions src/coreclr/vm/i386/stublinkerx86.cpp
@@ -2434,7 +2434,7 @@ namespace
{
gc_alloc_context* STDCALL GetAllocContextHelper()
{
return &t_thread_alloc_context;
return &t_gc_thread_locals.alloc_context;
}
}
#endif
@@ -2490,8 +2490,8 @@ VOID StubLinkerCPU::X86EmitCurrentThreadAllocContextFetch(X86Reg dstreg, unsigne

X86EmitIndexRegLoad(dstreg, dstreg, sizeof(void *) * _tls_index);

_ASSERTE(Thread::GetOffsetOfThreadStatic(&t_thread_alloc_context) < INT_MAX);
X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_thread_alloc_context));
_ASSERTE(Thread::GetOffsetOfThreadStatic(&t_gc_thread_locals.alloc_context) < INT_MAX);
X86EmitAddReg(dstreg, (int32_t)Thread::GetOffsetOfThreadStatic(&t_gc_thread_locals.alloc_context));

#endif // TARGET_UNIX
}
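The retained emitter above still inlines the Windows TLS lookup for the allocation context, paired with a TEB `ThreadLocalStoragePointer` load emitted just before the lines shown (the deleted `INLINE_GET_ALLOC_CONTEXT` macro illustrates the full sequence). A rough C++ picture of what the generated code computes; the parameters stand in for the machine-level operands and are not runtime names:

```cpp
#include <cstdint>

// Illustrative only: the real stub reads the TEB via the gs:/fs: segment and obtains
// the offset from Thread::GetOffsetOfThreadStatic(&t_gc_thread_locals.alloc_context).
void* FetchThreadAllocContext(void** tebTlsArray,         // TEB->ThreadLocalStoragePointer
                              uint32_t tlsIndex,          // this module's _tls_index
                              int32_t allocContextOffset) // offset of the alloc_context thread-local
{
    uint8_t* tlsBlock = static_cast<uint8_t*>(tebTlsArray[tlsIndex]); // this module's TLS block
    return tlsBlock + allocContextOffset;                             // -> gc_alloc_context*
}
```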
63 changes: 52 additions & 11 deletions src/coreclr/vm/jithelpers.cpp
@@ -1668,7 +1668,7 @@ HCIMPL1_RAW(Object*, JIT_NewS_MP_FastPortable, CORINFO_CLASS_HANDLE typeHnd_)
} CONTRACTL_END;

_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

TypeHandle typeHandle(typeHnd_);
_ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables
@@ -1785,7 +1785,7 @@ HCIMPL1_RAW(StringObject*, AllocateString_MP_FastPortable, DWORD stringLength)
return HCCALL1(FramedAllocateString, stringLength);
}

gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

SIZE_T totalSize = StringObject::GetSize(stringLength);

@@ -1901,7 +1901,7 @@ HCIMPL2_RAW(Object*, JIT_NewArr1VC_MP_FastPortable, CORINFO_CLASS_HANDLE arrayMT
return HCCALL2(JIT_NewArr1, arrayMT, size);
}

gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

MethodTable *pArrayMT = (MethodTable *)arrayMT;

@@ -1959,11 +1959,6 @@ HCIMPL2_RAW(Object*, JIT_NewArr1OBJ_MP_FastPortable, CORINFO_CLASS_HANDLE arrayM
return HCCALL2(JIT_NewArr1, arrayMT, size);
}

// This is typically the only call in the fast path. Making the call early seems to be better, as it allows the compiler
// to use volatile registers for intermediate values. This reduces the number of push/pop instructions and eliminates
// some reshuffling of intermediate values into nonvolatile registers around the call.
Thread *thread = GetThread();

SIZE_T totalSize = componentCount * sizeof(void *);
_ASSERTE(totalSize / sizeof(void *) == componentCount);

@@ -1975,7 +1970,7 @@

_ASSERTE(ALIGN_UP(totalSize, DATA_ALIGNMENT) == totalSize);

gc_alloc_context *allocContext = &t_thread_alloc_context;
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;
BYTE *allocPtr = allocContext->alloc_ptr;
_ASSERTE(allocPtr <= allocContext->alloc_limit);
if (totalSize > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr))
@@ -2109,14 +2104,60 @@ HCIMPLEND
// VALUETYPE/BYREF HELPERS
//
//========================================================================
/*************************************************************/
HCIMPL2_RAW(Object*, JIT_Box_MP_FastPortable, CORINFO_CLASS_HANDLE type, void* unboxedData)
{
CONTRACTL {
THROWS;
DISABLED(GC_TRIGGERS);
MODE_COOPERATIVE;
} CONTRACTL_END;

if (unboxedData == nullptr)
{
// Tail call to the slow helper
return HCCALL2(JIT_Box, type, unboxedData);
}

_ASSERTE(GCHeapUtilities::UseThreadAllocationContexts());
gc_alloc_context *allocContext = &t_gc_thread_locals.alloc_context;

TypeHandle typeHandle(type);
_ASSERTE(!typeHandle.IsTypeDesc()); // heap objects must have method tables
MethodTable *methodTable = typeHandle.AsMethodTable();
// The fast helper should never be called for nullable types.
_ASSERTE(!methodTable->IsNullable());

SIZE_T size = methodTable->GetBaseSize();
_ASSERTE(size % DATA_ALIGNMENT == 0);

BYTE *allocPtr = allocContext->alloc_ptr;
_ASSERTE(allocPtr <= allocContext->alloc_limit);
if (size > static_cast<SIZE_T>(allocContext->alloc_limit - allocPtr))
{
// Tail call to the slow helper
return HCCALL2(JIT_Box, type, unboxedData);
}

allocContext->alloc_ptr = allocPtr + size;

_ASSERTE(allocPtr != nullptr);
Object *object = reinterpret_cast<Object *>(allocPtr);
_ASSERTE(object->HasEmptySyncBlockInfo());
object->SetMethodTable(methodTable);

// Copy the data into the object
CopyValueClass(object->UnBox(), unboxedData, methodTable);

return object;
}
HCIMPLEND_RAW

/*************************************************************/
HCIMPL2(Object*, JIT_Box, CORINFO_CLASS_HANDLE type, void* unboxedData)
{
FCALL_CONTRACT;

// <TODO>TODO: if we care, we could do a fast trial allocation
// and avoid the building the frame most times</TODO>
OBJECTREF newobj = NULL;
HELPER_METHOD_FRAME_BEGIN_RET_NOPOLL(); // Set up a frame
GCPROTECT_BEGININTERIOR(unboxedData);
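The new `JIT_Box_MP_FastPortable` above follows the same shape as the other `*_MP_FastPortable` helpers in this file: bump the per-thread allocation pointer when the object fits in the current chunk, otherwise tail-call the framed slow helper. A boiled-down, self-contained sketch of that pattern (placeholder names; the slow path is faked with `malloc` only so the example compiles):

```cpp
#include <cstdint>
#include <cstdlib>

// Placeholder mirroring the alloc_ptr/alloc_limit pair of the real allocation context.
struct AllocContext
{
    uint8_t* alloc_ptr   = nullptr;   // next free byte in this thread's chunk
    uint8_t* alloc_limit = nullptr;   // one past the end of the chunk
};

thread_local AllocContext t_allocContext;

// Stand-in for the framed helper (JIT_Box, JIT_New, ...), which may trigger a GC.
void* SlowAllocate(size_t size) { return std::malloc(size); }

void* FastAllocate(size_t size)
{
    AllocContext* ctx = &t_allocContext;
    uint8_t* ptr = ctx->alloc_ptr;
    if (size > static_cast<size_t>(ctx->alloc_limit - ptr))
    {
        // Chunk exhausted (or never set up): fall back to the slow path.
        return SlowAllocate(size);
    }
    // Bump-pointer allocation; no locks or interlocked ops, the chunk is thread-local.
    ctx->alloc_ptr = ptr + size;
    return ptr;
}
```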
3 changes: 2 additions & 1 deletion src/coreclr/vm/jitinterfacegen.cpp
@@ -65,6 +65,7 @@ void InitJITHelpers1()
#ifdef TARGET_UNIX
SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);

@@ -75,7 +76,7 @@
{
SetJitHelperFunction(CORINFO_HELP_NEWSFAST, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_BoxFastMP);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);

1 change: 1 addition & 0 deletions src/coreclr/vm/loongarch64/stubs.cpp
@@ -930,6 +930,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
1 change: 1 addition & 0 deletions src/coreclr/vm/riscv64/stubs.cpp
@@ -827,6 +827,7 @@ void InitJITHelpers1()
SetJitHelperFunction(CORINFO_HELP_NEWSFAST_ALIGN8, JIT_NewS_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_VC, JIT_NewArr1VC_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_NEWARR_1_OBJ, JIT_NewArr1OBJ_MP_FastPortable);
SetJitHelperFunction(CORINFO_HELP_BOX, JIT_Box_MP_FastPortable);

ECall::DynamicallyAssignFCallImpl(GetEEFuncEntryPoint(AllocateString_MP_FastPortable), ECall::FastAllocateString);
}
6 changes: 3 additions & 3 deletions src/coreclr/vm/threads.cpp
@@ -964,9 +964,9 @@ HRESULT Thread::DetachThread(BOOL fDLLThreadDetach)
GCX_COOP();
// GetTotalAllocatedBytes reads dead_threads_non_alloc_bytes, but will suspend EE, being in COOP mode we cannot race with that
// however, there could be other threads terminating and doing the same Add.
InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_thread_alloc_context.alloc_limit - t_thread_alloc_context.alloc_ptr);
GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_thread_alloc_context, NULL, NULL);
t_thread_alloc_context.init(); // re-initialize the context.
InterlockedExchangeAdd64((LONG64*)&dead_threads_non_alloc_bytes, t_gc_thread_locals.alloc_context.alloc_limit - t_gc_thread_locals.alloc_context.alloc_ptr);
GCHeapUtilities::GetGCHeap()->FixAllocContext(&t_gc_thread_locals.alloc_context, NULL, NULL);
t_gc_thread_locals.alloc_context.init(); // re-initialize the context.

// Clear out the alloc context pointer for this thread. When TLS is gone, this pointer will point into freed memory.
m_alloc_context = nullptr;
2 changes: 1 addition & 1 deletion src/coreclr/vm/threads.h
@@ -950,7 +950,7 @@ class Thread
gc_alloc_context* m_alloc_context;

public:
inline void InitAllocContext() { LIMITED_METHOD_CONTRACT; m_alloc_context = &t_thread_alloc_context; }
inline void InitAllocContext() { LIMITED_METHOD_CONTRACT; m_alloc_context = &t_gc_thread_locals.alloc_context; }

inline gc_alloc_context *GetAllocContext() { LIMITED_METHOD_CONTRACT; return m_alloc_context; }
