Skip to content
This repository was archived by the owner on Nov 1, 2020. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Android & ARM64: FEATURE_EMULATED_TLS
Android on ARM64 doesn't follow the recommended way of implementing Thread Local Storage (TLS). Instead, it uses a custom solution whenever a __thread variable is used. This solution requires that a C function is always called internally when a TLS variable is accessed. While the compiler and linker take care of all the C/C++ code, the helper code written in assembler needs to be modified.

This implements the changes for the current-thread and thunk-data TLS variables. It would also work on systems that follow the recommended implementation, but because it has a larger overhead than the current inlined solution, it must be explicitly enabled on systems with this issue by defining FEATURE_EMULATED_TLS.
  • Loading branch information
RalfKornmannEnvision committed Sep 13, 2020
commit d1cfbf158f801307dcb66c59b31379365a5d76fb
12 changes: 12 additions & 0 deletions src/Native/Runtime/arm64/AllocFast.S
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAll
LEAF_ENTRY RhpNewFast, _TEXT

// x1 = GetThread()
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_1
#else
INLINE_GETTHREAD x1
#endif

//
// x0 contains EEType pointer
Expand Down Expand Up @@ -135,7 +139,11 @@ NewOutOfMemory:
// x1 == element count
// x2 == string size

#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_3
#else
INLINE_GETTHREAD x3
#endif

// Load potential new object address into x12.
ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
Expand Down Expand Up @@ -196,7 +204,11 @@ StringSizeOverflow:
// x1 == element count
// x2 == array size

#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_3
#else
INLINE_GETTHREAD x3
#endif

// Load potential new object address into x12.
ldr x12, [x3, #OFFSETOF__Thread__m_alloc_context__alloc_ptr]
Expand Down
36 changes: 32 additions & 4 deletions src/Native/Runtime/arm64/ExceptionHandling.S
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,11 @@
ALLOC_THROW_FRAME HARDWARE_EXCEPTION

// x2 = GetThread()
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_2
#else
INLINE_GETTHREAD x2
#endif

add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo*
str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null
Expand Down Expand Up @@ -259,7 +263,11 @@
ALLOC_THROW_FRAME SOFTWARE_EXCEPTION

// x2 = GetThread()
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_2
#else
INLINE_GETTHREAD x2
#endif

// There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return
// address could have been hijacked when we were in that C# code and we must remove the hijack and
Expand Down Expand Up @@ -349,7 +357,11 @@ NotHijacked:
ALLOC_THROW_FRAME SOFTWARE_EXCEPTION

// x2 = GetThread()
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_2
#else
INLINE_GETTHREAD x2
#endif

add x1, sp, #rsp_offsetof_ExInfo // x1 <- ExInfo*
str xzr, [x1, #OFFSETOF__ExInfo__m_exception] // pExInfo->m_exception = null
Expand Down Expand Up @@ -408,7 +420,11 @@ NotHijacked:
//
// clear the DoNotTriggerGc flag, trashes x4-x6
//
INLINE_GETTHREAD x5 // x5 <- Thread*
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_5
#else
INLINE_GETTHREAD x5
#endif

ldr x4, [x5, #OFFSETOF__Thread__m_threadAbortException]
sub x4, x4, x0
Expand Down Expand Up @@ -447,7 +463,11 @@ ClearSuccess_Catch:

// @TODO: add debug-only validation code for ExInfo pop

INLINE_GETTHREAD x1 // x1 <- Thread*
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_1
#else
INLINE_GETTHREAD x1
#endif

// We must unhijack the thread at this point because the section of stack where the hijack is applied
// may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack.
Expand Down Expand Up @@ -517,7 +537,11 @@ NoAbort:
//
// clear the DoNotTriggerGc flag, trashes x2-x4
//
INLINE_GETTHREAD x2 // x2 <- Thread*
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_2
#else
INLINE_GETTHREAD x2
#endif

add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags

Expand Down Expand Up @@ -555,7 +579,11 @@ ClearSuccess:
//
// set the DoNotTriggerGc flag, trashes x1-x3
//
INLINE_GETTHREAD x2 // x2 <- Thread*
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_2
#else
INLINE_GETTHREAD x2
#endif

add x12, x2, #OFFSETOF__Thread__m_ThreadStateFlags
SetRetry:
Expand Down
6 changes: 6 additions & 0 deletions src/Native/Runtime/arm64/InteropThunksHelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,11 @@ POINTER_SIZE = 0x08
// Custom calling convention:
// xip0 pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers)

#ifdef FEATURE_EMULATED_TLS
GETTHUNKDATA_ETLS_9
#else
INLINE_GET_TLS_VAR x9, tls_thunkData
#endif

// x9 = base address of TLS data
// xip0 = address of context cell in thunk's data
Expand All @@ -45,6 +49,7 @@ POINTER_SIZE = 0x08
LEAF_END RhGetCommonStubAddress, _TEXT


#ifndef FEATURE_EMULATED_TLS
//
// IntPtr RhGetCurrentThunkContext()
//
Expand All @@ -57,3 +62,4 @@ POINTER_SIZE = 0x08
ret

LEAF_END RhGetCurrentThunkContext, _TEXT
#endif //FEATURE_EMULATED_TLS
14 changes: 12 additions & 2 deletions src/Native/Runtime/arm64/PInvoke.S
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,12 @@ NoAbort:
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
LEAF_ENTRY RhpReversePInvoke, _TEXT

INLINE_GETTHREAD x10 // x10 = Thread
#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_10
#else
INLINE_GETTHREAD x10
#endif

str x10, [x9, #8] // save Thread pointer for RhpReversePInvokeReturn

// x9 = reverse pinvoke frame
Expand Down Expand Up @@ -287,8 +292,13 @@ NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
str x9, [x0, #OFFSETOF__PInvokeTransitionFrame__m_Flags]

// get TLS global variable address
// r0 = GetThread()

#ifdef FEATURE_EMULATED_TLS
GETTHREAD_ETLS_10
#else
INLINE_GETTHREAD x10
#endif

str x10, [x0, #OFFSETOF__PInvokeTransitionFrame__m_pThread]
str x0, [x10, #OFFSETOF__Thread__m_pTransitionFrame]

Expand Down
12 changes: 12 additions & 0 deletions src/Native/Runtime/unix/PalRedhawkUnix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,18 @@ thread_local TlsDestructionMonitor tls_destructionMonitor;
// This thread local variable is used for delegate marshalling
DECLSPEC_THREAD intptr_t tls_thunkData;

#ifdef FEATURE_EMULATED_TLS
// Returns the address of the calling thread's tls_thunkData slot.
// Because the TLS access happens here in compiled code, assembly helpers can
// obtain the slot with a plain call; the GETTHUNKDATA_ETLS_9 macro does so
// via `bl RhpGetThunkData`.
EXTERN_C intptr_t* RhpGetThunkData()
{
    intptr_t* pThunkData = &tls_thunkData;
    return pThunkData;
}

// Returns the value currently stored in the calling thread's tls_thunkData.
// This C++ version stands in for the arm64 assembly RhGetCurrentThunkContext,
// which is compiled out when FEATURE_EMULATED_TLS is defined.
EXTERN_C intptr_t RhGetCurrentThunkContext()
{
    const intptr_t thunkContext = tls_thunkData;
    return thunkContext;
}
#endif //FEATURE_EMULATED_TLS

// Attach thread to PAL.
// It can be called multiple times for the same thread.
// It fails fast if a different thread was already registered.
Expand Down
91 changes: 87 additions & 4 deletions src/Native/Runtime/unix/unixasmmacrosarm64.inc
Original file line number Diff line number Diff line change
Expand Up @@ -172,13 +172,96 @@ C_FUNC(\Name):
add \target, \target, #:tprel_lo12_nc:\var
.endm

// INLINE_GETTHREAD target
//
// Expands INLINE_GET_TLS_VAR for the tls_CurrentThread TLS variable and leaves
// the result in \target. Call sites use the result as the base register for
// Thread field accesses (e.g. [xN, #OFFSETOF__Thread__...]).
//
//   target - general-purpose register that receives the result
//
// This is the inlined (non-emulated-TLS) path; builds that define
// FEATURE_EMULATED_TLS use the GETTHREAD_ETLS_<n> macros instead.
.macro INLINE_GETTHREAD target
INLINE_GET_TLS_VAR \target, tls_CurrentThread
.endm

// GETTHREAD_ETLS_1
//
// Emulated-TLS variant of "INLINE_GETTHREAD x1": calls RhpGetThread and
// leaves its return value in x1, preserving x0, fp and lr.
//
// Frame (32 bytes, keeps sp 16-byte aligned):
//   [sp, #0x00]  fp, lr   (stored by PROLOG_SAVE_REG_PAIR_INDEXED)
//   [sp, #0x10]  x0
//   [sp, #0x18]  unused padding
//
// NOTE(review): only x0 is saved around the call. Any other caller-saved
// register (x2-x17, SIMD/FP registers, flags) must be assumed clobbered by
// RhpGetThread -- confirm no call site keeps a live value there.
.macro GETTHREAD_ETLS_1
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32   // push frame, store fp/lr at its base
        str     x0, [sp, #0x10]                     // x0 is live at the call sites; keep it

        bl      RhpGetThread                        // result comes back in x0
        mov     x1, x0                              // x1 <- RhpGetThread()

        ldr     x0, [sp, #0x10]                     // restore the caller's x0
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32 // reload fp/lr and pop the frame
.endm

// GETTHREAD_ETLS_2
//
// Emulated-TLS variant of "INLINE_GETTHREAD x2": calls RhpGetThread and
// leaves its return value in x2, preserving x0, x1, fp and lr.
//
// Frame (32 bytes, keeps sp 16-byte aligned):
//   [sp, #0x00]  fp, lr   (stored by PROLOG_SAVE_REG_PAIR_INDEXED)
//   [sp, #0x10]  x0, x1
//
// NOTE(review): only x0-x1 are saved around the call. Any other caller-saved
// register (x3-x17, SIMD/FP registers, flags) must be assumed clobbered by
// RhpGetThread -- confirm no call site keeps a live value there.
.macro GETTHREAD_ETLS_2
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-32   // push frame, store fp/lr at its base
        stp     x0, x1, [sp, #0x10]                 // keep the live low registers

        bl      RhpGetThread                        // result comes back in x0
        mov     x2, x0                              // x2 <- RhpGetThread()

        ldp     x0, x1, [sp, #0x10]                 // restore the caller's x0/x1
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #32 // reload fp/lr and pop the frame
.endm

// GETTHREAD_ETLS_3
//
// Emulated-TLS variant of "INLINE_GETTHREAD x3": calls RhpGetThread and
// leaves its return value in x3, preserving x0-x2, fp and lr.
//
// Frame (48 bytes, keeps sp 16-byte aligned):
//   [sp, #0x00]  fp, lr   (stored by PROLOG_SAVE_REG_PAIR_INDEXED)
//   [sp, #0x10]  x0, x1
//   [sp, #0x20]  x2
//   [sp, #0x28]  unused padding
//
// NOTE(review): registers other than x0-x2/fp/lr are not saved around the
// call -- confirm no call site keeps a live value in x4-x17 or SIMD/FP regs.
.macro GETTHREAD_ETLS_3
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-48   // push frame, store fp/lr at its base
        str     x2, [sp, #0x20]
        stp     x0, x1, [sp, #0x10]

        bl      RhpGetThread                        // result comes back in x0
        mov     x3, x0                              // x3 <- RhpGetThread()

        // Reload the saved registers; this overwrites the x0 return value,
        // which has already been copied to x3.
        ldp     x0, x1, [sp, #0x10]
        ldr     x2, [sp, #0x20]
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #48 // reload fp/lr and pop the frame
.endm

// GETTHREAD_ETLS_5
//
// Emulated-TLS variant of "INLINE_GETTHREAD x5": calls RhpGetThread and
// leaves its return value in x5, preserving x0-x4, fp and lr.
//
// Frame (64 bytes, keeps sp 16-byte aligned):
//   [sp, #0x00]  fp, lr   (stored by PROLOG_SAVE_REG_PAIR_INDEXED)
//   [sp, #0x10]  x0, x1
//   [sp, #0x20]  x2, x3
//   [sp, #0x30]  x4
//   [sp, #0x38]  unused padding
//
// NOTE(review): registers other than x0-x4/fp/lr are not saved around the
// call -- confirm no call site keeps a live value in x6-x17 or SIMD/FP regs.
.macro GETTHREAD_ETLS_5
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-64   // push frame, store fp/lr at its base
        str     x4, [sp, #0x30]
        stp     x2, x3, [sp, #0x20]
        stp     x0, x1, [sp, #0x10]

        bl      RhpGetThread                        // result comes back in x0
        mov     x5, x0                              // x5 <- RhpGetThread()

        // Reload the saved registers; the x0 return value is already in x5.
        ldp     x0, x1, [sp, #0x10]
        ldp     x2, x3, [sp, #0x20]
        ldr     x4, [sp, #0x30]
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #64 // reload fp/lr and pop the frame
.endm

// GETTHREAD_ETLS_10
//
// Emulated-TLS variant of "INLINE_GETTHREAD x10": calls RhpGetThread and
// leaves its return value in x10, preserving x0-x9, fp and lr.
//
// Frame (96 bytes, keeps sp 16-byte aligned):
//   [sp, #0x00]  fp, lr   (stored by PROLOG_SAVE_REG_PAIR_INDEXED)
//   [sp, #0x10]  x0, x1
//   [sp, #0x20]  x2, x3
//   [sp, #0x30]  x4, x5
//   [sp, #0x40]  x6, x7
//   [sp, #0x50]  x8, x9
//
// NOTE(review): x11-x17 and the SIMD/FP registers are not saved around the
// call -- confirm no call site keeps a live value there (e.g. FP arguments).
.macro GETTHREAD_ETLS_10
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-96   // push frame, store fp/lr at its base
        stp     x8, x9, [sp, #0x50]
        stp     x6, x7, [sp, #0x40]
        stp     x4, x5, [sp, #0x30]
        stp     x2, x3, [sp, #0x20]
        stp     x0, x1, [sp, #0x10]

        bl      RhpGetThread                        // result comes back in x0
        mov     x10, x0                             // x10 <- RhpGetThread()

        // Reload the saved registers; the x0 return value is already in x10.
        ldp     x0, x1, [sp, #0x10]
        ldp     x2, x3, [sp, #0x20]
        ldp     x4, x5, [sp, #0x30]
        ldp     x6, x7, [sp, #0x40]
        ldp     x8, x9, [sp, #0x50]
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #96 // reload fp/lr and pop the frame
.endm

// GETTHUNKDATA_ETLS_9
//
// Emulated-TLS variant of "INLINE_GET_TLS_VAR x9, tls_thunkData": calls
// RhpGetThunkData and leaves its return value (the address of the calling
// thread's tls_thunkData) in x9, preserving x0-x8, xip0, fp and lr.
//
// Frame (96 bytes, keeps sp 16-byte aligned):
//   [sp, #0x00]  fp, lr   (stored by PROLOG_SAVE_REG_PAIR_INDEXED)
//   [sp, #0x10]  x0, x1
//   [sp, #0x20]  x2, x3
//   [sp, #0x30]  x4, x5
//   [sp, #0x40]  x6, x7
//   [sp, #0x50]  x8, xip0
//
// NOTE(review): the SIMD/FP registers are not saved around the call. If a
// thunk can be entered with floating-point arguments live, confirm that
// RhpGetThunkData cannot clobber them.
.macro GETTHUNKDATA_ETLS_9
        PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, #-96   // push frame, store fp/lr at its base
        stp     x8, xip0, [sp, #0x50]               // xip0 carries the thunk data block pointer
        stp     x6, x7, [sp, #0x40]
        stp     x4, x5, [sp, #0x30]
        stp     x2, x3, [sp, #0x20]
        stp     x0, x1, [sp, #0x10]

        bl      RhpGetThunkData                     // result comes back in x0
        mov     x9, x0                              // x9 <- RhpGetThunkData()

        // Reload the saved registers; the x0 return value is already in x9.
        ldp     x0, x1, [sp, #0x10]
        ldp     x2, x3, [sp, #0x20]
        ldp     x4, x5, [sp, #0x30]
        ldp     x6, x7, [sp, #0x40]
        ldp     x8, xip0, [sp, #0x50]
        EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, #96 // reload fp/lr and pop the frame
.endm

.macro ArmInterlockedOperationBarrier
Expand Down