Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 25 additions & 45 deletions src/coreclr/src/vm/amd64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -52,17 +52,12 @@
# we can align to 16 and be guaranteed to not exceed the frame size
.equ STACK_FUDGE_FACTOR, 0x8

# Space to keep xmm0 and xmm1
.equ SIZEOF_FP_ARG_SPILL, 0x10*2

.equ OFFSETOF_FP_ARG_SPILL, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + STACK_FUDGE_FACTOR

# SIZEOF_STACK_FRAME is how many bytes we reserve in our ELT helpers below
# There are two components: the first is space for the profiler platform specific
# data struct that we spill the registers to (the xmm registers are restored from
# its flt0-flt7 fields, so no separate xmm0/xmm1 spill area is reserved), and the
# second is STACK_FUDGE_FACTOR (8) bytes of padding so that aligning to 16 bytes
# is guaranteed to not exceed the frame size.
.equ SIZEOF_STACK_FRAME, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + SIZEOF_FP_ARG_SPILL + STACK_FUDGE_FACTOR
.equ SIZEOF_STACK_FRAME, SIZEOF_PROFILE_PLATFORM_SPECIFIC_DATA + STACK_FUDGE_FACTOR

.equ PROFILE_ENTER, 0x1
.equ PROFILE_LEAVE, 0x2
Expand Down Expand Up @@ -131,15 +126,6 @@ NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
mov r10, 0x1 # PROFILE_ENTER
mov [rsp + 0xa8], r10d # -- struct flags field

# get aligned stack ptr (rsp + OFFSETOF_FP_ARG_SPILL) & (-16)
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16

# we need to be able to restore the fp return register
# save fp return registers
movdqa [rax + 0x00], xmm0
movdqa [rax + 0x10], xmm1

END_PROLOGUE

# rdi already contains the clientInfo
Expand All @@ -148,10 +134,14 @@ NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
call C_FUNC(ProfileEnter)

# restore fp arg registers (xmm0-xmm7) from the struct flt0-flt7 fields
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16
movdqa xmm0, [rax + 0x00]
movdqa xmm1, [rax + 0x10]
movsd xmm0, real8 ptr [rsp + 0x38] # -- struct flt0 field
movsd xmm1, real8 ptr [rsp + 0x40] # -- struct flt1 field
movsd xmm2, real8 ptr [rsp + 0x48] # -- struct flt2 field
movsd xmm3, real8 ptr [rsp + 0x50] # -- struct flt3 field
movsd xmm4, real8 ptr [rsp + 0x58] # -- struct flt4 field
movsd xmm5, real8 ptr [rsp + 0x60] # -- struct flt5 field
movsd xmm6, real8 ptr [rsp + 0x68] # -- struct flt6 field
movsd xmm7, real8 ptr [rsp + 0x70] # -- struct flt7 field

# restore arg registers
mov rdi, [rsp + 0x78]
Expand Down Expand Up @@ -216,26 +206,21 @@ NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
mov r10, 0x2 # PROFILE_LEAVE
mov [rsp + 0xa8], r10d # flags -- struct flags field

# get aligned stack ptr (rsp + OFFSETOF_FP_ARG_SPILL) & (-16)
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16

# we need to be able to restore the fp return register
# save fp return registers
movdqa [rax + 0x00], xmm0
movdqa [rax + 0x10], xmm1

END_PROLOGUE

# rdi already contains the clientInfo
lea rsi, [rsp + 0x0]
call C_FUNC(ProfileLeave)

# restore fp return registers
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16
movdqa xmm0, [rax + 0x00]
movdqa xmm1, [rax + 0x10]
movsd xmm0, real8 ptr [rsp + 0x38] # -- struct flt0 field
movsd xmm1, real8 ptr [rsp + 0x40] # -- struct flt1 field
movsd xmm2, real8 ptr [rsp + 0x48] # -- struct flt2 field
movsd xmm3, real8 ptr [rsp + 0x50] # -- struct flt3 field
movsd xmm4, real8 ptr [rsp + 0x58] # -- struct flt4 field
movsd xmm5, real8 ptr [rsp + 0x60] # -- struct flt5 field
movsd xmm6, real8 ptr [rsp + 0x68] # -- struct flt6 field
movsd xmm7, real8 ptr [rsp + 0x70] # -- struct flt7 field

# restore int return register
mov rax, [rsp + 0x28]
Expand Down Expand Up @@ -295,26 +280,21 @@ NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
mov r10, 0x2 # PROFILE_LEAVE
mov [rsp + 0xa8], r10d # flags -- struct flags field

# get aligned stack ptr (rsp + OFFSETOF_FP_ARG_SPILL) & (-16)
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16

# we need to be able to restore the fp return register
# save fp return registers
movdqa [rax + 0x00], xmm0
movdqa [rax + 0x10], xmm1

END_PROLOGUE

# rdi already contains the clientInfo
lea rsi, [rsp + 0x0]
call C_FUNC(ProfileTailcall)

# restore fp return registers
lea rax, [rsp + OFFSETOF_FP_ARG_SPILL]
and rax, -16
movdqa xmm0, [rax + 0x00]
movdqa xmm1, [rax + 0x10]
movsd xmm0, real8 ptr [rsp + 0x38] # -- struct flt0 field
movsd xmm1, real8 ptr [rsp + 0x40] # -- struct flt1 field
movsd xmm2, real8 ptr [rsp + 0x48] # -- struct flt2 field
movsd xmm3, real8 ptr [rsp + 0x50] # -- struct flt3 field
movsd xmm4, real8 ptr [rsp + 0x58] # -- struct flt4 field
movsd xmm5, real8 ptr [rsp + 0x60] # -- struct flt5 field
movsd xmm6, real8 ptr [rsp + 0x68] # -- struct flt6 field
movsd xmm7, real8 ptr [rsp + 0x70] # -- struct flt7 field

# restore int return register
mov rax, [rsp + 0x28]
Expand Down
44 changes: 37 additions & 7 deletions src/coreclr/src/vm/amd64/profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ ProfileArgIterator::ProfileArgIterator(MetaSig * pSig, void * platformSpecificHa
PROFILE_PLATFORM_SPECIFIC_DATA* pData = (PROFILE_PLATFORM_SPECIFIC_DATA*)m_handle;
#ifdef UNIX_AMD64_ABI
m_bufferPos = 0;
ZeroMemory(pData->buffer, PROFILE_PLATFORM_SPECIFIC_DATA_BUFFER_SIZE * sizeof(UINT64));
#endif // UNIX_AMD64_ABI

// unwind a frame and get the Rsp for the profiled method to make sure it matches
Expand Down Expand Up @@ -483,17 +484,46 @@ LPVOID ProfileArgIterator::GetReturnBufferAddr(void)
// by our calling convention, but is required by our profiler spec.
return (LPVOID)pData->rax;
}

CorElementType t = m_argIterator.GetSig()->GetReturnType();
if (ELEMENT_TYPE_VOID != t)
if (ELEMENT_TYPE_VOID == t)
{
return NULL;
}

#ifdef UNIX_AMD64_ABI
if (m_argIterator.GetSig()->GetReturnTypeSize() == 16)
{
if (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t)
pData->rax = pData->flt0;
_ASSERTE(m_bufferPos == 0 && "Nothing else should be using the scratch space during a return");

// The unix x64 ABI has a special case where a 16 byte struct is returned in registers,
// and if it has both integer and float fields they are returned in rax/rdx and xmm0/xmm1,
// respectively, which means the values are noncontiguous. Just like the argument passing above
// we copy them into the buffer to fake the struct being contiguous.
UINT flags = m_argIterator.GetFPReturnSize();

return &(pData->rax);
// The lower two bits indicate which 8-byte halves of the struct are floating point:
// bit 0 covers the first half and bit 1 the second; a clear bit means that half is an integer.
if (flags & 1)
{
pData->buffer[0] = pData->flt0;
pData->buffer[1] = (flags & 2) ? pData->flt1 : pData->rax;
}
else
{
pData->buffer[0] = pData->rax;
pData->buffer[1] = (flags & 2) ? pData->flt0 : pData->rdx;
}

return pData->buffer;
}
else
return NULL;
#endif // UNIX_AMD64_ABI

if (ELEMENT_TYPE_R4 == t || ELEMENT_TYPE_R8 == t)
{
pData->rax = pData->flt0;
}

return &(pData->rax);
}

#undef PROFILE_ENTER
Expand Down
201 changes: 77 additions & 124 deletions src/coreclr/src/vm/arm/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -366,134 +366,87 @@ LEAF_ENTRY JIT_ProfilerEnterLeaveTailcallStub, _TEXT
bx lr
LEAF_END JIT_ProfilerEnterLeaveTailcallStub, _TEXT

//
// EXTERN_C void ProfileEnterNaked(FunctionIDOrClientID functionIDOrClientID);
//
NESTED_ENTRY ProfileEnterNaked, _TEXT, NoHandler
PROLOG_PUSH "{r4, r5, r7, r11, lr}"
PROLOG_STACK_SAVE_OFFSET r7, #8

// fields of PROFILE_PLATFORM_SPECIFIC_DATA, in reverse order

// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *r11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
movw r4, #1
push { /* flags */ r4 }
movw r4, #0
push { /* hiddenArg */ r4 }
add r5, r11, #8
push { /* profiledSp */ r5 }
add r5, sp, #32
push { /* probeSp */ r5 }
push { /* functionId */ r0 }
#define PROFILE_ENTER 1
#define PROFILE_LEAVE 2
#define PROFILE_TAILCALL 4
// size of profiler data structure plus alignment padding
#define SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA 104+4

// typedef struct _PROFILE_PLATFORM_SPECIFIC_DATA
// {
// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *R11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
// } PROFILE_PLATFORM_SPECIFIC_DATA, *PPROFILE_PLATFORM_SPECIFIC_DATA;

.macro GenerateProfileHelper helper, flags
NESTED_ENTRY \helper\()Naked, _TEXT, NoHandler
PROLOG_PUSH "{r0,r3,r9,r12}"

// reserve space for the 5 fields (functionId, probeSp, profiledSp, hiddenArg, flags)
// that are stored directly and do not need to be popped, plus 4 bytes of alignment
alloc_stack 6*4

// push fp regs
vpush.64 { d0 - d7 }
push { lr }
push { r11 }
push { /* return value, r4 is NULL */ r4 }
push { /* return value, r4 is NULL */ r4 }
mov r1, sp
bl C_FUNC(ProfileEnter)
EPILOG_STACK_RESTORE_OFFSET r7, #8
EPILOG_POP "{r4, r5, r7, r11, pc}"
NESTED_END ProfileEnterNaked, _TEXT

//
// EXTERN_C void ProfileLeaveNaked(FunctionIDOrClientID functionIDOrClientID);
//
NESTED_ENTRY ProfileLeaveNaked, _TEXT, NoHandler
PROLOG_PUSH "{r1, r2, r4, r5, r7, r11, lr}"
PROLOG_STACK_SAVE_OFFSET r7, #16

// fields of PROFILE_PLATFORM_SPECIFIC_DATA, in reverse order

// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *r11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
movw r4, #2
push { /* flags */ r4 }
movw r4, #0
push { /* hiddenArg */ r4 }
add r5, r11, #8
push { /* profiledSp */ r5 }
add r5, sp, #40
push { /* probeSp */ r5 }
push { /* functionId */ r0 }
vpush.64 { d0 - d7 }
push { lr }
push { r11 }
push { r1 }
push { r0 }
mov r1, sp
bl C_FUNC(ProfileLeave)
EPILOG_STACK_RESTORE_OFFSET r7, #16
EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}"
NESTED_END ProfileLeaveNaked, _TEXT
// next three fields pc, r11, r1
push { r1, r11, lr}

//
// EXTERN_C void ProfileTailcallNaked(FunctionIDOrClientID functionIDOrClientID);
//
NESTED_ENTRY ProfileTailcallNaked, _TEXT, NoHandler
PROLOG_PUSH "{r1, r2, r4, r5, r7, r11, lr}"
PROLOG_STACK_SAVE_OFFSET r7, #16

// fields of PROFILE_PLATFORM_SPECIFIC_DATA, in reverse order

// UINT32 r0; // Keep r0 & r1 contiguous to make returning 64-bit results easier
// UINT32 r1;
// void *r11;
// void *Pc;
// union // Float arg registers as 32-bit (s0-s15) and 64-bit (d0-d7)
// {
// UINT32 s[16];
// UINT64 d[8];
// };
// FunctionID functionId;
// void *probeSp; // stack pointer of managed function
// void *profiledSp; // location of arguments on stack
// LPVOID hiddenArg;
// UINT32 flags;
movw r4, #2
push { /* flags */ r4 }
movw r4, #0
push { /* hiddenArg */ r4 }
add r5, r11, #8
push { /* profiledSp */ r5 }
add r5, sp, #40
push { /* probeSp */ r5 }
push { /* functionId */ r0 }
vpush.64 { d0 - d7 }
push { lr }
push { r11 }
push { r1 }
push { r0 }
// return value is in r2 instead of r0 because functionID is passed in r0
push { r2 }

CHECK_STACK_ALIGNMENT

// set the other args, starting with functionID
str r0, [sp, #80]

// probeSp is the original sp when this stub was called
add r2, sp, SIZEOF__PROFILE_PLATFORM_SPECIFIC_DATA+20
str r2, [sp, #84]

// get the address of the arguments from the frame pointer, store in profiledSp
add r2, r11, #8
str r2, [sp, #88]

// clear hiddenArg
movw r2, #0
str r2, [sp, #92]

// set the flag to indicate what hook this is
movw r2, \flags
str r2, [sp, #96]

// sp is the address of PROFILE_PLATFORM_SPECIFIC_DATA, then call to C++
mov r1, sp
bl C_FUNC(ProfileTailcall)
EPILOG_STACK_RESTORE_OFFSET r7, #16
EPILOG_POP "{r1, r2, r4, r5, r7, r11, pc}"
NESTED_END ProfileTailcallNaked, _TEXT
bl C_FUNC(\helper)

// restore all our regs
pop { r2 }
pop { r1, r11, lr}
vpop.64 { d0 - d7 }

free_stack 6*4

EPILOG_POP "{r0,r3,r9,r12}"

bx lr
NESTED_END \helper\()Naked, _TEXT
.endm

GenerateProfileHelper ProfileEnter, PROFILE_ENTER
GenerateProfileHelper ProfileLeave, PROFILE_LEAVE
GenerateProfileHelper ProfileTailcall, PROFILE_TAILCALL

#endif

Expand Down
Loading